# Soccers/getDataTest.py

#coding=utf-8

__author__ = 'Administrator'

from urllib2 import urlopen
import urlparse
import re
import bs4
import codecs
from datetime import date
import time
import twisted


def get_match(url):
    """Download an odds page and return its match rows as lists of cell text.

    The page is searched for the first element with class ``tr1``; every
    ``<tr>`` of the table that contains it is then scanned:

    * a row with exactly one ``<td>`` updates the current heading text
      (presumably the match day -- confirm against the live page);
    * a row whose class is exactly ``tr1`` or ``tr2`` becomes one result
      entry: the current heading followed by the text of each cell;
    * any other row is ignored.

    Args:
        url: Address of the page to fetch.

    Returns:
        A list of rows, each ``[heading_text, cell_text, ...]``. The list
        is empty when the page has no ``.tr1`` element inside a table.
    """
    response = urlopen(url)
    try:
        html = response.read()
        content_type = response.headers.get('content-type') or ''
    finally:
        # Release the connection even when reading the body fails.
        response.close()

    # Fall back to GBK unless the server declares a charset. Stop at the
    # first delimiter so trailing parameters or quotes are not swallowed.
    encoding = "gbk"
    declared = re.search(r'charset=["\']?([^\s;"\']+)', content_type, re.I)
    if declared:
        encoding = declared.group(1).lower()

    soup = bs4.BeautifulSoup(html, from_encoding=encoding)
    anchor_rows = soup.select('.tr1')
    if not anchor_rows:
        return []
    table = anchor_rows[0].find_parent("table")
    if table is None:
        return []

    matches = []
    # Empty heading until the first single-cell row is seen, so a match row
    # that comes first does not hit an unbound variable.
    the_day = u""
    for tr in table.find_all('tr'):
        tds = tr.find_all('td')
        if len(tds) == 1:
            the_day = tds[0].get_text()
        # .get() returns None for rows without a class attribute instead of
        # raising KeyError the way tr['class'] does.
        elif tr.get('class') in (['tr1'], ['tr2']):
            matches.append([the_day] + [td.get_text() for td in tds])

    return matches


# Scrape the odds listing and dump it as comma-separated text, one match per
# line, into a file named after today's date in the working directory.
matches = get_match("http://odds.500.com/ouzhi.php?cid=293")
# Alternative source URL kept for reference:
# matches = get_match("http://caipiao.163.com/order/preBet_jczqspfmixp.html")

filename = "soccerData" + str(date.today()) + ".txt"
print(filename)
# The context manager closes the file even if a write or print fails midway.
with codecs.open(filename, "w", "utf-8") as out_file:
    # Start the file with a UTF-8 byte-order mark.
    out_file.write(codecs.BOM_UTF8.decode("utf-8"))
    for match in matches:
        out = ",".join(match)
        out_file.write(out + "\n")
        # Function-call form matches print(filename) above and prints the
        # same text as the old `print out` statement.
        print(out)

# Free feeds/APIs for getting odds for football events:

# 1. Pinnacle Sports - http://xml.pinnaclesports.com/xmlfeed.asp
#
# 2. William Hill - http://banners.willhill.com/xml/
#
# 3. Gamebookers - http://xml.gamebookers.com/
#
# 4. SportingBet - http://xmlsports.sportingodds.com/xmlprices.asp?sp=all
#
# 5. Expekt - http://www.expekt.com/exportServlet?category=SOC%25
#
# 6. CentreBet - http://xmlfeed.centrebet.com/