3 Star 0 Fork 0

LJ / Soccers

加入 Gitee
与超过 1200万 开发者一起发现、参与优秀开源项目,私有仓库也完全免费 :)
免费加入
克隆/下载
getDataTest.py 1.90 KB
一键复制 编辑 原始数据 按行查看 历史
LJ 提交于 2015-03-01 19:05 . 添加spider Match500 和其它
#coding=utf-8
__author__ = 'Administrator'
from urllib2 import urlopen
import urlparse
import re
import bs4
import codecs
from datetime import date
import time
import twisted
def get_match(url):
    """Fetch an odds page and return its match rows as lists of cell texts.

    The page is downloaded with ``urlopen`` and parsed with BeautifulSoup.
    The table containing the first element with CSS class ``tr1`` is scanned
    row by row:

    * a row with exactly one ``<td>`` sets the current day label;
    * a row whose ``class`` attribute is exactly ``tr1`` or ``tr2`` yields
      one match: the current day label followed by the text of each cell;
    * every other row is ignored.

    :param url: address of the page to download.
    :return: list of matches, each a list of strings whose first item is
        the day label. An empty list is returned when the page has no
        ``tr1`` element located inside a table.
    """
    response = urlopen(url)
    try:
        html = response.read()
        content_type = response.headers.get('content-type') or ''
    finally:
        # Release the connection even if reading the body fails.
        response.close()

    # Fall back to GBK unless the server declares a charset. The pattern
    # stops at ';', quotes or whitespace so that trailing header parameters
    # are not swallowed into the encoding name.
    encoding = "gbk"
    declared = re.search(r'charset=["\']?([^\s;"\']+)', content_type, re.I)
    if declared:
        encoding = declared.group(1).lower()

    soup = bs4.BeautifulSoup(html, from_encoding=encoding)
    anchors = soup.select('.tr1')
    if not anchors:
        return []
    table = anchors[0].find_parent("table")
    if table is None:
        return []

    matches = []
    # Stays empty if a match row shows up before any day row.
    the_day = u''
    for row in table.find_all('tr'):
        cells = row.find_all('td')
        if len(cells) == 1:
            the_day = cells[0].get_text()
        elif row.get('class') in (['tr1'], ['tr2']):
            # .get() tolerates rows that carry no class attribute at all,
            # where row['class'] would raise KeyError.
            match = [the_day]
            match.extend(cell.get_text() for cell in cells)
            matches.append(match)
    return matches
# Scrape the odds.500.com page and dump every match as one comma-separated
# line into a text file named after today's date.
matches = get_match("http://odds.500.com/ouzhi.php?cid=293")
# Alternative source:
# matches = get_match("http://caipiao.163.com/order/preBet_jczqspfmixp.html")
filename = "soccerData" + str(date.today()) + ".txt"
print(filename)
# ``with`` guarantees the file is closed even if a write fails part-way.
with codecs.open(filename, "w", "utf-8") as out_file:
    # Start the file with a UTF-8 byte-order mark.
    out_file.write(codecs.BOM_UTF8.decode("utf-8"))
    for match in matches:
        out = ",".join(match)
        out_file.write(out + "\n")
        print(out)
# Free feeds/APIs for getting odds on football events
# 1. Pinnacle Sports - http://xml.pinnaclesports.com/xmlfeed.asp
#
# 2. William Hill - http://banners.willhill.com/xml/
#
# 3. Gamebookers - http://xml.gamebookers.com/
#
# 4. SportingBet - http://xmlsports.sportingodds.com/xmlprices.asp?sp=all
#
# 5. Expekt - http://www.expekt.com/exportServlet?category=SOC%25
#
# 6. CentreBet - http://xmlfeed.centrebet.com/
Python
1
https://gitee.com/nikytwo/Soccers.git
git@gitee.com:nikytwo/Soccers.git
nikytwo
Soccers
Soccers
master

搜索帮助