代码拉取完成,页面将自动刷新
# -*- coding: utf-8 -*-
import xlrd
import requests
import time
import pyquery
import xlwt
def read_excel(src_path=u'E:/paqu/reptilian_01/MetaSploit.xlsx',
               out_path="1-350.xlsx", stop_row=350, timeout=30):
    """Crawl the URLs listed in a workbook and save each page's <code> text.

    The URLs are read from the second column (index 1) of the sheet named
    'Sheet1' in ``src_path``, starting at row index 1 (row 0 is skipped).
    For every row, the row index, the URL and the text of the first
    non-empty ``<code>`` element found are written to columns 0, 1 and 2 of
    a sheet named 'ide', which is saved to ``out_path``.

    Args:
        src_path: Path of the workbook that holds the URLs.
        out_path: Path the result workbook is saved to.
        stop_row: Exclusive upper bound on the row index to process; it is
            clamped to the number of rows actually present in the sheet.
        timeout: Seconds to wait for each HTTP request before giving up.

    Returns:
        None. Results are written to ``out_path`` and progress is printed.

    NOTE(review): xlwt produces the legacy .xls format even though the
    default output name ends in ".xlsx"; the name is kept so existing
    consumers still find the file. Likewise, xlrd releases from 2.0 on only
    read .xls files, so reading the .xlsx source needs an older xlrd.
    """
    workbook = xlrd.open_workbook(src_path)
    sheet = workbook.sheet_by_name('Sheet1')

    out_book = xlwt.Workbook()
    out_sheet = out_book.add_sheet('ide', cell_overwrite_ok=True)

    # The <code> element sits in a different <section> depending on the
    # page layout; the candidates are tried in this order.
    selectors = tuple(
        '#torso > div > article > section:nth-child(%d) > code' % position
        for position in (13, 10, 11, 12)
    )

    # Never index past the last row of the sheet.
    row_limit = min(stop_row, sheet.nrows)

    try:
        for i in range(1, row_limit):
            cell_value = sheet.cell(i, 1).value
            out_sheet.write(i, 0, i)

            # Numeric or otherwise non-text cells cannot be a URL.
            if not hasattr(cell_value, 'encode'):
                out_sheet.write(i, 1, cell_value)
                print(str(i) + 'Crawl failure')
                continue

            rapid = cell_value.encode('utf-8')
            out_sheet.write(i, 1, rapid)

            try:
                page = requests.get(rapid, timeout=timeout).text
            except requests.RequestException:
                # One unreachable or malformed URL must not abort the run.
                page = None

            # Throttle: wait one second between consecutive requests.
            time.sleep(1)

            if page is None:
                print(str(i) + 'Crawl failure')
                continue

            document = pyquery.PyQuery(page)
            for selector in selectors:
                snippet = document.find(selector).text()
                # Truthiness covers an empty string and also None, in case
                # the installed pyquery returns None for an empty match.
                if snippet:
                    print(str(i) + 'climbed successfully')
                    out_sheet.write(i, 2, snippet)
                    break
            else:
                print(str(i) + 'Crawl failure')
    finally:
        # Save whatever was collected, even if the crawl was interrupted.
        out_book.save(out_path)
#print rapid
# print sheet.cell_value(1,0).encode('utf-8')
# print sheet.cell(1,0).ctype
# Script entry point: run the crawl, then print the value read_excel() returns.
print(read_excel())
此处可能存在不合适展示的内容,页面不予展示。您可通过相关编辑功能自查并修改。
如您确认内容无涉及 不当用语 / 纯广告导流 / 暴力 / 低俗色情 / 侵权 / 盗版 / 虚假 / 无价值内容或违法国家有关法律法规的内容,可点击提交进行申诉,我们将尽快为您处理。