1 Star 0 Fork 1

千尺浪 / 爬虫01

加入 Gitee
与超过 1200万 开发者一起发现、参与优秀开源项目,私有仓库也完全免费 :)
免费加入
该仓库未声明开源许可证文件(LICENSE),使用请关注具体项目描述及其代码上游依赖。
克隆/下载
pa.py 2.32 KB
一键复制 编辑 原始数据 按行查看 历史
千尺浪 提交于 2018-11-15 18:03 . 20181115
# -*- coding: utf-8 -*-
import xlrd
import requests
import time
import pyquery
import xlwt
# CSS selectors tried in this order; the first whose <code> element yields
# non-empty text is used (same priority as the original if/elif chain).
_MSF_CODE_SELECTORS = (
    '#torso > div > article > section:nth-child(13) > code',
    '#torso > div > article > section:nth-child(10) > code',
    '#torso > div > article > section:nth-child(11) > code',
    '#torso > div > article > section:nth-child(12) > code',
)


def _first_code_text(doc):
    """Return the first non-empty text matched by _MSF_CODE_SELECTORS, else ""."""
    for selector in _MSF_CODE_SELECTORS:
        text = doc.find(selector).text()
        if text != "":
            return text
    return ""


def read_excel(src_path=u'E:/paqu/reptilian_01/MetaSploit.xlsx',
               out_path="1-350.xlsx", max_row=350, timeout=30):
    """Fetch every URL listed in the source workbook and record a code snippet.

    Reads the second column of sheet ``Sheet1`` (skipping row 0), downloads
    each URL, and looks for a ``<code>`` element under one of the candidate
    sections in ``_MSF_CODE_SELECTORS``.  For every processed row the output
    sheet ``ide`` receives the row index (column 0), the URL (column 1) and,
    when found, the snippet text (column 2).

    Args:
        src_path: Workbook to read the URLs from.
        out_path: Where the result workbook is saved.
        max_row: Exclusive upper bound on the row index to process; it is
            clamped to the number of rows actually present in the sheet.
        timeout: Seconds to wait for each HTTP request before giving up.

    Returns:
        None.  Results are written to ``out_path``; progress is printed.
    """
    workbook = xlrd.open_workbook(src_path)
    sheet = workbook.sheet_by_name('Sheet1')

    out_book = xlwt.Workbook()
    out_sheet = out_book.add_sheet('ide', cell_overwrite_ok=True)

    # Never index past the last populated row of the input sheet.
    last_row = min(max_row, sheet.nrows)

    try:
        for i in range(1, last_row):
            # Keep the cell value as text: encoding it to bytes is not needed
            # by requests and breaks xlwt for non-ASCII content.
            url = sheet.cell(i, 1).value
            out_sheet.write(i, 0, i)
            out_sheet.write(i, 1, url)

            try:
                page = requests.get(url, timeout=timeout).text
            except requests.RequestException:
                # Bad/empty URL, connection error or timeout: treat as a
                # failed row instead of aborting the whole run.
                page = None

            # Throttle: pause one second between consecutive requests.
            time.sleep(1)

            if page is None:
                print(str(i) + 'Crawl failure')
                continue

            code_text = _first_code_text(pyquery.PyQuery(page))
            if code_text != "":
                print(str(i) + 'climbed successfully')
                out_sheet.write(i, 2, code_text)
            else:
                print(str(i) + 'Crawl failure')
    finally:
        # Save whatever was collected, even if an unexpected error stops the
        # loop part-way through.
        # NOTE(review): xlwt produces the legacy .xls format; the ".xlsx"
        # default name is kept only for backward compatibility - confirm
        # whether consumers can open it.
        out_book.save(out_path)
# Run the scrape when this file is executed.  The call's return value is
# printed as well (read_excel() has no explicit return, so this shows "None").
print(read_excel())
1
https://gitee.com/qianchilang/reptilian_01.git
git@gitee.com:qianchilang/reptilian_01.git
qianchilang
reptilian_01
爬虫01
master

搜索帮助