# ebook_downloader/Grad_design_last_two.py

# -*- coding: utf-8 -*-
# @Time    : 2018/12/6 15:50
# @Author  : wjh
# @File    : Grad_design_last_two.py
import requests
import pdfkit
import time
import os
import re
import sys

from PyQt5.QtWidgets import *
from PyQt5.QtGui import *
from lxml import etree
from pyquery import PyQuery as pq


from BS import Ui_MainWindow


class GradDesign(QMainWindow, Ui_MainWindow):
    """Main window of the novel downloader.

    Searches ``self.url`` for the keyword typed into the input box, lists the
    matching books in the ``out`` widget and downloads every chapter of the
    selected book as txt and/or pdf files into a folder named after the book.
    """

    # Characters that cannot appear in a file name on Windows.
    _ILLEGAL_FILENAME_CHARS = re.compile(r'[\\/:*?"<>|]')
    # Private-use-area code points stripped from the chapter text
    # (presumably icon-font glyphs of the site -- not verifiable from here).
    _ICON_GLYPHS = str.maketrans('', '', '\ue60c\ue650\ue64f\ue653')

    def __init__(self):
        """Build the UI, decorate the widgets and initialise the search state."""
        super().__init__()
        self.setupUi(self)
        # Application icon (forward slashes work on every platform).
        self.setWindowIcon(QIcon('./image/title_icon.ico'))
        # Button icons
        self.search.setIcon(QIcon(QPixmap('./image/search.ico')))
        self.download.setIcon(QIcon(QPixmap('./image/download.ico')))
        # Window title
        self.setWindowTitle('小说下载器')
        # Centre the window on the screen.  QWidget.move() only accepts
        # integers, so floor division is required.
        screen = QDesktopWidget().screenGeometry()
        size = self.geometry()
        self.move((screen.width() - size.width()) // 2,
                  (screen.height() - size.height()) // 2)
        # Forbid resizing the window
        self.setFixedSize(size.width(), size.height())
        # Tool tips
        QToolTip.setFont(QFont('SansSerif', 10))
        self.txt.setToolTip('以<b>txt</b>格式下载保存')
        self.pdf.setToolTip('以<b>pdf</b>格式下载保存')
        self.search.setToolTip('快捷键<b>Alt + s</b>')
        self.download.setToolTip('快捷键<b>Alt + d</b>')
        self.input.setPlaceholderText('例如：斗破苍穹')
        # txt is the default output format
        self.txt.setChecked(True)

        # Search state
        self.url = 'https://www.readnovel.com/search?'
        # Prompt appended below a multi-book result list
        self.b = '\n请输入相应序号并点击下载<Alt+d>' + '\n'
        # {1: url} when the last search returned exactly one book
        self.dic_href_1 = {}
        # {number: url} when the last search returned several books
        self.dic_href_2 = {}

    def _reset_results(self) -> None:
        """Forget the books found by the previous search."""
        self.dic_href_1.clear()
        self.dic_href_2.clear()

    @staticmethod
    def _format_choice(number, title) -> str:
        """Return the line shown for one search result."""
        # The double space reproduces the spacing of the previous listing.
        return f'{number} -  {title}'

    @classmethod
    def _safe_filename(cls, name) -> str:
        """Replace characters that are illegal in file names with ``_``."""
        return cls._ILLEGAL_FILENAME_CHARS.sub('_', str(name))

    def _prepare_folder(self, note_title, suffix) -> str:
        """Create (if needed) and return the download folder for a book."""
        folder = self._safe_filename(note_title) + suffix
        os.makedirs(folder, exist_ok=True)
        return folder

    def _pending_chapters(self, folder, page_href, page_name, extension):
        """Map file name -> chapter url for chapters not yet saved in *folder*.

        Chapters whose file already exists are reported in the ``out`` widget
        and left out of the result.
        """
        pending = {}
        for href, name in zip(page_href, page_name):
            pending[self._safe_filename(name) + extension] = href
        # Iterate over a copy of the keys because entries are deleted.
        for filename in list(pending):
            if os.path.exists(os.path.join(folder, filename)):
                self.out.addItem(f'检测到文件存在，已为您跳过此下载\t{filename}')
                QApplication.processEvents()
                time.sleep(0.2)
                del pending[filename]
        return pending

    def search_name(self):
        """Search the site for the keyword in the input box and list the hits.

        Returns:
            ``None`` normally.  When the request or the parsing fails the
            error message is shown in the ``out`` widget and also returned
            (the return value is kept for backward compatibility).
        """
        name = self.input.text().strip()
        if name == '':
            self.out.addItem('请输入电子书关键字')
            return None
        # Broad catch on purpose: this is a GUI slot, nothing above it can
        # handle an error, so every failure is reported to the user instead.
        try:
            resp_note = requests.get(self.url, params={'kw': name}, timeout=8)
            if resp_note.status_code != 200:
                self.out.addItem('{{{(>_<)}}}搜索出错，请检查网络')
                return None
            code = etree.HTML(resp_note.text)
            doc = pq(resp_note.text)
            title_hrefs = [
                'https:' + title_href
                for title_href in code.xpath('//div[@class="book-mid-info"]/h4/a/@href')
            ]
            # One text per <h4>, so titles containing spaces stay intact.
            title_names = [h4.text() for h4 in doc('.book-mid-info h4').items()]
            if not title_hrefs or not title_names:
                self._reset_results()
                self.out.addItem(f'未找到小说《{name}》请重新搜索')
                return None
            self.find_book(title_hrefs, title_names)
        except Exception:
            message = '连接出现问题，请检查网络'
            self.out.addItem(message)
            return message
        return None

    def find_book(self, note_href, note_title):
        """Store the search results and show them in the ``out`` widget.

        Args:
            note_href: list of book page urls.
            note_title: list of book titles, parallel to *note_href* (a single
                string is accepted for a single book).
        """
        # Results of an earlier search must not leak into this one.
        self._reset_results()
        if isinstance(note_title, str):
            note_title = [note_title]
        books = list(zip(note_href, note_title))
        if not books:
            return

        if len(books) == 1:
            href, title = books[0]
            self.dic_href_1[1] = href
            result = self._format_choice(1, title) + '\n' + '\n请选择格式点击下载'
        else:
            lines = []
            for num, (href, title) in enumerate(books, start=1):
                self.dic_href_2[num] = href
                lines.append(self._format_choice(num, title) + '\n')
            result = ''.join(lines) + self.b
        self.out.addItem(result)
        # Clear the input box for the next step
        self.input.setText('')
        self.input.setPlaceholderText('输入需要下载的电子书')

    def download_book(self):
        """Download the book chosen by the user from the last search."""
        if len(self.dic_href_1) == 1:
            # Exactly one book was found: no choice needed.
            href = next(iter(self.dic_href_1.values()))
        elif len(self.dic_href_2) > 1:
            # Several books were found: the input box holds the number.
            try:
                choice = int(self.input.text())
            except ValueError:
                choice = None
            if choice not in self.dic_href_2:
                QMessageBox.about(self, '(ノ｀Д)ノ', '没有这个选项！')
                return
            href = self.dic_href_2[choice]
        else:
            QMessageBox.about(self, '(￣m￣）', '请先搜索小说再下载！')
            return

        # Broad catch on purpose: this is a GUI slot, so a failing download
        # must end in a message rather than an unhandled exception.
        try:
            self.get_read(href)
        except Exception:
            QMessageBox.about(self, '{{{(>_<)}}}下载出错', '下载出错，请检查网络')

    def get_read(self, note_href):
        """Fetch the chapter list of a book and download it.

        Args:
            note_href: url of the book page.

        Raises:
            requests.RequestException: if the book page cannot be fetched.
        """
        resp = requests.get(note_href, timeout=8)
        if resp.status_code != 200:
            QMessageBox.about(self, '{{{(>_<)}}}搜索出错', '网站出错啦！')
            return
        text = etree.HTML(resp.text)
        # Link of every chapter
        chapter_hrefs = text.xpath(
            '//div[@class="volume-wrap"]//div/ul/li/a/@href')
        page_href = ['https:' + chapter_href for chapter_href in chapter_hrefs]
        # Title of every chapter
        page_name = text.xpath(
            '//div[@class="volume-wrap"]//div/ul/li/a/text()')
        # Title of the book; without it no folder name can be built
        titles = text.xpath('//h1/em/text()')
        if not titles:
            QMessageBox.about(self, '{{{(>_<)}}}搜索出错', '网站出错啦！')
            return
        note_title = titles[0]

        # Save in every selected format
        if self.txt.isChecked():
            self.download_txt(page_href, page_name, note_title)
        if self.pdf.isChecked():
            self.download_pdf(page_href, page_name, note_title)

    def download_pdf(self, page_href, page_name, note_title):
        """Save every chapter as ``<book>pdf文件/<chapter>.pdf``.

        Args:
            page_href: list of chapter urls.
            page_name: list of chapter titles, parallel to *page_href*.
            note_title: title of the book, used for the folder name.
        """
        # Configuration of the pdf generator
        config = pdfkit.configuration(
            wkhtmltopdf=r"wkhtmltopdf.exe")
        # Paths are joined instead of changing the working directory, so a
        # failure can never leave the process inside the download folder.
        folder = self._prepare_folder(note_title, 'pdf文件')
        pending = self._pending_chapters(folder, page_href, page_name, '.pdf')
        for filename, href in pending.items():
            try:
                pdfkit.from_url(href, os.path.join(folder, filename),
                                configuration=config)
            except Exception:
                self.out.addItem(f'{filename}\t下载出错')
                QApplication.processEvents()
            else:
                self.out.addItem('%s\t下载成功' % filename)
                QApplication.processEvents()
                time.sleep(1)
        self.out.addItem('下载完成')
        return None

    def download_txt(self, page_href, page_name, note_title):
        """Save every chapter as ``<book>txt文件/<chapter>.txt``.

        Args:
            page_href: list of chapter urls.
            page_name: list of chapter titles, parallel to *page_href*.
            note_title: title of the book, used for the folder name.
        """
        folder = self._prepare_folder(note_title, 'txt文件')
        pending = self._pending_chapters(folder, page_href, page_name, '.txt')
        for filename, href in pending.items():
            content = self.get_text(href)
            if content is None:
                # Do not create an empty file: it would be mistaken for a
                # finished download and skipped on the next run.
                self.out.addItem(f'{filename}\t下载出错')
                QApplication.processEvents()
                continue
            try:
                with open(os.path.join(folder, filename), 'w',
                          encoding='utf-8') as f:
                    f.write(content)
            except (OSError, UnicodeError):
                self.out.addItem(f'{filename}\t下载出错')
                QApplication.processEvents()
            else:
                self.out.addItem(f'下载成功\t{filename}')
                QApplication.processEvents()
                time.sleep(1)
        self.out.addItem('下载完成')
        return None

    def get_text(self, href):
        """Return the text of one chapter.

        Args:
            href: url of the chapter page.

        Returns:
            The chapter text, or ``None`` when the request fails or the page
            contains no text.
        """
        try:
            resp = requests.get(href, timeout=8)
        except requests.RequestException as e:
            print(e)
            return None
        doc = pq(resp.text)
        cont = (doc('.text-wrap').text()
                .translate(self._ICON_GLYPHS)
                .replace('\ue657举报', ''))
        if cont:
            return cont
        # QMessageBox.about() needs both a title and a text.
        QMessageBox.about(self, '{{{(>_<)}}}', '小说没有内容')
        return None

if __name__ == '__main__':
    # Create the Qt application and the main window.
    application = QApplication(sys.argv)
    main_window = GradDesign()

    # Use the bundled picture as the window background.
    background_picture = QPixmap("./image/background .jpg")
    window_palette = QPalette()
    window_palette.setBrush(QPalette.Background, QBrush(background_picture))
    main_window.setPalette(window_palette)

    # Apply one background colour to every push button.
    main_window.setStyleSheet('''
            QPushButton{
                background-color: #e5fdff
            }
        ''')

    main_window.show()
    # Run the event loop; its return value becomes the process exit code.
    exit_code = application.exec_()
    sys.exit(exit_code)