# karu/markdown.py

import html2text
import requests
import re

from bs4 import BeautifulSoup


class MarkDown:
    """Download a web page and save it as a Markdown file named after its title.

    Attributes:
        html: The page body decoded as UTF-8 text.
        soup: ``BeautifulSoup`` tree parsed from ``html`` with ``html.parser``.
        markdownText: Markdown produced by ``html2text``; set by ``to_file``.
    """

    # Characters rejected in file names by common filesystems (Windows is the
    # strictest), plus ASCII control characters such as the newlines and tabs
    # that often end up inside <title> text.
    _INVALID_FILENAME_CHARS = re.compile(r'[\\/:*?"<>|\x00-\x1f]')
    _REPLACEMENT_CHAR = '-'
    # File stem used when the page has no usable <title>.
    _DEFAULT_TITLE = 'untitled'

    def __init__(self, url, timeout=10, *, verbose=True):
        """Fetch ``url`` and parse the returned HTML.

        Args:
            url: Address of the page to download.
            timeout: Seconds to wait for the server before giving up. Pass
                ``None`` to wait indefinitely (the previous behaviour).
            verbose: When true (the default), print the downloaded HTML and
                the page title to stdout, as this class has always done.

        Raises:
            requests.RequestException: On connection problems, timeouts, or
                an HTTP error status.
        """
        resp = requests.get(url, timeout=timeout)
        # Fail loudly instead of converting a 404/500 error page to Markdown.
        resp.raise_for_status()
        # Pages are assumed to be UTF-8; a stray invalid byte is replaced
        # rather than aborting the whole conversion.
        self.html = resp.content.decode(encoding='UTF-8', errors='replace')
        self.soup = BeautifulSoup(self.html, 'html.parser')
        if verbose:
            print(self.html)
            print(self._page_title())

    def _page_title(self):
        """Return the text of the page's ``<title>``, or ``None`` if absent."""
        title_tag = self.soup.title
        if title_tag is None:
            return None
        # ``.string`` is None when the tag is empty or has several children;
        # fall back to the concatenated text in that case.
        if title_tag.string is not None:
            return title_tag.string
        return title_tag.get_text()

    @classmethod
    def _safe_filename(cls, title):
        """Turn a page title into a file stem that is safe to create.

        Surrounding whitespace is dropped, every invalid character is replaced
        by ``-``, and a missing or blank title yields ``untitled``.
        """
        if not title:
            return cls._DEFAULT_TITLE
        trimmed = str(title).strip()
        cleaned = cls._INVALID_FILENAME_CHARS.sub(cls._REPLACEMENT_CHAR, trimmed)
        return cleaned or cls._DEFAULT_TITLE

    def to_file(self):
        """Convert the page to Markdown and write it to ``<title>.md``.

        The file is created in the current working directory and encoded as
        UTF-8. The converted text is also stored on ``self.markdownText``.

        Returns:
            The name of the file that was written.
        """
        self.markdownText = html2text.html2text(self.html)
        filename = f"{self._safe_filename(self._page_title())}.md"
        # newline="" disables newline translation so the bytes on disk are
        # exactly the UTF-8 encoding of the Markdown text on every platform.
        with open(filename, mode="w", encoding="utf-8", newline="") as file:
            file.write(self.markdownText)
        return filename


if __name__ == '__main__':
    # Demo run: fetch the Simplified-Chinese Semantic Versioning page and
    # save it as a Markdown file in the current working directory.
    page_url = "https://semver.org/lang/zh-CN/"
    MarkDown(page_url).to_file()