3 Star 8 Fork 1

冰封飞飞 / 计算机英语词频统计

加入 Gitee
与超过 1200万 开发者一起发现、参与优秀开源项目,私有仓库也完全免费 :)
免费加入
克隆/下载
statistics.py 6.81 KB
一键复制 编辑 原始数据 按行查看 历史
'''
统计数据序列化模块
'''
import pickle
import sys
import os
import time
import asyncio
import functools
from log import log
from utils import valuleCheck, statfilelock
import settings
import datastore
from model.models import modelLink, modelHtmlText, modelWordCount, modelDoneLink
# 300 backspace characters; the show-stat loops print this to move the console
# cursor back to the head of the current line.
RETURN_TO_LINE_HEAD = '\b' * 300
# Module-wide logger built by the project's `log` class, configured with
# filename=settings.STAT_LOG_FILE at INFO level.
# NOTE(review): despite the name, this is not the standard-library `logging` module.
logging = log(filename=settings.STAT_LOG_FILE, level=log.INFO)
class subStat:
    '''
    Per-site counters.

    wordCount            total number of words counted so far
    preAnalyzeLinkcount  number of links waiting to be analyzed
    preAnalyzeTextCount  number of texts waiting to be analyzed
    doneLinkCount        number of links already analyzed
    '''
    __slots__ = [
        'wordCount',
        'preAnalyzeLinkcount',
        'preAnalyzeTextCount',
        'doneLinkCount',
    ]

    def __init__(self):
        # Every counter starts from zero.
        for counter in ('wordCount', 'preAnalyzeLinkcount',
                        'doneLinkCount', 'preAnalyzeTextCount'):
            setattr(self, counter, 0)

    def __str__(self):
        template = '分析单词总量:{}, 待分析链接:{}, 待分析文章:{}, 完成分析链接:{}'
        values = (
            self.wordCount,
            self.preAnalyzeLinkcount,
            self.preAnalyzeTextCount,
            self.doneLinkCount,
        )
        return template.format(*values)
class statistics:
    '''
    Top-level statistics record that is pickled to the stat file.

    summary   subStat holding the overall totals
    wiki      subStat holding the wikipedia totals
    transNum  number of translated words
    '''
    __slots__ = ['transNum', 'summary', 'wiki']

    def __init__(self):
        self.transNum = 0
        self.summary = subStat()
        self.wiki = subStat()

    def __str__(self):
        template = '总计=[{},已翻译:{}] wikipedia=[{}]'
        return template.format(self.summary, self.transNum, self.wiki)
def __initDataFromDb() -> statistics:
    '''
    Rebuild the statistics from the rows stored in the database.

    Counts the link, html-text and done-link models, sums the per-word
    counts, counts the words that carry a translation, writes the result to
    settings.STAT_FILE and returns it. The summary and wiki buckets receive
    the same values.
    '''
    stats = statistics()
    store = datastore.datastore()

    pending_links = store.count(modelLink)
    pending_texts = store.count(modelHtmlText)
    done_links = store.count(modelDoneLink)

    word_total = 0
    translated = 0
    for entry in store.all(modelWordCount):
        word_total += entry.count
        if entry.trans:
            translated += 1

    for bucket in (stats.summary, stats.wiki):
        bucket.preAnalyzeLinkcount = pending_links
        bucket.preAnalyzeTextCount = pending_texts
        bucket.doneLinkCount = done_links
        bucket.wordCount = word_total
    stats.transNum = translated

    with open(settings.STAT_FILE, 'wb+') as fh:
        pickle.dump(stats, fh)
    logging.info('__initDataFromDb() {}'.format(stats))
    return stats
def __getData() -> statistics:
    '''
    Load the statistics object pickled in settings.STAT_FILE.

    If opening or unpickling the file raises any Exception, the error is
    logged and the statistics are rebuilt through __initDataFromDb().
    '''
    try:
        with open(settings.STAT_FILE, 'rb') as f:
            data = pickle.load(f)
    except Exception as e:
        # Missing or unreadable stat file: fall back to the database.
        logging.error('get data {}'.format(e))
        data = __initDataFromDb()
        # NOTE(review): this info log is placed in the recovery path; the
        # original indentation was not available, so confirm the intended nesting.
        logging.info('get data {}'.format(data))
    return data
def __clearData():
    '''
    Delete the stat file; a file that is already missing is not an error.
    '''
    try:
        logging.info('clear data()')
        os.unlink(settings.STAT_FILE)
    except FileNotFoundError:
        # Nothing to delete.
        return
@statfilelock
@valuleCheck
def addwikiWordCount(num:'新增的统计'):
    '''
    Add num to the wikipedia word counter and to the summary word counter,
    then write the statistics back to the stat file.
    '''
    stats = __getData()
    with open(settings.STAT_FILE, 'wb+') as fh:
        for bucket in (stats.wiki, stats.summary):
            bucket.wordCount += num
        pickle.dump(stats, fh)
def __processwikiPreAnalyzeLinkCount(num):
    '''
    Shift the pending-link counters (wikipedia and summary) by num and
    write the statistics back to the stat file. num may be negative.
    '''
    stats = __getData()
    with open(settings.STAT_FILE, 'wb+') as fh:
        for bucket in (stats.wiki, stats.summary):
            bucket.preAnalyzeLinkcount += num
        pickle.dump(stats, fh)
@statfilelock
@valuleCheck
def addwikiPreAnalyzeLinkCount(num:'新增的统计'):
    '''
    Increase the number of wikipedia links waiting to be analyzed by num.
    '''
    increment = num
    __processwikiPreAnalyzeLinkCount(increment)
@statfilelock
@valuleCheck
def decwikiPreAnalyzeLinkCount(num:'减少的统计'):
    '''
    Decrease the number of wikipedia links waiting to be analyzed by num.
    '''
    decrement = -num
    __processwikiPreAnalyzeLinkCount(decrement)
@statfilelock
def getwikiPreAnalyzeLinkCount() -> int:
    '''
    Return the number of wikipedia links waiting to be analyzed.
    '''
    return __getData().wiki.preAnalyzeLinkcount
def __processwikiPreHtmlCount(num):
    '''
    Shift the pending-text counters (wikipedia and summary) by num and
    write the statistics back to the stat file. num may be negative.
    '''
    stats = __getData()
    with open(settings.STAT_FILE, 'wb+') as fh:
        for bucket in (stats.wiki, stats.summary):
            bucket.preAnalyzeTextCount += num
        pickle.dump(stats, fh)
@statfilelock
@valuleCheck
def addwikiPreAnalyzeHtmlCount(num:'新增的统计'):
    '''
    Increase the number of wikipedia texts waiting to be analyzed by num.
    '''
    increment = num
    __processwikiPreHtmlCount(increment)
@statfilelock
@valuleCheck
def decwikiPreAnalyzeHtmlCount(num:'减少的统计'):
    '''
    Decrease the number of wikipedia texts waiting to be analyzed by num.
    '''
    decrement = -num
    __processwikiPreHtmlCount(decrement)
@statfilelock
def getwikiPreAnalyzeHtmlCount() -> int:
    '''
    Return the number of wikipedia texts waiting to be analyzed.
    '''
    return __getData().wiki.preAnalyzeTextCount
def __processwikiDoneLineCount(num):
    '''
    Shift the done-link counters (wikipedia and summary) by num and write
    the statistics back to the stat file. num may be negative.
    '''
    stats = __getData()
    with open(settings.STAT_FILE, 'wb+') as fh:
        for bucket in (stats.wiki, stats.summary):
            bucket.doneLinkCount += num
        pickle.dump(stats, fh)
@statfilelock
@valuleCheck
def addwikiDoneLinkCount(num:'新增的统计'):
    '''
    Increase the number of fully analyzed wikipedia links by num.
    '''
    increment = num
    __processwikiDoneLineCount(increment)
@statfilelock
@valuleCheck
def decwikiDoneLinkCount(num:'减少的统计'):
    '''
    Decrease the number of fully analyzed wikipedia links by num.
    '''
    decrement = -num
    __processwikiDoneLineCount(decrement)
@statfilelock
def addTransNum(num:'新增的翻译单词数量'):
    '''
    Add num to the translated-word counter and write the statistics back
    to the stat file.

    num is not validated by a decorator here, so it must support
    `int += num`; otherwise the `+=` raises before anything is written.
    '''
    data = __getData()
    # Update in memory first: opening with 'wb+' truncates the file at once,
    # so a failing `+=` inside the `with` would leave an empty stat file.
    data.transNum += num
    with open(settings.STAT_FILE, 'wb+') as f:
        pickle.dump(data, f)
@statfilelock
def showstat():
    '''
    Print the current statistics once.
    '''
    print(__getData())
async def asyncShowstat():
    '''
    Coroutine that prints the statistics to the console every 0.5 seconds,
    rewriting the same console line each time. Runs until cancelled.
    '''
    while True:
        data = __getData()
        print(data, end='')
        # Move the cursor back to the head of the line and flush: nothing
        # printed here ends with a newline, so without an explicit flush the
        # text can sit in the stdout buffer instead of appearing on each tick.
        print(RETURN_TO_LINE_HEAD, end='', flush=True)
        await asyncio.sleep(0.5)
def syncShowstat():
    '''
    Blocking loop that prints the statistics to the console every 2 seconds,
    rewriting the same console line each time. Never returns.

    Errors raised while loading or printing are logged and the loop
    continues with the next tick.
    '''
    while True:
        try:
            data = __getData()
            print(RETURN_TO_LINE_HEAD, end='')  # cursor back to the head of the line
            print(data, end='')
            sys.stdout.flush()
        except Exception as e:
            logging.error("syncShowstat error {}".format(e))
        # Sleep outside the try so a persistent failure is retried every
        # 2 seconds instead of spinning and flooding the log.
        time.sleep(2)
@statfilelock
def getWordCount():
    '''
    Return the total number of words analyzed (summary counter).
    '''
    return __getData().summary.wordCount
def main():
    '''
    Rebuild the statistics from the database and print the result.
    '''
    rebuilt = __initDataFromDb()
    print(rebuilt)
    # Manual checks that were kept disabled here:
    #   syncShowstat()
    #   showstat()
    #   __clearData(); showstat()
    #   addwikiWordCount(100); addwikiPreAnalyzeLinkCount(20); addwikiDoneLinkCount(333)
    #   showstat()
    #   asyncio.run(asyncShowstat())
# Run main() only when this file is executed as a script.
if __name__ == "__main__":
    main()
Python
1
https://gitee.com/bingfengfeifei/wordCount.git
git@gitee.com:bingfengfeifei/wordCount.git
bingfengfeifei
wordCount
计算机英语词频统计
master

搜索帮助