部分代码2
程序员文章站
2024-03-20 10:20:34
...
原文链接：https://blog.csdn.net/qq_36097393/article/details/83574269
import re
import time
from collections import Counter
# Module-load timestamp; CountWords reports its elapsed time relative to this.
# time.clock() was removed in Python 3.8, so time.perf_counter() is used.
t0 = time.perf_counter()
#!/usr/bin/env python
#-*- coding:utf-8 -*-
#author: Enoch time:2018/11/1 0001


def CountWords(file_name, stopName):
    """Print the two most frequent words of a text file, then the elapsed time.

    The text is lower-cased and split into words matching
    ``[a-z][a-z0-9]*``. Words listed in the stop-word file (one per line)
    are removed from the tally. The ten most common remaining words are
    ordered by descending count, ties broken alphabetically, and the first
    two are printed together with their share of *all* matched words (the
    total is taken before stop words are removed). Finally the number of
    seconds elapsed since the module-level ``t0`` is printed.

    Args:
        file_name: Path of the text file to analyse.
        stopName: Path of the stop-word file, or ``None`` to keep every word.

    Returns:
        None. Results are written to standard output.

    Raises:
        OSError: If either file cannot be opened.
    """
    with open(file_name) as f:
        txt = f.read().lower()

    stop_words = []
    if stopName is not None:
        with open(stopName) as f:
            # The text is lower-cased above, so stop words are normalised the
            # same way; strip() also drops stray spaces around an entry that
            # would otherwise prevent it from ever matching.
            stop_words = [line.strip().lower() for line in f]

    wordList = re.findall(r"[a-z][a-z0-9]*", txt)
    totalNum = len(wordList)
    counts = Counter(wordList)

    for word in stop_words:
        # Counter.__delitem__ silently ignores keys that are not present.
        del counts[word]

    # One sort on (-count, word) yields descending counts with alphabetical
    # tie-breaking among the ten most common words.
    ranked = sorted(counts.most_common(10), key=lambda item: (-item[1], item[0]))

    # Stop the clock before printing so output time is not measured.
    t1 = time.perf_counter()
    for word, freq in ranked[:2]:
        print("|\t{:15}|{:<11.2%}|".format(word, freq / totalNum))
    print(t1 - t0)


if __name__ == "__main__":
    CountWords('../gone_with_the_wind.txt', '../stopwords.txt')
上一篇: 二分查找的递归实现
下一篇: ASE加密解密算法工具类