python每日一练
程序员文章站
2022-03-15 22:33:29
...
前言
人生苦短,我用python【2018.6.4第一更】
要求
你有一个目录，里面放了你一个月的日记，都是 txt 文件。为了避免分词的问题，假设内容都是英文，请统计出你认为每篇日记中最重要的词。
代码
# -*- coding:utf-8 -*-
import glob
import os
import re
from collections import Counter
def get_max_wordlist(filePath):
    """Return the most frequent word(s) found in the text file at ``filePath``.

    The file is read line by line, lower-cased, and split into words made of
    word characters, optionally joined by single hyphens (``well-being``).
    Every word that shares the highest occurrence count is returned, so the
    result has more than one entry when there is a tie.

    Args:
        filePath: Path of the text file to analyse.

    Returns:
        A list of the words with the highest count. The list is empty when
        the file contains no words or cannot be read; in the latter case the
        error is printed instead of being raised (best-effort behaviour).
    """
    counts = Counter()
    try:
        # ``with`` closes the file on every path and, unlike a ``finally``
        # that calls ``fp.close()``, cannot fail when ``open`` itself raised.
        with open(filePath, "r") as fp:
            for line in fp:
                # A run of hyphens on its own is punctuation, not a word, so
                # hyphens are only accepted between word characters.
                counts.update(re.findall(r"\w+(?:-\w+)*", line.lower()))
    except (IOError, OSError, UnicodeDecodeError) as e:
        print(e)
        return []

    # ``max`` raises on an empty sequence; a file with no words has no winner.
    if not counts:
        return []

    top = max(counts.values())
    return [word for word, seen in counts.items() if seen == top]
def count_word(path):
    """Print the most frequent word(s) of each diary file under ``path``.

    Looks for ``*.txt`` files inside the ``diary`` sub-directory of ``path``
    and prints one line per file with its absolute path followed by the
    words returned by ``get_max_wordlist``.

    Args:
        path: Directory that contains the ``diary`` folder. Passing ``"."``
            scans ``./diary/*.txt``.

    Returns:
        None. Results are written to standard output.
    """
    # Build the pattern from ``path`` so the argument is honoured rather
    # than always scanning the current working directory.
    pattern = os.path.join(path, "diary", "*.txt")
    # glob returns files in arbitrary order; sort for a stable report.
    for fi in sorted(glob.glob(pattern)):
        filePath = os.path.abspath(fi)
        result = get_max_wordlist(filePath)
        print("in file {} most important words:".format(filePath) + " ".join(result))
def main():
    """Run the word count starting from the current directory."""
    count_word(".")


# Only run the report when this file is executed as a script.
if __name__ == "__main__":
    main()
另外学习了一下 str.format 的用法
# -*- coding:utf-8 -*-
# Small tour of str.format. Each call passes a single argument to print, so
# the lines produce the same output on Python 2 and Python 3.

# Positional fields are filled in order -> 12.32
print("{}.{}".format(12, 32))
# Named fields are filled from keyword arguments -> jsk.32
print("{name}.{age}".format(name="jsk", age=32))
# ">8" right-aligns and "<8" left-aligns in a width of 8, padding with spaces
print("{:>8}.{:<8}".format(111, 222))
# A character before the alignment sign is the fill -> "     111.222xxxxx"
print("{:>8}.{:x<8}".format(111, 222))
# ".2f" keeps two decimal places -> 33.22.222xxxxx
print("{:.2f}.{:x<8}".format(33.22222222, 222))
# Binary, octal, decimal, hex and percentage -> 10001,21,17,11,1700.000000%
print("{:b},{:o},{:d},{:x},{:%}".format(17, 17, 17, 17, 17))
总结
睡觉了!
上一篇: python每日一练
下一篇: 如何隐藏url中的index.php