欢迎您访问程序员文章站！本站旨在为大家提供和分享程序员计算机编程知识！
您现在的位置是: 首页

python绘图

程序员文章站 2022-03-29 16:08:19
...

一、绘制词云图并统计词频

from wordcloud import WordCloud
import matplotlib.pyplot as plt
import jieba
import csv
import matplotlib.colors as colors
# Build a word cloud from the scraped document, save it as a PNG and show it.
with open(".\\data\\百度文库爬取内容01.txt", "r", encoding="utf-8") as f:
    txt = f.read()  # the whole file as one string

# Custom palette; it only takes effect if passed to WordCloud as ``colormap``.
color = colors.ListedColormap(["#000000", "#00FF00", "#0000FF", "#FF0000"])

# Segment once and reuse the tokens for both the cloud and the console dump.
tokens = jieba.lcut(txt)

wd = WordCloud(
    width=1000,  # image width in pixels (library default is 400)
    height=700,  # image height in pixels
    # Raw string so the Windows backslashes are not read as escape sequences.
    # A CJK-capable font is required, otherwise Chinese text renders garbled.
    font_path=r"C:\Windows\Fonts\simkai.ttf",
    background_color="black",
    # Shape image for the cloud (rectangular when no mask is given).
    # NOTE(review): per the wordcloud docs the mask's shape overrides
    # width/height when a mask is supplied - confirm this is intended.
    mask=plt.imread(".\\data\\background_image.jpeg"),
    # Words to leave out; must be a collection of words, not one string
    # (a plain string would be iterated character by character).
    stopwords={"I", "a"},
    # colormap=color,  # enable to colour the words with the custom palette
)
# WordCloud splits on whitespace, so join the jieba tokens with spaces.
wd.generate(" ".join(tokens))
print("".join(tokens))

# Save the rendered cloud to disk.
wd.to_file('.\\data\\zhu.png')

# Display the cloud without axes.
plt.imshow(wd, interpolation="bilinear")
plt.axis("off")
plt.show()
#
# #####################################################
# Count word frequencies with scikit-learn's CountVectorizer and export them.
from sklearn.feature_extraction.text import CountVectorizer

cv = CountVectorizer()
# CountVectorizer tokenises on whitespace/punctuation, so feed it the jieba
# tokens joined by spaces as a single document.
# NOTE(review): with the default token_pattern, one-character tokens are
# dropped - confirm that is acceptable for Chinese text.
contents_count = cv.fit_transform([" ".join(jieba.lcut(txt))])

# Vocabulary, in column order. get_feature_names() was removed in
# scikit-learn 1.2; fall back to it only on versions that predate
# get_feature_names_out().
if hasattr(cv, "get_feature_names_out"):
    list1 = list(cv.get_feature_names_out())
else:
    list1 = cv.get_feature_names()

# Occurrence counts for the single document, aligned with ``list1``.
list2 = contents_count.toarray().tolist()[0]

# Pair every word with its count.
contents_dict = dict(zip(list1, list2))

# newline="" prevents blank rows between records in the CSV; an explicit
# UTF-8 encoding keeps Chinese words writable regardless of the OS locale.
with open(".\\data\\caifu_output.csv", 'w', newline="", encoding="utf-8") as f:
    writer = csv.writer(f)
    writer.writerows(contents_dict.items())
#
# ###############################################################
# Count how often each jieba token occurs in the document, line by line,
# and write the (word, count) pairs to a CSV file.
import csv
import jieba
import re
from collections import Counter

# Runs of anything other than characters in the \u4e00-\u9fa5 range, ASCII
# letters or digits are treated as separators.
non_word = re.compile(r"[^\u4e00-\u9fa5a-zA-Z0-9]+")

dic = Counter()
with open(".\\data\\百度文库爬取内容01.txt", "r", encoding="utf-8") as f:
    for line in f:
        # Replace separators with a space (rather than deleting them) so that
        # neighbouring words are not fused together, then segment the CLEANED
        # text - the raw line would let punctuation through as tokens.
        cleaned = non_word.sub(" ", line)
        # jieba emits whitespace as tokens of its own; skip those.
        dic.update(word for word in jieba.lcut(cleaned) if word.strip())

# newline="" prevents blank rows between records in the CSV.
with open(".\\data\\词频统计.csv", "w", newline="", encoding='utf-8') as file:
    writer = csv.writer(file)
    writer.writerows(dic.items())
##############################




相关标签: Python