2

程序员文章站 2022-07-15 11:22:03

...

# -*- coding: utf-8 -*-

# 导入必要的模块
import jieba
import matplotlib.pyplot as plt
from wordcloud import WordCloud

# Build the stop-word list
def stopwordslist(filepath):
    """Load stop words from a UTF-8 text file.

    Args:
        filepath: Path to a text file containing one stop word per line.

    Returns:
        A list with one entry per line of the file, each stripped of
        leading and trailing whitespace (blank lines become '').
    """
    # The context manager guarantees the handle is closed, even if reading
    # or decoding fails part-way through.
    with open(filepath, 'r', encoding='utf-8') as f:
        return [line.strip() for line in f]

# Segment the text with jieba. WordCloud can tokenize on its own, but the
# author found jieba's segmentation results better.
def seg_sentence(sentence):
    """Tokenize text with jieba, dropping stop words and whitespace tokens.

    Args:
        sentence: The text to segment; surrounding whitespace is stripped
            before segmentation.

    Returns:
        A list of tokens in their original order, excluding every token
        found in 'stopwords1893.txt' and the bare tokens '\\t', ' ' and
        '\\n'.
    """
    # Path of the stop-word file. A set gives O(1) membership tests instead
    # of scanning the whole list once per token.
    stopwords = set(stopwordslist('stopwords1893.txt'))
    whitespace_tokens = {'\t', ' ', '\n'}
    return [
        word
        for word in jieba.cut(sentence.strip())
        if word not in stopwords and word not in whitespace_tokens
    ]

# Read the whole input document into a single string. The context manager
# closes the file even if reading fails, and one read() replaces the
# line-by-line re-joining of the text.
# NOTE(review): no encoding is passed here, so the platform default is used,
# while the stop-word file is read as UTF-8 - confirm the encoding of 2.txt.
with open("2.txt", 'r') as f:
    sentence = f.read()

# Segment the text with jieba (stop words removed)
word_result_list = seg_sentence(sentence)
# Join the tokens into one comma-separated string
word_result = ','.join(word_result_list)


plt.figure(figsize=(12, 6))

# Location of the Chinese font file
font = r'font/SimHei.ttf'

# Word-cloud settings
wc = WordCloud(
    background_color='white',  # white background
    colormap='winter',  # use the 'winter' colour map
    font_path=font,  # font used to draw the (Chinese) words
    width=1280,  # width of the word-cloud image
    height=720,  # height of the word-cloud image
    max_font_size=150,  # largest font size used
    max_words=200,  # maximum number of words shown
)
# Feed the comma-separated tokens to the word cloud
wc.generate(word_result)

# Show the word cloud. The original referenced an undefined name
# `wordcloud` here and below, which raised NameError; the object is `wc`.
plt.imshow(wc)

# "off" hides the axes
plt.axis("off")
plt.show()

# Write the word-cloud image to the current directory
wc.to_file("pict_wordcloud.jpg")

上一篇：前三周笔记(linux基础与python)

下一篇：手写SpringMVC

2

搭建 Struts2 环境

2.第一个ASP.NET MVC 5.0应用程序

三方移植：小米MIX 2/2S Android 10刷机包发布下载

realme X2即将发售：NFC+30W VOOC 4.0 1499元起

Linux06 文件的打包和压缩（gzip/gunzip、tar、bzip2）

等了20个月 OPPO Find系列新品Find X2下周见：2K+120Hz

余承东：P30系列销量预计超2千万 华为Mate X预计年底前上市

小米10/Pro开发版火速赶来：2月17日内测

Linux命令- echo、grep 、重定向、1>&2、2>&1的介绍

B2B网站服务模式分析

2

搭建 Struts2 环境

2.第一个ASP.NET MVC 5.0应用程序

三方移植：小米MIX 2/2S Android 10刷机包发布下载

realme X2即将发售：NFC+30W VOOC 4.0 1499元起

Linux06 文件的打包和压缩（gzip/gunzip、tar、bzip2）

等了20个月 OPPO Find系列新品Find X2下周见：2K+120Hz

余承东：P30系列销量预计超2千万 华为Mate X预计年底前上市

小米10/Pro开发版火速赶来：2月17日内测

Linux命令- echo、grep 、重定向、1>&2、2>&1的介绍

B2B网站服务模式分析

余承东：P30系列销量预计超2千万 华为Mate X预计年底前上市