欢迎您访问程序员文章站!本站旨在为大家提供和分享程序员计算机编程知识!
您现在的位置是: 首页

云图(词云图)实现方式

程序员文章站 2022-05-27 15:17:55
...

一、项目目录

云图(词云图)实现方式

二、实现词云图的2种方式:word_cloud、stylecloud

word_cloud : http://amueller.github.io/word_cloud/

stylecloud :https://github.com/minimaxir/stylecloud

三、Python代码实现

import pandas as pd
import csv
from wordcloud import WordCloud, ImageColorGenerator
import matplotlib.pyplot as plt
import jieba
import re
from matplotlib import colors
import numpy as np
from PIL import Image
import stylecloud
from IPython.display import Image 

# Read the first column of the Excel sheet and return its cells as a list
def excel_one_line_to_list():
    """Return the cells of the first column of the hard-coded workbook.

    The workbook is read with ``header=1`` (the second row is the header,
    so only the rows below it are data) and ``usecols=[0]`` (first column
    only).

    Returns:
        list: one entry per data row, in sheet order.
    """
    frame = pd.read_excel('D:\\citydo-one\\技术\\Java_Note-master\\python\\tp\\tp20200530中文.xlsx', header=1, usecols=[0], names=None)
    # Each row comes back as a one-element list; unwrap it.
    return [row[0] for row in frame.values.tolist()]

# Read the CSV file and return its comment column as segmented text
def cvs_list():
    """Return the second CSV column as one space-separated string of tokens.

    The file is read without a header row, so a leading ``comments`` header
    cell shows up as data and is blanked out. Every remaining cell is
    segmented with jieba (accurate mode) and all tokens are joined with a
    single space.

    Returns:
        str: the tokens of every row, separated by spaces.
    """
    df = pd.read_csv('D:\\citydo-one\\技术\\Java_Note-master\\python\\tp\\tencent.csv', header=None)
    # Drop empty cells and force text: jieba.cut only accepts strings.
    comments = df[1].replace('comments', '').dropna().astype(str)
    # Join across rows as well as within a row; otherwise the last token of
    # one row fuses with the first token of the next.
    return ' '.join(
        token
        for line in comments
        for token in jieba.cut(line, cut_all=False)
    )


# Windows environment
# Reference: https://github.com/Brucepk/trump_Twitter
def test0():
    """Render a masked word cloud from the Excel column into trump2020.jpg.

    Steps: load the cells with ``excel_one_line_to_list()``, strip ASCII
    letters, digits and punctuation plus two filler words, segment the rest
    with jieba, draw the cloud inside the mask image, recolor it from that
    image and save it under the current working directory. The 50 most
    frequent words are printed along the way.

    The data, mask and font locations are hard-coded Windows paths.
    """
    # The module-level name ``Image`` ends up bound to IPython.display.Image
    # (imported after PIL's), which has no ``open``; use a local alias so
    # the mask can actually be loaded.
    from PIL import Image as PILImage

    raw = str(excel_one_line_to_list())
    cleaned = re.sub(
        r"[A-Za-z0-9\[\]`~!@#$^&*()=|{}':;,.<>/?。\\%]", "", raw
    ).replace('谢谢', '').replace('今天', '')
    # Segment the whole string in one call. Iterating a str yields single
    # characters, so cutting "line by line" never segmented anything.
    text = ' '.join(
        token for token in jieba.cut(cleaned, cut_all=False) if token.strip()
    )
    print('text:', text)
    # Mask image that gives the word cloud its shape
    mask_image = np.array(PILImage.open('D:\\citydo-one\\技术\\Java_Note-master\\python\\tp\\trump_Twitter-master\\test.jpg'))

    # font_path must point at a font installed on this machine; change it
    # to match your system.
    wc = WordCloud(scale=32, background_color='white', mask=mask_image,
                   # font_path='/System/Library/Fonts/Supplemental/Songti.ttc',
                   font_path='C:\\Windows\\Fonts\\simfang.ttf',
                   max_words=1000, max_font_size=100, random_state=42, mode='RGB')
    wc.generate_from_text(text)
    frequencies = wc.process_text(text)
    ranked = sorted(frequencies.items(), key=lambda item: item[1], reverse=True)
    print(ranked[:50])   # the 50 most frequent words
    wc.recolor(color_func=ImageColorGenerator(mask_image))
    plt.imshow(wc)
    plt.axis('off')
    wc.to_file("trump2020.jpg")  # relative path: saved in the working directory
    print('生成词云成功!')

# Linux environment
# NOTE(review): the font path below is a macOS system-font location and the
# data paths are still Windows paths -- confirm the intended platform.
def test1():
    """Render a masked word cloud from the Excel column into trump2020.jpg.

    Same pipeline as the Windows variant, but with a different font path:
    load the cells with ``excel_one_line_to_list()``, strip ASCII letters,
    digits and punctuation plus two filler words, segment the rest with
    jieba, draw the cloud inside the mask image, recolor it from that image
    and save it under the current working directory. The 50 most frequent
    words are printed along the way.
    """
    # The module-level name ``Image`` ends up bound to IPython.display.Image
    # (imported after PIL's), which has no ``open``; use a local alias so
    # the mask can actually be loaded.
    from PIL import Image as PILImage

    raw = str(excel_one_line_to_list())
    cleaned = re.sub(
        r"[A-Za-z0-9\[\]`~!@#$^&*()=|{}':;,.<>/?。\\%]", "", raw
    ).replace('谢谢', '').replace('今天', '')
    # Segment the whole string in one call. Iterating a str yields single
    # characters, so cutting "line by line" never segmented anything.
    text = ' '.join(
        token for token in jieba.cut(cleaned, cut_all=False) if token.strip()
    )
    print('text:', text)
    # Mask image that gives the word cloud its shape
    mask_image = np.array(PILImage.open('D:\\citydo-one\\技术\\Java_Note-master\\python\\tp\\trump_Twitter-master\\test.jpg'))

    # font_path must point at a font installed on this machine; change it
    # to match your system.
    wc = WordCloud(scale=32, background_color='white', mask=mask_image,
                   font_path='/System/Library/Fonts/Supplemental/Songti.ttc',
                   max_words=1000, max_font_size=100, random_state=42, mode='RGB')
    wc.generate_from_text(text)

    frequencies = wc.process_text(text)
    ranked = sorted(frequencies.items(), key=lambda item: item[1], reverse=True)
    print(ranked[:50])   # the 50 most frequent words
    wc.recolor(color_func=ImageColorGenerator(mask_image))
    plt.imshow(wc)
    plt.axis('off')
    wc.to_file("trump2020.jpg")  # relative path: saved in the working directory
    print('生成词云成功!')



# Windows environment
# Reference: https://mp.weixin.qq.com/s/B23uDxzLUa_45uDh2R9Vlw
def test2():
    """Render an icon-shaped word cloud from Trump.txt into 词云.png.

    The text file is read as UTF-8, segmented with jieba and handed to
    stylecloud, which writes the PNG under the current working directory.
    The image is then passed to IPython's ``display``.
    """
    # Local imports: ``display`` is not imported at module level, and the
    # alias keeps this function independent of which ``Image`` class the
    # module-level name currently refers to.
    from IPython.display import Image as IPyImage, display

    with open("D:\\citydo-one\\技术\\Java_Note-master\\python\\tp\\Trump.txt", "r", encoding='utf-8') as source:
        texts = source.read()
    words = jieba.lcut(texts)
    #print("text:", words)
    stylecloud.gen_stylecloud(text=" ".join(words), collocations=False,
                              # Color palette
                              palette='tableau.BlueRed_6',
                              # Typed by hand: a path pasted from the Windows
                              # properties dialog can carry an invisible
                              # U+202A mark that makes the font unloadable.
                              font_path='C:\\Windows\\Fonts\\msyh.ttc',
                              # Icon that gives the cloud its shape
                              icon_name='fab fa-qq',
                              size=400,
                              output_name='词云.png')
    # A bare Image(...) expression inside a function is discarded; display()
    # is what actually renders it in a notebook.
    display(IPyImage(filename='词云.png'))
    print('生成词云成功!')

if __name__ == '__main__':
    # Run the three demos one after another, in definition order.
    for demo in (test0, test1, test2):
        demo()

四、Java代码实现

        <dependency>
            <groupId>com.kennycason</groupId>
            <artifactId>kumo-core</artifactId>
            <version>1.27</version>
        </dependency>

        <dependency>
            <groupId>com.kennycason</groupId>
            <artifactId>kumo-tokenizers</artifactId>
            <version>1.27</version>
        </dependency>
    /**
     * Renders a circular word cloud from a small in-memory word list and
     * writes it to a PNG file at a hard-coded path.
     *
     * Every word gets a random weight, so the image differs between runs.
     */
    private static void test4() {
        // Configured for the file-based alternative shown in the comment
        // below; the active code path does not use it.
        FrequencyAnalyzer frequencyAnalyzer = new FrequencyAnalyzer();
        frequencyAnalyzer.setWordFrequenciesToReturn(600);
        frequencyAnalyzer.setMinWordLength(2);
        frequencyAnalyzer.setWordTokenizer(new ChineseWordTokenizer());
        // Frequencies can also be loaded straight from a file:
        //List<WordFrequency> wordFrequencies = frequencyAnalyzer.load(getInputStream("D:\\citydo-one\\技术\\Java_Note-master\\python\\tp\\Trump.txt"));
        List<WordFrequency> wordFrequencies = new ArrayList<>();
        // Build the cloud from a fixed list of words
        List<String> test = Arrays.asList("你好","谢谢");
        // Give each word a random weight in [0, test.size()); one shared
        // generator is enough for the whole list.
        Random random = new Random();
        test.forEach(word -> wordFrequencies.add(new WordFrequency(word, random.nextInt(test.size()))));
        // Without an explicit font, Chinese text is rendered garbled
        java.awt.Font font = new java.awt.Font("STSong-Light", java.awt.Font.ITALIC, 18);
        Dimension dimension = new Dimension(900, 900);
        WordCloud wordCloud = new WordCloud(dimension, CollisionMode.PIXEL_PERFECT);
        wordCloud.setPadding(2);
        // The cloud is drawn as a circle; the argument is its radius
        wordCloud.setBackground(new CircleBackground(255));
        wordCloud.setFontScalar(new SqrtFontScalar(12, 42));
        // Three colors for the palette; per the original author, colors
        // listed earlier are used for higher-frequency words
        wordCloud.setColorPalette(new LinearGradientColorPalette(Color.RED, Color.BLUE, Color.GREEN, 30, 30));
        wordCloud.setKumoFont(new KumoFont(font));
        wordCloud.setBackgroundColor(new Color(255, 255, 255));
        wordCloud.build(wordFrequencies);
        // Output path of the generated image
        wordCloud.writeToFile("D:\\citydo-one\\技术\\Java_Note-master\\python\\tp\\text.png");
    }

参考:

https://github.com/Brucepk/trump_Twitter
https://mp.weixin.qq.com/s/B23uDxzLUa_45uDh2R9Vlw