python微信好友数据分析详解
程序员文章站
2023-11-22 09:34:10
基于微信开放的个人号接口python库itchat,实现对微信好友的获取,并对省份、性别、微信签名做数据分析。
效果:
直接上代码,建三个空文本文件st...
基于微信开放的个人号接口python库itchat,实现对微信好友的获取,并对省份、性别、微信签名做数据分析。
效果:
直接上代码,建三个空文本文件stopwords.txt,newdit.txt、unionwords.txt,下载字体simhei.ttf或删除字体要求的代码,就可以直接运行。
#wxfriends.py 2018-07-09 import itchat import sys import pandas as pd import matplotlib.pyplot as plt plt.rcparams['font.sans-serif']=['simhei']#绘图时可以显示中文 plt.rcparams['axes.unicode_minus']=false#绘图时可以显示中文 import jieba import jieba.posseg as pseg from scipy.misc import imread from wordcloud import wordcloud from os import path #解决编码问题 non_bmp_map = dict.fromkeys(range(0x10000, sys.maxunicode + 1), 0xfffd) #获取好友信息 def getfriends(): friends = itchat.get_friends(update=true)[0:] flists = [] for i in friends: fdict={} fdict['nickname']=i['nickname'].translate(non_bmp_map) if i['sex'] == 1: fdict['sex']='男' elif i['sex'] == 2: fdict['sex']='女' else: fdict['sex']='雌雄同体' if i['province'] == '': fdict['province'] ='未知' else: fdict['province']=i['province'] fdict['city']=i['city'] fdict['signature']=i['signature'] flists.append(fdict) return flists #将好友信息保存成csv def savecsv(lists): df = pd.dataframe(lists) try: df.to_csv("wxfriends.csv",index = true,encoding='gb18030') except exception as ret: print(ret) return df #统计性别、省份字段 def anysys(df): df_sex = pd.dataframe(df['sex'].value_counts()) df_province = pd.dataframe(df['province'].value_counts()[:15]) df_signature = pd.dataframe(df['signature']) return df_sex,df_province,df_signature #绘制柱状图,并保存 def draw_chart(df_list,x_feature): try: x = list(df_list.index) ylist = df_list.values y = [] for i in ylist : for j in i: y.append(j) plt.bar(x,y,label=x_feature) plt.legend() plt.savefig(x_feature) plt.close() except: print("绘图失败") #解析取个性签名构成列表 def getsignlist(signature): sig_list = [] for i in signature.values: for j in i: sig_list.append(j.translate(non_bmp_map)) return sig_list #分词处理,并根据需要填写停用词、自定义词、合并词替换 def segmentwords(txtlist): stop_words = set(line.strip() for line in open('stopwords.txt', encoding='utf-8')) newslist = [] #新增自定义词 jieba.load_userdict("newdit.txt") for subject in txtlist: if subject.isspace(): continue word_list = pseg.cut(subject) for word, flag in word_list: if not word in stop_words and flag == 'n' or flag == 'eng' and word !='span' and word !='class': newslist.append(word) #合并指定的相似词 for line in open('unionwords.txt', encoding='utf-8'): newline = line.encode('utf-8').decode('utf-8-sig') #解决\ufeff问题 unionlist = newline.split("*") for j in range(1,len(unionlist)): #worddict[unionlist[0]] += worddict.pop(unionlist[j],0) for index,value in enumerate(newslist): if value == unionlist[j]: newslist[index] = unionlist[0] return newslist #高频词统计 def countwords(newslist): worddict = {} for item in newslist: worddict[item] = worddict.get(item,0) + 1 itemlist = list(worddict.items()) itemlist.sort(key=lambda x:x[1],reverse=true) for i in range(100): word, count = itemlist[i] print("{}:{}".format(word,count)) #绘制词云 def drawplant(newslist): d = path.dirname(__file__) mask_image = imread(path.join(d, "timg.png")) content = ' '.join(newslist) wordcloud = wordcloud(font_path='simhei.ttf', background_color="white",width=1300,height=620, max_words=200).generate(content) #mask=mask_image, # display the generated image: plt.imshow(wordcloud) plt.axis("off") wordcloud.to_file('wordcloud.jpg') plt.show() def main(): #登陆微信 itchat.auto_login() # 登陆后不需要扫码 hotreload=true flists = getfriends() fdf = savecsv(flists) df_sex,df_province,df_signature = anysys(fdf) draw_chart(df_sex,"性别") draw_chart(df_province,"省份") wordlist = segmentwords(getsignlist(df_signature)) countwords(wordlist) drawplant(wordlist) main()
以上就是本文的全部内容,希望对大家的学习有所帮助,也希望大家多多支持。
上一篇: 深入理解PHP变量的值类型和引用类型
下一篇: js实现分页功能