Python实战之实现获取动态图表
程序员文章站
2022-06-23 13:34:49
目录:前言、开发工具、环境搭建、百度指数、微博指数、结果展示。前言:利用 Python 实现获取动态图表,废话不多说~让我们愉快地开始吧~开发工具:Python 版本 3.6.4;相关模块:re 模块、requests 模块、urllib 模块……
前言
利用python实现获取动态图表,废话不多说~
让我们愉快地开始吧~
开发工具
python版本: 3.6.4
相关模块:
re模块;
requests模块;
urllib模块;
pandas模块;
execjs模块(通过 pip install PyExecJS 安装,用于执行解密用的 js 代码);
bar_chart_race模块(用于生成动态条形图);
以及一些python自带的模块。
环境搭建
安装python并添加到环境变量,pip安装需要的相关模块即可。
先看一下B站2019年「数据可视化」版块的情况:排名第一的视频播放量超过200万,弹幕4万+。
百度指数
获取百度指数,首先需要登陆你的百度账号
以关键词「王者荣耀」为例,时间自定义为2020-10-01~2020-10-10
通过开发者工具,我们就能看到曲线图的数据接口
然而一看请求得到的结果,发现并没有数据,原因是这里使用了js加密
找到解决方法,成功实现爬取,代码实现
# Scrape Baidu Index figures for keywords / regions and append them to CSV files.
#
# NOTE(review): the published listing appears to have been lower-cased as a
# whole (it contained `%y-%m-%d` date formats, which cannot parse four-digit
# years). Endpoint paths, query parameter names and JSON keys below are
# reproduced exactly as published; if the live API is case-sensitive they may
# need their original casing restored -- confirm against the browser's
# network panel.
import datetime
import json
import logging
import time
from urllib.parse import urlencode

logger = logging.getLogger(__name__)

DATE_FORMAT = "%Y-%m-%d"
REQUEST_TIMEOUT = 10  # seconds; without a timeout a stalled request blocks forever


def get_data(keywords, startdate, enddate, area):
    """Request the encrypted index series and the value needed to decrypt it.

    Args:
        keywords: A list of keyword strings. A single string is also accepted
            and treated as a one-element list.
        startdate: First day of the range, formatted ``YYYY-MM-DD``.
        enddate: Last day of the range, formatted ``YYYY-MM-DD``.
        area: Area code sent as the ``area`` query parameter.

    Returns:
        A dict with two entries: ``"data"`` is the encrypted series of the
        first keyword in the response, ``"t"`` is the payload returned by the
        ``ptbk`` endpoint, which is passed to the JS ``decrypt`` function.
    """
    # Imported here so the pure helpers in this script stay importable even
    # when the scraping dependencies are not installed.
    import requests

    # A bare string would otherwise be iterated character by character.
    if isinstance(keywords, str):
        keywords = [keywords]

    params = {
        'word': json.dumps([[{'name': keyword, 'wordtype': 1}] for keyword in keywords]),
        'startdate': startdate,
        'enddate': enddate,
        'area': area,
    }
    data_url = 'http://index.baidu.com/api/searchapi/index?' + urlencode(params)
    headers = {
        # Paste the cookie of a logged-in Baidu session here.
        "cookie": '你的cookie',
        "referer": "http://index.baidu.com/v2/main/index.html",
        "user-agent": "mozilla/5.0 (windows nt 10.0; win64; x64) applewebkit/537.36 (khtml, like gecko) chrome/77.0.3865.90 safari/537.36"
    }

    # First request: encrypted series plus the uniqid identifying it.
    res = requests.get(url=data_url, headers=headers, timeout=REQUEST_TIMEOUT).json()
    data = res["data"]["userindexes"][0]["all"]["data"]
    uniqid = res["data"]["uniqid"]

    # Second request: the value the JS decrypt function takes as its first argument.
    t_url = "http://index.baidu.com/interface/ptbk?uniqid={}".format(uniqid)
    rep = requests.get(url=t_url, headers=headers, timeout=REQUEST_TIMEOUT).json()
    return {"data": data, "t": rep["data"]}


def get_search_index(word, startdate, enddate, area):
    """Return the decrypted index values for ``word``.

    Fetches the encrypted payload with ``get_data`` and runs the ``decrypt``
    function defined in ``parsing_data_function.js`` on it.

    Returns:
        Whatever the JS ``decrypt`` function returns; the callers in this
        script treat it as a comma-separated string of daily values.
    """
    import execjs

    res = get_data(word, startdate, enddate, area)

    with open('parsing_data_function.js', encoding='utf-8') as f:
        js = f.read()

    # Compile the JS source once, then call its decrypt(t, e) function.
    docjs = execjs.compile(js)
    return docjs.call('decrypt', res["t"], res["data"])


def get_date_list(begin_date, end_date):
    """Return every day from ``begin_date`` to ``end_date`` inclusive.

    Both arguments and all returned items are ``YYYY-MM-DD`` strings. An
    empty list is returned when ``end_date`` is before ``begin_date``.

    Raises:
        ValueError: If either argument is not a valid ``YYYY-MM-DD`` date.
    """
    start = datetime.datetime.strptime(begin_date, DATE_FORMAT).date()
    end = datetime.datetime.strptime(end_date, DATE_FORMAT).date()
    day_count = (end - start).days + 1
    return [
        (start + datetime.timedelta(days=offset)).strftime(DATE_FORMAT)
        for offset in range(day_count)
    ]


def _append_index_rows(label, keywords, area, csv_path,
                       startdate='2020-10-01', enddate='2020-10-10'):
    """Fetch one index series and append ``label,value,date`` rows to ``csv_path``."""
    res = get_search_index(keywords, startdate, enddate, area)
    values = res.split(',')
    dates = get_date_list(startdate, enddate)
    with open(csv_path, 'a', encoding='utf-8') as f:
        for num, date in zip(values, dates):
            print(label, num, date)
            f.write(label + ',' + str(num) + ',' + date + '\n')


def get_area():
    """Append the daily index of one keyword for every listed area to ``area.csv``."""
    # NOTE(review): codes 926, 931 and 932 all carry the placeholder name "*"
    # in the published listing, so their rows are indistinguishable in the
    # CSV. Replace the placeholders with the real region names.
    areas = {
        "901": "山东", "902": "贵州", "903": "江西", "904": "重庆", "905": "内蒙古",
        "906": "湖北", "907": "辽宁", "908": "湖南", "909": "福建", "910": "上海",
        "911": "北京", "912": "广西", "913": "广东", "914": "四川", "915": "云南",
        "916": "江苏", "917": "浙江", "918": "青海", "919": "宁夏", "920": "河北",
        "921": "黑龙江", "922": "吉林", "923": "天津", "924": "陕西", "925": "甘肃",
        "926": "*", "927": "河南", "928": "安徽", "929": "山西", "930": "海南",
        "931": "*", "932": "*", "933": "香港", "934": "澳门",
    }
    for code, area_name in areas.items():
        try:
            time.sleep(1)  # pause between areas to avoid hammering the site
            _append_index_rows(area_name, ['王者荣耀'], code, 'area.csv')
        except Exception as exc:
            # Best effort: report the failure and carry on with the next area.
            logger.warning("skipping area %s (%s): %r", code, area_name, exc)


def get_word():
    """Append the nationwide daily index of each listed keyword to ``word.csv``."""
    words = ['诸葛大力', '张伟', '胡一菲', '吕子乔', '陈美嘉', '赵海棠', '咖喱酱', '曾小贤', '秦羽墨']
    for word in words:
        try:
            time.sleep(2)  # pause between keywords to avoid hammering the site
            # The API expects a list of keywords, so wrap the single word.
            _append_index_rows(word, [word], 0, 'word.csv')
        except Exception as exc:
            # Best effort: report the failure and carry on with the next keyword.
            logger.warning("skipping keyword %s: %r", word, exc)


if __name__ == "__main__":
    get_area()
    get_word()
得到的csv文件结果如下,有两种形式的数据
一种是多个关键词每日指数数据,另一种是一个关键词各省市每日指数数据
有了数据就可以用python制作动图
# Render an animated bar chart race (GIF) from a scraped index CSV.
import pandas as pd
import bar_chart_race as bcr

# The CSV has no header row: each line is "name,number,day".
# `header=None` tells pandas not to consume the first data row as column names.
# To chart the per-keyword file instead, read 'word.csv' here and pass
# filename='word.gif', title='爱情公寓5演职人员热度排行' below.
df = pd.read_csv(
    'area.csv',
    encoding='utf-8',
    header=None,
    names=['name', 'number', 'day'],
)

# Pivot to one row per day and one column per name; missing cells become 0.
df_result = pd.pivot_table(
    df,
    values='number',
    index=['day'],
    columns=['name'],
    fill_value=0,
)

# Write the animation to area.gif.
bcr.bar_chart_race(df_result, filename='area.gif', title='国内各省市王者荣耀热度排行')
5行python代码,看看实现的效果
微博指数
百度搜索新浪的微博指数,打开网站一看,发现网页版无法使用
我们只需打开开发者工具,将你的浏览器模拟为手机端,刷新网页即可
可以看到,微指数的界面出来了
添加关键词,查看指数的数据接口
请求是post方法,并且不需要登陆微博账号
# Scrape Weibo Index trend data for a list of names and append it to weibo.csv.
#
# NOTE(review): the published listing appears to have been lower-cased as a
# whole (it contained `%y-%m-%d` date formats, which cannot parse four-digit
# years). Endpoint paths and form field names below are reproduced exactly as
# published; if the live API is case-sensitive they may need their original
# casing restored -- confirm against the browser's network panel.
import datetime
import json
import logging
import re
import time

logger = logging.getLogger(__name__)

DATE_FORMAT = "%Y-%m-%d"
REQUEST_TIMEOUT = 10  # seconds; without a timeout a stalled request blocks forever

# Request headers as copied from the browser, one "name: value" pair per line.
headers = """accept: application/json
accept-encoding: gzip, deflate, br
accept-language: zh-cn,zh;q=0.9
content-length: 50
content-type: application/x-www-form-urlencoded
cookie: '你的cookie'
origin: https://data.weibo.com
referer: https://data.weibo.com/index/newindex?visit_type=trend&wid=1011224685661
sec-fetch-mode: cors
sec-fetch-site: same-origin
user-agent: mozilla/5.0 (iphone; cpu iphone os 11_0 like mac os x) applewebkit/604.1.38 (khtml, like gecko) version/11.0 mobile/15a372 safari/604.1
x-requested-with: xmlhttprequest"""

# Turn the header text into a dict; split on the first ": " only so values
# that themselves contain ": " stay intact.
headers = dict(line.split(": ", 1) for line in headers.split("\n"))

# Chart data endpoint.
url = 'https://data.weibo.com/index/ajax/newindex/getchartdata'

# Keyword lookup endpoint and the pattern that pulls the keyword id out of its response.
_SEARCH_URL = 'https://data.weibo.com/index/ajax/newindex/searchword'
_WID_PATTERN = re.compile(r'li wid=\\\"(.*?)\\\" word')

# Names whose index is collected.
names = ['汤唯', '朱亚文', '邓家佳', '乔振宇', '王学圻', '张艺兴', '俞灏明', '吴越',
         '梁冠华', '李昕亮', '苏可', '孙骁骁', '赵韩樱子', '孙耀琦', '魏巍']


def get_date_list(begin_date, end_date):
    """Return every day from ``begin_date`` to ``end_date`` inclusive.

    Both arguments and all returned items are ``YYYY-MM-DD`` strings. An
    empty list is returned when ``end_date`` is before ``begin_date``.

    Raises:
        ValueError: If either argument is not a valid ``YYYY-MM-DD`` date.
    """
    start = datetime.datetime.strptime(begin_date, DATE_FORMAT).date()
    end = datetime.datetime.strptime(end_date, DATE_FORMAT).date()
    day_count = (end - start).days + 1
    return [
        (start + datetime.timedelta(days=offset)).strftime(DATE_FORMAT)
        for offset in range(day_count)
    ]


def _fetch_trend(name):
    """Return ``(value, date)`` pairs of the one-month trend for ``name``.

    Returns an empty list when the chart response carries no data.

    Raises:
        IndexError: If no keyword id can be found in the search response.
    """
    # Imported here so the pure helpers in this script stay importable even
    # when the scraping dependency is not installed.
    import requests

    # Step 1: resolve the keyword to its id.
    html_id = requests.post(url=_SEARCH_URL, data={'word': name},
                            headers=headers, timeout=REQUEST_TIMEOUT)
    word_id = _WID_PATTERN.findall(html_id.text)[0]

    time.sleep(2)  # pause between the two requests

    # Step 2: request the chart data for that id.
    data = {'wid': word_id, 'dategroup': '1month'}
    html = requests.post(url=url, data=data, headers=headers, timeout=REQUEST_TIMEOUT)
    result = json.loads(html.text)
    if not result['data']:
        return []

    trend = result['data'][0]['trend']
    return list(zip(trend['s'], trend['x']))


def main():
    """Collect the trend of every configured name and append it to weibo.csv."""
    print(headers)
    for name in names:
        try:
            rows = _fetch_trend(name)
        except Exception as exc:
            # Best effort: report the failure and carry on with the next name.
            logger.warning("skipping %s: %r", name, exc)
            continue

        if not rows:
            continue
        with open('weibo.csv', 'a', encoding='utf-8') as f:
            for value, date in rows:
                print(name, value, date)
                f.write(name + ',' + str(value) + ',' + date + '\n')


if __name__ == "__main__":
    main()
获取到的信息
也来生成一个动态图表
# Render an animated bar chart race (GIF) from the scraped Weibo index CSV.
import pandas as pd
import bar_chart_race as bcr

# The CSV has no header row: each line is "name,number,day".
# `header=None` tells pandas not to consume the first data row as column names.
df = pd.read_csv(
    'weibo.csv',
    encoding='utf-8',
    header=None,
    names=['name', 'number', 'day'],
)

# Pivot to one row per day and one column per name; missing cells become 0.
df_result = pd.pivot_table(
    df,
    values='number',
    index=['day'],
    columns=['name'],
    fill_value=0,
)

# Animate only the first 10 rows (days) of the pivoted table and write weibo.gif.
bcr.bar_chart_race(df_result[:10], filename='weibo.gif', title='大明风华演职人员热度排行')
结果展示
有喜欢可以尝试动手试试哦~
以上就是python实战之实现获取动态图表的详细内容,更多关于python获取动态图表的资料请关注其它相关文章!