如何获取YouTube音乐排行榜并制作成可视化图表
程序员文章站
2022-09-21 09:35:56
通过 Flourish 制作的可视化动态图表链接地址:https://public.flourish.studio/visualisation/3585290/ 。代码节选:`import datetime`;#1. 获取要爬取的 url 列表:`URLbegin = 'https://charts.youtube.com/charts/TopSongs/global/'`,`URLend = '?hl=zh-cn'`,`start = datetime.date(2020,1,3)`,`current = datetime.date.today()` ……
使用 Helium 实现网页自动化;通过 Flourish 制作的可视化动态图表链接地址:https://public.flourish.studio/visualisation/3585290/
import datetime

# 1. Build the list of chart URLs to scrape.
URLbegin = 'https://charts.youtube.com/charts/TopSongs/global/'
URLend = '?hl=zh-cn'


def build_week_urls(first_day, last_day):
    """Return one chart URL per complete 7-day window in the given range.

    Windows start at ``first_day`` and follow each other without gaps. A
    window is included only when its last day is on or before ``last_day``,
    so a trailing partial week produces no URL.

    Args:
        first_day: ``datetime.date`` on which the first window starts.
        last_day: ``datetime.date`` that bounds the last window (inclusive).

    Returns:
        A list of URL strings, each embedding its window as
        ``YYYYMMDD-YYYYMMDD`` between ``URLbegin`` and ``URLend``.
    """
    span = datetime.timedelta(days=6)
    one_day = datetime.timedelta(days=1)
    urls = []
    window_start = first_day
    while window_start + span <= last_day:
        window_end = window_start + span
        period = window_start.strftime('%Y%m%d') + '-' + window_end.strftime('%Y%m%d')
        urls.append(URLbegin + period + URLend)
        # The next window begins the day after this one ends.
        window_start = window_end + one_day
    return urls


def download_charts(urls):
    """Open Chrome through Helium and click the download button on each URL.

    Args:
        urls: iterable of chart page URLs to visit in order.
    """
    # Imported locally so build_week_urls stays usable without Helium.
    from helium import Button, click, go_to, start_chrome

    start_chrome()
    for url in urls:
        go_to(url)
        click(Button('Download charts.'))


# 2. Use the automation tool to visit every URL and download its chart.
if __name__ == '__main__':
    urllist = build_week_urls(datetime.date(2020, 1, 3), datetime.date.today())
    download_charts(urllist)
import os
import re

import pandas as pd

# 3. Run the .bat file that changes every downloaded file's extension
#    (that step is not part of this script).
# 4. Concatenate the downloaded weekly chart files into one CSV.
inputfile_dir = 'D:/GOOGLE DONMLOAD/'
outputfile = 'D:/GOOGLE DONMLOAD/alldata.csv'
newoutputfile = 'D:/GOOGLE DONMLOAD/alldata_new.csv'

# Chart downloads are named
# youtube-charts-top-songs-global-weekly-YYYY-MM-DD.<ext>
_DATE_IN_NAME = re.compile(r'\d{4}-\d{2}-\d{2}')


def week_date_from_name(filename):
    """Return the ``YYYY-MM-DD`` week date embedded in a chart file name.

    The last date-looking token in the name is used. When the name holds no
    such token, fall back to the 10 characters before a 4-character
    extension, which is where the date sits in an unmodified download.
    """
    found = _DATE_IN_NAME.findall(filename)
    return found[-1] if found else filename[-14:-4]


def merge_weekly_charts(input_dir, output_file, exclude=()):
    """Merge every chart file in ``input_dir`` into ``output_file``.

    Each input is read as CSV and gains a ``Week Date`` column taken from
    its file name. The column header is written once, and any previous
    ``output_file`` is overwritten, so the script can be re-run safely.

    Args:
        input_dir: directory holding the downloaded chart files.
        output_file: path of the merged CSV to (re)create.
        exclude: extra paths to ignore when they live inside ``input_dir``
            (for example files produced by later steps).

    Returns:
        The number of chart files merged; ``output_file`` is left untouched
        when this is 0.
    """
    # The outputs live next to the inputs, so they must never be read back.
    skipped = {os.path.abspath(path) for path in (output_file,) + tuple(exclude)}
    merged = 0
    for name in sorted(os.listdir(input_dir)):
        path = os.path.join(input_dir, name)
        if os.path.abspath(path) in skipped or not os.path.isfile(path):
            continue
        chart = pd.read_csv(path)
        chart['Week Date'] = week_date_from_name(name)
        is_first = merged == 0
        chart.to_csv(
            output_file,
            mode='w' if is_first else 'a',
            header=is_first,
            index=False,
        )
        merged += 1
    return merged


def drop_repeated_headers(merged_file, deduped_file):
    """Copy ``merged_file`` to ``deduped_file`` without duplicate rows.

    The file is read with ``header=None`` so that header lines are ordinary
    rows: ``drop_duplicates()`` never compares the row parsed as the header,
    which would otherwise let one repeated header survive.
    """
    rows = pd.read_csv(merged_file, header=None)
    rows.drop_duplicates().to_csv(deduped_file, index=False, header=False)


if __name__ == '__main__':
    if merge_weekly_charts(inputfile_dir, outputfile, exclude=(newoutputfile,)):
        # 5. Remove repeated header rows.
        drop_repeated_headers(outputfile, newoutputfile)
    else:
        print('No chart files found in ' + inputfile_dir)
import pandas as pd


def f(x):
    """Pivot long chart rows into one row per track, one column per week.

    Args:
        x: DataFrame with ``Track Name``, ``Week Date`` and ``Views`` columns.

    Returns:
        A DataFrame indexed by ``Track Name`` whose columns are
        ``('Views', <week date>)`` pairs. Repeated (track, week) rows are
        combined with ``pivot_table``'s default aggregation (the mean), and
        weeks in which a track has no row are NaN.
    """
    return x.pivot_table(index=['Track Name'], columns=['Week Date'], values=['Views'])


# 6. Reshape the table into the layout the visualisation needs; the chart
#    itself is then generated with the Flourish website.
parsedfile = 'D:/GOOGLE DONMLOAD/parseddata.csv'


def write_parsed_table(source_file, parsed_file):
    """Read the long-format CSV, pivot it with ``f`` and write the result.

    The pivot already groups by ``Track Name``, so it is applied to the whole
    frame once. Wrapping it in ``groupby('Track Name').apply`` would only
    duplicate the index level, and it fails on pandas versions that hide the
    grouping column from ``apply``.
    """
    f(pd.read_csv(source_file)).to_csv(parsed_file)


if __name__ == '__main__':
    write_parsed_table('D:/GOOGLE DONMLOAD/finaldataset.csv', parsedfile)
本文地址:https://blog.csdn.net/qq_38920368/article/details/108269043
上一篇: java递归之return的处理
下一篇: 数据分析必备的数据预处理操作