Python 获取 QQ 音乐热歌、新歌排行榜信息
程序员文章站
2023-02-17 07:58:49
from selenium import webdriver; from selenium.webdriver.chrome.options import Options; from selenium.webdriver.support.ui import WebDriverWait; from selenium.webdriver.support import expected_conditions as EC; from selenium.webdriver.common.by import By; import...
from selenium import webdriver
from selenium.webdriver.chrome.options import Options
from selenium.webdriver.support.ui import WebDriverWait
from selenium.webdriver.support import expected_conditions as EC
from selenium.webdriver.common.by import By
import re
class main(object):
    """Scrape a QQ Music chart page and write per-song details to a file.

    ``query_music`` collects the song-detail links from a chart page;
    ``list_music`` visits each link and appends one text record per song to
    ``./sing.json`` (plain newline-separated text, despite the extension).
    """

    # Song-detail link pattern. Dots are escaped so they match literal dots only.
    _SONG_URL = re.compile(r"https://y\.qq\.com/n/yqq/song/(\S+)\.html")

    # CSS class names of the detail fields, in the order they are written:
    # album, language, genre, record company, release time.
    _DETAIL_FIELDS = ("js_album", "js_lan", "js_genre", "js_company", "js_public_time")

    def __init__(self):
        """Prepare headless Chrome options, the driver path and request headers."""
        self.chrome_options = Options()
        self.chrome_options.add_argument('--headless')
        self.chrome_options.add_argument('--disable-gpu')
        # Stored on the instance because the scraping methods read
        # self.chrome_driver when they start a browser.
        self.chrome_driver = "D:\\soft\\py3\\chromedriver.exe"  # change to your own driver path
        # Not used by the methods of this class; kept as a public attribute.
        self.header = {
            "accept": "text/html,application/xhtml+xml,application/xml;q=0.9,image/webp,image/apng,*/*;q=0.8,application/signed-exchange;v=b3",
            "accept-language": "zh-CN,zh;q=0.9",
            "referer": "https://y.qq.com/n/yqq/toplist/27.html",
            "upgrade-insecure-requests": "1",
            "user-agent": "Mozilla/5.0 (Macintosh; Intel Mac OS X 10_15_0) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/77.0.3865.90 Safari/537.36"
        }

    @classmethod
    def _is_song_url(cls, href):
        """Return True when *href* is a non-empty song-detail link."""
        return bool(href) and cls._SONG_URL.match(href) is not None

    @staticmethod
    def _field_text(container, class_name):
        """Return the text of the first matching child, or "" when there is none."""
        found = container.find_elements(By.CLASS_NAME, class_name)
        return found[0].text if found else ""

    def _new_driver(self):
        """Start a Chrome session, remember it on ``self.driver`` and return it."""
        self.driver = webdriver.Chrome(self.chrome_driver, options=self.chrome_options)
        return self.driver

    def query_music(self, url):
        """Return the song-detail links found on the chart page at *url*.

        Links that are missing or do not look like song-detail URLs are
        skipped. The browser session started here is always shut down before
        returning, including when an error is raised.
        """
        driver = self._new_driver()
        try:
            driver.get(url)
            WebDriverWait(driver, 10).until(
                EC.presence_of_all_elements_located((By.CLASS_NAME, "songlist__songname_txt")))
            links = []
            for item in driver.find_elements(By.CLASS_NAME, "songlist__songname_txt"):
                href = item.find_element(By.CLASS_NAME, "js_song").get_attribute("href")
                if self._is_song_url(href):
                    links.append(href)
            return links
        finally:
            # quit() ends the whole session; close() would only close the window.
            driver.quit()

    def list_music(self, url):
        """Write name, singer and detail fields of every song on the chart at *url*.

        Each record is written to ``./sing.json`` as seven lines (song name,
        singer, album, language, genre, record company, release time)
        followed by a blank line.
        """
        # Collect the links first: query_music runs and shuts down its own
        # browser session, so it must not share one with the loop below.
        song_urls = self.query_music(url)
        driver = self._new_driver()
        try:
            with open("./sing.json", "w", encoding="utf-8") as fw:
                for song_url in song_urls:
                    driver.get(song_url)
                    # Wait for the detail section before reading anything from the page.
                    WebDriverWait(driver, 10).until(
                        EC.presence_of_all_elements_located((By.CLASS_NAME, "data__info")))
                    song_name = driver.find_element(By.CLASS_NAME, "data__name_txt").text
                    singer = driver.find_element(By.CLASS_NAME, "data__singer").text
                    for info in driver.find_elements(By.CLASS_NAME, "data__info"):
                        details = [self._field_text(info, name) for name in self._DETAIL_FIELDS]
                        fw.write("\n".join([song_name, singer] + details) + "\n\n")
        finally:
            driver.quit()
if __name__ == '__main__':
    # Chart pages on y.qq.com; only the last one is scraped below.
    url = "https://y.qq.com/n/yqq/toplist/27.html#stat=y_new.toplist.menu.27"  # new-songs chart
    url_re = "https://y.qq.com/n/yqq/toplist/26.html#stat=y_new.toplist.menu.26"  # hot-songs chart
    url_bs = "https://y.qq.com/n/yqq/toplist/62.html#stat=y_new.toplist.menu.62"  # soaring chart
    scraper = main()
    scraper.list_music(url_bs)
本文地址:https://blog.csdn.net/qq_39942956/article/details/107579567
上一篇: 洛谷:P1981表达式求值(栈)
下一篇: 指定时间间隔爬取一次的延时爬虫