JavaWeb调用python爬虫酷狗音乐(Jython的使用)
程序员文章站
2022-11-23 09:54:54
介绍:博主自己写的,已发布,链接:Jython-酷狗音乐。本文章用于 Java 与 Python 相互调用,爬取酷狗音乐的部分歌曲。此篇以 JavaWeb 展示,Python 只用于获取 mp3 音乐的 web 链接,再以 json 的形式返回给 Java。
介绍
博主自己写的,已发布 链接: Jython-酷狗音乐
本文章介绍 Java 与 Python 的相互调用,用于爬取酷狗音乐的部分歌曲。
此篇以 JavaWeb 展示,Python 只用于获取 mp3 音乐的 web 链接,再以 json 的形式返回给 Java。
功能概述
让用户输入要搜索的音乐名或者歌手名,然后把搜索到的歌曲信息给用户。用户选择下载音乐。
思路
1.python的思路可以看这篇文章https://blog.csdn.net/weixin_29168393/article/details/112362923
2.Java需要导入Jython.jar进行Java与python的调用(官网可以下载,直接百度Jython即可)。(注:下面的示例代码实际是通过 Runtime.getRuntime().exec 启动本机的 python 解释器来执行脚本,并从其标准输出读取结果。)
Java用于获取用户输入信息,传入python中,再由python以json的形式传给Java。
主要代码如下:
Java代码块(调用python并读取返回值json类型)
// Result list of Music beans (song name, singer name, MP3 link); stays null
// when the script produced no output or could not be run.
List<Music> list = null;
try {
    // C:\\Python\\my_utils.py is the Python script location; input is the
    // song or singer name entered by the user.
    String[] args1 = new String[] { "python", "C:\\Python\\my_utils.py", input };
    ProcessBuilder builder = new ProcessBuilder(args1);
    // Forward the child's stderr to this process instead of leaving an
    // unread pipe that could fill up and block the script.
    builder.redirectError(ProcessBuilder.Redirect.INHERIT);
    Process proc = builder.start(); // run the .py file
    // try-with-resources closes the reader even if parsing throws.
    try (BufferedReader in = new BufferedReader(new InputStreamReader(proc.getInputStream()))) {
        // The script prints its whole result on one line.
        String line = in.readLine();
        if (line != null) {
            // Parse the JSON array with the custom helper method.
            list = getMusic(line);
        }
    }
    proc.waitFor();
} catch (IOException e) {
    e.printStackTrace();
} catch (InterruptedException e) {
    // Restore the interrupt flag so callers can still observe the interruption.
    Thread.currentThread().interrupt();
    e.printStackTrace();
}
自定义方法
// Parse a JSON array string into a list of Music beans.
List<Music> getMusic(String musicJson) {
    // Read the text as a JSON array first, then convert its elements one by one.
    JsonArray items = new JsonParser().parse(musicJson).getAsJsonArray();
    Gson converter = new Gson();
    ArrayList<Music> result = new ArrayList<Music>();
    for (int i = 0; i < items.size(); i++) {
        // Map each JSON object onto a Music instance.
        result.add(converter.fromJson(items.get(i), Music.class));
    }
    return result;
}
(Music)实体类
/**
 * Plain bean describing one song returned by the Python script.
 */
public class Music {
    // Private fields
    private int id;        // song id
    private String singer; // singer name
    private String song;   // song title
    private String https;  // link to the MP3

    /** No-argument constructor, e.g. for libraries that create the bean reflectively. */
    public Music() {
    }

    /** Creates a fully populated song entry. */
    public Music(int id, String singer, String song, String https) {
        this.id = id;
        this.singer = singer;
        this.song = song;
        this.https = https;
    }

    // Public getters and setters for the fields
    public int getId() {
        return id;
    }

    public String getSinger() {
        return singer;
    }

    public String getSong() {
        return song;
    }

    public String getHttps() {
        return https;
    }

    public void setId(int id) {
        this.id = id;
    }

    public void setSinger(String singer) {
        this.singer = singer;
    }

    public void setSong(String song) {
        this.song = song;
    }

    public void setHttps(String https) {
        this.https = https;
    }

    @Override
    public String toString() {
        return "Music [id=" + id + ", singer=" + singer + ", song=" + song + ", https=" + https + "]";
    }
}
python中代码
#coding:utf-8
import sys
import requests,json,re
# Collected song entries; search() appends to this list and prints it.
data = []

# Request headers (a desktop Chrome User-Agent) for the KuGou HTTP calls.
headers = {
    "User-Agent": "Mozilla/5.0 (Windows NT 6.1; WOW64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/55.0.2883.87 Safari/537.36",
}

# Cookies sent with the play-data requests.
# NOTE(review): presumably captured from a browser session - confirm they are still valid.
cookies = {
    'Hm_lpvt_aedee6983d4cfc62f509129360d6bb3d': '1611881034',
    'Hm_lvt_aedee6983d4cfc62f509129360d6bb3d': '1611880883',
    'kg_dfid': '1IZbKH1WO7c43dxQKz43IjNd',
    'kg_dfid_collect': 'd41d8cd98f00b204e9800998ecf8427e',
    'kg_mid': '2bc7647d6a0491e2f102db3722039ec0',
}
def jsonwaangzhi(hash):
    """Return the MP3 play URLs that KuGou reports for a song hash.

    Two requests are made: the first looks up the song's ``album_id``; the
    second queries the play-data endpoint with both the hash and the album
    id, and every ``play_url`` value found in that response is collected.

    Args:
        hash: The song's file hash (``FileHash`` in the search results).

    Returns:
        A list of ``play_url`` strings exactly as they appear in the
        response text (this function does not unescape them); empty when
        the response contains none.
    """
    lookup_url = 'http://www.kugou.com/yy/index.php?r=play/getdata&hash=' + hash
    # Pass headers by keyword: the second positional parameter of
    # requests.get is ``params``, so a positional dict would be sent as
    # query-string arguments instead of HTTP headers.
    lookup_text = requests.get(lookup_url, headers=headers, cookies=cookies).text
    album_ids = re.findall(r'"album_id":"(.*?)"', lookup_text)
    # Use the last album id found; fall back to '0' when there is none.
    album_id = album_ids[-1] if album_ids else '0'
    play_data_url = (
        'https://wwwapi.kugou.com/yy/index.php?r=play/getdata&callback=jQuery191010266427224936736_1546780601995&hash='
        + hash + '&album_id=' + album_id + '&_=1546780601996'
    )
    play_text = requests.get(play_data_url, headers=headers, cookies=cookies).text
    return re.findall(r'"play_url":"(.*?)"', play_text)
def gethtml(url,header = headers,encoding = 'utf-8'):
    """Fetch *url* and return the response body as text.

    Args:
        url: Address to request with HTTP GET.
        header: Dict of HTTP request headers; defaults to the module-level
            ``headers``.
        encoding: Encoding used to decode the response body.

    Returns:
        The decoded response text.
    """
    # Pass the headers by keyword: the second positional parameter of
    # requests.get is ``params``, which would put them in the query string.
    response = requests.get(url, headers=header)
    response.encoding = encoding
    return response.text
def _strip_jsonp(text, callback):
    """Return the JSON payload of a ``callback(...)`` JSONP response.

    Only the wrapper itself is removed, so parentheses inside the payload
    (for example in song titles) are left untouched. Text that does not
    start with ``callback(`` is returned unchanged apart from stripping
    surrounding whitespace.
    """
    text = text.strip()
    prefix = callback + '('
    if not text.startswith(prefix):
        return text
    text = text[len(prefix):].rstrip().rstrip(';').rstrip()
    if text.endswith(')'):
        text = text[:-1]
    return text


# Search
def search(name):
    """Search KuGou for *name* and print the matches as a JSON array.

    For every search hit the play URLs are resolved with ``jsonwaangzhi``
    and one ``{"singer", "song", "https"}`` entry per URL is appended to
    the module-level ``data`` list. The whole list is then printed on a
    single line as JSON, which is the value handed back to the Java caller.

    Args:
        name: Song or singer name to search for.
    """
    callback = 'jQuery191034642999175022426_1489023388639'
    url='http://songsearch.kugou.com/song_search_v2?callback=jQuery191034642999175022426_1489023388639&keyword={}&page=1&pagesize=30&userid=-1&clientver=&platform=WebFilter&tag=em&filter=2&iscorrection=1&privilege_filter=0&_=1489023388641'
    response_text = gethtml(url.format(name))
    views = json.loads(_strip_jsonp(response_text, callback))
    for view in views['data']['lists']:
        # The search API wraps the matched keyword in <em> tags; drop them.
        song = view['SongName'].replace('<em>', '').replace('</em>', '')
        singer = view['SingerName'].replace('<em>', '').replace('</em>', '')
        # Cut a parenthesised suffix such as "(Live ...": everything from
        # an opening parenthesis to the end of the line is removed.
        song = re.sub(r'\(.*', '', song)
        # view['FileHash'] is the hash used to look up the play URLs.
        for play_url in jsonwaangzhi(view['FileHash']):
            # The scraped URL contains backslashes (presumably JSON-escaped
            # slashes); remove them to get a usable link.
            play_url = play_url.replace('\\', '')
            data.append({"singer": singer, "song": song, "https": play_url})
    # Emit real JSON (ASCII-escaped, one line) rather than a Python repr so
    # the consumer can parse it regardless of console encoding.
    print(json.dumps(data))
# Entry point: the Java caller passes the search keyword as the first
# command-line argument.
if __name__ == '__main__':
    if len(sys.argv) < 2:
        # Fail with a clear message instead of an IndexError traceback.
        sys.exit('usage: my_utils.py <song or singer name>')
    search(sys.argv[1])
主要代码也就这么一点,思路清晰一切好办。
如有疑问,下方留言即可。
本文地址:https://blog.csdn.net/m0_55000378/article/details/113974087