Python爬取酷狗MP3音频的步骤
分析问题
音频url
点入某首歌曲的播放界面,按 F12 打开开发者工具并切换到 Network 面板分析请求,可以看到有一个 index.php?... 请求,其返回数据中有一个 play_url 字段,打开后正是我们需要的音频。
查看该url的headers,其params参数如下,通过反复不同的几次尝试,得知r、callback、dfid、mid、platid这几项不变,而通过初步的requests尝试,发现最后一项'_'可有可无,改变的只有hash和album_id两项。
r: play/getdata callback: jquery1910861615852090795_1612578519454 hash: ef0da656831f08b1fd2cb855bc38ed2c dfid: 0q0clh1iczag3ey1j70ratil mid: b6cf66837b18642cc269390b066649dc platid: 4 album_id: 41669581 _: 1612578519455
搜索url
得知改变的只有两项后,那就容易多了,在搜索歌曲界面network,发现song?...该url返回值中有hash和album_id存在,我们之后只用搜索结果第一项(一般要搜的歌曲排名第一)。
同样分析其params参数,改变的只有keyword、signature、clienttime、mid、uuid。后三者可以比较容易看出其为毫秒级时间戳(13位),keyword也挺容易明白,signature哪里找呢?通过全局搜索signature,发现有一个js文件中含有该关键词。
callback: callback123 keyword: 花海 page: 1 pagesize: 30 bitrate: 0 isfuzzy: 0 tag: em inputtype: 0 platform: webfilter userid: -1 clientver: 2000 iscorrection: 1 privilege_filter: 0 srcappid: 2919 clienttime: 1612579100435 mid: 1612579100435 uuid: 1612579100435 dfid: - signature: 472f60133c23184cafc5005350c90229
js
找到的js代码如下
"undefined" == typeof faultylabs && (faultylabs = {}), faultylabs.md5 = function(a) { function b(a) { var b = (a >>> 0).tostring(16); return "00000000".substr(0, 8 - b.length) + b } function c(a) { for (var b = [], c = 0; c < a.length; c++) b = b.concat(k(a[c])); return b } function d(a) { for (var b = [], c = 0; 8 > c; c++) b.push(255 & a), a >>>= 8; return b } function e(a, b) { return a << b & 4294967295 | a >>> 32 - b } function f(a, b, c) { return a & b | ~a & c } function g(a, b, c) { return c & a | ~c & b } function h(a, b, c) { return a ^ b ^ c } function i(a, b, c) { return b ^ (a | ~c) } function j(a, b) { return a[b + 3] << 24 | a[b + 2] << 16 | a[b + 1] << 8 | a[b] } function k(a) { for (var b = [], c = 0; c < a.length; c++) if (a.charcodeat(c) <= 127) b.push(a.charcodeat(c)); else for (var d = encodeuricomponent(a.charat(c)).substr(1).split("%"), e = 0; e < d.length; e++) b.push(parseint(d[e], 16)); return b } function l() { for (var a = "", c = 0, d = 0, e = 3; e >= 0; e--) d = arguments[e], c = 255 & d, d >>>= 8, c <<= 8, c |= 255 & d, d >>>= 8, c <<= 8, c |= 255 & d, d >>>= 8, c <<= 8, c |= d, a += b(c); return a } function m(a) { for (var b = new array(a.length), c = 0; c < a.length; c++) b[c] = a[c]; return b } function n(a, b) { return 4294967295 & a + b } function o() { function a(a, b, c, d) { var f = v; v = u, u = t, t = n(t, e(n(s, n(a, n(b, c))), d)), s = f } var b = p.length; p.push(128); var c = p.length % 64; if (c > 56) { for (var k = 0; 64 - c > k; k++) p.push(0); c = p.length % 64 } for (k = 0; 56 - c > k; k++) p.push(0); p = p.concat(d(8 * b)); var m = 1732584193 , o = 4023233417 , q = 2562383102 , r = 271733878 , s = 0 , t = 0 , u = 0 , v = 0; for (k = 0; k < p.length / 64; k++) { s = m, t = o, u = q, v = r; var w = 64 * k; a(f(t, u, v), 3614090360, j(p, w), 7), a(f(t, u, v), 3905402710, j(p, w + 4), 12), a(f(t, u, v), 606105819, j(p, w + 8), 17), a(f(t, u, v), 3250441966, j(p, w + 12), 22), a(f(t, u, v), 4118548399, j(p, w + 16), 
7), a(f(t, u, v), 1200080426, j(p, w + 20), 12), a(f(t, u, v), 2821735955, j(p, w + 24), 17), a(f(t, u, v), 4249261313, j(p, w + 28), 22), a(f(t, u, v), 1770035416, j(p, w + 32), 7), a(f(t, u, v), 2336552879, j(p, w + 36), 12), a(f(t, u, v), 4294925233, j(p, w + 40), 17), a(f(t, u, v), 2304563134, j(p, w + 44), 22), a(f(t, u, v), 1804603682, j(p, w + 48), 7), a(f(t, u, v), 4254626195, j(p, w + 52), 12), a(f(t, u, v), 2792965006, j(p, w + 56), 17), a(f(t, u, v), 1236535329, j(p, w + 60), 22), a(g(t, u, v), 4129170786, j(p, w + 4), 5), a(g(t, u, v), 3225465664, j(p, w + 24), 9), a(g(t, u, v), 643717713, j(p, w + 44), 14), a(g(t, u, v), 3921069994, j(p, w), 20), a(g(t, u, v), 3593408605, j(p, w + 20), 5), a(g(t, u, v), 38016083, j(p, w + 40), 9), a(g(t, u, v), 3634488961, j(p, w + 60), 14), a(g(t, u, v), 3889429448, j(p, w + 16), 20), a(g(t, u, v), 568446438, j(p, w + 36), 5), a(g(t, u, v), 3275163606, j(p, w + 56), 9), a(g(t, u, v), 4107603335, j(p, w + 12), 14), a(g(t, u, v), 1163531501, j(p, w + 32), 20), a(g(t, u, v), 2850285829, j(p, w + 52), 5), a(g(t, u, v), 4243563512, j(p, w + 8), 9), a(g(t, u, v), 1735328473, j(p, w + 28), 14), a(g(t, u, v), 2368359562, j(p, w + 48), 20), a(h(t, u, v), 4294588738, j(p, w + 20), 4), a(h(t, u, v), 2272392833, j(p, w + 32), 11), a(h(t, u, v), 1839030562, j(p, w + 44), 16), a(h(t, u, v), 4259657740, j(p, w + 56), 23), a(h(t, u, v), 2763975236, j(p, w + 4), 4), a(h(t, u, v), 1272893353, j(p, w + 16), 11), a(h(t, u, v), 4139469664, j(p, w + 28), 16), a(h(t, u, v), 3200236656, j(p, w + 40), 23), a(h(t, u, v), 681279174, j(p, w + 52), 4), a(h(t, u, v), 3936430074, j(p, w), 11), a(h(t, u, v), 3572445317, j(p, w + 12), 16), a(h(t, u, v), 76029189, j(p, w + 24), 23), a(h(t, u, v), 3654602809, j(p, w + 36), 4), a(h(t, u, v), 3873151461, j(p, w + 48), 11), a(h(t, u, v), 530742520, j(p, w + 60), 16), a(h(t, u, v), 3299628645, j(p, w + 8), 23), a(i(t, u, v), 4096336452, j(p, w), 6), a(i(t, u, v), 1126891415, j(p, w + 28), 10), a(i(t, u, 
v), 2878612391, j(p, w + 56), 15), a(i(t, u, v), 4237533241, j(p, w + 20), 21), a(i(t, u, v), 1700485571, j(p, w + 48), 6), a(i(t, u, v), 2399980690, j(p, w + 12), 10), a(i(t, u, v), 4293915773, j(p, w + 40), 15), a(i(t, u, v), 2240044497, j(p, w + 4), 21), a(i(t, u, v), 1873313359, j(p, w + 32), 6), a(i(t, u, v), 4264355552, j(p, w + 60), 10), a(i(t, u, v), 2734768916, j(p, w + 24), 15), a(i(t, u, v), 1309151649, j(p, w + 52), 21), a(i(t, u, v), 4149444226, j(p, w + 16), 6), a(i(t, u, v), 3174756917, j(p, w + 44), 10), a(i(t, u, v), 718787259, j(p, w + 8), 15), a(i(t, u, v), 3951481745, j(p, w + 36), 21), m = n(m, s), o = n(o, t), q = n(q, u), r = n(r, v) } return l(r, q, o, m).touppercase() } var p = null , q = null; return "string" == typeof a ? p = k(a) : a.constructor == array ? 0 === a.length ? p = a : "string" == typeof a[0] ? p = c(a) : "number" == typeof a[0] ? p = a : q = typeof a[0] : "undefined" != typeof arraybuffer ? a instanceof arraybuffer ? p = m(new uint8array(a)) : a instanceof uint8array || a instanceof int8array ? p = m(a) : a instanceof uint32array || a instanceof int32array || a instanceof uint16array || a instanceof int16array || a instanceof float32array || a instanceof float64array ? p = m(new uint8array(a.buffer)) : q = typeof a : q = typeof a, q && alert("md5 type mismatch, cannot process " + q), o() } , function() { function a(a) { if (window.kgmobilecall) a && a(); else { var b = document.createelement("script"); b.src = "https://m3ws.kugou.com/static/js/common/mobilecall_3.0.js", b.onload = function() { this.readystate && "loaded" != this.readystate && "complete" != this.readystate || a && a() } , document.body.appendchild(b) } } function b(b, c, d) { b = b || {}, c = c || "", d = d || {}; var e, f = !1, g = "json"; "function" == typeof d ? e = d : (e = d.callback, f = d.useh5 || !1, g = d.posttype || "json"); var h = {}; for (var i in b) !h[i] && (h[i] = b[i]); var j = function() { var a = navigator.useragent.match(/kgbrowser/gi) ? 
!0 : !1 , b = navigator.useragent.match(/kugouandroid/gi) ? !0 : !1 , c = "undefined" == typeof external ? !1 : "undefined" == typeof external.supercall ? !1 : !0; return c || b || a ? !0 : !1 }() , k = (new date).gettime() , l = [] , m = {} , n = [] , o = [] , p = "nvph5oo715z5diwaeqlhmdswxxqv4hwt" , q = { appid: function(a) { return a() }, srcappid: function(a) { return a("2919") }, clientver: function(a) { return a("20000") }, "clienttime,mid,uuid,dfid": function(a) { return a({ clienttime: k, mid: k, uuid: k, dfid: "-" }) } } , r = function() { for (var a in q) l.push(a); !function(a) { function b(a) { if (a < l.length) q[l[a]](function(c) { if (c) if ("[object object]" == object.prototype.tostring.call(c)) for (var d in c) m[d] = c[d]; else m[l[a]] = c; b(a + 1) }); else { for (var d in m) !h[d] && (h[d] = m[d]); for (var d in h) n.push(d); if (n.sort(), n.foreach(function(a) { o.push(a + "=" + h[a]) }), c) if ("[object object]" == object.prototype.tostring.call(c)) if ("json" == g) o.push(json.stringify(c)); else { var f = []; for (var d in c) f.push(d + "=" + c[d]); o.push(f.join("&")) } else o.push(c); o.unshift(p), o.push(p), h.signature = faultylabs.md5(o.join("")), e && e(h) } } b(a) }(0) }; if (c && ("[object object]" != object.prototype.tostring.call(c) ? j = !1 : "urlencoded" == g && (j = !1)), j && !f) { var s = !1; a(function() { kgmobilecall.callcmd({ cmd: 764, jsonstr: json.stringify({ get: h, post: c }), callback: function(a) { if (s) return !1; if (s = !0, a && a.status) { delete a.status; for (var b in a) !h[b] && (h[b] = a[b]); return e && e(h) } j = !1, r() } }) }) } else j = !1, r() } "undefined" != typeof module && module.exports ? module.exports = b : "function" == typeof define && define.amd ? define(function() { return b }) : window.getinterfacepublic = b }();
在第274行发现 h.signature = faultylabs.md5(o.join("")),初步理解为:signature 是将数组 o 的各元素拼接成一个字符串后计算得到的 md5 值。在该行加上断点,并将 o 加入 watch。
0: "nvph5oo715z5diwaeqlhmdswxxqv4hwt" 1: "bitrate=0" 2: "callback=callback123" 3: "clienttime=1612580098162" 4: "clientver=2000" 5: "dfid=-" 6: "inputtype=0" 7: "iscorrection=1" 8: "isfuzzy=0" 9: "keyword=花海" 10: "mid=1612580098162" 11: "page=1" 12: "pagesize=30" 13: "platform=webfilter" 14: "privilege_filter=0" 15: "srcappid=2919" 16: "tag=em" 17: "userid=-1" 18: "uuid=1612580098162" 19: "nvph5oo715z5diwaeqlhmdswxxqv4hwt" length: 20
在watch里不难发现o为一个长度为20的数组,之后我们按之前理解将字符串拼接。
nvph5oo715z5diwaeqlhmdswxxqv4hwtbitrate=0callback=callback123clienttime=1612580098162clientver=2000dfid=-inputtype=0iscorrection=1isfuzzy=0keyword=花海mid=1612580098162page=1pagesize=30platform=webfilterprivilege_filter=0srcappid=2919tag=emuserid=-1uuid=1612580098162nvph5oo715z5diwaeqlhmdswxxqv4hwt
接下来需要对这个字符串做同样的 md5 计算(严格来说这是哈希而不是"解密")。自己重写 js 里的算法?这谁会?反正我不会 0.0,不过也有办法:用python直接调用js文件。前面在274行已分析出这里用的是md5,往前找看看有没有相关函数,果真有一个,将其保存为kugou.js
"undefined" == typeof faultylabs && (faultylabs = {}), faultylabs.md5 = function(a) { function b(a) { var b = (a >>> 0).tostring(16); return "00000000".substr(0, 8 - b.length) + b } function c(a) { for (var b = [], c = 0; c < a.length; c++) b = b.concat(k(a[c])); return b } function d(a) { for (var b = [], c = 0; 8 > c; c++) b.push(255 & a), a >>>= 8; return b } function e(a, b) { return a << b & 4294967295 | a >>> 32 - b } function f(a, b, c) { return a & b | ~a & c } function g(a, b, c) { return c & a | ~c & b } function h(a, b, c) { return a ^ b ^ c } function i(a, b, c) { return b ^ (a | ~c) } function j(a, b) { return a[b + 3] << 24 | a[b + 2] << 16 | a[b + 1] << 8 | a[b] } function k(a) { for (var b = [], c = 0; c < a.length; c++) if (a.charcodeat(c) <= 127) b.push(a.charcodeat(c)); else for (var d = encodeuricomponent(a.charat(c)).substr(1).split("%"), e = 0; e < d.length; e++) b.push(parseint(d[e], 16)); return b } function l() { for (var a = "", c = 0, d = 0, e = 3; e >= 0; e--) d = arguments[e], c = 255 & d, d >>>= 8, c <<= 8, c |= 255 & d, d >>>= 8, c <<= 8, c |= 255 & d, d >>>= 8, c <<= 8, c |= d, a += b(c); return a } function m(a) { for (var b = new array(a.length), c = 0; c < a.length; c++) b[c] = a[c]; return b } function n(a, b) { return 4294967295 & a + b } function o() { function a(a, b, c, d) { var f = v; v = u, u = t, t = n(t, e(n(s, n(a, n(b, c))), d)), s = f } var b = p.length; p.push(128); var c = p.length % 64; if (c > 56) { for (var k = 0; 64 - c > k; k++) p.push(0); c = p.length % 64 } for (k = 0; 56 - c > k; k++) p.push(0); p = p.concat(d(8 * b)); var m = 1732584193 , o = 4023233417 , q = 2562383102 , r = 271733878 , s = 0 , t = 0 , u = 0 , v = 0; for (k = 0; k < p.length / 64; k++) { s = m, t = o, u = q, v = r; var w = 64 * k; a(f(t, u, v), 3614090360, j(p, w), 7), a(f(t, u, v), 3905402710, j(p, w + 4), 12), a(f(t, u, v), 606105819, j(p, w + 8), 17), a(f(t, u, v), 3250441966, j(p, w + 12), 22), a(f(t, u, v), 4118548399, j(p, w + 16), 
7), a(f(t, u, v), 1200080426, j(p, w + 20), 12), a(f(t, u, v), 2821735955, j(p, w + 24), 17), a(f(t, u, v), 4249261313, j(p, w + 28), 22), a(f(t, u, v), 1770035416, j(p, w + 32), 7), a(f(t, u, v), 2336552879, j(p, w + 36), 12), a(f(t, u, v), 4294925233, j(p, w + 40), 17), a(f(t, u, v), 2304563134, j(p, w + 44), 22), a(f(t, u, v), 1804603682, j(p, w + 48), 7), a(f(t, u, v), 4254626195, j(p, w + 52), 12), a(f(t, u, v), 2792965006, j(p, w + 56), 17), a(f(t, u, v), 1236535329, j(p, w + 60), 22), a(g(t, u, v), 4129170786, j(p, w + 4), 5), a(g(t, u, v), 3225465664, j(p, w + 24), 9), a(g(t, u, v), 643717713, j(p, w + 44), 14), a(g(t, u, v), 3921069994, j(p, w), 20), a(g(t, u, v), 3593408605, j(p, w + 20), 5), a(g(t, u, v), 38016083, j(p, w + 40), 9), a(g(t, u, v), 3634488961, j(p, w + 60), 14), a(g(t, u, v), 3889429448, j(p, w + 16), 20), a(g(t, u, v), 568446438, j(p, w + 36), 5), a(g(t, u, v), 3275163606, j(p, w + 56), 9), a(g(t, u, v), 4107603335, j(p, w + 12), 14), a(g(t, u, v), 1163531501, j(p, w + 32), 20), a(g(t, u, v), 2850285829, j(p, w + 52), 5), a(g(t, u, v), 4243563512, j(p, w + 8), 9), a(g(t, u, v), 1735328473, j(p, w + 28), 14), a(g(t, u, v), 2368359562, j(p, w + 48), 20), a(h(t, u, v), 4294588738, j(p, w + 20), 4), a(h(t, u, v), 2272392833, j(p, w + 32), 11), a(h(t, u, v), 1839030562, j(p, w + 44), 16), a(h(t, u, v), 4259657740, j(p, w + 56), 23), a(h(t, u, v), 2763975236, j(p, w + 4), 4), a(h(t, u, v), 1272893353, j(p, w + 16), 11), a(h(t, u, v), 4139469664, j(p, w + 28), 16), a(h(t, u, v), 3200236656, j(p, w + 40), 23), a(h(t, u, v), 681279174, j(p, w + 52), 4), a(h(t, u, v), 3936430074, j(p, w), 11), a(h(t, u, v), 3572445317, j(p, w + 12), 16), a(h(t, u, v), 76029189, j(p, w + 24), 23), a(h(t, u, v), 3654602809, j(p, w + 36), 4), a(h(t, u, v), 3873151461, j(p, w + 48), 11), a(h(t, u, v), 530742520, j(p, w + 60), 16), a(h(t, u, v), 3299628645, j(p, w + 8), 23), a(i(t, u, v), 4096336452, j(p, w), 6), a(i(t, u, v), 1126891415, j(p, w + 28), 10), a(i(t, u, 
v), 2878612391, j(p, w + 56), 15), a(i(t, u, v), 4237533241, j(p, w + 20), 21), a(i(t, u, v), 1700485571, j(p, w + 48), 6), a(i(t, u, v), 2399980690, j(p, w + 12), 10), a(i(t, u, v), 4293915773, j(p, w + 40), 15), a(i(t, u, v), 2240044497, j(p, w + 4), 21), a(i(t, u, v), 1873313359, j(p, w + 32), 6), a(i(t, u, v), 4264355552, j(p, w + 60), 10), a(i(t, u, v), 2734768916, j(p, w + 24), 15), a(i(t, u, v), 1309151649, j(p, w + 52), 21), a(i(t, u, v), 4149444226, j(p, w + 16), 6), a(i(t, u, v), 3174756917, j(p, w + 44), 10), a(i(t, u, v), 718787259, j(p, w + 8), 15), a(i(t, u, v), 3951481745, j(p, w + 36), 21), m = n(m, s), o = n(o, t), q = n(q, u), r = n(r, v) } return l(r, q, o, m).touppercase() } var p = null , q = null; return "string" == typeof a ? p = k(a) : a.constructor == array ? 0 === a.length ? p = a : "string" == typeof a[0] ? p = c(a) : "number" == typeof a[0] ? p = a : q = typeof a[0] : "undefined" != typeof arraybuffer ? a instanceof arraybuffer ? p = m(new uint8array(a)) : a instanceof uint8array || a instanceof int8array ? p = m(a) : a instanceof uint32array || a instanceof int32array || a instanceof uint16array || a instanceof int16array || a instanceof float32array || a instanceof float64array ? p = m(new uint8array(a.buffer)) : q = typeof a : q = typeof a, q && alert("md5 type mismatch, cannot process " + q), o() }
之后用 Python 的 PyExecJS 库调用(安装:pip install PyExecJS),但要注意:代码中导入时使用的模块名是 execjs(即 import execjs)。
代码实现
""" data: 2021/02/05 通过搜索爬取酷狗音乐,付费音乐暂时只能爬取试听部分。 """ import requests import re import json import time import execjs def get_signature(text): """ 获取signature值 :param text: 格式化之后的字符串 :return: 返回酷狗网站上加密后的signature """ # 读取js文件内容 with open("kugou.js", "r", encoding='utf-8') as f: js_str = f.read() # 通过js文件中逻辑数据,对文件进行加密 if js_str: js_obj = execjs.compile(js_str) return js_obj.call('faultylabs.md5', text) def get_url(keyword): """ 获取搜索之后的url :param keyword: 搜索词,如晴天 :return: 返回完整的url地址 """ search = "https://complexsearch.kugou.com/v2/search/song?callback=callback123&keyword={keyword}&page=1&pagesize=30&bitrate=0&isfuzzy=0&tag=em&inputtype=0&platform=webfilter&userid=-1&clientver=2000&iscorrection=1&privilege_filter=0&srcappid=2919&clienttime={time}&mid={time}&uuid={time}&dfid=-&signature={signature}" key_code = "nvph5oo715z5diwaeqlhmdswxxqv4hwtbitrate=0callback=callback123clienttime={time}clientver=2000dfid=-inputtype=0iscorrection=1isfuzzy=0keyword={keyword}mid={time}page=1pagesize=30platform=webfilterprivilege_filter=0srcappid=2919tag=emuserid=-1uuid={time}nvph5oo715z5diwaeqlhmdswxxqv4hwt" # 获得13位时间戳 millis = str(round(time.time() * 1000)) p = key_code.format(time=millis, keyword=keyword) signature = get_signature(p) # print(signature) search_url = search.format(keyword=keyword, time=millis, signature=signature) return search_url def get_data(url): headers = { 'user-agent': 'mozilla/5.0 (windows nt 10.0; win64; x64) applewebkit/537.36 (khtml, like gecko) chrome/88.0.4324.146 safari/537.36', 'referer': 'https://www.kugou.com/', 'authority': 'complexsearch.kugou.com', } res = requests.get(url=url, headers=headers) # 将获取的数据转为json格式 data = re.findall('callback123\((.*)\)', res.text, re.s)[0] json_data = json.loads(data)['data'] hash_value = json_data['lists'][0]['filehash'].lower() album_id = json_data['lists'][0]['albumid'] return hash_value, album_id def get_mp3(hash_value, album_id): """ 获取mp3音频文件 :param hash_value: 传入哈希值 :param album_id: 传入album id :return: none """ url 
= 'https://wwwapi.kugou.com/yy/index.php' params = { 'r': 'play/getdata', 'callback': 'jquery191019800824574510756_1612519333214', 'hash': str(hash_value), 'dfid': '0q0clh1iczag3ey1j70ratil', 'mid': 'b6cf66837b18642cc269390b066649dc', 'platid': '4', 'album_id': str(album_id), } headers = { 'user-agent': 'mozilla/5.0 (windows nt 10.0; win64; x64) applewebkit/537.36 (khtml, like gecko) chrome/88.0.4324.146 safari/537.36', 'referer': 'https://www.kugou.com/', 'authority': 'wwwapi.kugou.com', } res = requests.post(url=url, params=params, headers=headers) data = re.findall('jquery191019800824574510756_1612519333214\((.*?)\);', res.text, re.s)[0] json_data = json.loads(data) audio_name = json_data['data']['audio_name'] play_url = json_data['data']['play_url'] save_mp3(audio_name, play_url) def save_mp3(audio_name, play_url): """ 保持mp3文件 :param audio_name: 传入命名 :param play_url: 传入音频url :return: none """ content = requests.get(play_url).content with open(audio_name + '.mp3', mode='ab') as f: f.write(content) if __name__ == '__main__': try: keyword = input('请输入要搜索的歌曲名称:') hash_value, album_id = get_data(get_url(keyword)) get_mp3(hash_value, album_id) except exception as e: print('请输入正确歌曲名称。')
以上就是 Python 爬取酷狗 MP3 音频的步骤的详细内容,更多关于 Python 爬取酷狗 MP3 音频的资料请关注其它相关文章!