当前位置：首页 > news > 正文

Python爬虫:获取某平台数据的下载链接 - 指南

news 2026/6/15 12:15:22

searchid 的生成参考代码（JavaScript）如下：

/**
 * Multiply two non-negative decimal integers of arbitrary length.
 *
 * Both operands are converted to strings and multiplied digit by digit
 * (schoolbook long multiplication), so the result is exact even when it
 * does not fit in a JavaScript number.
 *
 * @param {number|string} e - first factor, plain decimal digits only
 * @param {number|string} t - second factor, plain decimal digits only
 * @returns {string} the product as a decimal string without leading zeros
 *                   ("0" when the product is zero)
 */
function a(e, t) {
    // Work on least-significant-digit-first arrays so index == power of ten.
    var left = String(e).split("").reverse();
    var right = String(t).split("").reverse();
    var leftLen = left.length;
    var rightLen = right.length;

    // A product of leftLen and rightLen digits has at most leftLen + rightLen digits.
    var digits = [];
    for (var z = 0; z < leftLen + rightLen; z++) {
        digits[z] = 0;
    }

    for (var j = 0; j < rightLen; j++) {
        for (var k = 0; k < leftLen; k++) {
            var pos = j + k;
            digits[pos] += parseInt(left[k], 10) * parseInt(right[j], 10);
            // Push the carry one position up and keep a single digit here.
            digits[pos + 1] += Math.floor(digits[pos] / 10);
            digits[pos] = digits[pos] % 10;
        }
    }

    digits.reverse();
    // Drop every leading zero (not just one) but always keep one digit, so
    // that a zero operand yields "0" instead of a run of zeros.
    while (digits.length > 1 && digits[0] === 0) {
        digits.shift();
    }
    return digits.join("");
}
/**
 * Add two non-negative decimal integers of arbitrary length.
 *
 * The operands are converted to strings and added digit by digit with a
 * carry, so the result is exact for values beyond the safe integer range.
 * Leading zeros present in the operands are preserved in the result.
 *
 * @param {number|string} e - first addend, plain decimal digits only
 * @param {number|string} t - second addend, plain decimal digits only
 * @returns {string} the sum as a decimal string
 */
function r(e, t) {
    // Least-significant digit first; `sum` is updated in place.
    var sum = String(e).split("").reverse();
    var addend = String(t).split("").reverse();
    // Lengths are captured up front because `sum` may grow inside the loop.
    var sumLen = sum.length;
    var addendLen = addend.length;
    var width = Math.max(sumLen, addendLen);
    var carry = 0;

    for (var pos = 0; pos < width; pos++) {
        var x = pos < sumLen ? parseInt(sum[pos], 10) : 0;
        var y = pos < addendLen ? parseInt(addend[pos], 10) : 0;
        var total = x + y + carry;
        sum[pos] = String(total % 10);
        carry = total >= 10 ? 1 : 0;
    }

    if (carry === 1) {
        sum.push("1");
    }
    return sum.reverse().join("");
}
/**
 * Build a search id as a decimal string.
 *
 * The value is the exact big-integer sum of three parts:
 *   3 * 18014398509481984                      (18014398509481984 == 2^54)
 *   + round(random * 4194304) * 4294967296     (4194304 == 2^22, 4294967296 == 2^32)
 *   + milliseconds elapsed since local midnight
 *
 * The arithmetic goes through the string helpers a() (multiply) and
 * r() (add) because the total exceeds the safe integer range.
 *
 * @returns {string} the generated search id
 */
function getSearchId() {
    var now = new Date();
    var secondsOfDay = 3600 * now.getHours() + 60 * now.getMinutes() + now.getSeconds();
    var millisOfDay = 1000 * secondsOfDay + now.getMilliseconds();

    var fixedPart = a(3, "18014398509481984");
    var randomFactor = Math.round(Math.random() * 4194304);
    var randomPart = a(randomFactor, "4294967296");

    return r(r(fixedPart, randomPart), millisOfDay);
}
// Demo invocation: generate one search id and print it to stdout.
console.log(getSearchId())

Python爬虫代码如下：

import requests
import numpy as np
import subprocess
import json
import execjs
import time
# Flow of this script:
#   1. ask for a keyword and send a search request,
#   2. list the returned songs and let the user pick one by its row number,
#   3. send a second request for that song and print a download link.
# Both requests go through the same pipeline (sign -> encode body with node
# -> POST -> decode response with node), implemented once in _call_api().
# The helper files encode_random.js, encode_sign.js, encode_data.js,
# decode.js and person.txt are read from the current working directory.

REQUEST_TIMEOUT = 30  # seconds; without a timeout requests.post may block forever


def _read_text(path):
    """Return the full content of the UTF-8 text file at *path*."""
    with open(file=path, mode='r', encoding='utf-8') as f:
        return f.read()


def _run_node(script_path, source):
    """Write *source* to *script_path*, run it with node, return its first stdout line.

    Raises:
        subprocess.CalledProcessError: if node exits with a non-zero status
            (previously such failures were ignored and an empty string was used).
    """
    with open(file=script_path, mode='w', encoding='utf-8') as f:
        f.write(source)
    res = subprocess.run(['node', script_path], capture_output=True, check=True)
    # Only the first printed line is used; drop a trailing '\r' from CRLF output.
    return res.stdout.decode('utf-8').split('\n')[0].rstrip('\r')


def _call_api(payload, encode_script, decode_script, dump_path):
    """Sign, encode and POST *payload*, then decode and return the parsed reply.

    Uses the module-level ``ctx2``, ``js_str3``, ``js_str``, ``url`` and
    ``headers`` defined further down in this script.

    Args:
        payload: request dict; it is passed to the JS code as JSON.
        encode_script: path of the temporary node script that encodes the body.
        decode_script: path of the temporary node script that decodes the reply.
        dump_path: file that receives the decoded response text.

    Returns:
        The decoded response parsed with ``json.loads``.
    """
    sign = ctx2.call('main', payload)
    params = {
        '_': int(time.time() * 1000),
        'encoding': 'ag-1',
        'sign': sign
    }
    # encode_data.js does asynchronous work, so it cannot be run through execjs
    # directly; it is executed with node and the first line it prints is used as
    # the request body. json.dumps (not Python's repr) guarantees a valid JS literal.
    data = _run_node(encode_script, js_str3 + f'\nmain1({json.dumps(payload)})')
    rsp = requests.post(url=url, headers=headers, data=data, params=params,
                        timeout=REQUEST_TIMEOUT)
    # The response bytes are handed to decode.js as an array of integers; this
    # also runs through node via subprocess because of JS/Python data
    # incompatibilities with that array (per the original author's note).
    byte_values = list(rsp.content)
    res_str = _run_node(decode_script,
                        js_str + f'\nconsole.log(main({json.dumps(byte_values)}))')
    with open(file=dump_path, mode='w', encoding='utf-8') as f:
        f.write(res_str)
    return json.loads(res_str)


keyword = input("输入关键词:")
cookies = ''  # put your own cookie string here

js_str1 = _read_text('encode_random.js')
js_str2 = _read_text('encode_sign.js')
js_str3 = _read_text('encode_data.js')
js_str = _read_text('decode.js')
# Strip so that a trailing newline in person.txt does not end up inside "uin".
QQ_str = _read_text('person.txt').strip()

ctx = execjs.compile(js_str1)
searchid = ctx.call('getSearchId')
tk2 = ctx.call('get_tk', cookies)
ctx2 = execjs.compile(js_str2)

url = 'https://u6.y.qq.com/cgi-bin/musics.fcg'
headers = {
    'user-agent': 'Mozilla/5.0 (Windows NT 10.0; Win64; x64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/138.0.0.0 Safari/537.36',
    'cookie': cookies
}

# Common envelope shared by both requests.
comm = {
    "cv": 4747474,
    "ct": 24,
    "format": "json",
    "inCharset": "utf-8",
    "outCharset": "utf-8",
    "notice": 0,
    "platform": "yqq.json",
    "needNewCode": 1,
    "uin": QQ_str,
    "g_tk_new_20200303": tk2,
    "g_tk": tk2
}

# ---- request 1: keyword search -------------------------------------------
obj = {
    "comm": dict(comm),
    "req_1": {
        "method": "DoSearchForQQMusicDesktop",
        "module": "music.search.SearchCgiService",
        "param": {
            "remoteplace": "txt.yqq.song",
            "searchid": searchid,
            "search_type": 0,
            "query": keyword,
            "page_num": 1,
            "num_per_page": 10
        }
    }
}
search_result = _call_api(obj, '2.js', '1.js', 'res.json')
songs = search_result['req_1']['data']['body']['song']['list']
for number, song in enumerate(songs, start=1):
    singer = ','.join([s['name'] for s in song['singer']])
    print(number, song['title'], singer, song['id'], song['mid'])

# The value entered is the 1-based row number printed in the first column above.
index = int(input('输入想下载的歌曲id:'))
if not 1 <= index <= len(songs):
    # Reject 0 / negative numbers explicitly: Python's negative indexing would
    # otherwise silently select a song from the end of the list.
    raise IndexError(f'song number must be between 1 and {len(songs)}, got {index}')
song_info = songs[index - 1]
song_id = song_info['id']
song_mid = song_info['mid']
album_mid = song_info['album']['mid']
pay_play = song_info['pay']['pay_play']

guid_js = '''
function main(){
let a = (new Date).getUTCMilliseconds();
return String(Math.round(2147483647 * Math.random()) * a % 1e10);
}
'''
guid = execjs.compile(guid_js).call('main')

# ---- request 2: song details and URLs --------------------------------------
obj2 = {
    "comm": dict(comm),
    "req_1": {"module": "music.musicasset.SongFavRead", "method": "IsSongFanByMid",
              "param": {"v_songMid": [song_mid]}},
    "req_2": {"module": "music.musichallSong.PlayLyricInfo", "method": "GetPlayLyricInfo",
              "param": {"songMID": song_mid, "songID": song_id}},
    "req_3": {"method": "GetCommentCount", "module": "music.globalComment.GlobalCommentRead",
              "param": {"request_list": [{"biz_type": 1, "biz_id": str(song_id), "biz_sub_type": 0}]}},
    "req_4": {"module": "music.musichallAlbum.AlbumInfoServer", "method": "GetAlbumDetail",
              "param": {"albumMid": album_mid}},
    "req_5": {"module": "music.vkey.GetEVkey", "method": "GetUrl",
              "param": {"guid": guid, "songmid": [song_mid], "songtype": [0], "uin": QQ_str,
                        "loginflag": 1, "platform": "20", "xcdn": 1}}
}
if pay_play == 1:
    # These two fields are only needed to build the filename for this case,
    # so they are looked up here rather than for every song.
    d2 = song_info['vs'][0]
    d3 = song_info['file']['media_mid']
    obj2['req_5']['param']['filename'] = [f"RS02{d2 or d3}.mp3"]

detail_result = _call_api(obj2, '3.js', '4.js', '6.json')
url_info = detail_result['req_5']['data']['midurlinfo'][0]
if pay_play == 1:
    download_url2 = 'https://ws6.stream.qqmusic.qq.com/' + url_info['purl']
    print("当前歌曲需要vip,试听下载链接为:", download_url2)
else:
    download_url = url_info['xcdnurl']
    print('下载链接：', download_url)