利用Requests爬取图书信息
程序员文章站
2022-05-09 21:17:15
...
import requests
import json
from openpyxl import Workbook, load_workbook
# Fetch the recommended-book list for one book tag from the ptpress.com.cn
# portal and keep the value under the response's "data" key in `results`.
book_tag_id = "2725fe7b-b2c2-4769-8f6f-c95f04c70275"
url = "https://www.ptpress.com.cn/recommendBook/getRecommendBookListForPortal"
# The tag id is also sent as a request header, exactly as before.
# NOTE(review): the server probably only reads the query parameter -- confirm
# before dropping the header.
headers = {
    "bookTagId": book_tag_id
}
# `params` builds the same "?bookTagId=..." query string that used to be
# spliced into the URL by hand; the timeout keeps a stalled connection from
# hanging the script forever.
resp = requests.get(
    url,
    params={"bookTagId": book_tag_id},
    headers=headers,
    timeout=10,
)
# Fail loudly on an HTTP error instead of parsing an error page as JSON.
resp.raise_for_status()
resp.encoding = 'utf-8'
results = resp.json()['data']
# Create two in-memory xlsx workbooks and take the first sheet of each:
#   workbook  / worksheets  -> basic book information (name, id, picture path)
#   workbook2 / worksheets2 -> price and stock information
basic_info_header = ['bookName(书名)', 'bookId(书本ID)', "picPath"]
price_stock_header = ['bookName(书名)', 'discountPrice(价格)', "num(库存)"]

workbook, workbook2 = Workbook(), Workbook()
# A freshly created Workbook has exactly one sheet, which is the active one.
worksheets = workbook.active
worksheets2 = workbook2.active

# The first row of each sheet holds the column titles.
worksheets.append(basic_info_header)
worksheets2.append(price_stock_header)
# For every book in `results`: append its basic information to the first
# sheet, then look up its price (detail endpoint, by book id) and its stock
# (stock endpoint, by ISBN) and append those to the second sheet.
detail_url = "https://www.ptpress.com.cn/bookinfo/getBookDetailsById"
stock_url = "https://www.ptpress.com.cn/order/getBookSaleStock"
request_timeout = 10  # seconds; without it a stalled connection hangs forever

try:
    for book in results:
        name = book['bookName']
        book_id = book['bookId']
        worksheets.append([name, book_id, book['picPath']])

        detail_resp = requests.post(
            detail_url, data={'bookId': book_id}, timeout=request_timeout
        )
        # Stop on an HTTP error rather than parsing an error page as JSON.
        detail_resp.raise_for_status()
        # Response.json() detects the JSON text encoding itself instead of
        # relying on the guessed encoding behind `.text`.
        detail = detail_resp.json()['data']

        # The ISBN used for the stock query comes from the first entry of the
        # detail's "photos" list.
        photos = detail.get('photos')
        if photos:
            stock_resp = requests.post(
                stock_url,
                data={'isbn': photos[0]['isbn']},
                timeout=request_timeout,
            )
            stock_resp.raise_for_status()
            stock = stock_resp.json()['data']
        else:
            # No photo entry means no ISBN to query: leave the stock cell
            # empty instead of aborting the whole run with an IndexError.
            stock = None

        worksheets2.append([name, detail['discountPrice'], stock])
finally:
    # Write each file once instead of rewriting it on every iteration. Doing
    # it in `finally` still keeps the rows collected so far when a request
    # fails part-way through.
    workbook.save("计算机新书基本信息.xlsx")
    workbook2.save('计算机类新书价格库存信息表.xlsx')