爬取中国福利彩票开奖数据（双色球 / 3D / 七乐彩）
程序员文章站
2022-06-03 17:53:08
...
import requests
import json
import csv
from user_agents import UA
def post_request(url, data):
    """POST `data` to `url` with browser-like headers and return the body text.

    Returns the response body decoded as UTF-8 (undecodable bytes dropped),
    or None when the request fails for any reason.
    """
    HEADER = {
        'Accept': '*/*',
        'Accept-Encoding': 'gzip, deflate, br',
        'Accept-Language': 'zh-CN,zh;q=0.9',
        'Connection': 'keep-alive',
        'User-Agent': UA,
        # NOTE(review): placeholder — replace with a real cookie before running.
        'Cookie': '你的cookie',
        'Host': 'www.cwl.gov.cn',
        'Referer': 'http://www.cwl.gov.cn/kjxx/ssq/kjgg/',
        'X-Requested-With': 'XMLHttpRequest',
    }
    try:
        response = requests.post(
            url=url,
            headers=HEADER,
            verify=True,
            data=data,
            timeout=20,
        )
        # response.encoding may be None when the server omits a charset;
        # fall back to UTF-8 so bytes() does not raise TypeError.
        encoding = response.encoding or 'utf-8'
        return bytes(response.text, encoding).decode('utf-8', 'ignore')
    except Exception as e:
        # Report the failure instead of swallowing it silently; callers must
        # check for None before feeding the result to json.loads().
        print('post_request failed for %s: %s' % (url, e))
        return None
def ssq_craw():
    """Fetch the latest 100 双色球 (ssq) draws and append them to a CSV file.

    Each row is [date, red numbers, blue number]. Does nothing if the
    HTTP request fails.
    """
    url = "http://www.cwl.gov.cn/cwl_admin/kjxx/findDrawNotice"
    data = {
        'name': 'ssq',
        'issueCount': '100',
    }
    res = post_request(url=url, data=data)
    if res is None:
        # Request failed; avoid json.loads(None) crashing.
        return
    data_list = json.loads(res)["result"]
    # Open once outside the loop (the original re-opened the file per row).
    # newline='' prevents blank rows on Windows; utf-8 for the Chinese text.
    with open('/media/liu/_dde_data/project/spider/**/' + "双色球" + '.csv',
              'a+', newline='', encoding='utf-8') as f:
        writer = csv.writer(f)
        for item in data_list:
            draw_date = item.get("date", "notfound")  # draw date
            red = item.get("red", "notfound")         # red-ball numbers
            blue = item.get('blue', "notfound")       # blue-ball number
            print(red, blue)
            writer.writerow([draw_date, red, blue])
def sd_craw():
    """Fetch the latest 100 福彩 3D draws and append them to a CSV file.

    Each row is [date, numbers]. Does nothing if the HTTP request fails.
    """
    url = "http://www.cwl.gov.cn/cwl_admin/kjxx/findDrawNotice"
    data = {
        'name': '3d',
        'issueCount': '100',
    }
    res = post_request(url=url, data=data)
    if res is None:
        # Request failed; avoid json.loads(None) crashing.
        return
    data_list = json.loads(res)["result"]
    # Open once outside the loop (the original re-opened the file per row).
    # newline='' prevents blank rows on Windows.
    with open('/media/liu/_dde_data/project/spider/**/' + "3D" + '.csv',
              'a+', newline='', encoding='utf-8') as f:
        writer = csv.writer(f)
        for item in data_list:
            draw_date = item.get("date", "notfound")  # draw date
            red = item.get("red", "notfound")         # drawn numbers
            print(draw_date, red)
            writer.writerow([draw_date, red])
def qlc_craw():
    """Fetch the latest 100 七乐彩 (qlc) draws and append them to a CSV file.

    Each row is [date, red numbers, blue number]. Does nothing if the
    HTTP request fails.
    """
    # Renamed from the copy-pasted `url_ssq` for clarity.
    url = "http://www.cwl.gov.cn/cwl_admin/kjxx/findDrawNotice"
    data = {
        'name': 'qlc',
        'issueCount': '100',
    }
    res = post_request(url=url, data=data)
    if res is None:
        # Request failed; avoid json.loads(None) crashing.
        return
    data_list = json.loads(res)["result"]
    # Open once outside the loop (the original re-opened the file per row).
    # newline='' prevents blank rows on Windows; utf-8 for the Chinese text.
    with open('/media/liu/_dde_data/project/spider/**/' + "七乐彩" + '.csv',
              'a+', newline='', encoding='utf-8') as f:
        writer = csv.writer(f)
        for item in data_list:
            draw_date = item.get("date", "notfound")  # draw date
            red = item.get("red", "notfound")         # red-ball numbers
            blue = item.get('blue', "notfound")       # blue-ball number
            print(red, blue)
            writer.writerow([draw_date, red, blue])
if __name__ == "__main__":
    # Run the crawlers only when executed as a script, not on import.
    ssq_craw()
    sd_craw()
    qlc_craw()
上一篇: 如何设置网页页面高度100%
下一篇: Linux下与文件相关的几个时间的介绍