Python爬取智联招聘信息
程序员文章站
2022-05-09 17:45:06
...
代码如下
import requests
import json
from lxml import etree
# Request headers shared by every HTTP call in this script: a desktop
# Chrome User-Agent string, written as two adjacent literals that Python
# joins into one value.
headers = {
    'User-Agent': (
        'Mozilla/5.0 (Windows NT 10.0; Win64; x64) AppleWebKit/537.36 (KHTML,'
        ' like Gecko) Chrome/72.0.3626.119 Safari/537.36'
    ),
}
def url_index(pages=12, page_size=90):
    """Build the list of search-API URLs to request, one per result page.

    Args:
        pages: Number of pages to generate URLs for (default 12).
        page_size: Value sent as ``pageSize``; also the step by which
            ``start`` advances from one page to the next (default 90).

    Returns:
        A list of ``pages`` URL strings.
    """
    template = 'https://fe-api.zhaopin.com/c/i/sou?start={}&pageSize={}'
    # `start` is advanced by a full page each time so consecutive requests
    # do not return overlapping windows of results.
    # NOTE(review): this treats `start` as a result offset (0, 90, 180, ...)
    # rather than a page index -- confirm against the live API.
    return [template.format(page * page_size, page_size) for page in range(pages)]
def salary_index(salary):
    """Split a salary string such as ``"10K-15K"`` into ``[low, high]``.

    Every ``<number>K`` token is expanded to the plain amount
    (``"10K"`` -> ``"10000"``, ``"4.5K"`` -> ``"4500"``). If the expanded
    text contains ``"-"`` it is split into lower and upper bound; otherwise
    the same text is used for both bounds.

    Args:
        salary: Raw salary text; may be empty or ``None``.

    Returns:
        A two-element list of strings ``[low, high]``. Empty or missing
        input yields ``['', '']``. Text with no ``K`` amount (for example a
        "negotiable" label) is returned unchanged in both positions.
    """
    import re  # local import keeps this function self-contained

    if not salary:
        return ['', '']

    def _expand(match):
        # Convert numerically so decimals work: plain text replacement of
        # "K" with "000" would turn "4.5K" into "4.5000" instead of "4500".
        return str(int(round(float(match.group(1)) * 1000)))

    normalized = re.sub(r'(\d+(?:\.\d+)?)K', _expand, salary)
    if '-' in normalized:
        # maxsplit=1 guarantees exactly two elements for the caller.
        return normalized.split('-', 1)
    return [normalized, normalized]
def _dig(mapping, *keys):
    """Follow ``keys`` through nested dicts; return '' if any step is missing or not a dict."""
    current = mapping
    for key in keys:
        if not isinstance(current, dict):
            return ''
        current = current.get(key, '')
    return current


def go_index(url):
    """Request one search-result page and flatten each result into a dict.

    Args:
        url: A search-API URL (as produced by ``url_index``).

    Returns:
        A list with one dict per entry of ``data.results`` in the JSON
        response. Each dict carries the keys ``city_name``, ``com_name``,
        ``com_size``, ``com_type``, ``job_name``, ``job_tag``,
        ``timeState``, ``low_salary``, ``higt_salary`` and ``positionURL``.

    Raises:
        requests.RequestException: On network failure, timeout, or an HTTP
            error status.
        KeyError: If the response JSON lacks ``data`` / ``results``.
    """
    # `headers` must be passed by keyword: the second positional parameter
    # of requests.post is `data`, which would send the dict as the request
    # body and leave the User-Agent header unset.
    response = requests.post(url, headers=headers, timeout=10)
    response.raise_for_status()
    payload = json.loads(response.content.decode())  # response body -> dict

    json_list = []
    for item in payload['data']['results']:
        low, high = salary_index(item.get('salary', ''))[:2]
        json_list.append({
            'city_name': _dig(item, 'city', 'display'),           # city
            'com_name': _dig(item, 'company', 'name'),            # company name
            'com_size': _dig(item, 'company', 'size', 'name'),    # company headcount
            'com_type': _dig(item, 'company', 'type', 'name'),    # company type
            'job_name': item.get('jobName', ''),                  # job title
            'job_tag': _dig(item, 'jobTag', 'searchTag'),         # benefits
            'timeState': item.get('timeState', ''),               # posting status
            'low_salary': low,                                    # lower salary bound
            # Key spelling kept as-is: it is part of the emitted data.
            'higt_salary': high,                                  # upper salary bound
            'positionURL': item.get('positionURL', ''),           # detail page URL
        })
    return json_list
def del_json(json_list):
    """Visit each job's detail page and record its ``size`` field.

    For every item the detail page at ``item['positionURL']`` is fetched and
    the text nodes of the fourth ``<li>`` under
    ``ul.summary-plane__info`` are stored in ``item['size']`` (per the
    original author's note, this is the number of people the company plans
    to hire). ``positionURL`` is then removed from the item.

    Items are modified in place; if the page cannot be fetched or does not
    contain a fourth list entry, ``item['size']`` stays ``''``.

    Args:
        json_list: List of dicts as returned by ``go_index``.

    Returns:
        A new list containing the same (mutated) item dicts.
    """
    end_list = []
    for item in json_list:
        url = item.pop('positionURL', '')
        item['size'] = ''
        if url:
            try:
                # `headers` must be a keyword argument: the second positional
                # parameter of requests.get is `params` (the query string).
                response = requests.get(url, headers=headers, timeout=10)
                html = etree.HTML(response.content.decode())
            except (requests.RequestException, UnicodeDecodeError) as exc:
                # One unreachable detail page should not discard everything
                # collected so far; report it and keep the empty size.
                print(exc)
                html = None
            if html is not None:
                entries = html.xpath('//ul[@class="summary-plane__info"]/li')
                if len(entries) > 3:
                    item['size'] = entries[3].xpath('./text()')
        print(item)
        end_list.append(item)
    return end_list
if __name__ == '__main__':
    # Main flow: build the page URLs, collect the listings from every page,
    # enrich them from their detail pages, then append the result as JSON
    # to zl2.txt.
    url_list = url_index()

    json_list = []
    for url in url_list:
        # Send the request and flatten the returned listings.
        json_list.extend(go_index(url))

    end_list = del_json(json_list)

    try:
        with open('zl2.txt', 'a+', encoding='utf-8') as f:
            json.dump(end_list, f, ensure_ascii=False)
    except Exception as e:
        print(e)