选定关键词爬取智联招聘职位概览信息
程序员文章站
2022-05-09 18:01:03
...
爬虫主体文件
zhaopin.py
import requests
import json
import time
import random
from getProduce.conn_sqlite import * # 这个地方需要自己修改
def get_overview():
    """Crawl Zhaopin job-overview listings for a list of search keywords.

    Pages through the Zhaopin search API (90 results per page, up to 200
    pages per keyword), extracts the overview fields of every posting and
    stores each row via insert_job_info().  A keyword is abandoned after
    more than 5 page-level errors.  Sleeps 2-3 s between pages to stay
    polite to the server.
    """
    keywords = ['python', 'python开发', 'python工程师', 'python开发工程师',
                'python爬虫工程师', 'python后端', 'python程序员',
                'python大数据', '数据挖掘', '电商']
    api = 'https://fe-api.zhaopin.com/c/i/sou'
    for kw in keywords:
        err = 0
        for page in range(200):
            # Build the query with a params dict: the original glued the
            # URL together in a triple-quoted string, which embedded
            # newlines and leading spaces into the request URL.
            params = {
                'pageSize': 90,
                'cityId': 765,
                'workExperience': -1,
                'education': -1,
                'companyType': -1,
                'employmentType': -1,
                'jobWelfareTag': -1,
                'kw': kw,
                'kt': 3,
                # Tracking ids copied from a captured browser request;
                # presumably optional — kept for fidelity with the
                # original (TODO confirm the API still accepts them).
                '_v': '0.61279220',
                'x-zp-page-request-id':
                    '20d2e31b40a649bbbf6ca46fe60fec3a-1562948729116-749702',
                'x-zp-client-id': '91cbfd33-9b39-4114-9301-b53b6d1ba53d',
            }
            if page > 0:
                # Later pages paginate via `start` and pin the salary
                # filter, matching the original second-request shape.
                params['start'] = 90 * page
                params['salary'] = '0,0'
            try:
                data = requests.get(api, params=params).json()
                for each_job in data['data']['results']:
                    add_data = (
                        each_job['jobName'],                      # job title
                        each_job['jobType']['items'][0]['name'],  # broad job category
                        each_job['emplType'],                     # employment type
                        each_job['salary'],                       # salary range
                        each_job['eduLevel']['name'],             # education requirement
                        each_job['workingExp']['name'],           # required experience
                        each_job['positionURL'],                  # posting URL
                        each_job['company']['name'],              # company name
                        each_job['company']['type']['name'],      # company type
                        each_job['company']['size']['name'],      # company size
                        each_job['company']['url'],               # company URL
                        each_job['city']['display'],              # city
                        each_job['businessArea'],                 # business district
                        each_job['updateDate'],                   # last update date
                        each_job['score'],                        # listing score
                        '|'.join(each_job['welfare']),            # benefits, pipe-joined
                    )
                    print(add_data)
                    insert_job_info(add_data)
                time.sleep(random.uniform(2.1, 3.1))
            except Exception as e:
                # Best-effort crawl: log the failure, count it, move on;
                # too many failures means this keyword is exhausted or
                # the server is blocking us.
                print(e)
                err += 1
                if err > 5:
                    break


if __name__ == '__main__':
    get_overview()
将爬取的数据存入数据库的模块 conn_sqlite.py
import sqlite3
def action(sql):
    """Execute a single write statement (DDL/DML) against zhaopin.sqlite3.

    Opens a fresh connection, runs *sql* and commits on success; any
    exception is printed rather than raised, and the connection is
    always closed.
    """
    connection = sqlite3.connect('zhaopin.sqlite3')
    print("Opened database successfully")
    cursor = connection.cursor()
    try:
        cursor.execute(sql)
        connection.commit()
        print("Table change successfully")
    except Exception as exc:
        print(exc)
    finally:
        connection.close()
def query(sql):
    """Run a read-only SQL statement against zhaopin.sqlite3.

    Returns the fetched rows as a list of tuples.  On any error the
    exception is printed and an empty list is returned — the original
    returned the empty string here, giving callers an inconsistent
    return type.  The connection is always closed.
    """
    conn = sqlite3.connect('zhaopin.sqlite3')
    print("Opened database successfully")
    c = conn.cursor()
    result = []  # same (falsy) sentinel family as the success value
    try:
        c.execute(sql)
        result = c.fetchall()
    except Exception as e:
        print(e)
    finally:
        conn.close()
    return result
def create_jobs_info_table():
    """Create the zlzp_sjfx job-overview table if it does not exist yet.

    Uses IF NOT EXISTS so re-running the script no longer fails with a
    "table zlzp_sjfx already exists" error.  Column order matches the
    16-tuples consumed by insert_job_info().
    """
    create_table_sql = """
    CREATE TABLE IF NOT EXISTS zlzp_sjfx(
        id INTEGER PRIMARY KEY AUTOINCREMENT,
        job_name CHAR(100),
        job_type_big_name CHAR(100),
        job_type CHAR(100),
        job_salary CHAR(100),
        edu_level CHAR(100),
        working_exp CHAR(100),
        job_url CHAR(200),
        company_name CHAR(100),
        company_type CHAR(100),
        company_size CHAR(100),
        company_url CHAR(200),
        job_city CHAR(100),
        business_area CHAR(100),
        job_updatetime CHAR(100),
        job_score CHAR(100),
        job_welfare CHAR(100)
    )"""
    action(create_table_sql)
def insert_job_info(data):
    """Insert one job-overview row into zlzp_sjfx.

    *data* is a 16-tuple in the column order produced by the crawler.
    Uses a parameterized INSERT instead of the original
    ``VALUES {}".format(data)``, which spliced the tuple's repr() into
    the SQL text — that broke (and was SQL-injectable) whenever a value
    contained a quote.  Opens its own connection because action() has no
    way to pass bound parameters; on error the exception is printed, not
    raised, matching the module's best-effort style.
    """
    add_sql = """
    INSERT INTO zlzp_sjfx
        (job_name, job_type_big_name, job_type, job_salary, edu_level,
         working_exp, job_url, company_name, company_type, company_size,
         company_url, job_city, business_area, job_updatetime, job_score,
         job_welfare)
    VALUES (?, ?, ?, ?, ?, ?, ?, ?, ?, ?, ?, ?, ?, ?, ?, ?)
    """
    conn = sqlite3.connect('zhaopin.sqlite3')
    try:
        conn.execute(add_sql, tuple(data))
        conn.commit()
    except Exception as e:
        print(e)
    finally:
        conn.close()


if __name__ == '__main__':
    create_jobs_info_table()
单独运行 conn_sqlite.py 文件会自动生成 sqlite 数据库；
然后修改 zhaopin.py 文件中引入 conn_sqlite 的 import 语句，使其匹配你自己的目录结构；
最后在命令行执行 zhaopin.py 文件即可。
上一篇: Linux samba服务器配置