Scraping Zhaopin job overview information for selected keywords

The main crawler file

zhaopin.py

import requests
import json
import time
import random
from getProduce.conn_sqlite import *    # adjust this import path to your own project layout


def get_overview():
    kw = ['python', 'python开发', 'python工程师', 'python开发工程师', 'python爬虫工程师',
          'python后端', 'python程序员', 'python大数据', '数据挖掘', '电商']
    for w in kw:
        err = 0
        for i in range(200):
            url = """https://fe-api.zhaopin.com/c/i/sou?start={}&pageSize=90&
            cityId=765&salary=0,0&workExperience=-1&education=-1&companyType=-1&
            employmentType=-1&jobWelfareTag=-1&kw={}+&kt=3&=0&_v=0.61279220&
            x-zp-page-request-id=20d2e31b40a649bbbf6ca46fe60fec3a-1562948729116-749702&
            x-zp-client-id=91cbfd33-9b39-4114-9301-b53b6d1ba53d
            """.format(90 * i, w)
            if i == 0:
                url = """https://fe-api.zhaopin.com/c/i/sou?pageSize=90&
                cityId=765&workExperience=-1&education=-1&companyType=-1&
                employmentType=-1&jobWelfareTag=-1&kw={}+&kt=3&_v=0.71451964&
                x-zp-page-request-id=8e9d07af005a4ee2baaffcd8da94992c-1562949573697-740814&
                x-zp-client-id=91cbfd33-9b39-4114-9301-b53b6d1ba53d""".format(w)
            try:

                response = requests.get(url).text
                data = json.loads(response)

                for each_job in data['data']['results']:
                    add_data = (
                        each_job['jobName'],  # job title
                        each_job['jobType']['items'][0]['name'],  # top-level job category
                        each_job['emplType'],  # employment type
                        each_job['salary'],  # salary
                        each_job['eduLevel']['name'],  # education level
                        each_job['workingExp']['name'],  # required work experience
                        each_job['positionURL'],  # URL of the job posting
                        each_job['company']['name'],  # company name
                        each_job['company']['type']['name'],  # company type
                        each_job['company']['size']['name'],  # company size
                        each_job['company']['url'],  # company URL
                        each_job['city']['display'],  # city
                        each_job['businessArea'],  # business district
                        each_job['updateDate'],  # last update date
                        each_job['score'],  # listing score
                        '|'.join(each_job['welfare'])  # benefits, pipe-separated
                    )
                    print(add_data)
                    insert_job_info(add_data)
                time.sleep(random.uniform(2.1, 3.1))  # polite random delay between pages
            except Exception as e:
                print(e)
                err += 1
                if err > 5:  # give up on this keyword after repeated failures
                    break


if __name__ == '__main__':
    get_overview()
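
Before launching a full crawl, it can help to probe a single page of the API and confirm the response shape. The sketch below is not part of the original script: it assumes the fe-api.zhaopin.com endpoint still answers with the JSON layout that get_overview() walks, and it only reads fields the scraper above already uses.

# probe.py - hypothetical single-page check, not part of the original project.
import requests

params = {'pageSize': 90, 'cityId': 765, 'kw': 'python', 'kt': 3}
resp = requests.get('https://fe-api.zhaopin.com/c/i/sou', params=params)
data = resp.json()

results = data['data']['results']  # same path get_overview() walks
print(len(results), 'jobs on the first page')
if results:
    first = results[0]
    print(first['jobName'], first['salary'], first['company']['name'])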

Storing the scraped data in the database

conn_sqlite.py

import sqlite3


def action(sql, params=()):
    """Execute a single write statement; params enables safe ? placeholders."""
    conn = sqlite3.connect('zhaopin.sqlite3')
    print("Opened database successfully")
    c = conn.cursor()
    try:
        c.execute(sql, params)
        conn.commit()
        print("Statement executed successfully")
    except Exception as e:
        print(e)
    finally:
        conn.close()


def query(sql, params=()):
    conn = sqlite3.connect('zhaopin.sqlite3')
    print("Opened database successfully")
    c = conn.cursor()
    result = []
    try:
        c.execute(sql, params)
        result = c.fetchall()
    except Exception as e:
        print(e)
    finally:
        conn.close()
    return result


def create_jobs_info_table():
    create_table_sql = """
                CREATE TABLE IF NOT EXISTS zlzp_sjfx(
                    id INTEGER PRIMARY KEY autoincrement,
                    job_name CHAR(100) ,
                    job_type_big_name CHAR(100) ,
                    job_type CHAR(100) ,
                    job_salary CHAR(100) ,
                    edu_level CHAR(100) ,
                    working_exp CHAR(100) ,
                    job_url CHAR(200) ,
                    company_name CHAR(100),
                    company_type CHAR(100),
                    company_size CHAR(100) ,
                    company_url CHAR(200) ,
                    job_city CHAR(100) ,
                    business_area CHAR(100) ,               
                    job_updatetime CHAR(100) ,
                    job_score CHAR(100) ,
                    job_welfare CHAR(100) 
                )"""
    action(create_table_sql)


def insert_job_info(data):
    # data is the 16-field tuple built in zhaopin.py, in column order.
    # Placeholders (?) avoid the quoting problems of formatting the tuple
    # straight into the SQL string, which breaks on any field containing
    # a quote character.
    add_sql = """
    INSERT INTO zlzp_sjfx
    (job_name,
    job_type_big_name,
    job_type,
    job_salary,
    edu_level,
    working_exp,
    job_url,
    company_name,
    company_type,
    company_size,
    company_url,
    job_city,
    business_area,
    job_updatetime,
    job_score,
    job_welfare)
    VALUES (?, ?, ?, ?, ?, ?, ?, ?, ?, ?, ?, ?, ?, ?, ?, ?)
    """
    action(add_sql, data)


if __name__ == '__main__':
    create_jobs_info_table()
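
As a usage sketch (with placeholder values, shown purely to make the field order explicit), a row can be inserted by hand with the same 16-field tuple that zhaopin.py builds:

# Hypothetical example row; the field order must match the column list in
# insert_job_info() and the add_data tuple in zhaopin.py.
sample = ('Python开发工程师', '软件开发', '全职', '10K-15K', '本科', '1-3年',
          'http://example.com/job', '示例公司', '民营', '100-499人',
          'http://example.com/company', '深圳', '南山区', '2022-05-09',
          '5', '五险一金|年终奖')
insert_job_info(sample)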

First run conn_sqlite.py on its own; this creates the SQLite database and the zlzp_sjfx table.
Then fix the conn_sqlite import line at the top of zhaopin.py so it matches your own project layout.
Finally, run zhaopin.py.
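
After a run, the table can be sanity-checked through the module's own query() helper; a minimal sketch, assuming zhaopin.sqlite3 sits in the working directory:

from conn_sqlite import query  # adjust the import path as in zhaopin.py

rows = query('SELECT COUNT(*) FROM zlzp_sjfx')
if rows:
    print('total rows scraped:', rows[0][0])
for row in query('SELECT job_name, job_salary, company_name FROM zlzp_sjfx LIMIT 5'):
    print(row)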
