欢迎您访问程序员文章站,本站旨在为大家分享程序员计算机编程知识!
您现在的位置是: 首页

python爬虫 - 爬取智联招聘

程序员文章站 2022-05-09 17:45:30
...
# Standard library
import urllib.request
import urllib.parse

# Third-party
from bs4 import BeautifulSoup  # fixed: package name is lowercase 'bs4', not 'Bs4'
import lxml  # parser backend for BeautifulSoup(..., 'lxml'); imported to fail fast if absent

def main():
    """Scrape Zhaopin job-search result pages interactively.

    Prompts the user (in Chinese) for a job location, a keyword, and an
    inclusive page range; for each page it builds the search URL, fetches
    the HTML with a desktop-Chrome User-Agent, parses it with BeautifulSoup,
    and prints the request URL plus one hard-coded company anchor tag as a
    smoke test of the parse.

    Raises:
        ValueError: if a page-number prompt receives non-integer input.
        urllib.error.URLError: on network/HTTP failure (propagates).
    """
    base_url = 'https://sou.zhaopin.com/jobs/searchresult.ashx?'

    jl = input('请输入工作地点:')                # job location
    kw = input('请输入工作关键字:')              # job keyword
    start_page = int(input('请输入起始页码:'))   # first page, inclusive
    end_page = int(input('请输入结束页码:'))     # last page, inclusive

    # Hoisted out of the loop: the header never changes between requests.
    header = {'User-Agent': ' Mozilla/5.0 (Windows NT 6.1; Win64;'
                            ' x64) AppleWebKit/537.36 (KHTML, like'
                            ' Gecko) Chrome/71.0.3578.98 Safari/537.36', }

    for page in range(start_page, end_page + 1):
        data = {
            'jl': jl,
            'kw': kw,
            'p': page,
        }
        url_now = base_url + urllib.parse.urlencode(data)
        print(url_now)
        request = urllib.request.Request(url=url_now, headers=header)
        # Context manager closes the HTTP response even if decoding or
        # parsing raises (the original leaked the connection).
        with urllib.request.urlopen(request) as response:
            soup = BeautifulSoup(response.read().decode('utf8'), 'lxml')
        # NOTE(review): prints a single hard-coded company link only;
        # extraction of the actual job listings was never implemented.
        print(soup.find('a', href='http://company.zhaopin.com/CZ000100000.htm'))


if __name__ == '__main__':
    main()
    
相关标签: Python爬虫 python