Python 爬虫：5i5j 房屋信息获取并存储到数据库

程序员文章站 2024-02-05 15:20:22

本文使用 Selenium 驱动 Chrome 浏览器，配合 lxml 的 XPath 解析，抓取 5i5j（我爱我家）北京回龙观的租房列表页与详情页，并通过 pymysql 将标题、价格、户型、面积等房屋信息写入 MySQL 数据库。完整代码如下：

 1 from lxml import etree
 2 from selenium import webdriver
 3 import pymysql
 4 
 5 def geturl(fullurl):#获取每个招聘网页的链接
 6     browser.get(fullurl)
 7     shouye_html_text = browser.page_source
 8     shouye_ele = etree.html(shouye_html_text)
 9     zf_list = shouye_ele.xpath('/html/body/div[4]/div[1]/div[2]/ul/li/div/h3/a/@href')#链接url
10     zf_url_list  = []
11     for zf_url_lost in zf_list:
12         zf_url  = 'https://bj.5i5j.com'+zf_url_lost
13         zf_url_list.append(zf_url)
14     return zf_url_list
def getinfo(zp_url_list):
    """Scrape every detail page in ``zp_url_list`` and store it in MySQL.

    For each URL the page is loaded in the module-level Selenium ``browser``
    and five fields are read via XPath: title, price, layout, floor area and
    an extra info field. Each record is inserted into the ``5i5j_info``
    table, committed, and printed.

    One database connection is shared by all listings of a call and is
    always closed on exit. A listing whose page does not match one of the
    XPaths is skipped with a message instead of aborting the whole crawl.

    Args:
        zp_url_list: Detail-page URLs, e.g. as returned by ``geturl``.

    Returns:
        None.
    """
    if not zp_url_list:
        # Nothing to scrape, so do not open a database connection at all.
        return

    sql = "insert into `5i5j_info` (`title`,`num`,`type`, `zone`,`need`) values (%s,%s,%s,%s, %s)"
    connection = pymysql.connect(host='localhost', user='root', password='1234', db='5i5j')
    try:
        for zp_url in zp_url_list:
            browser.get(zp_url)
            # ``etree.HTML`` is the lxml parser entry point (``etree.html``
            # does not exist).
            zp_ele = etree.HTML(browser.page_source)
            try:
                zp_info_title = str(zp_ele.xpath('//html/body/div[3]/div[1]/div[1]/h1/text()')[0])
                # Price, suffixed with "yuan per month".
                zp_info_num = str(zp_ele.xpath('/html/body/div[3]/div[2]/div[2]/div[1]/div[1]/div/p[1]/text()')[0]) + '元/月'
                # Layout (room type).
                zp_info_type = str(zp_ele.xpath('/html/body/div[3]/div[2]/div[2]/div[1]/div[2]/div/p[1]/text()')[0])
                # Floor area, suffixed with "square metres".
                zp_info_zone = str(zp_ele.xpath('/html/body/div[3]/div[2]/div[2]/div[1]/div[3]/div/p[1]/text()')[0]) + '平米'
                # Extra info: a <span> label followed by the text of an <a>.
                zp_info_need_1 = str(zp_ele.xpath('/html/body/div[3]/div[2]/div[2]/div[2]/ul/li[1]/span/text()')[0])
                zp_info_need_2 = str(zp_ele.xpath('/html/body/div[3]/div[2]/div[2]/div[2]/ul/li[1]/a/text()')[0])
            except IndexError:
                # An XPath matched nothing (different page layout); skip this
                # listing rather than crash or insert an incomplete row.
                print('skipped, unexpected page layout:', zp_url)
                continue

            row = (
                zp_info_title,
                zp_info_num,
                zp_info_type,
                zp_info_zone,
                zp_info_need_1 + zp_info_need_2,
            )
            with connection.cursor() as cursor:
                # Parameterized query: the driver escapes the scraped text.
                cursor.execute(sql, row)
            # Commit per listing so rows already scraped survive a later failure.
            connection.commit()
            print(*row)
    finally:
        connection.close()
if __name__ == '__main__':
    # Script entry point: ask how many listing pages to crawl, then scrape
    # each page's detail links and store them.
    #
    # ``browser`` is deliberately a module-level name: geturl() and getinfo()
    # read it as a global. The class is ``webdriver.Chrome`` (capital C).
    browser = webdriver.Chrome()
    try:
        pags = int(input('需要几页?'))
        # Listing-page URL template; ``{}`` is the 1-based page number.
        url = 'https://bj.5i5j.com/zufang/huilongguan/n{}/'
        for i in range(1, pags + 1):
            fullurl = url.format(i)
            zf_url_list = geturl(fullurl)
            print(fullurl)
            getinfo(zf_url_list)
    finally:
        # quit() ends the whole WebDriver session and the driver process;
        # close() would only close the current window. Running it in
        # ``finally`` releases the browser even when the crawl fails.
        browser.quit()

上一篇： Python 读写文件的操作代码

下一篇： php实现从上传文件创建缩略图的方法

python 爬虫 5i5j房屋信息获取并存储到数据库

python 爬虫 5i5j房屋信息获取并存储到数据库

Python爬虫爬取全球疫情数据并存储到mysql数据库的步骤

python 爬虫 5i5j房屋信息获取并存储到数据库

Python爬虫爬取全球疫情数据并存储到mysql数据库的步骤

python 爬虫 5i5j房屋信息 获取并存储到数据库

python 爬虫 5i5j房屋信息 获取并存储到数据库

Python爬虫爬取全球疫情数据并存储到mysql数据库的步骤

python 爬虫 5i5j房屋信息 获取并存储到数据库

Python爬虫爬取全球疫情数据并存储到mysql数据库的步骤

python 爬虫 5i5j房屋信息获取并存储到数据库

python 爬虫 5i5j房屋信息获取并存储到数据库

python 爬虫 5i5j房屋信息获取并存储到数据库