(65)-- 爬取58交友信息
程序员文章站
2024-03-14 21:47:53
...
# 二级爬取58交友的名字、年龄、身高、学历、图片信息,并把这些信息保存到数据库中
# 首先要在当前目录下建立一个58文件夹(用于保存下载的图片),然后通过Navicat连接到数据库han,并事先建好py07_58friend表(字段:name、age、height、edu、img)
# mydb.py
import pymysql
class Mydb:
    """Small wrapper around one PyMySQL connection for insert/update/delete.

    ``conn`` and ``cursor`` are ``None`` whenever no connection is open,
    either because connecting failed or because ``close()`` was called.
    """

    def __init__(self, host='127.0.0.1', user='root', password='123456',
                 database='han', charset='utf8'):
        """Open a connection and a cursor.

        The defaults reproduce the previously hard-coded settings, so a bare
        ``Mydb()`` connects exactly as before.  Connection errors are printed
        instead of raised; ``conn`` and ``cursor`` then stay ``None``.
        """
        # Bind both attributes up front so they exist even if connect() fails.
        self.conn = None
        self.cursor = None
        try:
            # Keyword arguments are required: PyMySQL 1.0+ no longer accepts
            # positional connection parameters.
            self.conn = pymysql.connect(
                host=host,
                user=user,
                password=password,
                database=database,
                charset=charset,
            )
            self.cursor = self.conn.cursor()
        except Exception as e:
            print(e)

    def execute(self, sql, data):
        """Run one parameterized write statement and commit it.

        Args:
            sql: Statement text with ``%s`` placeholders.
            data: Sequence of values bound to the placeholders.

        Returns:
            The value returned by ``cursor.execute`` (affected row count) on
            success, or ``None`` when there is no connection or the statement
            failed.  On failure the error is printed and the transaction is
            rolled back.
        """
        if self.conn is None or self.cursor is None:
            # Without this guard the failure path below would itself raise
            # AttributeError while trying to roll back.
            print('执行增删改失败')
            print('数据库未连接')
            return None
        try:
            row = self.cursor.execute(sql, data)
            self.conn.commit()
        except Exception as e:
            print('执行增删改失败')
            print(e)
            self.conn.rollback()
            return None
        return row  # affected row count

    def close(self):
        """Close the cursor and the connection if they are open."""
        if self.cursor is not None:
            self.cursor.close()
            self.cursor = None
        if self.conn is not None:
            self.conn.close()
            self.conn = None
if __name__ == '__main__':
    # Manual smoke test: insert one hard-coded sample row and print whatever
    # execute() returns for it.
    insert_sql = 'insert into py07_58friend(`name`,`age`,`height`,`edu`,`img`) VALUES(%s,%s,%s,%s,%s)'
    sample_row = ("大美", 16, 170, '博士', '')
    mydb = Mydb()
    print(mydb.execute(insert_sql, sample_row))
# 58friend.py
from bs4 import BeautifulSoup
import requests
import time
from urllib import request
from mydb import Mydb
def get_friend(start_url='http://jiaoyou.58.com/bj/mm/18-20/', db=None):
    """Crawl the listing pages, download each photo and store each profile.

    For every ``dl.fj_list`` entry on a page this reads name, age, height,
    education and image URL, saves the image under ``./58/`` and inserts one
    row into ``py07_58friend``.  It then follows the ``a#nextPage`` link and
    stops when a page has no such link or a request fails.

    Args:
        start_url: First listing page to fetch.  Defaults to the URL that was
            previously hard-coded.
        db: Object with an ``execute(sql, data)`` method.  When omitted, the
            module-level ``mydb`` set up by the script entry point is used.
    """
    import os
    from urllib.parse import urljoin

    if db is None:
        # Keep the zero-argument call working: fall back to the global handle.
        db = mydb

    sql = 'insert into py07_58friend(`name`,`age`,`height`,`edu`,`img`) VALUES(%s,%s,%s,%s,%s)'
    image_dir = './58/'
    # urlretrieve cannot create missing directories, so make sure it exists.
    os.makedirs(image_dir, exist_ok=True)

    page_url = start_url
    while True:
        try:
            # A timeout keeps a stalled server from hanging the crawl forever.
            response = requests.get(page_url, timeout=10)
            response.raise_for_status()
        except requests.RequestException as e:
            print(e)
            break

        soup = BeautifulSoup(response.text, 'lxml')
        for friend in soup.select('dl.fj_list'):
            name = friend.select('b')[0].text
            spans = friend.select('span')
            # The second span is split on '|' into an age part and a height
            # part; the '岁' and 'cm' unit suffixes are stripped from them.
            age_height = spans[1].text.split('|')
            age = age_height[0].strip('岁')
            height = age_height[1].strip('cm')
            # Only the text before the first '|' of the third span is kept.
            edu = spans[2].text.split('|')[0]
            img = friend.select('img')[0]['src']
            fname = img.split('/')[-1]
            request.urlretrieve(img, image_dir + fname)
            db.execute(sql, (name, age, height, edu, img))
            print(name)

        # Extract the next-page link; a page without one ends the crawl
        # cleanly instead of raising IndexError.
        next_links = soup.select('a#nextPage')
        if not next_links:
            break
        next_href = next_links[0].get('href')
        if not next_href:
            break
        # urljoin resolves both relative and absolute hrefs against the
        # current page, unlike plain string concatenation.
        page_url = urljoin(page_url, next_href)
        time.sleep(1)
if __name__ == '__main__':
    # get_friend() reads the module-level name `mydb`, so the database handle
    # has to be bound under exactly that name before the crawl starts.
    mydb = Mydb()
    get_friend()
# 爬取部分结果如下:
C:\Users\cz\AppData\Local\Programs\Python\Python35\python.exe E:/Python/python爬虫/11.py
鸿燕
我不要拥抱
嗯嗯
fei
周晨
王诗诗
丽丽
男人的福利
世界太假
毛毛
阿英
跳舞女
公主病 *
田灵儿
依楠
相遇是缘
寂寞空虚冷
香水百合
单恋高校
Process finished with exit code 1
兄弟连学python
Python学习交流、资源共享群:563626388 QQ