python+selenium爬取淘宝羽毛球拍信息
程序员文章站
2023-09-17 23:23:45
数据量很大,就爬取前五页偷下懒from selenium import webdriverimport pandas as pdimport openpyxlimport timedriver = webdriver.Chrome()url = 'https://s.taobao.com/search?q=羽毛球拍'driver.get(url)driver.implicitly_wait(20)namelist = []pricelist = []weblist = []locat...
数据量很大,为了偷个懒,这里只爬取前五页。
import time

import openpyxl
import pandas as pd
from selenium import webdriver
from selenium.webdriver.common.by import By
# Start a Chrome session and open the Taobao search results for the query.
driver = webdriver.Chrome()
url = 'https://s.taobao.com/search?q=羽毛球拍'
driver.get(url)
# Element lookups may poll for up to 20 seconds before giving up.
driver.implicitly_wait(20)

# Parallel result columns filled by infor(): position k in every list
# belongs to the same product.
namelist, pricelist, weblist, locationlist, numberlist = [], [], [], [], []
def infor():
    """Collect the product cards shown on the current results page.

    For every card the title text, the ``trace-price`` attribute, the link
    ``href``, the location text and the deal-count text are read and
    appended to the module-level lists ``namelist``, ``pricelist``,
    ``weblist``, ``locationlist`` and ``numberlist`` respectively.

    Uses the module-level ``driver``, which must already be showing a
    results page. ``find_element`` raises when a card lacks one of the
    expected elements; because all fields of a card are read before any
    list is touched, the lists never end up with different lengths.
    """
    # Compound class names are expressed as CSS selectors: the old
    # find_element(s)_by_class_name helpers were removed in Selenium 4.3
    # and never officially accepted several classes at once.
    cards = driver.find_elements(By.CSS_SELECTOR, '.item.J_MouserOnverReq')
    for card in cards:
        title_row = card.find_element(By.CSS_SELECTOR, '.row.row-2.title')
        name = title_row.find_element(By.CSS_SELECTOR, '.J_ClickStat').text

        # The first J_ClickStat element of the card supplies both the price
        # and the URL, so look it up once instead of once per attribute.
        first_link = card.find_element(By.CSS_SELECTOR, '.J_ClickStat')
        price = first_link.get_attribute('trace-price')
        web = first_link.get_attribute('href')

        location = card.find_element(By.CSS_SELECTOR, '.location').text
        number = card.find_element(By.CSS_SELECTOR, '.deal-cnt').text

        namelist.append(name)
        pricelist.append(price)
        weblist.append(web)
        locationlist.append(location)
        numberlist.append(number)
if __name__ == '__main__':
    # Scrape result pages 1-5, then export everything to work.xlsx.
    try:
        # The implicit wait applies to the whole session, so setting it
        # once is enough; it does not need repeating on every page.
        driver.implicitly_wait(20)
        for page in range(1, 6):
            # Jump to the page by typing its number into the pager input
            # and submitting, as a user would.
            page_input = driver.find_element(By.CSS_SELECTOR, '.input.J_Input')
            page_input.clear()
            page_input.send_keys(str(page))
            driver.find_element(By.CSS_SELECTOR, '.btn.J_Submit').click()
            # Give the results time to switch before reloading the URL.
            time.sleep(3)
            driver.get(driver.current_url)
            infor()
            print('第{}页录入成功'.format(page))
    finally:
        # Always close the browser, even if a page fails, so no
        # Chrome/chromedriver process is left running.
        driver.quit()

    # One column per collected field; rows line up because infor() appends
    # to every list once per product.
    data = pd.DataFrame({
        'name': namelist,
        'price': pricelist,
        'location': locationlist,
        'number': numberlist,
        'web': weblist,
    })
    data.to_excel('work.xlsx')
爬虫新手,如有不规范之处,请大家多多见谅 O(∩_∩)O
本文地址:https://blog.csdn.net/weixin_47128538/article/details/107357109
上一篇: opencv-图像的几何变换小结