欢迎您访问程序员文章站,本站旨在为大家提供、分享程序员计算机编程知识!
您现在的位置是: 首页

 抓取**数据

程序员文章站 2022-06-03 17:52:44
...

 抓取**数据

import time

from bs4 import BeautifulSoup
from lxml import etree
from selenium import webdriver
from selenium.webdriver.common.by import By


def _dump_cells(soup, css_class, path, echo=False):
    """Append the text of every ``<td class=css_class>`` cell to *path*.

    One cell is written per line. The file is opened in append mode, so
    repeated runs accumulate lines rather than overwrite earlier output.

    Args:
        soup: Parsed BeautifulSoup document to search.
        css_class: CSS class of the ``<td>`` cells to extract.
        path: Output text file; always written as UTF-8.
        echo: When true, also print each value to stdout.
    """
    with open(path, 'a', encoding='utf-8') as file_handle:
        for cell in soup.find_all('td', css_class):
            # NOTE(review): ``.string`` is None for a cell with several child
            # nodes, which is written out as the literal text "None".
            text = str(cell.string)
            if echo:
                print(text)
            file_handle.write(text + '\n')


driver = webdriver.Chrome()
try:
    driver.get("https://datachart.500.com/ssq/history/history.shtml")

    # Replace the contents of the "start" input with 10000.
    # Presumably this is the first draw/issue number of the query range --
    # TODO confirm against the page.
    start_box = driver.find_element(By.ID, "start")
    start_box.clear()
    start_box.send_keys("10000")

    # Click the image at this absolute path (presumably the query button;
    # the locator is brittle and breaks if the page layout changes).
    driver.find_element(By.XPATH, "/html/body/table/tbody/tr[1]/td/div/div/table/tbody/tr[1]/td/div/div[1]/div/table/tbody/tr/td[2]/img").click()

    # Fixed pause so the page can refresh before its HTML is captured.
    time.sleep(3)
    source = driver.page_source
finally:
    # Always shut the browser down, even if navigation or a lookup failed,
    # so no Chrome/chromedriver process is left behind.
    driver.quit()

soup = BeautifulSoup(source, 'lxml')

# File names are pinyin: hongqiu = "red ball", lanqiu = "blue ball".
# Only the t_cfont2 values are echoed to stdout, as in the original script.
_dump_cells(soup, 't_cfont2', 'hongqiu.txt', echo=True)
_dump_cells(soup, 't_cfont4', 'lanqiu.txt')