欢迎您访问程序员文章站,本站旨在为大家提供和分享程序员计算机编程知识!
您现在的位置是: 首页

Scrapy中selenium的配置与使用

程序员文章站 2022-05-12 09:19:20
...

一、首先是无头模式的设置

from selenium import webdriver

# Build the Chrome options object.
opt = webdriver.ChromeOptions()

# Run Chrome in headless mode (no visible window). The flag is passed
# explicitly because ChromeOptions.set_headless() was deprecated and is no
# longer available in Selenium 4; the same flag is used on Windows and Linux.
opt.add_argument('--headless')

# Start a headless Chrome session with those options.
driver = webdriver.Chrome(options=opt)

try:
    # Visit Baidu.
    driver.get('https://baidu.com/')

    # Print the page source (a bare `driver.page_source` expression only
    # echoes in an interactive shell, not when run as a script).
    print(driver.page_source)
finally:
    # Always shut the browser down so no Chrome/chromedriver process is
    # left behind, even if loading the page failed.
    driver.quit()

二、基本用法

from selenium import webdriver
from time import sleep

# Create a Chrome browser instance.
driver = webdriver.Chrome()

try:
    # Open the target URL.
    driver.get('https://www.baidu.com')

    # Wait a moment so the page can finish loading.
    sleep(1)

    # Save a screenshot of the current page.
    driver.save_screenshot('./baidu.png')

    # Print the page source.
    print(driver.page_source)

    # Close the current window.
    driver.close()
finally:
    # Quit the browser; placed in `finally` so the browser and driver
    # processes are released even when one of the steps above raises.
    driver.quit()

三、获取输入框,提交等操作

from selenium import webdriver
from selenium.webdriver.common.by import By
from time import sleep


driver = webdriver.Chrome()

try:
    driver.get('https://www.baidu.com')

    # Wait a moment so the page can finish loading.
    sleep(1)

    # Look up the input box by its element id. find_element(By.ID, ...)
    # replaces find_element_by_id(), which was removed in Selenium 4.3.
    # The variable is not called `input` to avoid shadowing the builtin.
    input_box = driver.find_element(By.ID, 'kw')
    print(type(input_box))

    # Type the query text into the input box.
    input_box.send_keys('python')

    # Look up the submit element and click it.
    submit = driver.find_element(By.ID, 'su')
    submit.click()
    sleep(1)

    driver.save_screenshot('./python.png')
finally:
    # Release the browser even if an element lookup or click failed.
    driver.quit()

四、拖拽操作

from selenium import webdriver
from selenium.webdriver.common.by import By
from time import sleep


driver = webdriver.Chrome()

try:
    actions = webdriver.ActionChains(driver)

    driver.get('https://www.runoob.com/try/try.php?filename=jqueryui-api-droppable')
    sleep(1)

    # Switch into the iframe before looking up the elements below.
    driver.switch_to.frame('iframeResult')

    # find_element(By.ID, ...) replaces find_element_by_id(), which was
    # removed in Selenium 4.3.
    start = driver.find_element(By.ID, 'draggable')
    end = driver.find_element(By.ID, 'droppable')

    sleep(1)

    # Queue the drag-and-drop; nothing happens until perform() is called.
    action = actions.drag_and_drop(source=start, target=end)
    sleep(1)

    action.perform()

    # Keep the window open briefly so the result can be seen before closing.
    sleep(1)
finally:
    # The original never closed the browser; quit so no Chrome/chromedriver
    # process is left running after the script ends.
    driver.quit()

五、切换窗口

from selenium import webdriver
from time import sleep

driver = webdriver.Chrome()

try:
    driver.get('https://www.baidu.com')

    # Run JavaScript to open a new, empty window.
    driver.execute_script('window.open()')

    # Switch to the second window handle.
    driver.switch_to.window(driver.window_handles[1])
    # Print the handles (identifiers) of all open windows.
    print(driver.window_handles)

    # This navigation happens in the window that was just switched to.
    driver.get('https://www.taobao.com')

    # Switch back to the first window handle.
    driver.switch_to.window(driver.window_handles[0])
finally:
    # Quit in `finally` so every window is closed even if a step failed.
    driver.quit()

六、滚动操作

from selenium import webdriver
from time import sleep


driver = webdriver.Chrome()

try:
    driver.get('https://www.jd.com')

    sleep(2)

    # Scroll down. Done through JavaScript: jump to the full height of the
    # document body, i.e. the bottom of the page.
    driver.execute_script('window.scrollTo(0, document.body.scrollHeight)')

    # Keep the window open briefly so the scrolled page can be seen.
    sleep(2)
finally:
    # The original never closed the browser; quit so no Chrome/chromedriver
    # process is left running after the script ends.
    driver.quit()

七、scrapy中的中间件设置

class SeleniumMiddleware():
    """Scrapy downloader middleware that fetches pages with Chrome.

    Each request URL is opened in one shared Chrome instance and the page
    source is returned as an ``HtmlResponse``. Returning a response from
    ``process_request`` makes Scrapy skip its own download of that request.

    Enable it through ``DOWNLOADER_MIDDLEWARES`` in the project settings.
    """

    def __init__(self):
        # Imported here so this snippet works without a module-level import.
        from selenium import webdriver

        self.browser = webdriver.Chrome()

    @classmethod
    def from_crawler(cls, crawler):
        """Build the middleware and register browser cleanup.

        Scrapy calls this factory when it exists. It connects
        ``spider_closed`` so the browser is shut down when the spider ends.
        """
        from scrapy import signals

        middleware = cls()
        crawler.signals.connect(
            middleware.spider_closed, signal=signals.spider_closed
        )
        return middleware

    def process_request(self, request, spider):
        """Load ``request.url`` in the browser and return its HTML.

        Args:
            request: the Scrapy request being processed.
            spider: the spider that issued the request (unused).

        Returns:
            An ``HtmlResponse`` holding the browser's page source.
        """
        # The original referenced `time` and `HtmlResponse` without ever
        # importing them, which raised NameError on the first request.
        import time

        from scrapy.http import HtmlResponse

        self.browser.get(request.url)
        # Fixed pause to give the page time to load before reading it.
        time.sleep(3)
        html = self.browser.page_source

        # The body is encoded as UTF-8 here, so declare that encoding
        # explicitly; otherwise Scrapy may pick a different charset from the
        # page's own declaration and decode the text incorrectly.
        # Passing `request` links the response back to its request.
        return HtmlResponse(
            url=request.url,
            body=html.encode('utf-8'),
            encoding='utf-8',
            request=request,
        )

    def spider_closed(self, spider):
        """Quit the browser so no Chrome/chromedriver process is left over."""
        self.browser.quit()

八、模拟CSDN登录(滑块未成功)

from selenium import webdriver
from selenium.webdriver.common.by import By
from time import sleep

driver = webdriver.Chrome()

try:
    driver.get('https://passport.csdn.net/login')

    sleep(1)

    # find_element(By.<strategy>, ...) replaces the find_element_by_*()
    # helpers, which were removed in Selenium 4.3.
    # Click the link whose text is '账号登录'.
    driver.find_element(By.LINK_TEXT, '账号登录').click()
    sleep(1)

    # Fill in the two form fields; '---' and '----' are placeholders to be
    # replaced with real credentials.
    driver.find_element(By.ID, 'all').send_keys('---')

    driver.find_element(By.ID, 'password-number').send_keys('----')

    # Click the first <button> element on the page.
    driver.find_element(By.TAG_NAME, 'button').click()

    sleep(2)
    driver.save_screenshot('csdn.png')
finally:
    # Release the browser even if an element lookup failed.
    driver.quit()