python爬虫—“爱彼迎”：ERR_HTTP2_COMPRESSION_ERROR/网页可能暂时无法连接，或者它已永久性地移动到了新网址。

程序员文章站 2022-07-14 21:12:41

...

爱彼迎
被爱彼迎制裁得半死不活：用requests什么数据都返回不了，甚至自己用Chrome浏览器打开爱彼迎搜索以后，也会一直报错（见截图：“ERR_HTTP2_COMPRESSION_ERROR / 网页可能暂时无法连接，或者它已永久性地移动到了新网址。”）。
找了半天原因，也按照很多教程做了尝试，都没办法解决。只要打开新页面，就会报错，刷新一下内容才能出来。
换成Firefox后根本打不开
看来也只能用selenium打开一次，再刷新一次了（见截图：“ERR_HTTP2_COMPRESSION_ERROR / 网页可能暂时无法连接，或者它已永久性地移动到了新网址。”）。
既然使用了selenium，那就试试从主页进去搜索城市的功能吧。

# Path to the local chromedriver binary (machine-specific; adjust before running).
path = r'/Users/chenbaba/Desktop/python/chromedriver'
# Start a Chrome session driven by that chromedriver.
browser = webdriver.Chrome(executable_path=path)
# Shared explicit wait bound to `browser`, created with a timeout of 10.
wait = WebDriverWait(browser, 10)
# Locate the search box, type the city, then click the "search" button.
def parse_bnb(url,kw):
    """Open *url*, enter *kw* into the home-page search box and submit it.

    Args:
        url: Address of the page that hosts the search form.
        kw: Text typed into the search box (the city to look up).
    """
    browser.get(url)
    browser.implicitly_wait(10)

    # Type the keyword once the input element is present in the DOM.
    search_box_locator = (By.CSS_SELECTOR, "#Koan-via-HeroController__input")
    search_box = wait.until(EC.presence_of_element_located(search_box_locator))
    search_box.send_keys(kw)

    # Submit the form as soon as its button becomes clickable.
    submit_locator = (By.CSS_SELECTOR, "#ChinaSearchBarWithDate > form > div._mv0xzc > button")
    wait.until(EC.element_to_be_clickable(submit_locator)).click()

搜索以后的结果只有15-17页，找到页面中的“下一页”按钮，点它～

# Wait until the "next page" link is clickable, then click it to load the
# following page of results.
next_page = wait.until(EC.element_to_be_clickable((By.CSS_SELECTOR, "#site-content > div > div > div._1kss53yu > div > div > div > div._1ou8uzt > nav > ul > li._i66xk8d > a")))
next_page.click()

selenium遇到ERR_HTTP2_COMPRESSION_ERROR时，报错为
selenium.common.exceptions.TimeoutException 或者
selenium.common.exceptions.WebDriverException
用try/except捕获这两种异常，遇到报错就刷新页面再来一遍即可。

 except (selenium.common.exceptions.TimeoutException,selenium.common.exceptions.WebDriverException):
     browser.refresh()

完整代码如下：

import requests
import pymongo
import selenium
from selenium import webdriver
from selenium.webdriver.common.by import By
from selenium.webdriver.chrome.options import Options
from selenium.webdriver.support.ui import WebDriverWait
from selenium.webdriver.support import expected_conditions as EC
from lxml import etree

# Path to the local chromedriver binary (machine-specific; adjust before running).
path = r'/Users/chenbaba/Desktop/python/chromedriver'
# Start a Chrome session driven by that chromedriver.
browser = webdriver.Chrome(executable_path=path)
# NOTE(review): `headers` is not referenced by any code shown in this file.
headers = {'User-Agent':'Mozilla/5.0 (Windows NT 10.0; Win64; x64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/68.0.3440.106 Safari/537.36'}
# Shared explicit wait bound to `browser`, created with a timeout of 10.
wait = WebDriverWait(browser, 10)
# Store the scraped listings in MongoDB.
client = pymongo.MongoClient('localhost',27017)
mydb = client['Real_estatedb']
airbnb = mydb['airbnb']
def _extract_listing(info):
    """Build the MongoDB document for one listing element.

    Every field starts out as an empty string and is filled only when its
    XPath matches, so a listing that lacks a field never inherits the value
    scraped from the previous listing.

    Args:
        info: Parsed element for a single listing; only its ``xpath`` method
            is used.

    Returns:
        dict: The document to insert, with the same keys and key order the
        script has always written.
    """
    def first(path):
        # First match of *path* relative to this listing, or '' when absent.
        matches = info.xpath(path)
        return matches[0] if matches else ''

    images = info.xpath('.//img[@class="_9ofhsl"]/@src')
    return {
        '标题': first('./div/div/meta[1]/@content'),
        '评分': first('.//span[@class="_1clmxfj"]/text()'),
        '类型': first('.//div[@class="_1etkxf1"]/span/span/text()[1]'),
        '厅室数量': first('.//div[@class="_1etkxf1"]/span/span/text()[2]'),
        '价格（元/晚）': first('.//div[@class="_1ixtnfc"]/span[2]/text()'),
        '评论': first('.//div[@class="_11jctj9"]/span/span/text()'),
        # Stored format is unchanged: the repr of the list of src values
        # with the surrounding brackets stripped.
        '图片链接': str(images).strip('[]') if images else '',
    }


def parse_info(m, last_page=17, max_retries=3):
    """Scrape the result page currently shown, then walk the following pages.

    For each page the listings are extracted from ``browser.page_source``,
    printed (title and price) and inserted into the ``airbnb`` collection;
    the "next page" link is then clicked.  When Selenium raises a timeout or
    WebDriver error the page is refreshed and the step is retried.

    Compared with the earlier recursive version:

    * retries are bounded, so a missing "next page" link on the final page
      ends the crawl instead of recursing until ``RecursionError``;
    * listings already stored for a page are not inserted again when only
      the click on "next page" has to be retried;
    * the page numbered ``last_page`` is scraped too.

    Args:
        m: 1-based number of the result page the browser is showing when the
            function is called.
        last_page: Highest page number to scrape (inclusive).
        max_retries: How many consecutive refresh-and-retry attempts are
            allowed on one page before the crawl stops.

    Returns:
        None.
    """
    next_page_locator = (
        By.CSS_SELECTOR,
        "#site-content > div > div > div._1kss53yu > div > div > div > div._1ou8uzt > nav > ul > li._i66xk8d > a",
    )
    retryable = (
        selenium.common.exceptions.TimeoutException,
        selenium.common.exceptions.WebDriverException,
    )

    page = m
    failures = 0
    stored = False  # True once the current page's listings are in MongoDB
    while page <= last_page:
        try:
            if not stored:
                tree = etree.HTML(browser.page_source)
                cards = tree.xpath('//div[@class="_fhph4u"]/div') if tree is not None else []
                for card in cards:
                    data = _extract_listing(card)
                    print(data['标题'], data['价格（元/晚）'])
                    airbnb.insert_one(data)
                # A page without listings (for example the browser's error
                # page) is scraped again after the refresh.
                stored = bool(cards)
            if stored and page >= last_page:
                break
            wait.until(EC.element_to_be_clickable(next_page_locator)).click()
        except retryable:
            failures += 1
            if failures > max_retries:
                # Still failing after several refreshes: most likely there is
                # no further page, so stop instead of looping forever.
                break
            browser.refresh()
            continue
        page += 1
        failures = 0
        stored = False
            
def parse_bnb(url,kw):
    """Search the site at *url* for *kw* and scrape the resulting pages.

    Args:
        url: Address of the page that hosts the search form.
        kw: Text typed into the search box (the city to look up).
    """
    browser.get(url)
    browser.implicitly_wait(10)

    # Type the keyword once the input element is present in the DOM.
    search_box_locator = (By.CSS_SELECTOR, "#Koan-via-HeroController__input")
    search_box = wait.until(EC.presence_of_element_located(search_box_locator))
    search_box.send_keys(kw)

    # Submit the form as soon as its button becomes clickable.
    submit_locator = (By.CSS_SELECTOR, "#ChinaSearchBarWithDate > form > div._mv0xzc > button")
    wait.until(EC.element_to_be_clickable(submit_locator)).click()

    # The browser now shows the first page of results.
    parse_info(1)

def main():
    """Search Airbnb China for one city and scrape its result pages.

    The browser session is closed in a ``finally`` clause so that the Chrome
    and chromedriver processes are released even when scraping raises.
    """
    url = 'https://www.airbnb.cn/'
    # kw is the city to search for
    kw = '西安'
    try:
        parse_bnb(url,kw)
    finally:
        browser.quit()

if __name__ == '__main__':
    main()

python爬虫—“爱彼迎”：ERR_HTTP2_COMPRESSION_ERROR/网页可能暂时无法连接，或者它已永久性地移动到了新网址。
大功告成！
初学，代码有点繁琐，见谅～