微博评论爬取优化

程序员文章站 2022-05-02 20:48:45

...

#抓取MiuMiu微博所有评论
from selenium import webdriver
from selenium.common.exceptions import NoSuchElementException
import time
import pandas as pd



def switch_to_window():
    """Move the focus of the global ``driver`` off the current window.

    Every window handle other than the current one is switched to in turn,
    so the focus ends up on the last such handle reported by
    ``driver.window_handles``.  When no other window exists the focus is
    left unchanged.
    """
    current_handle = driver.current_window_handle

    for handle in driver.window_handles:
        if handle != current_handle:
            # ``switch_to.window`` replaces the deprecated
            # ``switch_to_window`` method, which was removed in Selenium 4.
            driver.switch_to.window(handle)

def use_mouse_add_hide_content():
    """Scroll the page in the global ``driver`` far down, five times.

    After each scroll the function sleeps for 5 seconds before scrolling
    again (presumably to let lazily loaded content appear -- the page
    behaviour itself is not visible from this file).
    """
    scroll_script = 'window.scrollTo(0,1000000)'
    pause_seconds = 5
    remaining = 5
    while remaining > 0:
        driver.execute_script(scroll_script)
        time.sleep(pause_seconds)
        remaining -= 1

def get_comment_time():
    """Click the element with class ``more_txt`` until it no longer exists.

    At most 50 clicks are made, with an 8 second pause after each one.
    When the element cannot be found, the ``NoSuchElementException`` is
    printed and the loop stops.  (Presumably ``more_txt`` is the "load more
    comments" link -- confirm against the page markup.)
    """
    for _ in range(50):
        try:
            # Look the element up once and click that same element; the
            # lookup is the only step expected to raise
            # NoSuchElementException.
            more_link = driver.find_element_by_class_name("more_txt")
        except NoSuchElementException as e:
            print(e)
            break
        else:
            more_link.click()
            time.sleep(8)

def write_to_txt(commentdata_1, timedata, path='D:\\comment_time.txt'):
    """Append one ``comment<TAB>time`` line per comment to a UTF-8 text file.

    Args:
        commentdata_1: pandas DataFrame whose first column holds the comment
            text; rows are read by position.
        timedata: sequence of objects exposing a ``.text`` attribute.  Row
            ``i`` of ``commentdata_1`` is paired with ``timedata[i + 1]``,
            i.e. the first entry of ``timedata`` is skipped.
        path: file to append to.  Defaults to the location the script has
            always written to.

    Raises:
        IndexError: if ``timedata`` has fewer than
            ``len(commentdata_1) + 1`` entries.
    """
    # Open the file once for the whole batch rather than once per row.
    with open(path, mode='a', encoding='utf-8') as f:
        for i in range(len(commentdata_1)):
            f.write(commentdata_1.iloc[i, 0])
            f.write('\t')
            f.write(timedata[i + 1].text)
            f.write('\n')

def get_one_page():
    """Scrape the comments of the posts on the currently loaded profile page.

    For feed entries 2 through 46 the function:

    1. skips the entry when its comment counter shows only the label
       '评论' (presumably meaning the post has no comments yet);
    2. clicks the entry's link and switches to the other window via
       ``switch_to_window``;
    3. scrolls and expands the page via ``use_mouse_add_hide_content`` and
       ``get_comment_time``;
    4. collects the ``WB_text`` elements and the ``WB_from S_txt2``
       elements and hands them to ``write_to_txt``;
    5. closes the window and switches back to a remaining window.

    Uses the module-level ``driver``.
    """
    feed_xpath = '//*[@id="Pl_Official_MyProfileFeed__23"]/div/div['
    for num in range(2, 47):
        print("The {} comment is start!".format(num))
        post_xpath = feed_xpath + str(num) + ']'
        counter = driver.find_element_by_xpath(
            post_xpath + '/div[2]/div/ul/li[3]/a/span/span/span/em[2]')
        if counter.text == '评论':
            continue
        driver.find_element_by_xpath(
            post_xpath + '/div[1]/div[3]/div[2]/a[1]').click()
        time.sleep(5)
        switch_to_window()

        use_mouse_add_hide_content()

        get_comment_time()

        commentdata = driver.find_elements_by_class_name("WB_text")
        timedata = driver.find_elements_by_css_selector("[class='WB_from S_txt2']")

        # The element at index 1 is always kept; later elements are kept
        # only when their text contains a full-width colon.  Texts are
        # gathered in a list so the DataFrame is built once instead of
        # being re-concatenated for every comment.
        texts = [commentdata[1].text]
        for element in commentdata[2:]:
            text = element.text
            if '：' in text:
                texts.append(text)
        commentdata_1 = pd.DataFrame(texts, columns=["text"])

        write_to_txt(commentdata_1, timedata)

        # Close the detail window, then move focus to a window that is
        # still open (the last handle reported wins).
        driver.close()
        for handle in driver.window_handles:
            # ``switch_to.window`` replaces the deprecated
            # ``switch_to_window`` method, which was removed in Selenium 4.
            driver.switch_to.window(handle)
        time.sleep(5)
    

if __name__ == '__main__':
    # ``driver`` is deliberately a module-level name: the helper functions
    # above read it as a global.
    driver = webdriver.Chrome()
    try:
        # Log in through the Weibo front page.
        driver.get('https://weibo.com')

        driver.find_element_by_id('loginname').send_keys('******')

        driver.find_element_by_name('password').send_keys('******')

        driver.find_element_by_xpath('//*[@id="pl_login_form"]/div/div[3]/div[6]/a').click()
        time.sleep(10)

        driver.get('https://weibo.com/miumiuofficial?profile_ftype=1&is_all=1#_0')

        # Scrape 29 pages, following the '下一页' (next page) link after
        # each one.
        for _ in range(29):
            get_one_page()
            driver.find_element_by_link_text('下一页').click()
            time.sleep(10)
    finally:
        # Always shut the browser down, even when scraping fails part-way,
        # so no Chrome/chromedriver process is left behind.
        driver.quit()

上一篇：玩物得志，帮助文玩行业做入门级商品和入门级教育

下一篇：男生复合后的qq签名、和女友复合qq签名大全

微博评论爬取优化

WordPress优化：查询百度收录,自动同步微博带图片等

微博如何设置单条不能评论? 单条微博设置禁止评论的技巧

怎么上微博热门评论?如何成为热门微博里的热门评论?

python爬虫爬取微博评论案例详解

新浪微博iOS版可以带图片评论啦

通过抓取淘宝评论为例讲解Python爬取ajax动态生成的数据(经典)

Python实现爬取马云的微博功能示例

微博营销小技巧 微博评论也可以引流

python爬取微信公众号文章

python使用webdriver爬取微信公众号

微博评论爬取优化

WordPress优化：查询百度收录,自动同步微博带图片等

微博如何设置单条不能评论? 单条微博设置禁止评论的技巧

怎么上微博热门评论?如何成为热门微博里的热门评论?

python爬虫爬取微博评论案例详解

新浪微博iOS版可以带图片评论啦

通过抓取淘宝评论为例讲解Python爬取ajax动态生成的数据(经典)

Python实现爬取马云的微博功能示例

微博营销小技巧 微博评论也可以引流

python爬取微信公众号文章

python使用webdriver爬取微信公众号

微博营销小技巧 微博评论也可以引流