python钓鱼评论爬取

程序员文章站 2022-04-08 23:20:03

...

用Python爬取钓鱼网（diaoyu.com）长沙钓场评论并存入txt

import re#python正则分割
# from  bs4 import BeautifulSoup#网页美味汤
#from selenium import webdriver#模拟点击鼠标点击网页库
#import time#时间
import  requests#直接爬取网页库
def direct(secname, timeout=30):
    """Collect the listing-page URLs linked from a diaoyu.com page.

    Args:
        secname: URL of the page to download.
        timeout: Seconds to wait on the server before ``requests`` raises a
            timeout error, so a stalled connection cannot hang the crawl.

    Returns:
        Every URL found in the page text of the form
        ``http://www.diaoyu.com/diaochang/changsha/list-<id>-4-1-1.html``,
        in the order they appear; duplicates are kept.
    """
    html = requests.get(secname, timeout=timeout).text
    # Raw string with escaped dots. The middle part is lazy and excludes
    # whitespace, quotes and angle brackets, so a match stays inside one URL
    # even when several links share a line (a greedy ".*" would merge them).
    rule = r"http://www\.diaoyu\.com/diaochang/changsha/list-[^\s\"'<>]*?-4-1-1\.html"
    return re.findall(rule, html)
def direct2(secname, timeout=30):
    """Collect the unique numeric detail-page URLs linked from a page.

    Args:
        secname: URL of the page to download.
        timeout: Seconds to wait on the server before ``requests`` raises a
            timeout error, so a stalled connection cannot hang the crawl.

    Returns:
        The distinct URLs of the form
        ``http://www.diaoyu.com/diaochang/changsha/<digits>.html`` found in
        the page text, in first-seen order.
    """
    html = requests.get(secname, timeout=timeout).text
    # Raw string: "\d" in a plain string literal is an invalid escape
    # sequence; the dots are escaped so they only match a literal ".".
    rule = r"http://www\.diaoyu\.com/diaochang/changsha/\d+\.html"
    # dict.fromkeys removes duplicates while keeping a deterministic order
    # (a set would return the URLs in arbitrary order).
    return list(dict.fromkeys(re.findall(rule, html)))
def direct3(secname, timeout=30):
    """Extract the raw rating-and-comment HTML fragments from a page.

    Args:
        secname: URL of the page to download.
        timeout: Seconds to wait on the server before ``requests`` raises a
            timeout error, so a stalled connection cannot hang the crawl.

    Returns:
        The distinct matched fragments in first-seen order. Each fragment is
        the unmodified matched text: ``</span>``, one digit followed by
        ``分``, a newline, ``</span>``, a newline, then ``<p>...</p>``.
        The layout is left untouched because callers slice it by position.
    """
    html = requests.get(secname, timeout=timeout).text
    # Raw string: "\d" in a plain string literal is an invalid escape
    # sequence. In a raw pattern "\n" is still matched as a newline by re.
    rule = r"</span>\d分\n</span>\n<p>.*</p>"
    # dict.fromkeys removes duplicates while keeping a deterministic order
    # (a set would return the fragments in arbitrary order).
    return list(dict.fromkeys(re.findall(rule, html)))

def main():
    """Crawl comment fragments and write them to ``E:\\comments.txt``.

    Starting from the fixed listing page, follows each listing URL returned
    by ``direct`` (except the first two), then each detail URL returned by
    ``direct2``, and gathers the fragments returned by ``direct3``. Every
    fragment becomes one output line: the rating text, a tab, the comment
    text, a tab, and a newline. The rating is also printed to stdout.
    """
    gg = direct("http://www.diaoyu.com/diaochang/changsha/list-0-4-1-1.html")
    print(gg)
    allcomments = []
    # The first two matches are skipped. The original note only says the
    # first district (Furong) has no content -- TODO confirm why index 1 is
    # skipped as well.
    for listing_url in gg[2:]:
        print(listing_url)
        for detail_url in direct2(listing_url):
            allcomments.append(direct3(detail_url))
    outfiles = 'E:\\comments.txt'
    # "with" guarantees the file is flushed and closed, even if a write fails.
    with open(outfiles, 'w', encoding='utf-8') as output:
        for fragments in allcomments:
            for fragment in fragments:
                # Fixed offsets into the matched fragment: [7:9] is the
                # "<digit>分" rating after the leading "</span>", and
                # [21:-4] is the text between "<p>" and the closing "</p>".
                rating = fragment[7:9]
                print(rating)
                output.write(rating + "\t" + fragment[21:-4] + "\t\n")
# Call main() only when this file is executed as a script
if __name__ == '__main__':
    main()

相关标签： # python爬虫 python 爬虫

上一篇： BeautifulSoup

下一篇： C#路径中获取文件全路径、目录、扩展名、文件名称

python钓鱼评论爬取

python钓鱼评论爬取并存入txt

使用python爬取B站千万级数据

Python实现爬取亚马逊数据并打印出Excel文件操作示例

详解用python写网络爬虫-爬取新浪微博评论

Python 爬取携程所有机票的实例代码

Python实现的爬取百度贴吧图片功能完整示例

详解python 爬取12306验证码

Python实现爬取知乎神回复简单爬虫代码分享

Python网络爬虫（selenium爬取动态网页、爬虫案例分析、哈希算法与RSA加密）

Python实现爬取逐浪小说的方法

Python爬虫实战用 BeautifulSoup 爬取电影网站信息

python钓鱼评论爬取

python钓鱼评论爬取并存入txt

使用python爬取B站千万级数据

Python实现爬取亚马逊数据并打印出Excel文件操作示例

详解用python写网络爬虫-爬取新浪微博评论

Python 爬取携程所有机票的实例代码

Python实现的爬取百度贴吧图片功能完整示例

详解python 爬取12306验证码

Python实现爬取知乎神回复简单爬虫代码分享

Python网络爬虫（selenium爬取动态网页、爬虫案例分析、哈希算法与RSA加密）

Python实现爬取逐浪小说的方法

Python爬虫实战用 BeautifulSoup 爬取电影网站信息

python钓鱼评论爬取并存入txt