欢迎您访问程序员文章站!本站旨在为大家提供并分享程序员计算机编程知识。
您现在的位置是: 首页

使用python刷取博客访问量

程序员文章站 2022-04-26 17:09:41
...

话不多说,直接上代码:

import re
import requests
from requests import RequestException
import time
import random


def get_page(url, timeout=10):
    """Download *url* and return the response body as text.

    Args:
        url: Address of the page to fetch.
        timeout: Seconds to wait for the server before giving up. It is
            forwarded to ``requests.get``; without it a stalled connection
            would block the caller indefinitely.

    Returns:
        The response text when the server answers with HTTP 200, otherwise
        ``None`` (non-200 status, or a ``RequestException`` such as a
        connection error or timeout, which is reported on stdout).
    """
    headers = {
        # Present the request as if the article was reached from CSDN blogs.
        'Referer': 'https://blog.csdn.net',
        # Present the request as coming from a desktop browser.
        'User-Agent': 'Mozilla/5.0 (Windows NT 10.0; Win64; x64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/68.0.3440.75 Safari/537.36',
    }
    # Keep the try body down to the single call that can raise.
    try:
        response = requests.get(url, headers=headers, timeout=timeout)
    except RequestException:
        print('请求出错')
        return None
    if response.status_code == 200:
        return response.text
    return None


# Compiled once at import time. The raw string keeps ``\d`` from being read
# as an (invalid) string escape; the runtime pattern is unchanged.
_READ_COUNT_RE = re.compile(r'<span.*?read-count.*?(\d+).*?</span>')


def parse_page(html):
    """Extract the read counter from an article page.

    Args:
        html: Page markup as a string.

    Returns:
        The first run of digits that follows ``read-count`` inside a
        ``<span>`` element, as an ``int``. Returns ``None`` (after printing
        a message) when the pattern does not match or *html* is not text.
    """
    try:
        match = _READ_COUNT_RE.search(html)
    except TypeError:
        # html was None or some other non-string object.
        match = None
    if match is None:
        print('解析出错')
        return None
    return int(match.group(1))


def main():
    """Visit every configured article in an endless loop and print its count.

    Each pass fetches the articles in order, prints the parsed read count
    for those that could be fetched and parsed, then sleeps for a random
    60-83 seconds before the next pass. Any unexpected exception ends the
    loop and is reported on stdout.
    """
    # Articles requested on every pass.
    urls = (
        'https://blog.csdn.net/qinqi100233/article/details/107505297',
        'https://blog.csdn.net/qinqi100233/article/details/107505099',
        'https://blog.csdn.net/qinqi100233/article/details/107455959',
        'https://blog.csdn.net/qinqi100233/article/details/107236836',
        'https://blog.csdn.net/qinqi100233/article/details/107236367',
    )
    try:
        while True:
            for url in urls:
                html = get_page(url)
                if not html:
                    continue
                read_num = parse_page(html)
                # Compare against None so a genuine count of 0 is still shown.
                if read_num is not None:
                    print('当前阅读量:', read_num)
            sleep_time = random.randint(60, 83)
            print('please wait', sleep_time, 's')
            # Throttle the request rate; overly frequent requests trigger
            # the site's anti-crawler measures.
            time.sleep(sleep_time)
    except Exception as exc:
        # Top-level boundary: report what went wrong instead of hiding it.
        print('出错啦!', exc)

# Start the polling loop only when this file is executed as a script,
# not when it is imported as a module.
if __name__ == '__main__':
    main()

这是运行成功后的效果
使用python刷取博客访问量
因为担心被官方检测到而封禁博客,所以把访问间隔设置得长了一些,阅读量只会慢慢地增加,还请各位体谅。

相关标签: python