使用python刷取博客访问量
程序员文章站
2022-04-26 17:09:41
...
话不多说,直接上代码:
import re
import requests
from requests import RequestException
import time
import random
def get_page(url, timeout=10):
    """Download ``url`` and return the response body as text.

    The request is sent with a ``Referer`` header of
    ``https://blog.csdn.net`` and a desktop Chrome ``User-Agent`` header.

    Args:
        url: Address of the page to fetch.
        timeout: Seconds to wait for the server before giving up. Without a
            timeout ``requests.get`` may block indefinitely on a stalled
            connection.

    Returns:
        The response text when the HTTP status code is 200. ``None`` for any
        other status code, or when the request raises ``RequestException``
        (in which case '请求出错' is printed).
    """
    try:
        headers = {
            # Present the request as coming from the CSDN blog site.
            'Referer': 'https://blog.csdn.net',
            # Present the request as coming from a desktop browser.
            'User-Agent': 'Mozilla/5.0 (Windows NT 10.0; Win64; x64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/68.0.3440.75 Safari/537.36'
        }
        # A timeout turns a hung connection into a RequestException
        # (requests' Timeout errors derive from it) instead of a freeze.
        response = requests.get(url, headers=headers, timeout=timeout)
        if response.status_code == 200:
            return response.text
        return None
    except RequestException:
        print('请求出错')
        return None
# Compiled once at import time. The raw string keeps ``\d`` a regex escape
# rather than an (invalid) Python string escape.
_READ_COUNT_RE = re.compile(r'<span.*?read-count.*?(\d+).*?</span>')


def parse_page(html):
    """Extract the read count from an article page.

    Searches ``html`` for a ``<span>`` whose text up to the closing tag
    contains ``read-count`` followed by a run of digits, and returns that
    run as an integer. ``.`` does not match newlines here, so the whole
    span must sit on a single line.

    Args:
        html: Page markup as a ``str``.

    Returns:
        The read count as an ``int``, or ``None`` when the pattern is not
        found or ``html`` is not a string (in which case '解析出错' is
        printed).
    """
    try:
        return int(_READ_COUNT_RE.search(html).group(1))
    except (AttributeError, TypeError, ValueError):
        # AttributeError: no match (search() returned None).
        # TypeError: html is not a str (e.g. None or bytes).
        # ValueError: defensive guard around int().
        print('解析出错')
        return None
def main():
    """Fetch a fixed list of articles in a loop and print their read counts.

    Each round requests every URL in turn with ``get_page``, parses the read
    count with ``parse_page`` and prints it, then sleeps for a random 60-83
    seconds before the next round. The loop runs until an exception escapes
    it, at which point '出错啦!' and the exception are printed and the
    function returns. ``KeyboardInterrupt`` is not caught here.
    """
    # URLs of the blog posts to request on every round.
    article_urls = (
        'https://blog.csdn.net/qinqi100233/article/details/107505297',
        'https://blog.csdn.net/qinqi100233/article/details/107505099',
        'https://blog.csdn.net/qinqi100233/article/details/107455959',
        'https://blog.csdn.net/qinqi100233/article/details/107236836',
        'https://blog.csdn.net/qinqi100233/article/details/107236367',
    )
    try:
        while True:
            for url in article_urls:
                html = get_page(url)
                if not html:
                    continue
                read_num = parse_page(html)
                # Compare against None so a genuine count of 0 is still
                # reported instead of being mistaken for a parse failure.
                if read_num is not None:
                    print('当前阅读量:', read_num)
            sleep_time = random.randint(60, 83)
            print('please wait', sleep_time, 's')
            # Throttle the request rate; overly frequent requests trigger
            # the site's anti-crawler measures.
            time.sleep(sleep_time)
    except Exception as exc:
        # Last-resort boundary: report what went wrong rather than hiding it.
        print('出错啦!', exc)
# Run the loop only when executed as a script, not when imported.
if __name__ == '__main__':
    main()
这是运行成功后的效果
因为怕被官方查到后封博客,所以把访问间隔设置得长了一些,阅读量是慢慢地增加的,也请各位体谅。
上一篇: HSQLDB基本认识