Python 爬取微博转发以及转发后的点赞数、转发人信息
程序员文章站
2022-05-02 20:42:24
...
本文延续上一篇博客，目的是爬取某条微博的转发数据：转发人的 id、昵称、转发时间、转发正文，以及该条转发获得的点赞数。接口返回的是 JSON，数据解析没有太大难度，关键是找到翻页规律（请求 URL 中的 page 参数逐页递增）。不多说，下面直接贴出代码（基于 Python 2）：
# -*- coding:utf-8 -*-
__author__ = 'TengYu'
import io
import json
import re
import time

import requests
import xlwt
# HTTP headers passed to the requests made by this script. Both values look
# like placeholders; fill in a real browser User-Agent and a logged-in Cookie.
headers = {
    'User-Agent': 'Your-agent',
    'Cookie': 'Your-Cookie',
}
# Utility class that strips unwanted links, tags, etc. from scraped post text.
class Tool:
    """Regex-based cleaner for the HTML fragments scraped as post text."""

    # <img ...> tags.
    deleteImg = re.compile(r'<img.*?>')
    # Opening and closing <tr> / <div> tags.
    newLine = re.compile(r'<tr>|<div>|</tr>|</div>')
    # "//" plus the shortest run up to the next ":" -- presumably the
    # "//@name:" markers of a repost chain (TODO confirm against real data).
    deleteAite = re.compile(r'//.*?:')
    # Whole <a>...</a> elements, or a dangling "<a href='https:" prefix.
    deleteAddr = re.compile(r"<a.*?>.*?</a>|<a href='https:")
    # Any remaining tag.
    deleteTag = re.compile(r'<.*?>')
    # "Reply" prefixes in both spellings, with or without a trailing "@".
    # Unicode literal so the pattern also matches unicode text on Python 2.
    deleteWord = re.compile(u'回复@|回覆@|回覆|回复')

    @classmethod
    def replace(cls, x):
        """Return ``x`` with reply prefixes, markup and "//...:" spans removed.

        Args:
            x: Raw text, possibly containing HTML. ``None`` or an empty
                string yields ``''``.

        Returns:
            The cleaned text with surrounding whitespace stripped.
        """
        if not x:
            return ''
        x = cls.deleteWord.sub('', x)
        # Strip all markup *before* looking for "//...:" spans. Otherwise the
        # "//" inside an href such as "https://..." is taken for the start of
        # such a span and ordinary text up to the next ":" is deleted.
        x = cls.deleteImg.sub('', x)
        x = cls.deleteAddr.sub('', x)
        x = cls.newLine.sub('', x)
        x = cls.deleteTag.sub('', x)
        x = cls.deleteAite.sub('', x)
        return x.strip()
class zhuanfa(object):
    """Download the reposts of one Weibo status to a text file and an .xls sheet.

    Pages of the ``repostTimeline`` endpoint are requested one after another
    with the module-level ``headers``. For every repost the user id, screen
    name, creation time, cleaned text and like count are stored.
    """

    # Page-by-page repost listing; {0} is the status id, {1} the page number.
    REPOST_URL = ('https://m.weibo.cn/api/statuses/repostTimeline'
                  '?id={0}&page={1}')
    # Titles written to the first row of the sheet.
    COLUMNS = ('id', 'name', 'time', 'text', 'likes')

    def get_zhuanfa(self, weibo_id='4303334066243259', max_count=1000,
                    txt_path='filename.txt', xls_path='filename.xls',
                    delay=3, max_failures=3):
        """Scrape reposts and save them to ``txt_path`` and ``xls_path``.

        Args:
            weibo_id: Id of the status whose reposts are fetched.
            max_count: Stop requesting pages once this many reposts are
                stored. Checked between pages, so the last page is written
                in full and the total may slightly exceed this value.
            txt_path: Output file receiving one cleaned repost text per line.
            xls_path: Output workbook with one row per repost.
            delay: Seconds to sleep after each page request.
            max_failures: Stop after this many consecutive pages that failed
                or contained no reposts (e.g. the end of the listing).

        Returns:
            The number of reposts written.
        """
        excel = xlwt.Workbook(encoding='utf-8')
        sheet = excel.add_sheet('sheet1')
        for column, title in enumerate(self.COLUMNS):
            sheet.write(0, column, title)

        count = 0
        page = 0
        failures = 0
        try:
            with io.open(txt_path, 'w', encoding='utf-8') as text_file:
                while count < max_count and failures < max_failures:
                    page += 1
                    url = self.REPOST_URL.format(weibo_id, page)
                    print(url)
                    reposts = self._fetch_page(url)
                    if reposts:
                        failures = 0
                        for repost in reposts:
                            if not isinstance(repost, dict):
                                continue
                            count += 1
                            self._write_repost(sheet, text_file, count, repost)
                        print("已经获取" + str(count) + "条数据")
                    else:
                        # Without this counter a missing page would be
                        # skipped silently and the loop would never end.
                        failures += 1
                    time.sleep(delay)
        finally:
            # Keep whatever was collected even if the run is interrupted.
            excel.save(xls_path)
        return count

    def _fetch_page(self, url):
        """Return the list of repost dicts found at ``url``, or ``[]``."""
        try:
            response = requests.get(url, headers=headers, timeout=30)
            payload = json.loads(response.text)
        except (requests.RequestException, ValueError) as e:
            # Network problem or a body that is not JSON: report and move on.
            print(e)
            return []
        # The reposts are read from payload['data']['data']; any other shape
        # is treated as "no data".
        dataset = payload.get('data') if isinstance(payload, dict) else None
        reposts = dataset.get('data') if isinstance(dataset, dict) else None
        return reposts if isinstance(reposts, list) else []

    def _write_repost(self, sheet, text_file, row, repost):
        """Write one repost to row ``row`` of ``sheet`` and to ``text_file``."""
        user = repost.get('user') or {}
        text = Tool.replace(repost.get('text') or u'')
        # Unicode is written as-is: the text file encodes to UTF-8 itself, and
        # the workbook is given str values rather than pre-encoded bytes.
        text_file.write(text + u'\n')
        sheet.write(row, 0, user.get('id'))
        sheet.write(row, 1, user.get('screen_name'))
        sheet.write(row, 2, repost.get('created_at'))
        sheet.write(row, 3, text)
        sheet.write(row, 4, repost.get('attitudes_count'))
if __name__ == '__main__':
    # Run the repost scraper when this file is executed as a script.
    scraper = zhuanfa()
    scraper.get_zhuanfa()
尊重原作,转载请注明,转载自:https://blog.csdn.net/kr2563