Python 新浪实时新闻
程序员文章站
2022-05-02 22:02:05
...
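'''
Python 新浪实时新闻 (Sina real-time news) by 郑瑞国

Periodically fetches the Sina news front page, extracts the text of its
links, and appends the entries not yet recorded to a local text file.
'''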
import re,time
import urllib.request
def open_url(url):
    """Fetch ``url`` and return the response body as text.

    The body is decoded as UTF-8; byte sequences that are not valid UTF-8
    are dropped (``errors="ignore"``) instead of raising.

    Args:
        url: Any URL accepted by ``urllib.request.urlopen``.

    Returns:
        The decoded response body as a ``str``.

    Raises:
        urllib.error.URLError: Propagated from ``urlopen`` when the request
            fails.
    """
    # Close the response deterministically instead of leaving the
    # underlying connection for the garbage collector to release.
    with urllib.request.urlopen(url) as response:
        return response.read().decode("utf-8", "ignore")
def find_text(url):
    """Return the inner text of every ``<a>`` element on the page at ``url``.

    Args:
        url: Address of the page to download (passed to ``open_url``).

    Returns:
        A list of strings, one per anchor, in document order. The text is
        whatever sits between the opening tag and ``</a>``, so it may still
        contain nested markup. Anchors whose text spans several lines are
        not matched because ``.`` does not match newlines here.
    """
    # ``\b`` stops the pattern from matching other tags that merely start
    # with "a" (<abbr>, <article>, ...). ``[^>]*`` ends at the close of the
    # opening tag, so several links on one line are captured individually
    # instead of a greedy ``.*`` swallowing all but the last one.
    return re.findall(r'<a\b[^>]*>(.*?)</a>', open_url(url))
def save_text(text, path=r'd:\test.txt'):
    """Append not-yet-recorded entries of ``text`` to the file at ``path``.

    Entries of 8 characters or fewer are skipped. Every other entry that is
    not already a line of the file (and has not been written earlier in the
    same call) is printed and appended as its own line.

    Args:
        text: Iterable of strings to record.
        path: File used both as the record of known entries and as the
            append target. Defaults to the location the script has always
            used.

    Returns:
        None.
    """
    # A set gives O(1) membership tests. If the file is missing or cannot be
    # read, start with an empty record; only I/O and decoding errors are
    # tolerated, so Ctrl-C and programming errors are no longer swallowed.
    try:
        with open(path, 'r') as existing:
            seen = {line.rstrip('\n') for line in existing}
    except (OSError, UnicodeError):
        seen = set()

    with open(path, 'a') as out:
        for item in text:
            if len(item) <= 8 or item in seen:
                continue
            print(item)
            out.write(item + "\n")
            # Remember it so a repeat within this same batch is not
            # written a second time.
            seen.add(item)

    # Progress marker on the console.
    # NOTE(review): emitted once per call; confirm this matches the
    # intended placement.
    print('*', end=' ')
# Script entry point: poll the news page forever, recording new link texts.
if __name__ == "__main__":
    url = 'http://news.sina.com.cn'
    poll_seconds = 20  # pause between two fetches
    while True:
        try:
            text = find_text(url)
        except OSError as err:
            # urllib.error.URLError and socket-level failures derive from
            # OSError. A transient network problem should not end an
            # endless poller, so report it and retry on the next cycle.
            print('fetch failed:', err)
        else:
            save_text(text)
        time.sleep(poll_seconds)
上一篇: 爬取大众点评22页餐馆名称
下一篇: Python 正则 爬新浪新闻