欢迎您访问程序员文章站,本站旨在为大家提供分享程序员计算机编程知识!
您现在的位置是: 首页

Python 新浪实时新闻

程序员文章站 2022-05-02 22:02:05
...
'''
Python 新浪实时新闻 by 郑瑞国
'''
import re,time
import urllib.request 
 
def open_url(url):
    """Fetch *url* and return the response body as text.

    The body is decoded as UTF-8; byte sequences that are not valid
    UTF-8 are dropped rather than raising (``errors="ignore"``).

    Args:
        url: The URL to open, passed straight to
            ``urllib.request.urlopen``.

    Returns:
        The decoded response body as a ``str``.

    Raises:
        Whatever ``urllib.request.urlopen`` or ``read()`` raises
        (e.g. ``urllib.error.URLError``); nothing is caught here.
    """
    # Close the response deterministically instead of leaving the
    # underlying connection open until the object is garbage collected.
    with urllib.request.urlopen(url) as response:
        return response.read().decode("utf-8", "ignore")
 
def find_text(url):
    """Return the content of every ``<a>...</a>`` element found at *url*.

    The page is downloaded with ``open_url`` and scanned with a regular
    expression, not an HTML parser. Each result is the raw text between
    the opening and closing tag, so nested markup is not stripped.
    ``.`` does not match newlines, so a link whose content spans
    several lines is not captured.

    Args:
        url: Address of the page to scan.

    Returns:
        A list of strings, one per matched link, in document order.
    """
    # ``[^>]*`` ends at the close of the opening tag. A greedy ``.*``
    # here would run to the last ``>`` on the line, so only the final
    # link of each line would be captured.
    # ``\b`` stops the pattern from matching other tags that merely
    # start with "a" (<abbr>, <article>, <aside>, ...).
    return re.findall(r'<a\b[^>]*>(.*?)</a>', open_url(url))
 
def save_text(text, path=r'd:\test.txt'):
    """Append headlines from *text* that are not yet stored in *path*.

    Every string longer than 8 characters that is not already a line of
    the file is printed and appended to the file, one per line. After
    the batch is processed a ``*`` progress marker is printed.

    Args:
        text: Iterable of candidate headline strings.
        path: File used as the persistent record of headlines already
            seen. Defaults to the location the script has always used.

    Raises:
        OSError: if the file cannot be opened for appending.
    """
    try:
        # errors='replace' keeps a history file written in another
        # encoding from aborting the run; such lines simply never match.
        with open(path, 'r', encoding='utf-8', errors='replace') as history:
            seen = {line.rstrip('\n') for line in history}
    except OSError:
        # No readable history (typically the first run): everything is new.
        seen = set()

    # Explicit UTF-8 so headlines containing characters outside the
    # platform's default code page do not raise UnicodeEncodeError.
    with open(path, 'a', encoding='utf-8') as out:
        for headline in text:
            # Short strings are skipped -- presumably navigation links
            # rather than headlines (threshold kept from the original).
            if len(headline) <= 8 or headline in seen:
                continue
            # Record immediately so a headline repeated within the same
            # batch is written only once.
            seen.add(headline)
            print(headline)
            out.write(headline + '\n')
    print('*', end=' ')
 
if __name__ == "__main__":
    # Poll the Sina news front page forever, storing any new headlines.
    url = 'http://news.sina.com.cn'
    poll_interval = 20  # seconds between two fetches
    while True:
        try:
            text = find_text(url)
        except OSError as exc:
            # urllib's URLError/HTTPError and socket errors all derive
            # from OSError. A transient network failure should not end
            # an endless poller: report it and retry on the next cycle.
            print('fetch failed:', exc)
        else:
            # Only the fetch is guarded; file errors from save_text
            # still propagate so a broken output path is not hidden.
            save_text(text)
        time.sleep(poll_interval)