scrapy 定时执行的两种方法
程序员文章站
2022-05-10 17:20:38
...
# -*- coding: utf-8 -*-
import subprocess
import schedule
import time
import datetime
from multiprocessing import Process
from scrapy import cmdline
import logging
def crawl_work():
    """Placeholder job run by the scheduler.

    The real crawl (launching ``scrapy crawl it`` in a subprocess) was
    commented out in the original article; this stub only prints a
    100-character separator so the schedule loop has visible output.
    """
    print('-' * 100)
if __name__ == '__main__':
    banner = '*' * 10
    print(banner + '开始执行定时爬虫' + banner)
    # Register the job: run crawl_work once every minute.
    schedule.every(1).minutes.do(crawl_work)
    now = datetime.datetime.now().strftime('%Y-%m-%d %H:%M:%S')
    print('当前时间为{}'.format(now))
    print(banner + '定时爬虫开始运行' + banner)
    # Poll the scheduler forever, checking for due jobs every 10 seconds.
    while True:
        schedule.run_pending()
        time.sleep(10)
# -*- coding: utf-8 -*-
from multiprocessing import Process
from scrapy import cmdline
import time
import logging
# Configuration only: one entry per spider -- the spider name and its
# run frequency (seconds between runs; consumed by start_spider below).
confs = [
{
"spider_name": "it",
"frequency": 2,
},
]
def start_spider(spider_name, frequency):
    """Run ``scrapy crawl <spider_name>`` in a loop, forever.

    Each crawl runs in a fresh child process that is joined before the
    next one starts, with a pause of ``frequency`` seconds between runs.

    :param spider_name: name of the Scrapy spider to execute.
    :param frequency: seconds to sleep between consecutive crawls.
    """
    args = ["scrapy", "crawl", spider_name]
    while True:
        start = time.time()
        # NOTE(review): cmdline.execute is isolated in a child process --
        # presumably because it cannot be invoked repeatedly in-process;
        # confirm against Scrapy's cmdline semantics.
        p = Process(target=cmdline.execute, args=(args,))
        p.start()
        p.join()
        # Lazy %-style logging args: formatting only happens if DEBUG is on.
        logging.debug("### use time: %s", time.time() - start)
        time.sleep(frequency)
if __name__ == '__main__':
    # Launch one supervisor process per configured spider.
    for conf in confs:
        worker = Process(
            target=start_spider,
            args=(conf["spider_name"], conf["frequency"]),
        )
        worker.start()
        # NOTE(review): indentation was lost in the source paste; the
        # day-long sleep appears to sit inside the loop, keeping the
        # parent alive after each launch -- confirm placement.
        time.sleep(86400)