欢迎您访问程序员文章站本站旨在为大家提供分享程序员计算机编程知识!
您现在的位置是: 首页

scrapy 定时执行的两种方法

程序员文章站 2022-05-10 17:20:38
...
# -*- coding: utf-8 -*-
import subprocess
import schedule
import time
import datetime
from multiprocessing import Process
from scrapy import cmdline
import logging
def crawl_work():
    # subprocess.Popen('scrapy crawl it')
    print('-'*100)
    # args = ["scrapy", "crawl", 'it']
    # while True:
    #     start = time.time()
    #     p = Process(target=cmdline.execute, args=(args,))
    #     p.start()
    #     p.join()
    #     logging.debug("### use time: %s" % (time.time() - start))
if __name__=='__main__':
    print('*'*10+'开始执行定时爬虫'+'*'*10)
    schedule.every(1).minutes.do(crawl_work)
    print('当前时间为{}'.format(datetime.datetime.now().strftime('%Y-%m-%d %H:%M:%S')))
    print('*' * 10 + '定时爬虫开始运行' + '*' * 10)
    while True:
        schedule.run_pending()
        time.sleep(10)

# -*- coding: utf-8 -*-
from multiprocessing import Process
from scrapy import cmdline
import time
import logging

# 配置参数即可, 爬虫名称,运行频率
confs = [
    {
        "spider_name": "it",
        "frequency": 2,
    },
]


def start_spider(spider_name, frequency):
    args = ["scrapy", "crawl", spider_name]
    while True:
        start = time.time()
        p = Process(target=cmdline.execute, args=(args,))
        p.start()
        p.join()
        logging.debug("### use time: %s" % (time.time() - start))
        time.sleep(frequency)


if __name__ == '__main__':
    for conf in confs:
        process = Process(target=start_spider,args=(conf["spider_name"], conf["frequency"]))
        process.start()
        time.sleep(86400)

 

相关标签: scrapy