Scrapy 案例:爬取数据并保存到 Excel
程序员文章站
2022-04-16 16:39:41
本文通过一个 Scrapy 案例,演示如何爬取慕课网(imooc.com)的课程列表,并使用 xlwt 将结果保存到 Excel 文件。下面依次给出 items、spider 和 pipeline 三部分代码。
# -*- coding: utf-8 -*-
import scrapy
class Mkw1Item(scrapy.Item):
    """Fields collected for a single entry of the scraped course list.

    The field names double as the item's keys, so they are kept exactly as
    they were (including ``type``, which only shadows the builtin inside this
    class namespace).
    """

    # URL-like fragment taken from the entry's inline ``style`` attribute.
    img = scrapy.Field()
    # Text of the first <p> inside the entry's link.
    title = scrapy.Field()
    # Text of the second <p> inside the entry's link.
    type = scrapy.Field()
    # Text of the first <span> in the third <p>
    # (presumably the price -- TODO confirm against the page).
    pic = scrapy.Field()
# -*- coding: utf-8 -*-
import scrapy
from .. import items
import re
class MukeSpider(scrapy.Spider):
    """Spider that scrapes the entries of the imooc course list page.

    Each ``<a>`` element found under the list container yields one
    ``Mkw1Item`` with the keys ``img``, ``title``, ``type`` and ``pic``.
    """

    name = 'muke'
    allowed_domains = ['imooc.com']
    start_urls = ['https://www.imooc.com/new/course/list']

    # Same pattern the parser always used, written as a raw string (the
    # non-raw form relied on the invalid ``\.`` escape) and compiled once
    # instead of on every loop iteration.
    _IMG_PATTERN = re.compile(r'//.*\.*g')

    def parse(self, response):
        """Yield one ``Mkw1Item`` per course link on the list page.

        Args:
            response: The downloaded list page.

        Yields:
            A freshly created ``Mkw1Item`` for every matched ``<a>`` element.
            A field whose node is missing is set to ``''`` rather than
            aborting the whole page with ``IndexError``.
        """
        for card in response.xpath('//*[@id="main"]/div[5]/div[1]/a'):
            # A new item per entry: re-yielding one shared instance would let
            # later iterations overwrite data that is still being processed.
            item = items.Mkw1Item()

            # XPaths are relative to the current <a>; a document-wide
            # ``//a[N]`` can match unrelated links elsewhere on the page.
            style = card.xpath('./div/@style').extract_first(default='')
            found = self._IMG_PATTERN.search(style)
            item['img'] = found.group(0) if found else ''

            item['title'] = card.xpath('./p[1]/text()').extract_first(default='')
            item['type'] = card.xpath('./p[2]/text()').extract_first(default='')
            item['pic'] = card.xpath(
                './p[3]/span[1]/text()').extract_first(default='')
            yield item
# -*- coding: utf-8 -*-
import xlwt
class Mkw1Pipeline(object):
    """Item pipeline that writes every scraped item as a row of a worksheet.

    Row 0 holds the header; each processed item fills the next free row, and
    the workbook is saved when the spider closes.
    """

    def __init__(self):
        """Create the workbook and sheet, and write the header row."""
        # Index of the next row to fill (row 0 is the header).
        self.num = 1
        self.wb = xlwt.Workbook()
        self.sheet = self.wb.add_sheet('慕课网')
        # Column order of the sheet; also the item keys that get exported.
        self.list = ['img', 'title', 'type', 'pic']
        for column, field in enumerate(self.list):
            self.sheet.write(0, column, field)

    def process_item(self, item, spider):
        """Append ``item`` to the sheet as one row.

        Args:
            item: Mapping-like scraped item with the keys in ``self.list``.
            spider: The spider that produced the item (unused).

        Returns:
            The same ``item``, so that later pipelines keep receiving it.
        """
        # Columns follow the header list instead of the item's own key order,
        # so each value always lands under its matching header.
        for column, field in enumerate(self.list):
            self.sheet.write(self.num, column, item.get(field, ''))
        self.num += 1
        return item

    def close_spider(self, spider):
        """Save the workbook once the spider has finished."""
        # NOTE(review): xlwt produces the legacy .xls format, so the .xlsx
        # extension may make Excel reject the file -- confirm and consider
        # renaming. The path is left unchanged here.
        self.wb.save('../mkw.xlsx')
本文地址:https://blog.csdn.net/Hoo_ligan/article/details/110261766
上一篇: 你太坏了
下一篇: 他是北宋禁军统帅,揭秘杨信的生平经历