From f8411cd5804d00ea4c4b38478ede487a847425bd Mon Sep 17 00:00:00 2001
From: Cody Hiar
Date: Tue, 20 Mar 2018 17:56:41 -0600
Subject: Adding in pipeline

---
NOTE(review): this patch was restored from a copy whose line breaks and
indentation had been collapsed. The line counts agree with the @@ hunk
headers and the diffstat, but the leading whitespace of the lines (notably
in spiders/blogitems.py) and the exact position of the blank context lines
in pipelines.py are assumptions. Confirm against the repository, or apply
with `git apply --ignore-whitespace`.

 myproject/myproject/pipelines.py         | 15 ++++++++++-----
 myproject/myproject/settings.py          |  6 +++---
 myproject/myproject/spiders/blogitems.py |  3 +--
 3 files changed, 14 insertions(+), 10 deletions(-)

diff --git a/myproject/myproject/pipelines.py b/myproject/myproject/pipelines.py
index 0bdee9f..4cf9b9e 100644
--- a/myproject/myproject/pipelines.py
+++ b/myproject/myproject/pipelines.py
@@ -1,11 +1,16 @@
 # -*- coding: utf-8 -*-
+import json
 
-# Define your item pipelines here
-#
-# Don't forget to add your pipeline to the ITEM_PIPELINES setting
-# See: https://doc.scrapy.org/en/latest/topics/item-pipeline.html
 
+class FileWritePipeline(object):
+
+    def open_spider(self, spider):
+        self.file = open('items.json', 'w')
+
+    def close_spider(self, spider):
+        self.file.close()
 
-class MyprojectPipeline(object):
     def process_item(self, item, spider):
+        line = json.dumps(dict(item)) + "\n"
+        self.file.write(line)
         return item
diff --git a/myproject/myproject/settings.py b/myproject/myproject/settings.py
index 6790f78..cbd1ed9 100644
--- a/myproject/myproject/settings.py
+++ b/myproject/myproject/settings.py
@@ -65,9 +65,9 @@ LOG_LEVEL = 'WARNING'
 
 # Configure item pipelines
 # See https://doc.scrapy.org/en/latest/topics/item-pipeline.html
-#ITEM_PIPELINES = {
-#    'myproject.pipelines.MyprojectPipeline': 300,
-#}
+ITEM_PIPELINES = {
+    'myproject.pipelines.FileWritePipeline': 300,
+}
 
 # Enable and configure the AutoThrottle extension (disabled by default)
 # See https://doc.scrapy.org/en/latest/topics/autothrottle.html
diff --git a/myproject/myproject/spiders/blogitems.py b/myproject/myproject/spiders/blogitems.py
index 805681b..973f022 100644
--- a/myproject/myproject/spiders/blogitems.py
+++ b/myproject/myproject/spiders/blogitems.py
@@ -28,5 +28,4 @@ class BlogItemsSpider(scrapy.Spider):
                 date=date,
                 year=year
             )
-            print(blog_post)
-
+            yield blog_post
-- 
cgit v1.2.3