diff options
-rw-r--r-- | myproject/myproject/pipelines.py | 15 | ||||
-rw-r--r-- | myproject/myproject/settings.py | 6 | ||||
-rw-r--r-- | myproject/myproject/spiders/blogitems.py | 3 |
3 files changed, 14 insertions, 10 deletions
diff --git a/myproject/myproject/pipelines.py b/myproject/myproject/pipelines.py
index 0bdee9f..4cf9b9e 100644
--- a/myproject/myproject/pipelines.py
+++ b/myproject/myproject/pipelines.py
@@ -1,11 +1,16 @@
 # -*- coding: utf-8 -*-
+import json
 
-# Define your item pipelines here
-#
-# Don't forget to add your pipeline to the ITEM_PIPELINES setting
-# See: https://doc.scrapy.org/en/latest/topics/item-pipeline.html
+class FileWritePipeline(object):
+
+    def open_spider(self, spider):
+        self.file = open('items.json', 'w')
+
+    def close_spider(self, spider):
+        self.file.close()
 
 
-class MyprojectPipeline(object):
     def process_item(self, item, spider):
+        line = json.dumps(dict(item)) + "\n"
+        self.file.write(line)
         return item
diff --git a/myproject/myproject/settings.py b/myproject/myproject/settings.py
index 6790f78..cbd1ed9 100644
--- a/myproject/myproject/settings.py
+++ b/myproject/myproject/settings.py
@@ -65,9 +65,9 @@ LOG_LEVEL = 'WARNING'
 
 # Configure item pipelines
 # See https://doc.scrapy.org/en/latest/topics/item-pipeline.html
-#ITEM_PIPELINES = {
-#    'myproject.pipelines.MyprojectPipeline': 300,
-#}
+ITEM_PIPELINES = {
+    'myproject.pipelines.FileWritePipeline': 300,
+}
 
 # Enable and configure the AutoThrottle extension (disabled by default)
 # See https://doc.scrapy.org/en/latest/topics/autothrottle.html
diff --git a/myproject/myproject/spiders/blogitems.py b/myproject/myproject/spiders/blogitems.py
index 805681b..973f022 100644
--- a/myproject/myproject/spiders/blogitems.py
+++ b/myproject/myproject/spiders/blogitems.py
@@ -28,5 +28,4 @@ class BlogItemsSpider(scrapy.Spider):
                 date=date,
                 year=year
             )
-            print(blog_post)
-
+            yield blog_post