aboutsummaryrefslogtreecommitdiff
path: root/myproject/myproject/spiders/blogitems.py
diff options
context:
space:
mode:
Diffstat (limited to 'myproject/myproject/spiders/blogitems.py')
-rw-r--r--myproject/myproject/spiders/blogitems.py32
1 files changed, 32 insertions, 0 deletions
diff --git a/myproject/myproject/spiders/blogitems.py b/myproject/myproject/spiders/blogitems.py
new file mode 100644
index 0000000..805681b
--- /dev/null
+++ b/myproject/myproject/spiders/blogitems.py
@@ -0,0 +1,32 @@
+# -*- coding: utf-8 -*-
+import scrapy
+
+from ..items import BlogPostItem
+
+
+class BlogItemsSpider(scrapy.Spider):
+ name = 'blogitems'
+ allowed_domains = ['codyhiar.com']
+ start_urls = ['http://codyhiar.com/']
+
+ def __init__(self, tag=None, *args, **kwargs):
+ super().__init__(*args, **kwargs)
+ self.start_urls = ['https://www.codyhiar.com/tags/{}'.format(tag)]
+
+ def parse(self, response):
+ # from scrapy.shell import inspect_response
+ # inspect_response(response, self)
+ year_archives = response.css('.archive')
+ for year_archive in year_archives:
+ year = year_archive.css('h3::text').extract_first()
+ posts = year_archive.css('.post-item')
+ for post in posts:
+ title = post.css('a::text').extract_first().strip()
+ date = post.css('.post-time::text').extract_first()
+ blog_post = BlogPostItem(
+ title=title,
+ date=date,
+ year=year
+ )
+ print(blog_post)
+