# -*- coding: utf-8 -*-
"""Spider that collects blog post listings from a codyhiar.com tag page."""
import scrapy

from ..items import BlogPostItem


class BlogItemsSpider(scrapy.Spider):
    """Scrape post titles and dates, grouped by year, from a tag archive.

    The spider takes an optional ``tag`` argument (for example
    ``scrapy crawl blogitems -a tag=python``). When a tag is given, the
    crawl starts at ``https://www.codyhiar.com/tags/<tag>``; otherwise the
    class-level ``start_urls`` is used unchanged.
    """

    name = 'blogitems'
    allowed_domains = ['codyhiar.com']
    start_urls = ['http://codyhiar.com/']

    def __init__(self, tag=None, *args, **kwargs):
        """Initialise the spider, optionally pointing it at one tag page.

        Args:
            tag: Tag name interpolated into the tag archive URL. When
                ``None``, the class-level ``start_urls`` is kept.
            *args: Forwarded to ``scrapy.Spider.__init__``.
            **kwargs: Forwarded to ``scrapy.Spider.__init__``.
        """
        super().__init__(*args, **kwargs)
        # Only override the default start URL when a tag was supplied;
        # formatting ``None`` would request the literal path "/tags/None".
        if tag is not None:
            self.start_urls = ['https://www.codyhiar.com/tags/{}'.format(tag)]

    def parse(self, response):
        """Yield one ``BlogPostItem`` per post found on the page.

        Each ``.archive`` element is treated as one year's group: its
        ``h3`` text supplies ``year``, and every ``.post-item`` inside it
        supplies a ``title`` (anchor text, whitespace-stripped) and a
        ``date`` (``.post-time`` text). A field whose selector matches
        nothing is left as ``None``.

        Args:
            response: The downloaded page to extract posts from.

        Yields:
            BlogPostItem: One item per ``.post-item`` element.
        """
        for year_archive in response.css('.archive'):
            year = year_archive.css('h3::text').extract_first()
            for post in year_archive.css('.post-item'):
                raw_title = post.css('a::text').extract_first()
                # extract_first() returns None when nothing matches; guard
                # before stripping so one malformed entry does not abort
                # the remaining posts on the page.
                title = raw_title.strip() if raw_title is not None else None
                date = post.css('.post-time::text').extract_first()
                yield BlogPostItem(
                    title=title,
                    date=date,
                    year=year,
                )