aboutsummaryrefslogtreecommitdiff
path: root/myproject/myproject/spiders/linkfinder.py
blob: 04c057b8a54fdc3719cf6a71466dbebcccde9808 (plain)
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
# -*- coding: utf-8 -*-
from urllib.parse import urlparse

import scrapy
from scrapy.spiders import CrawlSpider, Rule
from scrapy.linkextractors import LinkExtractor


class LinkFinderSpider(CrawlSpider):
    """Crawl codyhiar.com and print every internal link that is discovered.

    The single rule below has no callback, so pages are not parsed into
    items; each batch of extracted links is only passed through
    ``print_internal_links`` before Scrapy schedules them.
    """

    name = 'linkfinder'
    # Use the registrable domain: it covers the bare host used in
    # ``start_urls`` as well as ``www.`` and any other subdomain. Listing
    # only ``www.codyhiar.com`` would make links to ``codyhiar.com`` count
    # as offsite and be dropped, ending the crawl after the first page.
    allowed_domains = ['codyhiar.com']
    start_urls = ['https://codyhiar.com/']

    rules = (Rule(LinkExtractor(allow=()), process_links='print_internal_links'),)

    def print_internal_links(self, links):
        """Print the URL of each internal link and return ``links`` unchanged.

        A link counts as internal when the hostname of its URL equals one of
        ``allowed_domains`` or is a subdomain of one. The hostname is compared
        rather than the whole URL so that an external URL that merely mentions
        the domain in its path or query string is not reported.

        Args:
            links: Iterable of link objects exposing a ``url`` attribute, as
                handed to a rule's ``process_links`` hook.

        Returns:
            The same ``links`` object, unfiltered, so the rule keeps
            processing every extracted link.
        """
        for link in links:
            # ``hostname`` is None for URLs without a network location.
            host = urlparse(link.url).hostname or ''
            if any(
                host == domain or host.endswith('.' + domain)
                for domain in self.allowed_domains
            ):
                print(link.url)
        return links