diff options
Diffstat (limited to 'myproject/myproject/spiders/linkfinder.py')
-rw-r--r-- | myproject/myproject/spiders/linkfinder.py | 18 |
1 files changed, 18 insertions, 0 deletions
# -*- coding: utf-8 -*-
"""Spider that crawls codyhiar.com and prints every internal link it finds.

File: myproject/myproject/spiders/linkfinder.py
"""
import scrapy
from scrapy.spiders import CrawlSpider, Rule
from scrapy.linkextractors import LinkExtractor
from scrapy.utils.url import url_is_from_any_domain


class LinkFinderSpider(CrawlSpider):
    """Crawl the site starting at ``start_urls`` and print internal links.

    Every page reached through the single crawl rule has its extracted links
    passed to :meth:`print_internal_links`, which prints the ones that belong
    to ``allowed_domains`` and hands the full list back to Scrapy so the
    crawl continues.
    """

    name = 'linkfinder'
    # Use the bare registrable domain: Scrapy's offsite filtering accepts the
    # listed domain and all of its subdomains, so this covers both
    # codyhiar.com (the start URL) and www.codyhiar.com.  Listing only
    # 'www.codyhiar.com' would cause links to the bare host to be dropped.
    allowed_domains = ['codyhiar.com']
    start_urls = ['https://codyhiar.com/']

    rules = (
        # No callback is needed; the rule exists only to follow links and to
        # route each page's extracted links through print_internal_links.
        Rule(
            LinkExtractor(allow=()),
            process_links='print_internal_links',
            follow=True,
        ),
    )

    def print_internal_links(self, links):
        """Print the URL of each extracted link that points at this site.

        Args:
            links: list of ``scrapy.link.Link`` objects extracted from one
                response by the rule's link extractor.

        Returns:
            The same ``links`` list, unmodified, so Scrapy schedules the
            links exactly as it would without this hook.
        """
        for link in links:
            # Compare the URL's host against the allowed domains rather than
            # searching the whole URL string, so that external URLs which
            # merely mention the domain (e.g. in a query string, or as part
            # of a longer host name) are not reported as internal.
            if url_is_from_any_domain(link.url, self.allowed_domains):
                print(link.url)
        return links