blob: 04c057b8a54fdc3719cf6a71466dbebcccde9808 (
plain)
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
|
# -*- coding: utf-8 -*-
from urllib.parse import urlparse

import scrapy
from scrapy.linkextractors import LinkExtractor
from scrapy.spiders import CrawlSpider, Rule
class LinkFinderSpider(CrawlSpider):
    """Crawl codyhiar.com and print every internal link that is found.

    The spider starts at the site root, follows the links extracted from
    each page, and writes the URL of each link whose host belongs to one of
    ``allowed_domains`` to standard output.
    """

    name = 'linkfinder'
    # Use the bare domain: Scrapy's offsite filtering accepts a listed
    # domain and any of its subdomains, so this covers both the apex host
    # used in start_urls and 'www.codyhiar.com'. Listing only the 'www'
    # host would make links to the apex host count as offsite.
    allowed_domains = ['codyhiar.com']
    start_urls = ['https://codyhiar.com/']
    # No callback is set, so this rule only follows links; each batch of
    # extracted links is handed to print_internal_links before scheduling.
    rules = (
        Rule(LinkExtractor(allow=()), process_links='print_internal_links'),
    )

    def print_internal_links(self, links):
        """Print the URL of every internal link, then pass all links on.

        Args:
            links: Link objects extracted from a single response.

        Returns:
            The ``links`` argument, unchanged, so the crawl rule keeps
            processing every extracted link.
        """
        for link in links:
            # Compare the parsed host rather than searching the whole URL,
            # so an external URL that merely mentions the domain in its
            # path or query string is not reported as internal.
            host = urlparse(link.url).hostname or ''
            is_internal = any(
                host == domain or host.endswith('.' + domain)
                for domain in self.allowed_domains
            )
            if is_internal:
                print(link.url)
        return links
|