about | summary | refs | log | tree | commit | diff
path: root/myproject/myproject/spiders/linkfinder.py
diff options
context:
space:
mode:
Diffstat (limited to 'myproject/myproject/spiders/linkfinder.py')
-rw-r--r-- myproject/myproject/spiders/linkfinder.py 18
1 files changed, 18 insertions, 0 deletions
diff --git a/myproject/myproject/spiders/linkfinder.py b/myproject/myproject/spiders/linkfinder.py
new file mode 100644
index 0000000..04c057b
--- /dev/null
+++ b/myproject/myproject/spiders/linkfinder.py
@@ -0,0 +1,18 @@
+# -*- coding: utf-8 -*-
+import scrapy
+from scrapy.spiders import CrawlSpider, Rule
+from scrapy.linkextractors import LinkExtractor
+
+
+class LinkFinderSpider(scrapy.spiders.CrawlSpider):
+ name = 'linkfinder'
+ allowed_domains = ['www.codyhiar.com']
+ start_urls = ['https://codyhiar.com/']
+
+ rules = (Rule(LinkExtractor(allow=()), process_links='print_internal_links'),)
+
+ def print_internal_links(self, links):
+ for link in links:
+ if 'codyhiar.com' in link.url:
+ print(link.url)
+ return links