about summary refs log tree commit diff
path: root/check_links.py
diff options
context:
space:
mode:
author Cody Hiar <cody@hiar.ca> 2023-03-13 10:44:49 -0600
committer Cody Hiar <cody@hiar.ca> 2023-03-13 10:44:49 -0600
commit f69fe236e09bd3d34975335b091142804001b4ab (patch)
tree 389271d4cc1f6ac169a6a53a5924c4c1c8fc4fc5 /check_links.py
Initial commit
Diffstat (limited to 'check_links.py')
-rw-r--r-- check_links.py 41
1 files changed, 41 insertions, 0 deletions
diff --git a/check_links.py b/check_links.py
new file mode 100644
index 0000000..46feb0f
--- /dev/null
+++ b/check_links.py
@@ -0,0 +1,41 @@
+import asyncio
+
+import aiohttp
+import markdown
+from bs4 import BeautifulSoup
+
+# Render the README to HTML so its links can be pulled out with an HTML parser.
+# The encoding is pinned so non-ASCII content is decoded the same way on every
+# platform instead of depending on the locale's default encoding.
+with open("README.md", encoding="utf-8") as f:
+    body_markdown = f.read()
+html_page = markdown.markdown(body_markdown)
+
+
+print("Gathering links...")
+soup = BeautifulSoup(html_page, features="lxml")
+# Keep only absolute HTTP(S) targets. Anchors without an href yield None, and
+# in-page fragments, relative paths or mailto: links cannot be requested over
+# HTTP, so they would otherwise all be reported as broken with status 0.
+urls = [
+    href
+    for href in (link.get("href") for link in soup.find_all("a"))
+    if href and href.lower().startswith(("http://", "https://"))
+]
+
+
+async def get_url_response(session, url):
+    """Request ``url`` through ``session`` and return ``(status, url)``.
+
+    Any exception raised while making the request is caught here and
+    reported as status ``0`` instead of propagating to the caller.
+    """
+    try:
+        async with session.get(url) as response:
+            status = response.status
+    except Exception:
+        # Deliberately broad: whatever goes wrong, the link is just "failed".
+        status = 0
+    return (status, url)
+
+
+async def main():
+    """Request every collected URL concurrently and print the failures.
+
+    All requests share one ``aiohttp.ClientSession`` configured with
+    ``ClientTimeout(total=20)``. Every link whose status is not exactly 200
+    (including the 0 that ``get_url_response`` reports for failed requests)
+    is printed as ``status url``.
+    """
+    timeout = aiohttp.ClientTimeout(total=20)
+    async with aiohttp.ClientSession(timeout=timeout) as session:
+        print("Checking links...")
+        # gather() schedules the coroutines itself and returns the results in
+        # the same order as ``urls``.
+        responses = await asyncio.gather(
+            *(get_url_response(session, url) for url in urls)
+        )
+        for status, url in responses:
+            if status != 200:
+                print(status, url)
+
+# asyncio.run() creates the event loop, runs main() to completion and closes
+# the loop afterwards. Relying on asyncio.get_event_loop() to create a loop
+# when none is running is deprecated.
+asyncio.run(main())