diff options
author | Cody Hiar <cody@hiar.ca> | 2023-03-13 10:44:49 -0600 |
---|---|---|
committer | Cody Hiar <cody@hiar.ca> | 2023-03-13 10:44:49 -0600 |
commit | f69fe236e09bd3d34975335b091142804001b4ab (patch) | |
tree | 389271d4cc1f6ac169a6a53a5924c4c1c8fc4fc5 /check_links.py |
Initial commit
Diffstat (limited to 'check_links.py')
-rw-r--r-- | check_links.py | 41 |
1 file changed, 41 insertions, 0 deletions
"""Check that every link in README.md responds with HTTP 200.

Parses the README as Markdown, extracts all anchor hrefs, then fetches
each URL concurrently with aiohttp and prints any non-200 responses.
"""
import asyncio

import aiohttp
import markdown
from bs4 import BeautifulSoup

with open("README.md") as f:
    body_markdown = f.read()
html_page = markdown.markdown(body_markdown)


print("Gathering links...")
soup = BeautifulSoup(html_page, features="lxml")
# find_all replaces the deprecated findAll alias; skip anchors without an
# href (e.g. named anchors) so we never try to fetch None.
urls = [link.get("href") for link in soup.find_all("a") if link.get("href")]


async def get_url_response(session, url):
    """Fetch *url* with *session* and return ``(status_code, url)``.

    Any failure (DNS error, timeout, SSL problem, ...) is reported as
    status 0 instead of raised, so one bad link cannot abort the run.
    """
    try:
        async with session.get(url) as resp:
            return (resp.status, url)
    except Exception:
        return (0, url)  # 0 = "could not connect at all"


async def main():
    """Check every collected URL concurrently and print the failures."""
    # One shared session; 20-second total timeout per request.
    timeout = aiohttp.ClientTimeout(total=20)
    async with aiohttp.ClientSession(timeout=timeout) as session:
        # gather accepts coroutines directly; no ensure_future needed.
        tasks = [get_url_response(session, url) for url in urls]

        print("Checking links...")
        responses = await asyncio.gather(*tasks)
        for status, url in responses:
            if status != 200:
                print(status, url)


if __name__ == "__main__":
    # asyncio.run replaces the deprecated
    # get_event_loop()/run_until_complete() pattern.
    asyncio.run(main())