aboutsummaryrefslogtreecommitdiff
path: root/check_links.py
blob: 2c84de053b8e83660224170ae2d0a4eb95b51ea3 (plain)
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
import asyncio

import aiohttp
import markdown
from bs4 import BeautifulSoup

# Read the README and render it to HTML so we can extract anchor targets.
with open("README.md", encoding="utf-8") as f:
    body_markdown = f.read()
html_page = markdown.markdown(body_markdown)


print("Gathering links...")
# Collect every <a href="..."> value from the rendered HTML.
# find_all() is the current bs4 name; findAll is a deprecated alias.
soup = BeautifulSoup(html_page, features="lxml")
urls = [link.get("href") for link in soup.find_all("a")]


async def get_url_response(session, url):
    """Fetch *url* with *session* and return a ``(status, url)`` tuple.

    Any failure at all (timeout, DNS error, invalid URL, connection
    refused, ...) is reported as status ``0`` so the caller can flag
    the link without the whole run crashing.
    """
    try:
        async with session.get(url) as resp:
            return (resp.status, url)
    except Exception:
        # Broad catch is deliberate: a link checker should report
        # failures, not die on them. 0 (not the odd literal 000) marks
        # "no HTTP response obtained".
        return (0, url)


async def main():
    """Check every gathered URL concurrently and print the broken ones.

    Uses a single shared ClientSession with a 30-second total timeout
    per request; any response whose status is not 200 (including the
    0 sentinel for connection failures) is printed.
    """
    timeout = aiohttp.ClientTimeout(total=30)
    async with aiohttp.ClientSession(timeout=timeout) as session:
        # Fan out one task per URL so all checks run concurrently.
        tasks = [
            asyncio.ensure_future(get_url_response(session, url))
            for url in urls
        ]

        print("Checking links...")
        responses = await asyncio.gather(*tasks)
        # Iterate the (status, url) pairs directly — the original
        # enumerate() produced an index that was never used.
        for status, url in responses:
            if status != 200:
                print(status, url)


# asyncio.run() creates, runs, and closes the event loop for us.
# The older get_event_loop()/run_until_complete() pattern is deprecated
# (get_event_loop with no running loop warns since Python 3.10).
asyncio.run(main())