blob: 2c84de053b8e83660224170ae2d0a4eb95b51ea3 (
plain)
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
|
import asyncio
import aiohttp
import markdown
from bs4 import BeautifulSoup
# Read the project README, render it to HTML, and collect every
# hyperlink target so the links can be validated concurrently below.
with open("README.md", encoding="utf-8") as f:
    body_markdown = f.read()

html_page = markdown.markdown(body_markdown)

print("Gathering links...")
soup = BeautifulSoup(html_page, features="lxml")
# `find_all` is the current API name (`findAll` is a deprecated alias).
# Anchors without an href attribute yield None from .get("href"); filter
# them out so we never hand None to the HTTP client in main().
urls = [link.get("href") for link in soup.find_all("a") if link.get("href")]
async def get_url_response(session, url):
    """Fetch *url* via *session* and return a ``(status_code, url)`` tuple.

    Request failures (connection errors, timeouts, malformed URLs) are
    reported as status ``0`` instead of raising, so a single bad link
    cannot abort the whole ``asyncio.gather`` in ``main()``.
    """
    try:
        async with session.get(url) as resp:
            return (resp.status, url)
    except (aiohttp.ClientError, asyncio.TimeoutError, ValueError):
        # 0 is not a valid HTTP status, so main() reports it as broken.
        # (The original returned the literal 000 — the same value, but
        # misleadingly formatted to look like a three-digit status code.)
        # Narrowed from a blanket `except Exception`, which would also
        # have hidden genuine programming errors.
        return (0, url)
async def main():
    """Check every URL in the module-level ``urls`` list concurrently.

    Prints each link whose response status is not 200 (including 0 for
    requests that failed outright — see ``get_url_response``).
    """
    # One shared session with a 30s total-per-request timeout.
    timeout = aiohttp.ClientTimeout(total=30)
    async with aiohttp.ClientSession(timeout=timeout) as session:
        print("Checking links...")
        # gather() accepts coroutines directly and schedules them itself;
        # wrapping each one in asyncio.ensure_future() was redundant.
        responses = await asyncio.gather(
            *(get_url_response(session, url) for url in urls)
        )
    # The index from the original enumerate() was never used; unpack the
    # (status, url) pairs directly.
    for status, url in responses:
        if status != 200:
            print(status, url)
# asyncio.run() creates, runs, and closes its own event loop; the
# get_event_loop() + run_until_complete() pattern is deprecated for this
# use (DeprecationWarning since Python 3.10) and leaked the loop.
asyncio.run(main())
|