From 455c7222b52e7c3aa7f4eb4b03e9b6f0a99adaef Mon Sep 17 00:00:00 2001
From: NissesSenap
Date: Fri, 5 Oct 2018 22:23:49 +0200
Subject: [PATCH] Use self.loop.create_task

Create a class and make the event loop available via self.
Call it in the get_title_range function.
---
 .../web_scraping/async_scrape/program.py | 72 ++++++++-----------
 1 file changed, 31 insertions(+), 41 deletions(-)

diff --git a/src/04-asyncio/web_scraping/async_scrape/program.py b/src/04-asyncio/web_scraping/async_scrape/program.py
index 275fccd..ef04965 100644
--- a/src/04-asyncio/web_scraping/async_scrape/program.py
+++ b/src/04-asyncio/web_scraping/async_scrape/program.py
@@ -1,58 +1,48 @@
-import asyncio
-
 import aiohttp
 import bs4
 from colorama import Fore
+import asyncio
 
 
-async def get_html(episode_number: int) -> str:
-    print(Fore.YELLOW + f"Getting HTML for episode {episode_number}", flush=True)
-
-    url = f'https://talkpython.fm/{episode_number}'
-
-    async with aiohttp.ClientSession() as session:
-        async with session.get(url) as resp:
-            resp.raise_for_status()
-
-            return await resp.text()
-
-
-def get_title(html: str, episode_number: int) -> str:
-    print(Fore.CYAN + f"Getting TITLE for episode {episode_number}", flush=True)
-    soup = bs4.BeautifulSoup(html, 'html.parser')
-    header = soup.select_one('h1')
-    if not header:
-        return "MISSING"
+class Scraper:
 
-    return header.text.strip()
+    def __init__(self):
+        self.loop = asyncio.get_event_loop()
+        self.loop.run_until_complete(self.get_title_range())
 
+    async def get_html(self, episode_number: int) -> str:
+        print(Fore.YELLOW + f"Getting HTML for episode {episode_number}", flush=True)
 
-def main():
-    loop = asyncio.get_event_loop()
-    loop.run_until_complete(get_title_range())
-    print("Done.")
+        url = f'https://talkpython.fm/{episode_number}'
+        # resp = requests.get(url)
+        async with aiohttp.ClientSession() as session:
+            async with session.get(url) as resp:
+                resp.raise_for_status()
 
+                return await resp.text()
 
-async def get_title_range_old_version():
-    # Please keep this range pretty small to not DDoS my site. ;)
-    for n in range(150, 160):
-        html = await get_html(n)
-        title = get_title(html, n)
-        print(Fore.WHITE + f"Title found: {title}", flush=True)
+    def get_title(self, html: str, episode_number: int) -> str:
+        print(Fore.CYAN + f"Getting TITLE for episode {episode_number}", flush=True)
+        soup = bs4.BeautifulSoup(html, 'html.parser')
+        header = soup.select_one('h1')
+        if not header:
+            return "MISSING"
 
+        return header.text.strip()
 
-async def get_title_range():
-    # Please keep this range pretty small to not DDoS my site. ;)
+    async def get_title_range(self):
+        # Please keep this range pretty small to not DDoS my site. ;)
 
-    tasks = []
-    for n in range(150, 160):
-        tasks.append((n, asyncio.create_task(get_html(n))))
+        tasks = []
+        for n in range(150, 160):
+            tasks.append((n, self.loop.create_task(self.get_html(n))))
 
-    for n, t in tasks:
-        html = await t
-        title = get_title(html, n)
-        print(Fore.WHITE + f"Title found: {title}", flush=True)
+        for n, t in tasks:
+            html = await t
+            title = self.get_title(html, n)
+            print(Fore.WHITE + f"Title found: {title}", flush=True)
 
 
 if __name__ == '__main__':
-    main()
+    Scraper()
+
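
Note (reviewer sketch, not part of the patch): the snippet below shows the same pattern the commit applies, storing the event loop on self in __init__, queuing work with self.loop.create_task(), and awaiting the tasks in order. The class name MiniScraper, the asyncio.sleep stand-in for the aiohttp request, and the shortened range are illustrative assumptions, not code from the repository.

    import asyncio


    class MiniScraper:

        def __init__(self):
            # Same shape as the patched Scraper: grab the loop, store it on
            # self, and drive the whole scrape from the constructor.
            self.loop = asyncio.get_event_loop()
            self.loop.run_until_complete(self.get_title_range())

        async def get_html(self, episode_number: int) -> str:
            # Stand-in for the aiohttp request in the real program.
            await asyncio.sleep(0.1)
            return f"Episode {episode_number}"

        async def get_title_range(self):
            # Start all downloads first so they overlap...
            tasks = []
            for n in range(150, 153):
                tasks.append((n, self.loop.create_task(self.get_html(n))))

            # ...then await the results in order.
            for n, t in tasks:
                title = await t
                print(f"Title found: {title}", flush=True)


    if __name__ == '__main__':
        MiniScraper()

On Python 3.7+ the same flow could be driven with asyncio.run() instead of holding the loop on the instance; the sketch keeps the loop-on-self approach to mirror the patch.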