본문 바로가기
Python

비동기로 웹크롤링

by 앗사비 2023. 4. 1.
728x90
from bs4 import BeautifulSoup as bs
import aiohttp
import asyncio


async def fetch(session, url, i):
    """Download one page and print the title of its first post.

    Args:
        session: Client session used for the request; ``session.get(url)``
            must work as an async context manager yielding a response
            with an awaitable ``text()`` (``main`` passes an
            ``aiohttp.ClientSession``).
        url: Address of the page to download.
        i: Zero-based index of the page; it is printed one-based.

    Prints ``"<i+1> : <title>"``. When the page contains no
    ``<span class="title">`` element, a placeholder line is printed
    instead of raising.
    """
    async with session.get(url) as res:
        html = await res.text()

    soup = bs(html, "html.parser")
    tag = soup.find("span", class_="title")  # first post title only
    if tag is None:
        # find() returns None when nothing matches; without this guard the
        # resulting AttributeError would propagate through asyncio.gather
        # and fail the whole run.
        print(f"{i+1} : (title not found)")
        return
    print(f"{i+1} : {tag.text}")


async def main():
    """Fetch pages 1 through 10 of the blog concurrently.

    One ``fetch`` coroutine is created per page, all sharing a single
    ``aiohttp.ClientSession``, and they are awaited together with
    ``asyncio.gather``.
    """
    BASE_URL = "https://myinbox.tistory.com/"
    page_urls = []
    for page_no in range(1, 11):
        page_urls.append(f"{BASE_URL}?page={page_no}")

    async with aiohttp.ClientSession() as session:
        # The index handed to fetch is zero-based; fetch prints it one-based.
        jobs = []
        for idx, page_url in enumerate(page_urls):
            jobs.append(fetch(session, page_url, idx))
        await asyncio.gather(*jobs)


# Run the crawler only when this file is executed as a script, not on import.
if __name__ == "__main__":
    # asyncio.run drives the main() coroutine to completion.
    asyncio.run(main())

참고 : https://github.com/amamov/teaching-async-python/blob/main/3-%EB%8F%99%EC%8B%9C%EC%84%B1-%ED%94%84%EB%A1%9C%EA%B7%B8%EB%9E%98%EB%B0%8D%EC%9C%BC%EB%A1%9C-%EB%8D%B0%EC%9D%B4%ED%84%B0-%EC%88%98%EC%A7%91/03-scraping.py

728x90