import asyncio

from bs4 import BeautifulSoup
from crawl4ai import AsyncWebCrawler, CacheMode


def jscode():
    # Scroll to the bottom of the page every 200 ms for 10 seconds so that
    # lazily loaded news items are rendered before the HTML is captured.
    js_code = ["""
        const scrollInterval = setInterval(() => {
            window.scrollTo(0, document.body.scrollHeight);
        }, 200);
        setTimeout(() => {
            clearInterval(scrollInterval);
        }, 10000);
    """]
    return js_code


async def main():
    # Route traffic through a local proxy; adjust or drop the proxy setting as needed.
    async with AsyncWebCrawler(verbose=True, proxy="http://127.0.0.1:7890") as crawler:
        result = await crawler.arun(
            url="https://www.chaincatcher.com/news",
            cache_mode=CacheMode.ENABLED,
            js_code=jscode(),
        )
        # Parse the rendered HTML and dump it for inspection.
        soup = BeautifulSoup(result.html, "html.parser")
        print(soup.prettify())


if __name__ == "__main__":
    asyncio.run(main())
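The prettified dump above is mainly useful for inspecting the page structure. Once the markup is known, the BeautifulSoup object can be queried directly instead of printed. Below is a minimal sketch of such a follow-up step; the "a[href*='/article/']" selector is an assumption about chaincatcher.com's markup, so inspect the actual output and adjust it accordingly:

    from bs4 import BeautifulSoup


    def extract_links(html: str):
        """Collect candidate article titles and links from the scrolled-out page.

        NOTE: the CSS selector below is a guess, not the site's confirmed markup;
        replace it after checking the prettified HTML.
        """
        soup = BeautifulSoup(html, "html.parser")
        links = []
        for a in soup.select("a[href*='/article/']"):
            title = a.get_text(strip=True)
            href = a.get("href")
            if title and href:
                links.append((title, href))
        return links

Inside main(), this would be called as extract_links(result.html) in place of the print statement.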