# main.py — scroll-crawl the ChainCatcher news page with crawl4ai and pretty-print the HTML.
import asyncio

from bs4 import BeautifulSoup
from crawl4ai import AsyncWebCrawler
  4. def jscode():
  5. js_code = ["""
  6. const scrollInterval = setInterval(() => {
  7. window.scrollTo(0, document.body.scrollHeight);
  8. }, 200);
  9. setTimeout(() => {
  10. clearInterval(scrollInterval);
  11. }, 10000);
  12. """]
  13. return js_code
  14. async def main():
  15. async with AsyncWebCrawler(verbose=True, proxy="http://127.0.0.1:7890") as crawler:
  16. result = await crawler.arun(
  17. url="https://www.chaincatcher.com/news",
  18. cache_mode=True,
  19. js_code=jscode(),
  20. )
  21. soup = BeautifulSoup(result.html, "html.parser")
  22. print(soup.prettify())
  23. if __name__ == "__main__":
  24. asyncio.run(main())