|
|
@@ -0,0 +1,31 @@
|
|
|
import asyncio

from bs4 import BeautifulSoup
from crawl4ai import AsyncWebCrawler, CacheMode
|
|
|
+
|
|
|
+
|
|
|
def jscode():
    """Build the page-interaction script handed to the crawler.

    Returns a single-element list containing JavaScript that scrolls to
    the bottom of the page every 200 ms and stops after 10 seconds, so
    lazily-loaded articles are fetched before the HTML is captured.
    """
    auto_scroll = """
    const scrollInterval = setInterval(() => {
        window.scrollTo(0, document.body.scrollHeight);
    }, 200);
    setTimeout(() => {
        clearInterval(scrollInterval);
    }, 10000);
    """
    return [auto_scroll]
|
|
|
+
|
|
|
+
|
|
|
async def main():
    """Crawl the ChainCatcher news page and print its prettified HTML.

    Routes the crawler through a local proxy and injects an auto-scroll
    script (see jscode) so lazily-loaded content is present in the page
    before the HTML snapshot is taken.
    """
    async with AsyncWebCrawler(verbose=True, proxy="http://127.0.0.1:7890") as crawler:
        result = await crawler.arun(
            url="https://www.chaincatcher.com/news",
            # BUG FIX: `cache_mode` expects a crawl4ai.CacheMode enum, not a
            # bool — `True` is not a valid member, so caching behavior was
            # undefined. CacheMode.ENABLED is the read/write-cache setting.
            cache_mode=CacheMode.ENABLED,
            js_code=jscode(),
        )
        # Don't hand a failed crawl to BeautifulSoup — result.html may be
        # empty or None when the fetch did not succeed.
        if not result.success:
            print(f"Crawl failed: {result.error_message}")
            return
        soup = BeautifulSoup(result.html, "html.parser")
        print(soup.prettify())
|
|
|
+
|
|
|
+
|
|
|
+if __name__ == "__main__":
|
|
|
+ asyncio.run(main())
|