jack преди 10 месеца
ревизия
f07550d9d4
променени са 2 файла, в които са добавени 98 реда и са изтрити 0 реда
  1. .gitignore — 67 additions, 0 deletions
  2. main.py — 31 additions, 0 deletions

+ 67 - 0
.gitignore

@@ -0,0 +1,67 @@
+.DS_Store
+# Byte-compiled / optimized / DLL files
+__pycache__/
+*.py[cod]
+*$py.class
+
+# C extensions
+*.so
+
+# Distribution / packaging
+.Python
+env/
+build/
+develop-eggs/
+dist/
+downloads/
+eggs/
+.eggs/
+lib/
+lib64/
+parts/
+sdist/
+var/
+*.egg-info/
+.installed.cfg
+*.egg
+.idea/*
+xml_files/
+
+# PyInstaller
+#  Usually these files are written by a python script from a template
+#  before PyInstaller builds the exe, so as to inject date/other infos into it.
+*.manifest
+*.spec
+
+# Installer logs
+pip-log.txt
+pip-delete-this-directory.txt
+
+# Unit test / coverage reports
+htmlcov/
+.tox/
+.coverage
+.coverage.*
+.cache
+nosetests.xml
+coverage.xml
+*,cover
+
+# Translations
+*.mo
+*.pot
+
+# Django stuff:
+*.log
+
+# Sphinx documentation
+docs/_build/
+
+# PyBuilder
+target/
+
+other/split_clash_config/split_config
+ai_news/save_data
+
+manual/clash/clash_each_node
+manual/singbox/singbox_each_node

+ 31 - 0
main.py

@@ -0,0 +1,31 @@
+import asyncio
+from crawl4ai import AsyncWebCrawler
+from bs4 import BeautifulSoup
+
+
def jscode():
    """Return the list of JavaScript snippets to inject into the crawled page.

    The single snippet auto-scrolls to the bottom of the page every 200 ms and
    stops itself after 10 s, giving lazily loaded content time to render
    before the HTML snapshot is taken.
    """
    scroll_script = """
            const scrollInterval = setInterval(() => {
                window.scrollTo(0, document.body.scrollHeight);
            }, 200);
            setTimeout(() => {
                clearInterval(scrollInterval);
            }, 10000);
            """
    return [scroll_script]
+
+
async def main():
    """Crawl the ChainCatcher news page through a local proxy and pretty-print
    the fetched HTML to stdout.

    The JS from jscode() keeps scrolling the page so lazily loaded articles
    render before the snapshot is taken.

    NOTE(review): `cache_mode=True` is passed where crawl4ai's `arun`
    documents a `CacheMode` enum value — confirm this against the installed
    crawl4ai version.
    """
    crawler = AsyncWebCrawler(verbose=True, proxy="http://127.0.0.1:7890")
    async with crawler:
        page = await crawler.arun(
            url="https://www.chaincatcher.com/news",
            cache_mode=True,
            js_code=jscode(),
        )
        parsed = BeautifulSoup(page.html, "html.parser")
        print(parsed.prettify())


if __name__ == "__main__":
    # Script entry point: drive the async crawl to completion.
    asyncio.run(main())