11 月之前 · 1e9cd82476
--- a/ai_news/ai_news.py
+++ b/ai_news/ai_news.py
@@ -6,7 +6,6 @@ import uuid
 
				 import httpx
			
 
				 import asyncio
			
 
				 import datetime
			
 
				-import time
			
 
				 from bs4 import BeautifulSoup
			
 
				 from ollama import Client as oClient
			
 
				 from playwright.async_api import async_playwright
			
@@ -129,7 +128,8 @@ class AINEWS:
 
				             exit(0)
			
 
				 
			
 
				         k = config[key]
			
 
				-        return k['target_url_list'], k['prompt_words'], k['role'], k['use_browser'], k['ai_host'], k['message_bot_key']
			
 
				+        return k
			
 
				+        # return k['target_url_list'], k['prompt_words'], k['role'], k['use_browser'], k['ai_host'], k['message_bot_key']
			
 
				 
			
 
				     async def get_htmls(self, urls):
			
 
				         # 获取 HTML
			
@@ -182,6 +182,12 @@ class AINEWS:
 
				                     # 导航到指定网址
			
 
				                     await page.goto(url)
			
 
				 
			
 
				+                    # 禁止弹框
			
 
				+                    await self.disable_dialogs(page)
			
 
				+
			
 
				+                    # 调用 disable_images 方法阻止图片加载并隐藏图片
			
 
				+                    await self.disable_images(page)
			
 
				+
			
 
				                     # 滚动页面以加载动态内容
			
 
				                     await self.scroll_to_percentage(page)
			
 
				 
			
@@ -249,6 +255,32 @@ class AINEWS:
 
				         await page.screenshot(path=screenshot_path, full_page=True)
			
 
				         print(f"截图已保存到: {screenshot_path}")
			
 
				 
			
 
    async def disable_images(self, page):
        """Blank out and hide the images of the page's current document.

        Runs a single script in the page through ``page.evaluate``. The
        script does two things:

        * registers a stylesheet rule ``img { display: none !important; }``
          via ``document.adoptedStyleSheets`` so that ``<img>`` elements
          inserted *after* this call (for example while the page is being
          scrolled to pull in more content) stay hidden too. The rule is
          registered at most once per document and adds no node to the DOM;
        * walks the images that already exist once, clearing ``src``,
          removing ``srcset`` and setting an inline ``display: none``.

        Images added later are only hidden by the stylesheet rule; this
        method does not intercept their network requests.

        Args:
            page: Object exposing an awaitable ``evaluate(script)`` method;
                the visible caller passes the page it has just navigated.
        """
        await page.evaluate('''() => {
            // Persistent rule: covers images that are added to the document later.
            const canAdopt = 'adoptedStyleSheets' in document
                && typeof CSSStyleSheet === 'function';
            if (canAdopt && !window.__ainewsImagesHidden) {
                const sheet = new CSSStyleSheet();
                sheet.replaceSync('img { display: none !important; }');
                document.adoptedStyleSheets = [...document.adoptedStyleSheets, sheet];
                window.__ainewsImagesHidden = true;
            }

            // Existing images: drop their sources and hide them in one pass.
            document.querySelectorAll('img').forEach(img => {
                img.src = '';
                img.removeAttribute('srcset');
                img.style.display = 'none';
            });
        }''')
			
 
				+
			
 
    async def disable_dialogs(self, page):
        """Replace the page's JavaScript dialog functions with silent stubs.

        Evaluates one script in the page that overwrites ``window.alert``
        with a no-op, ``window.confirm`` with a function returning ``true``
        and ``window.prompt`` with a function returning ``null``.

        Args:
            page: Object exposing an awaitable ``evaluate(script)`` method.
        """
        # The script text is sent to the page verbatim, so it is kept exactly
        # as written (including its inline JavaScript comments).
        override_script = '''() => {
            window.alert = () => {};
            window.confirm = () => true; // confirm 默认返回 true
            window.prompt = () => null;  // prompt 默认返回 null
        }'''
        await page.evaluate(override_script)
			
 
				+
			
 
				     def process_data(self, result_text, prompt_words, role, ai_host):
			
 
				         # 整理获取的数据, 返回准备发送的数据
			
 
				         process_send = []
			
@@ -268,7 +300,15 @@ class AINEWS:
 
				                 process_send.append(response_context)
			
 
				         return process_send
			
 
				 
			
 
				-    def main(self, datetime_file_path, target_url_list, prompt_words, role, use_browser, ai_host, message_bot_key):
			
 
				+    def main(self, config):
			
 
				+        target_url_list = config['target_url_list']
			
 
				+        prompt_words = config['prompt_words']
			
 
				+        role = config['role']
			
 
				+        use_browser = config['use_browser']
			
 
				+        ai_host = config['ai_host']
			
 
				+        message_bot_key = config['message_bot_key']
			
 
				+        use_ai = config['use_ai']
			
 
				+
			
 
				         # 获取所有的网页html内容
			
 
				         if use_browser:
			
 
				             result_text = asyncio.run(self.get_htmls_with_browser(target_url_list, datetime_file_path))
			
@@ -283,15 +323,15 @@ class AINEWS:
 
				             print('无数据, 程序退出')
			
 
				             exit(0)
			
 
				 
			
 
				-        # # 如果只需要保存爬取数据, 不使用 AI, 注释下面
			
 
				-        # # 处理发送 text 数据
			
 
				-        # process_send = self.process_data(result_text, prompt_words, role, ai_host)
			
 
				-        #
			
 
				-        # # 创建消息bot实例
			
 
				-        # bot = MatrixBot('message-bot', 'aaaAAA111!!!', message_bot_key)
			
 
				-        # # 发送消息
			
 
				-        # for process_text in process_send:
			
 
				-        #     bot.send_message(process_text)
			
 
				+        if use_ai:
			
 
				+            # 处理发送 text 数据
			
 
				+            process_send = self.process_data(result_text, prompt_words, role, ai_host)
			
 
				+
			
 
				+            # 创建消息bot实例
			
 
				+            bot = MatrixBot('message-bot', 'aaaAAA111!!!', message_bot_key)
			
 
				+            # 发送消息
			
 
				+            for process_text in process_send:
			
 
				+                bot.send_message(process_text)
			
 
				 
			
 
				 
			
 
				 if __name__ == "__main__":
			
@@ -300,7 +340,9 @@ if __name__ == "__main__":
 
				     datetime_file_path = ainews.mkdir_save_data()
			
 
				 
			
 
				     for key in key_list:
			
 
				-        target_url_list, prompt_words, role, use_browser, ai_host, message_bot_key = ainews.load_config(key)
			
 
				+        config = ainews.load_config(key)
			
 
				+        target_url_list = config['target_url_list']
			
 
				+
			
 
				         print(f'关键词 {key} 共有 {len(target_url_list)} 个网址')
			
 
				-        ainews.main(datetime_file_path, target_url_list, prompt_words, role, use_browser, ai_host, message_bot_key)
			
 
				+        ainews.main(config)
			
 
				     print('done!')
			
--- a/ai_news/config.json
+++ b/ai_news/config.json
@@ -3,6 +3,7 @@
 
				     "use_browser": 0,
			
 
				     "ai_host": "http://127.0.0.1:11434",
			
 
				     "message_bot_key": "",
			
 
				+    "use_ai": 1,
			
 
				     "target_url_list": [],
			
 
				     "prompt_words": "",
			
 
				     "role": ""
			
@@ -11,6 +12,7 @@
 
				     "use_browser": 1,
			
 
				     "ai_host": "http://home.erhe.link:36001",
			
 
				     "message_bot_key": "!CgWvWEnLbKYvhXLvil:chat.abeginner.cn",
			
 
				+    "use_ai": 1,
			
 
				     "target_url_list": [
			
 
				       "https://wublock123.com",
			
 
				       "https://m.odaily.news/newsflash",
			
@@ -20,7 +22,9 @@
 
				       "https://www.jinse.cn/lives",
			
 
				       "https://www.techflowpost.com/article/index.html",
			
 
				       "https://foresightnews.pro/news",
			
 
				-      "https://www.web3sj.com/news/"
			
 
				+      "https://www.web3sj.com/news/",
			
 
				+      "https://www.techflowpost.com/newsletter/index.html",
			
 
				+      "https://www.theblockbeats.info/newsflash?type=2"
			
 
				     ],
			
 
				     "role": "你是一个新闻报播员, 负责理解和报播新闻, 请用中文回答",
			
 
				     "prompt_words": "这是最近的数字币相关的新闻, 你现在是一个新闻报播员, 帮我整理并总结以上新闻, 分析一下数字币接下来大概得走势, 再在此基础上, 给每一条新闻做一个情感分析, 最后输出一下总得分"
			
@@ -28,7 +32,8 @@
 
				   "A": {
			
 
				     "use_browser": 1,
			
 
				     "ai_host": "http://home.erhe.link:36001",
			
 
				-    "message_bot_key": "",
			
 
				+    "message_bot_key": "!CgWvWEnLbKYvhXLvil:chat.abeginner.cn",
			
 
				+    "use_ai": 1,
			
 
				     "target_url_list": [
			
 
				       "https://www.eastmoney.com/",
			
 
				       "https://www.jrj.com.cn/",