|
|
@@ -6,7 +6,6 @@ import uuid
|
|
|
import httpx
|
|
|
import asyncio
|
|
|
import datetime
|
|
|
-import time
|
|
|
from bs4 import BeautifulSoup
|
|
|
from ollama import Client as oClient
|
|
|
from playwright.async_api import async_playwright
|
|
|
@@ -129,7 +128,8 @@ class AINEWS:
|
|
|
exit(0)
|
|
|
|
|
|
k = config[key]
|
|
|
- return k['target_url_list'], k['prompt_words'], k['role'], k['use_browser'], k['ai_host'], k['message_bot_key']
|
|
|
+ return k
|
|
|
+ # return k['target_url_list'], k['prompt_words'], k['role'], k['use_browser'], k['ai_host'], k['message_bot_key']
|
|
|
|
|
|
async def get_htmls(self, urls):
|
|
|
# 获取 HTML
|
|
|
@@ -182,6 +182,12 @@ class AINEWS:
|
|
|
# 导航到指定网址
|
|
|
await page.goto(url)
|
|
|
|
|
|
+ # 禁止弹框
|
|
|
+ await self.disable_dialogs(page)
|
|
|
+
|
|
|
+ # 调用 disable_images 方法阻止图片加载并隐藏图片
|
|
|
+ await self.disable_images(page)
|
|
|
+
|
|
|
# 滚动页面以加载动态内容
|
|
|
await self.scroll_to_percentage(page)
|
|
|
|
|
|
@@ -249,6 +255,32 @@ class AINEWS:
|
|
|
await page.screenshot(path=screenshot_path, full_page=True)
|
|
|
print(f"截图已保存到: {screenshot_path}")
|
|
|
|
|
|
async def disable_images(self, page):
    """Strip the sources of every ``<img>`` in the page and hide it.

    Runs one script in the page through ``page.evaluate``. For each
    ``<img>`` element present in the document when the script runs, the
    ``srcset`` and ``src`` attributes are removed and the inline
    ``display`` style is set to ``none``.

    Args:
        page: Object exposing an awaitable ``evaluate(script)`` method;
            presumably a Playwright ``Page`` (the file imports
            ``async_playwright``) -- confirm against the caller.

    Returns:
        None. The value produced by ``page.evaluate`` is discarded.

    NOTE(review): only elements that already exist are touched, and image
    requests issued before this call are presumably not cancelled. Blocking
    at the network level would need request interception registered before
    navigation -- confirm whether that is wanted.
    """
    # One pass over the images instead of two separate traversals.
    # `srcset` is dropped before `src` so that removing `src` cannot leave
    # `srcset` as the only remaining candidate. The `src` attribute is
    # removed rather than set to '' because an empty-but-present `src`
    # makes the browser fire an `error` event on the element, which can
    # trigger page-side `onerror` fallback loaders.
    strip_and_hide_script = '''() => {
        document.querySelectorAll('img').forEach(img => {
            img.removeAttribute('srcset');
            img.removeAttribute('src');
            img.style.display = 'none';
        });
    }'''
    await page.evaluate(strip_and_hide_script)
|
|
|
+
|
|
|
async def disable_dialogs(self, page):
    """Replace the page's ``alert``/``confirm``/``prompt`` with silent stubs.

    Runs one script in the page through ``page.evaluate`` that reassigns
    three properties on ``window``:

    * ``window.alert``   -> function that does nothing
    * ``window.confirm`` -> function that always returns ``true``
    * ``window.prompt``  -> function that always returns ``null``

    Args:
        page: Object exposing an awaitable ``evaluate(script)`` method;
            presumably a Playwright ``Page`` -- confirm against the caller.

    Returns:
        None. The value produced by ``page.evaluate`` is discarded.

    NOTE(review): the stubs are installed on the ``window`` the script runs
    in; whether they survive a later navigation or reach child frames is
    not established by this code -- confirm if that matters.
    """
    # Script text is kept as in the original; only hoisted into a local.
    dialog_stub_script = '''() => {
        window.alert = () => {};
        window.confirm = () => true; // confirm 默认返回 true
        window.prompt = () => null; // prompt 默认返回 null
    }'''
    await page.evaluate(dialog_stub_script)
|
|
|
+
|
|
|
def process_data(self, result_text, prompt_words, role, ai_host):
|
|
|
# 整理获取的数据, 返回准备发送的数据
|
|
|
process_send = []
|
|
|
@@ -268,7 +300,15 @@ class AINEWS:
|
|
|
process_send.append(response_context)
|
|
|
return process_send
|
|
|
|
|
|
- def main(self, datetime_file_path, target_url_list, prompt_words, role, use_browser, ai_host, message_bot_key):
|
|
|
+ def main(self, config):
|
|
|
+ target_url_list = config['target_url_list']
|
|
|
+ prompt_words = config['prompt_words']
|
|
|
+ role = config['role']
|
|
|
+ use_browser = config['use_browser']
|
|
|
+ ai_host = config['ai_host']
|
|
|
+ message_bot_key = config['message_bot_key']
|
|
|
+ use_ai = config['use_ai']
|
|
|
+
|
|
|
# 获取所有的网页html内容
|
|
|
if use_browser:
|
|
|
result_text = asyncio.run(self.get_htmls_with_browser(target_url_list, datetime_file_path))
|
|
|
@@ -283,15 +323,15 @@ class AINEWS:
|
|
|
print('无数据, 程序退出')
|
|
|
exit(0)
|
|
|
|
|
|
- # # 如果只需要保存爬取数据, 不使用 AI, 注释下面
|
|
|
- # # 处理发送 text 数据
|
|
|
- # process_send = self.process_data(result_text, prompt_words, role, ai_host)
|
|
|
- #
|
|
|
- # # 创建消息bot实例
|
|
|
- # bot = MatrixBot('message-bot', 'aaaAAA111!!!', message_bot_key)
|
|
|
- # # 发送消息
|
|
|
- # for process_text in process_send:
|
|
|
- # bot.send_message(process_text)
|
|
|
+ if use_ai:
|
|
|
+ # 处理发送 text 数据
|
|
|
+ process_send = self.process_data(result_text, prompt_words, role, ai_host)
|
|
|
+
|
|
|
+ # 创建消息bot实例
|
|
|
+ bot = MatrixBot('message-bot', 'aaaAAA111!!!', message_bot_key)
|
|
|
+ # 发送消息
|
|
|
+ for process_text in process_send:
|
|
|
+ bot.send_message(process_text)
|
|
|
|
|
|
|
|
|
if __name__ == "__main__":
|
|
|
@@ -300,7 +340,9 @@ if __name__ == "__main__":
|
|
|
datetime_file_path = ainews.mkdir_save_data()
|
|
|
|
|
|
for key in key_list:
|
|
|
- target_url_list, prompt_words, role, use_browser, ai_host, message_bot_key = ainews.load_config(key)
|
|
|
+ config = ainews.load_config(key)
|
|
|
+ target_url_list = config['target_url_list']
|
|
|
+
|
|
|
print(f'关键词 {key} 共有 {len(target_url_list)} 个网址')
|
|
|
- ainews.main(datetime_file_path, target_url_list, prompt_words, role, use_browser, ai_host, message_bot_key)
|
|
|
+ ainews.main(config)
|
|
|
print('done!')
|