|
@@ -116,21 +116,51 @@ class AINEWS:
|
|
|
with open(file, 'w', encoding='utf-8') as file:
|
|
with open(file, 'w', encoding='utf-8') as file:
|
|
|
file.write(str(url_to_text))
|
|
file.write(str(url_to_text))
|
|
|
|
|
|
|
|
|
|
+ # region 读取配置文件
|
|
|
def load_config(self, key):
    """Return the configuration section stored under ``key`` in config.json.

    Lookup order:
      1. ``config.json`` in the current working directory (the location this
         method has always read from);
      2. ``config.json`` next to this source file (the location where the
         default example is created).
    The first file that yields a non-empty configuration wins.

    If no non-empty configuration is found, the method makes sure an example
    ``config.json`` exists next to this source file (creating it when
    missing) and then terminates the process with exit status 0, so the user
    can fill the file in before the next run.

    Args:
        key: Name of the top-level section to return (e.g. ``"example"``).

    Returns:
        The value stored under ``key`` in the loaded configuration.

    Raises:
        SystemExit: With code 0 when no usable configuration was found.
        KeyError: When the configuration has no section named ``key``.
        json.JSONDecodeError: When a config file exists but is not valid JSON.
    """
    script_dir = os.path.dirname(os.path.abspath(__file__))
    config_path = os.path.join(script_dir, 'config.json')

    config = {}
    # Try the working directory first to stay compatible with existing
    # setups, then fall back to the file that lives beside this module.
    # Without the fallback, a config created by this very method would never
    # be read whenever the process is started from another directory.
    for candidate in ('config.json', config_path):
        if not os.path.exists(candidate):
            continue
        with open(candidate, 'r', encoding='utf-8') as f:
            config = json.load(f)
        if config:
            break

    if not config:
        if not os.path.exists(config_path):
            # First run: write an example the user can edit.
            default_config = {
                "example": {
                    "use_browser": 0,
                    "ai_host": 'http://127.0.0.1:11434(不需要此功能留空)',
                    "message_bot_key": '填入matrix的key(不需要此功能留空)',
                    "target_url_list": ['目标网站'],
                    "role": "AI的角色, 例如: 你是一个聊天机器人",
                    "prompt_words": "提示词: 帮我总结, 用中文回复"
                }
            }
            with open(config_path, 'w', encoding='utf-8') as f:
                # ensure_ascii=False keeps the placeholder text human-readable
                # in the file instead of \uXXXX escapes; the file is UTF-8.
                json.dump(default_config, f, indent=4, ensure_ascii=False)

            print(f"Created {config_path} with default configuration.")
        else:
            # The file is there but holds nothing usable; say so instead of
            # exiting without any explanation.
            print(f"No usable configuration found in {config_path}.")

        # Same effect as exit(0), without depending on the `site` module
        # having injected the `exit` builtin.
        raise SystemExit(0)

    return config[key]
|
|
|
|
|
+ # endregion
|
|
|
|
|
|
|
|
|
|
+ # region 使用httpx获取网页内容
|
|
|
async def get_htmls(self, urls):
|
|
async def get_htmls(self, urls):
|
|
|
# 获取 HTML
|
|
# 获取 HTML
|
|
|
async with httpx.AsyncClient() as client:
|
|
async with httpx.AsyncClient() as client:
|
|
@@ -163,14 +193,16 @@ class AINEWS:
|
|
|
url_to_text = {url: text for url, text in results}
|
|
url_to_text = {url: text for url, text in results}
|
|
|
|
|
|
|
|
return url_to_text
|
|
return url_to_text
|
|
|
|
|
+ # endregion
|
|
|
|
|
|
|
|
|
|
+ # region 使用Playwright获取HTML内容
|
|
|
async def get_htmls_with_browser(self, urls, datetime_file_path):
|
|
async def get_htmls_with_browser(self, urls, datetime_file_path):
|
|
|
# 使用 Playwright 获取 HTML 内容
|
|
# 使用 Playwright 获取 HTML 内容
|
|
|
url_to_text = {}
|
|
url_to_text = {}
|
|
|
|
|
|
|
|
async with async_playwright() as p:
|
|
async with async_playwright() as p:
|
|
|
# 启动浏览器
|
|
# 启动浏览器
|
|
|
- browser = await p.chromium.launch(headless=False)
|
|
|
|
|
|
|
+ browser = await p.chromium.launch(headless=True)
|
|
|
# 创建浏览器上下文
|
|
# 创建浏览器上下文
|
|
|
context = await browser.new_context()
|
|
context = await browser.new_context()
|
|
|
|
|
|
|
@@ -221,7 +253,9 @@ class AINEWS:
|
|
|
await browser.close()
|
|
await browser.close()
|
|
|
|
|
|
|
|
return url_to_text
|
|
return url_to_text
|
|
|
|
|
+ # endregion
|
|
|
|
|
|
|
|
|
|
+ # region 滚动页面
|
|
|
@staticmethod
|
|
@staticmethod
|
|
|
async def scroll_to_percentage(page):
|
|
async def scroll_to_percentage(page):
|
|
|
# 获取页面标题并打印
|
|
# 获取页面标题并打印
|
|
@@ -237,7 +271,9 @@ class AINEWS:
|
|
|
await page.evaluate(f"window.scrollTo({{top: {scroll_position}, behavior: 'smooth'}})")
|
|
await page.evaluate(f"window.scrollTo({{top: {scroll_position}, behavior: 'smooth'}})")
|
|
|
await asyncio.sleep(0.5) # 使用异步 sleep
|
|
await asyncio.sleep(0.5) # 使用异步 sleep
|
|
|
await page.evaluate("window.scrollTo({top: 0, behavior: 'smooth'})")
|
|
await page.evaluate("window.scrollTo({top: 0, behavior: 'smooth'})")
|
|
|
|
|
+ # endregion
|
|
|
|
|
|
|
|
|
|
+ # region 网页截图
|
|
|
@staticmethod
|
|
@staticmethod
|
|
|
async def screenshot(page, datetime_file_path):
|
|
async def screenshot(page, datetime_file_path):
|
|
|
# 顺手截图
|
|
# 顺手截图
|
|
@@ -254,7 +290,9 @@ class AINEWS:
|
|
|
# 进行整页截图
|
|
# 进行整页截图
|
|
|
await page.screenshot(path=screenshot_path, full_page=True)
|
|
await page.screenshot(path=screenshot_path, full_page=True)
|
|
|
print(f"截图已保存到: {screenshot_path}")
|
|
print(f"截图已保存到: {screenshot_path}")
|
|
|
|
|
+ # endregion
|
|
|
|
|
|
|
|
|
|
+ # region 禁止网页显示图片
|
|
|
async def disable_images(self, page):
|
|
async def disable_images(self, page):
|
|
|
# 调用 JavaScript 函数阻止图片加载并隐藏图片
|
|
# 调用 JavaScript 函数阻止图片加载并隐藏图片
|
|
|
await page.evaluate('''() => {
|
|
await page.evaluate('''() => {
|
|
@@ -272,7 +310,9 @@ class AINEWS:
|
|
|
}
|
|
}
|
|
|
disableImages(); // 调用函数
|
|
disableImages(); // 调用函数
|
|
|
}''')
|
|
}''')
|
|
|
|
|
+ # endregion
|
|
|
|
|
|
|
|
|
|
+ # region 覆盖JavaScript的弹框方法,使其无效
|
|
|
async def disable_dialogs(self, page):
|
|
async def disable_dialogs(self, page):
|
|
|
# 覆盖 JavaScript 的弹框方法,使其无效
|
|
# 覆盖 JavaScript 的弹框方法,使其无效
|
|
|
await page.evaluate('''() => {
|
|
await page.evaluate('''() => {
|
|
@@ -281,6 +321,9 @@ class AINEWS:
|
|
|
window.prompt = () => null; // prompt 默认返回 null
|
|
window.prompt = () => null; // prompt 默认返回 null
|
|
|
}''')
|
|
}''')
|
|
|
|
|
|
|
|
|
|
+ # endregion
|
|
|
|
|
+
|
|
|
|
|
+ # region AI处理数据
|
|
|
def process_data(self, result_text, prompt_words, role, ai_host):
|
|
def process_data(self, result_text, prompt_words, role, ai_host):
|
|
|
# 整理获取的数据, 返回准备发送的数据
|
|
# 整理获取的数据, 返回准备发送的数据
|
|
|
process_send = []
|
|
process_send = []
|
|
@@ -300,6 +343,9 @@ class AINEWS:
|
|
|
process_send.append(response_context)
|
|
process_send.append(response_context)
|
|
|
return process_send
|
|
return process_send
|
|
|
|
|
|
|
|
|
|
+ # endregion
|
|
|
|
|
+
|
|
|
|
|
+ # region 主函数
|
|
|
def main(self, config):
|
|
def main(self, config):
|
|
|
target_url_list = config['target_url_list']
|
|
target_url_list = config['target_url_list']
|
|
|
prompt_words = config['prompt_words']
|
|
prompt_words = config['prompt_words']
|
|
@@ -332,7 +378,7 @@ class AINEWS:
|
|
|
# 发送消息
|
|
# 发送消息
|
|
|
for process_text in process_send:
|
|
for process_text in process_send:
|
|
|
bot.send_message(process_text)
|
|
bot.send_message(process_text)
|
|
|
-
|
|
|
|
|
|
|
+ # endregion
|
|
|
|
|
|
|
|
if __name__ == "__main__":
|
|
if __name__ == "__main__":
|
|
|
ainews = AINEWS()
|
|
ainews = AINEWS()
|