# -*- coding: utf-8 -*-
"""Fetch web pages, have an Ollama model process their text, and post the result via MatrixBot."""
import os
import re
import json
import httpx
import asyncio
import time
from bs4 import BeautifulSoup
from ollama import Client as oClient
from send_matrix import MatrixBot

# Top-level keys of config.json to process. This must be a list: iterating a
# bare string would yield its individual characters instead of the key.
key_list = ['web3']

# Falsy: all pages are concatenated and sent to the model in a single call.
# Truthy: the model is called once per page.
text_batch = 0

# Address of the Ollama server used by AINEWS.process_data.
OLLAMA_HOST = 'http://192.168.31.28:11434'


class AINEWS:
    """Pipeline that downloads pages, runs them through an Ollama model and sends the output."""

    def create_config_if_not_exists(self):
        """Create a template ``config.json`` next to this script if none exists there."""
        current_dir = os.path.dirname(os.path.abspath(__file__))
        config_path = os.path.join(current_dir, 'config.json')

        if not os.path.exists(config_path):
            # Template showing the per-key structure that load_config expects.
            default_config = {
                "example": {
                    "target_url_list": [],
                    "role": "",
                    "prompt_words": ""
                }
            }
            with open(config_path, 'w', encoding='utf-8') as f:
                json.dump(default_config, f, indent=4)
            print(f"Created {config_path} with default configuration.")

    def save_to_txt(self, url_to_text):
        """Write ``str(url_to_text)`` to ``save_txt/<unix-timestamp>.txt`` next to this script.

        Args:
            url_to_text: Object to persist; it is stringified with ``str()``.
        """
        script_dir = os.path.dirname(os.path.abspath(__file__))
        save_dir = os.path.join(script_dir, 'save_txt')
        # exist_ok avoids the check-then-create race of a separate exists() test.
        os.makedirs(save_dir, exist_ok=True)

        out_path = os.path.join(save_dir, str(int(time.time())) + '.txt')
        with open(out_path, 'w', encoding='utf-8') as out_file:
            out_file.write(str(url_to_text))
        print('txt文件已保存')

    def load_config(self, key):
        """Load the settings stored under ``key`` in ``config.json``.

        The file is looked up in the current working directory first and then
        next to this script, which is where ``create_config_if_not_exists``
        writes it.

        Args:
            key: Top-level key of the configuration to read.

        Returns:
            Tuple ``(target_url_list, prompt_words, role)`` taken from the entry.

        Raises:
            SystemExit: With status 1 when no config file is found or it is empty.
            KeyError: When ``key`` or one of the expected fields is missing.
        """
        script_dir = os.path.dirname(os.path.abspath(__file__))
        candidates = ('config.json', os.path.join(script_dir, 'config.json'))

        config = {}
        for path in candidates:
            if os.path.exists(path):
                with open(path, 'r', encoding='utf-8') as f:
                    config = json.load(f)
                break

        if not config:
            print('config.json is not exist!')
            # Non-zero status so callers can detect the failure; avoids the
            # site-provided exit() helper, which is not always available.
            raise SystemExit(1)

        entry = config[key]
        return entry['target_url_list'], entry['prompt_words'], entry['role']

    async def get_htmls(self, urls):
        """Download all ``urls`` concurrently and return their visible text.

        Args:
            urls: Iterable of URLs to fetch.

        Returns:
            Dict mapping each URL to its whitespace-normalised page text, or to
            an empty string when fetching or parsing that URL failed.
        """
        async with httpx.AsyncClient() as client:

            async def get_html(url):
                try:
                    print(f'正在打开: {url}')
                    response = await client.get(url)
                    response.raise_for_status()  # treat HTTP error statuses as failures

                    soup = BeautifulSoup(response.text, 'html.parser')
                    text = soup.get_text(separator=' ', strip=True)
                    # Collapse every run of whitespace into a single space.
                    cleaned_text = re.sub(r'\s+', ' ', text).strip()
                    return url, cleaned_text
                except Exception as e:
                    # Best effort per URL: one failing page must not abort the batch.
                    print(f"Error fetching {url}: {e}")
                    return url, ""

            tasks = [get_html(url) for url in urls]
            results = await asyncio.gather(*tasks)

        return {url: text for url, text in results}

    def call_ollama(self, host, role, text, prompt_words, model='llava:13b', temperature=0.4):
        """Send ``text`` followed by ``prompt_words`` to an Ollama chat model.

        Args:
            host: Address of the Ollama server.
            role: Content of the system message.
            text: Source text placed at the start of the user message.
            prompt_words: Instructions appended after ``text``.
            model: Name of the model to use.
            temperature: Sampling temperature passed in ``options``.

        Returns:
            The content of the model's reply, or ``None`` if the call raised.
        """
        message = text + '\n\n' + prompt_words
        print(f'use model: {model}')
        try:
            response = oClient(host=host).chat(
                model=model,
                messages=[
                    {'role': 'system', 'content': role},
                    {'role': 'user', 'content': message},
                ],
                options={"temperature": temperature},
                stream=False,
            )
            return response['message']['content']
        except Exception as e:
            print(f"\n发生错误: {e}")
            return None

    def process_data(self, result_text, prompt_words, role):
        """Run the fetched texts through the model and collect the messages to send.

        When the module-level ``text_batch`` is truthy the model is called once
        per URL and each reply is prefixed with its URL; otherwise all pages are
        concatenated and processed in a single call.

        Args:
            result_text: Dict mapping URL to page text.
            prompt_words: Instructions passed to the model.
            role: System message passed to the model.

        Returns:
            List of message strings; failed model calls contribute nothing.
        """
        process_send = []
        if text_batch:
            for url, page_text in result_text.items():
                reply = self.call_ollama(OLLAMA_HOST, role, page_text, prompt_words)
                if reply:
                    process_send.append(f'{url}\n{reply}\n')
        else:
            combined = ''.join(
                f'{url}\n{page_text}\n' for url, page_text in result_text.items()
            )
            reply = self.call_ollama(OLLAMA_HOST, role, combined, prompt_words)
            if reply:
                process_send.append(reply)
        return process_send

    def main(self, target_url_list, prompt_words, role):
        """Fetch the pages, save the raw text, process it and send each resulting message.

        Args:
            target_url_list: URLs to download.
            prompt_words: Instructions passed to the model.
            role: System message passed to the model.
        """
        result_text = asyncio.run(self.get_htmls(target_url_list))
        self.save_to_txt(result_text)

        # NOTE(review): credentials are hard-coded in source; consider loading
        # them from configuration or the environment instead.
        bot = MatrixBot('message-bot', 'aaaAAA111!!!')

        process_send = self.process_data(result_text, prompt_words, role)
        for process_text in process_send:
            bot.send_message(process_text)


if __name__ == "__main__":
    ainews = AINEWS()
    ainews.create_config_if_not_exists()
    # Tolerate a single key given as a bare string instead of a list.
    keys = [key_list] if isinstance(key_list, str) else key_list
    for key in keys:
        target_url_list, prompt_words, role = ainews.load_config(key)
        ainews.main(target_url_list, prompt_words, role)
    print('done!')