| 123456789101112131415161718192021222324252627282930313233343536373839404142434445464748495051525354555657585960616263646566676869707172737475767778798081828384858687888990919293949596979899100101102103104105106107108109110111112113114115116117118119120121122123124125126127128129130131132133134135136137138139140141142143144145146147148149150151 |
- # -*- coding: utf-8 -*-
- import os
- import re
- import json
- import httpx
- import asyncio
- import time
- from bs4 import BeautifulSoup
- from ollama import Client as oClient
- from send_matrix import MatrixBot
# Names of the top-level sections of config.json to process, in order.
# This must be a list: the script iterates over it, and iterating a bare
# string would walk its individual characters instead of whole keys.
key_list = ['web3']
# Truthy: summarise every fetched page with its own model call.
# Falsy: concatenate all pages and summarise them with a single call.
text_batch = 0
class AINEWS:
    """Fetch web pages, summarise their text with an Ollama model and post it via MatrixBot.

    The pipeline is driven by ``main``: download the pages, dump the raw
    text to disk, ask the model for a summary and send each summary with
    ``MatrixBot.send_message``.
    """

    # Ollama endpoint used by ``process_data`` when the caller passes no host.
    OLLAMA_HOST = 'http://192.168.31.28:11434'
    # File name of the JSON configuration.
    CONFIG_NAME = 'config.json'

    @staticmethod
    def _module_dir():
        """Return the absolute directory that contains this source file."""
        return os.path.dirname(os.path.abspath(__file__))

    def create_config_if_not_exists(self):
        """Write a template ``config.json`` next to this file if none exists there.

        The template holds a single ``"example"`` section with the three
        fields ``load_config`` reads: ``target_url_list``, ``role`` and
        ``prompt_words``. An existing file is left untouched.
        """
        config_path = os.path.join(self._module_dir(), self.CONFIG_NAME)
        if os.path.exists(config_path):
            return
        default_config = {
            "example": {
                "target_url_list": [],
                "role": "",
                "prompt_words": ""
            }
        }
        with open(config_path, 'w', encoding='utf-8') as f:
            json.dump(default_config, f, indent=4)
        print(f"Created {config_path} with default configuration.")

    def save_to_txt(self, url_to_text):
        """Dump ``str(url_to_text)`` into ``save_txt/<unix-seconds>.txt`` next to this file.

        Args:
            url_to_text: Any object; its ``str()`` form is written as UTF-8.
        """
        save_dir = os.path.join(self._module_dir(), 'save_txt')
        # exist_ok avoids the check-then-create race of a separate exists() test.
        os.makedirs(save_dir, exist_ok=True)
        # The name has one-second resolution, so two dumps within the same
        # second overwrite each other.
        txt_path = os.path.join(save_dir, str(int(time.time())) + '.txt')
        with open(txt_path, 'w', encoding='utf-8') as fh:
            fh.write(str(url_to_text))
        print('txt文件已保存')

    def load_config(self, key):
        """Return the settings stored under ``key`` in ``config.json``.

        The file is looked up in the current working directory first and,
        if it is not there, next to this source file, which is where
        ``create_config_if_not_exists`` writes it.

        Args:
            key: Name of the top-level section to read.

        Returns:
            A ``(target_url_list, prompt_words, role)`` tuple.

        Raises:
            SystemExit: With status 1 when no config file is found or the
                file found is empty.
            KeyError: When ``key`` or one of the three fields is missing.
        """
        candidates = (
            self.CONFIG_NAME,
            os.path.join(self._module_dir(), self.CONFIG_NAME),
        )
        config = {}
        for path in candidates:
            if os.path.exists(path):
                with open(path, 'r', encoding='utf-8') as f:
                    config = json.load(f)
                break
        if not config:
            print('config.json is not exist!')
            # This is an error path, so report a non-zero status.
            raise SystemExit(1)
        section = config[key]
        return section['target_url_list'], section['prompt_words'], section['role']

    async def get_htmls(self, urls):
        """Download every URL concurrently and return ``{url: visible_text}``.

        A URL whose request or parsing fails is mapped to an empty string;
        the error is printed and the other downloads continue.

        Args:
            urls: Iterable of URL strings.

        Returns:
            Dict mapping each URL to its whitespace-collapsed page text.
        """
        async with httpx.AsyncClient() as client:

            async def get_html(url):
                try:
                    print(f'正在打开: {url}')
                    response = await client.get(url)
                    response.raise_for_status()
                    # Strip the markup and keep only the text nodes.
                    soup = BeautifulSoup(response.text, 'html.parser')
                    text = soup.get_text(separator=' ', strip=True)
                    # Collapse runs of whitespace into single spaces.
                    cleaned_text = re.sub(r'\s+', ' ', text).strip()
                    return url, cleaned_text
                except Exception as e:
                    # Best effort per URL: one bad page must not abort the batch.
                    print(f"Error fetching {url}: {e}")
                    return url, ""

            # All requests share one client and run concurrently.
            results = await asyncio.gather(*(get_html(url) for url in urls))
        return dict(results)

    def call_ollama(self, host, role, text, prompt_words, model='llava:13b', temperature=0.4):
        """Send one non-streaming chat request to Ollama and return the reply text.

        Args:
            host: Base URL of the Ollama server.
            role: Content of the system message.
            text: Source text placed at the start of the user message.
            prompt_words: Instruction appended after ``text``.
            model: Ollama model name.
            temperature: Sampling temperature passed in ``options``.

        Returns:
            The reply content, or ``None`` if the request raised.
        """
        message = text + '\n\n' + prompt_words
        print(f'use model: {model}')
        try:
            response = oClient(host=host).chat(
                model=model,
                messages=[
                    {'role': 'system', 'content': role},
                    {'role': 'user', 'content': message},
                ],
                options={"temperature": temperature},
                stream=False,
            )
            return response['message']['content']
        except Exception as e:
            # Best effort: the caller treats None as "nothing to send".
            print(f"\n发生错误: {e}")
            return None

    def process_data(self, result_text, prompt_words, role, host=None, batch_per_url=None):
        """Turn fetched page text into the list of messages to send.

        Args:
            result_text: Dict mapping URL to page text.
            prompt_words: Instruction forwarded to ``call_ollama``.
            role: System message forwarded to ``call_ollama``.
            host: Ollama base URL; defaults to ``OLLAMA_HOST``.
            batch_per_url: When true, make one model call per URL and prefix
                each reply with its URL. When false, concatenate all pages
                into one model call. ``None`` defers to the module-level
                ``text_batch`` flag.

        Returns:
            List of message strings; replies that came back empty or
            ``None`` are omitted.
        """
        if host is None:
            host = self.OLLAMA_HOST
        if batch_per_url is None:
            batch_per_url = bool(text_batch)

        process_send = []
        if batch_per_url:
            for url, page_text in result_text.items():
                reply = self.call_ollama(host, role, page_text, prompt_words)
                if reply:
                    process_send.append(f'{url}\n{reply}\n')
        else:
            combined = ''.join(
                f'{url}\n{page_text}\n' for url, page_text in result_text.items()
            )
            reply = self.call_ollama(host, role, combined, prompt_words)
            if reply:
                process_send.append(reply)
        return process_send

    def main(self, target_url_list, prompt_words, role):
        """Run the pipeline once: fetch, save, summarise, send.

        Args:
            target_url_list: URLs to download.
            prompt_words: Instruction for the model.
            role: System message for the model.
        """
        result_text = asyncio.run(self.get_htmls(target_url_list))
        self.save_to_txt(result_text)
        # SECURITY: credentials should not live in source. The environment
        # variables MATRIX_BOT_USER / MATRIX_BOT_PASSWORD take precedence;
        # the literals remain only as backward-compatible defaults.
        bot = MatrixBot(
            os.environ.get('MATRIX_BOT_USER', 'message-bot'),
            os.environ.get('MATRIX_BOT_PASSWORD', 'aaaAAA111!!!'),
        )
        process_send = self.process_data(result_text, prompt_words, role)
        for process_text in process_send:
            bot.send_message(process_text)
# Script entry point: run the pipeline once for every configured section.
if __name__ == "__main__":
    ainews = AINEWS()
    ainews.create_config_if_not_exists()
    # A bare string must be treated as ONE key; iterating it directly would
    # walk its characters and look up sections such as 'w', 'e', 'b', '3'.
    keys = [key_list] if isinstance(key_list, str) else key_list
    for key in keys:
        target_url_list, prompt_words, role = ainews.load_config(key)
        ainews.main(target_url_list, prompt_words, role)
    print('done!')
|