# -*- coding: utf-8 -*-
# toor / AI_NEWS
import os
import re
import json
import httpx
import asyncio
import time
from bs4 import BeautifulSoup
from ollama import Client as oClient
from send_matrix import MatrixBot

# Names of the config.json sections to process, in order. This must be a
# list: iterating a bare string would yield its characters one at a time.
key_list = ['web3']
# Falsy: all pages are concatenated and sent to the model in one request.
# Truthy: each page is sent to the model in its own request.
text_batch = 0


class AINEWS:
    """Download web pages, have an Ollama model process them, post the result.

    The workflow is driven by ``config.json``: each top-level key is a section
    holding ``target_url_list``, ``prompt_words`` and ``role``.
    """

    # Ollama server queried by process_data.
    OLLAMA_HOST = 'http://192.168.31.28:11434'
    # File name of the configuration file.
    CONFIG_NAME = 'config.json'

    @staticmethod
    def _script_dir():
        """Return the absolute directory that contains this source file."""
        return os.path.dirname(os.path.abspath(__file__))

    def _config_path(self):
        """Return the full path of the config file located beside this source file."""
        return os.path.join(self._script_dir(), self.CONFIG_NAME)

    def create_config_if_not_exists(self):
        """Write a template ``config.json`` beside this file if none exists there."""
        config_path = self._config_path()
        if os.path.exists(config_path):
            return

        # Skeleton showing the keys every section is expected to provide.
        default_config = {
            "example": {
                "target_url_list": [],
                "role": "",
                "prompt_words": ""
            }
        }

        with open(config_path, 'w', encoding='utf-8') as f:
            json.dump(default_config, f, indent=4)

        print(f"Created {config_path} with default configuration.")

    def save_to_txt(self, url_to_text):
        """Dump ``str(url_to_text)`` to ``save_txt/<unix-seconds>.txt`` beside this file.

        Args:
            url_to_text: Object to persist; it is written via ``str()``.
        """
        save_dir = os.path.join(self._script_dir(), 'save_txt')
        # exist_ok avoids the check-then-create race of a separate exists() test.
        os.makedirs(save_dir, exist_ok=True)
        txt_path = os.path.join(save_dir, str(int(time.time())) + '.txt')
        with open(txt_path, 'w', encoding='utf-8') as fh:
            fh.write(str(url_to_text))
        print('txt文件已保存')

    def load_config(self, key):
        """Load one section of ``config.json``.

        The file is looked up in the current working directory first (the
        historical behaviour) and then beside this source file, which is where
        ``create_config_if_not_exists`` writes it. The first file found is used.

        Args:
            key: Name of the top-level section to read.

        Returns:
            Tuple ``(target_url_list, prompt_words, role)`` from that section.

        Raises:
            SystemExit: With status 1 when no config file is found or it is empty.
            KeyError: When the section, or one of its three fields, is missing.
        """
        config = {}
        for candidate in (self.CONFIG_NAME, self._config_path()):
            if os.path.exists(candidate):
                with open(candidate, 'r', encoding='utf-8') as f:
                    config = json.load(f)
                break

        if not config:
            print('config.json is not exist!')
            # Non-zero status so callers and shells can tell this was a failure.
            raise SystemExit(1)

        try:
            section = config[key]
        except KeyError:
            raise KeyError(f'section {key!r} not found in config.json') from None
        return section['target_url_list'], section['prompt_words'], section['role']

    async def get_htmls(self, urls):
        """Fetch all ``urls`` concurrently and reduce each page to plain text.

        Args:
            urls: Iterable of URL strings.

        Returns:
            Dict mapping each URL to its whitespace-normalised text. A URL
            whose download or parsing failed maps to ``""``.
        """
        if not urls:
            # Nothing to fetch: skip creating an HTTP client altogether.
            return {}

        async with httpx.AsyncClient() as client:
            async def get_html(url):
                try:
                    print(f'正在打开: {url}')
                    response = await client.get(url)
                    response.raise_for_status()  # treat HTTP error statuses as failures

                    # Strip markup and keep only the visible text.
                    soup = BeautifulSoup(response.text, 'html.parser')
                    text = soup.get_text(separator=' ', strip=True)

                    # Collapse every run of whitespace into a single space.
                    cleaned_text = re.sub(r'\s+', ' ', text).strip()
                    return url, cleaned_text
                except Exception as e:
                    # Best effort: one failing page must not abort the others.
                    print(f"Error fetching {url}: {e}")
                    return url, ""

            # Run all downloads concurrently on the shared client.
            results = await asyncio.gather(*(get_html(url) for url in urls))

        return dict(results)

    def call_ollama(self, host, role, text, prompt_words, model='llava:13b', temperature=0.4):
        """Send one non-streaming chat request to an Ollama server.

        Args:
            host: Base URL of the Ollama server.
            role: Content of the system message.
            text: Page text; placed before the prompt in the user message.
            prompt_words: Instruction appended after ``text``.
            model: Model name passed to the server.
            temperature: Sampling temperature passed in ``options``.

        Returns:
            The reply's message content, or ``None`` if the request raised.
        """
        message = text + '\n\n' + prompt_words
        print(f'use model: {model}')
        try:
            response = oClient(host=host).chat(
                model=model,
                messages=[
                    {'role': 'system', 'content': role},
                    {'role': 'user', 'content': message}
                ],
                options={"temperature": temperature},
                stream=False)
            return response['message']['content']
        except Exception as e:
            print(f"\n发生错误: {e}")
            return None

    def process_data(self, result_text, prompt_words, role):
        """Run the fetched pages through the model and collect messages to send.

        When the module-level ``text_batch`` flag is truthy each page gets its
        own model request and the reply is prefixed with the page URL;
        otherwise all pages are concatenated into a single request.

        Args:
            result_text: Dict mapping URL to page text.
            prompt_words: Instruction passed on to ``call_ollama``.
            role: System message passed on to ``call_ollama``.

        Returns:
            List of message strings; empty when there was no usable page text
            or the model produced no reply.
        """
        # Pages with empty text (failed downloads) carry nothing to process;
        # sending them would only make the model answer about nothing.
        pages = {url: text for url, text in result_text.items() if text}
        if not pages:
            return []

        process_send = []
        if text_batch:
            for url, text in pages.items():
                reply = self.call_ollama(self.OLLAMA_HOST, role, text, prompt_words)
                if reply:
                    process_send.append(f'{url}\n{reply}\n')
        else:
            combined = ''.join(f'{url}\n{text}\n' for url, text in pages.items())
            reply = self.call_ollama(self.OLLAMA_HOST, role, combined, prompt_words)
            if reply:
                process_send.append(reply)
        return process_send

    def main(self, target_url_list, prompt_words, role):
        """Fetch the pages, archive the raw text, process it and send the results.

        Args:
            target_url_list: URLs to download.
            prompt_words: Instruction given to the model.
            role: System message given to the model.
        """
        result_text = asyncio.run(self.get_htmls(target_url_list))
        self.save_to_txt(result_text)

        # Create the messaging bot.
        # NOTE(review): credentials are hard-coded here; consider moving them
        # to configuration that is kept out of version control.
        bot = MatrixBot('message-bot', 'aaaAAA111!!!')

        # Build the texts to send.
        process_send = self.process_data(result_text, prompt_words, role)

        # Deliver each message.
        for process_text in process_send:
            bot.send_message(process_text)


if __name__ == "__main__":
    # Ensure a config file exists before any section is read.
    runner = AINEWS()
    runner.create_config_if_not_exists()

    # map() is lazy, so each section is loaded right before it is processed,
    # in the order given by key_list.
    for urls, prompt, system_role in map(runner.load_config, key_list):
        runner.main(urls, prompt, system_role)
    print('done!')