jack 11 months ago
parent
commit
2f65213de0

+ 1 - 0
ai_news/Readme.md

@@ -0,0 +1 @@
+pip install httpx beautifulsoup4 ollama matrix-client playwright && playwright install chromium

+ 265 - 0
ai_news/ai_news.py

@@ -0,0 +1,265 @@
+# -*- coding: utf-8 -*-
+import os
+import re
+import json
+import httpx
+import asyncio
+import time
+from bs4 import BeautifulSoup
+from ollama import Client as oClient
+from playwright.async_api import async_playwright
+from matrix_client.client import MatrixClient
+from matrix_client.api import MatrixHttpApi
+
+key_list = ['web3']  # config.json sections to process on each run
+text_batch = 0  # 1: summarize each page separately; 0: concatenate all pages into a single prompt
+
+
+class OllamaChat(object):
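+    # Thin wrapper around a local Ollama server: sends the scraped text plus prompt words and returns the model's reply as a string.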
+    def __init__(self, host='http://192.168.31.28:11434'):
+        self.host = host
+
+    def call_ollama(self, role, text, prompt_words, model='llava:13b', temperature=0.4):
+        # Call a model hosted on the Ollama server
+        message = text + '\n\n' + prompt_words
+        print(f'use model: {model}')
+        try:
+            response_iter = oClient(host=self.host).chat(model=model,
+                                                         messages=[
+                                                             {'role': 'system', 'content': role},
+                                                             {'role': 'user', 'content': message}
+                                                         ],
+                                                         options={"temperature": temperature},
+                                                         stream=False)
+            return response_iter['message']['content']
+        except Exception as e:
+            print(f"\n发生错误: {e}")
+            return None
+
+
+class MatrixBot:
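+    # Minimal Matrix bot: logs in when constructed and posts messages to a fixed room.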
+    def __init__(self, user, password):
+        self.base_url = "https://matrix.erhe.top"
+        self.user = user
+        self.password = password
+        self.client = MatrixClient(self.base_url)
+        self.token = self.login()
+        self.to = "!CgWvWEnLbKYvhXLvil:chat.abeginner.cn"
+
+    def login(self):
+        self.token = self.client.login(username=self.user, password=self.password)
+        return self.token
+
+    def send_message(self, message):
+        if self.token:
+            try:
+                api = MatrixHttpApi(self.base_url, token=self.token)
+                api.send_message(self.to, message)
+            except Exception as e:
+                print(e)
+                api = MatrixHttpApi(self.base_url, token=self.token)
+                api.send_message(self.to, str(e))
+
+        else:
+            print("Bot is not logged in. Please login first.")
+
+
+class AINEWS:
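+    # Pipeline: load a config section, scrape the target pages (httpx or Playwright), save the raw text, then optionally summarize with Ollama and post via Matrix.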
+    def create_config_if_not_exists(self):
+        # Create config.json next to this script and write an example section if it does not exist
+        current_dir = os.path.dirname(os.path.abspath(__file__))  # directory of this file
+
+        # Build the full path to config.json
+        config_path = os.path.join(current_dir, 'config.json')
+
+        # Check whether config.json already exists
+        if not os.path.exists(config_path):
+            # If not, create it and write the default JSON data
+            default_config = {
+                "example": {
+                    "use_browser": 0,
+                    "ai_host": 'http://127.0.0.1:11434',
+                    "target_url_list": ['目标网站'],
+                    "role": "AI的角色, 例如: 你是一个聊天机器人",
+                    "prompt_words": "提示词: 帮我总结, 用中文回复"
+                }
+            }
+
+            # Write the default JSON data to config.json
+            with open(config_path, 'w', encoding='utf-8') as f:
+                json.dump(default_config, f, indent=4)
+
+            print(f"Created {config_path} with default configuration.")
+
+    def save_to_txt(self, url_to_text):
+        # Save the scraped news text to a timestamped .txt file under save_txt/
+        current_file_path = os.path.dirname(__file__)
+        save_file_path = os.path.join(current_file_path, 'save_txt')
+        if not os.path.exists(save_file_path):
+            os.makedirs(save_file_path)
+        file_path = os.path.join(save_file_path, str(int(time.time())) + '.txt')
+        with open(file_path, 'w', encoding='utf-8') as f:
+            f.write(str(url_to_text))
+
+
+    def load_config(self, key):
+        # Read config.json from the same directory as this script
+        config = {}
+        config_path = os.path.join(os.path.dirname(os.path.abspath(__file__)), 'config.json')
+        if os.path.exists(config_path):
+            with open(config_path, 'r', encoding='utf-8') as f:
+                config = json.load(f)
+
+        if not config:
+            print('config.json does not exist!')
+            exit(1)
+
+        k = config[key]
+        return k['target_url_list'], k['prompt_words'], k['role'], k['use_browser'], k['ai_host']
+
+    async def get_htmls(self, urls):
+        # Fetch the HTML of every URL concurrently with httpx
+        async with httpx.AsyncClient() as client:
+            async def get_html(url):
+                try:
+                    print(f'Opening: {url}')
+                    # Send a GET request for the page content
+                    response = await client.get(url)
+                    response.raise_for_status()  # make sure the request succeeded
+
+                    # Parse the HTML with BeautifulSoup
+                    soup = BeautifulSoup(response.text, 'html.parser')
+
+                    # Extract the plain text
+                    text = soup.get_text(separator=' ', strip=True)
+
+                    # Collapse extra whitespace
+                    cleaned_text = re.sub(r'\s+', ' ', text).strip()
+
+                    return url, cleaned_text
+                except Exception as e:
+                    print(f"Error fetching {url}: {e}")
+                    return url, ""
+
+            # Fetch all sites concurrently with asyncio.gather
+            tasks = [get_html(url) for url in urls]
+            results = await asyncio.gather(*tasks)
+
+            # Store the results in a dict keyed by URL
+            url_to_text = {url: text for url, text in results}
+
+            return url_to_text
+
+    async def get_htmls_with_browser(self, urls):
+        # Fetch HTML with Playwright for sites that need JavaScript rendering
+        url_to_text = {}
+
+        async with async_playwright() as p:
+            # Launch a headless browser
+            browser = await p.chromium.launch(headless=True)
+            # Create a browser context
+            context = await browser.new_context()
+
+            async def get_html(url):
+                try:
+                    print(f'Opening: {url}')
+                    # Open a new page in the context
+                    page = await context.new_page()
+                    # Navigate to the target URL
+                    await page.goto(url)
+
+                    # Scroll the page to trigger lazily loaded content
+                    await self.scroll_to_percentage(page)
+
+                    # Grab the rendered HTML
+                    html = await page.content()
+                    # Parse the HTML with BeautifulSoup
+                    soup = BeautifulSoup(html, 'html.parser')
+                    # Extract the plain text
+                    text = soup.get_text(separator=' ', strip=True)
+                    # Collapse extra whitespace
+                    cleaned_text = re.sub(r'\s+', ' ', text).strip()
+                    # Close the page
+                    await page.close()
+                    return url, cleaned_text
+                except Exception as e:
+                    print(f"Error fetching {url}: {e}")
+                    return url, ""
+
+            # Fetch all sites concurrently with asyncio.gather
+            tasks = [get_html(url) for url in urls]
+            results = await asyncio.gather(*tasks)
+
+            # Store the results in a dict keyed by URL
+            url_to_text = {url: text for url, text in results}
+
+            # Close the context and the browser
+            await context.close()
+            await browser.close()
+
+        return url_to_text
+
+    @staticmethod
+    async def scroll_to_percentage(page):
+        # Print the page title before scrolling
+        title = await page.title()
+        print(f'Scrolling browser page: {title}')
+
+        percentage_list = [i for i in range(5, 101, 2)]
+        for percentage in percentage_list:
+            # Compute the scroll offset for the given percentage of the page height
+            height = await page.evaluate("() => document.body.scrollHeight")
+            scroll_position = height * (percentage / 100)
+            # Jump to that position
+            await page.evaluate(f"window.scrollTo({{top: {scroll_position}, behavior: 'smooth'}})")
+            await asyncio.sleep(0.5)  # async sleep so other fetches can proceed
+        await page.evaluate("window.scrollTo({top: 0, behavior: 'smooth'})")
+
+    def process_data(self, result_text, prompt_words, role, ai_host):
+        # Organize the fetched data and return the messages ready to send
+        process_send = []
+        O = OllamaChat(ai_host)
+        if text_batch:
+            for k, v in result_text.items():
+                response_context = O.call_ollama(role, v, prompt_words)
+                if response_context:
+                    message = f'{k}\n{response_context}\n'
+                    process_send.append(message)
+        else:
+            t = ''
+            for k, v in result_text.items():
+                t += f'{k}\n{v}\n'
+            response_context = O.call_ollama(role, t, prompt_words)
+            if response_context:
+                process_send.append(response_context)
+        return process_send
+
+    def main(self, target_url_list, prompt_words, role, use_browser, ai_host):
+        # Fetch the HTML content of every target page
+        if use_browser:
+            result_text = asyncio.run(self.get_htmls_with_browser(target_url_list))
+        else:
+            result_text = asyncio.run(self.get_htmls(target_url_list))
+
+        # Save the raw text to disk
+        self.save_to_txt(result_text)
+
+        # # If you only need to save the scraped data and skip the AI, comment out the block below
+        # # Create the message bot instance
+        # bot = MatrixBot('message-bot', 'aaaAAA111!!!')
+        #
+        # # Process the text data to send
+        # process_send = self.process_data(result_text, prompt_words, role, ai_host)
+        #
+        # # Send the messages
+        # for process_text in process_send:
+        #     bot.send_message(process_text)
+
+
+if __name__ == "__main__":
+    ainews = AINEWS()
+    ainews.create_config_if_not_exists()
+
+    for key in key_list:
+        target_url_list, prompt_words, role, use_browser, ai_host = ainews.load_config(key)
+        ainews.main(target_url_list, prompt_words, role, use_browser, ai_host)
+    print('done!')

+ 49 - 0
ai_news/config.json

@@ -0,0 +1,49 @@
+{
+  "example": {
+    "use_browser": 0,
+    "ai_host": "http://127.0.0.1:11434",
+    "target_url_list": [],
+    "prompt_words": "",
+    "role": ""
+  },
+  "web3": {
+    "use_browser": 0,
+    "ai_host": "http://home.erhe.link:36001",
+    "target_url_list": [
+      "https://wublock123.com",
+      "https://m.odaily.news/newsflash",
+      "https://www.chaincatcher.com/news",
+      "https://www.panewslab.com/",
+      "https://www.theblockbeats.info/newsflash",
+      "https://www.jinse.cn/lives",
+      "https://www.techflowpost.com/article/index.html",
+      "https://foresightnews.pro/news",
+      "https://www.web3sj.com/news/"
+    ],
+    "role": "你是一个新闻报播员, 负责理解和报播新闻, 请用中文回答",
+    "prompt_words": "这是最近的数字币相关的新闻, 你现在是一个新闻报播员, 帮我整理并总结以上新闻, 分析一下数字币接下来大概得走势, 再在此基础上, 给每一条新闻做一个情感分析, 最后输出一下总得分"
+  },
+  "A": {
+    "use_browser": 1,
+    "ai_host": "http://home.erhe.link:36001",
+    "target_url_list": [
+      "https://www.eastmoney.com/",
+      "https://www.jrj.com.cn/",
+      "https://www.10jqka.com.cn/",
+      "https://www.stcn.com/",
+      "https://www.cs.com.cn/",
+      "https://www.cnstock.com/",
+      "https://www.cls.cn/",
+      "https://www.yicai.com/",
+      "https://www.hexun.com/",
+      "https://www.wind.com.cn/",
+      "https://www.sse.com.cn/",
+      "https://www.szse.cn/",
+      "https://www.xueqiu.com/",
+      "https://www.jiemian.com/",
+      "https://www.caixin.com/"
+    ],
+    "prompt_words": "给你一个或多个网页的源代码, 里面是未清洗的网页源代码,你可以无视网页源代码的部分,关注内容就行,重复的话就不用说了,帮我总结一下这些网站的内容, 请用中文回答",
+    "role": ""
+  }
+}

+ 27 - 0
ai_news/requirements.txt

@@ -0,0 +1,27 @@
+annotated-types==0.7.0
+anyio==4.8.0
+beautifulsoup4==4.12.3
+certifi==2024.12.14
+charset-normalizer==3.4.1
+distro==1.9.0
+exceptiongroup==1.2.2
+gotify==0.6.0
+greenlet==3.1.1
+h11==0.14.0
+httpcore==1.0.7
+httpx==0.27.2
+idna==3.10
+jiter==0.8.2
+matrix-client==0.4.0
+ollama==0.4.5
+openai==1.59.6
+playwright==1.49.1
+pydantic==2.10.5
+pydantic_core==2.27.2
+pyee==12.0.0
+requests==2.32.3
+sniffio==1.3.1
+soupsieve==2.6
+tqdm==4.67.1
+typing_extensions==4.12.2
+urllib3==1.26.20

+ 7 - 7
message/message_coin_detail.py

@@ -64,7 +64,7 @@ def fetch_coin_data(target):
             # text += f'Diluted Market Value: {dilute}\n'
             # text += f'Logo: {logoUrl}\n'
 
-            return text + '\n'
+            return text
 
 
 def fetch_vix_data():
@@ -159,7 +159,7 @@ def main():
         for retry in range(1, retry_count + 1):
             result = fetch_coin_data(target)
             if result:
-                text += result + '\n\n'
+                text += result + '\n'
                 break
             else:
                 print(f"{target} Failed to fetch data. retry: {retry}")
@@ -181,17 +181,17 @@ def main():
     for retry in range(1, retry_count + 1):
         result = fetch_gas_data()
         if result:
-            text += result + '\n\n'
+            text += '\n' + result + '\n\n'
             break
         else:
             # print(f"Failed to fetch Gas data. retry: {retry}")
             if retry == retry_count:
                 text += f"Failed to fetch Gas data. retry count: {retry}"
 
-    # if text:
-    #     GotifyNotifier('Real-time coin price\n', text, 'AgfOJESqDKftBTQ').send_message()
-    # else:
-    #     print('No Data')
+    if text:
+        GotifyNotifier('Real-time coin price\n', text, 'AgfOJESqDKftBTQ').send_message()
+    else:
+        print('No Data')
 
 
 if __name__ == "__main__":

+ 38 - 0
utils/utils_call_ollama.py

@@ -0,0 +1,38 @@
+# -*- coding: utf-8 -*-
+import time
+from ollama import Client as oClient
+
+
+class OllamaChat(object):
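+    # Standalone Ollama chat helper: the host is passed per call instead of being stored on the instance.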
+    def call_ollama(self, host, role, text, prompt_words, model='llava:13b', temperature=0.4):
+        message = text + '\n\n' + prompt_words
+        print(f'use model: {model}')
+        try:
+            response_iter = oClient(host=host).chat(model=model,
+                                                    messages=[
+                                                        {'role': 'system', 'content': role},
+                                                        {'role': 'user', 'content': message}
+                                                    ],
+                                                    options={"temperature": temperature},
+                                                    stream=False)
+            return response_iter['message']['content']
+        except Exception as e:
+            print(f"\n发生错误: {e}")
+
+
+# if __name__ == "__main__":
+#     C = OllamaChat()
+#     start_time = time.time()
+#
+#     role = 'You are a chatbot'
+#
+#     text = 'hello'
+#
+#     prompt_words = 'Hello there'
+#
+#     response_context = C.call_ollama('http://192.168.31.28:11434', role, text, prompt_words, model='llava:13b')
+#     print(response_context)
+#
+#     end_time = time.time()
+#     run_time = end_time - start_time
+#     print(f"Run time: {run_time} seconds\n")

+ 10 - 10
utils/utils_send_matrix.py

@@ -11,7 +11,7 @@ class MatrixBot:
         self.password = password
         self.client = MatrixClient("https://matrix.erhe.top")
         self.token = self.login()
-        self.to = "!ddrrTpQmepfgivMxeW:chat.abeginner.cn"
+        self.to = "!CgWvWEnLbKYvhXLvil:chat.abeginner.cn"
 
     def login(self):
         self.token = self.client.login(username=self.user, password=self.password)
@@ -25,16 +25,16 @@ class MatrixBot:
             except Exception as e:
                 print(e)
                 api = MatrixHttpApi(self.base_url, token=self.token)
-                api.send_message(self.to, e)
+                api.send_message(self.to, str(e))
 
         else:
             print("Bot is not logged in. Please login first.")
 
-# if __name__ == '__main__':
-# 测试调用
-# user = "bot1"
-# pw = "aaaAAA111!!!"
-# message = "123987456"
-#
-# bot = MatrixBot(user, pw)
-# bot.send_message(message)
+if __name__ == '__main__':
+    # Test call
+    user = "message-bot"
+    pw = "aaaAAA111!!!"
+    message = "123987456"
+
+    bot = MatrixBot(user, pw)
+    bot.send_message(message)