Broncode bekijken

大概完成了

jack 11 maanden geleden
bovenliggende
commit
3c0fd75881
5 gewijzigde bestanden met 59 toevoegingen en 167 verwijderingen
  1. + 0 - 34
      api_deepseek.py
  2. + 0 - 30
      api_kimi.py
  3. + 0 - 36
      api_ollama.py
  4. + 2 - 2
      config.json
  5. + 57 - 65
      main.py

+ 0 - 34
api_deepseek.py

@@ -1,34 +0,0 @@
-# -*- coding: utf-8 -*-
-
-from openai import OpenAI
-
-'''
-deepseek-chat
-'''
-
-
-class DeepSeek(object):
-    def call_deepseek(self, text, prompt_words):
-        try:
-            message = text + '\n\n' + prompt_words
-            print('call deepseek')
-            client = OpenAI(
-                api_key="sk-20afb9967a124b63a6f67dcb69f17d74",
-                base_url="https://api.deepseek.com",
-            )
-
-            completion = client.chat.completions.create(
-                model="deepseek-chat",
-                messages=[
-                    {"role": "system", "content": "你是一个数据整理助手"},
-                    {"role": "user", "content": f"{message}"},
-                ],
-                stream=False,
-                temperature=0.3,
-            )
-
-            result = completion.choices[0].message.content
-
-            return result
-        except Exception as e:
-            print(e)

+ 0 - 30
api_kimi.py

@@ -1,30 +0,0 @@
-# -*- coding: utf-8 -*-
-
-from openai import OpenAI
-
-'''
-moonshot-v1-8k
-moonshot-v1-32k
-moonshot-v1-128k
-'''
-
-class KIMI(object):
-    def call_kimi(self, message):
-        try:
-            print('call kimi')
-            client = OpenAI(
-                api_key="sk-Fz9tRF8naXReN2H7zcB1AEtnpOmhonFPJgxlVvQHpql54Ymu",
-                base_url="https://api.moonshot.cn/v1",
-            )
-
-            completion = client.chat.completions.create(
-                model="moonshot-v1-128k",
-                messages=[{"role": "user", "content": f"{message}"}],
-                temperature=0.3,
-            )
-
-            result = completion.choices[0].message.content
-
-            return result
-        except Exception as e:
-            print(e)

+ 0 - 36
api_ollama.py

@@ -1,36 +0,0 @@
-# -*- coding: utf-8 -*-
-import time
-from ollama import Client as oClient
-
-class OllamaChat(object):
-    def call_ollama(self, host, role, text, prompt_words, model='llava:13b', temperature=0.4):
-        messages = text + '\n\n' + prompt_words
-        self.client = oClient(host=host)
-        self.model = model
-        self.messages = messages
-        self.temperature = temperature
-
-        print(f'use model: {self.model}')
-        try:
-            response_iter = self.client.chat(model=self.model,
-                                             messages=[
-                                                 {'role': 'system', 'content': role},
-                                                 {'role': 'user', 'content': self.messages}
-                                             ],
-                                             options={"temperature": self.temperature},
-                                             stream=False)
-            return response_iter['message']['content']
-        except Exception as e:
-            print(f"\n发生错误: {e}")
-
-
-# if __name__ == "__main__":
-#     C = ChatBot()
-#     start_time = time.time()
-#
-#     response_context = C.call_ollama('http://127.0.0.1:11434', 'hello,你好呀', 'llava:13b')
-#     print(response_context)
-#
-#     end_time = time.time()
-#     run_time = end_time - start_time
-#     print(f"程序运行时间:{run_time} 秒\n")

+ 2 - 2
config.json

@@ -16,8 +16,8 @@
       "https://foresightnews.pro/news",
       "https://www.web3sj.com/news/"
     ],
-    "prompt_words": "给你一个或多个网页的源代码, 里面是未清洗的网页源代码,你可以无视网页源代码的部分,关注内容就行,重复的话就不用说了,中间不需要有空行 请用中文回答",
-    "role": "你是一个新闻报播员, 用简短的话, 描述一下这些网页的内容"
+    "role": "你是一个新闻报播员, 负责理解和报播新闻, 请用中文回答",
+    "prompt_words": "这是最近的数字币相关的新闻, 你现在是一个新闻报播员, 帮我整理并总结以上新闻, 分析一下数字币接下来大概得走势, 再在此基础上, 给每一条新闻做一个情感分析, 最后输出一下总得分"
   },
   "A": {
     "target_url_list": [

+ 57 - 65
main.py

@@ -2,15 +2,15 @@
 import os
 import re
 import json
-from playwright.async_api import async_playwright
+import httpx
 import asyncio
+import time
 from bs4 import BeautifulSoup
-from api_ollama import *
-from api_kimi import *
-from api_deepseek import *
-from send_matrix import *
+from ollama import Client as oClient
+from send_matrix import MatrixBot
 
 key = 'web3'
+text_batch = 0
 
 
 class AINEWS:
@@ -27,8 +27,8 @@ class AINEWS:
             default_config = {
                 "example": {
                     "target_url_list": [],
-                    "prompt_words": "",
-                    "role": ""
+                    "role": "",
+                    "prompt_words": ""
                 }
             }
 
@@ -61,47 +61,23 @@ class AINEWS:
         k = config[key]
         return k['target_url_list'], k['prompt_words'], k['role']
 
-    @staticmethod
-    async def scroll_to_percentage(page):
-        percentage_list = [i for i in range(5, 101, 2)]
-        for percentage in percentage_list:
-            # 计算页面的指定百分比高度
-            height = await page.evaluate("() => document.body.scrollHeight")
-            scroll_position = height * (percentage / 100)
-            # 跳转到指定的百分比位置
-            await page.evaluate(f"window.scrollTo({{top: {scroll_position}, behavior: 'smooth'}})")
-            await asyncio.sleep(0.5)  # 使用异步 sleep
-        await page.evaluate("window.scrollTo({top: 0, behavior: 'smooth'})")
-
     async def get_htmls(self, urls):
-        async with async_playwright() as p:
-            # 启动浏览器
-            browser = await p.chromium.launch(headless=True)
-            # 创建浏览器上下文
-            context = await browser.new_context()
-
+        async with httpx.AsyncClient() as client:
             async def get_html(url):
                 try:
                     print(f'正在打开: {url}')
-                    # 在上下文中打开新页面
-                    page = await context.new_page()
+                    # 发送 GET 请求获取页面内容
+                    response = await client.get(url)
+                    response.raise_for_status()  # 确保请求成功
 
-                    # 导航到指定网址
-                    await page.goto(url, wait_until='networkidle')  # 等待网络空闲
+                    # 使用 BeautifulSoup 解析 HTML 内容
+                    soup = BeautifulSoup(response.text, 'html.parser')
 
-                    # 滚动页面, 获取更多信息
-                    await self.scroll_to_percentage(page)
+                    # 提取纯文本内容
+                    text = soup.get_text(separator=' ', strip=True)
 
-                    # 获取渲染后的 HTML
-                    html = await page.content()
-                    # 关闭页面
-                    await page.close()
-
-                    # 使用 BeautifulSoup 格式化 HTML 内容
-                    soup = BeautifulSoup(html, 'html.parser')
-                    formatted_html = soup.get_text()
-                    cleaned_text = re.sub(r'[\n\t\r]+', ' ', formatted_html)
-                    cleaned_text = re.sub(r'\s+', ' ', cleaned_text).strip()
+                    # 去除多余的空白字符
+                    cleaned_text = re.sub(r'\s+', ' ', text).strip()
 
                     return url, cleaned_text
                 except Exception as e:
@@ -115,38 +91,54 @@ class AINEWS:
             # 将结果存储在字典中
             url_to_text = {url: text for url, text in results}
 
-            # 关闭上下文和浏览器
-            await context.close()
-            await browser.close()
-
             return url_to_text
 
+    def call_ollama(self, host, role, text, prompt_words, model='llava:13b', temperature=0.4):
+        message = text + '\n\n' + prompt_words
+        print(f'use model: {model}')
+        try:
+            response_iter = oClient(host=host).chat(model=model,
+                                                    messages=[
+                                                        {'role': 'system', 'content': role},
+                                                        {'role': 'user', 'content': message}
+                                                    ],
+                                                    options={"temperature": temperature},
+                                                    stream=False)
+            return response_iter['message']['content']
+        except Exception as e:
+            print(f"\n发生错误: {e}")
+            return None
+
+    def process_data(self, result_text, prompt_words, role):
+        process_send = []
+        if text_batch:
+            for k, v in result_text.items():
+                response_context = self.call_ollama('http://192.168.31.28:11434', role, v, prompt_words)
+                if response_context:
+                    message = f'{k}\n{response_context}\n'
+                    process_send.append(message)
+        else:
+            t = ''
+            for k, v in result_text.items():
+                t += f'{k}\n{v}\n'
+            response_context = self.call_ollama('http://192.168.31.28:11434', role, t, prompt_words)
+            if response_context:
+                process_send.append(response_context)
+        return process_send
+
     def main(self, target_url_list, prompt_words, role):
-        url_to_text = asyncio.run(self.get_htmls(target_url_list))
+        result_text = asyncio.run(self.get_htmls(target_url_list))
+        self.save_to_txt(result_text)
 
         # 创建消息bot实例
         bot = MatrixBot('message-bot', 'aaaAAA111!!!')
 
-        self.save_to_txt(url_to_text)
-
-        O = OllamaChat()
-        for k, v in url_to_text.items():
-            response_context = O.call_ollama('http://127.0.0.1:11434', role, v, prompt_words)
-            message = f'{k}\n{response_context}\n'
-            # 发送消息
-            bot.send_message(message)
-
-        # K = KIMI()
-        # response_context = K.call_kimi(prompt_words)
-        # print(response_context)
-
-        # D = DeepSeek()
-        # for k, v in url_to_text.items():
-        #     response_context = D.call_deepseek(v, prompt_words)
+        # 准备发送 text
+        process_send = self.process_data(result_text, prompt_words, role)
 
-        #     # 保存每一个字符串准备发送信息
-        #     message = f'{k}\n{response_context}\n'
-        #     print(message)
+        # 发送消息
+        for process_text in process_send:
+            bot.send_message(process_text)
 
 
 if __name__ == "__main__":