# main.py (5.4 KB)
  1. # -*- coding: utf-8 -*-
  2. import os
  3. import re
  4. import json
  5. import httpx
  6. import asyncio
  7. import time
  8. from bs4 import BeautifulSoup
  9. from ollama import Client as oClient
  10. from send_matrix import MatrixBot
  11. key_list = 'web3'
  12. text_batch = 0
  13. class AINEWS:
  14. def create_config_if_not_exists(self):
  15. # 获取当前文件的目录路径
  16. current_dir = os.path.dirname(os.path.abspath(__file__))
  17. # 构建 config.json 文件的完整路径
  18. config_path = os.path.join(current_dir, 'config.json')
  19. # 检查 config.json 文件是否存在
  20. if not os.path.exists(config_path):
  21. # 如果不存在,创建并写入默认的 JSON 数据
  22. default_config = {
  23. "example": {
  24. "target_url_list": [],
  25. "role": "",
  26. "prompt_words": ""
  27. }
  28. }
  29. # 写入 JSON 数据到 config.json 文件
  30. with open(config_path, 'w', encoding='utf-8') as f:
  31. json.dump(default_config, f, indent=4)
  32. print(f"Created {config_path} with default configuration.")
  33. def save_to_txt(self, url_to_text):
  34. current_file_path = os.path.dirname(__file__)
  35. save_file_path = os.path.join(current_file_path, 'save_txt')
  36. if not os.path.exists(save_file_path):
  37. os.makedirs(save_file_path)
  38. file = os.path.join(save_file_path, str(int(time.time())) + '.txt')
  39. with open(file, 'w', encoding='utf-8') as file:
  40. file.write(str(url_to_text))
  41. print(f'txt文件已保存')
  42. def load_config(self, key):
  43. config = {}
  44. if os.path.exists('config.json'):
  45. with open('config.json', 'r', encoding='utf-8') as f:
  46. config = json.load(f)
  47. if not config:
  48. print('config.json is not exist!')
  49. exit(0)
  50. k = config[key]
  51. return k['target_url_list'], k['prompt_words'], k['role']
  52. async def get_htmls(self, urls):
  53. async with httpx.AsyncClient() as client:
  54. async def get_html(url):
  55. try:
  56. print(f'正在打开: {url}')
  57. # 发送 GET 请求获取页面内容
  58. response = await client.get(url)
  59. response.raise_for_status() # 确保请求成功
  60. # 使用 BeautifulSoup 解析 HTML 内容
  61. soup = BeautifulSoup(response.text, 'html.parser')
  62. # 提取纯文本内容
  63. text = soup.get_text(separator=' ', strip=True)
  64. # 去除多余的空白字符
  65. cleaned_text = re.sub(r'\s+', ' ', text).strip()
  66. return url, cleaned_text
  67. except Exception as e:
  68. print(f"Error fetching {url}: {e}")
  69. return url, ""
  70. # 使用 asyncio.gather 同时获取所有网站的 HTML
  71. tasks = [get_html(url) for url in urls]
  72. results = await asyncio.gather(*tasks)
  73. # 将结果存储在字典中
  74. url_to_text = {url: text for url, text in results}
  75. return url_to_text
  76. def call_ollama(self, host, role, text, prompt_words, model='llava:13b', temperature=0.4):
  77. message = text + '\n\n' + prompt_words
  78. print(f'use model: {model}')
  79. try:
  80. response_iter = oClient(host=host).chat(model=model,
  81. messages=[
  82. {'role': 'system', 'content': role},
  83. {'role': 'user', 'content': message}
  84. ],
  85. options={"temperature": temperature},
  86. stream=False)
  87. return response_iter['message']['content']
  88. except Exception as e:
  89. print(f"\n发生错误: {e}")
  90. return None
  91. def process_data(self, result_text, prompt_words, role):
  92. process_send = []
  93. if text_batch:
  94. for k, v in result_text.items():
  95. response_context = self.call_ollama('http://192.168.31.28:11434', role, v, prompt_words)
  96. if response_context:
  97. message = f'{k}\n{response_context}\n'
  98. process_send.append(message)
  99. else:
  100. t = ''
  101. for k, v in result_text.items():
  102. t += f'{k}\n{v}\n'
  103. response_context = self.call_ollama('http://192.168.31.28:11434', role, t, prompt_words)
  104. if response_context:
  105. process_send.append(response_context)
  106. return process_send
  107. def main(self, target_url_list, prompt_words, role):
  108. result_text = asyncio.run(self.get_htmls(target_url_list))
  109. self.save_to_txt(result_text)
  110. # 创建消息bot实例
  111. bot = MatrixBot('message-bot', 'aaaAAA111!!!')
  112. # 准备发送 text
  113. process_send = self.process_data(result_text, prompt_words, role)
  114. # 发送消息
  115. for process_text in process_send:
  116. bot.send_message(process_text)
  117. if __name__ == "__main__":
  118. ainews = AINEWS()
  119. ainews.create_config_if_not_exists()
  120. for key in key_list:
  121. target_url_list, prompt_words, role = ainews.load_config(key)
  122. ainews.main(target_url_list, prompt_words, role)
  123. print('done!')