@@ -8,20 +8,21 @@ from bs4 import BeautifulSoup
 from api_ollama import *
 from api_kimi import *
 from api_deepseek import *
-from send_to_email import *
+from send_matrix import *
 
 key = 'web3'
 
 
 class AINEWS:
 
-    def save_to_txt(self, text):
+    def save_to_txt(self, url_to_text):
         current_file_path = os.path.dirname(__file__)
         save_file_path = os.path.join(current_file_path, 'save_txt')
         if not os.path.exists(save_file_path):
             os.makedirs(save_file_path)
         file = os.path.join(save_file_path, str(int(time.time())) + '.txt')
-        with open(file, 'w', encoding='utf-8') as file:
-            file.write(text)
+        with open(file, 'w', encoding='utf-8') as f:  # 'f', not 'file', so the path is not shadowed
+            f.write(str(url_to_text))
+        print(f'Saved txt file to: {file}')
 
     def load_config(self, key):
         config = {}
 
@@ -49,7 +50,6 @@ class AINEWS:
         await page.evaluate("window.scrollTo({top: 0, behavior: 'smooth'})")
 
     async def get_htmls(self, urls):
-        htmls = []
         async with async_playwright() as p:
             # Launch the browser
             browser = await p.chromium.launch(headless=True)
@@ -62,12 +62,6 @@ class AINEWS:
                     # Open a new page in the context
                     page = await context.new_page()
 
-                    # Listen for navigation events
-                    def handle_navigation(frame):
-                        print(f"Navigation occurred: {frame.url}")
-
-                    page.on('framenavigated', handle_navigation)
-
                     # Navigate to the target URL
                     await page.goto(url, wait_until='networkidle')  # wait for the network to go idle
 
@@ -78,52 +72,63 @@ class AINEWS:
                     html = await page.content()
                     # Close the page
                     await page.close()
-                    return html
+
+                    # Clean up the HTML content with BeautifulSoup
+                    soup = BeautifulSoup(html, 'html.parser')
+                    formatted_html = soup.get_text()
+                    cleaned_text = re.sub(r'[\n\t\r]+', ' ', formatted_html)
+                    cleaned_text = re.sub(r'\s+', ' ', cleaned_text).strip()
+
+                    return url, cleaned_text
                 except Exception as e:
                     print(f"Error fetching {url}: {e}")
-                    return ""
+                    return url, ""
 
             # Fetch every site's HTML concurrently with asyncio.gather
             tasks = [get_html(url) for url in urls]
-            htmls_list = await asyncio.gather(*tasks)
+            results = await asyncio.gather(*tasks)
 
-            # Format each HTML document with BeautifulSoup
-            formatted_htmls = []
-            for html in htmls_list:
-                soup = BeautifulSoup(html, 'html.parser')
-                formatted_html = soup.get_text()
-                cleaned_text = re.sub(r'[\n\t\r]+', ' ', formatted_html)
-                cleaned_text = re.sub(r'\s+', ' ', cleaned_text).strip()
-                formatted_htmls.append(cleaned_text)
-
-            # Merge all the formatted HTML content into one string
-            text = "\n".join(formatted_htmls)
+            # Store the results in a url -> text dict
+            url_to_text = {url: text for url, text in results}
 
             # Close the context and the browser
             await context.close()
             await browser.close()
 
-            return text
+            return url_to_text
 
     def main(self, target_url_list, prompt_words):
-        text = asyncio.run(self.get_htmls(target_url_list))
+        url_to_text = asyncio.run(self.get_htmls(target_url_list))
+
+        self.save_to_txt(url_to_text)
 
-        self.save_to_txt(text)
+        prepare_to_send = ''
 
-        prompt_words += text
+        prompt_words += 'Summarize this content for me, and analyze why cryptocurrencies fell today'
 
-        C = ChatBot('http://127.0.0.1:11434', prompt_words, 'llava:13b')
-        response_context = C.start_chat()
-        print(response_context)
+        # Create the bot once; the per-URL key k only exists inside the loop below
+        C = ChatBot('http://127.0.0.1:11434', prompt_words, 'llava:13b')
+        for k, v in url_to_text.items():
+            response_context = C.call_deepseek(v, prompt_words)
+            message = f'{k}\n{response_context}\n'
+            prepare_to_send += message
+            print(message)
 
         # K = KIMI()
         # response_context = K.call_kimi(prompt_words)
         # print(response_context)
 
         # D = DeepSeek()
-        # response_context = D.call_deepseek(prompt_words)
-        # print(response_context)
+        # for k, v in url_to_text.items():
+        #     response_context = D.call_deepseek(v, prompt_words)
+
+        #     # Collect each string into the message to send
+        #     message = f'{k}\n{response_context}\n'
+        #     prepare_to_send += message
+        #     print(message)
 
+        # Send the accumulated digest rather than only the last message
+        bot = MatrixBot('message-bot', 'aaaAAA111!!!')
+        bot.send_message(prepare_to_send)
 
 
 if __name__ == "__main__":
     ainews = AINEWS()
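
The diff swaps send_to_email for a new send_matrix module whose source is not part of this change. Below is a minimal sketch of what its MatrixBot could look like, assuming it is built on matrix-nio; the homeserver URL, user domain, and room id are placeholders, not values from this repo, and the real module may differ.

# Hypothetical sketch of send_matrix.MatrixBot, assuming matrix-nio
# (pip install matrix-nio). Homeserver, user domain, and room id are
# placeholders; the real send_matrix module may differ.
import asyncio
from nio import AsyncClient


class MatrixBot:
    def __init__(self, username, password,
                 homeserver='https://matrix.example.org',  # placeholder
                 room_id='!ainews:example.org'):           # placeholder
        self.user_id = f'@{username}:example.org'          # placeholder domain
        self.password = password
        self.homeserver = homeserver
        self.room_id = room_id

    def send_message(self, text):
        # Synchronous wrapper so callers such as AINEWS.main() need no event loop
        asyncio.run(self._send(text))

    async def _send(self, text):
        client = AsyncClient(self.homeserver, self.user_id)
        try:
            await client.login(self.password)
            await client.room_send(
                room_id=self.room_id,
                message_type='m.room.message',
                content={'msgtype': 'm.text', 'body': text},
            )
        finally:
            await client.close()

With a class along these lines, bot.send_message(prepare_to_send) in main() posts the whole digest to the configured room in a single message.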