# main.py
# -*- coding: utf-8 -*-
import re
import asyncio

from playwright.async_api import async_playwright
from bs4 import BeautifulSoup

from api_ollama import *
from api_kimi import *
from api_deepseek import *
from send_to_email import *
  10. class AINEWS:
  11. async def get_htmls(self, urls):
  12. htmls = []
  13. async with async_playwright() as p:
  14. # 启动浏览器
  15. browser = await p.chromium.launch(headless=True)
  16. # 创建浏览器上下文
  17. context = await browser.new_context()
  18. async def get_html(url):
  19. try:
  20. print(f'正在打开: {url}')
  21. # 在上下文中打开新页面
  22. page = await context.new_page()
  23. # 导航到指定网址
  24. await page.goto(url)
  25. # 获取渲染后的 HTML
  26. html = await page.content()
  27. # 关闭页面
  28. await page.close()
  29. return html
  30. except Exception as e:
  31. print(f"Error fetching {url}: {e}")
  32. return ""
  33. # 使用 asyncio.gather 同时获取所有网站的 HTML
  34. tasks = [get_html(url) for url in urls]
  35. htmls_list = await asyncio.gather(*tasks)
  36. # 使用 BeautifulSoup 格式化每个 HTML 内容
  37. formatted_htmls = []
  38. for html in htmls_list:
  39. soup = BeautifulSoup(html, 'html.parser')
  40. formatted_html = soup.get_text()
  41. cleaned_text = re.sub(r'[\n\t\r]+', ' ', formatted_html)
  42. cleaned_text = re.sub(r'\s+', ' ', cleaned_text).strip()
  43. formatted_htmls.append(cleaned_text)
  44. # 将所有格式化后的 HTML 内容合并到一个字符串中
  45. text = "\n".join(formatted_htmls)
  46. # 关闭上下文和浏览器
  47. await context.close()
  48. await browser.close()
  49. return text
  50. def main(self):
  51. urls = ["https://www.smzdm.com/jingxuan/", "https://faxian.smzdm.com/"]
  52. text = asyncio.run(self.get_htmls(urls))
  53. # print(text)
  54. prompt_words = '''
  55. 给你几个个网页的源代码, 里面是未清洗的网页源代码
  56. 你可以无视网页源代码的部分,关注内容就行,重复的话就不用说了
  57. 帮我总结一下内容, 请用中文回答
  58. '''
  59. prompt_words += text
  60. # C = ChatBot('http://erhe.top:27381', prompt_words, 'qwen2.5:3b')
  61. # response_context = C.start_chat()
  62. # print(response_context)
  63. # K = KIMI()
  64. # response_context = K.call_kimi(prompt_words)
  65. # print(response_context)
  66. D = DeepSeek()
  67. response_context = D.call_deepseek(prompt_words)
  68. print(response_context)
  69. if __name__ == "__main__":
  70. ainews = AINEWS()
  71. ainews.main()