message_check_base.py 2.2 KB

12345678910111213141516171819202122232425262728293031323334353637383940414243444546474849505152535455565758596061626364
  1. # -*- coding: utf-8 -*-
  2. """
  3. Base for the message module: opens the browser and performs related operations.
  4. """
  5. import random
  6. from playwright.sync_api import sync_playwright
  7. import sys
  8. import os
  9. import time
  10. sys.path.append(os.path.join(os.path.abspath(__file__).split('auto')[0] + 'auto'))
  11. from utils.utils_logs_handle import LogsHandle
  12. class CryptoCrawler:
  13. def __init__(self, url_list, selectors, check_difference=False, headless=True):
  14. self.url_list = url_list
  15. self.selectors = selectors
  16. self.check_difference = check_difference # 用于检测数据是否发生变化 (开关)
  17. self.data_difference = False # 用于检测数据是否发生变化 (结果) (默认 否)
  18. self.logs_handle = LogsHandle() # 记录日志
  19. self.db = 'CHECK'
  20. self.collection = 'check'
  21. self.headless = headless
  22. def main(self):
  23. with sync_playwright() as playwright:
  24. browser = playwright.webkit.launch(headless=self.headless)
  25. context = browser.new_context(viewport={'width': 1920, 'height': 1080})
  26. page = context.new_page()
  27. all_data = []
  28. for url_info in self.url_list:
  29. for key, url in url_info.items():
  30. result_list = []
  31. try:
  32. page.goto(url)
  33. page.wait_for_load_state('load')
  34. time.sleep(5) # 确保页面完全加载
  35. for selector in self.selectors:
  36. element = page.query_selector(selector)
  37. if element:
  38. res = element.text_content().strip()
  39. result_list.append({key: res})
  40. except Exception as e:
  41. err_str = f"Error fetching {url}: {e}"
  42. self.logs_handle.logs_write(self.collection, err_str, 'error', False)
  43. continue
  44. if result_list:
  45. all_data.append(result_list)
  46. time.sleep(random.randint(1, 3))
  47. browser.close()
  48. if all_data:
  49. return all_data
  50. else:
  51. return None