# chaincatcher.py (1.9 KB)

  1. # -*- coding: utf-8 -*-
  2. '''
  3. 网络爬虫,抓取链捕手新闻(data-v-***** 此参数会失效, 定期更换)
  4. '''
  5. import sys
  6. import os
  7. from playwright.sync_api import sync_playwright
  8. from bs4 import BeautifulSoup
  9. sys.path.append(os.path.join(os.path.abspath(__file__).split('AutoInfo')[0] + 'AutoInfo'))
  10. from utils.utils import *
  11. def chaincatcher_news():
  12. url = "https://www.chaincatcher.com/news"
  13. with sync_playwright() as p:
  14. browser = p.chromium.launch(headless=True)
  15. page = browser.new_page()
  16. try:
  17. page.goto(url)
  18. time.sleep(2)
  19. start_time = time.time()
  20. while time.time() - start_time < 10:
  21. page.mouse.wheel(0, 100)
  22. time.sleep(0.1)
  23. page_content = page.content()
  24. browser.close()
  25. soup = BeautifulSoup(page_content, 'html.parser')
  26. contents = [span.get_text(strip=True) for span in soup.find_all('span', class_='text', attrs={'data-v-6560eea9': True}) if "微信扫码" not in span]
  27. result = '\n'.join(contents)
  28. if result:
  29. result += f'\n推送时间: {datetime.now().strftime("%Y年%m月%d日 %H时%M分%S秒")}'
  30. title = 'ChainCatcher' + str(datetime.now().strftime('%Y-%m-%d %H:%M:%S'))
  31. sub = 'ChainCatcher News'
  32. SendEmail(subject=sub, title=title, text=result).send()
  33. # GotifyNotifier(title='ChainCatcher News', message=result, token_name='news').send_message()
  34. else:
  35. print("No news found.")
  36. except Exception as e:
  37. raise e
  38. finally:
  39. browser.close()
  40. for retry in range(5):
  41. try:
  42. chaincatcher_news()
  43. break
  44. except Exception as e:
  45. sleep_time = 20
  46. print(f"Error occurred: {e}. Retrying... {retry + 1} \t sleep time: {sleep_time}")
  47. time.sleep(sleep_time)