| 12345678910111213141516171819202122232425262728293031323334353637383940414243444546474849505152535455 |
- # -*- coding: utf-8 -*-
- '''
- Web crawler that scrapes ChainCatcher news (the data-v-***** attribute used in the selector expires and must be updated periodically).
- '''
- import sys
- import os
- from playwright.sync_api import sync_playwright
- from bs4 import BeautifulSoup
- sys.path.append(os.path.join(os.path.abspath(__file__).split('AutoInfo')[0] + 'AutoInfo'))
- from utils.utils import *
def chaincatcher_news(scoped_attr='data-v-6560eea9'):
    """Scrape the ChainCatcher news page and e-mail the collected headlines.

    Loads https://www.chaincatcher.com/news in headless Chromium, scrolls the
    page for about ten seconds (presumably to trigger lazy loading of more
    items -- confirm against the site), then collects the text of every
    ``<span class="text">`` that carries ``scoped_attr``. Entries whose text
    contains "微信扫码" are dropped. If anything is left it is sent through
    ``SendEmail``; otherwise "No news found." is printed.

    NOTE(review): ``time``, ``datetime`` and ``SendEmail`` are not imported
    explicitly in this file; presumably they arrive via
    ``from utils.utils import *`` -- verify.

    Args:
        scoped_attr: Name of the ``data-v-*`` attribute the target spans must
            have. The module docstring notes that this value expires and has
            to be replaced periodically; the default is the value the script
            was written against.

    Returns:
        None.

    Raises:
        Exception: Any error raised while driving the browser, parsing the
            page or sending the e-mail propagates to the caller unchanged.
    """
    url = "https://www.chaincatcher.com/news"

    with sync_playwright() as p:
        browser = p.chromium.launch(headless=True)
        try:
            # new_page() is inside the try so the browser is closed even if
            # page creation itself fails.
            page = browser.new_page()
            page.goto(url)
            time.sleep(2)

            # Scroll down in small steps for ~10 seconds.
            start_time = time.time()
            while time.time() - start_time < 10:
                page.mouse.wheel(0, 100)
                time.sleep(0.1)

            page_content = page.content()
        finally:
            # Single close point; the HTML snapshot above is all we need.
            browser.close()

    soup = BeautifulSoup(page_content, 'html.parser')
    contents = []
    for span in soup.find_all('span', class_='text', attrs={scoped_attr: True}):
        text = span.get_text(strip=True)
        # Compare against the extracted text: `in` on a Tag tests its child
        # list, so a substring check on the Tag never filtered anything
        # unless a child string matched exactly.
        if "微信扫码" not in text:
            contents.append(text)

    result = '\n'.join(contents)
    if not result:
        print("No news found.")
        return

    now = datetime.now()
    result += f'\n推送时间: {now.strftime("%Y年%m月%d日 %H时%M分%S秒")}'
    title = 'ChainCatcher' + str(now.strftime('%Y-%m-%d %H:%M:%S'))
    sub = 'ChainCatcher News'
    SendEmail(subject=sub, title=title, text=result).send()
    # Alternative notification channel, currently disabled:
    # GotifyNotifier(title='ChainCatcher News', message=result, token_name='news').send_message()
# Script entry: run the scraper, retrying on any failure.
# Up to MAX_ATTEMPTS attempts are made, pausing `sleep_time` seconds between
# consecutive attempts. After the last failed attempt the loop reports the
# failure and stops without sleeping, since no further retry follows.
MAX_ATTEMPTS = 5
for retry in range(MAX_ATTEMPTS):
    try:
        chaincatcher_news()
        break
    except Exception as e:
        if retry + 1 >= MAX_ATTEMPTS:
            print(f"Error occurred: {e}. Giving up after {MAX_ATTEMPTS} attempts.")
            break
        sleep_time = 20
        print(f"Error occurred: {e}. Retrying... {retry + 1} \t sleep time: {sleep_time}")
        time.sleep(sleep_time)
|