| 123456789101112131415161718192021222324252627282930313233343536373839404142434445464748495051525354555657585960616263646566676869707172737475767778798081828384858687888990919293949596979899100101102103104105106107108109110111112113114115116117118119120121122123124125126127128129130131132133134 |
- # -*- coding: utf-8 -*-
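'''
Apprcn (反斗限免) limited-time free app listings.
1. Fetch the listing data from free.apprcn.com
2. Store it in MongoDB
3. Send it to the configured e-mail address
'''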
- import re
- import sys
- import os
- sys.path.append(os.path.join(os.path.abspath(__file__).split('AutoInfo')[0] + 'AutoInfo'))
- from utils.utils import *
# Load the project configuration once, at import time.
config_json = LoadConfig().load_config()
# Value copied into the "repush_times" field of every scraped record.
DEFAULT_RE_PUSH_TIMES = config_json['DEFAULT_RE_PUSH_TIMES']
class APPRCN(object):
    """Scraper for the free.apprcn.com limited-time-free app listings.

    One run (see :meth:`main`) downloads the listing pages, stores every
    entry whose title is not yet in MongoDB, and e-mails the entries that
    were newly stored.
    """

    def __init__(self):
        """Create the logger, request headers and MongoDB collection handle."""
        self.logs_handle = LogsHandle()
        self.now_day = time.strftime('%Y-%m-%d', time.localtime())
        self.base_url = 'https://free.apprcn.com/page/{}/'
        self.headers = {
            'User-Agent': 'Mozilla/4.0 (compatible; MSIE 8.0; Windows NT 6.1; WOW64; Trident/4.0; SLCC2; Media Center PC 6.0; InfoPath.2; MS-RTC LM 8'
        }
        db = 'NEWS'
        collection = 'apprcn-info'
        self.mongo = MongoHandle(db=db, collection=collection, del_db=False, del_collection=False, auto_remove=0)
        # Records inserted during this run; they become the notification body.
        self.temp_datas = []

    def main(self):
        """Run one fetch -> store -> notify cycle and log the outcome."""
        self.logs_handle.logs_write('apprcn', '开始获取反斗限免数据', 'start', False)
        response_data = self.req()
        if response_data:
            self.save_to_mongo(response_data)
            self.send_to_gotify()
            self.logs_handle.logs_write('apprcn', '反斗限免数据获取完成', 'done', False)
            print('done')
        else:
            self.logs_handle.logs_write('apprcn', '无法获取apprcn数据', 'error', False)

    @staticmethod
    def _extract_entries(page_html):
        """Parse one listing page into ``(title, context, date, url)`` tuples.

        Only the markup between ``<div class="content">`` and
        ``<div class="sidebar">`` is inspected. Tabs and newlines are removed
        from that fragment before matching, so multi-line fields are captured
        as a single line. Returns an empty list when the content container is
        not found.
        """
        fragments = re.findall(r'<div class="content">([\S\s]*?)<div class="sidebar">', page_html)
        if not fragments:
            return []

        # Strip BOTH whitespace characters. The previous loop restarted from
        # the raw fragment on every pass, so only '\n' was actually removed.
        content = fragments[0].replace('\t', '').replace('\n', '')

        context_list = re.findall(r'<p class="note">(.*?)</p>', content)
        title_list = re.findall(r'title="(.*?)"', content)
        post_date_list = re.findall(r'<time>(.*?)</time>', content)
        source_data_list = re.findall(r'<a class="cat" href="(.*?)"', content)
        # zip() truncates to the shortest list, as the original pairing did.
        return list(zip(title_list, context_list, post_date_list, source_data_list))

    def req(self):
        """Download the first nine listing pages and build record dicts.

        Returns:
            A non-empty list of record dicts ready for insertion, or ``None``
            (after logging an error) when no entry could be parsed.

        Raises:
            SystemExit: when any page answers with a non-200 status code.
        """
        urls = ['https://free.apprcn.com/']
        urls.extend(self.base_url.format(page) for page in range(2, 10))

        response_data = []
        for url in urls:
            response = httpx.get(url=url, headers=self.headers)
            if response.status_code != 200:
                self.logs_handle.logs_write('apprcn', '请求失败, 状态码: %s' % response.status_code, 'error', False)
                # sys.exit instead of the interactive-only site helper exit();
                # the exit status is deliberately left at 0 as before.
                sys.exit(0)

            response.encoding = 'utf-8'

            for title, context, post_date, source_data in self._extract_entries(response.text):
                response_data.append({
                    "title": title,
                    "context": context,
                    "source_url": source_data,
                    'link': '',
                    "article_type": '',
                    "article_source": '',
                    "img_url": '',
                    'keyword': '',
                    "posted_date": post_date,
                    "create_time": int(time.time()),
                    "create_datetime": datetime.now().strftime('%Y-%m-%d %H:%M:%S'),
                    "repush_times": DEFAULT_RE_PUSH_TIMES
                })

        if response_data:
            return response_data
        self.logs_handle.logs_write('apprcn', '获取数据失败', 'error', False)
        return None

    def save_to_mongo(self, data):
        """Insert records whose title is not yet present in the collection.

        Every inserted record is also appended to ``self.temp_datas``.

        Args:
            data: iterable of record dicts as produced by :meth:`req`.

        Returns:
            ``0`` if a ``TypeError`` interrupted the run (remaining records
            are skipped), otherwise ``None``.
        """
        print('开始储存 反斗限免 数据')
        for data_to_insert in data:
            try:
                # Look for an existing document with the same title;
                # a missing title falls back to the empty string.
                filter_criteria = {'title': data_to_insert.get('title', '')}
                count = self.mongo.collection.count_documents(filter_criteria)
                if count == 0:
                    # No match found: insert and remember it for notification.
                    self.mongo.collection.insert_one(data_to_insert)
                    self.temp_datas.append(data_to_insert)
            except TypeError as te:
                print('\n%s' % te)
                self.logs_handle.logs_write('反斗限免', '写入数据库报错: %s' % te, 'error', False)
                return 0
        print('储存数据完成', datetime.now().strftime('%Y-%m-%d %H:%M:%S'))

    def send_to_gotify(self):
        """E-mail the records stored during this run, if there are any.

        Despite the method name, delivery currently goes through
        ``SendEmail``; the Gotify call below is disabled.
        """
        if not self.temp_datas:
            self.logs_handle.logs_write('apprcn', '没有新数据, 不发送邮件', 'done', False)
            return

        # Build the message body in one pass instead of repeated '+='.
        text = ''.join(
            '标题: %s\n内容: %s\n时间: %s\n链接: %s\n\n' % (
                data['title'], data['context'], data['posted_date'], data['source_url'])
            for data in self.temp_datas
        )
        title = '反斗限免 - ' + datetime.now().strftime('%Y-%m-%d %H:%M:%S')
        sub = '反斗限免'
        SendEmail(subject=sub, title=title, text=text).send()
        # GotifyNotifier(title=title, message=text, token_name='news').send_message()
        self.logs_handle.logs_write('apprcn', '发送消息完成', 'done', False)
if __name__ == "__main__":
    # Script entry point: run a single fetch/store/notify cycle.
    scraper = APPRCN()
    scraper.main()
|