# -*- coding: UTF-8 -*-
"""Search RSSHub keyword feeds and e-mail any items not seen before.

For every configured (source site, keyword) pair the script downloads the
matching RSSHub feed, extracts the title, publication date and link of each
feed item, stores the items that are not yet in the database and sends one
e-mail per source site that produced new items.
"""
import random
import re
import time
import xml.etree.ElementTree as ET
from datetime import datetime

import httpx

from tools_mongo_handle import MongoHandle
from tools_send_email import SendEmail
import tools_load_config

config_json = tools_load_config.load_config()
base_project = tools_load_config.get_base_path()
PROJECT_NAME = config_json.get('PROJECT_NAME')

# Feed URL for each supported source site; ``{key}`` is the search keyword.
FEED_URL_TEMPLATES = {
    '什么值得买': 'https://rsshub.app/smzdm/keyword/{key}',
    '新浪微博': 'https://rsshub.app/weibo/keyword/{key}',
    '36kr': 'https://rsshub.app/36kr/search/articles/{key}',
    '虎嗅网': 'https://rsshub.app/huxiu/search/{key}',
}

# Format of the feed's <pubDate> text and of the timestamps that are stored.
PUBDATE_FORMAT = '%a, %d %b %Y %H:%M:%S GMT'
STORED_DATETIME_FORMAT = '%Y-%m-%d %H:%M:%S'

# Keyword list shared by several source sites.
_SHARED_KEYWORDS = [
    '测试网', '比特币', 'web3', 'CoinToEarn', 'YourAirdropETH', 'VIP8888883',
    'duola_eth', 'sanyi_eth', 'kuangshenbtc', 'jianshubiji',
]

# Keywords searched by default, grouped by source site.
DEFAULT_INPUT_KEYS = {
    '什么值得买': ['京东', '券', '鼠标', '键盘', '硬盘', '咖啡', '显示器'],
    '新浪微博': list(_SHARED_KEYWORDS),
    '36kr': list(_SHARED_KEYWORDS),
    '虎嗅网': list(_SHARED_KEYWORDS),
}


def _parse_feed_items(xml_text):
    """Extract ``[title, postdate, link]`` rows from an RSS document.

    Args:
        xml_text: The feed body as text.

    Returns:
        A list with one ``[title, postdate, link]`` row per usable ``<item>``
        element (spaces removed from the title, postdate reformatted with
        ``STORED_DATETIME_FORMAT``), or ``None`` when ``xml_text`` is not
        well-formed XML. Items that lack a title, pubDate or link, or whose
        pubDate does not match ``PUBDATE_FORMAT``, are skipped.
    """
    # NOTE(review): xml.etree.ElementTree is not hardened against maliciously
    # constructed XML (see the Python documentation); the feed host is
    # treated as trusted here.
    try:
        root = ET.fromstring(xml_text)
    except ET.ParseError as e:
        print(f'feed is not well-formed XML: {e}')
        return None

    rows = []
    for item in root.iter('item'):
        title = (item.findtext('title') or '').strip()
        pub_date = (item.findtext('pubDate') or '').strip()
        link = (item.findtext('link') or '').strip()
        if not (title and pub_date and link):
            continue
        try:
            posted = datetime.strptime(pub_date, PUBDATE_FORMAT)
        except ValueError:
            # One odd date must not abort the whole run.
            print(f'skipping item with unexpected pubDate: {pub_date!r}')
            continue
        rows.append([
            title.replace(' ', ""),
            posted.strftime(STORED_DATETIME_FORMAT),
            link,
        ])
    return rows


def _build_email_body(source, entries, timestamp):
    """Render the plain-text e-mail body for the new entries of one source.

    Args:
        source: Name of the source site the entries came from.
        entries: Dicts with the keys ``source``, ``keyword``, ``title``,
            ``postdate`` and ``link``.
        timestamp: Already formatted time shown in the message header.

    Returns:
        The message body as a single string.
    """
    separator = "*" * 50
    parts = [
        f'KeyWord Search Message\n\nSource site: {source}\n\n{separator}\n\n'
        f'posted at {timestamp}\n\n{separator}\n\n'
    ]
    for entry in entries:
        parts.append(
            f'source: {entry["source"]}, keyword: {entry["keyword"]}\n'
            f'title: {entry["title"]}, postdate: {entry["postdate"]}\n'
            f'link: {entry["link"]}\n'
            f'\n{separator}\n'
        )
    return ''.join(parts)


class KeySearch(object):
    """Fetch keyword feeds, persist unseen items and e-mail them."""

    def __init__(self):
        """Open the ``KeyWordSearch`` database/collection through MongoHandle."""
        self.mongo = MongoHandle(
            db='KeyWordSearch',
            collection='KeyWordSearch',
            del_db=False,
            del_collection=False,
            auto_remove=0,
        )

    def get_data(self, source, key):
        """Download and parse the feed for one keyword on one source site.

        Args:
            source: A key of ``FEED_URL_TEMPLATES``.
            key: The keyword to search for.

        Returns:
            ``{key: [[title, postdate, link], ...]}`` on success (the list
            may be empty), or ``None`` when ``key`` is empty, ``source`` is
            unknown, the request fails, the status code is not 200 or the
            body cannot be parsed.
        """
        if not key:
            return None

        template = FEED_URL_TEMPLATES.get(source)
        if template is None:
            print(f'unknown source: {source}')
            return None
        url = template.format(key=key)

        try:
            resp = httpx.get(url)
        except (httpx.HTTPError, httpx.InvalidURL) as e:
            print(f'请求失败: {e}\n目标地址: {url}')
            return None

        if resp.status_code != 200:
            # The failure is only printed; no notification is sent from here.
            print(f'请求失败, 状态码: {resp.status_code}, 源: {source}, 关键词: {key}')
            # Extra pause after a failed response before the caller goes on.
            time.sleep(random.uniform(3, 5))
            return None

        resp.encoding = 'utf-8'
        rows = _parse_feed_items(resp.text)
        if rows is None:
            return None
        return {key: rows}

    def save_to_mongo(self, result_data):
        """Insert items that are not stored yet and return them by source.

        Args:
            result_data: ``{source: {keyword: [[title, postdate, link], ...]}}``.

        Returns:
            ``{source: [document, ...]}`` holding only the documents inserted
            by this call; empty when nothing was new.
        """
        new_data_to_email = {}
        for source, by_keyword in result_data.items():
            for keyword, rows in by_keyword.items():
                for title, postdate, link in rows:
                    # An item counts as known when title, date and link all match.
                    existing = self.mongo.collection.find_one(
                        {'title': title, 'postdate': postdate, 'link': link}
                    )
                    if existing is not None:
                        continue
                    document = {
                        'source': source,
                        'keyword': keyword,
                        'title': title,
                        'postdate': postdate,
                        'link': link,
                        'create_datetime': datetime.now().strftime(STORED_DATETIME_FORMAT),
                    }
                    new_data_to_email.setdefault(source, []).append(document)
                    self.mongo.collection.insert_one(document)
        return new_data_to_email

    def _fetch_all(self, input_keys):
        """Fetch every (source, keyword) pair, pausing between requests."""
        result_data = {}
        for source, keywords in input_keys.items():
            for keyword in keywords:
                if not keyword:
                    continue
                print(f'正在获取 {source} - {keyword} 数据')
                datas = self.get_data(source, keyword)
                # Randomised delay so the feed host is not hit in a burst.
                time.sleep(random.uniform(4, 6))
                if not datas:
                    print(f'{keyword}: nodata')
                    continue
                result_data.setdefault(source, {}).update(datas)
        return result_data

    def _send_notifications(self, new_data_to_email):
        """Send one e-mail per source site listing its new entries."""
        for source, entries in new_data_to_email.items():
            timestamp = datetime.now().strftime(STORED_DATETIME_FORMAT)
            SendEmail(
                subject=f"{source} - KeyWord Search Message",
                title=f'New Message ({timestamp})',
                text=_build_email_body(source, entries, timestamp),
            ).send()

    def main(self, input_keys=None):
        """Run one full search / store / notify cycle.

        Args:
            input_keys: Optional ``{source: [keyword, ...]}`` mapping;
                ``DEFAULT_INPUT_KEYS`` is used when omitted.
        """
        if input_keys is None:
            input_keys = DEFAULT_INPUT_KEYS

        result_data = self._fetch_all(input_keys)
        new_data_to_email = self.save_to_mongo(result_data)

        # Only send mail when something new was stored.
        if new_data_to_email:
            self._send_notifications(new_data_to_email)


if __name__ == '__main__':
    print('keyword reminder start')
    search = KeySearch()
    search.main()
    print('keyword reminder done')