# -*- coding: UTF-8 -*-
'''
Search RSS feeds by keyword and report newly found items.
'''
import random
import re
import time
import xml.etree.ElementTree as ET
from datetime import datetime
from urllib.parse import quote

import httpx

import tools_load_config
from tools_mongo_handle import MongoHandle
from tools_send_email import SendEmail
# Load the project configuration once, at import time.
config_json = tools_load_config.load_config()
# Presumably the project's base directory (inferred from the helper's name);
# it is not referenced anywhere else in the visible part of this file.
base_project = tools_load_config.get_base_path()
# Project name taken from the configuration. Assumes config_json is dict-like,
# in which case a missing key yields None -- TODO confirm.
PROJECT_NAME = config_json.get('PROJECT_NAME')
class KeySearch(object):
    """Poll RSSHub keyword feeds, store unseen items and e-mail them.

    Workflow (see ``main``): for every configured source/keyword pair the
    matching rsshub.app feed is downloaded, its items are extracted, items
    not yet present in the ``KeyWordSearch`` collection are inserted, and
    one e-mail per source is sent listing the newly inserted items.
    """

    # Feed URL per supported source; ``{key}`` receives the URL-encoded keyword.
    _FEED_URL_TEMPLATES = {
        '什么值得买': 'https://rsshub.app/smzdm/keyword/{key}',
        '新浪微博': 'https://rsshub.app/weibo/keyword/{key}',
        '36kr': 'https://rsshub.app/36kr/search/articles/{key}',
        '虎嗅网': 'https://rsshub.app/huxiu/search/{key}',
    }

    # Date format expected in the feed's <pubDate> and the format stored/shown.
    _PUBDATE_FORMAT = '%a, %d %b %Y %H:%M:%S GMT'
    _STORED_DATE_FORMAT = '%Y-%m-%d %H:%M:%S'

    # Keyword list shared by several sources in the default configuration.
    _SHARED_KEYWORDS = (
        '测试网', '比特币', 'web3', 'CoinToEarn', 'YourAirdropETH',
        'VIP8888883', 'duola_eth', 'sanyi_eth', 'kuangshenbtc', 'jianshubiji',
    )

    # Default mapping of source name -> keywords polled by ``main``.
    DEFAULT_INPUT_KEYS = {
        '什么值得买': ['京东', '券', '鼠标', '键盘', '硬盘', '咖啡', '显示器'],
        '新浪微博': list(_SHARED_KEYWORDS),
        '36kr': list(_SHARED_KEYWORDS),
        '虎嗅网': list(_SHARED_KEYWORDS),
    }

    def __init__(self):
        """Open the handle used for de-duplication and storage."""
        db = 'KeyWordSearch'
        collection = 'KeyWordSearch'
        # NOTE(review): ``self.mongo.collection`` is used below via find_one /
        # insert_one, so it is presumably a pymongo collection -- confirm.
        self.mongo = MongoHandle(db=db, collection=collection, del_db=False, del_collection=False, auto_remove=0)

    def get_data(self, source, key):
        """Fetch the feed for one source/keyword pair and extract its items.

        Args:
            source: Source name; must be a key of ``_FEED_URL_TEMPLATES``.
            key: Keyword to search for. It is URL-encoded before being placed
                in the request path.

        Returns:
            ``{key: [[title, postdate, link], ...]}`` on success (the list may
            be empty), or ``None`` when the keyword is empty, the source is
            unknown, the request fails, the status is not 200, or the body is
            not well-formed XML.
        """
        if not key:
            return None

        template = self._FEED_URL_TEMPLATES.get(source)
        if template is None:
            # Previously this fell through to httpx.get(None) and was only
            # rescued by the broad except below.
            print(f'未知的源: {source}, 关键词: {key}')
            return None

        # safe='' so that '/', '?', '#' inside a keyword cannot alter the route.
        url = template.format(key=quote(str(key), safe=''))

        try:
            resp = httpx.get(url)
        except Exception as e:
            # Deliberately broad: a single failing feed must not abort the run.
            print(f'请求失败: {e}\n目标地址: {url}')
            return None

        if resp.status_code != 200:
            # TODO: notify by e-mail/log when a feed keeps failing.
            print(f'请求失败, 状态码: {resp.status_code}, 源: {source}, 关键词: {key}')
            # Back off a little before the caller issues the next request.
            time.sleep(random.uniform(3, 5))
            return None

        resp.encoding = 'utf-8'

        try:
            rows = self._parse_feed(resp.text)
        except ET.ParseError as e:
            print(f'解析失败: {e}, 源: {source}, 关键词: {key}')
            return None
        return {key: rows}

    @classmethod
    def _parse_feed(cls, xml_text):
        """Extract ``[title, postdate, link]`` rows from a feed document.

        NOTE(review): the original extraction regex was unreadable in the
        source; this assumes an RSS 2.0 layout whose ``<item>`` elements carry
        ``<title>``, ``<pubDate>`` and ``<link>`` -- confirm against a live feed.

        Items missing any of the three fields, or whose ``<pubDate>`` does not
        match ``_PUBDATE_FORMAT``, are skipped. Spaces are removed from titles.

        Raises:
            xml.etree.ElementTree.ParseError: if ``xml_text`` is not
                well-formed XML.
        """
        # NOTE(security): ElementTree is not hardened against maliciously
        # constructed XML; acceptable only while the feed host is trusted.
        root = ET.fromstring(xml_text)

        rows = []
        for item in root.iter('item'):
            title = (item.findtext('title') or '').strip()
            pub_date = (item.findtext('pubDate') or '').strip()
            link = (item.findtext('link') or '').strip()
            if not (title and pub_date and link):
                continue
            try:
                posted = datetime.strptime(pub_date, cls._PUBDATE_FORMAT)
            except ValueError:
                # One odd date must not discard the rest of the feed.
                continue
            rows.append([
                title.replace(' ', ''),
                posted.strftime(cls._STORED_DATE_FORMAT),
                link,
            ])
        return rows

    def save_to_mongo(self, result_data):
        """Insert items that are not stored yet and return them grouped by source.

        Args:
            result_data: ``{source: {keyword: [[title, postdate, link], ...]}}``.

        Returns:
            ``{source: [document, ...]}`` containing only the documents that
            were inserted by this call; empty when nothing was new.
        """
        new_data_to_email = {}
        for source, keyword_rows in result_data.items():
            for keyword, rows in keyword_rows.items():
                for row in rows:
                    title, postdate, link = row[0], row[1], row[2]
                    # An item counts as known when title, date and link all match.
                    already_stored = self.mongo.collection.find_one(
                        {'title': title, 'postdate': postdate, 'link': link}
                    )
                    if already_stored is not None:
                        continue
                    document = {
                        'source': source,
                        'keyword': keyword,
                        'title': title,
                        'postdate': postdate,
                        'link': link,
                        'create_datetime': datetime.now().strftime(self._STORED_DATE_FORMAT),
                    }
                    self.mongo.collection.insert_one(document)
                    new_data_to_email.setdefault(source, []).append(document)
        return new_data_to_email

    @staticmethod
    def _build_email_content(source, datas, posted_at=None):
        """Render the plain-text e-mail body for one source's new documents.

        Args:
            source: Source name shown in the header.
            datas: Documents as produced by ``save_to_mongo``.
            posted_at: Timestamp string for the header; defaults to now.
        """
        if posted_at is None:
            posted_at = datetime.now().strftime("%Y-%m-%d %H:%M:%S")
        separator = '*' * 50
        parts = [
            f'KeyWord Search Message\n\nSource site: {source}\n\n{separator}\n\n'
            f'posted at {posted_at}\n\n{separator}\n\n'
        ]
        for data in datas:
            parts.append(f'source: {data["source"]}, keyword: {data["keyword"]}\n')
            parts.append(f'title: {data["title"]}, postdate: {data["postdate"]}\n')
            parts.append(f'link: {data["link"]}\n')
            parts.append(f'\n{separator}\n')
        return ''.join(parts)

    def main(self, input_keys=None):
        """Run one polling pass: fetch, store new items, e-mail them.

        Args:
            input_keys: Optional ``{source: [keyword, ...]}`` mapping; defaults
                to ``DEFAULT_INPUT_KEYS``. Empty keywords are skipped.
        """
        if input_keys is None:
            input_keys = self.DEFAULT_INPUT_KEYS

        result_data = {}
        for source, keywords in input_keys.items():
            for keyword in keywords:
                if not keyword:
                    continue
                print(f'正在获取 {source} - {keyword} 数据')
                datas = self.get_data(source, keyword)
                # Pause between requests to stay gentle on the feed host.
                time.sleep(random.uniform(4, 6))
                if not datas:
                    print(f'{keyword}: nodata')
                    continue
                result_data.setdefault(source, {}).update(datas)

        new_data_to_email = self.save_to_mongo(result_data)

        # Send one e-mail per source as soon as there is something new.
        for source, datas in new_data_to_email.items():
            SendEmail(
                subject=f"{source} - KeyWord Search Message",
                title=f'New Message ({datetime.now().strftime("%Y-%m-%d %H:%M:%S")})',
                text=self._build_email_content(source, datas)
            ).send()
if __name__ == '__main__':
    # Script entry point: run a single keyword-search pass, bracketed by
    # start/done progress messages on stdout.
    print('keyword reminder start')
    KeySearch().main()
    print('keyword reminder done')