| 123456789101112131415161718192021222324252627282930313233343536373839404142434445464748495051525354555657585960616263646566676869707172737475767778798081828384858687888990919293949596979899100101102103104105106107108109110111112113114115116117118119120121122123124125126127128129130131132133134135136137138139140141142143144145146147148149150151152153154155156157158159160161162163164165166167168169170171172173174175176177178179180181182183184185186187188189190191192193194195196 |
- # -*- coding: utf-8 -*-
- """
- 自动清除大于指定天数的数据
- """
- import threading
- import time
- import sys
- import os
- from datetime import datetime
- import pymongo
- import smtplib
- from email.mime.text import MIMEText
- from email.header import Header
# Project root: the current working directory cut at the 'auto_news_scheduler'
# folder name, with that folder name re-appended. It is added to sys.path and
# used to locate config.json.
base_project = os.path.join(os.getcwd().split('auto_news_scheduler')[0], 'auto_news_scheduler')
sys.path.append(base_project)

import json

config_path = os.path.join(base_project, 'config.json')
config_json = {}
try:
    # JSON is UTF-8 by specification; do not depend on the platform default.
    with open(config_path, 'r', encoding='utf-8') as f:
        config_json = json.load(f)
except FileNotFoundError:
    # Leave config_json empty so a missing file takes the same exit path as an
    # empty one instead of ending in an unhandled traceback.
    pass

if not config_json:
    print('No config file found')
    # sys.exit() rather than the site-provided exit() helper, and a non-zero
    # status so schedulers can see that the run failed.
    sys.exit(1)

# Settings read from config.json; any missing key yields None.
PROJECT_NAME = config_json.get('PROJECT_NAME')

DB_USER = config_json.get('DB_USER')
DB_PASSWORD = config_json.get('DB_PASSWORD')
DB_IP = config_json.get('DB_IP')
DB_PORT = config_json.get('DB_PORT')
# NOTE(review): the credentials are interpolated verbatim. MongoDB URIs need
# reserved characters in the user/password percent-escaped - confirm that the
# values stored in config.json are already URI-safe.
MONGO_LINK = f'mongodb://{DB_USER}:{DB_PASSWORD}@{DB_IP}:{DB_PORT}/'

MAIL_HOST = config_json.get('MAIL_HOST')
MAIL_USER = config_json.get('MAIL_USER')
MAIL_PASS = config_json.get('MAIL_PASS')
MAIL_SENDER = config_json.get('MAIL_SENDER')
MAIL_RECEIVERS = config_json.get('MAIL_RECEIVERS')

# Date stamp (YYYY-MM-DD, local time) captured once at import time.
now_day = time.strftime('%Y-%m-%d', time.localtime())
rss_base_url = 'http://home.erhe.link:20002/xmlfile/'
# Recompute the project root using the configured project name.
base_project = os.path.join(os.getcwd().split(PROJECT_NAME)[0], PROJECT_NAME)
class MongoHandle(object):
    """Open one MongoDB collection, optionally resetting or pruning it first.

    After construction ``self.client`` is the ``pymongo.MongoClient``,
    ``self.db`` the database object and ``self.collection`` the collection
    object.
    """

    def __init__(self, db, collection, del_db=False, del_collection=False, auto_remove=0):
        """Connect via ``MONGO_LINK`` and bind the requested collection.

        Args:
            db: Name of the database.
            collection: Name of the collection inside ``db``.
            del_db: Drop the database first if it already exists.
            del_collection: Drop the collection first if it already exists.
            auto_remove: When truthy, immediately delete documents older than
                this many days (see ``auto_remove_data``).
        """
        self.client = pymongo.MongoClient(MONGO_LINK)
        self.db = db
        self.collection = collection

        if del_db and db:
            # Only drop the database when it actually exists.
            if db in self.client.list_database_names():
                self.client.drop_database(db)
        self.db = self.client[db]

        if del_collection and self.collection:
            # Only drop the collection when it actually exists.
            if self.collection in self.db.list_collection_names():
                self.db.drop_collection(collection)
        self.collection = self.db[collection]

        if auto_remove:
            self.auto_remove_data(auto_remove)

    def write_data(self, data):
        """Insert the single document ``data`` into the bound collection."""
        self.collection.insert_one(data)

    def auto_remove_data(self, day):
        """Delete documents whose ``create_time`` is more than ``day`` days old.

        ``create_time`` is compared against an integer Unix timestamp.

        Args:
            day: Age threshold in days.

        Returns:
            The number of documents deleted.
        """
        cutoff = int(time.time()) - day * 24 * 60 * 60
        # One server-side bulk delete instead of a find plus one delete_one
        # round trip per matching document.
        result = self.collection.delete_many({'create_time': {'$lt': cutoff}})
        return result.deleted_count
class SendEmail(object):
    """Send one plain-text e-mail through the SMTP account set in the config."""

    def __init__(self, subject='auto subject', title='auto title', text='auto text') -> None:
        """Capture the SMTP settings and the message parts.

        Args:
            subject: Value of the ``Subject`` header.
            title: Value placed in the ``From`` header.
            text: Plain-text body of the message.
        """
        # Third-party SMTP service settings.
        self.mail_host = MAIL_HOST  # SMTP server
        self.mail_user = MAIL_USER  # login name
        self.mail_pass = MAIL_PASS  # login password

        self.sender = MAIL_SENDER
        self.receivers = self._as_recipient_list(MAIL_RECEIVERS)

        self.subject = subject
        self.title = title
        self.text = text

    @staticmethod
    def _as_recipient_list(receivers):
        """Return ``receivers`` as a flat list of recipients.

        A list/tuple/set is copied as-is; any other value (for example a
        single address string) is wrapped in a one-element list. Wrapping an
        existing list would produce a nested list that ``sendmail`` cannot
        use as its recipient sequence.
        """
        if isinstance(receivers, (list, tuple, set)):
            return list(receivers)
        return [receivers]

    def send(self):
        """Build the message and send it over SMTP on port 25.

        SMTP-level failures are reported on stdout and not re-raised.
        """
        message = MIMEText(self.text, 'plain', 'utf-8')
        message['From'] = Header(self.title, 'utf-8')
        message['To'] = Header("auto collection", 'utf-8')
        message['Subject'] = Header(self.subject, 'utf-8')

        try:
            # The context manager issues QUIT and closes the socket on exit,
            # so the connection is released even when login or sendmail fails.
            with smtplib.SMTP() as smtp:
                smtp.connect(self.mail_host, 25)
                smtp.login(self.mail_user, self.mail_pass)
                smtp.sendmail(self.sender, self.receivers, message.as_string())
                print("邮件发送成功")
        except smtplib.SMTPException:
            print("Error: 无法发送邮件")
class LogsHandle(object):
    """Write run-log records to MongoDB and optionally e-mail them."""

    def __init__(self):
        """Bind to collection ``logs_<YYYY-MM-DD>`` in the ``logs`` database.

        The date is taken once, at construction time, from the local clock.
        """
        self.now_day = time.strftime('%Y-%m-%d', time.localtime())
        db = 'logs'
        collection = 'logs_' + self.now_day
        self.mongo = MongoHandle(db=db, collection=collection, del_db=False, del_collection=False, auto_remove=0)

    def logs_write(self, title_source=None, content=None, state=None, send_now=False):
        """Insert one log record and, if requested, e-mail a summary of it.

        Args:
            title_source: Stored under the ``title`` key.
            content: Stored under the ``context`` key.
            state: Stored under the ``state`` key.
            send_now: When truthy, also send the record via ``SendEmail``.
        """
        data_to_insert = {
            "title": title_source,
            # NOTE(review): the key is "context", not "content"; it is kept
            # unchanged here - presumably existing documents use it, confirm.
            "context": content,
            "state": state,
            "create_time": int(time.time()),
            "create_datetime": datetime.now().strftime("%Y-%m-%d %H:%M:%S")
        }

        self.mongo.collection.insert_one(data_to_insert)

        if send_now:
            subject = 'auto collection'
            title = 'auto collection - running logs: {}'.format(self.now_day)
            # Build the text from the arguments themselves: looking up a
            # 'content' key in the record finds nothing (the stored key is
            # "context"), which made the mail always report None as detail.
            text = 'logs_source: {}, logs_detail: {}, state: {} logs_create_time: {}'.format(
                title_source,
                content,
                state,
                data_to_insert['create_datetime'],
            )
            SendEmail(subject=subject, title=title, text=text).send()
class AutoRemoveData(object):
    """Delete documents older than ``self.day`` days from a set of databases."""

    # Guards all_delete_count, which the worker threads started by main()
    # update concurrently.
    _count_lock = threading.Lock()

    def __init__(self):
        """Set the target databases, the retention period and the DB handles."""
        self.databases = [
            'news',
            'apprcn',
            'HelloGithub'
        ]
        self.day = 60
        self.client = pymongo.MongoClient(MONGO_LINK)
        self.logs = LogsHandle()
        self.all_delete_count = 0

    def auto_remove_data(self, db_name, day):
        """Purge every collection of ``db_name`` of documents older than ``day`` days.

        A document is old when its ``create_time`` (integer Unix timestamp) is
        below ``now - day * 86400``. Databases that do not exist are skipped.
        The number of deleted documents is added to ``self.all_delete_count``.

        Args:
            db_name: Name of the database to clean.
            day: Age threshold in days.
        """
        print(f'准备删除时间大于: {day} 数据')
        if db_name not in self.client.list_database_names():
            return

        cutoff = int(time.time()) - day * 24 * 60 * 60
        db = self.client[db_name]
        deleted_count = 0
        for collection_name in db.list_collection_names():
            # One bulk delete per collection instead of one delete_one round
            # trip per matching document.
            result = db[collection_name].delete_many({'create_time': {'$lt': cutoff}})
            deleted_count += result.deleted_count

        # "+=" on an attribute is a read-modify-write; serialise it across the
        # per-database worker threads.
        with self._count_lock:
            self.all_delete_count += deleted_count

        msg = f"删除 {db_name} 库 {day} 天以上数据 {deleted_count} 条"
        # NOTE(review): the original indentation was lost; the log write is
        # assumed to belong inside this guard together with the print - confirm.
        if deleted_count:
            print(msg)
            self.logs.logs_write(f'自动删除 {day} 天以上数据', msg, 'delete', False)

    def main(self):
        """Clean all configured databases in parallel, one thread per database.

        Writes a 'start' log record before and a 'done' record after the
        workers have finished, and prints the accumulated delete count.
        """
        self.logs.logs_write(f'自动删除 {self.day} 天以上数据', f'开始自动删除 {self.day} 天以上数据', 'start', False)

        threads = []
        for db_name in self.databases:
            thread = threading.Thread(target=self.auto_remove_data, args=(db_name, self.day))
            threads.append(thread)
            thread.start()

        for thread in threads:
            thread.join()

        print(f'删除时间大于: {self.day} 数据, 已完成')
        print(f'本次运行共删除: {self.all_delete_count} 条数据')
        self.logs.logs_write(f'自动删除 {self.day} 天以上数据', f'自动删除 {self.day} 天数以上数据完成', 'done', False)
- # if __name__ == "__main__":
- # A = AutoRemoveData()
- # A.main()
|