# base_timing_remove_data.py
  1. # -*- coding: utf-8 -*-
  2. """
  3. 自动清除大于指定天数的数据
  4. """
  5. import threading
  6. import time
  7. import sys
  8. import os
  9. from datetime import datetime
  10. import pymongo
  11. sys.path.append(os.path.join(os.path.abspath(__file__).split('AutoInfo')[0] + 'AutoInfo'))
  12. base_project = os.path.join(os.getcwd().split('AutoInfo')[0], 'AutoInfo')
  13. from utils.utils import LoadConfig, GotifyNotifier
  14. config_json = LoadConfig().load_config()
  15. base_project = LoadConfig().get_base_path()
  16. PROJECT_NAME = config_json.get('PROJECT_NAME')
  17. DB_USER = config_json.get('DB_USER')
  18. DB_PASSWORD = config_json.get('DB_PASSWORD')
  19. DB_IP = config_json.get('DB_IP')
  20. DB_PORT = config_json.get('DB_PORT')
  21. MONGO_LINK = f'mongodb://{DB_USER}:{DB_PASSWORD}@{DB_IP}:{DB_PORT}/'
  22. MAIL_HOST = config_json.get('MAIL_HOST')
  23. MAIL_USER = config_json.get('MAIL_USER')
  24. MAIL_PASS = config_json.get('MAIL_PASS')
  25. MAIL_SENDER = config_json.get('MAIL_SENDER')
  26. MAIL_RECEIVERS = config_json.get('MAIL_RECEIVERS')
  27. now_day = time.strftime('%Y-%m-%d', time.localtime())
  28. class MongoHandle(object):
  29. def __init__(self, db, collection, del_db=False, del_collection=False, auto_remove=0):
  30. self.client = pymongo.MongoClient(MONGO_LINK)
  31. self.db = db
  32. self.collection = collection
  33. if del_db and db:
  34. # 检查数据库是否存在
  35. if db in self.client.list_database_names():
  36. # 删除数据库
  37. self.client.drop_database(db)
  38. self.db = self.client[db]
  39. if del_collection and self.collection:
  40. # 检查集合是否存在
  41. if self.collection in self.db.list_collection_names():
  42. # 删除集合
  43. self.db.drop_collection(collection)
  44. self.collection = self.db[collection]
  45. if auto_remove:
  46. self.auto_remove_data(auto_remove)
  47. def write_data(self, data):
  48. self.collection.insert_one(data)
  49. def auto_remove_data(self, day):
  50. for data in self.collection.find({'create_time': {'$lt': int(time.time()) - day * 24 * 60 * 60}}):
  51. self.collection.delete_one({'_id': data['_id']})
  52. class SendEmail(object):
  53. def __init__(self, subject='AutoInfo subject', title='AutoInfo title', text='AutoInfo text') -> None:
  54. # 第三方 SMTP 服务
  55. self.mail_host = MAIL_HOST # 设置服务器
  56. self.mail_user = MAIL_USER # 用户名
  57. self.mail_pass = MAIL_PASS # 口令
  58. self.sender = MAIL_SENDER
  59. self.receivers = [MAIL_RECEIVERS]
  60. self.subject = subject
  61. self.title = title
  62. self.text = text
  63. def send(self):
  64. if self.title:
  65. G = GotifyNotifier(title=self.title, message=self.subject)
  66. G.send_message()
  67. else:
  68. print("No error logs found for today.")
  69. class LogsHandle(object):
  70. def __init__(self):
  71. self.now_day = time.strftime('%Y-%m-%d', time.localtime())
  72. db = 'logs'
  73. collection = 'logs_' + self.now_day
  74. self.mongo = MongoHandle(db=db, collection=collection, del_db=False, del_collection=False, auto_remove=0)
  75. def logs_write(self, title_source=None, content=None, state=None, send_now=False):
  76. data_to_insert = {
  77. "title": title_source,
  78. "context": content,
  79. "state": state,
  80. "create_time": int(time.time()),
  81. "create_datetime": datetime.now().strftime("%Y-%m-%d %H:%M:%S")
  82. }
  83. self.mongo.collection.insert_one(data_to_insert)
  84. if send_now:
  85. subject = 'auto collection'
  86. title = 'auto collection - running logs: {}'.format(self.now_day)
  87. text = 'logs_source: {}, logs_detail: {}, state: {} logs_create_time: {}'.format(
  88. data_to_insert.setdefault('title'),
  89. data_to_insert.setdefault('content'),
  90. data_to_insert.setdefault('state'),
  91. data_to_insert.setdefault('create_datetime'),
  92. )
  93. Send = SendEmail(subject=subject, title=title, text=text)
  94. Send.send()
  95. class AutoRemoveData(object):
  96. def __init__(self):
  97. self.databases = [
  98. 'spider_news',
  99. 'apprcn',
  100. 'HelloGithub'
  101. ]
  102. self.day = 60
  103. self.client = pymongo.MongoClient(MONGO_LINK)
  104. self.logs = LogsHandle()
  105. self.all_delete_count = 0
  106. def auto_remove_data(self, db_name, day):
  107. print(f'准备删除时间大于: {self.day} 数据')
  108. if db_name not in self.client.list_database_names():
  109. return
  110. deleted_count = 0
  111. db = self.client[db_name]
  112. for collection_name in db.list_collection_names():
  113. collection = db[collection_name]
  114. for data in collection.find({'create_time': {'$lt': int(time.time()) - day * 24 * 60 * 60}}):
  115. collection.delete_one({'_id': data['_id']})
  116. deleted_count += 1
  117. self.all_delete_count += deleted_count
  118. msg = f"删除 {db_name} 库 {self.day} 天以上数据 {deleted_count} 条"
  119. if deleted_count:
  120. print(msg)
  121. self.logs.logs_write(f'自动删除 {self.day} 天以上数据', msg, 'delete', False)
  122. def main(self):
  123. self.logs.logs_write(f'自动删除 {self.day} 天以上数据', f'开始自动删除 {self.day} 天以上数据', 'start', False)
  124. threads = []
  125. for db_name in self.databases:
  126. thread = threading.Thread(target=self.auto_remove_data, args=(db_name, self.day))
  127. threads.append(thread)
  128. thread.start()
  129. for thread in threads:
  130. thread.join()
  131. print(f'删除时间大于: {self.day} 数据, 已完成')
  132. print(f'本次运行共删除: {self.all_delete_count} 条数据')
  133. self.logs.logs_write(f'自动删除 {self.day} 天以上数据', f'自动删除 {self.day} 天数以上数据完成', 'done', False)
  134. if __name__ == "__main__":
  135. A = AutoRemoveData()
  136. A.main()