utils_timing_remove_data.py 6.8 KB

123456789101112131415161718192021222324252627282930313233343536373839404142434445464748495051525354555657585960616263646566676869707172737475767778798081828384858687888990919293949596979899100101102103104105106107108109110111112113114115116117118119120121122123124125126127128129130131132133134135136137138139140141142143144145146147148149150151152153154155156157158159160161162163164165166167168169170171172173174175176177178179180181182183184185186187188189190191192193194195196
  1. # -*- coding: utf-8 -*-
  2. """
  3. 自动清除大于指定天数的数据
  4. """
  5. import threading
  6. import time
  7. import sys
  8. import os
  9. from datetime import datetime
  10. import pymongo
  11. import smtplib
  12. from email.mime.text import MIMEText
  13. from email.header import Header
  14. sys.path.append(os.path.join(os.getcwd().split('auto')[0], 'auto'))
  15. base_project = os.path.join(os.getcwd().split('auto')[0], 'auto')
  16. import json
  17. config_path = os.path.join(base_project, 'config.json')
  18. config_json = {}
  19. with open(config_path, 'r') as f:
  20. config_json = json.load(f)
  21. if not config_json:
  22. print('No config file found')
  23. exit(0)
  24. PROJECT_NAME = config_json.get('PROJECT_NAME')
  25. DB_USER = config_json.get('DB_USER')
  26. DB_PASSWORD = config_json.get('DB_PASSWORD')
  27. DB_IP = config_json.get('DB_IP')
  28. DB_PORT = config_json.get('DB_PORT')
  29. MONGO_LINK = f'mongodb://{DB_USER}:{DB_PASSWORD}@{DB_IP}:{DB_PORT}/'
  30. MAIL_HOST = config_json.get('MAIL_HOST')
  31. MAIL_USER = config_json.get('MAIL_USER')
  32. MAIL_PASS = config_json.get('MAIL_PASS')
  33. MAIL_SENDER = config_json.get('MAIL_SENDER')
  34. MAIL_RECEIVERS = config_json.get('MAIL_RECEIVERS')
  35. now_day = time.strftime('%Y-%m-%d', time.localtime())
  36. rss_base_url = 'http://home.erhe.link:20002/xmlfile/'
  37. base_project = os.path.join(os.getcwd().split(PROJECT_NAME)[0], PROJECT_NAME)
  38. class MongoHandle(object):
  39. def __init__(self, db, collection, del_db=False, del_collection=False, auto_remove=0):
  40. self.client = pymongo.MongoClient(MONGO_LINK)
  41. self.db = db
  42. self.collection = collection
  43. if del_db and db:
  44. # 检查数据库是否存在
  45. if db in self.client.list_database_names():
  46. # 删除数据库
  47. self.client.drop_database(db)
  48. self.db = self.client[db]
  49. if del_collection and self.collection:
  50. # 检查集合是否存在
  51. if self.collection in self.db.list_collection_names():
  52. # 删除集合
  53. self.db.drop_collection(collection)
  54. self.collection = self.db[collection]
  55. if auto_remove:
  56. self.auto_remove_data(auto_remove)
  57. def write_data(self, data):
  58. self.collection.insert_one(data)
  59. def auto_remove_data(self, day):
  60. for data in self.collection.find({'create_time': {'$lt': int(time.time()) - day * 24 * 60 * 60}}):
  61. self.collection.delete_one({'_id': data['_id']})
  62. class SendEmail(object):
  63. def __init__(self, subject='auto subject', title='auto title', text='auto text') -> None:
  64. # 第三方 SMTP 服务
  65. self.mail_host = MAIL_HOST # 设置服务器
  66. self.mail_user = MAIL_USER # 用户名
  67. self.mail_pass = MAIL_PASS # 口令
  68. self.sender = MAIL_SENDER
  69. self.receivers = [MAIL_RECEIVERS]
  70. self.subject = subject
  71. self.title = title
  72. self.text = text
  73. def send(self):
  74. message = MIMEText(self.text, 'plain', 'utf-8')
  75. message['From'] = Header(self.title, 'utf-8')
  76. message['To'] = Header("auto collection", 'utf-8')
  77. subject = self.subject
  78. message['Subject'] = Header(subject, 'utf-8')
  79. try:
  80. smtpObj = smtplib.SMTP()
  81. smtpObj.connect(self.mail_host, 25)
  82. smtpObj.login(self.mail_user, self.mail_pass)
  83. smtpObj.sendmail(self.sender, self.receivers, message.as_string())
  84. print("邮件发送成功")
  85. except smtplib.SMTPException:
  86. print("Error: 无法发送邮件")
  87. class LogsHandle(object):
  88. def __init__(self):
  89. self.now_day = time.strftime('%Y-%m-%d', time.localtime())
  90. db = 'logs'
  91. collection = 'logs_' + self.now_day
  92. self.mongo = MongoHandle(db=db, collection=collection, del_db=False, del_collection=False, auto_remove=0)
  93. def logs_write(self, title_source=None, content=None, state=None, send_now=False):
  94. data_to_insert = {
  95. "title": title_source,
  96. "context": content,
  97. "state": state,
  98. "create_time": int(time.time()),
  99. "create_datetime": datetime.now().strftime("%Y-%m-%d %H:%M:%S")
  100. }
  101. self.mongo.collection.insert_one(data_to_insert)
  102. if send_now:
  103. subject = 'auto collection'
  104. title = 'auto collection - running logs: {}'.format(self.now_day)
  105. text = 'logs_source: {}, logs_detail: {}, state: {} logs_create_time: {}'.format(data_to_insert.setdefault('title'),
  106. data_to_insert.setdefault('content'),
  107. data_to_insert.setdefault('state'),
  108. data_to_insert.setdefault('create_datetime'),
  109. )
  110. Send = SendEmail(subject=subject, title=title, text=text)
  111. Send.send()
  112. class AutoRemoveData(object):
  113. def __init__(self):
  114. self.databases = [
  115. 'news',
  116. 'apprcn',
  117. 'HelloGithub'
  118. ]
  119. self.day = 60
  120. self.client = pymongo.MongoClient(MONGO_LINK)
  121. self.logs = LogsHandle()
  122. self.all_delete_count = 0
  123. def auto_remove_data(self, db_name, day):
  124. print(f'准备删除时间大于: {self.day} 数据')
  125. if db_name not in self.client.list_database_names():
  126. return
  127. deleted_count = 0
  128. db = self.client[db_name]
  129. for collection_name in db.list_collection_names():
  130. collection = db[collection_name]
  131. for data in collection.find({'create_time': {'$lt': int(time.time()) - day * 24 * 60 * 60}}):
  132. collection.delete_one({'_id': data['_id']})
  133. deleted_count += 1
  134. self.all_delete_count += deleted_count
  135. msg = f"删除 {db_name} 库 {self.day} 天以上数据 {deleted_count} 条"
  136. if deleted_count:
  137. print(msg)
  138. self.logs.logs_write(f'自动删除 {self.day} 天以上数据', msg, 'delete', False)
  139. def main(self):
  140. self.logs.logs_write(f'自动删除 {self.day} 天以上数据', f'开始自动删除 {self.day} 天以上数据', 'start', False)
  141. threads = []
  142. for db_name in self.databases:
  143. thread = threading.Thread(target=self.auto_remove_data, args=(db_name, self.day))
  144. threads.append(thread)
  145. thread.start()
  146. for thread in threads:
  147. thread.join()
  148. print(f'删除时间大于: {self.day} 数据, 已完成')
  149. print(f'本次运行共删除: {self.all_delete_count} 条数据')
  150. self.logs.logs_write(f'自动删除 {self.day} 天以上数据', f'自动删除 {self.day} 天数以上数据完成', 'done', False)
  151. if __name__ == "__main__":
  152. A = AutoRemoveData()
  153. A.main()