news_get_hello_github.py 5.8 KB

123456789101112131415161718192021222324252627282930313233343536373839404142434445464748495051525354555657585960616263646566676869707172737475767778798081828384858687888990919293949596979899100101102103104105106107108109110111112113114115116117118119120121122123124125126127128129130131132133134135136137138139140141142143144145146147
  1. # -*- coding: utf-8 -*-
  2. '''
  3. Hello Github
  4. '''
  5. import os
  6. import sys
  7. import time
  8. from datetime import datetime
  9. import httpx
  10. sys.path.append(os.path.join(os.path.abspath(__file__).split('auto')[0] + 'auto'))
  11. from utils.utils_mongo_handle import MongoHandle
  12. from utils.utils_logs_handle import LogsHandle
  13. from utils.utils_send_email import SendEmail
  14. from base.base_load_config import load_config
  # Project-wide settings, loaded once at import time through load_config().
  15. config_json = load_config()
  # Initial `repush_times` value stamped on every record built by HelloGithub.req.
  16. DEFAULT_RE_PUSH_TIMES = config_json['DEFAULT_RE_PUSH_TIMES']
  class HelloGithub(object):
      """Fetch HelloGithub project listings, store unseen ones and optionally mail them.

      Workflow (see ``main``): request the listing pages of every sort target
      from the HelloGithub API, turn each entry into a flat record, insert the
      records whose title is not yet present in the Mongo collection and, when
      ``send_email_now`` is truthy, mail the records inserted during this run.
      """

      # Listing endpoint; the placeholders are the sort target and the page number.
      API_URL = 'https://api.hellogithub.com/v1/?sort_by={}&tid=&page={}'
      # Pages 1..MAX_PAGES are requested for every sort target.
      MAX_PAGES = 4

      def __init__(self):
          """Set up the logger, request headers, storage names and e-mail state."""
          self.logs_handle = LogsHandle()
          self.now_day = time.strftime('%Y-%m-%d', time.localtime())
          # Not read anywhere in this class; kept so that external readers of
          # the attribute keep working. ``req`` builds its URLs from ``API_URL``.
          self.base_url = 'https://api.hellogithub.com/v1/?sort_by=last&tid=&page={}'
          self.headers = {
              'User-Agent': 'Mozilla/4.0 (compatible; MSIE 8.0; Windows NT 6.1; WOW64; Trident/4.0; SLCC2; Media Center PC 6.0; InfoPath.2; MS-RTC LM 8'
          }
          self.db = 'NEWS'
          self.collection = 'HelloGithub_info'
          # Prefix of the per-project page; the API ``item_id`` is appended to it.
          self.source_url = 'https://hellogithub.com/repository/'
          # Records inserted during this run, i.e. the content of the next e-mail.
          self.send_email_datas = []
          # Truthy -> ``main`` mails the new records after storing them.
          self.send_email_now = 0

      def main(self):
          """Run one collection pass: fetch, store, then optionally send the e-mail."""
          self.logs_handle.logs_write('HelloGithub', '开始获取 HelloGithub 数据', 'start', False)

          targets = ['featured']
          response_datas = []
          for target in targets:
              # Guard against a falsy result so one empty target can never
              # break the accumulation with a TypeError.
              response_datas.extend(self.req(target) or [])

          if response_datas:
              self.save_to_mongo(response_datas)
          else:
              self.logs_handle.logs_write('HelloGithub', '获取 HelloGithub 数据失败', 'error', False)

          self.logs_handle.logs_write('HelloGithub', 'HelloGithub 数据获取完成', 'done', False)
          print('获取 HelloGithub 数据 done')

          if self.send_email_now:
              if self.send_email_datas:
                  self.send_to_email()
              else:
                  print('没有新数据, 不发送邮件')

      def req(self, target):
          """Download the listing pages for one sort target.

          Args:
              target: value of the API ``sort_by`` query parameter.

          Returns:
              list[dict]: one record per listed project; an empty list when
              nothing could be fetched (never ``None``).

          Raises:
              SystemExit: when a page answers with a non-200 status code.
          """
          print('开始获取 HelloGithub {} 数据'.format(target))
          response_data = []
          for page in range(1, self.MAX_PAGES + 1):
              url = self.API_URL.format(target, page)
              try:
                  response = httpx.get(url=url, headers=self.headers)
              except Exception as e:
                  # Deliberately broad: a failing page is skipped (best effort)
                  # and the remaining pages are still requested.
                  print("请求出错{}, \nurl: {}".format(e, url))
                  continue

              if response.status_code != 200:
                  print(
                      '获取 HelloGithub {} 数据, 状态码: {}, 程序退出\n检查目标地址: {}'.format(
                          target, response.status_code, url))
                  self.logs_handle.logs_write('HelloGithub', '请求失败, 状态码: %s' % response.status_code, 'error',
                                              False)
                  # ``sys.exit`` instead of the ``exit`` helper injected by the
                  # ``site`` module, which is not guaranteed to exist.
                  # NOTE(review): the exit status stays 0 as before, although
                  # this is a failure path -- confirm whether schedulers rely on it.
                  sys.exit(0)

              try:
                  payload = response.json()
              except ValueError as e:
                  # A body that is not valid JSON only costs this page.
                  print("解析 JSON 出错{}, \nurl: {}".format(e, url))
                  continue

              response_data.extend(self._parse_items(payload, target, DEFAULT_RE_PUSH_TIMES))

          if not response_data:
              self.logs_handle.logs_write('HelloGithub', '获取数据失败', 'error', False)
          return response_data

      def _parse_items(self, payload, target, repush_times):
          """Convert one decoded API page into storage records.

          Args:
              payload: decoded JSON body; projects are expected under ``'data'``.
              target: sort target the page was requested for.
              repush_times: value stored in each record's ``repush_times`` field.

          Returns:
              list[dict]: one record per dict entry of ``payload['data']``;
              empty when the payload has no usable ``data`` list.
          """
          items = payload.get('data') if isinstance(payload, dict) else None
          if not isinstance(items, list):
              return []

          records = []
          for item in items:
              if not isinstance(item, dict):
                  continue
              # ``or ''`` also covers explicit JSON nulls, which would break
              # both the join and the string concatenation below.
              summary = item.get('summary') or ''
              description = item.get('description') or ''
              records.append({
                  "title": item.get('title', ''),
                  "context": '---'.join([summary, description]),
                  "source_url": 'https://hellogithub.com',
                  'link': self.source_url + str(item.get('item_id') or ''),
                  "article_type": '',
                  "article_source": target,
                  "img_url": '',
                  'keyword': '',
                  "posted_date": item.get('updated_at'),
                  "create_time": int(time.time()),
                  "create_datetime": datetime.now().strftime('%Y-%m-%d %H:%M:%S'),
                  "repush_times": repush_times,
              })
          return records

      def save_to_mongo(self, data):
          """Insert every record whose title is not stored yet.

          Newly inserted records are also appended to ``send_email_datas``.

          Args:
              data: iterable of record dicts as produced by ``req``.

          Returns:
              ``0`` when a ``TypeError`` aborts the batch, otherwise ``None``.
          """
          print('开始储存 HelloGithub 数据')
          if data:
              # One handle for the whole batch instead of one per record.
              # NOTE(review): assumes a MongoHandle can serve several
              # operations in a row -- confirm against utils_mongo_handle.
              mongo = MongoHandle(db=self.db, collection=self.collection, del_db=False, del_collection=False,
                                  auto_remove=0)
              for record in data:
                  try:
                      # The title is the de-duplication key.
                      filter_criteria = {'title': record.get('title', '')}
                      if mongo.collection.count_documents(filter_criteria) == 0:
                          mongo.collection.insert_one(record)
                          # Only new records are queued for the e-mail.
                          self.send_email_datas.append(record)
                  except TypeError as te:
                      print('\n%s' % te)
                      self.logs_handle.logs_write('HelloGithub', '写入数据库报错: %s' % te, 'error', False)
                      return 0
          print('处理 HelloGithub 数据完成', datetime.now().strftime('%Y-%m-%d %H:%M:%S'))

      def _build_email_text(self):
          """Render ``send_email_datas`` as the plain-text body of the e-mail."""
          parts = ['********************************************************\n']
          for data in self.send_email_datas:
              parts.append('标题: {}\n'.format(data['title']))
              parts.append('正文: {}\n'.format(data['context']))
              # ``link`` is the project page; ``source_url`` is only the site
              # root and is used as a fallback for records without a link.
              parts.append('文章地址: {}\n'.format(data.get('link') or data['source_url']))
              parts.append('文章时间: {}\n'.format(data['posted_date']))
              parts.append('获取时间: {}\n'.format(data['create_datetime']))
              parts.append('********************************************************\n\n')
          return ''.join(parts)

      def send_to_email(self):
          """Mail the records collected in ``send_email_datas`` and log the delivery."""
          title = 'HelloGithub - info'
          subject = 'HelloGithub - info'
          text = self._build_email_text()
          send_email = SendEmail(subject=subject, title=title, text=text)
          send_email.send()
          self.logs_handle.logs_write('HelloGithub', f'{title}-发送邮件完成', 'done', False)
  if __name__ == "__main__":
      # Script entry point: build the collector and run a single pass.
      collector = HelloGithub()
      collector.main()