1 yıl önce · 3854e070b2
--- a/README.md
+++ b/README.md
@@ -1,4 +1,5 @@
 
				-# auto_news_scheduler
			
 
				+# auto_news
			
 
				 
			
 
				-通过python定时任务执行的爬虫

			
 
				-main.py主入口控制
			
 
				+定时爬取各种新闻

			
 
				+存到mongodb

			
 
				+通过青龙面板执行定时任务
			
--- a/config.json
+++ b/config.json
@@ -0,0 +1,15 @@
 
				+{
			
 
				+  "PROJECT_NAME": "auto",
			
 
				+  "MAIL_HOST": "smtp.163.com",
			
 
				+  "MAIL_USER": "pushmessagebot@163.com",
			
 
				+  "MAIL_PASS": "WSMSRKBKXIHIQWTU",
			
 
				+  "MAIL_SENDER": "pushmessagebot@163.com",
			
 
				+  "MAIL_RECEIVERS": "pushmessagebot@163.com",
			
 
				+  "DB_USER": "root",
			
 
				+  "DB_PASSWORD": "aaaAAA111!!!",
			
 
				+  "DB_IP": "192.168.100.146",
			
 
				+  "DB_PORT": "38001",
			
 
				+  "USE_PROXY": true,
			
 
				+  "PROXY_HOST": "192.168.100.146",
			
 
				+  "PROXY_PORT": 7890
			
 
				+}
			
--- a/main.py
+++ b/main.py
@@ -0,0 +1,45 @@
 
				+# -*- coding: utf-8 -*-
			
 
				+'''
			
 
				+全局定时
			
 
				+例子
			
 
				+scheduler.add_job(midnight_task, 'cron', hour=0, minute=0) # 每天定时执行
			
 
				+scheduler.add_job(test_error, 'interval', seconds=2) # 循环间隔多少秒执行
			
 
				+scheduler.add_job(weekly_task, 'cron', day_of_week='mon,wed,sat', hour=22, minute=30) # 添加定时任务，设置为每周一、三、六晚上10点30分执行
			
 
				+'''
			
 
				+from apscheduler.schedulers.background import BackgroundScheduler
			
 
				+import time
			
 
				+
			
 
				+
			
 
				def hello_world():
				    """Print the demo greeting used by the first scheduled job."""
				    greeting = "Hello World"
				    print(greeting)
			
 
				+
			
 
				+
			
 
				def hello_kitty():
				    """Print the demo greeting used by the second scheduled job."""
				    greeting = "Hello Kitty"
				    print(greeting)
			
 
				+
			
 
				+
			
 
				def test_error():
				    """Demonstrate that a job can handle its own exception.

				    Deliberately divides by zero and reports the failure on stdout
				    instead of letting the exception escape to the caller.
				    """
				    try:
				        # The result is irrelevant; only the raised exception matters.
				        1 / 0
				    except ZeroDivisionError:
				        print("Division by zero")
			
 
				+
			
 
				+
			
 
				# Create the BackgroundScheduler instance.
				scheduler = BackgroundScheduler()

				# Register the periodic jobs as (callable, interval in seconds) pairs.
				for job, every_seconds in ((hello_world, 10), (hello_kitty, 15), (test_error, 2)):
				    scheduler.add_job(job, 'interval', seconds=every_seconds)

				# Start the scheduler.
				scheduler.start()

				# Loop forever so the program does not exit while the jobs are scheduled.
				try:
				    while True:
				        time.sleep(1)
				except (KeyboardInterrupt, SystemExit):
				    # Shut the scheduler down on interrupt or exit.
				    scheduler.shutdown()
			
--- a/news_get_36kr_info.py
+++ b/news_get_36kr_info.py
@@ -0,0 +1,161 @@
 
				+# -*- coding: utf-8 -*-
			
 
				+'''
			
 
				+获取36kr讯息数据, 通过rsshub获取数据, 可能需要使用代理
			
 
				+https://www.36kr.com/
			
 
				+'''
			
 
				+import datetime
			
 
				+import json
			
 
				+import random
			
 
				+import re
			
 
				+import xmltodict
			
 
				+import time
			
 
				+import httpx
			
 
				+
			
 
				+from tools_mongo_handle import MongoHandle
			
 
				+from tools_logs_handle import LogsHandle
			
 
				+from tools_send_email import SendEmail
			
 
				+
			
 
				+
			
 
				class Get36krInfo:
				    """Collect 36kr articles from RSSHub feeds, store unseen ones in MongoDB and mail them.

				    Each name in ``local_key`` is appended to ``base_url`` and fetched as an
				    RSS document. Articles whose title is not yet in the collection are
				    inserted and then sent out in a single e-mail.
				    """

				    def __init__(self):
				        self.base_url = 'https://rsshub.app/36kr/'
				        # Feed names appended to base_url; one HTTP request per name.
				        self.local_key = [
				            'news',
				            'newsflashes',
				            'recommend',
				            'life',
				            'estate',
				            'workplace',
				        ]
				        self.logs_handle = LogsHandle()
				        self.now_day = time.strftime('%Y-%m-%d', time.localtime())
				        self.db = 'NEWS'
				        self.collection = '36kr_info'
				        self.send_email_datas = {}

				    @staticmethod
				    def _build_records(xml_dict, url, key):
				        """Convert one parsed RSS document into article records.

				        Args:
				            xml_dict: Mapping produced by ``xmltodict.parse`` for one feed.
				            url: Feed URL the document was downloaded from.
				            key: Feed name that was appended to the base URL.

				        Returns:
				            A list with one dict per ``<item>``; empty when the document
				            carries no items.
				        """
				        source = ''
				        items = []
				        try:
				            source = xml_dict['rss']['channel']['title'] or ''
				        except (KeyError, TypeError):
				            print('获取 source 失败')

				        try:
				            items = xml_dict['rss']['channel']['item'] or []
				        except (KeyError, TypeError):
				            print('获取 items 失败')

				        # A channel with exactly one <item> is parsed into a single mapping
				        # rather than a list; iterating it would yield its keys.
				        if isinstance(items, dict):
				            items = [items]

				        records = []
				        for item in items:
				            # Strip HTML tags from the description.
				            description = item.get('description') or ''
				            if description:
				                description = re.sub(r'<[^>]+>', '', description)

				            records.append({
				                "title": item.get('title') or '',
				                "context": description,
				                "source_url": url,
				                'link': item.get('link') or '',
				                "article_type": source,
				                "article_source": key,
				                "img_url": '',
				                'keyword': '',
				                "posted_date": item.get('pubDate') or '',
				                "create_time": int(time.time()),
				                "create_datetime": datetime.datetime.now().strftime('%Y-%m-%d %H:%M:%S'),
				            })
				        return records

				    def req(self):
				        """Download every configured feed and return the parsed article records.

				        Feeds are visited in random order with a random 10-15 second pause
				        between requests. A feed that cannot be fetched or parsed is
				        skipped.

				        Returns:
				            A list of article dicts (possibly empty).
				        """
				        from xml.parsers.expat import ExpatError

				        result_data = []
				        local_key = self.local_key[:]
				        random.shuffle(local_key)
				        last_index = len(local_key) - 1

				        for index, key in enumerate(local_key):
				            url = self.base_url + key
				            try:
				                response = httpx.get(url)
				            except httpx.HTTPError as request_error:
				                # httpx timeouts and connection failures derive from
				                # httpx.HTTPError, not from the builtin TimeoutError.
				                print(request_error)
				                continue

				            if response.status_code != 200:
				                self.logs_handle.logs_write('36kr_info', '请求失败, 状态码: %s' % response.status_code, 'error', False)
				                time.sleep(20)
				                continue

				            response.encoding = 'utf-8'

				            try:
				                xml_dict = xmltodict.parse(response.text)
				            except ExpatError as parse_error:
				                # A malformed document must not abort the remaining feeds.
				                self.logs_handle.logs_write('36kr_info', '解析失败: %s' % parse_error, 'error', False)
				                continue

				            result_data.extend(self._build_records(xml_dict, url, key))

				            # Pause between requests only; nothing follows the last feed.
				            if index < last_index:
				                time.sleep(random.uniform(10, 15))

				        return result_data

				    def save_to_mongo(self, result_data):
				        """Insert records whose title is not stored yet.

				        Args:
				            result_data: Article dicts as returned by ``req``.

				        Returns:
				            The list of records that were newly inserted. If a write raises
				            ``TypeError`` the error is logged, processing stops, and the
				            records inserted so far are still returned so they get mailed.
				        """
				        new_datas = []

				        print(f'正在处理 {self.collection}数据')

				        mongo = MongoHandle(db=self.db, collection=self.collection, del_db=False, del_collection=False, auto_remove=0)

				        for data_to_insert in result_data:
				            try:
				                # The title is the de-duplication key.
				                filter_criteria = {'title': data_to_insert.get('title', '')}
				                if mongo.collection.count_documents(filter_criteria) == 0:
				                    mongo.collection.insert_one(data_to_insert)
				                    # Remember the record for the notification e-mail.
				                    new_datas.append(data_to_insert)
				            except TypeError as te:
				                print('\n%s' % te)
				                self.logs_handle.logs_write(f'{self.collection}', '写入数据库报错: %s' % te, 'error', False)
				                break
				        else:
				            print(f'处理 {self.collection} 数据完成')

				        return new_datas

				    def send_to_email(self, new_datas):
				        """Mail a plain-text digest of the newly stored articles.

				        Args:
				            new_datas: Records returned by ``save_to_mongo``.
				        """
				        title = self.collection
				        subject = self.collection
				        separator = '********************************************************\n'

				        parts = [separator]
				        for data in new_datas:
				            parts.append('标题: {}\n'.format(data['title']))
				            parts.append('正文: {}\n'.format(data['context']))
				            parts.append('文章地址: {}\n'.format(data['link']))
				            parts.append('文章时间: {}\n'.format(data['posted_date']))
				            parts.append('获取时间: {}\n'.format(data['create_datetime']))
				            parts.append(separator + '\n')
				        text = ''.join(parts)

				        send_email = SendEmail(subject=subject, title=title, text=text)
				        send_email.send()
				        # Log under the real collection name, not the literal text.
				        self.logs_handle.logs_write(self.collection, f'{title}-发送邮件完成', 'done', False)

				    def main(self):
				        """Run the whole pipeline: fetch, store, notify."""
				        self.logs_handle.logs_write('36kr - info', '任务开始', 'start', False)

				        result_data = self.req()

				        if not result_data:
				            self.logs_handle.logs_write('36kr - info', '无法获取 36kr - info 数据', 'error', False)
				            return

				        new_datas = self.save_to_mongo(result_data)

				        if new_datas:
				            self.send_to_email(new_datas)
				        else:
				            print('无新数据')

				        self.logs_handle.logs_write('36kr - info', '36kr - info 数据获取完成', 'done', False)
				        print('done')
			
 
				+
			
 
				+
			
 
				if __name__ == '__main__':
				    # Script entry point: run one full fetch/store/notify cycle.
				    Get36krInfo().main()
			
--- a/news_get_36kr_key.py
+++ b/news_get_36kr_key.py
@@ -0,0 +1,157 @@
 
				+# -*- coding: utf-8 -*-
			
 
				+'''
			
 
				+获取36kr讯息数据, 通过rsshub获取数据, 可能需要使用代理
			
 
				+https://www.36kr.com/
			
 
				+'''
			
 
				+import datetime
			
 
				+import json
			
 
				+import random
			
 
				+import re
			
 
				+import xmltodict
			
 
				+import time
			
 
				+import httpx
			
 
				+
			
 
				+from tools_mongo_handle import MongoHandle
			
 
				+from tools_logs_handle import LogsHandle
			
 
				+from tools_send_email import SendEmail
			
 
				+
			
 
				+
			
 
				class Get36krKey:
				    """Collect 36kr keyword-search results from RSSHub, store unseen ones and mail them.

				    Each keyword in ``local_key`` is appended to ``base_url`` and fetched as
				    an RSS document. Articles whose title is not yet in the collection are
				    inserted and then sent out in a single e-mail.
				    """

				    def __init__(self):
				        self.base_url = 'https://rsshub.app/36kr/search/articles/'
				        # Search keywords appended to base_url; one HTTP request per keyword.
				        self.local_key = [
				            '数字币',
				            # '测试网'
				        ]
				        self.logs_handle = LogsHandle()
				        self.now_day = time.strftime('%Y-%m-%d', time.localtime())
				        self.db = 'NEWS'
				        self.collection = '36kr_key'
				        self.send_email_datas = {}

				    @staticmethod
				    def _build_records(xml_dict, url, key):
				        """Convert one parsed RSS document into article records.

				        Args:
				            xml_dict: Mapping produced by ``xmltodict.parse`` for one feed.
				            url: Feed URL the document was downloaded from.
				            key: Search keyword that was appended to the base URL.

				        Returns:
				            A list with one dict per ``<item>``; empty when the document
				            carries no items.
				        """
				        source = ''
				        items = []
				        try:
				            # Fall back to '' so the .replace() below cannot hit None.
				            source = xml_dict['rss']['channel']['title'] or ''
				        except (KeyError, TypeError):
				            print('获取 source 失败')

				        try:
				            items = xml_dict['rss']['channel']['item'] or []
				        except (KeyError, TypeError):
				            print('获取 items 失败')

				        # A channel with exactly one <item> is parsed into a single mapping
				        # rather than a list; iterating it would yield its keys.
				        if isinstance(items, dict):
				            items = [items]

				        article_type = source.replace(' ', '')
				        records = []
				        for item in items:
				            # Strip HTML tags from the description.
				            description = item.get('description') or ''
				            if description:
				                description = re.sub(r'<[^>]+>', '', description)

				            records.append({
				                "title": item.get('title') or '',
				                "context": description,
				                "source_url": url,
				                'link': item.get('link') or '',
				                "article_type": article_type,
				                "article_source": '36kr-Search',
				                "img_url": '',
				                'keyword': key,
				                "posted_date": item.get('pubDate') or '',
				                "create_time": int(time.time()),
				                "create_datetime": datetime.datetime.now().strftime('%Y-%m-%d %H:%M:%S'),
				            })
				        return records

				    def req(self):
				        """Download every configured search feed and return the parsed records.

				        Keywords are visited in random order with a random 10-15 second
				        pause between requests. A feed that cannot be fetched or parsed is
				        skipped.

				        Returns:
				            A list of article dicts (possibly empty).
				        """
				        from xml.parsers.expat import ExpatError

				        result_data = []
				        local_key = self.local_key[:]
				        random.shuffle(local_key)
				        last_index = len(local_key) - 1

				        for index, key in enumerate(local_key):
				            url = self.base_url + key
				            try:
				                response = httpx.get(url)
				            except httpx.HTTPError as request_error:
				                # httpx timeouts and connection failures derive from
				                # httpx.HTTPError, not from the builtin TimeoutError.
				                print(request_error)
				                continue

				            if response.status_code != 200:
				                self.logs_handle.logs_write('36kr_key', '请求失败, 状态码: %s' % response.status_code, 'error', False)
				                time.sleep(20)
				                continue

				            response.encoding = 'utf-8'

				            try:
				                xml_dict = xmltodict.parse(response.text)
				            except ExpatError as parse_error:
				                # A malformed document must not abort the remaining feeds.
				                self.logs_handle.logs_write('36kr_key', '解析失败: %s' % parse_error, 'error', False)
				                continue

				            result_data.extend(self._build_records(xml_dict, url, key))

				            # Pause between requests only; nothing follows the last feed.
				            if index < last_index:
				                time.sleep(random.uniform(10, 15))

				        return result_data

				    def save_to_mongo(self, result_data):
				        """Insert records whose title is not stored yet.

				        Args:
				            result_data: Article dicts as returned by ``req``.

				        Returns:
				            The list of records that were newly inserted. If a write raises
				            ``TypeError`` the error is logged, processing stops, and the
				            records inserted so far are still returned so they get mailed.
				        """
				        new_datas = []

				        print(f'正在处理 {self.collection}数据')

				        mongo = MongoHandle(db=self.db, collection=self.collection, del_db=False, del_collection=False, auto_remove=0)

				        for data_to_insert in result_data:
				            try:
				                # The title is the de-duplication key.
				                filter_criteria = {'title': data_to_insert.get('title', '')}
				                if mongo.collection.count_documents(filter_criteria) == 0:
				                    mongo.collection.insert_one(data_to_insert)
				                    # Remember the record for the notification e-mail.
				                    new_datas.append(data_to_insert)
				            except TypeError as te:
				                print('\n%s' % te)
				                self.logs_handle.logs_write(f'{self.collection}', '写入数据库报错: %s' % te, 'error', False)
				                break
				        else:
				            print(f'处理 {self.collection} 数据完成')

				        return new_datas

				    def send_to_email(self, new_datas):
				        """Mail a plain-text digest of the newly stored articles.

				        Args:
				            new_datas: Records returned by ``save_to_mongo``.
				        """
				        title = self.collection
				        subject = self.collection
				        separator = '********************************************************\n'

				        parts = [separator]
				        for data in new_datas:
				            parts.append('标题: {}\n'.format(data['title']))
				            parts.append('正文: {}\n'.format(data['context']))
				            parts.append('文章地址: {}\n'.format(data['link']))
				            parts.append('文章时间: {}\n'.format(data['posted_date']))
				            parts.append('获取时间: {}\n'.format(data['create_datetime']))
				            parts.append(separator + '\n')
				        text = ''.join(parts)

				        send_email = SendEmail(subject=subject, title=title, text=text)
				        send_email.send()
				        # Log under the real collection name, not the literal text.
				        self.logs_handle.logs_write(self.collection, f'{title}-发送邮件完成', 'done', False)

				    def main(self):
				        """Run the whole pipeline: fetch, store, notify."""
				        self.logs_handle.logs_write('36kr - key', '任务开始', 'start', False)

				        result_data = self.req()

				        if not result_data:
				            self.logs_handle.logs_write('36kr - key', '无法获取 36kr - key 数据', 'error', False)
				            return

				        new_datas = self.save_to_mongo(result_data)

				        if new_datas:
				            self.send_to_email(new_datas)
				        else:
				            print('无新数据')

				        self.logs_handle.logs_write('36kr - key', '36kr - key 数据获取完成', 'done', False)
				        print('done')
			
 
				+
			
 
				+
			
 
				if __name__ == '__main__':
				    # Script entry point: run one full fetch/store/notify cycle.
				    Get36krKey().main()
			
--- a/news_get_apprcn.py
+++ b/news_get_apprcn.py
@@ -0,0 +1,124 @@
 
				+# -*- coding: utf-8 -*-
			
 
				+'''
			
 
				+反斗限免
			
 
				+1, 获取反斗限免数据
			
 
				+2, 储存到mongodb
			
 
				+3, 发送到指定邮件
			
 
				+'''
			
 
				+import re
			
 
				+import time
			
 
				+from datetime import datetime
			
 
				+import httpx
			
 
				+from tools_mongo_handle import MongoHandle
			
 
				+from tools_logs_handle import LogsHandle
			
 
				+from tools_send_email import SendEmail
			
 
				+
			
 
				+
			
 
				class APPRCN(object):
				    """Scrape free.apprcn.com listings, store unseen entries in MongoDB and mail them."""

				    def __init__(self):
				        self.logs_handle = LogsHandle()
				        self.now_day = time.strftime('%Y-%m-%d', time.localtime())
				        # Template for listing pages after the first one.
				        self.base_url = 'https://free.apprcn.com/page/{}/'
				        self.headers = {
				            'User-Agent': 'Mozilla/4.0 (compatible; MSIE 8.0; Windows NT 6.1; WOW64; Trident/4.0; SLCC2; Media Center PC 6.0; InfoPath.2; MS-RTC LM 8'
				        }
				        db = 'NEWS'
				        collection = 'apprcn_info'
				        self.mongo = MongoHandle(db=db, collection=collection, del_db=False, del_collection=False, auto_remove=0)
				        # Records inserted during this run; they form the e-mail body.
				        self.send_email_datas = []

				    def main(self):
				        """Run the whole pipeline: fetch, store, notify."""
				        self.logs_handle.logs_write('apprcn', '开始获取反斗限免数据', 'start', False)

				        response_data = self.req()

				        if not response_data:
				            self.logs_handle.logs_write('apprcn', '无法获取apprcn数据', 'error', False)
				            return

				        self.save_to_mongo(response_data)
				        self.send_to_email()

				        self.logs_handle.logs_write('apprcn', '反斗限免数据获取完成', 'done', False)
				        print('done')

				    @staticmethod
				    def _parse_page(html):
				        """Extract listing records from one page of HTML.

				        Args:
				            html: Full text of a listing page.

				        Returns:
				            A list of record dicts; empty when the content section is
				            missing from the page.
				        """
				        content_list = re.findall(r'<div class="content">([\S\s]*?)<div class="sidebar">', html)
				        if not content_list:
				            return []

				        # Remove tabs AND newlines so the single-line patterns below match.
				        content = content_list[0].replace('\t', '').replace('\n', '')

				        context_list = re.findall(r'<p class="note">(.*?)</p>', content)
				        title_list = re.findall(r'title="(.*?)"', content)
				        post_date_list = re.findall(r'<time>(.*?)</time>', content)
				        source_data_list = re.findall(r'<a class="cat" href="(.*?)"', content)

				        records = []
				        for title, context, post_date, source_data in zip(title_list, context_list, post_date_list, source_data_list):
				            records.append({
				                "title": title,
				                "context": context,
				                "source_url": source_data,
				                'link': '',
				                "article_type": '',
				                "article_source": '',
				                "img_url": '',
				                'keyword': '',
				                "posted_date": post_date,
				                "create_time": int(time.time()),
				                "create_datetime": datetime.now().strftime('%Y-%m-%d %H:%M:%S'),
				            })
				        return records

				    def req(self):
				        """Download the front page plus pages 2-9 and parse them.

				        A page that cannot be fetched or answers with a non-200 status is
				        logged and skipped, so one bad page does not discard the others.

				        Returns:
				            A list of record dicts (possibly empty).
				        """
				        urls = ['https://free.apprcn.com/']
				        urls.extend(self.base_url.format(page) for page in range(2, 10))

				        response_data = []
				        for url in urls:
				            try:
				                response = httpx.get(url=url, headers=self.headers)
				            except httpx.HTTPError as request_error:
				                self.logs_handle.logs_write('apprcn', '请求失败: %s' % request_error, 'error', False)
				                continue

				            if response.status_code != 200:
				                self.logs_handle.logs_write('apprcn', '请求失败, 状态码: %s' % response.status_code, 'error', False)
				                continue

				            response.encoding = 'utf-8'
				            response_data.extend(self._parse_page(response.text))

				        if not response_data:
				            self.logs_handle.logs_write('apprcn', '获取数据失败', 'error', False)

				        return response_data

				    def save_to_mongo(self, data):
				        """Insert records whose title is not stored yet.

				        Newly inserted records are appended to ``self.send_email_datas``.

				        Args:
				            data: Record dicts as returned by ``req``.

				        Returns:
				            ``0`` when a write raises ``TypeError`` (processing stops),
				            otherwise ``None``.
				        """
				        print('开始储存 反斗限免 数据')
				        for data_to_insert in data:
				            try:
				                # The title is the de-duplication key.
				                filter_criteria = {'title': data_to_insert.get('title', '')}
				                if self.mongo.collection.count_documents(filter_criteria) == 0:
				                    self.mongo.collection.insert_one(data_to_insert)
				                    self.send_email_datas.append(data_to_insert)
				            except TypeError as te:
				                print('\n%s' % te)
				                self.logs_handle.logs_write('反斗限免', '写入数据库报错: %s' % te, 'error', False)
				                return 0
				        print('储存数据完成')

				    def send_to_email(self):
				        """Mail the records collected in ``self.send_email_datas``, if any."""
				        if not self.send_email_datas:
				            self.logs_handle.logs_write('apprcn', '没有新数据, 不发送邮件', 'done', False)
				            return

				        text = ''.join(
				            '标题: %s\n内容: %s\n时间: %s\n链接: %s\n\n' % (data['title'], data['context'], data['posted_date'], data['source_url'])
				            for data in self.send_email_datas
				        )
				        send_email = SendEmail(subject='反斗限免', title='反斗限免', text=text)
				        send_email.send()
				        self.logs_handle.logs_write('apprcn', '发送邮件完成', 'done', False)
			
 
				+
			
 
				+
			
 
				if __name__ == "__main__":
				    # Script entry point: run one full fetch/store/notify cycle.
				    APPRCN().main()
			
--- a/news_get_chiphell.py
+++ b/news_get_chiphell.py
@@ -0,0 +1,233 @@
 
				+# -*- coding: utf-8 -*-
			
 
				+'''
			
 
				+chiphell
			
 
				+'''
			
 
				+import os
			
 
				+import random
			
 
				+import sys
			
 
				+import threading
			
 
				+
			
 
				+sys.path.append(os.path.join(os.getcwd().split('auto')[0], 'auto'))
			
 
				+import re
			
 
				+import time
			
 
				+from datetime import datetime
			
 
				+import httpx
			
 
				+from tools_mongo_handle import MongoHandle
			
 
				+from tools_logs_handle import LogsHandle
			
 
				+from tools_send_email import SendEmail
			
 
				+
			
 
				+
			
 
				class CHIPHELL(object):
				    """Scrape chiphell.com portal categories, store unseen articles and mail them."""

				    def __init__(self):
				        self.logs_handle = LogsHandle()
				        self.now_day = time.strftime('%Y-%m-%d', time.localtime())
				        self.base_url = 'https://www.chiphell.com/'
				        # Relative listing URL; the placeholder takes the category id.
				        self.href_url = 'portal.php?mod=list&catid={}'
				        self.db = 'NEWS'
				        self.collection = 'chiphell_info'
				        self.headers = {
				            'User-Agent': 'Mozilla/4.0 (compatible; MSIE 8.0; Windows NT 6.1; WOW64; Trident/4.0; SLCC2; Media Center PC 6.0; InfoPath.2; MS-RTC LM 8'
				        }
				        # Records inserted during this run; they form the e-mail body.
				        self.send_email_datas = []

				    def _parse_page(self, html, source):
				        """Extract article records from one category listing page.

				        Args:
				            html: Full text of the listing page.
				            source: Label of the form ``'<section> - <category>'``.

				        Returns:
				            A list of record dicts (possibly empty).
				        """
				        # partition() tolerates a label without the separator.
				        article_source, _, article_type = source.partition(' - ')
				        records = []

				        for dl in re.findall(r'<dt class="xs2">([\S\s]*?)</dl>', html):
				            if not dl:
				                continue
				            url_list = re.findall(r'<a href="(.*?)" target="_blank" ', dl)
				            title_list = re.findall(r'class="xi2"  style="">(.*?)</a> </dt>', dl)
				            img_url_list = re.findall(r'target="_blank"><img src="(.*?)"', dl)
				            context_list = re.findall(r'class="tn" /></a></div>([\S\s]*?)</dd>', dl)
				            post_time_list = re.findall(r'<span class="xg1"> (.*?)</span>', dl)

				            for url, title, img_url, context, post_time in zip(url_list, title_list, img_url_list, context_list, post_time_list):
				                # Drop spaces and newlines from the summary, then turn
				                # carriage returns into single spaces.
				                if context:
				                    context = context.replace(' ', '').replace('\n', '').replace('\r', ' ')

				                records.append({
				                    "title": title,
				                    "context": context,
				                    "source_url": self.base_url + url,
				                    'link': '',
				                    "article_type": article_type,
				                    "article_source": article_source,
				                    "img_url": img_url,
				                    'keyword': '',
				                    "posted_date": post_time,
				                    "create_time": int(time.time()),
				                    "create_datetime": datetime.now().strftime('%Y-%m-%d %H:%M:%S'),
				                })
				        return records

				    def req(self, source, target):
				        """Fetch one category listing and parse it.

				        Args:
				            source: Label of the form ``'<section> - <category>'``.
				            target: Category id substituted into the listing URL.

				        Returns:
				            A list of record dicts, or ``0`` when the request fails or the
				            status is not 200.
				        """
				        print(f'正在获取 {source} 数据')
				        sleep_time = random.uniform(1, 2)
				        print(f'睡眠 {sleep_time} 秒')
				        time.sleep(sleep_time)

				        try:
				            resp = httpx.get(url=self.base_url + self.href_url.format(target), headers=self.headers)
				        except httpx.HTTPError as e:
				            print(e)
				            return 0

				        if resp.status_code != 200:
				            print(resp.status_code)
				            return 0

				        resp.encoding = 'utf-8'
				        return self._parse_page(resp.text, source)

				    def save_to_mongo(self, collection, source_data):
				        """Insert records whose title is not stored yet.

				        Newly inserted records are appended to ``self.send_email_datas``.
				        ``main`` runs this method on several threads at once.
				        NOTE(review): the count-then-insert check is not atomic, so the
				        same title arriving on two threads could be stored twice — confirm
				        whether the collection enforces uniqueness.

				        Args:
				            collection: Section name, used only in the completion message;
				                all sections are written to ``self.collection``.
				            source_data: Record dicts gathered for that section.

				        Returns:
				            ``0`` when a write raises ``TypeError`` (processing stops),
				            otherwise ``None``.
				        """
				        print(f'正在处理 {self.collection} 数据')
				        mongo = MongoHandle(db=self.db, collection=self.collection, del_db=False, del_collection=False, auto_remove=0)

				        for data_to_insert in source_data:
				            try:
				                # The title is the de-duplication key.
				                filter_criteria = {'title': data_to_insert.get('title', '')}
				                if mongo.collection.count_documents(filter_criteria) == 0:
				                    mongo.collection.insert_one(data_to_insert)
				                    # Remember the record for the notification e-mail.
				                    self.send_email_datas.append(data_to_insert)
				            except TypeError as te:
				                print('\n%s' % te)
				                self.logs_handle.logs_write('chiphell', '写入数据库报错: %s' % te, 'error', False)
				                return 0
				        print(f'处理 chiphell - {collection}数据完成')

				    def send_to_email(self):
				        """Mail a plain-text digest of ``self.send_email_datas``."""
				        title = 'chiphell - info'
				        subject = 'chiphell - info'
				        separator = '********************************************************\n'

				        parts = [separator]
				        for data in self.send_email_datas:
				            parts.append('标题: {}\n'.format(data['title']))
				            parts.append('正文: {}\n'.format(data['context']))
				            parts.append('板块: {}\n'.format(data['article_source']))
				            parts.append('类型: {}\n'.format(data['article_type']))
				            parts.append('文章地址: {}\n'.format(data['source_url']))
				            parts.append('文章时间: {}\n'.format(data['posted_date']))
				            parts.append('获取时间: {}\n'.format(data['create_datetime']))
				            parts.append(separator + '\n')
				        text = ''.join(parts)

				        send_email = SendEmail(subject=subject, title=title, text=text)
				        send_email.send()
				        self.logs_handle.logs_write('chiphell', f'{title}-发送邮件完成', 'done', False)

				    def main(self):
				        """Fetch every enabled category, store new articles and mail them.

				        Returns:
				            ``False`` when no category produced any record, otherwise
				            ``None``.
				        """
				        # Section -> {category name: category id}. Commented-out entries
				        # are known categories that are currently disabled.
				        category = {
				            '评测': {
				                '笔记本': '19',
				                '机箱': '11',
				                #     '处理器': '13',
				                #     '散热器': '14',
				                #     '主板': '15',
				                #     '内存': '137',
				                #     '外设': '18',
				                #     '电源': '35',
				                '存储': '23',
				                '显示设备': '21',
				                #     '台式机': '88',
				                '显卡': '10',
				                #     '相机': '116'
				            },
				            '电脑': {
				                '配件开箱': '98',
				                '整机搭建': '99',
				                '桌面书房': '101'
				            },
				            '掌设': {
				                '智能手机': '40',
				                '智能穿戴': '89',
				                '笔电平板': '41',
				                # '周边附件': '92'
				            },
				            # '摄影': {
				            #     '微单卡片': '52',
				            #     '单反单电': '51',
				            #     '经典旁轴': '53',
				            #     '怀旧菲林': '54',
				            #     '影音摄像': '57',
				            #     '周边附件': '55'
				            # },
				            # '汽车': {
				            #     '买菜车': '58',
				            #     '商务车': '59',
				            #     '性能车': '63',
				            #     '旅行车': '60',
				            #     'SUV': '61',
				            #     'MPV': '95',
				            #     '摩托轻骑': '65',
				            #     '改装配件': '96'
				            # },
				            # '单车': {
				            #     '山地车': '108',
				            #     '公路车': '109',
				            #     '折叠车': '110',
				            #     '休旅车': '111'
				            # },
				            # '腕表': {
				            #     '机械表': '128',
				            #     '电子表': '126'
				            # },
				            '视听': {
				                '耳机耳放': '71',
				                '音箱功放': '72',
				                # '解码转盘': '73',
				                '随身设备': '74'
				            },
				            '美食': {
				                '当地美食': '68',
				                '世界美食': '117',
				                '私房菜品': '69',
				                '美食器材': '70'
				            },
				            # '家居': {
				            #     '家居': '132'
				            # },
				        }

				        response_datas = {}

				        for source1, tags in category.items():
				            # Records are grouped by section name.
				            section_records = response_datas.setdefault(source1, [])

				            for source2, target in tags.items():
				                source = source1 + ' - ' + source2
				                response_data = self.req(source, target)
				                if response_data != 0:
				                    section_records += response_data

				        # Every section key exists even when nothing was fetched, so test
				        # the lists themselves rather than the dict.
				        if not any(response_datas.values()):
				            self.logs_handle.logs_write('chiphell', '获取数据为空', 'error', False)
				            return False

				        # One writer thread per section.
				        threads = []
				        for k, v in response_datas.items():
				            thread = threading.Thread(target=self.save_to_mongo, args=(k, v,))
				            threads.append(thread)
				            thread.start()

				        for thread in threads:
				            thread.join()

				        # Send the digest only when something new was stored.
				        if self.send_email_datas:
				            self.send_to_email()
			
 
				+
			
 
				+
			
 
				if __name__ == '__main__':
				    # Script entry point: run one full fetch/store/notify cycle.
				    CHIPHELL().main()
			
--- a/news_get_hello_github.py
+++ b/news_get_hello_github.py
@@ -0,0 +1,130 @@
 
				+# -*- coding: utf-8 -*-
			
 
				+'''
			
 
				+Hello Github
			
 
				+'''
			
 
				+import os
			
 
				+import sys
			
 
				+
			
 
				+sys.path.append(os.path.join(os.getcwd().split('auto')[0], 'auto'))
			
 
				+import threading
			
 
				+import time
			
 
				+from datetime import datetime
			
 
				+import httpx
			
 
				+from tools_mongo_handle import MongoHandle
			
 
				+from tools_logs_handle import LogsHandle
			
 
				+from tools_send_email import SendEmail
			
 
				+
			
 
				+
			
 
				+class HelloGithub(object):
			
 
				+    def __init__(self):
			
 
				+        self.logs_handle = LogsHandle()
			
 
				+        self.now_day = time.strftime('%Y-%m-%d', time.localtime())
			
 
				+        self.base_url = 'https://api.hellogithub.com/v1/?sort_by=last&tid=&page={}'
			
 
				+        self.headers = {
			
 
				+            'User-Agent': 'Mozilla/4.0 (compatible; MSIE 8.0; Windows NT 6.1; WOW64; Trident/4.0; SLCC2; Media Center PC 6.0; InfoPath.2; MS-RTC LM 8'
			
 
				+        }
			
 
				+        self.db = 'NEWS'
			
 
				+        self.collection = 'HelloGithub_info'
			
 
				+        self.source_url = 'https://hellogithub.com/repository/'
			
 
				+        self.send_email_datas = []
			
 
				+
			
 
				+    def main(self):
			
 
				+        self.logs_handle.logs_write('HelloGithub', '开始获取 HelloGithub 数据', 'start', False)
			
 
				+
			
 
				+        targets = ['last', 'hot']
			
 
				+
			
 
				+        response_datas = []
			
 
				+
			
 
				+        for target in targets:
			
 
				+            response_data = self.req(target)
			
 
				+            response_datas += response_data
			
 
				+
			
 
				+        if response_datas:
			
 
				+            self.save_to_mongo(response_datas)
			
 
				+
			
 
				+        else:
			
 
				+            self.logs_handle.logs_write('HelloGithub', '获取 HelloGithub 数据失败', 'error', False)
			
 
				+
			
 
				+        self.logs_handle.logs_write('HelloGithub', 'HelloGithub 数据获取完成', 'done', False)
			
 
				+        print('获取 HelloGithub 数据 done')
			
 
				+
			
 
				+        if self.send_email_datas:
			
 
				+            self.send_to_email()
			
 
				+        else:
			
 
				+            print('没有新数据, 不发送邮件')
			
 
				+
			
 
				+    def req(self, target):
			
 
				+        print('开始获取 HelloGithub {} 数据'.format(target))
			
 
				+        response_data = []
			
 
				+        for i in range(1, 5):
			
 
				+            response = httpx.get(url='https://api.hellogithub.com/v1/?sort_by={}&tid=&page={}'.format(target, i),
			
 
				+                                 headers=self.headers)
			
 
				+            if response.status_code != 200:
			
 
				+                self.logs_handle.logs_write('HelloGithub', '请求失败, 状态码: %s' % response.status_code, 'error',
			
 
				+                                            False)
			
 
				+                exit(0)
			
 
				+
			
 
				+            json_data = response.json()
			
 
				+            for d in json_data.setdefault('data'):
			
 
				+                response_data.append({
			
 
				+                    "title": d.setdefault('title', ''),
			
 
				+                    "context": d.setdefault('summary', '') + ' --- ' + d.setdefault('description'),
			
 
				+                    "source_url": 'https://hellogithub.com',
			
 
				+                    'link': self.source_url + d.setdefault('item_id'),
			
 
				+                    "article_type": '',
			
 
				+                    "article_source": target,
			
 
				+                    "img_url": '',
			
 
				+                    'keyword': '',
			
 
				+                    "posted_date": d.setdefault('updated_at'),
			
 
				+                    "create_time": int(time.time()),
			
 
				+                    "create_datetime": datetime.now().strftime('%Y-%m-%d %H:%M:%S')
			
 
				+                })
			
 
				+
			
 
				+        if response_data:
			
 
				+            return response_data
			
 
				+        else:
			
 
				+            self.logs_handle.logs_write('HelloGithub', '获取数据失败', 'error', False)
			
 
				+
			
 
				+    def save_to_mongo(self, data):
			
 
				+        print(f'开始储存 HelloGithub 数据')
			
 
				+        for data_to_insert in data:
			
 
				+            mongo = MongoHandle(db=self.db, collection=self.collection, del_db=False, del_collection=False, auto_remove=0)
			
 
				+
			
 
				+            try:
			
 
				+                # 检查数据库中是否存在匹配的文档
			
 
				+                filter_criteria = {'title': data_to_insert.get('title', '')}  # 确保 title 字段有值
			
 
				+                count = mongo.collection.count_documents(filter_criteria)
			
 
				+                if count == 0:
			
 
				+                    # 如果没有找到匹配的文档，插入新文档
			
 
				+                    result = mongo.collection.insert_one(data_to_insert)
			
 
				+
			
 
				+                    # 准备发送邮件的数据
			
 
				+                    self.send_email_datas.append(data_to_insert)
			
 
				+
			
 
				+
			
 
				+            except TypeError as te:
			
 
				+                print('\n%s' % te)
			
 
				+                self.logs_handle.logs_write('HelloGithub', '写入数据库报错: %s' % te, 'error', False)
			
 
				+                return 0
			
 
				+        print(f'处理 HelloGithub 数据完成')
			
 
				+
			
 
				+    def send_to_email(self):
			
 
				+        title = 'HelloGithub - info'
			
 
				+        subject = 'HelloGithub - info'
			
 
				+        text = '********************************************************\n'
			
 
				+        for data in self.send_email_datas:
			
 
				+            text += '标题: {}\n'.format(data['title'])
			
 
				+            text += '正文: {}\n'.format(data['context'])
			
 
				+            text += '文章地址: {}\n'.format(data['source_url'])
			
 
				+            text += '文章时间: {}\n'.format(data['posted_date'])
			
 
				+            text += '获取时间: {}\n'.format(data['create_datetime'])
			
 
				+            text += '********************************************************\n\n'
			
 
				+
			
 
				+        send_email = SendEmail(subject=subject, title=title, text=text)
			
 
				+        send_email.send()
			
 
				+        self.logs_handle.logs_write('HelloGithub', f'{title}-发送邮件完成', 'done', False)
			
 
				+
			
 
				+
			
 
				+if __name__ == "__main__":
			
 
				+    H = HelloGithub()
			
 
				+    H.main()
			
--- a/news_get_news.py
+++ b/news_get_news.py
@@ -0,0 +1,141 @@
 
				+# -*- coding: utf-8 -*-
			
 
				+import time
			
 
				+import httpx
			
 
				+from datetime import datetime
			
 
				+
			
 
				+from tools_mongo_handle import MongoHandle
			
 
				+from tools_logs_handle import LogsHandle
			
 
				+from tools_send_email import SendEmail
			
 
				+
			
 
				+
			
 
				+class HotNews():
			
 
				+    def __init__(self):
			
 
				+        self.base_url = 'https://www.anyknew.com/go/'
			
 
				+        self.email_subject = '聚合新闻'
			
 
				+        self.email_title = 'Anyknew'
			
 
				+        self.email_text = '获取数据时间:\n{0}\n{1}\n\n\n\n'.format(datetime.now().strftime("%Y-%m-%d %H:%M:%S"), ('-' * 90))
			
 
				+        self.logs_handle = LogsHandle()
			
 
				+        self.now_day = time.strftime('%Y-%m-%d', time.localtime())
			
 
				+        self.db = 'NEWS'
			
 
				+        self.collection = 'Anyknew_info'
			
 
				+        self.targets = {
			
 
				+            'universal': 'https://www.anyknew.com/api/v1/cats/universal',
			
 
				+            'finance': 'https://www.anyknew.com/api/v1/cats/aam',
			
 
				+            'science': 'https://www.anyknew.com/api/v1/cats/st',
			
 
				+            'life': 'https://www.anyknew.com/api/v1/cats/life',
			
 
				+            'binary': 'https://www.anyknew.com/api/v1/cats/binary'
			
 
				+        }
			
 
				+        self.send_email_datas = []
			
 
				+
			
 
				+    def main(self):
			
 
				+        self.logs_handle.logs_write('聚合新闻', '任务开始', 'start', False)
			
 
				+
			
 
				+        resp_data = self.req()
			
 
				+
			
 
				+        if resp_data:
			
 
				+            self.save_to_mongo(resp_data)
			
 
				+
			
 
				+            if self.send_email_datas:
			
 
				+                print('准备发送邮件')
			
 
				+                self.send_to_email()
			
 
				+            else:
			
 
				+                print('无新数据')
			
 
				+
			
 
				+        else:
			
 
				+            self.logs_handle.logs_write('聚合新闻', '获取数据为空', 'error', False)
			
 
				+            return False
			
 
				+
			
 
				+        self.logs_handle.logs_write('聚合新闻', '任务完成', 'done', False)
			
 
				+
			
 
				+    def req(self):
			
 
				+        print('开始请求数据')
			
 
				+        result_data = []
			
 
				+        for target in self.targets:
			
 
				+            url = self.targets[target]
			
 
				+
			
 
				+            resp = httpx.get(url=url)
			
 
				+            resp_json = resp.json()
			
 
				+            data = resp_json.setdefault('data')
			
 
				+            cat = data.setdefault('cat')
			
 
				+            sites = cat.setdefault('sites')
			
 
				+
			
 
				+            for site in sites:
			
 
				+                site_name = site.setdefault('site')
			
 
				+                subs = site.setdefault('subs')
			
 
				+                target_and_site = '{}-{}'.format(target, site_name)
			
 
				+
			
 
				+                for items in subs:
			
 
				+                    for item in items:
			
 
				+                        if item == 'items':
			
 
				+                            detail = items['items']
			
 
				+                            for d in detail:
			
 
				+                                if target == 'universal':
			
 
				+                                    tag = 'Anyknew - 综合'
			
 
				+                                elif target == 'finance':
			
 
				+                                    tag = 'Anyknew - 金融'
			
 
				+                                elif target == 'science':
			
 
				+                                    tag = 'Anyknew - 科学'
			
 
				+                                elif target == 'life':
			
 
				+                                    tag = 'Anyknew - 生活'
			
 
				+                                elif target == 'binary':
			
 
				+                                    tag = 'Anyknew - 二进制'
			
 
				+                                else:
			
 
				+                                    tag = 'Anyknew'
			
 
				+
			
 
				+                                result_data.append({
			
 
				+                                    "title": d.get('title') or '',
			
 
				+                                    "context": d.get('more') or '',
			
 
				+                                    "source_url": url,
			
 
				+                                    'link': self.base_url + (str(d.get('iid')) or ''),
			
 
				+                                    "article_type": target_and_site,
			
 
				+                                    "article_source": tag,
			
 
				+                                    "img_url": '',
			
 
				+                                    'keyword': '',
			
 
				+                                    "posted_date": d.get('add_date') or '',
			
 
				+                                    "create_time": int(time.time()),
			
 
				+                                    "create_datetime": datetime.now().strftime('%Y-%m-%d %H:%M:%S')
			
 
				+                                })
			
 
				+
			
 
				+        print('已获取数据')
			
 
				+        return result_data
			
 
				+
			
 
				+    def save_to_mongo(self, source_data):
			
 
				+        print(f'开始处理Anyknew数据')
			
 
				+        mongo = MongoHandle(db=self.db, collection=self.collection, del_db=False, del_collection=False, auto_remove=0)
			
 
				+
			
 
				+        for data_to_insert in source_data:
			
 
				+            try:
			
 
				+                # 检查数据库中是否存在匹配的文档
			
 
				+                filter_criteria = {'title': data_to_insert.get('title', '')}  # 确保 title 字段有值
			
 
				+                count = mongo.collection.count_documents(filter_criteria)
			
 
				+
			
 
				+                if count == 0:
			
 
				+                    # 如果没有找到匹配的文档，插入新文档
			
 
				+                    result = mongo.collection.insert_one(data_to_insert)
			
 
				+                    self.send_email_datas.append(data_to_insert)
			
 
				+
			
 
				+            except TypeError as te:
			
 
				+                print('\n%s' % te)
			
 
				+                self.logs_handle.logs_write('聚合新闻', '写入数据库报错: %s' % te, 'error', False)
			
 
				+                return 0
			
 
				+        print(f'Anyknew数据处理')
			
 
				+
			
 
				+    def send_to_email(self):
			
 
				+        text = '********************************************************\n'
			
 
				+        for data in self.send_email_datas:
			
 
				+            text += '标题: {}\n'.format(data['title'])
			
 
				+            text += '正文: {}\n'.format(data['context'])
			
 
				+            text += '文章地址: {}\n'.format(data['link'])
			
 
				+            text += '类型: {}\n'.format(data['article_type'])
			
 
				+            text += '板块: {}\n'.format(data['article_source'])
			
 
				+            text += '文章时间: {}\n'.format(data['posted_date'])
			
 
				+            text += '获取时间: {}\n'.format(data['create_datetime'])
			
 
				+            text += '********************************************************\n\n'
			
 
				+
			
 
				+        send_email = SendEmail(subject='Anyknew', title='Anyknew_info', text=text)
			
 
				+        send_email.send()
			
 
				+        print('邮件已发送')
			
 
				+
			
 
				+
			
 
				+if __name__ == '__main__':
			
 
				+    HotNews().main()
			
--- a/news_get_rsshub.py
+++ b/news_get_rsshub.py
@@ -0,0 +1,148 @@
 
				+# -*- coding: UTF-8 -*-
			
 
				+'''
			
 
				+关键词搜索rss消息
			
 
				+'''
			
 
				+import random
			
 
				+import re
			
 
				+import time
			
 
				+import httpx
			
 
				+from datetime import datetime
			
 
				+from tools_mongo_handle import MongoHandle
			
 
				+from tools_send_email import SendEmail
			
 
				+import tools_load_config
			
 
				+
			
 
# Project configuration and base path, resolved once at import time through
# the project's tools_load_config helpers.
config_json = tools_load_config.load_config()
base_project = tools_load_config.get_base_path()

# Value of the 'PROJECT_NAME' entry of the loaded configuration (None when absent).
PROJECT_NAME = config_json.get('PROJECT_NAME')
			
 
				+
			
 
				+
			
 
				+class KeySearch(object):
			
 
				+    def __init__(self):
			
 
				+        db = 'KeyWordSearch'
			
 
				+        collection = 'KeyWordSearch'
			
 
				+        self.mongo = MongoHandle(db=db, collection=collection, del_db=False, del_collection=False, auto_remove=0)
			
 
				+
			
 
				+    def get_data(self, source, key):
			
 
				+        if not key:
			
 
				+            return None
			
 
				+        key_url = {
			
 
				+            '什么值得买': f'https://rsshub.app/smzdm/keyword/{key}',
			
 
				+            '新浪微博': f'https://rsshub.app/weibo/keyword/{key}',
			
 
				+            '36kr': f'https://rsshub.app/36kr/search/articles/{key}',
			
 
				+            '虎嗅网': f'https://rsshub.app/huxiu/search/{key}',
			
 
				+        }
			
 
				+        result_data = {key: []}
			
 
				+        url = key_url.get(source)
			
 
				+
			
 
				+        try:
			
 
				+            resp = httpx.get(url)
			
 
				+        except Exception as e:
			
 
				+            print(f'请求失败: {e}\n目标地址: {url}')
			
 
				+            return None
			
 
				+        if resp.status_code != 200:
			
 
				+            # 发邮件通知
			
 
				+            print(f'请求失败, 状态码: {resp.status_code}, 源: {source}, 关键词: {key}')
			
 
				+            # LogsHandle().logs_write(title_source=PROJECT_NAME, content=f'请求失败, 状态码: {resp.status_code}', state='error', send_now=True)
			
 
				+            time.sleep(random.uniform(3, 5))
			
 
				+            return None
			
 
				+        resp.encoding = 'utf-8'
			
 
				+
			
 
				+        # 解析数据
			
 
				+        pattern = r"<title><!\[CDATA\[(.*?)\]\]></title>\s*<description><!\[CDATA\[.*?\]\]></description>\s*<pubDate>(.*?)</pubDate>\s*<guid.*?</guid>\s*<link>(.*?)</link>"
			
 
				+
			
 
				+        re_result = re.findall(pattern, resp.text)
			
 
				+
			
 
				+        for result in re_result:
			
 
				+            if not result[0] or not result[1] or not result[2]:
			
 
				+                continue
			
 
				+            result_data[key].append([result[0].replace(' ', ""),
			
 
				+                                     datetime.strptime(result[1], '%a, %d %b %Y %H:%M:%S GMT').strftime('%Y-%m-%d %H:%M:%S'),
			
 
				+                                     result[2]])
			
 
				+
			
 
				+        return result_data
			
 
				+
			
 
				+    def save_to_mongo(self, result_data):
			
 
				+        new_data_to_email = {}
			
 
				+        for source, value in result_data.items():
			
 
				+            for key, datas in value.items():
			
 
				+                for data in datas:
			
 
				+                    document = self.mongo.collection.find_one({'title': data[0], 'postdate': data[1], 'link': data[2], })
			
 
				+                    if document is None:
			
 
				+                        data_to_insert = {
			
 
				+                            'source': source,
			
 
				+                            'keyword': key,
			
 
				+                            'title': data[0],
			
 
				+                            'postdate': data[1],
			
 
				+                            'link': data[2],
			
 
				+                            'create_datetime': datetime.now().strftime("%Y-%m-%d %H:%M:%S")
			
 
				+                        }
			
 
				+                        # 如果不存在，添加到列表并插入新文档
			
 
				+                        if data_to_insert['source'] not in new_data_to_email:
			
 
				+                            new_data_to_email[data_to_insert['source']] = [data_to_insert]
			
 
				+                        else:
			
 
				+                            new_data_to_email[data_to_insert['source']].append(data_to_insert)
			
 
				+
			
 
				+                        # 插入新文档到MongoDB集合中
			
 
				+                        self.mongo.collection.insert_one(data_to_insert)
			
 
				+                    else:
			
 
				+                        # 如果存在，跳过
			
 
				+                        continue
			
 
				+        return new_data_to_email
			
 
				+
			
 
				+    def main(self):
			
 
				+        # 输入的关键字
			
 
				+        # input_keys = {
			
 
				+        #     '什么值得买': [''],
			
 
				+        #     '新浪微博': [''],
			
 
				+        #     '36kr': [''],
			
 
				+        #     '虎嗅网': [''],
			
 
				+        # }
			
 
				+        input_keys = {
			
 
				+            '什么值得买': ['京东', '券', '鼠标', '键盘', '硬盘', '咖啡', '显示器'],
			
 
				+            '新浪微博': ['测试网', '比特币', 'web3', 'CoinToEarn', 'YourAirdropETH', 'VIP8888883', 'duola_eth', 'sanyi_eth', 'kuangshenbtc', 'jianshubiji'],
			
 
				+            '36kr': ['测试网', '比特币', 'web3', 'CoinToEarn', 'YourAirdropETH', 'VIP8888883', 'duola_eth', 'sanyi_eth', 'kuangshenbtc', 'jianshubiji'],
			
 
				+            '虎嗅网': ['测试网', '比特币', 'web3', 'CoinToEarn', 'YourAirdropETH', 'VIP8888883', 'duola_eth', 'sanyi_eth', 'kuangshenbtc', 'jianshubiji'],
			
 
				+        }
			
 
				+
			
 
				+        result_data = {}
			
 
				+        for key, value in input_keys.items():
			
 
				+            for k in value:
			
 
				+                if not k:
			
 
				+                    continue
			
 
				+                print(f'正在获取 {key} - {k} 数据')
			
 
				+                datas = self.get_data(key, k)
			
 
				+                time.sleep(random.uniform(4, 6))
			
 
				+                if not datas:
			
 
				+                    print(f'{k}: nodata')
			
 
				+                    continue
			
 
				+
			
 
				+                if key in result_data:
			
 
				+                    result_data[key].update(datas)
			
 
				+                else:
			
 
				+                    result_data.update({key: datas})
			
 
				+
			
 
				+        new_data_to_email = self.save_to_mongo(result_data)
			
 
				+
			
 
				+        # 如果有新消息, 即时发送邮件
			
 
				+        if new_data_to_email:
			
 
				+            for source, datas in new_data_to_email.items():
			
 
				+                content = f'KeyWord Search Message\n\nSource site: {source}\n\n{"*" * 50}\n\nposted at {datetime.now().strftime("%Y-%m-%d %H:%M:%S")}\n\n{"*" * 50}\n\n'
			
 
				+                for data in datas:
			
 
				+                    content += f'source: {data["source"]}, keyword: {data["keyword"]}\n'
			
 
				+                    content += f'title: {data["title"]}, postdate: {data["postdate"]}\n'
			
 
				+                    content += f'link: {data["link"]}\n'
			
 
				+                    content += f'\n{"*" * 50}\n'
			
 
				+
			
 
				+                SendEmail(
			
 
				+                    subject=f"{source} - KeyWord Search Message",
			
 
				+                    title=f'New Message ({datetime.now().strftime("%Y-%m-%d %H:%M:%S")})',
			
 
				+                    text=content
			
 
				+                ).send()
			
 
				+
			
 
				+
			
 
				+if __name__ == '__main__':
			
 
				+    print('keyword reminder start')
			
 
				+    search = KeySearch()
			
 
				+    search.main()
			
 
				+    print('keyword reminder done')
			
--- a/spider_get_and_check_dlt.py
+++ b/spider_get_and_check_dlt.py
@@ -0,0 +1,300 @@
 
				+# -*-coding: utf-8 -*-
			
 
				+import os
			
 
				+import sys
			
 
				+
			
 
				+sys.path.append(os.path.join(os.getcwd().split('auto')[0], 'auto'))
			
 
				+import threading
			
 
				+from datetime import datetime
			
 
				+import time
			
 
				+import httpx
			
 
				+from tools_mongo_handle import MongoHandle
			
 
				+from tools_logs_handle import LogsHandle
			
 
				+from tools_send_email import SendEmail
			
 
				+
			
 
				+
			
 
				+class GetData(object):
			
 
				+    def __init__(self, get_num=9999999):
			
 
				+        self.get_num = get_num
			
 
				+        self.url = 'https://webapi.sporttery.cn/gateway/lottery/getHistoryPageListV1.qry?gameNo=85&provinceId=0&pageSize={}&isVerify=1&pageNo=1'.format(get_num)
			
 
				+        self.logs_handle = LogsHandle()
			
 
				+        self.email_subject = 'dlt'
			
 
				+        self.email_title = '超级大乐透最新一期开奖查询对比'
			
 
				+        self.email_text = '获取数据时间:\n{0}\n{1}\n\n\n\n'.format(datetime.now().strftime("%Y-%m-%d %H:%M:%S"), ('-' * 90))
			
 
				+        self.logs_handle = LogsHandle()
			
 
				+        self.now_day = time.strftime('%Y-%m-%d', time.localtime())
			
 
				+        db = 'dlt'
			
 
				+        collection = 'dlt_' + self.now_day
			
 
				+        self.mongo = MongoHandle(db=db, collection=collection, del_db=False, del_collection=False, auto_remove=0)
			
 
				+
			
 
				+    def main(self):
			
 
				+        data_list = self.req()
			
 
				+
			
 
				+        result_data = self.data_handle(data_list)
			
 
				+
			
 
				+        return result_data
			
 
				+
			
 
				+    def req(self):
			
 
				+        resp = httpx.get(self.url)
			
 
				+        if resp.status_code != 200:
			
 
				+            print('state code: {}'.format(resp.status_code))
			
 
				+            log_detail = '访问失败, 状态码:{},url:{}'.format(resp.status_code, self.url)
			
 
				+            self.logs_handle.logs_write('auto_get_and_check_dlt', log_detail, 'error', False)
			
 
				+            exit(0)
			
 
				+
			
 
				+        resp_json = resp.json()
			
 
				+
			
 
				+        value = resp_json.setdefault('value')
			
 
				+        data_list = value.setdefault('list')
			
 
				+
			
 
				+        if not data_list:
			
 
				+            self.logs_handle.logs_write('auto_get_and_check_dlt', '返回的数据为空, 获取数据失败', 'error', False)
			
 
				+            return
			
 
				+
			
 
				+        print('已获取数据')
			
 
				+        return data_list
			
 
				+
			
 
				+    def data_handle(self, data_list):
			
 
				+        result_data = []
			
 
				+
			
 
				+        for d in data_list:
			
 
				+            numbers = d.setdefault('lotteryUnsortDrawresult')
			
 
				+            try:
			
 
				+                if len(numbers.split(' ')) < 7:
			
 
				+                    continue
			
 
				+            except Exception as e:
			
 
				+                print('numbers: {}, err: {}'.format(numbers, e))
			
 
				+                continue
			
 
				+
			
 
				+            red_list = numbers.split(' ')[:5]
			
 
				+            blue_list = numbers.split(' ')[5:]
			
 
				+
			
 
				+            red_list.sort()
			
 
				+            blue_list.sort()
			
 
				+
			
 
				+            try:
			
 
				+                # 切开红球,蓝球数组
			
 
				+                red1 = red_list[0]
			
 
				+                red2 = red_list[1]
			
 
				+                red3 = red_list[2]
			
 
				+                red4 = red_list[3]
			
 
				+                red5 = red_list[4]
			
 
				+                blue1 = blue_list[0]
			
 
				+                blue2 = blue_list[1]
			
 
				+            except Exception as e:
			
 
				+                print('红球或蓝球数据丢失')
			
 
				+
			
 
				+            result_data.append({
			
 
				+                'serial': d.setdefault('lotteryDrawNum'),
			
 
				+                'red1': red1 or '',
			
 
				+                'red2': red2 or '',
			
 
				+                'red3': red3 or '',
			
 
				+                'red4': red4 or '',
			
 
				+                'red5': red5 or '',
			
 
				+                'blue1': blue1 or '',
			
 
				+                'blue2': blue2 or '',
			
 
				+                'drawPdfUrl': d.setdefault('drawPdfUrl'),
			
 
				+                'date': d.setdefault('lotteryDrawTime'),
			
 
				+                'pool': d.setdefault('poolBalanceAfterdraw')
			
 
				+            })
			
 
				+
			
 
				+        if result_data:
			
 
				+            return result_data
			
 
				+        else:
			
 
				+            self.logs_handle.logs_write('auto_get_and_check_dlt', '返回的数据为空, 获取数据失败', 'error', False)
			
 
				+            exit(0)
			
 
				+
			
 
				+
			
 
				+class CheckMyDLT(object):
			
 
				+    def __init__(self, data):
			
 
				+        self.my_dlt = [
			
 
				+            ['10', '11', '16', '17', '18', '11', '12'],
			
 
				+            ['02', '03', '11', '12', '23', '05', '06'],
			
 
				+            ['07', '09', '15', '17', '22', '09', '11'],
			
 
				+            ['05', '06', '07', '34', '35', '02', '09'],
			
 
				+            ['09', '10', '11', '21', '22', '04', '05']
			
 
				+        ]
			
 
				+        self.data = data
			
 
				+
			
 
				+    def main(self):
			
 
				+        print('开始数据对比')
			
 
				+        prepare_send_text, prepare_send_subject = self.process_text()
			
 
				+
			
 
				+        self.send_data(prepare_send_subject, prepare_send_text)
			
 
				+
			
 
				+    def process_text(self):
			
 
				+        text = ''
			
 
				+        serial_text = None
			
 
				+        subject = None
			
 
				+        for data in self.data:
			
 
				+            red_list = [data['red1'], data['red2'], data['red3'], data['red4'], data['red5']]
			
 
				+            blue_list = [data['blue1'], data['blue2']]
			
 
				+
			
 
				+            # 只查询一期时, subject显示, 如果查询多期,则subject不显示
			
 
				+            if len(data) == 1:
			
 
				+                subject = '{}'.format(data['serial'])
			
 
				+
			
 
				+            # 组成每期数据的text
			
 
				+            serial_text = 'serial: {}\t\tlottery draw date: {}\t\tbonus pool: {} RMB\n{}\nlottery draw num: {} + {}\n'.format(data['serial'], data['date'], data['pool'], '*' * 90,
			
 
				+                                                                                                                              red_list, blue_list)
			
 
				+
			
 
				+            for my_num in self.my_dlt:
			
 
				+                my_red_list = my_num[:5]
			
 
				+                my_blue_list = my_num[5:]
			
 
				+
			
 
				+                # 使用列表推导式找出两个列表中都存在的元素
			
 
				+                red_common_elements = [element for element in red_list if element in my_red_list]
			
 
				+                blue_common_elements = [element for element in blue_list if element in my_blue_list]
			
 
				+
			
 
				+                # 计算相等元素的数量
			
 
				+                red_equal_count = len(red_common_elements)
			
 
				+                blue_equal_count = len(blue_common_elements)
			
 
				+
			
 
				+                serial_text += 'my nums: {} + {}\t\tred hit: {}\tblue hit: {}\n'.format(my_red_list, my_blue_list, red_equal_count, blue_equal_count)
			
 
				+
			
 
				+            text += serial_text
			
 
				+            text += '{}\n\n\n\n'.format('*' * 90)
			
 
				+
			
 
				+        return text, subject
			
 
				+
			
 
				+    def send_data(self, subject, text):
			
 
				+        title = '超级大乐透最新一期开奖查询对比'
			
 
				+        SendEmail(subject, title, text).send()
			
 
				+
			
 
				+
			
 
				+class SaveToDB(object):
			
 
				+    def __init__(self, data):
			
 
				+        self.logs_handle = LogsHandle()
			
 
				+        self.now_day = time.strftime('%Y-%m-%d', time.localtime())
			
 
				+        db = 'dlt'
			
 
				+        collection = 'dlt_' + self.now_day
			
 
				+        self.mongo = MongoHandle(db=db, collection=collection, del_db=False, del_collection=True, auto_remove=0)
			
 
				+
			
 
				+        self.data = data
			
 
				+
			
 
				+    def save_data(self):
			
 
				+        print('开始保存数据')
			
 
				+        for data in self.data:
			
 
				+            data_to_insert = {
			
 
				+                "serial": data.setdefault('serial'),
			
 
				+                "red1": data.setdefault('red1'),
			
 
				+                "red2": data.setdefault('red2'),
			
 
				+                "red3": data.setdefault('red3'),
			
 
				+                "red4": data.setdefault('red4'),
			
 
				+                "red5": data.setdefault('red5'),
			
 
				+                "blue1": data.setdefault('blue1'),
			
 
				+                "blue2": data.setdefault('blue2'),
			
 
				+                "date": data.setdefault('date'),
			
 
				+                "pool": data.setdefault('pool'),
			
 
				+                "drawPdfUrl": data.setdefault('drawPdfUrl'),
			
 
				+                "create_time": int(time.time()),
			
 
				+                "create_datetime": datetime.now().strftime("%Y-%m-%d %H:%M:%S")
			
 
				+            }
			
 
				+
			
 
				+            self.mongo.collection.insert_one(data_to_insert)
			
 
				+        print('数据已储存, 共储存数据{}条'.format(len(self.data)))
			
 
				+
			
 
				+
			
 
				+class DLT(object):
			
 
				+    def start(self, n):
			
 
				+        # # 获取数据
			
 
				+        G = GetData(n)
			
 
				+        data = G.main()
			
 
				+        return data
			
 
				+
			
 
				+    def check(self, data):
			
 
				+        # # 读取数据并发送到邮件
			
 
				+        Check = CheckMyDLT(data)
			
 
				+        Check.main()
			
 
				+
			
 
				+    def mongo(self, data):
			
 
				+        # 存 mongodb
			
 
				+        Mongo = SaveToDB(data)
			
 
				+        Mongo.save_data()
			
 
				+
			
 
				+    def main(self):
			
 
				+        L = LogsHandle()
			
 
				+        L.logs_write('auto_get_and_check_dlt', 'dlt任务开始', 'start', False)
			
 
				+
			
 
				+        data = self.start(30)
			
 
				+
			
 
				+        if data:
			
 
				+            tasks = [
			
 
				+                self.check,
			
 
				+                self.mongo
			
 
				+            ]
			
 
				+
			
 
				+            threads = []
			
 
				+
			
 
				+            for i in tasks:
			
 
				+                thread = threading.Thread(target=i, args=(data,))
			
 
				+                threads.append(thread)
			
 
				+                thread.start()
			
 
				+
			
 
				+            for thread in threads:
			
 
				+                thread.join()
			
 
				+
			
 
				+            L.logs_write('auto_get_and_check_dlt', 'dlt任务结束', 'start', False)
			
 
				+            print('done')
			
 
				+        else:
			
 
				+            L.logs_write('auto_get_and_check_dlt', '获取数据失败', 'error', False)
			
 
				+
			
 
				+
			
 
				+class Luanch(object):
			
 
				+    def start(self, n):
			
 
				+        # # 获取数据
			
 
				+        G = GetData(n)
			
 
				+        data = G.main()
			
 
				+        return data
			
 
				+
			
 
				+    def check(self, data):
			
 
				+        # # 读取数据并发送到邮件
			
 
				+        Check = CheckMyDLT(data)
			
 
				+        Check.main()
			
 
				+
			
 
				+    def mongo(self, data):
			
 
				+        # 存 mongodb
			
 
				+        Mongo = SaveToDB(data)
			
 
				+        Mongo.save_data()
			
 
				+
			
 
				+    def main(self):
			
 
				+        Logs = LogsHandle()
			
 
				+        Logs.logs_write('auto_get_and_check_dlt', 'dlt任务开始', 'start', False)
			
 
				+
			
 
				+        data = self.start(30)
			
 
				+
			
 
				+        if data:
			
 
				+            tasks = [
			
 
				+                self.check,
			
 
				+                self.mongo
			
 
				+            ]
			
 
				+
			
 
				+            threads = []
			
 
				+
			
 
				+            for i in tasks:
			
 
				+                thread = threading.Thread(target=i, args=(data,))
			
 
				+                threads.append(thread)
			
 
				+                thread.start()
			
 
				+
			
 
				+            for thread in threads:
			
 
				+                thread.join()
			
 
				+
			
 
				+            Logs.logs_write('auto_get_and_check_dlt', 'dlt任务结束', 'start', False)
			
 
				+            print('done')
			
 
				+        else:
			
 
				+            Logs.logs_write('auto_get_and_check_dlt', '获取数据失败', 'error', False)
			
 
				+
			
 
				+
			
 
				+if __name__ == '__main__':
			
 
				+    L = Luanch()
			
 
				+    L.main()
			
 
				+
			
 
				+    # ## 单独获取数据
			
 
				+    # G = GetData()
			
 
				+    # data = G.main()
			
 
				+    # re_data = data[::-1]
			
 
				+    # save_txt = ''
			
 
				+    # for item in re_data:
			
 
				+    #     save_txt += f'[[{item["red1"]}, {item["red2"]}, {item["red3"]}, {item["red4"]}, {item["red5"]}], [{item["blue1"]}, {item["blue2"]}]],\n'
			
 
				+    #
			
 
				+    # with open('dlt.txt', 'w') as f:
			
 
				+    #     f.write(save_txt)
			
--- a/spider_get_and_check_ssq.py
+++ b/spider_get_and_check_ssq.py
@@ -0,0 +1,94 @@
 
				+# -*-coding: utf-8 -*-
			
 
				+import datetime
			
 
				+import os
			
 
				+import sqlite3
			
 
				+from selenium import webdriver
			
 
				+import httpx
			
 
				+
			
 
				+
			
 
				+def get_cookies(url):
			
 
				+    chrome_options = webdriver.ChromeOptions()
			
 
				+    args = ['--headless', '--no-sandbox', '--disable-gpu', '--disable-dev-shm-usage']
			
 
				+    for arg in args:
			
 
				+        chrome_options.add_argument(arg)
			
 
				+    driver = webdriver.Chrome(options=chrome_options)
			
 
				+    driver.get(url)
			
 
				+
			
 
				+    result_cookie = driver.get_cookies()
			
 
				+    if result_cookie:
			
 
				+        return result_cookie
			
 
				+    else:
			
 
				+        pass
			
 
				+
			
 
				+
			
 
				+def req(url, cookies):
			
 
				+    with httpx.Client() as client:
			
 
				+        headers = {
			
 
				+            "Accept": "text/html,application/xhtml+xml,application/xml;q=0.9,image/webp,image/apng,*/*;q=0.8,application/signed-exchange;v=b3;q=0.7",
			
 
				+            "Accept-Language": "zh-CN,zh;q=0.9,en;q=0.8,en-GB;q=0.7,en-US;q=0.6",
			
 
				+            "Connection": "keep-alive",
			
 
				+            "Cookie": cookies,
			
 
				+            "Host": "www.cwl.gov.cn",
			
 
				+            "User-Agent": "Mozilla/5.0"
			
 
				+        }
			
 
				+        res = client.get(url, headers=headers, follow_redirects=True)
			
 
				+
			
 
				+        if res.status_code != 200:
			
 
				+            print(res.status_code)
			
 
				+            log_file_path = os.path.join(get_path.get_logs_path(), str(datetime.date.today()) + '.log')
			
 
				+            with open(log_file_path, 'a') as f:
			
 
				+                f.write("\n spider_dlt: %s")
			
 
				+            return
			
 
				+
			
 
				+        res_json = res.json()
			
 
				+        data_handle(res_json['result'])
			
 
				+
			
 
				+
			
 
				+def data_handle(source_data):
			
 
				+    ssq_db_path = os.path.join(utils_get_path.get_db_path(), 'ssq.db')
			
 
				+    conn = sqlite3.connect(ssq_db_path)
			
 
				+
			
 
				+    c = conn.cursor()
			
 
				+
			
 
				+    c.execute('drop table if exists data;')
			
 
				+
			
 
				+    c.execute(
			
 
				+        'create table if not exists `ssq` (id INT PRIMARY KEY NOT NULL, `code` varchar(10),`red1` varchar(2),`red2` varchar(2),`red3` varchar(2),`red4` varchar(2),`red5` varchar(2),`red6` varchar(2),`blue` varchar(2),`date` varchar(12),`sales` varchar(15),`poolmoney` varchar(15),`content` varchar(255));')
			
 
				+
			
 
				+    id = 1
			
 
				+    for data in source_data:
			
 
				+        insert_sql = "INSERT INTO ssq ('id', 'code', 'red1', 'red2', 'red3', 'red4', 'red5', 'red6', 'blue', 'date', 'sales', 'poolmoney', 'content') VALUES ({0}, '{1}', '{2}', '{3}', '{4}', '{5}', '{6}', '{7}', '{8}', '{9}', '{10}', '{11}', '{12}')".format(
			
 
				+            id,
			
 
				+            data.setdefault('code'),
			
 
				+            data.setdefault('red').split(',')[0],
			
 
				+            data.setdefault('red').split(',')[1],
			
 
				+            data.setdefault('red').split(',')[2],
			
 
				+            data.setdefault('red').split(',')[3],
			
 
				+            data.setdefault('red').split(',')[4],
			
 
				+            data.setdefault('red').split(',')[5],
			
 
				+            data.setdefault('blue'),
			
 
				+            data.setdefault('date'),
			
 
				+            data.setdefault('sales'),
			
 
				+            data.setdefault('poolmoney'),
			
 
				+            data.setdefault('content')
			
 
				+        )
			
 
				+        c.execute(insert_sql)
			
 
				+        conn.commit()
			
 
				+        id += 1
			
 
				+
			
 
				+    conn.close()
			
 
				+
			
 
				+
			
 
if __name__ == "__main__":
    # Script entry point: query the latest ssq draw notices and store them.
    url = 'http://www.cwl.gov.cn/cwl_admin/front/cwlkj/search/kjxx/findDrawNotice?name=ssq&issueCount=&issueStart=&issueEnd=&dayStart=&dayEnd=&pageNo=1&pageSize=10&week=&systemType=PC'

    # Disabled: obtain a fresh cookie through the browser helper.
    # result_cookie = util_get_cookies.get_cookies(url)
    #
    # cookies = '{}={}'.format(result_cookie[0].setdefault('name'), result_cookie[0].setdefault('value'))
    #
    # print(cookies)

    # Cookie used while testing.
    cookies = "HMF_CI=1b2fd73192f2054a429b2bfa4f58c3ff98119441420133cc8a04ca9c95aa2266eaec5bb7cf1d37df5f9864b8629ba407bacc9c58cadf26e2d726582df3870b0969"

    req(url, cookies)
			
--- a/spider_get_free_ip_proxy.py
+++ b/spider_get_free_ip_proxy.py
@@ -0,0 +1,94 @@
 
				+# -*- coding: utf-8 -*-
			
 
				+'''
			
 
				+获取ip代理
			
 
				+'''
			
 
				+import os
			
 
				+import sys
			
 
				+
			
 
				+sys.path.append(os.path.join(os.getcwd().split('auto')[0], 'auto'))
			
 
				+from datetime import datetime
			
 
				+import time
			
 
				+import re
			
 
				+import httpx
			
 
				+from tools_mongo_handle import MongoHandle
			
 
				+from tools_logs_handle import LogsHandle
			
 
				+
			
 
				+
			
 
				+class IpProxy(object):
			
 
				+    def __init__(self):
			
 
				+        self.log_handle = LogsHandle()
			
 
				+        self.now_day = time.strftime('%Y-%m-%d', time.localtime())
			
 
				+        db = 'free_ip'
			
 
				+        collection = 'free_ip'
			
 
				+        self.mongo = MongoHandle(db=db, collection=collection, del_db=False, del_collection=True, auto_remove=0)
			
 
				+
			
 
				+    def main(self):
			
 
				+        self.log_handle.logs_write('get_free_ip_proxy', '开始获取免费ip', 'start', False)
			
 
				+        text = self.request()
			
 
				+
			
 
				+        result_list = self.re_data(text)
			
 
				+
			
 
				+        self.db(result_list)
			
 
				+        self.log_handle.logs_write('get_free_ip_proxy', '获取免费ip已完成', 'done', False)
			
 
				+
			
 
				+    def request(self):
			
 
				+        print('开始获取免费代理ip')
			
 
				+        url = 'https://www.dailiproxy.com/cn-free/'
			
 
				+
			
 
				+        headers = {
			
 
				+            'User-Agent': 'Mozilla/5.0',
			
 
				+            'Accept': 'text/html,application/xhtml+xml,application/xml;q=0.9,image/webp,image/apng,*/*;q=0.8,application/signed-exchange;v=b3;q=0.7',
			
 
				+            'Accept-Encoding': 'gzip, deflate, br',
			
 
				+            'Accept-Language': 'zh-CN,zh;q=0.9,en;q=0.8,en-GB;q=0.7,en-US;q=0.6'
			
 
				+        }
			
 
				+
			
 
				+        response = httpx.get(url=url, headers=headers)
			
 
				+
			
 
				+        if not response.status_code == 200:
			
 
				+            self.log_handle.logs_write('get_free_ip_proxy', '请求状态码: %s' % response.status_code, 'error', False)
			
 
				+        else:
			
 
				+            response.encoding = "utf-8"
			
 
				+
			
 
				+            text = response.text
			
 
				+
			
 
				+            print('获取免费代理ip完成')
			
 
				+            return text
			
 
				+
			
 
				+    def re_data(self, text):
			
 
				+        print('开始整理数据')
			
 
				+        result_list = []
			
 
				+        try:
			
 
				+            re_text = re.findall('var json = ([\S\s]*?);', text)
			
 
				+
			
 
				+            for r in re_text:
			
 
				+                for rr in eval(r):
			
 
				+                    result_list.append(rr)
			
 
				+
			
 
				+        except Exception as e:
			
 
				+            print(e)
			
 
				+            return None
			
 
				+
			
 
				+        return result_list
			
 
				+
			
 
				+    def db(self, result_list):
			
 
				+        print('开始储存免费代理ip')
			
 
				+
			
 
				+        for data in result_list:
			
 
				+            data_to_insert = {
			
 
				+                "ip": data.setdefault('date').replace(' ', ''),
			
 
				+                "port": data.setdefault('port').replace(' ', ''),
			
 
				+                "location": data.setdefault('high'),
			
 
				+                "speed": data.setdefault('low'),
			
 
				+                "sunset": data.setdefault('sunset'),
			
 
				+                "rq": data.setdefault('rq').replace(' ', ''),
			
 
				+                "create_time": int(time.time()),
			
 
				+                "create_datetime": datetime.now().strftime("%Y-%m-%d %H:%M:%S")
			
 
				+            }
			
 
				+
			
 
				+            self.mongo.collection.insert_one(data_to_insert)
			
 
				+        print('数据已储存, 共储存数据{}条'.format(len(result_list)))
			
 
				+
			
 
				+
			
 
				+if __name__ == '__main__':
			
 
				+    I = IpProxy()
			
 
				+    I.main()
			
--- a/spider_get_one_week_weather.py
+++ b/spider_get_one_week_weather.py
@@ -0,0 +1,72 @@
 
				+# -*- coding: utf-8 -*-
			
 
				+'''
			
 
				+获取天气预报
			
 
				+'''
			
 
				+import os
			
 
				+import sys
			
 
				+
			
 
				+sys.path.append(os.path.join(os.getcwd().split('auto')[0], 'auto'))
			
 
				+import time
			
 
				+from datetime import datetime
			
 
				+import httpx
			
 
				+from bs4 import BeautifulSoup
			
 
				+
			
 
				+from tools_logs_handle import LogsHandle
			
 
				+from tools_send_email import SendEmail
			
 
				+
			
 
				+
			
 
				+class Weather():
			
 
				+    def __init__(self):
			
 
				+        self.email_subject = '天气预报'
			
 
				+        self.email_title = 'Weather forecast'
			
 
				+        self.email_text = '获取数据时间:\n{0}\n{1}\n\n\n\n'.format(datetime.now().strftime("%Y-%m-%d %H:%M:%S"), ('-' * 90))
			
 
				+        self.logs_handle = LogsHandle()
			
 
				+        self.now_day = time.strftime('%Y-%m-%d', time.localtime())
			
 
				+
			
 
				+    def main(self):
			
 
				+        self.logs_handle.logs_write('Weather forecast', '开始获取天气预报数据', 'start', False)
			
 
				+        try:
			
 
				+            area_code = '59287'
			
 
				+            one_week = [
			
 
				+                '/tomorrow-%s.htm' % area_code,
			
 
				+                '/third-%s.htm' % area_code,
			
 
				+                '/fourth-%s.htm' % area_code,
			
 
				+                '/fifth-%s.htm' % area_code,
			
 
				+                '/sixth-%s.htm' % area_code,
			
 
				+                '/seventh-%s.htm' % area_code,
			
 
				+            ]
			
 
				+            url = "https://tianqi.2345.com/today-%s.htm" % area_code
			
 
				+            header = {
			
 
				+                'User-Agent': 'Mozilla/4.0 (compatible; MSIE 8.0; Windows NT 6.1; WOW64; Trident/4.0; SLCC2; Media Center PC 6.0; InfoPath.2; MS-RTC LM 8'
			
 
				+            }
			
 
				+            response = httpx.get(url=url, headers=header)  # 增加headers参数，简单伪装UA
			
 
				+            response.encoding = "utf-8"
			
 
				+            bs = BeautifulSoup(response.text, 'html.parser')  # 这里我们用html.parser解析器
			
 
				+
			
 
				+            one_week_weather = []
			
 
				+            for week in one_week:
			
 
				+                a = bs.find_all('a', href=week)  # 查找对应元素
			
 
				+                a = ' '.join(a[0].text.split())
			
 
				+                one_week_weather.append(a)
			
 
				+
			
 
				+        except Exception as e:
			
 
				+            print(e)
			
 
				+            self.logs_handle.logs_write('Weather forecast', e, 'error', False)
			
 
				+            exit(0)
			
 
				+
			
 
				+        subject = "天气预报"
			
 
				+        title = "weather"
			
 
				+        text = "天气预报: {}获取并发送\n\n".format(time.strftime('%Y-%m-%d %H:%M:%S', time.localtime())) + '\n\n'.join(one_week_weather)
			
 
				+
			
 
				+        S = SendEmail(subject=subject, title=title, text=text)
			
 
				+        S.send()
			
 
				+
			
 
				+        self.logs_handle.logs_write('Weather forecast', '天气预报数据已获取', 'done', False)
			
 
				+
			
 
				+
			
 
				+if __name__ == "__main__":
			
 
				+    L = LogsHandle()
			
 
				+    L.logs_write('Weather forecast', '开始获取天气预报数据', 'start', False)
			
 
				+    W = Weather()
			
 
				+    W.main()
			
 
				+    L.logs_write('Weather forecast', '天气预报数据已获取', 'done', False)
			
--- a/tools_load_config.py
+++ b/tools_load_config.py
@@ -0,0 +1,33 @@
 
				+# -*- coding: utf-8 -*-
			
 
				+'''
			
 
				+用于读取config.json
			
 
				+无需定时
			
 
				+'''
			
 
				+
			
 
				+import json
			
 
				+import os
			
 
				+import sys
			
 
				+
			
 
				+
			
 
				+def load_config():
			
 
				+    try:
			
 
				+        sys.path.append(os.path.join(os.getcwd().split('auto')[0], 'auto'))
			
 
				+        base_project = os.path.join(os.getcwd().split('auto')[0], 'auto')
			
 
				+
			
 
				+        config_path = os.path.join(base_project, 'config.json')
			
 
				+        config_json = {}
			
 
				+        with open(config_path, 'r') as f:
			
 
				+            config_json = json.load(f)
			
 
				+
			
 
				+        if not config_json:
			
 
				+            print('No config file found')
			
 
				+            exit(0)
			
 
				+    except Exception as e:
			
 
				+        print(e)
			
 
				+        exit(0)
			
 
				+
			
 
				+    return config_json
			
 
				+
			
 
				+
			
 
				+def get_base_path():
			
 
				+    return os.path.join(os.getcwd().split('auto')[0], 'auto')
			
--- a/tools_logs_handle.py
+++ b/tools_logs_handle.py
@@ -0,0 +1,75 @@
 
				+# -*- coding: UTF-8 -*-
			
 
				+'''
			
 
				+获取每天日期,新建 logs 文件
			
 
				+存到 logs 文件夹中
			
 
				+'''
			
 
				+import time
			
 
				+from datetime import datetime
			
 
				+from tools_mongo_handle import MongoHandle
			
 
				+from tools_send_email import SendEmail
			
 
				+import tools_load_config
			
 
				+
			
 
# Load the shared configuration and the project root once, at import time.
# NOTE(review): neither name is referenced by the LogsHandle class below.
config_json = tools_load_config.load_config()
base_project = tools_load_config.get_base_path()
			
 
				+
			
 
				+
			
 
				+class LogsHandle(object):
			
 
				+    def __init__(self):
			
 
				+        self.now_day = time.strftime('%Y-%m-%d', time.localtime())
			
 
				+        db = 'logs'
			
 
				+        collection = 'logs_' + self.now_day
			
 
				+        self.mongo = MongoHandle(db=db, collection=collection, del_db=False, del_collection=False, auto_remove=0)
			
 
				+
			
 
				+    def logs_generate(self):
			
 
				+        data_to_insert = {
			
 
				+            "title": "logs",
			
 
				+            "context": 'generate logs',
			
 
				+            "state": "create",
			
 
				+            "create_time": int(time.time()),
			
 
				+            "create_datetime": datetime.now().strftime("%Y-%m-%d %H:%M:%S")
			
 
				+        }
			
 
				+
			
 
				+        self.mongo.collection.insert_one(data_to_insert)
			
 
				+
			
 
				+    def logs_send(self):
			
 
				+        subject = 'auto collection logs'
			
 
				+        title = 'auto collection - logs: {}'.format(self.now_day)
			
 
				+        text = ''
			
 
				+
			
 
				+        # TODO
			
 
				+        # 从 mongodb 读取日志, 拼接 text, 发送邮件
			
 
				+        # 查询所有文档
			
 
				+        cursor = self.mongo.collection.find()
			
 
				+        # 遍历结果集
			
 
				+        for record in cursor:
			
 
				+            text += "logs_source: {}, logs_detail: {}, state: {} logs_create_time: {}\n\n".format(record.setdefault('title'),
			
 
				+                                                                                                  record.setdefault('content'),
			
 
				+                                                                                                  record.setdefault('state'),
			
 
				+                                                                                                  record.setdefault('create_datetime'),
			
 
				+                                                                                                  )
			
 
				+
			
 
				+        S = SendEmail(subject=subject, title=title, text=text)
			
 
				+        S.send()
			
 
				+
			
 
				+    def logs_write(self, title_source=None, content=None, state=None, send_now=False):
			
 
				+        data_to_insert = {
			
 
				+            "title": title_source,
			
 
				+            "context": content,
			
 
				+            "state": state,
			
 
				+            "create_time": int(time.time()),
			
 
				+            "create_datetime": datetime.now().strftime("%Y-%m-%d %H:%M:%S")
			
 
				+        }
			
 
				+
			
 
				+        self.mongo.collection.insert_one(data_to_insert)
			
 
				+
			
 
				+        if send_now:
			
 
				+            subject = 'auto collection'
			
 
				+            title = 'auto collection - running logs: {}'.format(self.now_day)
			
 
				+            text = 'logs_source: {}, logs_detail: {}, state: {} logs_create_time: {}'.format(data_to_insert.setdefault('title'),
			
 
				+                                                                                             data_to_insert.setdefault('content'),
			
 
				+                                                                                             data_to_insert.setdefault('state'),
			
 
				+                                                                                             data_to_insert.setdefault('create_datetime'),
			
 
				+                                                                                             )
			
 
				+
			
 
				+            Send = SendEmail(subject=subject, title=title, text=text)
			
 
				+            Send.send()
			
--- a/tools_mongo_handle.py
+++ b/tools_mongo_handle.py
@@ -0,0 +1,52 @@
 
				+# -*-coding: utf-8 -*-
			
 
				+import pymongo
			
 
				+import time
			
 
				+import tools_load_config
			
 
				+
			
 
				+config_json = tools_load_config.load_config()
			
 
				+base_project = tools_load_config.get_base_path()
			
 
				+
			
 
				+DB_USER = config_json.get('DB_USER')
			
 
				+DB_PASSWORD = config_json.get('DB_PASSWORD')
			
 
				+DB_IP = config_json.get('DB_IP')
			
 
				+DB_PORT = config_json.get('DB_PORT')
			
 
				+MONGO_LINK = f'mongodb://{DB_USER}:{DB_PASSWORD}@{DB_IP}:{DB_PORT}/'
			
 
				+
			
 
				+
			
 
				+class MongoHandle(object):
			
 
				+    def __init__(self, db, collection, del_db=False, del_collection=False, auto_remove=0):
			
 
				+        self.client = pymongo.MongoClient(MONGO_LINK)
			
 
				+        self.db = db
			
 
				+        self.collection = collection
			
 
				+
			
 
				+        if del_db and db:
			
 
				+            # 检查数据库是否存在
			
 
				+            if db in self.client.list_database_names():
			
 
				+                # 删除数据库
			
 
				+                self.client.drop_database(db)
			
 
				+        self.db = self.client[db]
			
 
				+
			
 
				+        if del_collection and self.collection:
			
 
				+            # 检查集合是否存在
			
 
				+            if self.collection in self.db.list_collection_names():
			
 
				+                # 删除集合
			
 
				+                self.db.drop_collection(collection)
			
 
				+        self.collection = self.db[collection]
			
 
				+
			
 
				+        if auto_remove:
			
 
				+            self.auto_remove_data(auto_remove)
			
 
				+
			
 
				+    def write_data(self, data):
			
 
				+        self.collection.insert_one(data)
			
 
				+
			
 
				+    def auto_remove_data(self, day):
			
 
				+        for data in self.collection.find({'create_time': {'$lt': int(time.time()) - day * 24 * 60 * 60}}):
			
 
				+            self.collection.delete_one({'_id': data['_id']})
			
 
				+
			
 
				+# if __name__ == '__main__':
			
 
				+#     mongo = MongoHandle('test_db', 'test_collection', False, False, 0)
			
 
				+#     mongo.collection.insert_one({'name': 'test'})
			
 
				+#     mongo.collection.insert_many([{'name': 'test1'}, {'name': 'test2'}])
			
 
				+#     print(mongo.collection.find_one())
			
 
				+#     print(mongo.collection.find())
			
 
				+#     print('done!')
			
--- a/tools_proxy.py
+++ b/tools_proxy.py
@@ -0,0 +1,44 @@
 
				+# -*- coding: utf-8 -*-
			
 
				+import subprocess
			
 
				+import os
			
 
				+import platform
			
 
				+from tools_load_config import load_config
			
 
				+
			
 
				+comfig_json = load_config()
			
 
				+
			
 
				+use_proxy = comfig_json.get('USE_PROXY')
			
 
				+proxy_host = comfig_json.get('PROXY_HOST')
			
 
				+proxy_port = comfig_json.get('PROXY_PORT')
			
 
				+
			
 
				+if use_proxy:
			
 
				+    system = platform.system()
			
 
				+
			
 
				+    if system == 'Windows':
			
 
				+        env = os.environ.copy()
			
 
				+        command = ['set',
			
 
				+                   'http_proxy=http://{}:{}'.format(proxy_host, proxy_port),
			
 
				+                   '&',
			
 
				+                   'set',
			
 
				+                   'https_proxy=http://{}:{}'.format(proxy_host, proxy_port)
			
 
				+                   ]
			
 
				+    elif system == 'Darwin':
			
 
				+        env = os.environ.copy()
			
 
				+        command = ['export',
			
 
				+                   'https_proxy=http://{}:{}'.format(proxy_host, proxy_port),
			
 
				+                   'http_proxy=http://{}:{}'.format(proxy_host, proxy_port),
			
 
				+                   'all_proxy=socks5://{}:{}'.format(proxy_host, proxy_port)
			
 
				+                   ]
			
 
				+    elif system == 'Linux':
			
 
				+        env = os.environ.copy()
			
 
				+        command = ['export',
			
 
				+                   'https_proxy=http://{}:{}'.format(proxy_host, proxy_port),
			
 
				+                   'http_proxy=http://{}:{}'.format(proxy_host, proxy_port),
			
 
				+                   'all_proxy=socks5://{}:{}'.format(proxy_host, proxy_port)
			
 
				+                   ]
			
 
				+    else:
			
 
				+        print("未知操作系统")
			
 
				+        exit(0)
			
 
				+
			
 
				+    result = subprocess.run(command, text=True, capture_output=True)
			
 
				+
			
 
				+    print(result)
			
--- a/tools_send_email.py
+++ b/tools_send_email.py
@@ -0,0 +1,50 @@
 
				+# -*- coding: UTF-8 -*-
			
 
				+import smtplib
			
 
				+from email.mime.text import MIMEText
			
 
				+from email.header import Header
			
 
				+import tools_load_config
			
 
				+
			
 
# Shared configuration and project root, loaded once at import time.
config_json = tools_load_config.load_config()
base_project = tools_load_config.get_base_path()

# NOTE(review): PROJECT_NAME, the DB_* values and MONGO_LINK are not
# referenced by the SendEmail class in this module.
PROJECT_NAME = config_json.get('PROJECT_NAME')
DB_USER = config_json.get('DB_USER')
DB_PASSWORD = config_json.get('DB_PASSWORD')
DB_IP = config_json.get('DB_IP')
DB_PORT = config_json.get('DB_PORT')
MONGO_LINK = f'mongodb://{DB_USER}:{DB_PASSWORD}@{DB_IP}:{DB_PORT}/'
# SMTP settings used by SendEmail.
MAIL_HOST = config_json.get('MAIL_HOST')
MAIL_USER = config_json.get('MAIL_USER')
MAIL_PASS = config_json.get('MAIL_PASS')
MAIL_SENDER = config_json.get('MAIL_SENDER')
MAIL_RECEIVERS = config_json.get('MAIL_RECEIVERS')
			
 
				+
			
 
				+
			
 
				+class SendEmail(object):
			
 
				+    def __init__(self, subject='auto subject', title='auto title', text='auto text') -> None:
			
 
				+        # 第三方 SMTP 服务
			
 
				+        self.mail_host = MAIL_HOST  # 设置服务器
			
 
				+        self.mail_user = MAIL_USER  # 用户名
			
 
				+        self.mail_pass = MAIL_PASS  # 口令
			
 
				+
			
 
				+        self.sender = MAIL_SENDER
			
 
				+        self.receivers = [MAIL_RECEIVERS]
			
 
				+
			
 
				+        self.subject = subject
			
 
				+        self.title = title
			
 
				+        self.text = text
			
 
				+
			
 
				+    def send(self):
			
 
				+        message = MIMEText(self.text, 'plain', 'utf-8')
			
 
				+        message['From'] = Header(self.title, 'utf-8')
			
 
				+        message['To'] = Header("auto", 'utf-8')
			
 
				+        message['Subject'] = Header(self.subject, 'utf-8')
			
 
				+
			
 
				+        try:
			
 
				+            smtpObj = smtplib.SMTP()
			
 
				+            smtpObj.connect(self.mail_host, 25)
			
 
				+            smtpObj.login(self.mail_user, self.mail_pass)
			
 
				+            smtpObj.sendmail(self.sender, self.receivers, message.as_string())
			
 
				+            print("邮件发送成功")
			
 
				+        except smtplib.SMTPException:
			
 
				+            print("Error: 无法发送邮件")
			
--- a/tools_sync_psql.py
+++ b/tools_sync_psql.py
@@ -0,0 +1,116 @@
 
				+# -*- coding: utf-8 -*-
			
 
				+from datetime import datetime
			
 
				+from pymongo import MongoClient
			
 
				+import psycopg2
			
 
				+
			
 
				+
			
 
				+def mongo():
			
 
				+    # mongodb
			
 
				+    client = MongoClient('mongodb://root:aaaAAA111!!!@home.erhe.link:38000/')
			
 
				+
			
 
				+    # 指定数据库名称
			
 
				+    db_name = 'NEWS'  # 替换为你的数据库名称
			
 
				+
			
 
				+    # 选择数据库
			
 
				+    db = client[db_name]
			
 
				+
			
 
				+    # 列出数据库中的所有集合
			
 
				+    collections = db.list_collection_names()
			
 
				+
			
 
				+    all_data = []
			
 
				+
			
 
				+    for collection_name in collections:
			
 
				+        # 选择集合
			
 
				+        collection = db[collection_name]
			
 
				+
			
 
				+        # 读取集合中的所有数据
			
 
				+        for document in collection.find({}, {'_id': 0}):
			
 
				+            all_data.append(document)
			
 
				+
			
 
				+    sorted_data = []
			
 
				+
			
 
				+    if all_data:
			
 
				+        sorted_data = sorted(all_data, key=lambda x: x['create_time'], reverse=True)
			
 
				+
			
 
				+    return sorted_data
			
 
				+
			
 
				+
			
 
				+def pg(sorted_data):
			
 
				+    # 数据库连接参数
			
 
				+    db_params = {
			
 
				+        'dbname': 'auto',
			
 
				+        'user': 'odoo',
			
 
				+        'password': 'odoo',
			
 
				+        'host': '192.168.100.146',
			
 
				+        'port': '5432'
			
 
				+    }
			
 
				+    conn = psycopg2.connect(**db_params)
			
 
				+    for doc in sorted_data:
			
 
				+        # 如果有title，就不插入
			
 
				+        try:
			
 
				+            cur = conn.cursor()
			
 
				+
			
 
				+            create_time_dt = None
			
 
				+            if doc.get('create_time'):
			
 
				+                create_time_dt = datetime.utcfromtimestamp(doc['create_time'])
			
 
				+
			
 
				+            values = {
			
 
				+                'name': doc.get('title'),
			
 
				+                'context': doc.get('context') or '',
			
 
				+                'source_url': doc.get('source_url') or '',
			
 
				+                'link': doc.get('line') or '',
			
 
				+                'article_type': doc.get('article_type') or '',
			
 
				+                'article_source': doc.get('article_source') or '',
			
 
				+                'img_url': doc.get('img_url') or '',
			
 
				+                'keyword': doc.get('keyword') or '',
			
 
				+                'posted_date': doc.get('posted_date') or '',
			
 
				+                'create_time_ts': doc.get('create_time') or '',
			
 
				+                'create_time': create_time_dt,
			
 
				+                'create_datetime': datetime.strptime(doc['create_datetime'], '%Y-%m-%d %H:%M:%S') if doc.get('create_datetime') else None
			
 
				+            }
			
 
				+
			
 
				+            # 将create_time转换为适合数据库的时间戳格式
			
 
				+            create_time_dt = datetime.utcfromtimestamp(values['create_time_ts']) if values['create_time_ts'] else None
			
 
				+            values['create_time'] = create_time_dt
			
 
				+
			
 
				+            # 将create_datetime转换为适合数据库的时间戳格式
			
 
				+            create_datetime_str = doc.get('create_datetime')
			
 
				+            values['create_datetime'] = datetime.strptime(create_datetime_str, '%Y-%m-%d %H:%M:%S') if create_datetime_str else None
			
 
				+
			
 
				+            # 检查数据库中是否已存在相同title的记录
			
 
				+            check_query = "SELECT id FROM news_info WHERE name = %s;"
			
 
				+            cur.execute(check_query, (values['name'],))
			
 
				+
			
 
				+            # 如果没有找到记录，则插入新记录
			
 
				+            if not cur.fetchone():
			
 
				+                insert_query = """
			
 
				+                    INSERT INTO news_info (name, context, source_url, link, article_type, article_source, img_url, keyword, posted_date, create_time, create_datetime)
			
 
				+                    VALUES (%s, %s, %s, %s, %s, %s, %s, %s, %s, %s, %s)
			
 
				+                    """
			
 
				+                cur.execute(insert_query, (
			
 
				+                    values['name'],
			
 
				+                    values['context'],
			
 
				+                    values['source_url'],
			
 
				+                    values['link'],
			
 
				+                    values['article_type'],
			
 
				+                    values['article_source'],
			
 
				+                    values['img_url'],
			
 
				+                    values['keyword'],
			
 
				+                    values['posted_date'],
			
 
				+                    values['create_time'],
			
 
				+                    values['create_datetime']
			
 
				+                ))
			
 
				+                conn.commit()
			
 
				+                print(f'已保存{values}')
			
 
				+        except Exception as e:
			
 
				+            print("Error during search: ", e)
			
 
				+        finally:
			
 
				+            # 关闭游标和连接
			
 
				+            if 'cur' in locals():
			
 
				+                cur.close()
			
 
				+    conn.close()
			
 
				+
			
 
				+
			
 
# Runs whenever the module is executed or imported (there is no __main__
# guard): read everything from MongoDB, then sync it into PostgreSQL if any
# documents were found.
sorted_data = mongo()
if sorted_data:
    pg(sorted_data)
			
--- a/utils_daily_logs_generate.py
+++ b/utils_daily_logs_generate.py
@@ -0,0 +1,83 @@
 
				+# -*- coding: utf-8 -*-
			
 
				+'''
			
 
				+设置每天 00:00:00 新建一个日志记录
			
 
				+'''
			
 
				+import os
			
 
				+import time
			
 
				+from datetime import datetime
			
 
				+import pymongo
			
 
				+import tools_load_config
			
 
				+
			
 
				+config_json = tools_load_config.load_config()
			
 
				+base_project = tools_load_config.get_base_path()
			
 
				+
			
 
				+PROJECT_NAME = config_json.get('PROJECT_NAME')
			
 
				+DB_USER = config_json.get('DB_USER')
			
 
				+DB_PASSWORD = config_json.get('DB_PASSWORD')
			
 
				+DB_IP = config_json.get('DB_IP')
			
 
				+DB_PORT = config_json.get('DB_PORT')
			
 
				+MONGO_LINK = f'mongodb://{DB_USER}:{DB_PASSWORD}@{DB_IP}:{DB_PORT}/'
			
 
				+MAIL_HOST = config_json.get('MAIL_HOST')
			
 
				+MAIL_USER = config_json.get('MAIL_USER')
			
 
				+MAIL_PASS = config_json.get('MAIL_PASS')
			
 
				+MAIL_SENDER = config_json.get('MAIL_SENDER')
			
 
				+MAIL_RECEIVERS = config_json.get('MAIL_RECEIVERS')
			
 
				+
			
 
				+now_day = time.strftime('%Y-%m-%d', time.localtime())
			
 
				+rss_base_url = 'http://home.erhe.link:20002/xmlfile/'
			
 
				+
			
 
				+
			
 
				+class LogsHandle(object):
			
 
				+    def __init__(self):
			
 
				+        self.now_day = time.strftime('%Y-%m-%d', time.localtime())
			
 
				+        db = 'logs'
			
 
				+        collection = 'logs_' + self.now_day
			
 
				+        self.mongo = MongoHandle(db=db, collection=collection, del_db=False, del_collection=False, auto_remove=0)
			
 
				+
			
 
				+    def logs_generate(self):
			
 
				+        data_to_insert = {
			
 
				+            "title": "logs",
			
 
				+            "context": 'generate logs',
			
 
				+            "state": "create",
			
 
				+            "create_time": int(time.time()),
			
 
				+            "create_datetime": datetime.now().strftime("%Y-%m-%d %H:%M:%S")
			
 
				+        }
			
 
				+
			
 
				+        self.mongo.collection.insert_one(data_to_insert)
			
 
				+
			
 
				+
			
 
				+class MongoHandle(object):
			
 
				+    def __init__(self, db, collection, del_db=False, del_collection=False, auto_remove=0):
			
 
				+        self.client = pymongo.MongoClient(MONGO_LINK)
			
 
				+        self.db = db
			
 
				+        self.collection = collection
			
 
				+
			
 
				+        if del_db and db:
			
 
				+            # 检查数据库是否存在
			
 
				+            if db in self.client.list_database_names():
			
 
				+                # 删除数据库
			
 
				+                self.client.drop_database(db)
			
 
				+        self.db = self.client[db]
			
 
				+
			
 
				+        if del_collection and self.collection:
			
 
				+            # 检查集合是否存在
			
 
				+            if self.collection in self.db.list_collection_names():
			
 
				+                # 删除集合
			
 
				+                self.db.drop_collection(collection)
			
 
				+        self.collection = self.db[collection]
			
 
				+
			
 
				+        if auto_remove:
			
 
				+            self.auto_remove_data(auto_remove)
			
 
				+
			
 
				+    def write_data(self, data):
			
 
				+        self.collection.insert_one(data)
			
 
				+
			
 
				+    def auto_remove_data(self, day):
			
 
				+        for data in self.collection.find({'create_time': {'$lt': int(time.time()) - day * 24 * 60 * 60}}):
			
 
				+            self.collection.delete_one({'_id': data['_id']})
			
 
				+
			
 
				+
			
 
if __name__ == '__main__':
    # Script entry point: insert the marker entry that creates today's log
    # collection, printing a message before and after.
    print("新建当天日志记录...")
    LogsHandle().logs_generate()
    print("当天日志记录已创建...")
			
--- a/utils_daily_logs_send.py
+++ b/utils_daily_logs_send.py
@@ -0,0 +1,125 @@
 
				+# -*- coding: utf-8 -*-
			
 
				+'''
			
 
				+设置每天 23:59 执行, 读取当天数据库中, 所有日志, 发送到指定邮箱
			
 
				+'''
			
 
				+import time
			
 
				+import os
			
 
				+import pymongo
			
 
				+import smtplib
			
 
				+from email.mime.text import MIMEText
			
 
				+from email.header import Header
			
 
				+import tools_load_config
			
 
				+
			
 
				+config_json = tools_load_config.load_config()
			
 
				+base_project = tools_load_config.get_base_path()
			
 
				+
			
 
				+PROJECT_NAME = config_json.get('PROJECT_NAME')
			
 
				+DB_USER = config_json.get('DB_USER')
			
 
				+DB_PASSWORD = config_json.get('DB_PASSWORD')
			
 
				+DB_IP = config_json.get('DB_IP')
			
 
				+DB_PORT = config_json.get('DB_PORT')
			
 
				+MONGO_LINK = f'mongodb://{DB_USER}:{DB_PASSWORD}@{DB_IP}:{DB_PORT}/'
			
 
				+MAIL_HOST = config_json.get('MAIL_HOST')
			
 
				+MAIL_USER = config_json.get('MAIL_USER')
			
 
				+MAIL_PASS = config_json.get('MAIL_PASS')
			
 
				+MAIL_SENDER = config_json.get('MAIL_SENDER')
			
 
				+MAIL_RECEIVERS = config_json.get('MAIL_RECEIVERS')
			
 
				+
			
 
				+now_day = time.strftime('%Y-%m-%d', time.localtime())
			
 
				+rss_base_url = 'http://home.erhe.link:20002/xmlfile/'
			
 
				+
			
 
				+
			
 
				+class LogsHandle(object):
			
 
				+    def __init__(self):
			
 
				+        self.now_day = time.strftime('%Y-%m-%d', time.localtime())
			
 
				+        db = 'logs'
			
 
				+        collection = 'logs_' + self.now_day
			
 
				+        self.mongo = MongoHandle(db=db, collection=collection, del_db=False, del_collection=False, auto_remove=0)
			
 
				+
			
 
				+    def logs_send(self):
			
 
				+        subject = 'auto collection logs'
			
 
				+        title = 'auto collection - daily logs: {}'.format(self.now_day)
			
 
				+        text = ''
			
 
				+
			
 
				+        # TODO
			
 
				+        # 从 mongodb 读取日志, 拼接 text, 发送邮件
			
 
				+        # 查询所有文档
			
 
				+        cursor = self.mongo.collection.find()
			
 
				+        # 遍历结果集
			
 
				+        for record in cursor:
			
 
				+            text += "logs_source: {}, logs_detail: {}, state: {} logs_create_time: {}\n\n".format(record.setdefault('title'),
			
 
				+                                                                                                  record.setdefault('content'),
			
 
				+                                                                                                  record.setdefault('state'),
			
 
				+                                                                                                  record.setdefault('create_datetime'),
			
 
				+                                                                                                  )
			
 
				+        S = SendEmail(subject=subject, title=title, text=text)
			
 
				+        S.send()
			
 
				+
			
 
				+
			
 
				+class MongoHandle(object):
			
 
				+    def __init__(self, db, collection, del_db=False, del_collection=False, auto_remove=0):
			
 
				+        self.client = pymongo.MongoClient(MONGO_LINK)
			
 
				+        self.db = db
			
 
				+        self.collection = collection
			
 
				+
			
 
				+        if del_db and db:
			
 
				+            # 检查数据库是否存在
			
 
				+            if db in self.client.list_database_names():
			
 
				+                # 删除数据库
			
 
				+                self.client.drop_database(db)
			
 
				+        self.db = self.client[db]
			
 
				+
			
 
				+        if del_collection and self.collection:
			
 
				+            # 检查集合是否存在
			
 
				+            if self.collection in self.db.list_collection_names():
			
 
				+                # 删除集合
			
 
				+                self.db.drop_collection(collection)
			
 
				+        self.collection = self.db[collection]
			
 
				+
			
 
				+        if auto_remove:
			
 
				+            self.auto_remove_data(auto_remove)
			
 
				+
			
 
				+    def write_data(self, data):
			
 
				+        self.collection.insert_one(data)
			
 
				+
			
 
				+    def auto_remove_data(self, day):
			
 
				+        for data in self.collection.find({'create_time': {'$lt': int(time.time()) - day * 24 * 60 * 60}}):
			
 
				+            self.collection.delete_one({'_id': data['_id']})
			
 
				+
			
 
				+
			
 
				+class SendEmail(object):
			
 
				+    def __init__(self, subject='auto subject', title='auto title', text='auto text') -> None:
			
 
				+        # 第三方 SMTP 服务
			
 
				+        self.mail_host = MAIL_HOST  # 设置服务器
			
 
				+        self.mail_user = MAIL_USER  # 用户名
			
 
				+        self.mail_pass = MAIL_PASS  # 口令
			
 
				+
			
 
				+        self.sender = MAIL_SENDER
			
 
				+        self.receivers = [MAIL_RECEIVERS]
			
 
				+
			
 
				+        self.subject = subject
			
 
				+        self.title = title
			
 
				+        self.text = text
			
 
				+
			
 
				+    def send(self):
			
 
				+        message = MIMEText(self.text, 'plain', 'utf-8')
			
 
				+        message['From'] = Header(self.title, 'utf-8')
			
 
				+        message['To'] = Header("auto collection", 'utf-8')
			
 
				+
			
 
				+        subject = self.subject
			
 
				+        message['Subject'] = Header(subject, 'utf-8')
			
 
				+
			
 
				+        try:
			
 
				+            smtpObj = smtplib.SMTP()
			
 
				+            smtpObj.connect(self.mail_host, 25)
			
 
				+            smtpObj.login(self.mail_user, self.mail_pass)
			
 
				+            smtpObj.sendmail(self.sender, self.receivers, message.as_string())
			
 
				+            print("邮件发送成功")
			
 
				+        except smtplib.SMTPException:
			
 
				+            print("Error: 无法发送邮件")
			
 
				+
			
 
				+
			
 
				+if __name__ == '__main__':
			
 
				+    print("发送当天日志:start")
			
 
				+    LogsHandle().logs_send()
			
 
				+    print("发送当天日志:done")
			
--- a/utils_timing_remove_data.py
+++ b/utils_timing_remove_data.py
@@ -0,0 +1,196 @@
 
				+# -*- coding: utf-8 -*-
			
 
				+"""
			
 
				+自动清除大于指定天数的数据
			
 
				+"""
			
 
				+import threading
			
 
				+import time
			
 
				+import sys
			
 
				+import os
			
 
				+from datetime import datetime
			
 
				+import pymongo
			
 
				+import smtplib
			
 
				+from email.mime.text import MIMEText
			
 
				+from email.header import Header
			
 
				+
			
 
				+sys.path.append(os.path.join(os.getcwd().split('auto')[0], 'auto'))
			
 
				+base_project = os.path.join(os.getcwd().split('auto')[0], 'auto')
			
 
				+import json
			
 
				+
			
 
				+config_path = os.path.join(base_project, 'config.json')
			
 
				+config_json = {}
			
 
				+with open(config_path, 'r') as f:
			
 
				+    config_json = json.load(f)
			
 
				+
			
 
				+if not config_json:
			
 
				+    print('No config file found')
			
 
				+    exit(0)
			
 
				+
			
 
				+PROJECT_NAME = config_json.get('PROJECT_NAME')
			
 
				+DB_USER = config_json.get('DB_USER')
			
 
				+DB_PASSWORD = config_json.get('DB_PASSWORD')
			
 
				+DB_IP = config_json.get('DB_IP')
			
 
				+DB_PORT = config_json.get('DB_PORT')
			
 
				+MONGO_LINK = f'mongodb://{DB_USER}:{DB_PASSWORD}@{DB_IP}:{DB_PORT}/'
			
 
				+MAIL_HOST = config_json.get('MAIL_HOST')
			
 
				+MAIL_USER = config_json.get('MAIL_USER')
			
 
				+MAIL_PASS = config_json.get('MAIL_PASS')
			
 
				+MAIL_SENDER = config_json.get('MAIL_SENDER')
			
 
				+MAIL_RECEIVERS = config_json.get('MAIL_RECEIVERS')
			
 
				+
			
 
				+now_day = time.strftime('%Y-%m-%d', time.localtime())
			
 
				+rss_base_url = 'http://home.erhe.link:20002/xmlfile/'
			
 
				+base_project = os.path.join(os.getcwd().split(PROJECT_NAME)[0], PROJECT_NAME)
			
 
				+
			
 
				+
			
 
				+class MongoHandle(object):
			
 
				+    def __init__(self, db, collection, del_db=False, del_collection=False, auto_remove=0):
			
 
				+        self.client = pymongo.MongoClient(MONGO_LINK)
			
 
				+        self.db = db
			
 
				+        self.collection = collection
			
 
				+
			
 
				+        if del_db and db:
			
 
				+            # 检查数据库是否存在
			
 
				+            if db in self.client.list_database_names():
			
 
				+                # 删除数据库
			
 
				+                self.client.drop_database(db)
			
 
				+        self.db = self.client[db]
			
 
				+
			
 
				+        if del_collection and self.collection:
			
 
				+            # 检查集合是否存在
			
 
				+            if self.collection in self.db.list_collection_names():
			
 
				+                # 删除集合
			
 
				+                self.db.drop_collection(collection)
			
 
				+        self.collection = self.db[collection]
			
 
				+
			
 
				+        if auto_remove:
			
 
				+            self.auto_remove_data(auto_remove)
			
 
				+
			
 
				+    def write_data(self, data):
			
 
				+        self.collection.insert_one(data)
			
 
				+
			
 
				+    def auto_remove_data(self, day):
			
 
				+        for data in self.collection.find({'create_time': {'$lt': int(time.time()) - day * 24 * 60 * 60}}):
			
 
				+            self.collection.delete_one({'_id': data['_id']})
			
 
				+
			
 
				+
			
 
				+class SendEmail(object):
			
 
				+    def __init__(self, subject='auto subject', title='auto title', text='auto text') -> None:
			
 
				+        # 第三方 SMTP 服务
			
 
				+        self.mail_host = MAIL_HOST  # 设置服务器
			
 
				+        self.mail_user = MAIL_USER  # 用户名
			
 
				+        self.mail_pass = MAIL_PASS  # 口令
			
 
				+
			
 
				+        self.sender = MAIL_SENDER
			
 
				+        self.receivers = [MAIL_RECEIVERS]
			
 
				+
			
 
				+        self.subject = subject
			
 
				+        self.title = title
			
 
				+        self.text = text
			
 
				+
			
 
				+    def send(self):
			
 
				+        message = MIMEText(self.text, 'plain', 'utf-8')
			
 
				+        message['From'] = Header(self.title, 'utf-8')
			
 
				+        message['To'] = Header("auto collection", 'utf-8')
			
 
				+
			
 
				+        subject = self.subject
			
 
				+        message['Subject'] = Header(subject, 'utf-8')
			
 
				+
			
 
				+        try:
			
 
				+            smtpObj = smtplib.SMTP()
			
 
				+            smtpObj.connect(self.mail_host, 25)
			
 
				+            smtpObj.login(self.mail_user, self.mail_pass)
			
 
				+            smtpObj.sendmail(self.sender, self.receivers, message.as_string())
			
 
				+            print("邮件发送成功")
			
 
				+        except smtplib.SMTPException:
			
 
				+            print("Error: 无法发送邮件")
			
 
				+
			
 
				+
			
 
				+class LogsHandle(object):
			
 
				+    def __init__(self):
			
 
				+        self.now_day = time.strftime('%Y-%m-%d', time.localtime())
			
 
				+        db = 'logs'
			
 
				+        collection = 'logs_' + self.now_day
			
 
				+        self.mongo = MongoHandle(db=db, collection=collection, del_db=False, del_collection=False, auto_remove=0)
			
 
				+
			
 
				+    def logs_write(self, title_source=None, content=None, state=None, send_now=False):
			
 
				+        data_to_insert = {
			
 
				+            "title": title_source,
			
 
				+            "context": content,
			
 
				+            "state": state,
			
 
				+            "create_time": int(time.time()),
			
 
				+            "create_datetime": datetime.now().strftime("%Y-%m-%d %H:%M:%S")
			
 
				+        }
			
 
				+
			
 
				+        self.mongo.collection.insert_one(data_to_insert)
			
 
				+
			
 
				+        if send_now:
			
 
				+            subject = 'auto collection'
			
 
				+            title = 'auto collection - running logs: {}'.format(self.now_day)
			
 
				+            text = 'logs_source: {}, logs_detail: {}, state: {} logs_create_time: {}'.format(data_to_insert.setdefault('title'),
			
 
				+                                                                                             data_to_insert.setdefault('content'),
			
 
				+                                                                                             data_to_insert.setdefault('state'),
			
 
				+                                                                                             data_to_insert.setdefault('create_datetime'),
			
 
				+                                                                                             )
			
 
				+
			
 
				+            Send = SendEmail(subject=subject, title=title, text=text)
			
 
				+            Send.send()
			
 
				+
			
 
				+
			
 
				+class AutoRemoveData(object):
			
 
				+    def __init__(self):
			
 
				+        self.databases = [
			
 
				+            'news',
			
 
				+            'apprcn',
			
 
				+            'HelloGithub'
			
 
				+        ]
			
 
				+
			
 
				+        self.day = 60
			
 
				+
			
 
				+        self.client = pymongo.MongoClient(MONGO_LINK)
			
 
				+
			
 
				+        self.logs = LogsHandle()
			
 
				+
			
 
				+        self.all_delete_count = 0
			
 
				+
			
 
				+    def auto_remove_data(self, db_name, day):
			
 
				+        print(f'准备删除时间大于: {self.day} 数据')
			
 
				+        if db_name not in self.client.list_database_names():
			
 
				+            return
			
 
				+
			
 
				+        deleted_count = 0
			
 
				+
			
 
				+        db = self.client[db_name]
			
 
				+
			
 
				+        for collection_name in db.list_collection_names():
			
 
				+            collection = db[collection_name]
			
 
				+            for data in collection.find({'create_time': {'$lt': int(time.time()) - day * 24 * 60 * 60}}):
			
 
				+                collection.delete_one({'_id': data['_id']})
			
 
				+                deleted_count += 1
			
 
				+
			
 
				+        self.all_delete_count += deleted_count
			
 
				+
			
 
				+        msg = f"删除 {db_name} 库 {self.day} 天以上数据 {deleted_count} 条"
			
 
				+        if deleted_count:
			
 
				+            print(msg)
			
 
				+            self.logs.logs_write(f'自动删除 {self.day} 天以上数据', msg, 'delete', False)
			
 
				+
			
 
				+    def main(self):
			
 
				+        self.logs.logs_write(f'自动删除 {self.day} 天以上数据', f'开始自动删除 {self.day} 天以上数据', 'start', False)
			
 
				+        threads = []
			
 
				+
			
 
				+        for db_name in self.databases:
			
 
				+            thread = threading.Thread(target=self.auto_remove_data, args=(db_name, self.day))
			
 
				+            threads.append(thread)
			
 
				+            thread.start()
			
 
				+
			
 
				+        for thread in threads:
			
 
				+            thread.join()
			
 
				+
			
 
				+        print(f'删除时间大于: {self.day} 数据, 已完成')
			
 
				+        print(f'本次运行共删除: {self.all_delete_count} 条数据')
			
 
				+        self.logs.logs_write(f'自动删除 {self.day} 天以上数据', f'自动删除 {self.day} 天数以上数据完成', 'done', False)
			
 
				+
			
 
				+
			
 
				+if __name__ == "__main__":
			
 
				+    A = AutoRemoveData()
			
 
				+    A.main()
			
--- a/utils_timing_rss_gen.py
+++ b/utils_timing_rss_gen.py
@@ -0,0 +1,155 @@
 
				+# -*-coding: utf-8 -*-
			
 
				+'''
			
 
				+在批量执行 rss 爬虫时, 获取数据后会生成 rss 文件,无需执行此代码, 如需手动生成 rss 文件,执行此代码即可
			
 
				+'''
			
 
				+import os
			
 
				+import threading
			
 
				+import PyRSS2Gen
			
 
				+import time
			
 
				+import pymongo
			
 
				+import tools_load_config
			
 
				+
			
 
				+config_json = tools_load_config.load_config()
			
 
				+base_project = tools_load_config.get_base_path()
			
 
				+
			
 
				+PROJECT_NAME = config_json.get('PROJECT_NAME')
			
 
				+DB_USER = config_json.get('DB_USER')
			
 
				+DB_PASSWORD = config_json.get('DB_PASSWORD')
			
 
				+DB_IP = config_json.get('DB_IP')
			
 
				+DB_PORT = config_json.get('DB_PORT')
			
 
				+MONGO_LINK = f'mongodb://{DB_USER}:{DB_PASSWORD}@{DB_IP}:{DB_PORT}/'
			
 
				+MAIL_HOST = config_json.get('MAIL_HOST')
			
 
				+MAIL_USER = config_json.get('MAIL_USER')
			
 
				+MAIL_PASS = config_json.get('MAIL_PASS')
			
 
				+MAIL_SENDER = config_json.get('MAIL_SENDER')
			
 
				+MAIL_RECEIVERS = config_json.get('MAIL_RECEIVERS')
			
 
				+
			
 
				+now_day = time.strftime('%Y-%m-%d', time.localtime())
			
 
				+rss_base_url = 'http://home.erhe.link:20002/xmlfile/'
			
 
				+base_project = os.path.join(os.getcwd().split(PROJECT_NAME)[0], PROJECT_NAME)
			
 
				+
			
 
				+
			
 
				+def string_mapping(string):
			
 
				+    mapping_dict = {
			
 
				+        '掌设': 'handheld',
			
 
				+        '汽车': 'car',
			
 
				+        '评测': 'testing',
			
 
				+        '美食': 'food',
			
 
				+        '电脑': 'pc',
			
 
				+        '视听': 'audiovisual',
			
 
				+        '腕表': 'watch',
			
 
				+        '单车': 'bicycle',
			
 
				+        '摄影': 'photograph',
			
 
				+        '家居': 'home'
			
 
				+    }
			
 
				+    for key in mapping_dict:
			
 
				+        if key in string:
			
 
				+            string = string.replace(key, mapping_dict[key])
			
 
				+
			
 
				+    return string
			
 
				+
			
 
				+
			
 
				+def handle_data(db_and_collection):
			
 
				+    db = db_and_collection['db']
			
 
				+    collection = db_and_collection['collection']
			
 
				+    title = db.replace('RSS_', '')
			
 
				+    print(f'读取 {db} -- {collection} 数据')
			
 
				+    client = pymongo.MongoClient(MONGO_LINK)
			
 
				+    _get_db = client[db]
			
 
				+    _get_collection = _get_db[collection]
			
 
				+    gen_file_name = f'{title}_{collection}_rss.xml'
			
 
				+    # 替换中英文
			
 
				+    gen_file_name = string_mapping(gen_file_name)
			
 
				+
			
 
				+    link = db_and_collection['link']
			
 
				+    description = db_and_collection['source_type']
			
 
				+    lastBuildDate = now_day
			
 
				+    items = []
			
 
				+    xml_file = os.path.join(base_project, 'news', 'rss_xmlfile')
			
 
				+    path = os.path.join(xml_file, gen_file_name)
			
 
				+    xml_url_file = os.path.join(xml_file, 'rss_url.txt')
			
 
				+
			
 
				+    # TODO
			
 
				+    # load mongodb
			
 
				+    # save to item
			
 
				+    for data in _get_collection.find():
			
 
				+        t = data.setdefault('title')
			
 
				+        d = data.setdefault('context')
			
 
				+        l = data.setdefault('source_url')
			
 
				+        item = PyRSS2Gen.RSSItem(
			
 
				+            title=t,
			
 
				+            link=l,
			
 
				+            description=d,
			
 
				+            pubDate=data.setdefault('posted_date'),
			
 
				+        )
			
 
				+        items.append(item)
			
 
				+
			
 
				+    gen2rss(gen_file_name, title, link, description, lastBuildDate, items, path)
			
 
				+
			
 
				+    with open(xml_url_file, 'a') as f:
			
 
				+        f.write(rss_base_url + gen_file_name + '\n\n')
			
 
				+
			
 
				+
			
 
				+def gen2rss(gen_file_name, title, link, description, lastBuildDate, items, path):
			
 
				+    rss = PyRSS2Gen.RSS2(
			
 
				+        title=title,
			
 
				+        link=link,
			
 
				+        description=description,
			
 
				+        lastBuildDate=lastBuildDate,
			
 
				+        items=items)
			
 
				+
			
 
				+    print('正在生成rss文件: 路径: %s, 文件名: %s' % (path, gen_file_name))
			
 
				+    rss.write_xml(open(path, "w", encoding='utf-8'), encoding='utf-8')
			
 
				+
			
 
				+
			
 
				+def run():
			
 
				+    xml_file = os.path.join(os.getcwd().split(PROJECT_NAME)[0], PROJECT_NAME, 'news', 'rss_xmlfile')
			
 
				+    if not os.path.exists(xml_file):
			
 
				+        os.mkdir(xml_file)
			
 
				+
			
 
				+    rss_url_txt = os.path.join(xml_file, 'rss_url.txt')
			
 
				+    if os.path.exists(rss_url_txt):
			
 
				+        os.remove(rss_url_txt)
			
 
				+
			
 
				+    # 每一个列表, 0数据库名, 1爬虫目标网站url, 2目标网站名称
			
 
				+    dbs = [
			
 
				+        ['RSS_HelloGithub', 'https://hellogithub.com/', 'HelloGithub'],
			
 
				+        ['RSS_apprcn', 'https://free.apprcn.com/', '反斗限免'],
			
 
				+        ['RSS_news', 'https://www.anyknew.com/', '聚合新闻'],
			
 
				+        ['RSS_chiphell', 'https://www.chiphell.com/', 'chiphell']
			
 
				+    ]
			
 
				+
			
 
				+    db_and_collections = []
			
 
				+
			
 
				+    for db in dbs:
			
 
				+        client = pymongo.MongoClient(MONGO_LINK)
			
 
				+        if db[0] not in client.list_database_names():
			
 
				+            print(f'找不到数据库 {db[0]}')
			
 
				+            continue
			
 
				+        _get_db = client[db[0]]
			
 
				+        for collection in _get_db.list_collection_names():
			
 
				+            db_and_collections.append({
			
 
				+                'db': db[0],
			
 
				+                'collection': collection,
			
 
				+                'link': db[1],
			
 
				+                'source_type': db[2]
			
 
				+            })
			
 
				+
			
 
				+    # for d in db_and_collections:
			
 
				+    #     handle_data(d)
			
 
				+
			
 
				+    # 进程列表
			
 
				+    threads = []
			
 
				+
			
 
				+    # 使用 for 循环批量创建线程
			
 
				+    for i in db_and_collections:
			
 
				+        thread = threading.Thread(target=handle_data, args=(i,))
			
 
				+        threads.append(thread)
			
 
				+        thread.start()
			
 
				+
			
 
				+    # 等待所有线程执行完成
			
 
				+    for thread in threads:
			
 
				+        thread.join()
			
 
				+
			
 
				+
			
 
				+run()