# -*- coding: utf-8 -*- from datetime import datetime from pymongo import MongoClient import psycopg2 def mongo(): # mongodb client = MongoClient('mongodb://root:aaaAAA111!!!@192.168.31.178:38000/') # 指定数据库名称 db_name = 'NEWS' # 替换为你的数据库名称 # 选择数据库 db = client[db_name] # 列出数据库中的所有集合 collections = db.list_collection_names() all_data = [] for collection_name in collections: # 选择集合 collection = db[collection_name] # 读取集合中的所有数据 for document in collection.find({}, {'_id': 0}): all_data.append(document) sorted_data = [] if all_data: sorted_data = sorted(all_data, key=lambda x: x['create_time'], reverse=True) return sorted_data def pg(sorted_data): # 数据库连接参数 db_params = { 'dbname': 'auto', 'user': 'odoo', 'password': 'odoo', 'host': '192.168.31.178', 'port': '5432' } conn = psycopg2.connect(**db_params) for doc in sorted_data: # 如果有title,就不插入 try: cur = conn.cursor() create_time_dt = None if doc.get('create_time'): create_time_dt = datetime.utcfromtimestamp(doc['create_time']) values = { 'name': doc.get('title'), 'context': doc.get('context') or '', 'source_url': doc.get('source_url') or '', 'link': doc.get('line') or '', 'article_type': doc.get('article_type') or '', 'article_source': doc.get('article_source') or '', 'img_url': doc.get('img_url') or '', 'keyword': doc.get('keyword') or '', 'posted_date': doc.get('posted_date') or '', 'create_time_ts': doc.get('create_time') or '', 'create_time': create_time_dt, 'create_datetime': datetime.strptime(doc['create_datetime'], '%Y-%m-%d %H:%M:%S') if doc.get('create_datetime') else None } # 将create_time转换为适合数据库的时间戳格式 create_time_dt = datetime.utcfromtimestamp(values['create_time_ts']) if values['create_time_ts'] else None values['create_time'] = create_time_dt # 将create_datetime转换为适合数据库的时间戳格式 create_datetime_str = doc.get('create_datetime') values['create_datetime'] = datetime.strptime(create_datetime_str, '%Y-%m-%d %H:%M:%S') if create_datetime_str else None # 检查数据库中是否已存在相同title的记录 check_query = "SELECT id FROM news_info WHERE name = %s;" cur.execute(check_query, (values['name'],)) # 如果没有找到记录,则插入新记录 if not cur.fetchone(): insert_query = """ INSERT INTO news_info (name, context, source_url, link, article_type, article_source, img_url, keyword, posted_date, create_time, create_datetime) VALUES (%s, %s, %s, %s, %s, %s, %s, %s, %s, %s, %s) """ cur.execute(insert_query, ( values['name'], values['context'], values['source_url'], values['link'], values['article_type'], values['article_source'], values['img_url'], values['keyword'], values['posted_date'], values['create_time'], values['create_datetime'] )) conn.commit() print(f'已保存{values}') except Exception as e: print("Error during search: ", e) finally: # 关闭游标和连接 if 'cur' in locals(): cur.close() conn.close() sorted_data = mongo() if sorted_data: pg(sorted_data) print("Done!")