| 123456789101112131415161718192021222324252627282930313233343536373839404142434445464748495051525354555657585960616263646566676869707172737475767778798081828384858687888990919293949596979899100101102103104105106107108109110111112113114115116117118 |
- # -*- coding: utf-8 -*-
- from datetime import datetime
- from pymongo import MongoClient
- import psycopg2
- def mongo():
- # mongodb
- client = MongoClient('mongodb://root:aaaAAA111!!!@192.168.31.178:38000/')
- # 指定数据库名称
- db_name = 'NEWS' # 替换为你的数据库名称
- # 选择数据库
- db = client[db_name]
- # 列出数据库中的所有集合
- collections = db.list_collection_names()
- all_data = []
- for collection_name in collections:
- # 选择集合
- collection = db[collection_name]
- # 读取集合中的所有数据
- for document in collection.find({}, {'_id': 0}):
- all_data.append(document)
- sorted_data = []
- if all_data:
- sorted_data = sorted(all_data, key=lambda x: x['create_time'], reverse=True)
- return sorted_data
- def pg(sorted_data):
- # 数据库连接参数
- db_params = {
- 'dbname': 'auto',
- 'user': 'odoo',
- 'password': 'odoo',
- 'host': '192.168.31.178',
- 'port': '5432'
- }
- conn = psycopg2.connect(**db_params)
- for doc in sorted_data:
- # 如果有title,就不插入
- try:
- cur = conn.cursor()
- create_time_dt = None
- if doc.get('create_time'):
- create_time_dt = datetime.utcfromtimestamp(doc['create_time'])
- values = {
- 'name': doc.get('title'),
- 'context': doc.get('context') or '',
- 'source_url': doc.get('source_url') or '',
- 'link': doc.get('line') or '',
- 'article_type': doc.get('article_type') or '',
- 'article_source': doc.get('article_source') or '',
- 'img_url': doc.get('img_url') or '',
- 'keyword': doc.get('keyword') or '',
- 'posted_date': doc.get('posted_date') or '',
- 'create_time_ts': doc.get('create_time') or '',
- 'create_time': create_time_dt,
- 'create_datetime': datetime.strptime(doc['create_datetime'], '%Y-%m-%d %H:%M:%S') if doc.get('create_datetime') else None
- }
- # 将create_time转换为适合数据库的时间戳格式
- create_time_dt = datetime.utcfromtimestamp(values['create_time_ts']) if values['create_time_ts'] else None
- values['create_time'] = create_time_dt
- # 将create_datetime转换为适合数据库的时间戳格式
- create_datetime_str = doc.get('create_datetime')
- values['create_datetime'] = datetime.strptime(create_datetime_str, '%Y-%m-%d %H:%M:%S') if create_datetime_str else None
- # 检查数据库中是否已存在相同title的记录
- check_query = "SELECT id FROM news_info WHERE name = %s;"
- cur.execute(check_query, (values['name'],))
- # 如果没有找到记录,则插入新记录
- if not cur.fetchone():
- insert_query = """
- INSERT INTO news_info (name, context, source_url, link, article_type, article_source, img_url, keyword, posted_date, create_time, create_datetime)
- VALUES (%s, %s, %s, %s, %s, %s, %s, %s, %s, %s, %s)
- """
- cur.execute(insert_query, (
- values['name'],
- values['context'],
- values['source_url'],
- values['link'],
- values['article_type'],
- values['article_source'],
- values['img_url'],
- values['keyword'],
- values['posted_date'],
- values['create_time'],
- values['create_datetime']
- ))
- conn.commit()
- print(f'已保存{values}')
- except Exception as e:
- print("Error during search: ", e)
- finally:
- # 关闭游标和连接
- if 'cur' in locals():
- cur.close()
- conn.close()
- sorted_data = mongo()
- if sorted_data:
- pg(sorted_data)
- print("Done!")
|