tools_sync_psql.py 4.0 KB

123456789101112131415161718192021222324252627282930313233343536373839404142434445464748495051525354555657585960616263646566676869707172737475767778798081828384858687888990919293949596979899100101102103104105106107108109110111112113114115116117118
  1. # -*- coding: utf-8 -*-
  2. from datetime import datetime
  3. from pymongo import MongoClient
  4. import psycopg2
  5. def mongo():
  6. # mongodb
  7. client = MongoClient('mongodb://root:aaaAAA111!!!@192.168.31.178:38000/')
  8. # 指定数据库名称
  9. db_name = 'NEWS' # 替换为你的数据库名称
  10. # 选择数据库
  11. db = client[db_name]
  12. # 列出数据库中的所有集合
  13. collections = db.list_collection_names()
  14. all_data = []
  15. for collection_name in collections:
  16. # 选择集合
  17. collection = db[collection_name]
  18. # 读取集合中的所有数据
  19. for document in collection.find({}, {'_id': 0}):
  20. all_data.append(document)
  21. sorted_data = []
  22. if all_data:
  23. sorted_data = sorted(all_data, key=lambda x: x['create_time'], reverse=True)
  24. return sorted_data
  25. def pg(sorted_data):
  26. # 数据库连接参数
  27. db_params = {
  28. 'dbname': 'auto',
  29. 'user': 'odoo',
  30. 'password': 'odoo',
  31. 'host': '192.168.31.178',
  32. 'port': '5432'
  33. }
  34. conn = psycopg2.connect(**db_params)
  35. for doc in sorted_data:
  36. # 如果有title,就不插入
  37. try:
  38. cur = conn.cursor()
  39. create_time_dt = None
  40. if doc.get('create_time'):
  41. create_time_dt = datetime.utcfromtimestamp(doc['create_time'])
  42. values = {
  43. 'name': doc.get('title'),
  44. 'context': doc.get('context') or '',
  45. 'source_url': doc.get('source_url') or '',
  46. 'link': doc.get('line') or '',
  47. 'article_type': doc.get('article_type') or '',
  48. 'article_source': doc.get('article_source') or '',
  49. 'img_url': doc.get('img_url') or '',
  50. 'keyword': doc.get('keyword') or '',
  51. 'posted_date': doc.get('posted_date') or '',
  52. 'create_time_ts': doc.get('create_time') or '',
  53. 'create_time': create_time_dt,
  54. 'create_datetime': datetime.strptime(doc['create_datetime'], '%Y-%m-%d %H:%M:%S') if doc.get('create_datetime') else None
  55. }
  56. # 将create_time转换为适合数据库的时间戳格式
  57. create_time_dt = datetime.utcfromtimestamp(values['create_time_ts']) if values['create_time_ts'] else None
  58. values['create_time'] = create_time_dt
  59. # 将create_datetime转换为适合数据库的时间戳格式
  60. create_datetime_str = doc.get('create_datetime')
  61. values['create_datetime'] = datetime.strptime(create_datetime_str, '%Y-%m-%d %H:%M:%S') if create_datetime_str else None
  62. # 检查数据库中是否已存在相同title的记录
  63. check_query = "SELECT id FROM news_info WHERE name = %s;"
  64. cur.execute(check_query, (values['name'],))
  65. # 如果没有找到记录,则插入新记录
  66. if not cur.fetchone():
  67. insert_query = """
  68. INSERT INTO news_info (name, context, source_url, link, article_type, article_source, img_url, keyword, posted_date, create_time, create_datetime)
  69. VALUES (%s, %s, %s, %s, %s, %s, %s, %s, %s, %s, %s)
  70. """
  71. cur.execute(insert_query, (
  72. values['name'],
  73. values['context'],
  74. values['source_url'],
  75. values['link'],
  76. values['article_type'],
  77. values['article_source'],
  78. values['img_url'],
  79. values['keyword'],
  80. values['posted_date'],
  81. values['create_time'],
  82. values['create_datetime']
  83. ))
  84. conn.commit()
  85. print(f'已保存{values}')
  86. except Exception as e:
  87. print("Error during search: ", e)
  88. finally:
  89. # 关闭游标和连接
  90. if 'cur' in locals():
  91. cur.close()
  92. conn.close()
  93. sorted_data = mongo()
  94. if sorted_data:
  95. pg(sorted_data)
  96. print("Done!")