1 an în urmă · c04cae4bd2
--- a/flaticon/flaticon.py
+++ b/flaticon/flaticon.py
@@ -1,11 +1,12 @@
 
				 # -*- coding: utf-8 -*-
			
 
				 # 共两个步骤, 1, 将目标图片的信息拉到数据库(标题, 所有img的url), 2, 从数据库中读取对应目标站点的所有未下载过的img的url, 下载到本地
			
 
				 # 需要安装psql, 并且 CREATE DATABASE collect; 运行会自动建表
			
 
				+import socket
			
 
				 import sys
			
 
				 import os
			
 
				 import time
			
 
				 import random
			
 
				-
			
 
				+from concurrent.futures import ThreadPoolExecutor
			
 
				 import psycopg2
			
 
				 
			
 
				 sys.path.append(os.path.join(os.path.abspath(__file__).split('ResourceCollection')[0] + 'ResourceCollection'))
			
@@ -13,8 +14,7 @@ import httpx
 
				 from playwright.sync_api import sync_playwright
			
 
				 
			
 
				 target = 'flaticon'
			
 
				-step = 2  # 1 = 获取img链接, 2 = 下载图片, 3 = 1 + 2
			
 
				-remote_databases = 1
			
 
				+step = 4  # 1 = 获取img链接, 2 = 下载图片, 3 = 1 + 2, 4 = 调试
			
 
				 local_proxy = 0
			
 
				 title_selector = '#pack-view__inner > section.pack-view__header > h1'  # 获取标题选择器
			
 
				 img_selector = '#pack-view__inner > section.search-result > ul > li:nth-child({}) > div > a > img'  # 获取图片的url
			
@@ -23,7 +23,12 @@ not_find_page_selector = '#viewport > div.errorpage.e404 > h1'  # 当无法获
 
				 
			
 
				 project_root = os.path.join(os.path.abspath(__file__).split('ResourceCollection')[0] + 'ResourceCollection')
			
 
				 
			
 
				-if remote_databases:
			
 
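				+#  Detect this machine's outbound local IP via a UDP socket's getsockname(); if that address does not contain '192.168.100' (i.e. we are not on the home LAN), connect to the database through the public host instead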
			
 
				+s = socket.socket(socket.AF_INET, socket.SOCK_DGRAM)
			
 
				+s.connect(('10.255.255.255', 1))
			
 
				+IP = s.getsockname()[0]
			
 
				+s.close()
			
 
				+if '192.168.100' not in IP:
			
 
				     psql_params = {
			
 
				         "host": "home.erhe.link",
			
 
				         "port": 55434,
			
@@ -130,11 +135,13 @@ def open_browser(target_urls):
 
				 
			
 
				 def download_img(load_data, target_file_path):
			
 
				     print('正在下载图片')
			
 
				-    for index, data in enumerate(load_data):  # 循环内是单张图片
			
 
				-        multitasking_download(index, data, load_data, target_file_path)
			
 
				+    with ThreadPoolExecutor(max_workers=4) as executor:
			
 
				+        executor.map(single_img_download,
			
 
				+                     [(index, data, load_data, target_file_path) for index, data in enumerate(load_data)])
			
 
				 
			
 
				 
			
 
				-def multitasking_download(index, data, load_data, target_file_path):
			
 
				+def single_img_download(args):
			
 
				+    index, data, load_data, target_file_path = args
			
 
				     # 连接数据库, 准备反写下载状态
			
 
				     conn = psycopg2.connect(**psql_params)
			
 
				     cursor = conn.cursor()