Răsfoiți Sursa

添加多线程

jack 1 an în urmă
părinte
comite
c04cae4bd2
1 fișier modificat, cu 14 adăugiri și 7 ștergeri
  1. 14 7
      flaticon/flaticon.py

+ 14 - 7
flaticon/flaticon.py

@@ -1,11 +1,12 @@
 # -*- coding: utf-8 -*-
 # 共两个步骤, 1, 将目标图片的信息拉到数据库(标题, 所有img的url), 2, 从数据库中读取对应目标站点的所有未下载过的img的url, 下载到本地
 # 需要安装psql, 并且 CREATE DATABASE collect; 运行会自动建表
+import socket
 import sys
 import os
 import time
 import random
-
+from concurrent.futures import ThreadPoolExecutor
 import psycopg2
 
 sys.path.append(os.path.join(os.path.abspath(__file__).split('ResourceCollection')[0] + 'ResourceCollection'))
@@ -13,8 +14,7 @@ import httpx
 from playwright.sync_api import sync_playwright
 
 target = 'flaticon'
-step = 2  # 1 = 获取img链接, 2 = 下载图片, 3 = 1 + 2
-remote_databases = 1
+step = 4  # 1 = 获取img链接, 2 = 下载图片, 3 = 1 + 2, 4 = 调试
 local_proxy = 0
 title_selector = '#pack-view__inner > section.pack-view__header > h1'  # 获取标题选择器
 img_selector = '#pack-view__inner > section.search-result > ul > li:nth-child({}) > div > a > img'  # 获取图片的url
@@ -23,7 +23,12 @@ not_find_page_selector = '#viewport > div.errorpage.e404 > h1'  # 当无法获
 
 project_root = os.path.join(os.path.abspath(__file__).split('ResourceCollection')[0] + 'ResourceCollection')
 
-if remote_databases:
+#  获取局域网ip, 如果不是局域网, 则用公网连接数据库
+s = socket.socket(socket.AF_INET, socket.SOCK_DGRAM)
+s.connect(('10.255.255.255', 1))
+IP = s.getsockname()[0]
+s.close()
+if '192.168.100' not in IP:
     psql_params = {
         "host": "home.erhe.link",
         "port": 55434,
@@ -130,11 +135,13 @@ def open_browser(target_urls):
 
 def download_img(load_data, target_file_path):
     print('正在下载图片')
-    for index, data in enumerate(load_data):  # 循环内是单张图片
-        multitasking_download(index, data, load_data, target_file_path)
+    with ThreadPoolExecutor(max_workers=4) as executor:
+        executor.map(single_img_download,
+                     [(index, data, load_data, target_file_path) for index, data in enumerate(load_data)])
 
 
-def multitasking_download(index, data, load_data, target_file_path):
+def single_img_download(args):
+    index, data, load_data, target_file_path = args
     # 连接数据库, 准备反写下载状态
     conn = psycopg2.connect(**psql_params)
     cursor = conn.cursor()