|
|
@@ -19,9 +19,9 @@ class ImageCollector:
|
|
|
def __init__(self):
|
|
|
self.target = 'flaticon' # 整体目标文件夹/psql表名
|
|
|
self.category = '' # 细分分类文件夹
|
|
|
- self.step = 4 # 1 = 获取img链接, 2 = 下载图片, 3 = 1 + 2, 4 = 调试
|
|
|
+ self.step = 2 # 1 = 获取img链接, 2 = 下载图片, 3 = 1 + 2, 4 = 调试
|
|
|
self.local_proxy = 0
|
|
|
- self.thread_count = 8
|
|
|
+ self.thread_count = 1
|
|
|
self.title_selector = '#pack-view__inner > section.pack-view__header > h1'
|
|
|
self.img_selector = '#pack-view__inner > section.search-result > ul > li:nth-child({}) > div > a > img'
|
|
|
self.img_count_selector = '#pack-view__inner > section.pack-view__header > p'
|
|
|
@@ -265,18 +265,18 @@ class ImageCollector:
|
|
|
cursor.execute(f"SELECT img_url FROM {self.target} WHERE img_url = %s", (data['url'],))
|
|
|
if cursor.fetchone() is None:
|
|
|
cursor.execute(f"""
|
|
|
- INSERT INTO {self.target} (name, target_site, file_title, set_name, category, serial, download_state, image_suffix, img_url)
|
|
|
+ INSERT INTO {self.target} (name, target_site, file_title, set_name, serial, download_state, image_suffix, img_url, category)
|
|
|
VALUES (%s, %s, %s, %s, %s, %s, %s, %s, %s)
|
|
|
""", (
|
|
|
None,
|
|
|
self.target,
|
|
|
data['file_title'],
|
|
|
None,
|
|
|
- None,
|
|
|
data['serial'],
|
|
|
False,
|
|
|
data['suffix'],
|
|
|
- data['url']
|
|
|
+ data['url'],
|
|
|
+ None,
|
|
|
))
|
|
|
conn.commit()
|
|
|
cursor.close()
|
|
|
@@ -301,11 +301,11 @@ class ImageCollector:
|
|
|
'target_site': row[2],
|
|
|
'file_title': row[3],
|
|
|
'set_name': row[4],
|
|
|
- 'category': row[5],
|
|
|
- 'serial': row[6],
|
|
|
- 'download_state': row[7],
|
|
|
- 'image_suffix': row[8],
|
|
|
- 'img_url': row[9]
|
|
|
+ 'serial': row[5],
|
|
|
+ 'download_state': row[6],
|
|
|
+ 'image_suffix': row[7],
|
|
|
+ 'img_url': row[8],
|
|
|
+ 'category': row[9]
|
|
|
}
|
|
|
)
|
|
|
except psycopg2.Error as e:
|
|
|
@@ -342,11 +342,11 @@ class ImageCollector:
|
|
|
target_site VARCHAR(255),
|
|
|
file_title VARCHAR(255),
|
|
|
set_name VARCHAR(255),
|
|
|
- category VARCHAR(255),
|
|
|
serial INT,
|
|
|
download_state BOOLEAN,
|
|
|
image_suffix VARCHAR(50),
|
|
|
- img_url TEXT
|
|
|
+ img_url TEXT,
|
|
|
+ category VARCHAR(255)
|
|
|
);
|
|
|
""")
|
|
|
print(f"表 '{self.target}' 创建成功。")
|