|
|
@@ -21,7 +21,7 @@ class ImageCollector:
|
|
|
self.category = '' # 细分分类文件夹
|
|
|
self.step = 2 # 1 = 获取img链接, 2 = 下载图片, 3 = 1 + 2, 4 = 调试
|
|
|
self.local_proxy = 0
|
|
|
- self.thread_count = 1
|
|
|
+ self.thread_count = 8
|
|
|
self.title_selector = '#pack-view__inner > section.pack-view__header > h1'
|
|
|
self.img_selector = '#pack-view__inner > section.search-result > ul > li:nth-child({}) > div > a > img'
|
|
|
self.img_count_selector = '#pack-view__inner > section.pack-view__header > p'
|
|
|
@@ -63,7 +63,7 @@ class ImageCollector:
|
|
|
|
|
|
if page_count == 1:
|
|
|
# 在第一页, 获取 title
|
|
|
- title = re.findall('<span class="title">([\S\s]*?)</h1>', page)
|
|
|
+ title = re.findall(r'<span class="title">([\S\s]*?)</h1>', page)
|
|
|
if title:
|
|
|
title = title[0]
|
|
|
invalid_chars = ['\n', '<', '>', ':', '"', '/', '\\', '|', '?', '*', '.', ' ',
|