|
@@ -129,61 +129,67 @@ def open_browser(target_urls):
|
|
|
|
|
|
|
|
|
|
|
|
|
def download_img(load_data, target_file_path):
    """Download every image described in ``load_data``.

    Prints a start banner, then hands each record to
    ``multitasking_download`` together with its position in ``load_data``,
    the full ``load_data`` collection and ``target_file_path``.

    Args:
        load_data: Iterable of per-image records; each one is passed through
            unchanged to ``multitasking_download``.
        target_file_path: Passed through unchanged to
            ``multitasking_download``.
    """
    print('正在下载图片')
    # Each iteration handles exactly one image.
    for position, record in enumerate(load_data):
        multitasking_download(position, record, load_data, target_file_path)
|
|
|
|
|
+
|
|
|
|
|
+
|
|
|
|
|
def _mark_downloaded(conn, cursor, row_id):
    """Set ``download_state`` to true for the row ``row_id`` and commit."""
    # Identifiers cannot be bound as query parameters, so only the values are
    # parameterized; the table name comes from the module-level ``target``.
    # NOTE(review): assumes ``target`` is a trusted, code-defined table name
    # and never user input - confirm against where it is assigned.
    query = f"UPDATE {target} SET download_state = %s WHERE id = %s"
    cursor.execute(query, (True, row_id))
    conn.commit()


def multitasking_download(index, data, load_data, target_file_path):
    """Download a single image and write its download state back to PostgreSQL.

    The image is stored as
    ``<target_file_path>/<file_title>/<file_title>_<serial>.<image_suffix>``,
    where spaces in ``file_title`` are replaced by underscores and ``serial``
    is zero-padded to three digits. If that file already exists, the row is
    only marked as downloaded and nothing is fetched. Otherwise the download
    is attempted up to eight times; every failure is printed and followed by
    a 3-5 second pause. A response with a non-success HTTP status counts as a
    failure, so an error page is never stored as an image.

    Args:
        index: Zero-based position of ``data`` in ``load_data``; used only
            for the progress message.
        data: Mapping with the keys ``id``, ``file_title``, ``serial``,
            ``image_suffix`` and ``img_url``.
        load_data: The full collection being processed; only its length is
            used, for the progress message.
        target_file_path: Directory under which the per-title folder is
            created.
    """
    row_id = data['id']
    file_title = data['file_title'].replace(' ', '_')
    serial = str(data['serial']).zfill(3)
    image_suffix = data['image_suffix']
    img_url = data['img_url']

    # Connect to the database so the download state can be written back.
    conn = psycopg2.connect(**psql_params)
    try:
        cursor = conn.cursor()

        # Make sure the per-title folder exists. exist_ok avoids the
        # check-then-create race and a crash when the folder is created
        # between the check and the mkdir.
        title_file_path = os.path.join(target_file_path, file_title)
        os.makedirs(title_file_path, exist_ok=True)

        img_name = f'{file_title}_{serial}.{image_suffix}'  # image file name
        img_file_path = os.path.join(title_file_path, img_name)  # full image path

        if os.path.exists(img_file_path):
            # The image is already on disk: just mark the row as downloaded.
            _mark_downloaded(conn, cursor, row_id)
            print(f'图片 {img_file_path} 已存在。继续!')
            return

        request_headers = {
            "user-agent": "Mozilla/5.0 (Macintosh; Intel Mac OS X 10_15_7) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/130.0.0.0 Safari/537.36"
        }
        total = len(load_data)

        # Count down from 8 so the failure message shows the same remaining
        # retry number as before.
        for retry in range(8, 0, -1):
            try:
                resp = httpx.get(img_url, headers=request_headers)
                # Treat HTTP error responses as failures instead of saving
                # their body as the image and marking the row as downloaded.
                resp.raise_for_status()
                with open(img_file_path, 'wb') as f:
                    f.write(resp.content)

                # Download succeeded: mark the row as downloaded.
                _mark_downloaded(conn, cursor, row_id)

                # Report progress; index is zero-based, so this image is
                # number index + 1 out of total.
                rate = (index + 1) / total * 100
                print(f'已下载:{img_name}, 当前第 {index + 1} 个, 共 {total} 个, 已下载 {rate:.2f}%')
                time.sleep(random.uniform(1, 2))
                break
            except Exception as e:
                # Best-effort download: report the failure, wait, try again.
                print(f'下载图片失败:{img_name}。错误:{e} 重试: {retry}')
                time.sleep(random.uniform(3, 5))
    finally:
        # Always release the connection, including on the early return above
        # and when an unexpected error propagates.
        conn.close()
|
|
|
|
|
|
|
|
|
|
|
|
|
def save_data(data_item):
|
|
def save_data(data_item):
|