11 сар өмнө · 86f5707c9b
--- a/jcomic/main.py
+++ b/jcomic/main.py
@@ -1,19 +1,39 @@
 
				+'''
			
 
				+https://jcomic.net/
			
 
				+'''
			
 
				 import os
			
 
				 import time
			
 
				+import re
			
 
				 import random
			
 
				+from urllib.parse import unquote
			
 
				 import httpx
			
 
				 from bs4 import BeautifulSoup
			
 
				-
			
 
				-comico_urls = [
			
 
				-    '[PIXIV] LotteryFate (18900473)（AI）',
			
 
				-]
			
 
				+from concurrent.futures import ThreadPoolExecutor, as_completed
			
 
				+
			
 
				+comico_urls = []
			
 
				+urls_txt = os.path.join(os.path.dirname(os.path.abspath(__file__)), 'urls.txt')
			
 
				+# 如果文件不存在，创建一个空文件
			
 
				+if not os.path.exists(urls_txt):
			
 
				+    with open(urls_txt, 'w') as f:
			
 
				+        f.write('')
			
 
				+
			
 
				+# 读取文件内容
			
 
				+with open(urls_txt, 'r', encoding='utf-8') as f:
			
 
				+    lines = f.readlines()  # 调用 readlines() 方法
			
 
				+    for line in lines:
			
 
				+        comico_urls.append(line.strip())  # 去除换行符
			
 
				+
			
 
				+if not comico_urls:
			
 
				+    exit(0)
			
 
				+else:
			
 
				+    print(f'准备下载 {comico_urls}')
			
 
				 
			
 
				 # 是否使用代理
			
 
				-use_proxy = 1
			
 
				+use_proxy = 0
			
 
				 
			
 
				 
			
 
				 def save_img(client, folder_path, img_links):
			
 
				-    for index, img_url in enumerate(img_links, start=1):
			
 
				+    def download_image(index, img_url):
			
 
				         try:
			
 
				             # 生成文件名，例如 0001.png, 0002.png
			
 
				             file_name = f"{str(index).zfill(4)}.png"
			
@@ -22,13 +42,12 @@ def save_img(client, folder_path, img_links):
 
				             # 检查文件是否已经存在
			
 
				             if os.path.exists(file_path):
			
 
				                 print(f"文件已存在，跳过下载: {file_path}")
			
 
				-                continue
			
 
				+                return
			
 
				 
			
 
				             # 发送请求获取图片内容
			
 
				             response = client.get(img_url)
			
 
				             if response.status_code != 200:
			
 
				-                raise Exception(
			
 
				-                    f"无法下载图片 {img_url}，状态码: {response.status_code}")
			
 
				+                raise Exception(f"无法下载图片 {img_url}，状态码: {response.status_code}")
			
 
				 
			
 
				             # 保存图片到本地
			
 
				             with open(file_path, 'wb') as file:
			
@@ -36,15 +55,21 @@ def save_img(client, folder_path, img_links):
 
				 
			
 
				             print(f"图片已保存: {file_path}")
			
 
				         except Exception as e:
			
 
				-            raise Exception(f"下载图片 {img_url} 时出错: {e}")
			
 
				+            print(f"下载图片 {img_url} 时出错: {e}")
			
 
				+
			
 
				+    # 使用 ThreadPoolExecutor 进行多线程下载
			
 
				+    with ThreadPoolExecutor(max_workers=10) as executor:
			
 
				+        futures = []
			
 
				+        for index, img_url in img_links.items():
			
 
				+            futures.append(executor.submit(download_image, index, img_url))
			
 
				 
			
 
				-        # random_sleep = random.uniform(2, 3)
			
 
				-        # print(f"随机休眠 {random_sleep} 秒")
			
 
				-        # time.sleep(random_sleep)
			
 
				+        # 等待所有任务完成
			
 
				+        for future in as_completed(futures):
			
 
				+            future.result()  # 获取任务结果，如果有异常会在这里抛出
			
 
				 
			
 
				 
			
 
				 def get_imgs(client, folder_path, chapter_data):
			
 
				-    img_links = []
			
 
				+    img_links = {}
			
 
				     for chapter_name, url in chapter_data.items():
			
 
				         try:
			
 
				             # 发送请求获取页面内容
			
@@ -67,23 +92,43 @@ def get_imgs(client, folder_path, chapter_data):
 
				             print(f'{chapter_name} 共 {total_images} 张图片')
			
 
				 
			
 
				             # 输出图片的 URL
			
 
				+            page = 1
			
 
				             for img in img_elements:
			
 
				                 img_url = img.get('src')
			
 
				                 if img_url:
			
 
				-                    img_links.append(img_url)
			
 
				+                    img_links[str(page).zfill(4)] = img_url
			
 
				+                    page += 1
			
 
				         except Exception as e:
			
 
				             print(f"获取图片时出错: {e}")
			
 
				             raise  # 抛出异常，触发重试逻辑
			
 
				     return img_links
			
 
				 
			
 
				 
			
 
				+def new_file_name(file_name):
			
 
				+    """
			
 
				+    将文件名中的非法字符直接消除，使其符合 Windows 文件夹命名规则。
			
 
				+    :param file_name: 原始文件名
			
 
				+    :return: 合法的文件名
			
 
				+    """
			
 
				+    # 定义 Windows 文件系统中不允许的字符
			
 
				+    illegal_chars = r'[\\/:*?"<>|]'
			
 
				+
			
 
				+    # 直接消除非法字符
			
 
				+    safe_name = re.sub(illegal_chars, '', file_name)
			
 
				+    
			
 
				+    # 去掉首尾的空格（如果有）
			
 
				+    safe_name = safe_name.strip()
			
 
				+    
			
 
				+    return safe_name
			
 
				+
			
 
				+
			
 
				 def save_urls(folder_path, img_links):
			
 
				     # 定义保存文件路径
			
 
				     save_path = os.path.join(folder_path, 'img_links.txt')
			
 
				 
			
 
				     # 将图片链接写入文件
			
 
				     with open(save_path, 'w', encoding='utf-8') as file:
			
 
				-        for link in img_links:
			
 
				+        for num, link in img_links.items():
			
 
				             file.write(link + '\n')
			
 
				 
			
 
				     print(f"图片链接已保存到: {save_path}")
			
@@ -107,7 +152,7 @@ def new_folder(page_title):
 
				         return folder_path
			
 
				 
			
 
				 
			
 
				-def get_chapter_data(client, target_url):
			
 
				+def get_chapter_data(client, target_url, base_url):
			
 
				     result = {}
			
 
				     page_title = ''
			
 
				 
			
@@ -121,12 +166,16 @@ def get_chapter_data(client, target_url):
 
				         # 获取指定选择器下的所有元素
			
 
				         elements = soup.select(
			
 
				             'body > div.container > div:nth-child(3) > div:nth-child(2) a')
			
 
				+        if elements:
			
 
				+            # 提取每个元素的 URL 和文本
			
 
				+            for element in elements:
			
 
				+                url = element.get('href')
			
 
				+                text = element.get_text()
			
 
				+                result[text] = base_url + url
			
 
				+        else:
			
 
				+            # 这里是只有第一话的情况
			
 
				+            result['第1话'] = target_url.replace('eps', 'page')
			
 
				 
			
 
				-        # 提取每个元素的 URL 和文本
			
 
				-        for element in elements:
			
 
				-            url = element.get('href')
			
 
				-            text = element.get_text()
			
 
				-            result[text] = base_url + url
			
 
				     except Exception as e:
			
 
				         print(f"获取章节数据时出错: {e}")
			
 
				         raise  # 抛出异常，触发重试逻辑
			
@@ -137,7 +186,6 @@ def get_chapter_data(client, target_url):
 
				 def main():
			
 
				     proxy_url = 'http://127.0.0.1:7890'
			
 
				     base_url = 'https://jcomic.net'
			
 
				-    herf_url = '/eps/'
			
 
				     # 自定义请求头
			
 
				     custom_headers = {
			
 
				         "accept": "text/html,application/xhtml+xml,application/xml;q=0.9,image/avif,image/webp,image/apng,*/*;q=0.8,application/signed-exchange;v=b3;q=0.7",
			
@@ -157,8 +205,11 @@ def main():
 
				     }
			
 
				 
			
 
				     for comico_url in comico_urls:
			
 
				-        target_url = base_url + herf_url + comico_url
			
 
				-        print(target_url)
			
 
				+        # 处理url,并获取文件名
			
 
				+        file_name = new_file_name(unquote(comico_url.split('/')[-1]))
			
 
				+
			
 
				+        target_url = comico_url
			
 
				+        print(file_name)
			
 
				         # 最大重试次数
			
 
				         max_retries = 999
			
 
				         retry_count = 0
			
@@ -168,18 +219,16 @@ def main():
 
				                 # 创建 httpx.Client 实例，并设置自定义请求头
			
 
				                 with httpx.Client(proxies=proxy_url if use_proxy else None, headers=custom_headers) as client:
			
 
				                     # 1, 获取页面章节数据
			
 
				-                    chapter_data = get_chapter_data(client, target_url)
			
 
				-                    print(chapter_data)
			
 
				+                    chapter_data = get_chapter_data(client, target_url, base_url)
			
 
				 
			
 
				                     # 2, 在当前文件夹下创建一个文件夹，用来保存图片, 文件名称是 title
			
 
				-                    folder_path = new_folder(comico_url)
			
 
				+                    folder_path = new_folder(file_name)
			
 
				 
			
 
				                     # 3, 遍历章节数据，获取img的链接
			
 
				                     img_links = get_imgs(client, folder_path, chapter_data)
			
 
				-                    print(img_links)
			
 
				 
			
 
				                     # 4, 保存url到新建的文件夹中
			
 
				-                    save_urls(folder_path, img_links)
			
 
				+                    #save_urls(folder_path, img_links)
			
 
				 
			
 
				                     # 5，遍历 img_links ，将图片保存到 folder_path中， 保存的文件名类似 0001.png
			
 
				                     save_img(client, folder_path, img_links)
			
@@ -195,11 +244,10 @@ def main():
 
				                     print("已达到最大重试次数，程序终止。")
			
 
				                     break
			
 
				 
			
 
				-                # 固定延迟 10 分钟（600 秒）
			
 
				-                delay = 30
			
 
				+                delay = 5
			
 
				                 print(f"等待 {delay} 秒后重试...")
			
 
				                 time.sleep(delay)
			
 
				 
			
 
				 
			
 
				 if __name__ == '__main__':
			
 
				-    main()
			
 
				+    main()