jack committed 3 months ago
parent · commit 7bfeb90f5a
6 files changed, 484 additions and 18 deletions

  1. failed_downloads.json   + 6    - 0
  2. main.py                 + 46   - 16
  3. static/script.js        + 30   - 2
  4. step1.py                + 193  - 0
  5. step2.py                + 180  - 0
  6. utils.py                + 29   - 0

+ 6 - 0
failed_downloads.json

@@ -0,0 +1,6 @@
+[
+  {
+    "img_path": "data/downloads/[Pixiv]玲殿下(81002566)2025.09.23-E-HentaiGalleries/0016",
+    "img_url": "https://e-hentai.org/s/e3f2a8c9a8/3550066-16"
+  }
+]
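
Editor's note: this file is the retry list that step2.py (added below) persists via save_failed(); each entry pairs the extension-less target path with the image page URL that could not be fetched. A minimal sketch for inspecting it, assuming the file sits in the project root where FAILED_RECORD points:

    import json
    from pathlib import Path

    # Load the persisted retry list and show what is still pending.
    failed = json.loads(Path("failed_downloads.json").read_text(encoding="utf-8"))
    for item in failed:
        print(item["img_path"], "<-", item["img_url"])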

+ 46 - 16
main.py

@@ -4,9 +4,32 @@ from fastapi.templating import Jinja2Templates
 from fastapi.responses import JSONResponse
 import uvicorn
 import os
+from pydantic import BaseModel
+from utils import *
 
 app = FastAPI(title="Download Tool", version="1.0.0")
 
+# On application startup, make sure the data folder and targets.txt exist
+@app.on_event("startup")
+async def startup_event():
+    # Create the data directory if it is missing
+    data_dir = "data"
+    if not os.path.exists(data_dir):
+        os.makedirs(data_dir)
+        print(f"Created directory: {data_dir}")
+
+    # Create the targets.txt file if it is missing
+    targets_file = os.path.join(data_dir, "targets.txt")
+    if not os.path.exists(targets_file):
+        with open(targets_file, 'w', encoding='utf-8') as f:
+            f.write("# Add target URLs here, one per line\n")
+            f.write("# Examples:\n")
+            f.write("# https://example.com/file1.zip\n")
+            f.write("# https://example.com/image.jpg\n")
+        print(f"Created file: {targets_file}")
+    else:
+        print(f"File already exists: {targets_file}")
+
 # Mount static files and templates
 app.mount("/static", StaticFiles(directory="static"), name="static")
 templates = Jinja2Templates(directory="templates")
@@ -22,22 +45,6 @@ async def load_urls():
     try:
         file_path = "data/targets.txt"
         
-        # Check whether the data directory exists
-        if not os.path.exists('data'):
-            os.mkdir('data')
-            return JSONResponse({
-                "success": False,
-                "message": f"File {file_path} does not exist",
-                "urls": []
-            })
-        # Check whether the URL target file exists
-        if not os.path.exists(file_path):
-            return JSONResponse({
-                "success": False,
-                "message": f"File {file_path} does not exist",
-                "urls": []
-            })
-
         # Read the file contents
         with open(file_path, 'r', encoding='utf-8') as f:
             urls = [line.strip() for line in f.readlines() if line.strip()]
@@ -45,6 +52,13 @@ async def load_urls():
         # Filter out empty lines and comment lines (starting with #)
         urls = [url for url in urls if url and not url.startswith('#')]
         
+        if not urls:
+            return JSONResponse({
+                "success": True,
+                "message": "targets.txt is empty; please add URLs to data/targets.txt",
+                "urls": []
+            })
+        
         return JSONResponse({
             "success": True,
             "message": f"Successfully read {len(urls)} URLs",
@@ -67,5 +81,21 @@ async def clear_output():
         "output": ""
     })
 
+class ProxyRequest(BaseModel):
+    ip: str
+    port: str
+
+@app.post("/download_urls")
+async def download_urls(req: ProxyRequest):
+    proxy = f"http://{req.ip}:{req.port}"
+    msg = await run_step1(proxy)
+    return JSONResponse({"success": True, "message": msg})
+
+@app.post("/download_images")
+async def download_images(req: ProxyRequest):
+    proxy = f"http://{req.ip}:{req.port}"
+    msg = await run_step2(proxy)
+    return JSONResponse({"success": True, "message": msg})
+
 if __name__ == "__main__":
     uvicorn.run("main:app", host="0.0.0.0", port=8000, reload=True)

+ 30 - 2
static/script.js

@@ -19,12 +19,12 @@ class DownloadTool {
         
         // Download-URLs button
         this.downloadUrlBtn.addEventListener('click', () => {
-            this.showOutput('Download URL clicked', 'success');
+            this.downloadUrls();
         });
         
         // Download-images button
         this.downloadImageBtn.addEventListener('click', () => {
-            this.showOutput('Download IMG clicked', 'success');
+            this.downloadImages();
         });
         
         // Clear-output button
@@ -71,6 +71,34 @@ class DownloadTool {
         }
     }
     
+    async downloadUrls() {
+        const ip = document.getElementById('ip').value;
+        const port = document.getElementById('port').value;
+    
+        this.showOutput('Fetching gallery links...', 'info');
+        const res = await fetch('/download_urls', {
+            method: 'POST',
+            headers: { 'Content-Type': 'application/json' },
+            body: JSON.stringify({ ip, port })
+        });
+        const data = await res.json();
+        this.showOutput(data.message, data.success ? 'success' : 'error');
+    }
+
+    async downloadImages() {
+        const ip = document.getElementById('ip').value;
+        const port = document.getElementById('port').value;
+    
+        this.showOutput('Downloading images...', 'info');
+        const res = await fetch('/download_images', {
+            method: 'POST',
+            headers: { 'Content-Type': 'application/json' },
+            body: JSON.stringify({ ip, port })
+        });
+        const data = await res.json();
+        this.showOutput(data.message, data.success ? 'success' : 'error');
+    }
+
     showOutput(message, type = '') {
         this.output.textContent = message;
         this.output.className = 'output-area';

+ 193 - 0
step1.py

@@ -0,0 +1,193 @@
+#!/usr/bin/env python3
+# -*- coding: utf-8 -*-
+"""
+异步批量抓取 E-H 画廊图片链接,按专辑保存 json
+python eh_crawler.py
+"""
+from __future__ import annotations
+
+import asyncio
+import json
+import logging
+import re
+import sys
+from pathlib import Path
+from typing import Dict, List, Optional, Tuple
+
+import aiofiles
+import httpx
+from bs4 import BeautifulSoup
+from tqdm.asyncio import tqdm_asyncio
+from aiopath import AsyncPath
+
+# -------------------- Configurable constants --------------------
+CONCURRENCY = 20                 # concurrent pages
+MAX_PAGE = 100                   # max pages per gallery
+RETRY_PER_PAGE = 5               # retries per page
+TIMEOUT = httpx.Timeout(10.0)    # request timeout
+IMG_SELECTOR = "#gdt"            # thumbnail area that holds the image links
+FAILED_RECORD = "failed_keys.json"
+LOG_LEVEL = logging.INFO
+# ----------------------------------------------------
+
+logging.basicConfig(
+    level=LOG_LEVEL,
+    format="[%(asctime)s] [%(levelname)s] %(message)s",
+    handlers=[
+        logging.StreamHandler(sys.stdout),
+        logging.FileHandler("crawl.log", encoding="utf-8"),
+    ],
+)
+log = logging.getLogger("eh_crawler")
+
+# Pre-compiled regex
+ILLEGAL_CHARS = re.compile(r'[<>:"/\\|?*\x00-\x1F]')
+
+
+# -------------------- Helpers --------------------
+def clean_folder_name(title: str) -> str:
+    """Sanitize a gallery title into a safe folder name"""
+    return ILLEGAL_CHARS.sub("_", title).replace(" ", "").replace("_", "").strip() or "gallery"
+
+
+def load_targets() -> List[str]:
+    """读取 targets.txt"""
+    tgt = Path("data/targets.txt")
+    if not tgt.exists():
+        log.error("targets.txt 不存在,已自动创建,请先填写 URL")
+        tgt.touch()
+        sys.exit(0)
+    lines = [ln.strip() for ln in tgt.read_text(encoding="utf-8").splitlines() if ln.strip()]
+    if not lines:
+        log.error("targets.txt 为空,请先填写 URL")
+        sys.exit(0)
+    return list(set(lines))  # 去重
+
+
+def load_failed() -> List[str]:
+    if Path(FAILED_RECORD).exists():
+        try:
+            return json.loads(Path(FAILED_RECORD).read_text(encoding="utf-8"))
+        except Exception as exc:
+            log.warning(f"加载失败记录失败 -> {exc}")
+    return []
+
+
+def save_failed(keys: List[str]) -> None:
+    Path(FAILED_RECORD).write_text(json.dumps(keys, ensure_ascii=False, indent=2), encoding="utf-8")
+
+
+# -------------------- Crawler core --------------------
+async def fetch_page(client: httpx.AsyncClient, url: str) -> Optional[str]:
+    """Fetch the HTML of a single page"""
+    for attempt in range(1, RETRY_PER_PAGE + 1):
+        try:
+            resp = await client.get(url)
+            resp.raise_for_status()
+            return resp.text
+        except httpx.HTTPError as exc:
+            log.error(f"[{attempt}/{RETRY_PER_PAGE}] 请求失败 {url} -> {exc}")
+            await asyncio.sleep(2 ** attempt)
+    return None
+
+
+async def crawl_single_gallery(
+    client: httpx.AsyncClient, sem: asyncio.Semaphore, gallery_url: str
+) -> bool:
+    """抓取单个画廊,成功返回 True"""
+    async with sem:
+        base_url = gallery_url.rstrip("/")
+        key = base_url.split("/")[-1]  # 用最后一截当 key
+        json_name = f"{key}.json"
+
+        folder_path: Optional[AsyncPath] = None
+        json_data: Dict[str, str] = {}
+        img_count = 1
+        last_page = False
+
+        for page in range(MAX_PAGE):
+            if last_page:
+                break
+            url = f"{base_url}?p={page}"
+            html = await fetch_page(client, url)
+            if html is None:
+                continue
+
+            soup = BeautifulSoup(html, "lxml")
+            title = soup.title.string if soup.title else "gallery"
+            clean_title = clean_folder_name(title)
+            folder_path = AsyncPath("data/downloads") / clean_title
+            await folder_path.mkdir(parents=True, exist_ok=True)
+
+            # If the JSON already exists, skip the whole gallery
+            json_path = folder_path / json_name
+            if await json_path.exists():
+                log.info(f"{json_name} already exists, skipping")
+                return True
+
+            log.info(f"Current page: {page + 1}  {url}")
+
+            selected = soup.select_one(IMG_SELECTOR)
+            if not selected:
+                log.warning(f"未找到选择器 {IMG_SELECTOR}")
+                continue
+
+            links = re.findall(r'<a href="(.*?)"', selected.prettify())
+            if not links:
+                log.info("本页无图片入口,视为最后一页")
+                last_page = True
+                continue
+
+            for img_entry in links:
+                if img_entry in json_data.values():
+                    last_page = True
+                    break
+                json_data[f"{img_count:04d}"] = img_entry
+                img_count += 1
+
+        if json_data:
+            await json_path.write_text(
+                json.dumps(json_data, ensure_ascii=False, indent=2), encoding="utf-8"
+            )
+            log.info(f"保存成功 -> {json_path}  ({len(json_data)} 张)")
+            return True
+        else:
+            log.warning(f"{key} 未解析到任何图片链接")
+            return False
+
+
+# -------------------- Main flow --------------------
+async def main(proxy: str | None = None) -> None:
+    targets = load_targets()
+    failed = load_failed()
+    if failed:
+        log.info(f"Retrying previously failed galleries first: {len(failed)}")
+    all_urls = list(set(targets + failed))
+
+    log.info(f"Using proxy: {proxy}")
+    limits = httpx.Limits(max_keepalive_connections=20, max_connections=50)
+    async with httpx.AsyncClient(
+        limits=limits, timeout=TIMEOUT, proxies=proxy, verify=True
+    ) as client:
+        sem = asyncio.Semaphore(CONCURRENCY)
+        results = await tqdm_asyncio.gather(
+            *[crawl_single_gallery(client, sem, u) for u in all_urls],
+            desc="Galleries",
+            total=len(all_urls),
+        )
+
+    # Persist failures
+    new_failed = [u for u, ok in zip(all_urls, results) if not ok]
+    if new_failed:
+        save_failed(new_failed)
+        log.warning(f"{len(new_failed)} galleries still failed this round; written to {FAILED_RECORD}")
+    else:
+        Path(FAILED_RECORD).unlink(missing_ok=True)
+        log.info("All galleries crawled!")
+
+
+if __name__ == "__main__":
+    try:
+        asyncio.run(main())
+    except KeyboardInterrupt:
+        log.info("用户中断,抓取结束")

+ 180 - 0
step2.py

@@ -0,0 +1,180 @@
+#!/usr/bin/env python3
+# -*- coding: utf-8 -*-
+"""
+异步批量下载 EH 画廊真实图片
+python download_images.py
+"""
+from __future__ import annotations
+
+import asyncio
+import json
+import logging
+import re
+import sys
+from pathlib import Path
+from typing import Dict, List, Optional
+
+import aiofiles
+import httpx
+from aiopath import AsyncPath
+from tqdm.asyncio import tqdm_asyncio
+
+# -------------------- Configurable constants --------------------
+CONCURRENCY = 20                 # concurrent downloads
+RETRY_PER_IMG = 3                # retries per image
+TIMEOUT = httpx.Timeout(15.0)    # request timeout
+FAILED_RECORD = "failed_downloads.json"
+LOG_LEVEL = logging.INFO
+# ----------------------------------------------------
+
+logging.basicConfig(
+    level=LOG_LEVEL,
+    format="[%(asctime)s] [%(levelname)s] %(message)s",
+    handlers=[
+        logging.StreamHandler(sys.stdout),
+        logging.FileHandler("download.log", encoding="utf-8"),
+    ],
+)
+log = logging.getLogger("img_downloader")
+
+# Pre-compiled regexes
+IMG_URL_RE = re.compile(r'<img id="img" src="(.*?)"', re.S)
+EXT_RE = re.compile(r"\.(jpg|jpeg|png|gif|webp)$", re.I)
+
+
+# -------------------- Helpers --------------------
+def load_failed() -> List[Dict[str, str]]:
+    if Path(FAILED_RECORD).exists():
+        try:
+            return json.loads(Path(FAILED_RECORD).read_text(encoding="utf-8"))
+        except Exception as exc:
+            log.warning(f"Failed to load the failure record -> {exc}")
+    return []
+
+
+def save_failed(failed: List[Dict[str, str]]) -> None:
+    Path(FAILED_RECORD).write_text(json.dumps(failed, ensure_ascii=False, indent=2), encoding="utf-8")
+
+
+# -------------------- Download core --------------------
+async def download_one(
+    client: httpx.AsyncClient, sem: asyncio.Semaphore, item: Dict[str, str]
+) -> bool:
+    """Download a single image; return True on success"""
+    img_path, img_url = Path(item["img_path"]), item["img_url"]
+
+    await sem.acquire()
+    try:
+        for attempt in range(1, RETRY_PER_IMG + 1):
+            try:
+                # 1. Fetch the image detail page
+                resp = await client.get(img_url)
+                resp.raise_for_status()
+                real_url_match = IMG_URL_RE.search(resp.text)
+                if not real_url_match:
+                    log.warning(f"Could not parse the real image URL: {img_url}")
+                    return False          # <- no await happens here
+                real_url = real_url_match.group(1)
+
+                # 2. Download the real image (streamed)
+                ext_match = EXT_RE.search(real_url)
+                ext = ext_match.group(1).lower() if ext_match else "jpg"
+                final_path = img_path.with_suffix(f".{ext}")
+
+                if await AsyncPath(final_path).exists():
+                    log.info(f"已存在,跳过: {final_path.name}")
+                    return True
+
+                async with client.stream("GET", real_url) as img_resp:
+                    img_resp.raise_for_status()
+                    await AsyncPath(final_path).parent.mkdir(parents=True, exist_ok=True)
+                    async with aiofiles.open(final_path, "wb") as fp:
+                        async for chunk in img_resp.aiter_bytes(chunk_size=65536):
+                            await fp.write(chunk)
+
+                log.info(f"[OK] {final_path.name}")
+                return True
+
+            except httpx.HTTPStatusError as exc:
+                if exc.response.status_code == 429:
+                    wait = 2 ** (attempt - 1)
+                    log.warning(f"[429] 等待 {wait}s 后重试({attempt}/{RETRY_PER_IMG})")
+                    await asyncio.sleep(wait)
+                else:
+                    log.error(f"[HTTP {exc.response.status_code}] {img_url}")
+                    break
+            except Exception as exc:
+                log.error(f"[ERROR] {img_url} -> {exc} ({attempt}/{RETRY_PER_IMG})")
+                await asyncio.sleep(1)
+
+        return False
+    finally:
+        sem.release()
+
+
+# -------------------- Scan for pending downloads --------------------
+async def scan_tasks() -> List[Dict[str, str]]:
+    """Scan every JSON under downloads/ and return the list of images still to download"""
+    result = []
+    root = AsyncPath("data/downloads")
+    if not await root.exists():
+        return result
+
+    async for json_path in root.rglob("*.json"):
+        folder = json_path.parent
+        try:
+            data: Dict[str, str] = json.loads(await json_path.read_text(encoding="utf-8"))
+        except Exception as exc:
+            log.warning(f"读取 json 失败 {json_path} -> {exc}")
+            continue
+
+        for img_name, img_url in data.items():
+            img_path = folder / img_name  # no file extension yet
+            # asynchronously check whether the file already exists under any extension
+            exists = False
+            for ext in (".jpg", ".jpeg", ".png", ".gif", ".webp"):
+                if await img_path.with_suffix(ext).exists():
+                    exists = True
+                    break
+            if not exists:
+                result.append({"img_path": str(img_path), "img_url": img_url})
+
+    return result
+
+
+# -------------------- Main flow --------------------
+async def main(proxy: str | None = None) -> None:
+    # 1. Retry previous failures first
+    failed_tasks = load_failed()
+    if failed_tasks:
+        log.info(f"Retrying previously failed tasks first: {len(failed_tasks)} images")
+
+    tasks = failed_tasks + await scan_tasks()
+    if not tasks:
+        log.info("没有需要下载的图片,收工!")
+        return
+
+    limits = httpx.Limits(max_keepalive_connections=20, max_connections=50)
+    async with httpx.AsyncClient(limits=limits, timeout=TIMEOUT, proxies=proxy, verify=True) as client:
+        sem = asyncio.Semaphore(CONCURRENCY)
+        results = await tqdm_asyncio.gather(
+            *[download_one(client, sem, t) for t in tasks],
+            desc="Downloading",
+            total=len(tasks),
+        )
+
+    # 统计 & 持久化新失败
+    failed_again = [t for t, ok in zip(tasks, results) if not ok]
+    if failed_again:
+        save_failed(failed_again)
+        log.warning(f"本轮仍有 {len(failed_again)} 张下载失败,已写入 {FAILED_RECORD}")
+    else:
+        Path(FAILED_RECORD).unlink(missing_ok=True)
+        log.info("全部下载完成!")
+
+
+if __name__ == "__main__":
+    try:
+        asyncio.run(main())
+    except KeyboardInterrupt:
+        log.info("用户中断,下载结束")

+ 29 - 0
utils.py

@@ -0,0 +1,29 @@
+# utils.py
+import asyncio
+from pathlib import Path
+from typing import List
+
+from aiopath import AsyncPath
+import logging
+
+# Wrap the main logic of step1.py and step2.py as callable functions
+from step1 import main as step1_main
+from step2 import main as step2_main
+
+log = logging.getLogger("utils")
+
+async def run_step1(proxy: str | None = None) -> str:
+    try:
+        await step1_main(proxy)
+        return "画廊链接抓取完成!"
+    except Exception as e:
+        log.exception("step1 执行失败")
+        return f"抓取失败:{e}"
+
+async def run_step2(proxy: str | None = None) -> str:
+    try:
+        await step2_main(proxy)
+        return "图片下载完成!"
+    except Exception as e:
+        log.exception("step2 执行失败")
+        return f"下载失败:{e}"