| 123456789101112131415161718192021222324252627282930313233343536373839404142434445464748495051525354555657585960616263646566676869707172737475767778798081828384858687888990919293949596979899100101102103104105106107108109110111112113114115116117118119120121122123124125126127128129130131132133134135136137138139140141142143144145146147148149150151152153154155156157158159160161162163164165166167168169170171172173174175176177178179180181182183184185186187188189190191192193194195196197198199200201202203204205206207208209210211212213214215216217218219220221222223224225226227228229230231232233234235236237238239240241242243244245246247248249250251252253254255256257258259260261262263264265266267268269270271272273274275276277278279280281282283284285286287288289290291292293294295296297298299300301302303304305306307308309310311312313314315316 |
- package main
- import (
- "encoding/json"
- "fmt"
- "io"
- "log"
- "net/http"
- "os"
- "path/filepath"
- "regexp"
- "strings"
- "sync"
- "time"
- "fyne.io/fyne/v2/widget"
- "github.com/schollz/progressbar/v3"
- )
- // -------------------- 全局变量 --------------------
// imgURLRegex locates the <img id="img"> tag in a detail page; submatch 1 is
// the value of its src attribute.
var imgURLRegex = regexp.MustCompile(`<img id="img" src="(.*?)"`)

// extRegex matches a trailing image extension (jpg, jpeg, png, gif or webp);
// submatch 1 is the extension without the leading dot.
var extRegex = regexp.MustCompile(`\.(jpg|jpeg|png|gif|webp)$`)

// logger writes to stdout with the standard date/time flags. Lmsgprefix makes
// any prefix set later appear directly before the message text.
var logger = log.New(os.Stdout, "", log.LstdFlags|log.Lmsgprefix)
- // -------------------- 数据结构 --------------------
// DownloadTask describes a single image to fetch. Values of this type are
// marshalled to and from JSON when failed downloads are recorded.
type DownloadTask struct {
	// ImgPath is the destination path on disk. The downloader replaces any
	// extension on it with the one derived from the real image URL.
	ImgPath string `json:"img_path"`

	// ImgURL is the address of the detail page from which the real image
	// link is extracted.
	ImgURL string `json:"img_url"`
}
- // -------------------- 工具函数 --------------------
- func loadFailedImg() []DownloadTask {
- if _, err := os.Stat(FailedRecordImg); os.IsNotExist(err) {
- return []DownloadTask{}
- }
- data, err := os.ReadFile(FailedRecordImg)
- if err != nil {
- logger.Printf("加载失败记录失败 -> %v", err)
- return []DownloadTask{}
- }
- var tasks []DownloadTask
- if err := json.Unmarshal(data, &tasks); err != nil {
- logger.Printf("解析失败记录失败 -> %v", err)
- return []DownloadTask{}
- }
- return tasks
- }
- func saveFailedImg(failed []DownloadTask) {
- data, err := json.MarshalIndent(failed, "", " ")
- if err != nil {
- logger.Printf("序列化失败记录失败 -> %v", err)
- return
- }
- if err := os.WriteFile(FailedRecordImg, data, 0644); err != nil {
- logger.Printf("保存失败记录失败 -> %v", err)
- }
- }
// fileExists reports whether os.Stat succeeds for path. Any Stat error, not
// only "does not exist", is treated as the path being absent.
func fileExists(path string) bool {
	if _, statErr := os.Stat(path); statErr != nil {
		return false
	}
	return true
}
- func getFileExtension(url string) string {
- match := extRegex.FindStringSubmatch(strings.ToLower(url))
- if len(match) > 1 {
- return match[1]
- }
- return "jpg" // 默认扩展名
- }
- // -------------------- 下载核心 --------------------
- func downloadOne(client *http.Client, sem chan struct{}, wg *sync.WaitGroup, task DownloadTask, bar *progressbar.ProgressBar) bool {
- defer wg.Done()
- defer func() { <-sem }()
- imgPath, imgURL := task.ImgPath, task.ImgURL
- for attempt := 1; attempt <= RetryPerImg; attempt++ {
- success := func() bool {
- // 1. 获取详情页
- resp, err := client.Get(imgURL)
- if err != nil {
- logger.Printf("[ERROR] %s -> %v (尝试 %d/%d)", imgURL, err, attempt, RetryPerImg)
- return false
- }
- defer resp.Body.Close()
- if resp.StatusCode != http.StatusOK {
- if resp.StatusCode == http.StatusTooManyRequests {
- wait := 1 << (attempt - 1) // 指数退避
- logger.Printf("[429] 等待 %ds 后重试(%d/%d)", wait, attempt, RetryPerImg)
- time.Sleep(time.Duration(wait) * time.Second)
- return false
- }
- logger.Printf("[HTTP %d] %s", resp.StatusCode, imgURL)
- return false
- }
- // 读取响应内容
- body, err := io.ReadAll(resp.Body)
- if err != nil {
- logger.Printf("[ERROR] 读取响应失败 %s -> %v", imgURL, err)
- return false
- }
- // 解析真实图片链接
- realURLMatch := imgURLRegex.FindStringSubmatch(string(body))
- if len(realURLMatch) < 2 {
- logger.Printf("未解析到真实图片链接: %s", imgURL)
- return false
- }
- realURL := realURLMatch[1]
- // 2. 下载真实图片
- ext := getFileExtension(realURL)
- finalPath := strings.TrimSuffix(imgPath, filepath.Ext(imgPath)) + "." + ext
- // 检查文件是否已存在
- if fileExists(finalPath) {
- logger.Printf("已存在,跳过: %s", filepath.Base(finalPath))
- bar.Add(1)
- return true
- }
- // 创建目录
- if err := os.MkdirAll(filepath.Dir(finalPath), 0755); err != nil {
- logger.Printf("[ERROR] 创建目录失败 %s -> %v", filepath.Dir(finalPath), err)
- return false
- }
- // 下载图片
- imgResp, err := client.Get(realURL)
- if err != nil {
- logger.Printf("[ERROR] 下载图片失败 %s -> %v", realURL, err)
- return false
- }
- defer imgResp.Body.Close() // 修复:应该是 Body.Close()
- if imgResp.StatusCode != http.StatusOK {
- logger.Printf("[HTTP %d] %s", imgResp.StatusCode, realURL)
- return false
- }
- // 创建文件
- file, err := os.Create(finalPath)
- if err != nil {
- logger.Printf("[ERROR] 创建文件失败 %s -> %v", finalPath, err)
- return false
- }
- defer file.Close()
- // 写入文件
- _, err = io.Copy(file, imgResp.Body)
- if err != nil {
- logger.Printf("[ERROR] 写入文件失败 %s -> %v", finalPath, err)
- return false
- }
- logger.Printf("[OK] %s", filepath.Base(finalPath))
- bar.Add(1)
- return true
- }()
- if success {
- return true
- }
- if attempt < RetryPerImg {
- time.Sleep(time.Second) // 重试前等待
- }
- }
- return false
- }
- // -------------------- 扫描待下载列表 --------------------
- func scanTasks() ([]DownloadTask, error) {
- var tasks []DownloadTask
- err := filepath.Walk(DownloadDir, func(path string, info os.FileInfo, err error) error {
- if err != nil {
- return err
- }
- if info.IsDir() || filepath.Ext(path) != ".json" {
- return nil
- }
- // 读取JSON文件
- data, err := os.ReadFile(path)
- if err != nil {
- logger.Printf("读取JSON文件失败 %s -> %v", path, err)
- return nil
- }
- var urlMap map[string]string
- if err := json.Unmarshal(data, &urlMap); err != nil {
- logger.Printf("解析JSON失败 %s -> %v", path, err)
- return nil
- }
- dir := filepath.Dir(path)
- for imgName, imgURL := range urlMap {
- imgPathWithoutExt := filepath.Join(dir, imgName)
- // 检查文件是否已存在(任意扩展名)
- exists := false
- for _, ext := range []string{".jpg", ".jpeg", ".png", ".gif", ".webp"} {
- if fileExists(imgPathWithoutExt + ext) {
- exists = true
- break
- }
- }
- if !exists {
- tasks = append(tasks, DownloadTask{
- ImgPath: imgPathWithoutExt,
- ImgURL: imgURL,
- })
- }
- }
- return nil
- })
- return tasks, err
- }
- // -------------------- 主流程 --------------------
- func ImgDownloader(ip, port string, output *widget.Entry) {
- logger.SetPrefix("[INFO] ")
- // 1. 优先重试上次失败
- failedTasks := loadFailedImg()
- if len(failedTasks) > 0 {
- logger.Printf("优先重试上次失败任务: %d 张", len(failedTasks))
- }
- // 2. 扫描新任务
- newTasks, err := scanTasks()
- if err != nil {
- logger.Printf("扫描任务失败: %v", err)
- return
- }
- // 合并任务
- tasks := append(failedTasks, newTasks...)
- if len(tasks) == 0 {
- logger.Println("没有需要下载的图片,收工!")
- return
- }
- logger.Printf("开始下载 %d 张图片", len(tasks))
- // 3. 创建HTTP客户端
- proxy := ip + port
- client := createHTTPClient(proxy)
- // 4. 创建进度条
- bar := progressbar.NewOptions(len(tasks),
- progressbar.OptionSetDescription("Downloading"),
- progressbar.OptionSetWriter(os.Stderr),
- progressbar.OptionShowCount(),
- progressbar.OptionShowIts(),
- progressbar.OptionSetWidth(30),
- progressbar.OptionThrottle(100*time.Millisecond),
- progressbar.OptionOnCompletion(func() {
- fmt.Fprint(os.Stderr, "\n")
- }),
- )
- // 5. 并发下载
- var wg sync.WaitGroup
- sem := make(chan struct{}, Concurrency)
- results := make([]bool, len(tasks)) // 修复:使用slice存储结果
- for i, task := range tasks {
- wg.Add(1)
- sem <- struct{}{} // 获取信号量
- go func(idx int, t DownloadTask) {
- results[idx] = downloadOne(client, sem, &wg, t, bar)
- }(i, task)
- }
- wg.Wait()
- // 6. 统计结果
- var failedAgain []DownloadTask
- successCount := 0
- for i, success := range results {
- if !success {
- failedAgain = append(failedAgain, tasks[i])
- } else {
- successCount++
- }
- }
- // 7. 保存失败记录
- if len(failedAgain) > 0 {
- saveFailedImg(failedAgain)
- logger.Printf("本轮仍有 %d 张下载失败,已写入 %s", len(failedAgain), FailedRecordImg)
- } else {
- os.Remove(FailedRecordImg)
- logger.Printf("全部下载完成!成功 %d 张", successCount)
- }
- }
|