// 2step.go (7.6 KB)

  1. package main
import (
	"encoding/json"
	"fmt"
	"io"
	"log"
	"net/http"
	"os"
	"path/filepath"
	"regexp"
	"sort"
	"strings"
	"sync"
	"time"

	"fyne.io/fyne/v2/widget"
	"github.com/schollz/progressbar/v3"
)
  17. // -------------------- 全局变量 --------------------
  18. var (
  19. imgURLRegex = regexp.MustCompile(`<img id="img" src="(.*?)"`)
  20. extRegex = regexp.MustCompile(`\.(jpg|jpeg|png|gif|webp)$`)
  21. logger = log.New(os.Stdout, "", log.LstdFlags|log.Lmsgprefix)
  22. )
  23. // -------------------- 数据结构 --------------------
  24. type DownloadTask struct {
  25. ImgPath string `json:"img_path"`
  26. ImgURL string `json:"img_url"`
  27. }
  28. // -------------------- 工具函数 --------------------
  29. func loadFailedImg() []DownloadTask {
  30. if _, err := os.Stat(FailedRecordImg); os.IsNotExist(err) {
  31. return []DownloadTask{}
  32. }
  33. data, err := os.ReadFile(FailedRecordImg)
  34. if err != nil {
  35. logger.Printf("加载失败记录失败 -> %v", err)
  36. return []DownloadTask{}
  37. }
  38. var tasks []DownloadTask
  39. if err := json.Unmarshal(data, &tasks); err != nil {
  40. logger.Printf("解析失败记录失败 -> %v", err)
  41. return []DownloadTask{}
  42. }
  43. return tasks
  44. }
  45. func saveFailedImg(failed []DownloadTask) {
  46. data, err := json.MarshalIndent(failed, "", " ")
  47. if err != nil {
  48. logger.Printf("序列化失败记录失败 -> %v", err)
  49. return
  50. }
  51. if err := os.WriteFile(FailedRecordImg, data, 0644); err != nil {
  52. logger.Printf("保存失败记录失败 -> %v", err)
  53. }
  54. }
  55. func fileExists(path string) bool {
  56. _, err := os.Stat(path)
  57. return err == nil
  58. }
  59. func getFileExtension(url string) string {
  60. match := extRegex.FindStringSubmatch(strings.ToLower(url))
  61. if len(match) > 1 {
  62. return match[1]
  63. }
  64. return "jpg" // 默认扩展名
  65. }
  66. // -------------------- 下载核心 --------------------
  67. func downloadOne(client *http.Client, sem chan struct{}, wg *sync.WaitGroup, task DownloadTask, bar *progressbar.ProgressBar) bool {
  68. defer wg.Done()
  69. defer func() { <-sem }()
  70. imgPath, imgURL := task.ImgPath, task.ImgURL
  71. for attempt := 1; attempt <= RetryPerImg; attempt++ {
  72. success := func() bool {
  73. // 1. 获取详情页
  74. resp, err := client.Get(imgURL)
  75. if err != nil {
  76. logger.Printf("[ERROR] %s -> %v (尝试 %d/%d)", imgURL, err, attempt, RetryPerImg)
  77. return false
  78. }
  79. defer resp.Body.Close()
  80. if resp.StatusCode != http.StatusOK {
  81. if resp.StatusCode == http.StatusTooManyRequests {
  82. wait := 1 << (attempt - 1) // 指数退避
  83. logger.Printf("[429] 等待 %ds 后重试(%d/%d)", wait, attempt, RetryPerImg)
  84. time.Sleep(time.Duration(wait) * time.Second)
  85. return false
  86. }
  87. logger.Printf("[HTTP %d] %s", resp.StatusCode, imgURL)
  88. return false
  89. }
  90. // 读取响应内容
  91. body, err := io.ReadAll(resp.Body)
  92. if err != nil {
  93. logger.Printf("[ERROR] 读取响应失败 %s -> %v", imgURL, err)
  94. return false
  95. }
  96. // 解析真实图片链接
  97. realURLMatch := imgURLRegex.FindStringSubmatch(string(body))
  98. if len(realURLMatch) < 2 {
  99. logger.Printf("未解析到真实图片链接: %s", imgURL)
  100. return false
  101. }
  102. realURL := realURLMatch[1]
  103. // 2. 下载真实图片
  104. ext := getFileExtension(realURL)
  105. finalPath := strings.TrimSuffix(imgPath, filepath.Ext(imgPath)) + "." + ext
  106. // 检查文件是否已存在
  107. if fileExists(finalPath) {
  108. logger.Printf("已存在,跳过: %s", filepath.Base(finalPath))
  109. bar.Add(1)
  110. return true
  111. }
  112. // 创建目录
  113. if err := os.MkdirAll(filepath.Dir(finalPath), 0755); err != nil {
  114. logger.Printf("[ERROR] 创建目录失败 %s -> %v", filepath.Dir(finalPath), err)
  115. return false
  116. }
  117. // 下载图片
  118. imgResp, err := client.Get(realURL)
  119. if err != nil {
  120. logger.Printf("[ERROR] 下载图片失败 %s -> %v", realURL, err)
  121. return false
  122. }
  123. defer imgResp.Body.Close() // 修复:应该是 Body.Close()
  124. if imgResp.StatusCode != http.StatusOK {
  125. logger.Printf("[HTTP %d] %s", imgResp.StatusCode, realURL)
  126. return false
  127. }
  128. // 创建文件
  129. file, err := os.Create(finalPath)
  130. if err != nil {
  131. logger.Printf("[ERROR] 创建文件失败 %s -> %v", finalPath, err)
  132. return false
  133. }
  134. defer file.Close()
  135. // 写入文件
  136. _, err = io.Copy(file, imgResp.Body)
  137. if err != nil {
  138. logger.Printf("[ERROR] 写入文件失败 %s -> %v", finalPath, err)
  139. return false
  140. }
  141. logger.Printf("[OK] %s", filepath.Base(finalPath))
  142. bar.Add(1)
  143. return true
  144. }()
  145. if success {
  146. return true
  147. }
  148. if attempt < RetryPerImg {
  149. time.Sleep(time.Second) // 重试前等待
  150. }
  151. }
  152. return false
  153. }
  154. // -------------------- 扫描待下载列表 --------------------
  155. func scanTasks() ([]DownloadTask, error) {
  156. var tasks []DownloadTask
  157. err := filepath.Walk(DownloadDir, func(path string, info os.FileInfo, err error) error {
  158. if err != nil {
  159. return err
  160. }
  161. if info.IsDir() || filepath.Ext(path) != ".json" {
  162. return nil
  163. }
  164. // 读取JSON文件
  165. data, err := os.ReadFile(path)
  166. if err != nil {
  167. logger.Printf("读取JSON文件失败 %s -> %v", path, err)
  168. return nil
  169. }
  170. var urlMap map[string]string
  171. if err := json.Unmarshal(data, &urlMap); err != nil {
  172. logger.Printf("解析JSON失败 %s -> %v", path, err)
  173. return nil
  174. }
  175. dir := filepath.Dir(path)
  176. for imgName, imgURL := range urlMap {
  177. imgPathWithoutExt := filepath.Join(dir, imgName)
  178. // 检查文件是否已存在(任意扩展名)
  179. exists := false
  180. for _, ext := range []string{".jpg", ".jpeg", ".png", ".gif", ".webp"} {
  181. if fileExists(imgPathWithoutExt + ext) {
  182. exists = true
  183. break
  184. }
  185. }
  186. if !exists {
  187. tasks = append(tasks, DownloadTask{
  188. ImgPath: imgPathWithoutExt,
  189. ImgURL: imgURL,
  190. })
  191. }
  192. }
  193. return nil
  194. })
  195. return tasks, err
  196. }
  197. // -------------------- 主流程 --------------------
  198. func ImgDownloader(ip, port string, output *widget.Entry) {
  199. logger.SetPrefix("[INFO] ")
  200. // 1. 优先重试上次失败
  201. failedTasks := loadFailedImg()
  202. if len(failedTasks) > 0 {
  203. logger.Printf("优先重试上次失败任务: %d 张", len(failedTasks))
  204. }
  205. // 2. 扫描新任务
  206. newTasks, err := scanTasks()
  207. if err != nil {
  208. logger.Printf("扫描任务失败: %v", err)
  209. return
  210. }
  211. // 合并任务
  212. tasks := append(failedTasks, newTasks...)
  213. if len(tasks) == 0 {
  214. logger.Println("没有需要下载的图片,收工!")
  215. return
  216. }
  217. logger.Printf("开始下载 %d 张图片", len(tasks))
  218. // 3. 创建HTTP客户端
  219. proxy := ip + port
  220. client := createHTTPClient(proxy)
  221. // 4. 创建进度条
  222. bar := progressbar.NewOptions(len(tasks),
  223. progressbar.OptionSetDescription("Downloading"),
  224. progressbar.OptionSetWriter(os.Stderr),
  225. progressbar.OptionShowCount(),
  226. progressbar.OptionShowIts(),
  227. progressbar.OptionSetWidth(30),
  228. progressbar.OptionThrottle(100*time.Millisecond),
  229. progressbar.OptionOnCompletion(func() {
  230. fmt.Fprint(os.Stderr, "\n")
  231. }),
  232. )
  233. // 5. 并发下载
  234. var wg sync.WaitGroup
  235. sem := make(chan struct{}, Concurrency)
  236. results := make([]bool, len(tasks)) // 修复:使用slice存储结果
  237. for i, task := range tasks {
  238. wg.Add(1)
  239. sem <- struct{}{} // 获取信号量
  240. go func(idx int, t DownloadTask) {
  241. results[idx] = downloadOne(client, sem, &wg, t, bar)
  242. }(i, task)
  243. }
  244. wg.Wait()
  245. // 6. 统计结果
  246. var failedAgain []DownloadTask
  247. successCount := 0
  248. for i, success := range results {
  249. if !success {
  250. failedAgain = append(failedAgain, tasks[i])
  251. } else {
  252. successCount++
  253. }
  254. }
  255. // 7. 保存失败记录
  256. if len(failedAgain) > 0 {
  257. saveFailedImg(failedAgain)
  258. logger.Printf("本轮仍有 %d 张下载失败,已写入 %s", len(failedAgain), FailedRecordImg)
  259. } else {
  260. os.Remove(FailedRecordImg)
  261. logger.Printf("全部下载完成!成功 %d 张", successCount)
  262. }
  263. }