2step.py

import asyncio
import json
import os
import httpx
async def download_image(session, img_path, img_url, retry_count=3):
    for attempt in range(retry_count):
        try:
            # Request the image
            response = await session.get(img_url)
            response.raise_for_status()  # Raise if the request failed
            # Make sure the target folder exists
            os.makedirs(os.path.dirname(img_path), exist_ok=True)
            # Write the image content to the file
            with open(img_path, 'wb') as f:
                f.write(response.content)
            # print(f"Image downloaded: {img_path}")
            return True
        except httpx.HTTPStatusError as e:
            if e.response.status_code == 429:
                wait_time = 2 ** attempt  # Exponential backoff
                # print(f"429 Too Many Requests, retrying in {wait_time} seconds...")
                await asyncio.sleep(wait_time)
            else:
                # print(f"Failed to download image: {img_url}, error: {e}")
                return False
        except Exception as e:
            # print(f"Failed to download image: {img_url}, error: {e}")
            await asyncio.sleep(1)  # Simple delay before retrying
    # print(f"Image download failed after the maximum number of retries: {img_url}")
    return False
# Download all images asynchronously
async def download_all_images(ready_to_download_list, max_concurrent_downloads=5):
    async with httpx.AsyncClient() as session:
        tasks = []
        semaphore = asyncio.Semaphore(max_concurrent_downloads)  # Limit concurrency

        async def bounded_download(item):
            async with semaphore:
                return await download_image(session, item['img_path'], item['img_url'])

        for item in ready_to_download_list:
            task = asyncio.create_task(bounded_download(item))
            tasks.append(task)
        # Wait for all tasks to finish
        await asyncio.gather(*tasks)
# Load the list of images that still need to be downloaded
def load_ready_to_download_list():
    result = []
    # Resolve the project root directory
    project_root = os.path.dirname(os.path.abspath(__file__))
    downloads_path = os.path.join(project_root, 'downloads')
    all_path = []
    for root, dirs, files in os.walk(downloads_path):
        for dir_name in dirs:
            all_path.append(os.path.join(root, dir_name))
    for path in all_path:
        json_files = [f for f in os.listdir(path) if f.endswith('.json')]
        if len(json_files) != 1:
            continue
        json_file = json_files[0]
        json_path = os.path.join(path, json_file)
        with open(json_path, 'r', encoding='utf-8') as f:
            img_list = json.load(f)
        for k, v in img_list.items():
            img_path = os.path.join(path, k)
            if os.path.exists(img_path):
                continue
            result.append({
                'img_path': img_path,
                'img_url': v
            })
    return result
# Main entry point
async def start_download():
    for retry in range(3):
        ready_to_download_list = load_ready_to_download_list()
        print(f"Images queued for download: {len(ready_to_download_list)}")
        if not ready_to_download_list:
            print("All images already downloaded, or nothing to download")
            return
        await download_all_images(ready_to_download_list)
        await asyncio.sleep(2)  # Wait 2 seconds before re-checking
if __name__ == "__main__":
    asyncio.run(start_download())
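
For reference, load_ready_to_download_list() assumes that each subfolder of downloads/ contains exactly one .json manifest mapping local filenames to their source URLs, and it only queues files that are not yet on disk. The sketch below builds a hypothetical manifest in that shape so the script has something to pick up; the folder name, file names, and URLs are illustrative assumptions, not taken from the original project.

import json
import os

# Hypothetical layout: downloads/example_album/manifest.json
# (any single .json file per folder works; the name is an assumption)
sample_dir = os.path.join(os.path.dirname(os.path.abspath(__file__)), 'downloads', 'example_album')
os.makedirs(sample_dir, exist_ok=True)

manifest = {
    '001.jpg': 'https://example.com/images/001.jpg',  # local filename -> source URL (illustrative)
    '002.jpg': 'https://example.com/images/002.jpg',
}

with open(os.path.join(sample_dir, 'manifest.json'), 'w', encoding='utf-8') as f:
    json.dump(manifest, f, ensure_ascii=False, indent=2)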