main.py

import os
import time

import httpx
from bs4 import BeautifulSoup

use_proxy = 0
base_url = 'https://jcomic.net'
herf_url = '/eps/'
# comico_url = '%E7%99%BE%E5%90%88%E3%83%95%E3%82%A7%E3%83%81LIFE'
comico_url = '神崎咲良ハーレム化計画'
target_url = base_url + herf_url + comico_url

def save_img(folder_path, img_links):
    with httpx.Client() as client:
        for index, img_url in enumerate(img_links, start=1):
            try:
                # Request the image content
                response = client.get(img_url)
                if response.status_code != 200:
                    print(f"Failed to download image {img_url}, status code: {response.status_code}")
                    continue
                # Build the file name, e.g. 0001.png, 0002.png
                file_name = f"{str(index).zfill(4)}.png"
                file_path = os.path.join(folder_path, file_name)
                # Save the image to disk
                with open(file_path, 'wb') as file:
                    file.write(response.content)
                print(f"Image saved: {file_path}")
            except Exception as e:
                print(f"Error while downloading image {img_url}: {e}")
            # Throttle requests between downloads
            time.sleep(1)

def get_imgs(folder_path, chapter_data):
    img_links = []
    with httpx.Client() as client:
        for chapter_name, url in chapter_data.items():
            # Create a folder for the chapter
            chapter_folder = os.path.join(folder_path, chapter_name)
            if not os.path.exists(chapter_folder):
                os.makedirs(chapter_folder)
            # Request the chapter page
            response = client.get(url)
            if response.status_code != 200:
                print(f"Failed to access {url}, status code: {response.status_code}")
                continue
            # Parse the HTML
            soup = BeautifulSoup(response.text, 'html.parser')
            # Locate the parent element that holds the images
            parent_element = soup.select_one('body > div.container > div.row.col-lg-12.col-md-12.col-xs-12')
            if not parent_element:
                print(f"No image container found for {chapter_name}")
                continue
            # Collect all image elements
            img_elements = parent_element.select('img')
            total_images = len(img_elements)
            print(f'{chapter_name}: {total_images} images in total')
            # Record the URL of each image
            for img in img_elements:
                img_url = img.get('src')
                if img_url:
                    img_links.append(img_url)
    return img_links

def save_urls(folder_path, img_links):
    # Path of the text file that stores the image links
    save_path = os.path.join(folder_path, 'img_links.txt')
    # Write the image links, one per line
    with open(save_path, 'w', encoding='utf-8') as file:
        for link in img_links:
            file.write(link + '\n')
    print(f"Image links saved to: {save_path}")

def new_folder(page_title):
    # Directory of the current script
    script_dir = os.path.dirname(os.path.abspath(__file__))
    download_dir = os.path.join(script_dir, 'downloads')
    if not os.path.exists(download_dir):
        os.makedirs(download_dir)
    if page_title:
        # Build the target folder path
        folder_path = os.path.join(download_dir, page_title)
        # Create the folder if it does not exist yet
        if not os.path.exists(folder_path):
            os.makedirs(folder_path)
        return folder_path

def get_chapter_data():
    result = {}
    # Fetch the comic index page with httpx
    with httpx.Client() as client:
        response = client.get(target_url)
        if response.status_code == 200:
            soup = BeautifulSoup(response.text, 'html.parser')
            # All chapter links under the given selector
            elements = soup.select('body > div.container > div:nth-child(3) > div:nth-child(2) a')
            # Extract the URL and text of each link
            for element in elements:
                url = element.get('href')
                text = element.get_text()
                result[text] = base_url + url
    return result

def main():
    # 1. Fetch the chapter data from the index page
    chapter_data = get_chapter_data()
    # 2. Create a folder named after the title to hold the images
    folder_path = new_folder(comico_url)
    # 3. Walk through the chapters and collect the image links
    img_links = get_imgs(folder_path, chapter_data)
    # 4. Save the URLs into the newly created folder
    save_urls(folder_path, img_links)
    # 5. Download every link in img_links into folder_path, naming the files 0001.png, 0002.png, ...
    save_img(folder_path, img_links)

if __name__ == '__main__':
    main()
    print('done!')