|
|
@@ -7,19 +7,19 @@ sys.path.append(os.path.join(os.path.abspath(__file__).split('ResourceCollection
|
|
|
import httpx
|
|
|
from playwright.sync_api import sync_playwright
|
|
|
|
|
|
-target_url = 'https://www.flaticon.com/packs/medicine-103'
|
|
|
+target_url = 'https://www.flaticon.com/packs/summer-watermelon-17517790'
|
|
|
|
|
|
title_selector = '#pack-view__inner > section.pack-view__header > h1'
|
|
|
-selector = '#pack-view__inner > section.search-result > ul > li:nth-child({}) > div > a'
|
|
|
+selector = '#pack-view__inner > section.search-result > ul > li:nth-child({}) > div > a > img'
|
|
|
img_selector = '#detail > div > div.row.detail__top.mg-none > section > div > div > div.row.row--vertical-center.mg-none.full-height.detail__icon__inner > div > div > img'
|
|
|
img_count_selector = '#pack-view__inner > section.pack-view__header > p'
|
|
|
|
|
|
|
|
|
def main():
|
|
|
with sync_playwright() as playwright:
|
|
|
- browser = playwright.chromium.launch(
|
|
|
+ browser = playwright.webkit.launch(
|
|
|
headless=True,
|
|
|
- proxy={"server": "http://127.0.0.1:7890"}
|
|
|
+ proxy={"server": "http://127.0.0.1:7897"}
|
|
|
)
|
|
|
context = browser.new_context(viewport={'width': 1280, 'height': 700})
|
|
|
page = context.new_page()
|
|
|
@@ -57,50 +57,40 @@ def main():
|
|
|
|
|
|
# 遍历所有<a>标签,提取href属性
|
|
|
for element in elements:
|
|
|
- href = element.get_attribute('href')
|
|
|
- if href:
|
|
|
+ src = element.get_attribute('src')
|
|
|
+ if src:
|
|
|
+ src = src.replace('/128/', '/512/')
|
|
|
sequence = str(i).zfill(2)
|
|
|
urls.append({
|
|
|
- 'url': href,
|
|
|
- 'img': f'{img_name}_{sequence}'
|
|
|
+ 'url': src,
|
|
|
+ 'img': f'{img_name}_{sequence}.png'
|
|
|
})
|
|
|
- print('已获取第一层url')
|
|
|
-
|
|
|
- img_urls = []
|
|
|
- # 第二层 url
|
|
|
- for url in urls:
|
|
|
- # 如果png文件存在, 即已经下载过, 直接跳过
|
|
|
- img_png_name = url['img'] + 'png'
|
|
|
- img_png_path = os.path.join(file_path, img_png_name)
|
|
|
- if os.path.exists(img_png_path):
|
|
|
- continue
|
|
|
-
|
|
|
- page.goto(url['url'])
|
|
|
- page.wait_for_selector(img_selector, state="attached", timeout=10000)
|
|
|
- img_elements = page.query_selector_all(img_selector)
|
|
|
- for img_element in img_elements:
|
|
|
- img_url = img_element.get_attribute('src')
|
|
|
- img_urls.append({
|
|
|
- 'target_img_url': img_url,
|
|
|
- 'target_img_name': url['img'] + '.png'
|
|
|
- })
|
|
|
-
|
|
|
- print('已获取第二层url\n关闭浏览器')
|
|
|
+ print('已获取所有图片url')
|
|
|
|
|
|
page.close()
|
|
|
browser.close()
|
|
|
|
|
|
print('正在下载图片')
|
|
|
- for img_url in img_urls:
|
|
|
- target_img_url = img_url['target_img_url']
|
|
|
- target_img_name = img_url['target_img_name']
|
|
|
- resp = httpx.get(target_img_url, headers={
|
|
|
- "user-agent": "Mozilla/5.0 (Macintosh; Intel Mac OS X 10_15_7) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/130.0.0.0 Safari/537.36"
|
|
|
- })
|
|
|
- with open(os.path.join(file_path, target_img_name), 'wb') as f:
|
|
|
- f.write(resp.content)
|
|
|
- print(f'已下载: {target_img_name}')
|
|
|
- time.sleep(1)
|
|
|
+ for url in urls:
|
|
|
+ # 如果png文件存在, 即已经下载过, 直接跳过
|
|
|
+ target_img_url = url['url']
|
|
|
+ img_png_name = url['img']
|
|
|
+ target_img_name = os.path.join(file_path, img_png_name)
|
|
|
+ if os.path.exists(target_img_name):
|
|
|
+ print(f'图片 {img_png_name} 已存在')
|
|
|
+ continue
|
|
|
+
|
|
|
+ try:
|
|
|
+ resp = httpx.get(target_img_url, headers={
|
|
|
+ "user-agent": "Mozilla/5.0 (Macintosh; Intel Mac OS X 10_15_7) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/130.0.0.0 Safari/537.36"
|
|
|
+ })
|
|
|
+ with open(target_img_name, 'wb') as f:
|
|
|
+ f.write(resp.content)
|
|
|
+ print(f'已下载: {img_png_name}')
|
|
|
+ time.sleep(1)
|
|
|
+ except Exception as e:
|
|
|
+ print(e)
|
|
|
+
|
|
|
|
|
|
print(f'{title} : 已下载完成')
|
|
|
|