| 1234567891011121314151617181920212223242526272829303132333435363738394041424344454647484950515253 |
import re
import time
from typing import Callable, List, Optional

import httpx
# Pack landing page; follow-up pages are requested as "<url>/2", "<url>/3", ...
url = 'https://www.flaticon.com/packs/vegetable-17858464'

# Browser-like request headers sent with every page request.
# NOTE(review): "accept-encoding" advertises br and zstd; httpx presumably only
# decodes those when the optional brotli/zstandard packages are installed --
# confirm, otherwise the response text may come back undecoded.
headers = {
    "accept": "text/html,application/xhtml+xml,application/xml;q=0.9,image/avif,image/webp,image/apng,*/*;q=0.8,application/signed-exchange;v=b3;q=0.7",
    "accept-encoding": "gzip, deflate, br, zstd",
    "accept-language": "zh-CN,zh;q=0.9,en;q=0.8,en-GB;q=0.7,en-US;q=0.6",
    "cache-control": "max-age=0",
    "priority": "u=0, i",
    "referer": "https://www.flaticon.com/icons",
    "sec-ch-ua": "\"Chromium\";v=\"128\", \"Not;A=Brand\";v=\"24\", \"Microsoft Edge\";v=\"128\"",
    "sec-ch-ua-mobile": "?0",
    "sec-ch-ua-platform": "\"macOS\"",
    "sec-fetch-dest": "document",
    "sec-fetch-mode": "navigate",
    "sec-fetch-site": "same-origin",
    "sec-fetch-user": "?1",
    "upgrade-insecure-requests": "1",
    "user-agent": "Mozilla/5.0 (Macintosh; Intel Mac OS X 10_15_7) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/128.0.0.0 Safari/537.36 Edg/128.0.0.0"
}

# Un-rendered template value that appears in a data-png attribute; not a URL.
PLACEHOLDER_PNG = '{{png512}}'
# Text found on the page served once the page number runs past the last one.
NOT_FOUND_MARKER = 'Oopsies... Seems like you got lost! - Flaticon'
# A pack whose icon count exceeds this is treated as spanning several pages.
ICONS_PER_PAGE = 50
# Exclusive upper bound for follow-up page numbers (pages 2..998 are tried).
PAGE_LIMIT = 999
# Pause between consecutive page requests, in seconds.
PAGE_DELAY_SECONDS = 2

_PNG_URL_RE = re.compile(r'data-png="(.*?)"')
# Captures the first space-delimited word of the og:title content.
_ICON_COUNT_RE = re.compile('<meta property=\'og:title\' content="(.*?) ')


def extract_png_urls(html: str) -> List[str]:
    """Return every ``data-png`` value in *html* except the template placeholder."""
    return [found for found in _PNG_URL_RE.findall(html) if found != PLACEHOLDER_PNG]


def parse_icon_count(html: str) -> Optional[int]:
    """Return the leading number of the og:title content, or ``None``.

    ``None`` is returned when the meta tag is missing or its first word is
    not an integer, instead of raising ``IndexError``/``ValueError``.
    """
    match = _ICON_COUNT_RE.search(html)
    if match is None:
        return None
    try:
        # Tolerate a thousands separator such as "1,234".
        return int(match.group(1).replace(',', ''))
    except ValueError:
        return None


def collect_png_urls(
    fetch_page: Callable[[str], str],
    pack_url: str,
    delay: float = PAGE_DELAY_SECONDS,
) -> List[str]:
    """Gather the PNG URLs of a pack across all of its pages.

    Args:
        fetch_page: Callable that takes a page URL and returns its HTML text.
        pack_url: URL of the first page of the pack.
        delay: Seconds to wait before each follow-up page request.

    Returns:
        The PNG URLs in page order, placeholder entries removed.
    """
    html = fetch_page(pack_url)
    collected = extract_png_urls(html)

    icon_count = parse_icon_count(html)
    # A known, small count means everything is on the first page. An unknown
    # count falls through and probes further pages until the not-found page.
    if icon_count is not None and icon_count <= ICONS_PER_PAGE:
        return collected

    for page in range(2, PAGE_LIMIT):
        time.sleep(delay)
        html = fetch_page(f'{pack_url}/{page}')
        # Check the page just fetched, so the not-found page is never parsed.
        if NOT_FOUND_MARKER in html:
            break
        collected.extend(extract_png_urls(html))
    return collected


def main() -> None:
    """Collect all PNG URLs of the pack at ``url`` and print them with their count."""
    # One client for the whole crawl so the connection is reused between pages.
    with httpx.Client(headers=headers) as client:

        def fetch_page(page_url: str) -> str:
            resp = client.get(page_url)
            resp.encoding = 'utf-8'
            return resp.text

        all_img_url_list = collect_png_urls(fetch_page, url)

    # Report the accumulated list, not only the first page's raw matches.
    print(all_img_url_list)
    print(len(all_img_url_list))


if __name__ == "__main__":
    main()
|