# test.py
import re
import sys
import time

import httpx
  4. url = 'https://www.flaticon.com/packs/vegetable-17858464'
  5. headers = {
  6. "accept": "text/html,application/xhtml+xml,application/xml;q=0.9,image/avif,image/webp,image/apng,*/*;q=0.8,application/signed-exchange;v=b3;q=0.7",
  7. "accept-encoding": "gzip, deflate, br, zstd",
  8. "accept-language": "zh-CN,zh;q=0.9,en;q=0.8,en-GB;q=0.7,en-US;q=0.6",
  9. "cache-control": "max-age=0",
  10. "priority": "u=0, i",
  11. "referer": "https://www.flaticon.com/icons",
  12. "sec-ch-ua": "\"Chromium\";v=\"128\", \"Not;A=Brand\";v=\"24\", \"Microsoft Edge\";v=\"128\"",
  13. "sec-ch-ua-mobile": "?0",
  14. "sec-ch-ua-platform": "\"macOS\"",
  15. "sec-fetch-dest": "document",
  16. "sec-fetch-mode": "navigate",
  17. "sec-fetch-site": "same-origin",
  18. "sec-fetch-user": "?1",
  19. "upgrade-insecure-requests": "1",
  20. "user-agent": "Mozilla/5.0 (Macintosh; Intel Mac OS X 10_15_7) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/128.0.0.0 Safari/537.36 Edg/128.0.0.0"
  21. }
  22. resp = httpx.get(url, headers=headers)
  23. resp.encoding = 'utf-8'
  24. text = resp.text
  25. all_img_url_list = []
  26. img_count = re.findall('<meta property=\'og:title\' content="(.*?) ', text)
  27. img_url_list = re.findall('data-png="(.*?)"', text)
  28. for u in img_url_list:
  29. if u == '{{png512}}':
  30. continue
  31. all_img_url_list.append(u)
  32. if int(img_count[0]) > 50:
  33. for page in range(2, 999):
  34. if 'Oopsies... Seems like you got lost! - Flaticon' in text:
  35. break
  36. next_url = url + '/' + str(page)
  37. resp = httpx.get(next_url, headers=headers)
  38. resp.encoding = 'utf-8'
  39. text = resp.text
  40. next_page_img_url = re.findall('data-png="(.*?)"', text)
  41. for next_img in next_page_img_url:
  42. if next_img == '{{png512}}':
  43. continue
  44. all_img_url_list.append(next_img)
  45. time.sleep(2)
  46. print(img_url_list)
  47. print(len(img_url_list))