Jack преди 1 година
родител
ревизия
e7a1f45100
променени са 2 файла, в които са добавени 55 реда и са изтрити 3 реда
  1. 2 3
      flaticon/main.py
  2. 53 0
      flaticon/test.py

+ 2 - 3
flaticon/main.py

@@ -19,7 +19,7 @@ def main():
     with sync_playwright() as playwright:
         browser = playwright.webkit.launch(
             headless=True,
-            proxy={"server": "http://127.0.0.1:7897"}
+            proxy={"server": "http://127.0.0.1:7890"}
         )
         context = browser.new_context(viewport={'width': 1280, 'height': 700})
         page = context.new_page()
@@ -79,7 +79,7 @@ def main():
             if os.path.exists(target_img_name):
                 print(f'图片 {img_png_name} 已存在')
                 continue
-            
+
             try:
                 resp = httpx.get(target_img_url, headers={
                     "user-agent": "Mozilla/5.0 (Macintosh; Intel Mac OS X 10_15_7) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/130.0.0.0 Safari/537.36"
@@ -90,7 +90,6 @@ def main():
                 time.sleep(1)
             except Exception as e:
                 print(e)
-                    
 
         print(f'{title} : 已下载完成')
 

+ 53 - 0
flaticon/test.py

@@ -0,0 +1,53 @@
+"""Print every icon PNG URL (data-png attribute) found across the pages of one Flaticon pack."""
+import httpx
+import re
+import time
+
+url = 'https://www.flaticon.com/packs/vegetable-17858464'
+headers = {  # browser-style request headers sent with every page fetch
+    "accept": "text/html,application/xhtml+xml,application/xml;q=0.9,image/avif,image/webp,image/apng,*/*;q=0.8,application/signed-exchange;v=b3;q=0.7",
+    "accept-encoding": "gzip, deflate, br, zstd",  # NOTE(review): br/zstd bodies decode only if httpx has those codecs installed - confirm
+    "accept-language": "zh-CN,zh;q=0.9,en;q=0.8,en-GB;q=0.7,en-US;q=0.6",
+    "cache-control": "max-age=0",
+    "priority": "u=0, i",
+    "referer": "https://www.flaticon.com/icons",
+    "sec-ch-ua": "\"Chromium\";v=\"128\", \"Not;A=Brand\";v=\"24\", \"Microsoft Edge\";v=\"128\"",
+    "sec-ch-ua-mobile": "?0",
+    "sec-ch-ua-platform": "\"macOS\"",
+    "sec-fetch-dest": "document",
+    "sec-fetch-mode": "navigate",
+    "sec-fetch-site": "same-origin",
+    "sec-fetch-user": "?1",
+    "upgrade-insecure-requests": "1",
+    "user-agent": "Mozilla/5.0 (Macintosh; Intel Mac OS X 10_15_7) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/128.0.0.0 Safari/537.36 Edg/128.0.0.0"
+}
+
+
+def fetch(page_url):
+    """GET *page_url* with the shared headers and return the body decoded as UTF-8."""
+    resp = httpx.get(page_url, headers=headers)
+    resp.encoding = 'utf-8'
+    return resp.text
+
+
+def png_urls(html):
+    """Return every data-png value in *html*, skipping the literal '{{png512}}' placeholder."""
+    return [u for u in re.findall('data-png="(.*?)"', html) if u != '{{png512}}']
+
+
+text = fetch(url)
+all_img_url_list = png_urls(text)
+img_count = re.findall('<meta property=\'og:title\' content="(.*?) ', text)
+count = img_count[0] if img_count else ''  # first word of og:title; presumably the pack's icon count - confirm
+# Walk pages 2+ when the count exceeds 50 or cannot be read; the loop ends itself on the not-found or an empty page.
+if not count.isdecimal() or int(count) > 50:
+    for page in range(2, 999):
+        text = fetch(url + '/' + str(page))
+        found = png_urls(text)
+        if 'Oopsies... Seems like you got lost! - Flaticon' in text or not found:
+            break  # past the last page (or nothing usable came back): stop before sleeping again
+        all_img_url_list.extend(found)
+        time.sleep(2)  # pause between successive page requests
+
+print(all_img_url_list)  # the accumulated, placeholder-free list from every page
+print(len(all_img_url_list))