1 năm trước cách đây · e74853c1cc
--- a/flaticon/flaticon.py
+++ b/flaticon/flaticon.py
@@ -52,14 +52,6 @@ def open_browser(target_urls):
 
															             img_sequence_num = 1
														
 
															             for page_count in range(1, 999):
														
 
															-                # 检查一下当前页面是不是 404
														
 
															-                try:
														
 
															-                    page.wait_for_selector(not_find_page_selector, state="attached", timeout=2000)
														
 
															-                    print(f'总页数是 {page_count - 1} 在 url: {goto_url}')
														
 
															-                    break
														
 
															-                except:
														
 
															-                    pass
														
 
															-
														
 
															                 try:
														
 
															                     goto_url = target_url + pages.format(page_count)
														
 
															                     page.goto(goto_url, timeout=5000)
														
@@ -67,6 +59,14 @@ def open_browser(target_urls):
 
															                     print(e)
														
 
															                     print(f'页面加载失败：url：{goto_url}')
														
 
															+                try:
														
 
															+                    # 检查一下当前页面是不是 404
														
 
															+                    page.wait_for_selector(not_find_page_selector, state="attached", timeout=2000)
														
 
															+                    print(f'总页数是 {page_count - 1} 在 url: {goto_url}')
														
 
															+                    break
														
 
															+                except:
														
 
															+                    pass
														
 
															+
														
 
															                 if page_count == 1:
														
 
															                     # 获取title
														
 
															                     page.wait_for_selector(title_selector, state="attached", timeout=10000)
														
--- a/kaizty/kaizty.py
+++ b/kaizty/kaizty.py
@@ -14,8 +14,10 @@ import httpx
 
															 from playwright.sync_api import sync_playwright
														
 
															 target = 'kaizty'
														
 
															-step = 1  # 1 = 获取img链接, 2 = 下载图片, 3 = 1 + 2, 4 = 调试
														
 
															-local_proxy = 1
														
 
															+
														
 
															+# 1 = 获取img链接, 2 = 下载图片, 3 = 1 + 2, 4 = 调试
														
 
															+step = 2
														
 
															+local_proxy = 0
														
 
															 title_selector = '#pack-view__inner > section.pack-view__header > h1'  # 获取标题选择器
														
 
															 img_selector = '#pack-view__inner > section.search-result > ul > li:nth-child({}) > div > a > img'  # 获取图片的url
														
 
															 img_count_selector = '#pack-view__inner > section.pack-view__header > p'  # 获取图片总数选择器
														
@@ -53,17 +55,23 @@ def open_browser(target_urls):
 
															             img_sequence_num = 1
														
 
															-            for page_count in range(30, 31):
														
 
															+            for page_count in range(1, 999):
														
 
															                 try:
														
 
															                     goto_url = target_url + pages.format(page_count)
														
 
															-                    page.goto(goto_url, timeout=5000)
														
 
															-                    page.wait_for_load_state('domcontentloaded')
														
 
															+                    page.goto(goto_url, timeout=20000)
														
 
															+                    page.wait_for_selector('body > div.housing > div.housing-coveringap > div.thrcol.refill.afsite > div.thr-ot.hid > div > div.c-content > div:nth-child(3) > div')
														
 
															                 except Exception as e:
														
 
															                     print(e)
														
 
															                     print(f'页面加载失败：url：{goto_url}')
														
 
															                 page_source = page.content()
														
 
															+                if "EMPTY" in page_source:
														
 
															+                    print('没有下一页了， 跳出循环')
														
 
															+                    break
														
 
															+
														
 
															+                print(f'开始获取第 {page_count} 页')
														
 
															+
														
 
															                 title = page.title()
														
 
															                 img_list = re.findall('<meta property="og:image" content="(.*?)"', page_source)
														
@@ -129,7 +137,22 @@ def download_img(load_data, target_file_path):
 
															         while retry:
														
 
															             try:
														
 
															                 resp = httpx.get(img_url, headers={
														
 
															-                    "user-agent": "Mozilla/5.0 (Macintosh; Intel Mac OS X 10_15_7) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/130.0.0.0 Safari/537.36"
														
 
															+                    "Accept": "text/html,application/xhtml+xml,application/xml;q=0.9,image/avif,image/webp,image/apng,*/*;q=0.8,application/signed-exchange;v=b3;q=0.7",
														
 
															+                    "Accept-Encoding": "gzip, deflate, br, zstd",
														
 
															+                    "Accept-Language": "zh-CN,zh;q=0.9",
														
 
															+                    "Cache-Control": "max-age=0",
														
 
															+                    "Cookie": "asgfp2=77542c163334cb6fe4f6c38c671acfdd; _ga=GA1.1.1971075315.1723678888; _ga_WF05TQ75CR=GS1.1.1726202265.4.1.1726202301.24.0.0; asgfp2=77542c163334cb6fe4f6c38c671acfdd; sp-chjeuHenj=Po",
														
 
															+                    "Priority": "u=0, i",
														
 
															+                    "Referer": "https://www.kaizty.com/photos/bFh6Njdrc01HM0FxeEhrVFVXM2xlUT09.html?page=9",
														
 
															+                    "Sec-CH-UA": "\"Google Chrome\";v=\"131\", \"Chromium\";v=\"131\", \"Not_A Brand\";v=\"24\"",
														
 
															+                    "Sec-CH-UA-Mobile": "?0",
														
 
															+                    "Sec-CH-UA-Platform": "\"Windows\"",
														
 
															+                    "Sec-Fetch-Dest": "document",
														
 
															+                    "Sec-Fetch-Mode": "navigate",
														
 
															+                    "Sec-Fetch-Site": "same-origin",
														
 
															+                    "Sec-Fetch-User": "?1",
														
 
															+                    "Upgrade-Insecure-Requests": "1",
														
 
															+                    "User-Agent": "Mozilla/5.0 (Windows NT 10.0; Win64; x64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/131.0.0.0 Safari/537.36"
														
 
															                 })
														
 
															                 with open(img_file_path, 'wb') as f:
														
 
															                     f.write(resp.content)
														
@@ -154,13 +177,13 @@ def save_data(data_item):
 
															     for k, v in data_item.items():
														
 
															         for data in v:
														
 
															             # 检查img_url是否重复
														
 
															-            cursor.execute("SELECT img_url FROM flaticon WHERE img_url = %s", (data['url'],))
														
 
															+            cursor.execute(f"SELECT img_url FROM {target} WHERE img_url = %s", (data['url'],))
														
 
															             if cursor.fetchone() is None:
														
 
															                 # 插入数据
														
 
															-                cursor.execute("""
														
 
															-                    INSERT INTO flaticon (name, target_site, file_title, set_name, serial, download_state, image_suffix, img_url)
														
 
															+                cursor.execute(("""
														
 
															+                    INSERT INTO {target} (name, target_site, file_title, set_name, serial, download_state, image_suffix, img_url)
														
 
															                     VALUES (%s, %s, %s, %s, %s, %s, %s, %s)
														
 
															-                """, (
														
 
															+                """).format(target=target), (
														
 
															                     None,
														
 
															                     target,
														
 
															                     data['file_title'],
														
@@ -252,7 +275,7 @@ def check_psql():
 
															             serial INT,
														
 
															             download_state BOOLEAN,
														
 
															             image_suffix VARCHAR(50),
														
 
															-            img_url VARCHAR(255)
														
 
															+            img_url TEXT
														
 
															         );
														
 
															         """)
														
 
															         print(f"表 '{target}' 创建成功。")