1 рік тому · 8a79545a91
--- a/flaticon/flaticon.py
+++ b/flaticon/flaticon.py
@@ -329,5 +329,16 @@ if __name__ == "__main__":
 
				         #  保存 img 链接
			
 
				         all_data = open_browser(targets)
			
 
				         save_data(all_data)
			
 
				+
			
 
				+        # 开始读取数据
			
 
				+        load_data = load_data()
			
 
				+
			
 
				+        # 开始下载 img
			
 
				+        target_file_path = check_local_downloads_dir()
			
 
				+        download_img(load_data, target_file_path)
			
 
				+        print('下载完成, 程序退出')
			
 
				+    elif step == 4:
			
 
				+        #  调试
			
 
				+        pass
			
 
				     else:
			
 
				         pass
			
--- a/kaizty/kaizty.py
+++ b/kaizty/kaizty.py
@@ -14,8 +14,8 @@ import httpx
 
				 from playwright.sync_api import sync_playwright
			
 
				 
			
 
				 target = 'kaizty'
			
 
				-step = 1  # 1 = 获取img链接, 2 = 下载图片, 3 = 1 + 2
			
 
				-local_proxy = 0
			
 
				+step = 4  # 1 = 获取img链接, 2 = 下载图片, 3 = 1 + 2, 4 = 调试
			
 
				+local_proxy = 1
			
 
				 title_selector = '#pack-view__inner > section.pack-view__header > h1'  # 获取标题选择器
			
 
				 img_selector = '#pack-view__inner > section.search-result > ul > li:nth-child({}) > div > a > img'  # 获取图片的url
			
 
				 img_count_selector = '#pack-view__inner > section.pack-view__header > p'  # 获取图片总数选择器
			
@@ -36,7 +36,7 @@ def open_browser(target_urls):
 
				     all_data = {}
			
 
				 
			
 
				     for target_url in target_urls:
			
 
				-        pages = '/{}'
			
 
				+        pages = '?page={}'
			
 
				         urls = []
			
 
				         title = ''  # 存放当前页面的title
			
 
				         with sync_playwright() as playwright:
			
@@ -52,7 +52,8 @@ def open_browser(target_urls):
 
				             page = context.new_page()
			
 
				 
			
 
				             img_sequence_num = 1
			
 
				-            for page_count in range(1, 999):
			
 
				+
			
 
				+            for page_count in range(1, 2):
			
 
				                 # 检查一下当前页面是不是 404
			
 
				                 try:
			
 
				                     page.wait_for_selector(not_find_page_selector, state="attached", timeout=2000)
			
@@ -68,46 +69,31 @@ def open_browser(target_urls):
 
				                     print(e)
			
 
				                     print(f'页面加载失败：url：{goto_url}')
			
 
				 
			
 
				-                if page_count == 1:
			
 
				-                    # 获取title
			
 
				-                    page.wait_for_selector(title_selector, state="attached", timeout=10000)
			
 
				-                    title = page.query_selector(title_selector).inner_text()
			
 
				-
			
 
				-                    img_count = page.query_selector(img_count_selector).inner_text()
			
 
				-                    img_count = int(img_count.split(' ')[0])
			
 
				-
			
 
				-                    invalid_chars = ['<', '>', ':', '"', '/', '\\', '|', '?', '*', '.', '  ', 'Icon Pack ']
			
 
				-                    for char in invalid_chars:
			
 
				-                        title = title.replace(char, '')
			
 
				-
			
 
				-                for i in range(1, img_count + 1):
			
 
				-                    # 选择所有的<a>标签
			
 
				-                    elements = page.query_selector_all(img_selector.format(i))
			
 
				-
			
 
				-                    # 遍历所有<a>标签，提取href属性
			
 
				-                    for element in elements:
			
 
				-                        src = element.get_attribute('src')
			
 
				-                        if src:
			
 
				-                            src = src.replace('/128/', '/512/')
			
 
				-                            suffix = src.split('.')[-1]
			
 
				-                            sequence = str(img_sequence_num).zfill(3)
			
 
				-                            urls.append({
			
 
				-                                'url': src,
			
 
				-                                'file_title': title,
			
 
				-                                'serial': sequence,
			
 
				-                                'img': f'{title}_{sequence}',
			
 
				-                                'suffix': suffix
			
 
				-                            })
			
 
				-                            img_sequence_num += 1
			
 
				-                            break
			
 
				-
			
 
				-            print(f'所有图片URL已获取。总共图片 {len(urls)}')
			
 
				+                page.wait_for_load_state('domcontentloaded')
			
 
				 
			
 
				-            page.close()
			
 
				-            browser.close()
			
 
				+                title = page.title()
			
 
				+                page_source = page.content()
			
 
				+                img_list = re.findall('<meta property="og:image" content="(.*?)"', page_source)
			
 
				+
			
 
				+                title = clean_string(title)
			
 
				+
			
 
				+                for img_url in img_list:
			
 
				+                    suffix = img_url.split('.')[-1]
			
 
				+                    sequence = str(img_sequence_num).zfill(3)
			
 
				+                    urls.append({
			
 
				+                        'url': img_url,
			
 
				+                        'file_title': title,
			
 
				+                        'serial': sequence,
			
 
				+                        'img': f'{title}_{sequence}',
			
 
				+                        'suffix': suffix
			
 
				+                    })
			
 
				+                    img_sequence_num += 1
			
 
				 
			
 
				             all_data[title] = urls
			
 
				 
			
 
				+            page.close()
			
 
				+            browser.close()
			
 
				+
			
 
				     # 获取所有 url 数据之后, 存数据库
			
 
				     return all_data
			
 
				 
			
@@ -340,5 +326,16 @@ if __name__ == "__main__":
 
				         #  保存 img 链接
			
 
				         all_data = open_browser(targets)
			
 
				         save_data(all_data)
			
 
				+
			
 
				+        # 开始读取数据
			
 
				+        load_data = load_data()
			
 
				+
			
 
				+        # 开始下载 img
			
 
				+        target_file_path = check_local_downloads_dir()
			
 
				+        download_img(load_data, target_file_path)
			
 
				+        print('下载完成, 程序退出')
			
 
				+    elif step == 4:
			
 
				+        #  调试
			
 
				+        all_data = open_browser(targets)
			
 
				     else:
			
 
				         pass
			
--- a/kaizty/target_link.txt
+++ b/kaizty/target_link.txt
@@ -1,2 +1 @@
 
				-https://www.flaticon.com/packs/editorial-design-24
			
 
				-https://www.flaticon.com/packs/space-347
			
 
				+https://www.kaizty.com//photos/L2lBQ200aE0vOVNmUGcydzhhT296Zz09.html