jack il y a 1 an
Parent
commit
64363e90fa
2 fichiers modifiés avec 11 ajouts et 15 suppressions
  1. +2 -0
      git_pull.sh
  2. +9 -15
      kaizty/kaizty.py

+ 2 - 0
git_pull.sh

@@ -0,0 +1,2 @@
+#!/bin/bash
+git fetch --all && git reset --hard origin/main

+ 9 - 15
kaizty/kaizty.py

@@ -14,7 +14,7 @@ import httpx
 from playwright.sync_api import sync_playwright
 
 target = 'kaizty'
-step = 4  # 1 = 获取img链接, 2 = 下载图片, 3 = 1 + 2, 4 = 调试
+step = 1  # 1 = 获取img链接, 2 = 下载图片, 3 = 1 + 2, 4 = 调试
 local_proxy = 1
 title_selector = '#pack-view__inner > section.pack-view__header > h1'  # 获取标题选择器
 img_selector = '#pack-view__inner > section.search-result > ul > li:nth-child({}) > div > a > img'  # 获取图片的url
@@ -53,26 +53,19 @@ def open_browser(target_urls):
 
             img_sequence_num = 1
 
-            for page_count in range(1, 2):
-                # 检查一下当前页面是不是 404
-                try:
-                    page.wait_for_selector(not_find_page_selector, state="attached", timeout=2000)
-                    print(f'总页数是 {page_count - 1} 在 url: {goto_url}')
-                    break
-                except:
-                    pass
-
+            for page_count in range(30, 31):
                 try:
                     goto_url = target_url + pages.format(page_count)
                     page.goto(goto_url, timeout=5000)
+                    page.wait_for_load_state('domcontentloaded')
                 except Exception as e:
                     print(e)
                     print(f'页面加载失败:url:{goto_url}')
 
-                page.wait_for_load_state('domcontentloaded')
+                page_source = page.content()
 
                 title = page.title()
-                page_source = page.content()
+
                 img_list = re.findall('<meta property="og:image" content="(.*?)"', page_source)
 
                 title = clean_string(title)
@@ -89,11 +82,12 @@ def open_browser(target_urls):
                     })
                     img_sequence_num += 1
 
-            all_data[title] = urls
-
             page.close()
             browser.close()
 
+        if urls:
+            all_data[title] = urls
+
     # 获取所有 url 数据之后, 存数据库
     return all_data
 
@@ -336,6 +330,6 @@ if __name__ == "__main__":
         print('下载完成, 程序退出')
     elif step == 4:
         #  调试
-        all_data = open_browser(targets)
+        pass
     else:
         pass