Browse Source

更新 'eh_art.py'

toor 1 year ago
parent
commit
116d52e92e
1 changed file with 10 additions and 3 deletions
  1. 10 3
      eh_art.py

+ 10 - 3
eh_art.py

@@ -35,14 +35,21 @@ with sync_playwright() as playwright:
     # print(content)
 
     view_img_list = re.findall('no-repeat"><a href="(.*?)">', content)
+    all_view_img_list = [i for i in view_img_list]
 
-    max_page_mun = re.findall('onclick="return false">(.*?)</a></td>', content)
+    max_page_mun = re.findall('onclick="return false">(\\d*?)</a></td>', content)
 
     if max_page_mun:
-        max_page_mun = int(max_page_mun[0])
+        max_page_mun = int(max(max_page_mun))
 
-    print(max_page_mun)
+    for view_page in range(1, max_page_mun):
+        page.goto(base_url + href_url + change_page + str(view_page))
+        content = page.content()
+        view_img_list = re.findall('no-repeat"><a href="(.*?)">', content)
+        for i in view_img_list:
+            all_view_img_list.append(i)
 
+    print(list(set(all_view_img_list)))
     # # 访问内层图片链接
     # for n, img_url in enumerate(view_img_list):
     #     page.goto(img_url)