|
@@ -35,14 +35,21 @@ with sync_playwright() as playwright:
|
|
|
# print(content)
|
|
# print(content)
|
|
|
|
|
|
|
|
view_img_list = re.findall('no-repeat"><a href="(.*?)">', content)
|
|
view_img_list = re.findall('no-repeat"><a href="(.*?)">', content)
|
|
|
|
|
+ all_view_img_list = [i for i in view_img_list]
|
|
|
|
|
|
|
|
- max_page_mun = re.findall('onclick="return false">(.*?)</a></td>', content)
|
|
|
|
|
|
|
+ max_page_mun = re.findall('onclick="return false">(\\d*?)</a></td>', content)
|
|
|
|
|
|
|
|
if max_page_mun:
|
|
if max_page_mun:
|
|
|
- max_page_mun = int(max_page_mun[0])
|
|
|
|
|
|
|
+ max_page_mun = max((int(p) for p in max_page_mun if p), default=0)  # compare as ints: max() on strings is lexicographic ("9" > "10"); skip empty captures
|
|
|
|
|
|
|
|
- print(max_page_mun)
|
|
|
|
|
|
|
+ for view_page in range(1, max_page_mun):
|
|
|
|
|
+ page.goto(base_url + href_url + change_page + str(view_page))
|
|
|
|
|
+ content = page.content()
|
|
|
|
|
+ view_img_list = re.findall('no-repeat"><a href="(.*?)">', content)
|
|
|
|
|
+ for i in view_img_list:
|
|
|
|
|
+ all_view_img_list.append(i)
|
|
|
|
|
|
|
|
|
|
+ print(list(set(all_view_img_list)))
|
|
|
# # 访问内层图片链接
|
|
# # 访问内层图片链接
|
|
|
# for n, img_url in enumerate(view_img_list):
|
|
# for n, img_url in enumerate(view_img_list):
|
|
|
# page.goto(img_url)
|
|
# page.goto(img_url)
|