# -*- coding: utf-8 -*-
# https://www.zhuimh.com/comic/419025
# Fetch comic chapters and page images from zhuimh
import time
import os
import psycopg2
from playwright.sync_api import sync_playwright
class Zhuimh:
    """Scraper for one comic on www.zhuimh.com, driven by headless Chromium.

    Workflow (see ``main``): ``get_chapter`` opens the comic's landing page
    and collects the chapter names and URLs, then ``get_chapter_img`` visits
    every chapter page and collects the ``blob:`` image sources found there.
    """

    def __init__(self, comico_id=419025):
        """Build the landing-page URL for the comic.

        Args:
            comico_id: Numeric id of the comic as it appears in the site URL
                (``/comic/<id>``). Defaults to the id the script was
                originally written for.
        """
        self.comico_id = comico_id
        self.base_url = 'https://www.zhuimh.com'
        self.href_url = '/comic/'
        self.target_url = self.base_url + self.href_url + str(self.comico_id)

    def window_scroll(self, page):
        """Scroll *page* to the bottom with a single mouse-wheel event.

        Args:
            page: A Playwright page that has already been navigated.
        """
        page_height = page.evaluate('() => document.body.scrollHeight')
        viewport_height = page.evaluate('() => window.innerHeight')
        # A page shorter than the viewport would give a negative distance,
        # i.e. an upward scroll; clamp so the wheel never moves backwards.
        scroll_distance = max(0, page_height - viewport_height)
        page.mouse.wheel(0, scroll_distance)

    @staticmethod
    def _filter_blob_sources(sources):
        """Return the entries of *sources* that are ``blob:`` URLs.

        ``None`` and empty entries (an ``<img>`` without a ``src``) are
        skipped instead of raising ``TypeError`` on the substring test.
        """
        return [src for src in sources if src and 'blob:' in src]

    def get_chapter_img(self, chapter_name_list, chapter_url_list):
        """Visit every chapter page and collect its ``blob:`` image sources.

        Names and URLs are consumed in lockstep; surplus entries of the
        longer list are ignored. Progress and the collected links are
        printed for each chapter.

        Args:
            chapter_name_list: Chapter titles.
            chapter_url_list: Absolute chapter URLs, in the same order.

        Returns:
            A list of ``(chapter_name, image_links)`` tuples, one per visited
            chapter. ``image_links`` is empty when the image container was
            not found on the page.
        """
        results = []
        with sync_playwright() as playwright:
            # Launch once and reuse the browser for all chapters; starting a
            # new driver and browser per chapter is loop-invariant work.
            # headless=False opens a visible browser window for debugging.
            browser = playwright.chromium.launch(headless=True)
            try:
                for chapter_name, chapter_url in zip(chapter_name_list, chapter_url_list):
                    print(f'章节名: {chapter_name}, 章节url: {chapter_url}')
                    page = browser.new_page()
                    try:
                        page.goto(chapter_url)
                        # wait_for_timeout keeps Playwright's dispatcher
                        # running while waiting, unlike time.sleep.
                        page.wait_for_timeout(1000)
                        self.window_scroll(page)
                        # Second pause after scrolling, before the image
                        # sources are read from the DOM.
                        page.wait_for_timeout(1000)
                        # NOTE: 'chpater' (sic) is the selector used against
                        # the site's markup; it is kept exactly as written.
                        element = page.query_selector('body > div.chpater-images')
                        if element is None:
                            print('元素未找到')
                            results.append((chapter_name, []))
                            continue
                        sources = [
                            img.get_attribute('src')
                            for img in element.query_selector_all('img')
                        ]
                        chpater_img_links = self._filter_blob_sources(sources)
                        print(chpater_img_links)
                        results.append((chapter_name, chpater_img_links))
                    finally:
                        page.close()
            finally:
                browser.close()
        return results

    def get_chapter(self):
        """Load the comic landing page and collect its chapter list.

        Also creates the directory ``<script dir>/zhuimh/<comic name>``,
        where the comic name is the part of the page title that precedes
        '漫画免费'.

        Returns:
            A ``(chapter_name_list, chapter_url_list)`` tuple of equal-length
            lists. Names come from each chapter anchor's text and URLs from
            its ``href``, so entry *i* of both lists describes the same
            chapter.

        Raises:
            SystemExit: With status 1 when the chapter list element is not
                found on the page.
        """
        chapter_name_list = []
        chapter_url_list = []
        with sync_playwright() as playwright:
            # headless=False opens a visible browser window for debugging.
            browser = playwright.chromium.launch(headless=True)
            try:
                page = browser.new_page()
                page.goto(self.target_url)
                title = page.title()
                target_name = title.split('漫画免费')[0]
                current_path = os.path.dirname(os.path.abspath(__file__))
                path = os.path.join(current_path, 'zhuimh', target_name)
                # exist_ok avoids the check-then-create race of a separate
                # os.path.exists test.
                os.makedirs(path, exist_ok=True)
                element = page.query_selector('body > div.tbox.tabs > div.tabs_block > ul')
                if element is None:
                    print('元素未找到')
                    # Non-zero status: a missing chapter list is a failure.
                    raise SystemExit(1)
                for link in element.query_selector_all('a'):
                    href = link.get_attribute('href')
                    if not href:
                        print('没有找到 href 属性')
                        continue
                    # Take the name from the same anchor as the URL so the
                    # two lists cannot drift out of alignment.
                    name = (link.text_content() or '').strip()
                    chapter_name_list.append(name or href)
                    chapter_url_list.append(self.base_url + href)
            finally:
                browser.close()
        return chapter_name_list, chapter_url_list

    def main(self):
        """Run the full scrape: chapter list first, then every chapter's images."""
        chapter_name_list, chapter_url_list = self.get_chapter()
        self.get_chapter_img(chapter_name_list, chapter_url_list)
if __name__ == '__main__':
    # Script entry point: build the scraper with its defaults and run the
    # whole pipeline (chapter list, then per-chapter images).
    scraper = Zhuimh()
    scraper.main()