il y a 1 an · bbbb72d02e
--- a/manual/zhuimh/main.py
+++ b/manual/zhuimh/main.py
@@ -0,0 +1,114 @@
 
				+# -*- coding: utf-8 -*-
			
 
				+# https://www.zhuimh.com/comic/419025
			
 
				+# 获取 zhuimh
			
 
				+import time
			
 
				+import os
			
 
				+import psycopg2
			
 
				+
			
 
				+from playwright.sync_api import sync_playwright
			
 
				+
			
 
				+
			
 
				+class Zhuimh:
			
 
				+    def __init__(self):
			
 
				+        self.comico_id = 419025
			
 
				+        self.base_url = 'https://www.zhuimh.com'
			
 
				+        self.href_url = '/comic/'
			
 
				+        self.target_url = self.base_url + self.href_url + str(self.comico_id)
			
 
				+
			
 
				+    def window_scroll(self, page):
			
 
				+        # 获取页面高度
			
 
				+        page_height = page.evaluate('() => document.body.scrollHeight')
			
 
				+        # 获取视口高度
			
 
				+        viewport_height = page.evaluate('() => window.innerHeight')
			
 
				+
			
 
				+        # 计算需要滚动的距离
			
 
				+        scroll_distance = page_height - viewport_height
			
 
				+
			
 
				+        # 模拟鼠标滚轮操作，滚动到页面底部
			
 
				+        page.mouse.wheel(0, scroll_distance)
			
 
				+
			
 
				+    def get_chapter_img(self, chapter_name_list, chapter_url_list):
			
 
				+        for chapter_name, chapter_url in zip(chapter_name_list, chapter_url_list):
			
 
				+            print(f'章节名: {chapter_name}, 章节url: {chapter_url}')
			
 
				+            with sync_playwright() as playwright:
			
 
				+                browser = playwright.chromium.launch(headless=True)  # headless=False 可以开启浏览器界面，便于调试
			
 
				+                page = browser.new_page()
			
 
				+
			
 
				+                page.goto(chapter_url)
			
 
				+
			
 
				+                time.sleep(1)
			
 
				+
			
 
				+                self.window_scroll(page)
			
 
				+
			
 
				+                # for _ in range(5):
			
 
				+                #     page.evaluate('''() => {window.scrollTo(0, document.body.scrollHeight);}''')
			
 
				+                #
			
 
				+                #     time.sleep(0.2)
			
 
				+                #
			
 
				+                # page.wait_for_timeout(1000)
			
 
				+
			
 
				+                time.sleep(1)
			
 
				+
			
 
				+                element = page.query_selector('body > div.chpater-images')
			
 
				+
			
 
				+                links = element.query_selector_all('img')
			
 
				+
			
 
				+                chpater_img_links = []
			
 
				+
			
 
				+                for link in links:
			
 
				+                    # 获取每个 <a> 标签的 href 属性
			
 
				+                    img_src = link.get_attribute('src')
			
 
				+                    if 'blob:' in img_src:
			
 
				+                        chpater_img_links.append(img_src)
			
 
				+
			
 
				+                print(chpater_img_links)
			
 
				+
			
 
				+    def get_chapter(self):
			
 
				+        with sync_playwright() as playwright:
			
 
				+            browser = playwright.chromium.launch(headless=True)  # headless=False 可以开启浏览器界面，便于调试
			
 
				+            page = browser.new_page()
			
 
				+
			
 
				+            page.goto(self.target_url)
			
 
				+
			
 
				+            title = page.title()
			
 
				+            target_name = title.split('漫画免费')[0]
			
 
				+            current_path = os.path.dirname(os.path.abspath(__file__))
			
 
				+
			
 
				+            path = os.path.join(current_path, 'zhuimh', target_name)
			
 
				+            if not os.path.exists(path):
			
 
				+                os.makedirs(path)
			
 
				+
			
 
				+            element = page.query_selector('body > div.tbox.tabs > div.tabs_block > ul')
			
 
				+
			
 
				+            chapter_name_list = []
			
 
				+            chapter_url_list = []
			
 
				+
			
 
				+            if element:
			
 
				+                # 执行你需要的操作，例如获取元素的文本内容
			
 
				+                text = element.text_content()
			
 
				+                for line in text.split('\n'):
			
 
				+                    if line.strip():
			
 
				+                        chapter_name_list.append(line.strip())
			
 
				+
			
 
				+                links = element.query_selector_all('a')
			
 
				+                for link in links:
			
 
				+                    # 获取每个 <a> 标签的 href 属性
			
 
				+                    href = link.get_attribute('href')
			
 
				+                    if href:
			
 
				+                        chapter_url_list.append(self.base_url + href)
			
 
				+                    else:
			
 
				+                        print('没有找到 href 属性')
			
 
				+            else:
			
 
				+                print('元素未找到')
			
 
				+                exit(0)
			
 
				+
			
 
				+            return chapter_name_list, chapter_url_list
			
 
				+
			
 
				+    def main(self):
			
 
				+        chapter_name_list, chapter_url_list = self.get_chapter()
			
 
				+        self.get_chapter_img(chapter_name_list, chapter_url_list)
			
 
				+
			
 
				+
			
 
				+if __name__ == '__main__':
			
 
				+    zhuimh = Zhuimh()
			
 
				+    zhuimh.main()