1 year ago · bbbb72d02e
--- a/manual/zhuimh/main.py
+++ b/manual/zhuimh/main.py
@@ -0,0 +1,114 @@
 
															+# -*- coding: utf-8 -*-
														
 
															+# https://www.zhuimh.com/comic/419025
														
 
															+# 获取 zhuimh
														
 
															+import time
														
 
															+import os
														
 
															+import psycopg2
														
 
															+
														
 
															+from playwright.sync_api import sync_playwright
														
 
															+
														
 
															+
														
 
															+class Zhuimh:
														
 
															+    def __init__(self):
														
 
															+        self.comico_id = 419025
														
 
															+        self.base_url = 'https://www.zhuimh.com'
														
 
															+        self.href_url = '/comic/'
														
 
															+        self.target_url = self.base_url + self.href_url + str(self.comico_id)
														
 
															+
														
 
															+    def window_scroll(self, page):
														
 
															+        # 获取页面高度
														
 
															+        page_height = page.evaluate('() => document.body.scrollHeight')
														
 
															+        # 获取视口高度
														
 
															+        viewport_height = page.evaluate('() => window.innerHeight')
														
 
															+
														
 
															+        # 计算需要滚动的距离
														
 
															+        scroll_distance = page_height - viewport_height
														
 
															+
														
 
															+        # 模拟鼠标滚轮操作，滚动到页面底部
														
 
															+        page.mouse.wheel(0, scroll_distance)
														
 
															+
														
 
															+    def get_chapter_img(self, chapter_name_list, chapter_url_list):
														
 
															+        for chapter_name, chapter_url in zip(chapter_name_list, chapter_url_list):
														
 
															+            print(f'章节名: {chapter_name}, 章节url: {chapter_url}')
														
 
															+            with sync_playwright() as playwright:
														
 
															+                browser = playwright.chromium.launch(headless=True)  # headless=False 可以开启浏览器界面，便于调试
														
 
															+                page = browser.new_page()
														
 
															+
														
 
															+                page.goto(chapter_url)
														
 
															+
														
 
															+                time.sleep(1)
														
 
															+
														
 
															+                self.window_scroll(page)
														
 
															+
														
 
															+                # for _ in range(5):
														
 
															+                #     page.evaluate('''() => {window.scrollTo(0, document.body.scrollHeight);}''')
														
 
															+                #
														
 
															+                #     time.sleep(0.2)
														
 
															+                #
														
 
															+                # page.wait_for_timeout(1000)
														
 
															+
														
 
															+                time.sleep(1)
														
 
															+
														
 
															+                element = page.query_selector('body > div.chpater-images')
														
 
															+
														
 
															+                links = element.query_selector_all('img')
														
 
															+
														
 
															+                chpater_img_links = []
														
 
															+
														
 
															+                for link in links:
														
 
															+                    # 获取每个 <a> 标签的 href 属性
														
 
															+                    img_src = link.get_attribute('src')
														
 
															+                    if 'blob:' in img_src:
														
 
															+                        chpater_img_links.append(img_src)
														
 
															+
														
 
															+                print(chpater_img_links)
														
 
															+
														
 
															+    def get_chapter(self):
														
 
															+        with sync_playwright() as playwright:
														
 
															+            browser = playwright.chromium.launch(headless=True)  # headless=False 可以开启浏览器界面，便于调试
														
 
															+            page = browser.new_page()
														
 
															+
														
 
															+            page.goto(self.target_url)
														
 
															+
														
 
															+            title = page.title()
														
 
															+            target_name = title.split('漫画免费')[0]
														
 
															+            current_path = os.path.dirname(os.path.abspath(__file__))
														
 
															+
														
 
															+            path = os.path.join(current_path, 'zhuimh', target_name)
														
 
															+            if not os.path.exists(path):
														
 
															+                os.makedirs(path)
														
 
															+
														
 
															+            element = page.query_selector('body > div.tbox.tabs > div.tabs_block > ul')
														
 
															+
														
 
															+            chapter_name_list = []
														
 
															+            chapter_url_list = []
														
 
															+
														
 
															+            if element:
														
 
															+                # 执行你需要的操作，例如获取元素的文本内容
														
 
															+                text = element.text_content()
														
 
															+                for line in text.split('\n'):
														
 
															+                    if line.strip():
														
 
															+                        chapter_name_list.append(line.strip())
														
 
															+
														
 
															+                links = element.query_selector_all('a')
														
 
															+                for link in links:
														
 
															+                    # 获取每个 <a> 标签的 href 属性
														
 
															+                    href = link.get_attribute('href')
														
 
															+                    if href:
														
 
															+                        chapter_url_list.append(self.base_url + href)
														
 
															+                    else:
														
 
															+                        print('没有找到 href 属性')
														
 
															+            else:
														
 
															+                print('元素未找到')
														
 
															+                exit(0)
														
 
															+
														
 
															+            return chapter_name_list, chapter_url_list
														
 
															+
														
 
															+    def main(self):
														
 
															+        chapter_name_list, chapter_url_list = self.get_chapter()
														
 
															+        self.get_chapter_img(chapter_name_list, chapter_url_list)
														
 
															+
														
 
															+
														
 
															+if __name__ == '__main__':
														
 
															+    zhuimh = Zhuimh()
														
 
															+    zhuimh.main()