| 1234567891011121314151617181920212223242526272829303132333435363738394041424344454647484950515253545556575859606162636465666768697071727374757677787980818283848586878889909192 |
- #!/usr/bin/env python3
- # -*- coding: utf-8 -*-
- """
- 配置管理模块
- """
- import os
- from pathlib import Path
- from typing import List, Optional
- from pydantic import BaseModel, Field
class AppConfig(BaseModel):
    """Application configuration.

    Groups the basic, server, data-directory, crawler, logging and
    file-cleanup settings as pydantic fields.  Constructing an instance
    also creates ``data_dir`` and ``downloads_dir`` on disk when they do
    not exist yet.
    """

    # Basic settings
    app_name: str = "EH-Downloader"
    app_version: str = "1.0.0"
    debug: bool = False

    # Server settings
    host: str = "0.0.0.0"
    port: int = 8000

    # Data directory settings
    data_dir: str = "data"
    downloads_dir: str = "data/downloads"
    targets_file: str = "data/targets.txt"
    proxy_file: str = "data/proxy.txt"

    # Crawler settings
    concurrency: int = 20
    max_page: int = 100
    retry_per_page: int = 5
    retry_per_image: int = 3
    timeout: float = 10.0
    image_timeout: float = 15.0

    # Logging settings
    log_level: str = "INFO"
    log_format: str = "[%(asctime)s] [%(levelname)s] %(message)s"

    # File cleanup settings
    cleanup_patterns: List[str] = ["**/*.log", "**/*.json"]
    cleanup_exclude: List[str] = ["data/targets.txt"]

    def __init__(self, **kwargs):
        """Validate the given settings, then create the data directories."""
        super().__init__(**kwargs)
        # Make sure the directories exist as soon as the config is built.
        self._ensure_directories()

    def _ensure_directories(self):
        """Create ``data_dir`` and ``downloads_dir`` if they are missing."""
        # parents=True on both calls so that a nested data_dir
        # (e.g. "var/app/data") does not fail with FileNotFoundError.
        Path(self.data_dir).mkdir(parents=True, exist_ok=True)
        Path(self.downloads_dir).mkdir(parents=True, exist_ok=True)

    @property
    def targets_path(self) -> Path:
        """Return ``targets_file`` as a ``Path``."""
        return Path(self.targets_file)

    @property
    def proxy_path(self) -> Path:
        """Return ``proxy_file`` as a ``Path``."""
        return Path(self.proxy_file)

    def _read_entries(self, path: Path) -> List[str]:
        """Return the stripped, non-blank, non-comment lines of *path*.

        Lines whose first non-whitespace character is ``#`` are skipped.

        Raises:
            OSError: If the file cannot be opened or read.
            ValueError: If the content is not valid UTF-8
                (``UnicodeDecodeError`` is a ``ValueError`` subclass).
        """
        with open(path, "r", encoding="utf-8") as f:
            stripped = (line.strip() for line in f)
            return [
                entry
                for entry in stripped
                if entry and not entry.startswith("#")
            ]

    def get_proxies(self) -> List[str]:
        """Read the proxy list from ``proxy_file``.

        Blank lines and ``#`` comment lines are ignored, matching
        ``get_targets``.

        Returns:
            The proxies listed in the file, or ``["127.0.0.1:7890"]`` when
            the file is missing, unreadable, or contains no entries.
        """
        fallback = ["127.0.0.1:7890"]
        try:
            proxies = self._read_entries(self.proxy_path)
        except (OSError, ValueError):
            # Best effort: a missing or unreadable file means "use the default".
            return fallback
        return proxies or fallback

    def get_targets(self) -> List[str]:
        """Read the target URL list from ``targets_file``.

        Blank lines and ``#`` comment lines are ignored.

        Returns:
            The URLs listed in the file, or an empty list when the file is
            missing or unreadable.
        """
        try:
            return self._read_entries(self.targets_path)
        except (OSError, ValueError):
            # Best effort: a missing or unreadable file means "no targets".
            return []
# Global configuration instance, built with default settings at import time.
config = AppConfig()
|