config.py 2.6 KB

1234567891011121314151617181920212223242526272829303132333435363738394041424344454647484950515253545556575859606162636465666768697071727374757677787980818283848586878889909192
  1. #!/usr/bin/env python3
  2. # -*- coding: utf-8 -*-
  3. """
  4. 配置管理模块
  5. """
  6. import os
  7. from pathlib import Path
  8. from typing import List, Optional
  9. from pydantic import BaseModel, Field
  10. class AppConfig(BaseModel):
  11. """应用配置"""
  12. # 基础配置
  13. app_name: str = "EH-Downloader"
  14. app_version: str = "1.0.0"
  15. debug: bool = False
  16. # 服务器配置
  17. host: str = "0.0.0.0"
  18. port: int = 8000
  19. # 数据目录配置
  20. data_dir: str = "data"
  21. downloads_dir: str = "data/downloads"
  22. targets_file: str = "data/targets.txt"
  23. proxy_file: str = "data/proxy.txt"
  24. # 爬虫配置
  25. concurrency: int = 20
  26. max_page: int = 100
  27. retry_per_page: int = 5
  28. retry_per_image: int = 3
  29. timeout: float = 10.0
  30. image_timeout: float = 15.0
  31. # 日志配置
  32. log_level: str = "INFO"
  33. log_format: str = "[%(asctime)s] [%(levelname)s] %(message)s"
  34. # 文件清理配置
  35. cleanup_patterns: List[str] = ["**/*.log", "**/*.json"]
  36. cleanup_exclude: List[str] = ["data/targets.txt"]
  37. def __init__(self, **kwargs):
  38. super().__init__(**kwargs)
  39. # 确保目录存在
  40. self._ensure_directories()
  41. def _ensure_directories(self):
  42. """确保必要的目录存在"""
  43. Path(self.data_dir).mkdir(exist_ok=True)
  44. Path(self.downloads_dir).mkdir(parents=True, exist_ok=True)
  45. @property
  46. def targets_path(self) -> Path:
  47. """获取targets文件路径"""
  48. return Path(self.targets_file)
  49. @property
  50. def proxy_path(self) -> Path:
  51. """获取proxy文件路径"""
  52. return Path(self.proxy_file)
  53. def get_proxies(self) -> List[str]:
  54. """读取代理列表"""
  55. if not self.proxy_path.exists():
  56. return ["127.0.0.1:7890"]
  57. try:
  58. with open(self.proxy_path, 'r', encoding='utf-8') as f:
  59. proxies = [line.strip() for line in f.readlines() if line.strip()]
  60. return proxies if proxies else ["127.0.0.1:7890"]
  61. except Exception:
  62. return ["127.0.0.1:7890"]
  63. def get_targets(self) -> List[str]:
  64. """读取目标URL列表"""
  65. if not self.targets_path.exists():
  66. return []
  67. try:
  68. with open(self.targets_path, 'r', encoding='utf-8') as f:
  69. urls = [line.strip() for line in f.readlines() if line.strip()]
  70. # 过滤掉注释行
  71. return [url for url in urls if url and not url.startswith('#')]
  72. except Exception:
  73. return []
  74. # 全局配置实例
  75. config = AppConfig()