jack преди 1 година
родител
ревизия
57e8f94d16
променени са 6 файла, в които са добавени 130 реда и са изтрити 59 реда
  1. 3 59
      .gitignore
  2. 0 0
      fantasyfactory.py
  3. 18 0
      requirements.txt
  4. 0 0
      test.txt
  5. 55 0
      test01.py
  6. 54 0
      test02.py

+ 3 - 59
.gitignore

@@ -1,60 +1,4 @@
-# ---> Python
-# Byte-compiled / optimized / DLL files
+.DS_Store
 __pycache__/
-*.py[cod]
-*$py.class
-
-# C extensions
-*.so
-
-# Distribution / packaging
-.Python
-env/
-build/
-develop-eggs/
-dist/
-downloads/
-eggs/
-.eggs/
-lib/
-lib64/
-parts/
-sdist/
-var/
-*.egg-info/
-.installed.cfg
-*.egg
-
-# PyInstaller
-#  Usually these files are written by a python script from a template
-#  before PyInstaller builds the exe, so as to inject date/other infos into it.
-*.manifest
-*.spec
-
-# Installer logs
-pip-log.txt
-pip-delete-this-directory.txt
-
-# Unit test / coverage reports
-htmlcov/
-.tox/
-.coverage
-.coverage.*
-.cache
-nosetests.xml
-coverage.xml
-*,cover
-
-# Translations
-*.mo
-*.pot
-
-# Django stuff:
-*.log
-
-# Sphinx documentation
-docs/_build/
-
-# PyBuilder
-target/
-
+*.pyc
+.idea

+ 0 - 0
fantasyfactory.py


+ 18 - 0
requirements.txt

@@ -0,0 +1,18 @@
+anyio==4.2.0
+attrs==23.2.0
+certifi==2024.2.2
+cffi==1.16.0
+h11==0.14.0
+httpcore==1.0.2
+httpx==0.26.0
+idna==3.6
+outcome==1.3.0.post0
+pycparser==2.21
+PySocks==1.7.1
+sniffio==1.3.0
+sortedcontainers==2.4.0
+trio==0.24.0
+trio-websocket==0.11.1
+typing_extensions==4.9.0
+urllib3==2.2.0
+wsproto==1.2.0

Файловите разлики са ограничени, защото промените са твърде много
+ 0 - 0
test.txt


+ 55 - 0
test01.py

@@ -0,0 +1,55 @@
# -*- coding: UTF-8 -*-
"""Crawl the fantasyfactory directory listing and collect .jpg URLs.

Walks the autoindex-style pages up to two levels deep, groups every
image URL under a sanitized directory name, and dumps the resulting
{folder_name: [urls]} mapping to test.txt as a Python dict literal
(read back later by test02.py).
"""
import re
import urllib.parse

import httpx

BASE_URL = "https://www.fantasyfactory.xyz"

HEADERS = {
    'User-Agent': 'Mozilla/5.0 (Windows NT 10.0; Win64; x64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/91.0.4472.124 Safari/537.36'
}

# Anchor pattern for the directory-listing table; compiled once instead of
# re-parsed on every findall call inside the crawl loops.
LINK_RE = re.compile('<td class="fb-n"><a href="(.*?)"')

# Character substitutions that turn a URL path into a safe folder name.
REPLACEMENTS = {
    ".": "_",
    "/": "",
}


def _sanitize(name):
    """Apply REPLACEMENTS to *name* so it can be used as a dict key / folder."""
    for old, new in REPLACEMENTS.items():
        name = name.replace(old, new)
    return name


def _listing(path):
    """Fetch BASE_URL + *path* and return its entries minus the first one.

    The first row of these listings is the "parent directory" link, so it
    is dropped by slicing.  (The original compared each entry against
    element 0, which also discarded later duplicates and raised
    IndexError on an empty listing.)
    """
    response = httpx.get(BASE_URL + path, headers=HEADERS)
    return LINK_RE.findall(response.text)[1:]


def main():
    all_jpg = {}

    top_response = httpx.get(BASE_URL, headers=HEADERS)
    for link in LINK_RE.findall(top_response.text):
        for entry in _listing(link):
            if "jpg" in entry:
                # Image directly in the first-level directory.
                print(BASE_URL + entry)
                all_jpg.setdefault(_sanitize(link), []).append(BASE_URL + entry)
            else:
                # A sub-directory: descend one more level for its images.
                sub_path = urllib.parse.unquote(entry)
                for jpg_link in _listing(sub_path):
                    if "jpg" in jpg_link:
                        # Third path component is treated as the album name
                        # (assumes paths look like /x/album/... — TODO confirm).
                        key = _sanitize(sub_path.split('/')[2])
                        print(BASE_URL + jpg_link)
                        all_jpg.setdefault(key, []).append(BASE_URL + jpg_link)

    # str(dict) output kept deliberately: test02.py parses this file as a
    # Python literal.
    with open('test.txt', 'w') as f:
        f.write(str(all_jpg))


if __name__ == '__main__':
    main()

+ 54 - 0
test02.py

@@ -0,0 +1,54 @@
# -*- coding: UTF-8 -*-
"""Download the image URLs collected by test01.py.

Reads a {folder_name: [jpg_urls]} dict literal from test.txt and saves
each image under ./fantasy/<folder_name>/, using one worker thread per
folder.
"""
import ast
import os
import threading
import time
from random import randint
from urllib.parse import unquote

import httpx

text = ''

headers = {
    'User-Agent': 'Mozilla/5.0 (Windows NT 10.0; Win64; x64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/91.0.4472.124 Safari/537.36'
}


def get_jpg(file_name, urls):
    """Download every URL in *urls* into <save_path>/<file_name>/.

    Skips files that already exist on disk and sleeps 3-5 seconds between
    downloads to stay polite to the server.  Relies on the module-level
    ``save_path`` and ``headers`` globals.
    """
    jpg_path = os.path.join(save_path, file_name)
    if not os.path.exists(jpg_path):
        os.makedirs(jpg_path)

    for url in urls:
        jpg_name = url.split('/')[-1]
        jpg_save_path = os.path.join(jpg_path, jpg_name)
        if not os.path.exists(jpg_save_path):
            time.sleep(randint(3, 5))
            jpg = httpx.get(url, headers=headers)
            # Only persist successful responses; failures are silently
            # skipped (best-effort download, retried on the next run).
            if jpg.status_code == 200:
                print(f'正在保存 {url} 到 {file_name}')
                with open(jpg_save_path, 'wb') as f:
                    f.write(jpg.content)
        else:
            print(f'{jpg_name} 已存在, 跳过')


with open('test.txt', 'r') as file:
    lines = file.readlines()

for line in lines:
    text += unquote(line.strip())

save_path = os.path.join(os.getcwd(), 'fantasy')
if not os.path.exists(save_path):
    os.mkdir(save_path)

threads = []
# SECURITY: test.txt is data, not trusted code.  ast.literal_eval parses
# the str(dict) that test01.py writes while refusing to execute arbitrary
# expressions, unlike the original eval().
for file_name, urls in ast.literal_eval(text).items():
    t = threading.Thread(target=get_jpg, args=(file_name, urls,))
    t.start()
    threads.append(t)

for t in threads:
    t.join()

print("all done")

Някои файлове не бяха показани, защото твърде много файлове са променени