|
|
@@ -0,0 +1,55 @@
|
|
|
+# -*- coding: UTF-8 -*-
|
|
|
+import re
|
|
|
+import urllib.parse
|
|
|
+import httpx
|
|
|
+
|
|
|
url = "https://www.fantasyfactory.xyz"

headers = {
    'User-Agent': 'Mozilla/5.0 (Windows NT 10.0; Win64; x64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/91.0.4472.124 Safari/537.36'
}

# Substitutions applied to a path (or path segment) to turn it into a
# dictionary key: dots become underscores, slashes are dropped.
replacements = {
    ".": "_",
    "/": "",
}

# Key -> list of absolute image URLs. Filled in by main().
all_jpg = {}

# Captures the href of every '<td class="fb-n"><a href="...">' link in a page.
_LINK_RE = re.compile(r'<td class="fb-n"><a href="(.*?)"')


def _sanitize(name):
    """Return *name* with every substitution in ``replacements`` applied."""
    for old, new in replacements.items():
        name = name.replace(old, new)
    return name


def _entries(html):
    """Return the listing hrefs found in *html*, minus the first link.

    Every href equal to the first one is dropped as well, because the
    exclusion is by value rather than by position.
    NOTE(review): the first link is presumably the parent-directory entry --
    confirm against the site's markup.
    """
    links = _LINK_RE.findall(html)
    if not links:
        return []
    first = links[0]
    return [link for link in links if link != first]


def _folder_key(path):
    """Return the dictionary key for images found under the listing *path*.

    The key is the sanitized third ``/``-separated segment of *path*
    (``/a/b/`` -> ``b``). Paths with fewer than three segments fall back to
    their last non-empty segment instead of raising ``IndexError``.
    """
    parts = path.split('/')
    if len(parts) > 2:
        return _sanitize(parts[2])
    return _sanitize(next((part for part in reversed(parts) if part), ""))


def collect_jpgs(index_html, fetch, base_url=url):
    """Walk the listings linked from *index_html* and group the JPG links.

    Args:
        index_html: HTML text of the top-level page.
        fetch: Callable taking an absolute URL and returning that page's
            HTML text. It is called once per listing page visited.
        base_url: Prefix prepended to every href to form an absolute URL.

    Returns:
        A dict mapping a key to the list of absolute image URLs found.
        Images directly inside a top-level listing are keyed by that
        listing's sanitized href; images one level deeper are keyed by
        ``_folder_key`` of the (unquoted) sub-listing path.

    Each image URL is also printed as it is found.
    """
    # NOTE(review): the nesting below was reconstructed from a source whose
    # indentation was lost; non-"jpg" entries are treated as sub-listings.
    found = {}
    for top_href in _LINK_RE.findall(index_html):
        # Computed once in its own name rather than overwriting the loop
        # variable, which is still needed as the raw href.
        top_key = _sanitize(top_href)
        for entry in _entries(fetch(base_url + top_href)):
            if "jpg" in entry:
                image_url = base_url + entry
                print(image_url)
                found.setdefault(top_key, []).append(image_url)
                continue

            # Anything else is followed one level down as a sub-listing.
            sub_path = urllib.parse.unquote(entry)
            for sub_entry in _entries(fetch(base_url + sub_path)):
                if "jpg" not in sub_entry:
                    continue
                image_url = base_url + sub_entry
                print(image_url)
                found.setdefault(_folder_key(sub_path), []).append(image_url)
    return found


def main():
    """Crawl ``url`` and write the collected mapping to ``test.txt``."""
    import sys

    # One client for the whole crawl so connections are reused.
    with httpx.Client(headers=headers) as client:

        def fetch(page_url):
            # A single unreachable listing should not discard everything
            # collected so far; report it and treat the page as empty.
            try:
                return client.get(page_url).text
            except httpx.HTTPError as exc:
                print(f"skipping {page_url}: {exc}", file=sys.stderr)
                return ""

        # A failure on the index page still propagates, so an existing
        # test.txt is not overwritten with an empty result.
        index_html = client.get(url).text
        all_jpg.update(collect_jpgs(index_html, fetch))

    # Keys come from unquoted paths and may be non-ASCII, so the encoding
    # is pinned instead of relying on the platform default.
    with open('test.txt', 'w', encoding='utf-8') as f:
        f.write(str(all_jpg))


if __name__ == "__main__":
    main()
|