| 12345678910111213141516171819202122232425262728293031323334353637383940414243444546474849505152535455 |
# -*- coding: UTF-8 -*-
import re
import sys
import urllib.parse
from typing import Callable, Dict, List

import httpx
# Root of the site whose listing pages are crawled.
url = "https://www.fantasyfactory.xyz"

# Browser-like User-Agent sent with every request.
headers = {
    'User-Agent': 'Mozilla/5.0 (Windows NT 10.0; Win64; x64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/91.0.4472.124 Safari/537.36'
}

# Character substitutions applied to a path before it is used as a result key.
replacements = {
    ".": "_",
    "/": "",
}

# Captures the href of every entry cell in a listing page.
_LINK_RE = re.compile(r'<td class="fb-n"><a href="(.*?)"')


def _listing_entries(html: str) -> List[str]:
    """Return the hrefs found in *html*, dropping the first entry.

    Every href equal to the first match is skipped as well.
    NOTE(review): the first row is presumably the parent-directory link --
    confirm against the site's markup.
    """
    hrefs = _LINK_RE.findall(html)
    if not hrefs:
        return []
    first = hrefs[0]
    return [href for href in hrefs if href != first]


def _sanitize(name: str) -> str:
    """Apply every substitution in ``replacements`` to *name*."""
    for old, new in replacements.items():
        name = name.replace(old, new)
    return name


def _is_jpg(href: str) -> bool:
    """Return True when *href* contains the substring ``jpg``.

    NOTE(review): this is a substring test, so a directory whose name
    contains "jpg" is also treated as an image; kept as-is to preserve the
    existing classification.
    """
    return "jpg" in href


def _album_name(decoded_href: str) -> str:
    """Return the third ``/``-separated segment of *decoded_href*.

    For a path shaped like ``/top/sub/`` that segment is ``sub``. Paths with
    fewer than three segments fall back to the whole path instead of raising
    ``IndexError``.
    """
    parts = decoded_href.split('/')
    return parts[2] if len(parts) > 2 else decoded_href


def _record(found: Dict[str, List[str]], key: str, link: str) -> None:
    """Print *link* and append it to the list stored under *key*."""
    print(link)
    found.setdefault(key, []).append(link)


def collect_jpgs(fetch: Callable[[str], str]) -> Dict[str, List[str]]:
    """Walk the listing two levels deep and group the jpg links found.

    Args:
        fetch: Callable that takes an absolute URL and returns the page body
            as text (an empty string when the page is unavailable).

    Returns:
        A dict mapping a sanitized directory name to the absolute jpg URLs
        found in it. Images found directly under a top-level entry are keyed
        by that entry's sanitized href; images one level deeper are keyed by
        the sanitized, percent-decoded sub-directory name.
    """
    found: Dict[str, List[str]] = {}
    # The root page is iterated in full: its first entry is not skipped.
    for top_href in _LINK_RE.findall(fetch(url)):
        top_key = _sanitize(top_href)
        for href in _listing_entries(fetch(url + top_href)):
            if _is_jpg(href):
                _record(found, top_key, url + href)
                continue
            # Decode only for the human-readable key. The request itself uses
            # the href exactly as the server emitted it, so an encoded '#',
            # '?' or '%' in a folder name cannot alter the URL.
            sub_key = _sanitize(_album_name(urllib.parse.unquote(href)))
            for sub_href in _listing_entries(fetch(url + href)):
                if _is_jpg(sub_href):
                    _record(found, sub_key, url + sub_href)
    return found


def main() -> None:
    """Crawl the site and write the collected links to ``test.txt``."""
    # One client for the whole crawl so connections are reused.
    with httpx.Client(headers=headers) as client:

        def fetch(page_url: str) -> str:
            # A single unreachable page should not abort the crawl: report it
            # on stderr and treat it as an empty listing.
            try:
                response = client.get(page_url)
                response.raise_for_status()
            except httpx.HTTPError as exc:
                print(f"skipping {page_url}: {exc}", file=sys.stderr)
                return ""
            return response.text

        all_jpg = collect_jpgs(fetch)

    # Keys may contain non-ASCII characters after percent-decoding, so the
    # encoding is pinned instead of relying on the platform default.
    with open('test.txt', 'w', encoding='utf-8') as f:
        f.write(str(all_jpg))


if __name__ == "__main__":
    main()
|