# -*- coding: utf-8 -*-
'''
Fetch the free proxy list published at www.dailiproxy.com and store each
entry through the project's MongoHandle wrapper.
'''
import ast
import json
import os
import re
import sys
import time
from datetime import datetime

# The project-local modules imported below live in the ``auto_news_scheduler``
# directory; extend sys.path (relative to the current working directory) so
# they can be resolved when this file is run as a script.
sys.path.append(os.path.join(os.getcwd().split('auto_news_scheduler')[0], 'auto_news_scheduler'))

import httpx

from tools_mongo_handle import MongoHandle
from tools_logs_handle import LogsHandle


class IpProxy(object):
    '''Download the proxy list page, extract its entries and persist them.'''

    # Task name passed as the first argument of every logs_write() call.
    _LOG_NAME = 'get_free_ip_proxy'

    _URL = 'https://www.dailiproxy.com/cn-free/'

    # NOTE(review): 'br' is advertised in Accept-Encoding; httpx presumably
    # needs an optional brotli package to decode such responses - confirm it
    # is installed in the deployment environment.
    _HEADERS = {
        'User-Agent': 'Mozilla/5.0',
        'Accept': 'text/html,application/xhtml+xml,application/xml;q=0.9,image/webp,image/apng,*/*;q=0.8,application/signed-exchange;v=b3;q=0.7',
        'Accept-Encoding': 'gzip, deflate, br',
        'Accept-Language': 'zh-CN,zh;q=0.9,en;q=0.8,en-GB;q=0.7,en-US;q=0.6'
    }

    # The page embeds its data as ``var json = <literal>;``. The match is
    # non-greedy, so it stops at the first ';' after the assignment.
    _JSON_VAR_RE = re.compile(r'var json = ([\S\s]*?);')

    def __init__(self):
        '''Create the log handle and the handle for the ``free_ip`` collection.'''
        self.log_handle = LogsHandle()
        self.now_day = time.strftime('%Y-%m-%d', time.localtime())

        db = 'free_ip'
        collection = 'free_ip'
        # NOTE(review): del_collection=True presumably clears the previous
        # proxies on every run - confirm against MongoHandle.
        self.mongo = MongoHandle(db=db, collection=collection, del_db=False, del_collection=True, auto_remove=0)

    def main(self):
        '''Run the pipeline: download the page, parse it, store the entries.'''
        self.log_handle.logs_write(self._LOG_NAME, '开始获取免费ip', 'start', False)

        text = self.request()
        result_list = self.re_data(text)
        self.db(result_list)

        self.log_handle.logs_write(self._LOG_NAME, '获取免费ip已完成', 'done', False)

    def request(self):
        '''Download the proxy list page.

        Returns:
            The page body decoded as UTF-8, or None when the request fails
            or the server answers with a status code other than 200. The
            failure is written to the log in both cases.
        '''
        print('开始获取免费代理ip')

        try:
            response = httpx.get(url=self._URL, headers=self._HEADERS)
        except httpx.HTTPError as exc:
            # Transport problems (DNS, connect, timeout, ...) used to
            # propagate and abort the whole job; log them instead.
            self.log_handle.logs_write(self._LOG_NAME, '请求异常: %s' % exc, 'error', False)
            return None

        if response.status_code != 200:
            self.log_handle.logs_write(self._LOG_NAME, '请求状态码: %s' % response.status_code, 'error', False)
            # Return explicitly: no page text exists on this path.
            return None

        response.encoding = "utf-8"
        text = response.text
        print('获取免费代理ip完成')
        return text

    @staticmethod
    def _parse_payload(raw):
        '''Turn the text captured after ``var json =`` into a Python object.

        SECURITY: this text comes from a remote web page. The previous
        implementation passed it to eval(), which would execute arbitrary
        code served by that page. Only data literals are accepted now:
        strict JSON first, then a Python-style literal (e.g. single-quoted
        strings) via ast.literal_eval.

        Raises:
            ValueError, SyntaxError: if the text is neither valid JSON nor a
            Python literal.
        '''
        try:
            return json.loads(raw)
        except ValueError:
            return ast.literal_eval(raw.strip())

    @staticmethod
    def _strip_spaces(value):
        '''Remove spaces from *value* if it is a string; otherwise return it as is.'''
        if isinstance(value, str):
            return value.replace(' ', '')
        return value

    def re_data(self, text):
        '''Extract the proxy entries embedded in the page source.

        Args:
            text: page source as returned by request(); may be None.

        Returns:
            A list with the items of every ``var json = ...;`` payload found
            in *text* (empty when the page has no such payload), or None when
            *text* is empty or a payload cannot be parsed.
        '''
        print('开始整理数据')

        if not text:
            return None

        result_list = []
        try:
            for raw in self._JSON_VAR_RE.findall(text):
                result_list.extend(self._parse_payload(raw))
        except (ValueError, SyntaxError, TypeError, RecursionError, MemoryError) as e:
            # Malformed payload, or a payload that is not iterable.
            print(e)
            return None

        return result_list

    def db(self, result_list):
        '''Insert every proxy entry into the Mongo collection.

        Args:
            result_list: iterable of dicts as produced by re_data(). None or
                an empty list stores nothing; items that are not dicts are
                skipped.
        '''
        print('开始储存免费代理ip')

        stored = 0
        for data in result_list or ():
            if not isinstance(data, dict):
                continue

            # The source keys do not match their meaning ('date' holds the
            # ip, 'high' the location, 'low' the speed); the mapping below is
            # kept exactly as the original code defined it. Missing keys are
            # stored as None instead of raising.
            data_to_insert = {
                "ip": self._strip_spaces(data.get('date')),
                "port": self._strip_spaces(data.get('port')),
                "location": data.get('high'),
                "speed": data.get('low'),
                "sunset": data.get('sunset'),
                "rq": self._strip_spaces(data.get('rq')),
                "create_time": int(time.time()),
                "create_datetime": datetime.now().strftime("%Y-%m-%d %H:%M:%S")
            }
            self.mongo.collection.insert_one(data_to_insert)
            stored += 1

        print('数据已储存, 共储存数据{}条'.format(stored))


if __name__ == '__main__':
    proxy = IpProxy()
    proxy.main()