spider_get_free_ip_proxy.py 2.9 KB

12345678910111213141516171819202122232425262728293031323334353637383940414243444546474849505152535455565758596061626364656667686970717273747576777879808182838485868788899091929394
  1. # -*- coding: utf-8 -*-
  2. '''
  3. 获取ip代理
  4. '''
  5. import os
  6. import sys
  7. sys.path.append(os.path.join(os.getcwd().split('auto')[0], 'auto'))
  8. from datetime import datetime
  9. import time
  10. import re
  11. import httpx
  12. from tools_mongo_handle import MongoHandle
  13. from tools_logs_handle import LogsHandle
  class IpProxy(object):
      """Fetch the free-proxy listing page and store its entries in MongoDB.

      The workflow is ``request()`` -> ``re_data()`` -> ``db()``, driven by
      ``main()``. Progress is printed to stdout and start/finish/error events
      are written through ``LogsHandle``.
      """

      # Name under which every log record of this spider is written.
      _LOG_NAME = 'get_free_ip_proxy'

      def __init__(self):
          """Create the log writer and the handle of the target collection."""
          self.log_handle = LogsHandle()
          # Not read anywhere in this class; kept so external readers of the
          # attribute keep working.
          self.now_day = time.strftime('%Y-%m-%d', time.localtime())
          db = 'free_ip'
          collection = 'free_ip'
          # NOTE(review): del_collection=True presumably empties the previous
          # collection as soon as the handle is built, i.e. before the fetch
          # is known to succeed -- confirm against MongoHandle.
          self.mongo = MongoHandle(db=db, collection=collection, del_db=False, del_collection=True, auto_remove=0)

      def main(self):
          """Run one fetch/parse/store cycle and log its outcome.

          Nothing is stored, and an ``error`` record is logged instead of the
          ``done`` record, when the download or the parsing yields no entries.
          """
          self.log_handle.logs_write(self._LOG_NAME, '开始获取免费ip', 'start', False)
          text = self.request()
          result_list = self.re_data(text)
          if not result_list:
              # Previously a ``None`` result was handed to db(), which crashed
              # while iterating it.
              self.log_handle.logs_write(self._LOG_NAME, '未获取到免费ip数据', 'error', False)
              return
          self.db(result_list)
          self.log_handle.logs_write(self._LOG_NAME, '获取免费ip已完成', 'done', False)

      def request(self):
          """Download the proxy listing page.

          Returns:
              The response body decoded as UTF-8, or ``None`` when the request
              raises an ``httpx.HTTPError`` or the status code is not 200. Both
              failure cases are written to the log with status ``error``.
          """
          print('开始获取免费代理ip')
          url = 'https://www.dailiproxy.com/cn-free/'
          # NOTE(review): advertising "br" asks the server for Brotli, which
          # httpx can only decode when an optional brotli package is
          # installed -- confirm the deployment has it.
          headers = {
              'User-Agent': 'Mozilla/5.0',
              'Accept': 'text/html,application/xhtml+xml,application/xml;q=0.9,image/webp,image/apng,*/*;q=0.8,application/signed-exchange;v=b3;q=0.7',
              'Accept-Encoding': 'gzip, deflate, br',
              'Accept-Language': 'zh-CN,zh;q=0.9,en;q=0.8,en-GB;q=0.7,en-US;q=0.6'
          }
          try:
              response = httpx.get(url=url, headers=headers)
          except httpx.HTTPError as exc:
              # Connection errors and timeouts used to propagate and abort the run.
              self.log_handle.logs_write(self._LOG_NAME, '请求失败: %s' % exc, 'error', False)
              return None
          if response.status_code != 200:
              self.log_handle.logs_write(self._LOG_NAME, '请求状态码: %s' % response.status_code, 'error', False)
              # Explicit failure value; there is no page text to hand back.
              return None
          response.encoding = "utf-8"
          text = response.text
          print('获取免费代理ip完成')
          return text

      @staticmethod
      def _parse_payload(payload):
          """Turn one captured ``var json = ...`` payload into Python data.

          Strict JSON is tried first; a Python-style literal (e.g. single-quoted
          strings) is accepted as a fallback. Unlike the former ``eval`` call,
          neither parser can execute code contained in the downloaded page.

          Raises:
              ValueError, SyntaxError, TypeError, MemoryError, RecursionError:
                  when the payload is not a plain literal.
          """
          import ast
          import json

          try:
              return json.loads(payload)
          except ValueError:
              # strip(): older literal_eval versions reject leading whitespace.
              return ast.literal_eval(payload.strip())

      def re_data(self, text):
          """Extract the entries embedded in the page as ``var json = ...;``.

          Args:
              text: Page HTML as returned by ``request()``. Anything that is
                  not a string (e.g. ``None`` after a failed download) counts
                  as a failure.

          Returns:
              A flat list with the items of every payload found (empty when the
              page contains none), or ``None`` when ``text`` is not a string or
              a payload cannot be parsed.
          """
          print('开始整理数据')
          if not isinstance(text, str):
              return None
          result_list = []
          # The capture is non-greedy and stops at the first ';', so a payload
          # containing ';' inside a string is cut short and fails to parse.
          for payload in re.findall(r'var json = ([\S\s]*?);', text):
              try:
                  result_list.extend(self._parse_payload(payload))
              except (ValueError, SyntaxError, TypeError, MemoryError, RecursionError) as e:
                  print(e)
                  return None
          return result_list

      @staticmethod
      def _strip_spaces(value):
          """Remove spaces from string values; return anything else unchanged."""
          return value.replace(' ', '') if isinstance(value, str) else value

      def db(self, result_list):
          """Insert the parsed entries into the Mongo collection, one by one.

          Args:
              result_list: Iterable of record dicts as produced by
                  ``re_data()``; ``None`` is treated as empty. Items that are
                  not dicts are skipped.

          Missing keys are stored as ``None`` and non-string values (such as a
          numeric port) are stored as they are, instead of raising.
          """
          print('开始储存免费代理ip')
          stored = 0
          for data in result_list or []:
              if not isinstance(data, dict):
                  continue
              # The payload keys ('date', 'high', 'low') do not describe their
              # content; the key-to-field mapping is kept exactly as it was.
              # get() is used so the source record is no longer mutated.
              data_to_insert = {
                  "ip": self._strip_spaces(data.get('date')),
                  "port": self._strip_spaces(data.get('port')),
                  "location": data.get('high'),
                  "speed": data.get('low'),
                  "sunset": data.get('sunset'),
                  "rq": self._strip_spaces(data.get('rq')),
                  "create_time": int(time.time()),
                  "create_datetime": datetime.now().strftime("%Y-%m-%d %H:%M:%S")
              }
              self.mongo.collection.insert_one(data_to_insert)
              stored += 1
          print('数据已储存, 共储存数据{}条'.format(stored))
  if __name__ == '__main__':
      # Script entry point: build the spider and run a single fetch/store cycle.
      IpProxy().main()