| 12345678910111213141516171819202122232425262728293031323334353637383940414243444546474849505152535455565758596061626364656667686970717273747576777879808182838485868788899091929394 |
- # -*-coding: utf-8 -*-
- import datetime
- import os
- import sqlite3
- from selenium import webdriver
- import httpx
def get_cookies(url):
    """Open *url* in headless Chrome and return the cookies the browser holds.

    Args:
        url: Address to load in the browser.

    Returns:
        The list returned by ``driver.get_cookies()``, or ``None`` when that
        list is empty.
    """
    chrome_options = webdriver.ChromeOptions()
    for arg in ('--headless', '--no-sandbox', '--disable-gpu', '--disable-dev-shm-usage'):
        chrome_options.add_argument(arg)

    driver = webdriver.Chrome(options=chrome_options)
    try:
        driver.get(url)
        result_cookie = driver.get_cookies()
    finally:
        # Always shut the browser down, even if loading the page fails;
        # otherwise each call leaves a Chrome/chromedriver process behind.
        driver.quit()

    # Callers receive None (not an empty list) when no cookie was set.
    return result_cookie or None
def req(url, cookies):
    """Fetch *url* with the given cookie header and store the returned draws.

    On a non-200 response the status code is printed and appended to a
    per-day log file, and nothing is stored. On success the ``'result'``
    entry of the JSON body is handed to ``data_handle``.

    Args:
        url: Address of the draw-notice endpoint to query.
        cookies: Value sent verbatim as the ``Cookie`` request header.

    Returns:
        None.
    """
    headers = {
        "Accept": "text/html,application/xhtml+xml,application/xml;q=0.9,image/webp,image/apng,*/*;q=0.8,application/signed-exchange;v=b3;q=0.7",
        "Accept-Language": "zh-CN,zh;q=0.9,en;q=0.8,en-GB;q=0.7,en-US;q=0.6",
        "Connection": "keep-alive",
        "Cookie": cookies,
        "Host": "www.cwl.gov.cn",
        "User-Agent": "Mozilla/5.0",
    }

    with httpx.Client() as client:
        res = client.get(url, headers=headers, follow_redirects=True)

        if res.status_code != 200:
            print(res.status_code)
            # NOTE(review): `get_path` is not imported in the visible part of
            # this file -- confirm it is in scope, otherwise this branch
            # raises NameError instead of logging.
            log_file_path = os.path.join(
                get_path.get_logs_path(), str(datetime.date.today()) + '.log'
            )
            with open(log_file_path, 'a', encoding='utf-8') as f:
                # The original wrote the literal text "%s"; interpolate the
                # status code so the log entry is actually useful.
                # NOTE(review): the "spider_dlt" label looks copied from
                # another spider (this one fetches ssq) -- confirm.
                f.write("\n spider_dlt: %s" % res.status_code)
            return

        res_json = res.json()
        data_handle(res_json['result'])
def data_handle(source_data):
    """Rebuild the ``ssq`` table in ``ssq.db`` from *source_data*.

    The table is dropped and recreated on every call, then one row is
    inserted per entry with ids counting up from 1 in input order. Values
    are bound as SQL parameters, so quotes in the data cannot break or alter
    the statement, and a missing key is stored as NULL.

    Args:
        source_data: Iterable of dicts. The keys read are ``code``, ``red``
            (a comma-separated string; the first six parts are stored),
            ``blue``, ``date``, ``sales``, ``poolmoney`` and ``content``.

    Raises:
        ValueError: If an entry's ``red`` value has fewer than six parts.
    """
    # NOTE(review): `utils_get_path` is not imported in the visible part of
    # this file -- confirm it is in scope.
    ssq_db_path = os.path.join(utils_get_path.get_db_path(), 'ssq.db')

    # Build every row before touching the database so malformed input is
    # rejected without leaving a half-rebuilt table.
    rows = []
    for row_id, data in enumerate(source_data, start=1):
        reds = (data.get('red') or '').split(',')
        if len(reds) < 6:
            raise ValueError(
                "entry %d: expected 6 red numbers, got %r" % (row_id, data.get('red'))
            )
        rows.append((
            row_id,
            data.get('code'),
            *reds[:6],
            data.get('blue'),
            data.get('date'),
            data.get('sales'),
            data.get('poolmoney'),
            data.get('content'),
        ))

    conn = sqlite3.connect(ssq_db_path)
    try:
        # `with conn` commits the inserts together, or rolls them back if
        # one of them fails.
        with conn:
            # Drop the table that is actually refilled below. The original
            # dropped an unrelated `data` table, so a second run collided
            # with the existing primary keys.
            conn.execute('drop table if exists ssq;')
            conn.execute(
                'create table if not exists `ssq` (id INT PRIMARY KEY NOT NULL, `code` varchar(10),`red1` varchar(2),`red2` varchar(2),`red3` varchar(2),`red4` varchar(2),`red5` varchar(2),`red6` varchar(2),`blue` varchar(2),`date` varchar(12),`sales` varchar(15),`poolmoney` varchar(15),`content` varchar(255));')
            conn.executemany(
                'INSERT INTO ssq '
                '(`id`, `code`, `red1`, `red2`, `red3`, `red4`, `red5`, `red6`, '
                '`blue`, `date`, `sales`, `poolmoney`, `content`) '
                'VALUES (?, ?, ?, ?, ?, ?, ?, ?, ?, ?, ?, ?, ?)',
                rows,
            )
    finally:
        conn.close()
if __name__ == "__main__":
    # Draw-notice query for the "ssq" game: first page, ten results.
    draw_notice_url = (
        'http://www.cwl.gov.cn/cwl_admin/front/cwlkj/search/kjxx/findDrawNotice'
        '?name=ssq&issueCount=&issueStart=&issueEnd=&dayStart=&dayEnd='
        '&pageNo=1&pageSize=10&week=&systemType=PC'
    )

    # Cookie used while testing. A live value was previously built as
    # "<name>=<value>" from the first cookie returned by
    # util_get_cookies.get_cookies(url); that lookup is currently disabled.
    test_cookie = (
        "HMF_CI="
        "1b2fd73192f2054a429b2bfa4f58c3ff98119441420133cc8a04ca9c95aa2266eaec5bb7cf1d37df5f9864b8629ba407bacc9c58cadf26e2d726582df3870b0969"
    )

    req(draw_notice_url, test_cookie)
|