import requests
import time
import os
from lxml import etree
from fake_useragent import UserAgent
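# Request headers with a randomized User-Agent so repeated runs look less uniform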
headers = {
    'user-agent': UserAgent().random
}
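# Listing page of the image albums ("tupian" means "pictures")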
url = 'https://www.ivsky.com/tupian/'
res = requests.get(url=url, headers=headers)
# Grab the cookies the site sets on this first response
dt = res.cookies.get_dict()
# Cookies to carry on the follow-up request
'''
How the cookie rule was worked out in the browser:
First clear the site's cookies under Application in DevTools, then refresh the
page. Two document requests with the same name show up (here: tupian/), the
first and the third. Open the first one and look at Set-Cookie in its Response
Headers, then take the cookie actually carried by the third request as the
correct one. Comparing the cookie set on the first request with that correct
cookie reveals the transformation rule implemented below.
'''
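# Rule derived from the analysis above: send the token back unchanged as 't',
# and send the secret minus 100 as 'r'.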
cookies = {
    't': dt['token'],
    'r': str(int(dt['secret']) - 100)
}
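# NOTE: the cookie names ('token'/'secret' -> 't'/'r') and the -100 offset are
# specific to this site and may change if its anti-scraping scheme is updated.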
# Second request, now carrying the adjusted cookies
res = requests.get(url=url, headers=headers, cookies=cookies)
html = res.text
e = etree.HTML(html)
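# Thumbnail image URLs on the listing page (protocol-relative, without "https:")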
lst = e.xpath('//div[@class="il_img"]/a/img/@src')
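# Earlier version: download the listing-page thumbnails directly (kept for reference)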
# for i in range(len(lst)):
#     url = 'https:' + lst[i]
#     res = requests.get(url=url, headers=headers)
#     code = res.content
#     with open('./img/' + str(i) + '.jpg', 'wb') as f:
#         f.write(code)
#     time.sleep(1)
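# Album links (relative hrefs) and album names from the listing page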
url_lst = e.xpath('//ul[@class="ali"]/li/p/a/@href')
name_lst = e.xpath('//ul[@class="ali"]/li/p/a/text()')
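# Visit each album page with the adjusted cookies and save its thumbnails
# into ./img/<album name>/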
for url, name in zip(url_lst, name_lst):
    url = 'https://www.ivsky.com' + url
    # One sub-directory per album, named after the album title
    if not os.path.exists(f'./img/{name}'):
        os.makedirs(f'./img/{name}')
    res = requests.get(url=url, headers=headers, cookies=cookies)
    html = res.text
    e = etree.HTML(html)
    p_lst = e.xpath('//div[@class="il_img"]/a/img/@src')
    # Download every thumbnail in this album
    for i in range(len(p_lst)):
        url = "https:" + p_lst[i]
        res = requests.get(url=url, headers=headers)
        code = res.content
        with open(f'./img/{name}/' + str(i) + '.jpg', 'wb') as f:
            f.write(code)
        time.sleep(0.5)
        print(f'Downloaded image {i} of album {name}')