Python 爬虫——嘉兴水果指数获取

icekele / 2023-05-03 / 原文

1.抓包参数分析

 

从抓包结果可以看出:stageId 参数随时间变化而变化;pageNo 随页数递增(每翻一页 +1);其他参数保持不变。

2.代码部分

import requests
import re
def orderBy_get(timeout=10):
    """Download the site's ``price.js`` script and return its ``ORDERBY`` value.

    Args:
        timeout: Seconds to wait for the server before giving up. Without a
            timeout ``requests`` may block indefinitely on a stalled
            connection.

    Returns:
        str: The text between the single quotes of the first
        ``var ORDERBY = '...';`` statement found in the script.

    Raises:
        requests.RequestException: If the request fails, times out, or the
            server replies with an HTTP error status.
        IndexError: If the script contains no ``var ORDERBY = '...';``
            statement.
    """
    url = 'http://jxzgsgzs.com/js/price.js?v=1.7.2'
    # NOTE: the User-Agent value is blank in this listing; fill in a real
    # browser string if the server rejects empty agents.
    header = {
        'User-Agent': ''
    }
    response = requests.get(url=url, headers=header, timeout=timeout)
    # Fail loudly on 4xx/5xx instead of running the regex over an error page.
    response.raise_for_status()

    found = re.search(r"var ORDERBY = '(.*?)';", response.text)
    if found is None:
        # Same exception type the previous ``findall(...)[0]`` produced,
        # but with a message that explains what went wrong.
        raise IndexError("no \"var ORDERBY = '...';\" statement found in " + url)
    return found.group(1)
def ids_get(timeout=10):
    """Query the ``stage`` endpoint and return the first stage's id and end time.

    The request asks for ``pageSize=1`` ordered by ``-reportTimeStart``, so
    the first entry is presumably the most recent reporting stage (confirm
    against the API).

    Args:
        timeout: Seconds to wait for the server before giving up. Without a
            timeout ``requests`` may block indefinitely on a stalled
            connection.

    Returns:
        tuple | None: ``(id, reportTimeEnd)`` taken from the first entry of
        the response's ``data`` field, or ``None`` when ``data`` is empty.

    Raises:
        requests.RequestException: If the request fails, times out, or the
            server replies with an HTTP error status.
        KeyError: If the response lacks ``data`` or the entry lacks ``id`` /
            ``reportTimeEnd``.
    """
    url = 'http://jxzgsgzs.com/jia-xing-fruit-webapi/stage?reportCycle=10&rawDataIsPublish=true&pageSize=1&orderBy=-reportTimeStart'
    # NOTE: the User-Agent value is blank in this listing; fill in a real
    # browser string if the server rejects empty agents.
    header = {
        'User-Agent': ''
    }
    response = requests.get(url=url, headers=header, timeout=timeout)
    # Fail loudly on 4xx/5xx instead of trying to parse an error page as JSON.
    response.raise_for_status()
    json_list = response.json()

    # Only the first entry is wanted, so return as soon as one is seen.
    for dic in json_list['data']:
        return dic['id'], dic['reportTimeEnd']
    # No stage listed: make the previously implicit ``None`` result explicit.
    return None

  

import requests
import json
import time
from test import ids_get,orderBy_get
import csv  # needed only by this script section: rows are written via csv.writer

# Scrape pages 1..28 of the rawDataExpansion endpoint and append one CSV row
# per record to '嘉兴水果数据.csv'.

# Shared request parameters are fetched once, before paging starts.
ids, reportTimeEnd = ids_get()
orderBy = orderBy_get()

# Loop-invariant request settings.
url = 'http://jxzgsgzs.com/jia-xing-fruit-webapi/rawDataExpansion?'
headers = {
    "User-Agent": ""
}

for datadd in range(1, 29):
    print('数据爬取第{}页'.format(datadd))
    data = {
        'stageId': ids,
        'orderBy': orderBy,
        'parentStructId': 1,
        'pageNo': datadd
    }
    # A timeout keeps one stalled connection from hanging the whole run.
    json_ids = requests.get(url=url, headers=headers, params=data, timeout=10).json()
    # Pause between page requests.
    time.sleep(5)

    # Open the output once per page rather than once per record. newline=''
    # lets the csv module control line endings; each row ends with '\n'.
    with open('嘉兴水果数据.csv', 'a', encoding='utf-8', newline='') as f:
        writer = csv.writer(f, lineterminator='\n')
        for dic in json_ids['data']:
            # Key order here is the column order of the CSV row.
            content = {
                'category': dic['category'],
                'city': dic['city'],
                'id': str(dic['id']),
                'kind': dic['kind'],
                'placeOfOrigin': dic['placeOfOrigin'],
                'price': str(dic['price']),
                'specification': dic['specification'],
                'totalSalesVolume': str(dic['totalSalesVolume']),  # total sales amount
                'totalTurnover': str(dic['totalTurnover']),  # total turnover
                'data': str(reportTimeEnd),
            }
            content_list = [content]
            print(content_list)

            # csv.writer quotes fields containing commas, quotes or newlines,
            # which plain ','-concatenation would silently corrupt.
            writer.writerow(list(content.values()))

3.数据展示