mohurd
js
const CryptoJS = require('crypto-js');

// AES key and IV used by h() below. Both are parsed from 16-character
// UTF-8 strings, i.e. 128-bit values.
const AES_KEY = CryptoJS.enc.Utf8.parse("jo8j9wGw%6HbxfFn");
const AES_IV = CryptoJS.enc.Utf8.parse("0123456789ABCDEF");

/**
 * Decrypt a hex-encoded AES-CBC (PKCS#7 padded) ciphertext.
 *
 * The function is kept as a plain top-level declaration named `h` because the
 * Python crawler loads this file and calls it by that name.
 *
 * @param {string} t - Ciphertext as a hexadecimal string.
 * @returns {string} The decrypted payload decoded as UTF-8.
 */
function h(t) {
  // CryptoJS.AES.decrypt accepts a Base64 string, so re-encode hex -> Base64.
  const cipherWords = CryptoJS.enc.Hex.parse(t);
  const cipherBase64 = CryptoJS.enc.Base64.stringify(cipherWords);
  const plainWords = CryptoJS.AES.decrypt(cipherBase64, AES_KEY, {
    iv: AES_IV,
    mode: CryptoJS.mode.CBC,
    padding: CryptoJS.pad.Pkcs7,
  });
  // toString(Utf8) already yields a string; no further conversion is needed.
  return plainWords.toString(CryptoJS.enc.Utf8);
}

// Manual smoke test: paste a hex ciphertext into `data` to decrypt it by hand.
const data = "";
console.log(h(data));
python
import json
import os
import subprocess
from functools import partial

import pandas as pd
import requests

# Patch Popen *before* importing execjs so that the child processes it spawns
# exchange text as UTF-8 (presumably to avoid decode errors on platforms whose
# default encoding is not UTF-8 -- TODO confirm).
subprocess.Popen = partial(subprocess.Popen, encoding="utf-8")

import execjs

from mySaveMoudle import MySaveData


class JZSC:
    """Crawl the company list of jzsc.mohurd.gov.cn, decrypt it and save it.

    Each page is fetched as encrypted text, decrypted by calling the ``h``
    function of ``mohurd.js`` through execjs, and then written to CSV and JSON
    via ``MySaveData``.
    """

    # Seconds to wait for the server before giving up on a request.
    REQUEST_TIMEOUT = 30
    # JavaScript file that provides the ``h`` decrypt function.
    JS_FILE = 'mohurd.js'
    # Base name handed to MySaveData for the output folder and files.
    SAVE_NAME = '22021建筑市场数据'

    def __init__(self, total_page):
        """
        :param total_page: number of pages to crawl.
        """
        self.url = 'https://jzsc.mohurd.gov.cn/APi/webApi/dataservice/query/comp/list'
        self.encrypt_data = ''
        self.dec_data = ''
        self.total_page = total_page
        # Compiled JS context, created on first use and reused for every page.
        self._js_ctx = None

    def get_encrypt_data(self, pg):
        """Request page ``pg`` and store the raw (encrypted) body in ``self.encrypt_data``.

        :param pg: page index sent to the API as the ``pg`` query parameter.
        :raises requests.RequestException: on network errors or timeout.
        """
        params = {
            'pg': pg,
            'pgsz': 15,
            'total': 450
        }
        headers = {
            # Must be a well-formed URL; the stray spaces it used to contain made it invalid.
            'Referer': 'https://jzsc.mohurd.gov.cn/data/company',
            'User-Agent': 'Mozilla/5.0 (Windows NT 10.0; Win64; x64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/114.0.0.0 Safari/537.36'
        }
        response = requests.get(
            self.url,
            headers=headers,
            params=params,
            timeout=self.REQUEST_TIMEOUT,
        )
        self.encrypt_data = response.text

    def js_decrypt_data(self):
        """Decrypt ``self.encrypt_data`` with the JS ``h`` function into ``self.dec_data``."""
        if self._js_ctx is None:
            # Compile the script once instead of re-reading it for every page.
            with open(self.JS_FILE, 'r', encoding='utf-8') as f:
                self._js_ctx = execjs.compile(f.read())
        self.dec_data = self._js_ctx.call("h", self.encrypt_data)

    def parse_save_data(self):
        """Extract the company rows from ``self.dec_data`` and persist them.

        :return: ``True`` when there is nothing (more) to save and the crawl
            should stop, ``False`` when the page was saved.
        """
        # A page index past the end (or any non-JSON reply) cannot be parsed
        # into data.list; treat that as the end of the crawl.
        try:
            json_data = json.loads(self.dec_data)
            data_list = json_data['data']['list']
        except (ValueError, KeyError, TypeError):
            print("hhhhh,结束了")
            return True

        # An empty page also means there is nothing left to fetch.
        if not data_list:
            print("hhhhh,结束了")
            return True

        # CSV header: unified social credit code, company name,
        # legal representative, registered region.
        tb_header = ['统一社会信用代码', '企业名称', '企业法定代表人', '企业注册属地']
        for data in data_list:
            org_code = data.get('QY_ORG_CODE')
            save_list = [
                # The trailing tab presumably keeps spreadsheet tools from
                # reformatting the code as a number -- TODO confirm.
                org_code + '\t' if org_code else None,
                data.get('QY_NAME') or None,
                data.get('QY_FR_NAME') or None,
                data.get('QY_REGION_NAME') or None,
            ]
            # NOTE(review): 'ANSI' resolves to the Windows-only mbcs codec, so
            # this call is expected to fail on other platforms -- confirm the
            # intended target before changing the output encoding.
            MySaveData(self.SAVE_NAME, tb_header, save_list, file_encoding='ANSI').csv_save()

        # Store the raw records of this page in the JSON file as well.
        MySaveData(self.SAVE_NAME, json_list=data_list).json_save()
        return False

    def run(self):
        """Crawl ``total_page`` pages (indices 0 .. total_page - 1), stopping early when data runs out."""
        for i in range(self.total_page):
            # Fetch the encrypted payload.
            self.get_encrypt_data(i)
            # Decrypt it through the JS helper.
            self.js_decrypt_data()
            # Extract and save; a True result means the crawl is finished.
            finish = self.parse_save_data()
            if finish:
                return
            print(f'恭喜,第{i + 1} 页数据爬取完毕!!!')


# https://jzsc.mohurd.gov.cn/data/company
if __name__ == '__main__':
    total_page = int(input(">>>请输入要爬取的页码总数:"))
    JZSC(total_page).run()
import json
import os

import pandas as pd


class MySaveData:
    """Persist scraped rows to ``<module dir>/<file_name>/<file_name>.csv`` and ``.json``.

    One instance carries one CSV row (``csv_list``) and/or one JSON payload
    (``json_list``); callers create a new instance per save.
    """

    # Shared accumulator for json_save(). It lives on the class, so every
    # instance (regardless of file_name) appends to the same list for the
    # lifetime of the process.
    JSON_DATA_DICT = {'info': []}

    def __init__(self, file_name=None, head_list=None, csv_list=None, json_list=None, file_encoding="utf-8"):
        """
        :param file_name: base name of the output folder and of both files.
        :param head_list: CSV header cells; when ``None`` no CSV file is created.
        :param csv_list: a single CSV row to append with ``csv_save``.
        :param json_list: payload to append with ``json_save``.
        :param file_encoding: encoding used for both output files.
        """
        self.file_name = file_name
        self.head_list = head_list
        self.csv_list = csv_list
        self.json_list = json_list
        self.file_encoding = file_encoding

        # Outputs live in a folder next to this module. os.path.join is used so
        # the path is correct on every platform, not only with '\\' separators.
        module_dir = os.path.dirname(os.path.abspath(__file__))
        self.folder_path = os.path.join(module_dir, f'{self.file_name}')
        self.judge_folder()

        self.csv_path = os.path.join(self.folder_path, f'{self.file_name}.csv')
        self.json_path = os.path.join(self.folder_path, f'{self.file_name}.json')
        self.judge_csv_file()

    def judge_folder(self):
        """Create the output folder if it does not exist yet."""
        os.makedirs(self.folder_path, exist_ok=True)

    def judge_csv_file(self):
        """Create the CSV file with its header row if it does not exist yet."""
        # JSON-only instances pass no header; there is nothing to write then.
        if self.head_list is None:
            return
        if not os.path.exists(self.csv_path):
            tb_head = ",".join(self.head_list) + '\n'
            with open(self.csv_path, "w", encoding=self.file_encoding) as wf:
                wf.write(tb_head)

    def csv_save(self):
        """Append ``csv_list`` as one row to the CSV file."""
        row = pd.DataFrame(data=[self.csv_list])
        # mode='a' appends; no index column and no header row are written.
        row.to_csv(self.csv_path, mode='a', index=False, header=False, encoding=self.file_encoding)

    def json_save(self):
        """Append ``json_list`` to the shared accumulator and rewrite the JSON file."""
        MySaveData.JSON_DATA_DICT['info'].append(self.json_list)
        json_data = json.dumps(MySaveData.JSON_DATA_DICT, indent=2, ensure_ascii=False)
        # The whole accumulated document is rewritten on every call.
        with open(self.json_path, 'w', encoding=self.file_encoding) as w:
            w.write(json_data)