mohurd
js
// Decryption helper for the hex-encoded AES payloads that the companion
// Python crawler downloads and passes to `h`.
const CryptoJS = require('crypto-js');

// Fixed 16-character key and IV, converted to CryptoJS WordArrays from their
// UTF-8 bytes. The names `f` and `m` are kept from the original script.
const f = CryptoJS.enc.Utf8.parse("jo8j9wGw%6HbxfFn");
const m = CryptoJS.enc.Utf8.parse("0123456789ABCDEF");

/**
 * Decrypt a hex-encoded AES-CBC / PKCS#7 ciphertext with the fixed key and IV.
 *
 * @param {string} t - Ciphertext as a hexadecimal string.
 * @returns {string} The decrypted plaintext decoded as UTF-8.
 */
function h(t) {
  // CryptoJS.AES.decrypt accepts a Base64 string as ciphertext, so the hex
  // input is first parsed to bytes and then re-encoded as Base64.
  const cipherBytes = CryptoJS.enc.Hex.parse(t);
  const cipherBase64 = CryptoJS.enc.Base64.stringify(cipherBytes);
  const decrypted = CryptoJS.AES.decrypt(cipherBase64, f, {
    iv: m,
    mode: CryptoJS.mode.CBC,
    padding: CryptoJS.pad.Pkcs7,
  });
  return decrypted.toString(CryptoJS.enc.Utf8);
}

// Sample ciphertext used by the demo call below.
// NOTE(review): in the source this literal was broken across several physical
// lines (invalid inside a quoted string). The fragments are assumed to be
// contiguous and are re-joined here without separators - confirm against a
// freshly captured response if the demo output looks wrong.
const data = [
  "95780ba0943730051dccb5fe3918f9fe1b6f2130681f99d5620c5497aa480f13d564cdb3a9b884b9556227cf9686b33e0ce10898368372f42059169f11683eca640b7ca93f7c09625c0f836c027e4235f735c4bd68cb2964eb81280a763f22c2b48240b92d5ff0fe4e60963c85539a877f80c8bac2f9d3ce949305576f443d4714e42d7a272e3cb6aa297d06d6c21e0a9fe059244db9f0363207ca249a895ae3df501c28e15f13f68a56f418987bb2f98bac364a0c82119e39062c0fa9439078f44c14b57f586a08fdc9b223fb8fe66ca3e023bf1d5201c14898f38686f02bed7d48282e34febbc7150a091da5c30981a14a455e1dcfaa6c833b828bb7a009c6cddbf4ea6dde41069ef06e8cce96e13acdcbc7bfec8461099822193ee774b3c706259b25b2eb107795bd5e003c9da2bb42a541ffedd7235ba7a5644af9ccb603df05e16d9253fd10e7e33baa3daab35709cc2255f386c783b2d709b5b329d7a70965ab5b19fe6e0490366a998475608ec3021856ee1364c8bfce71f7776a29aac70a1c102a3be7075dab298db2beff9708856e8ce28037721c1bf1faade6eb65bac51c6487c6261d5cb4173d699663f6aaf4a224ee52adf00289bd4ba22137d1ffb92d375be92e6f849e8a130aecc78f9163676d816a90acd78fed35c299c303d4021eae639bcbe16fef1b8fb005ce6a19dd5ff2e34badfe037856a57bdc4e8a9246fe845b1e3a5af18c8b52be9ff2a59863acdfcbe90e587e12df3d1128d29c341f34ab401f3cfb5dd30fee404202dca573c88bb9177f20f815c17a2e12d86906b6ac133aba1d83bcc21cd2f65e0f402e191ee05a9aa8bcda4771da9ba930960c80542ed4d16fc2ec7565576fd90dd044c50f8083b6f32ed5bc057ad6704b00bf0bd730149c6ecf707411f29c34e0711ba2a772af0c09400c3c41de56c092d1e8a1bf155c40e91742cddbc6478e0b5e5d881ced2d2f310581e924d677822d0057270ee50858988cdd3de94b021a88e9a81ff6b443222f3d273a70249cf8cac619185193a86da975396fa6cee1b37f3c0c2824c7b7d42488ad1b437f4bea78e0db7566c917a626f3e6ffbedca69303dff16804d6d4a2b36786e1ce24ae88caca45d4cfef3",
  "23f72fa260afd9d1beec407313bbc418714adf5f7fe5299ece80ed852ad9cc9526e723f7dd5bf1afb8e9113a364d47efb687d46a1d45fcf98b75e38d752a8647bd975a12fb70152654d69d2cbe3f92adb5b7d68ff02335d323e9dbb5e091210de24c454f927b63bafad00788037e2e2307b09b29e6bef2e866721ad79420f667ac4dee20aa0cb65be9d953eac42384b5c3edc6ff0c2293b72e1b0098b1e4ff932e101570d8171f836ba3ffe4b0b093c911136345ee698fadde54efe1bbd097c6b3d7e4b388e72feead5910cd444b0ed111749f144943a8a87d060d248afb9f738eb1da673f6f24075da08bfe5958ec3b25bb79c535156f7bfe758b6aba06923817e9176db2817ee7d82809366dbf88e8a6fd9aff4637f3c0ef4132f985892f1c21322c37646c3f39eff84c556206cb9c0824fae029e856dedab11a3116ddad18f749a8b1817069d27141b84375ed8d02cf3a34ae6f8e25b243ec320b160dc4cc8c89ca8e11d86beb15b83873e066a31d55aac110e9ea207c10d39a0551b5d877cdc0e382201b1dab5ac0e37109d231432f0bcb8ac092cc16a45c6b5a797d00809cd6123ce53a2861a6982b7c1f8a1e006a5100f3746612de0fdffde690f073991f81f525b97c688b8b30278bea43e06a573f3b79acefdec744f475a082469234e2763f82a9aa11d1370e2541ea38171da840e1f9bad03b614f670f5a84788c6b4b40efe87b07df41c93cd44724d20b4978c514336b36645fe326c77419152e4b188fddb117ec7c09b8ac768ed426aca81780cca1c22f4708e57d10b2047764d23e47f108ac6657996f677c385c03291d0c06d8432fe8b86ce9ee3f3826467ab2c890017f368c2de861671bf8646210ed3acd24760b588328a8101ca5b83384e217178c38fa17b9c1f9d81ac968bd145437a21670444125558401cd44f2110edcfb83fcee0fd2962d913ea56c74fd674b96d03057ad873d48a80395c082b15a8d4b325d68349038552441b5fe9d27260bab1b64300315157c21dcc7f79e0564edc90d0f1a0b1411e4c7a600b8715100475bc59f4b41a829d8c3803d5afc84328cd341f1061ebc9b7810bb59f75e9a4d3dd1bde4ed6e98757cdbba740eead4eeb93c8b700ad196b298fccf1c9cb1641ea3d0c0ca05049f537d374c7ff87cbfc07bf98106fed5caa8995e6a48b77601384345bdc2893a27c65dead591a2d3cf8c3eae0282e9014bfef14e84e7aaed8a4de9ad697c8e36acd812e429290b080580aedfccb803e9d4961d86996202bb6dca4434f18dc6fab377b03a2a08f29002b0a239aa1b59f93605ce5b7e8b938e8987b3b3e411d91e1c5cf9172ed72d8e4093236953143baec83d94284c3342facb12d045e765372ec6ba418824ef56b8997c650e529f0729a6d08e",
  "a689f1992875b476df909041b82413ed33e52b37830a69f0201fd017b9b6d04f46221485fe9b972acb78b0916e70a1a043e9fa24377bedc15eb7b9ca8f981bf9c06d8758b33f7c0fe131099535bd59a8a18971c10d5e37a8a65207db4cf242fe4c53484dcb0d25243ee0e11e7ad5c692d93a4f059e86ee7eec982366730bae997419608b42d10639e4f9b6bdd0f645a390e012460be2262434484a1a6e0fd2b7a00fe650979b59c9299b1f3ac1babea4c95aaf134c589a33a79410cf6e96e19f7e50105e287b97095cbe57dcccbade8ee204777a431491b650e3cf1e3106ac4acb91e9f8a96c2384efe9b3e797626693210aafef4bd87081b256b4bd57c33526e92774d9d70c734fa26452b64f829425a7310ea19954cd028970c27e7d77f3162b1e78d2a03eeeee02823a895214537b9bdca8f24fa926625dd452b1a3c746c71d87e4e28eea48d163e6cc0399958f4634c78e7b5b06c2de81eda57f83651a253737da4359a6f3ffea0c6b78be46a47aae8571c19cb243ff186809937c09d77a3bb52cd245fec396664bf92425943db6150b03d13090c6ba11e002dc8dc60f664e4b704115d7cc9bbee68a2eb3e43db4c1d19339e6982d76113646eb5b6c46d0ab982ecb141619b9bde0e72869e785fbd7b8acbcb521cb618257324d3ff44cb0083e2ad759caa397cb80bb8a456668bfb01d7d9013e5232e8121eb34cf773b3af778740a029dc2bf622d98641c77fcb2de76634e71aecc072f9c4c754e259254307fc0b640c463889defeee3240b65e0e065b1d1d4dfbab53b092ea952f22fb9b4beefa92c1efb72ebaf201bd49d4e3c5a2d5360eb544a7e660cd81e8e19b72f4f1aa71c7b66f8995fb5a6e2062b39a4acb82f470b5b09e4bf5c18113c7577922800003b41882b8b19512c6647df2617f4a7ad35eedbac15ea156426df6b1a664c1199f8035307dbc46f806474fb7de2439f85b8a60c40a6fdb86bdb1494cadaad771593042af5c2019dea526dca67ecdb49a68427ed5ba291019b06630b27c776f6d4ec46f906ec9de37864332b2ab0cc00e04257e0dad4a9ef612595c683961df5a3a57faa2a93eac8f13ac5ce232ddbb6d98388369c297929c2649346cf7d8a889c5e27020468b130d84651f0bae72501011367cf0796708d65de5e8efbbdd9a0806140aa17f29dd54726f667d9596564f00e99e50651a942daf8f51c387f84d2286ba54613eb44a84a46ab4b104ac9d712d7f540e6d5af1d8d922a5ecfc196f03ced4212595230549135c6f1fe2a91e7ea6aa729172dbdef8c570eee460ae4c151addbaa3cf99fefeb3f4f9e6ba23e2a0e178f769c2d7afcfd86e8d286d03c2cb8a0d7e4e50a7adb2ef331290a0baaccbdec685fde356ef89f68d3a1770c00ae6ca9712fb468",
  "e6e921716ba9dacbcc216bbb914a01b6cb35554ab45872f5df4343ed3156aec58d7137f18aa0142265d54efcff7580250c9143bc7dbbed19ebaa3d91b90168e762e66d57dabcc3266c2322734d5dec8b3deaf2196169ea6bedb33814418574f2bf48b97d77f168cbf499cb8d2b85b16b75c2c171fed5ead7ecbbaeac63d963d96b57e8c160fdd7e5c111a6dcc70c9b90cbf5db9cf32ecd0becaa9983cb128765b0f1133b1a9a6aeaf0c53fd0ec2f7c2b20f9285560e05814b9130752fbaea1c31cc1993a0b1a6c9bb90ceea7d20141709323dd5db0aab713b1e83a8823a97602cd87855a21d402efd60beab0c3d2228a5fbd5a0f470cc9755b9e532fa196f67efa744f54733ec2e357151bf45e44f8c06c058a87d3d5b1087e2f0ec98b93a8936ef1eac4b4529830f51115415a5902945f1dcf7a6f0d1b9765152fdd73864357c4b62c22ad198d0f6b21c8c38782a01f1b45d6a2ccd0467f734083da908abaf68aee9aff0443cabf971e31d2b335f8a6694c9be73623b1e3c727320dc5dc2ae301e4adc8edd1e0a30d9e4455d0853f7ecbc49bf2f11dd81b69cf3fbbaf1b9c951a795bf78685fd42d8fd02216da5376b565dd4b5931fa9e82f85caea2ac80ac3ecba3ba1c7f274c1771ca6baca4ccea523ac2a8ea5eefc8fede4202d5fd51d908e208c756d3f46990a17e0b989ff43fb4d7e78f8cb42ca38a642afd2477c22ea590d2538943beb14c5839abd207f52d4fdf6ea8e37f0a8e213f0d0ac3a82a13bee7f61f875a9ee0fc2fb4f70d821a9985ad173fbbbd98df4cabaf990b4faa5f4d449026946508385803293a108293c064c319b1cfd9953498e271fbc52a27c3e1c8de5b25674e2ca589b1c352cfff9bc93c6bdd211c9631716f3e5c79e5531484f780619cb802884b5c887baef916ef9b490ef7f250e9ba48967fff86136bf245ef9bcd14981cbb7f1189ae624012092f94c770651fca56cc3d8acb8d39f45ccc77db05abe36ecd52997211a82f86f03e459bc18a3f9ade53246cd2cabfb866131fe56e086388c9e48e056520d8ad31545a6429ae418061cfccfb8ea1985fd3cfbaa92da7f46c8f88ddf067091ccb2310617c089bf0523fbfa6496c6dea3e60f2722ab411d9df6e716a4b05c8b190da52052f69c5df61a5d876671c429f90fb64a0e331cfb455ea6d874018735aa5a4f5d1597cfbe6b8a02077c2a3185bee18e059bd1b4f08f05cbe47b858aa68de2c9edc950149c6dd1030a052beb15084186186ead55954ec3e4786b90de2ce00c207e01b2aa4b4f3001128113dd0a52602909c54abdf0f77eb22a8ac5de691440bd3e00b9cf11e86d081dec15200661cb86e15e3775143fa0b0653a0833171497ac5a1086e82b9acc103ee141d6f0dcefe36ab45738e584f748",
  "bd2186d05965699927300f5447f7c66345cfd2fab62f73ad2fa27b67c9ba91ad8c2363012f67903e6a5a807a93094cced20ce56ed17d909ddaa3274bb894a7e9d7866540e4dbb7dcbf046d998bdbbbb66985c11c285b27c168f64c4d7f7c18a71c1ff3b01314d579efb788c2cf68062fd22bd494eddb698fa6cb14ca7d32c75bcf9edeeab3eb2f2799e4556f5775269756ac9bf475ddbabb4f830e9bcd1b28697794fa80e8d5186760696b5b84aab13da04dc18ef5dd95c85ff0f79cb7bd8ebfd657a9743a1fa38405e7aad7b4d8dd9cfe39288622ac1e74f9f5c90df0d194fc8569a947c926405b92e95c46988c73f11462658be77435271d54e000533266389a542e21873bb9f6060e17d04b0ae63619575c3ac2798675f7958faf985630d09c060f6b88a0fa93d69b46c22b3c698690240e4d8f7a7d1ad04221a2534f702271c6c46e99ea7c21af643ed9aea023503144d05f7c7128ea3c920ed904539cb5c95bd8f6af41e51fe60d515a8e189d3bd8745838d31869620a82d471c39937ce494d974e5559bee16d773c49ecb09cf013b26a111be06325d65e340b04becb4cd5f4f9b34232e5ce49f69f5123c32047bf23a466ec61f2299dbc4f76f6d8078c458ae1b0cabdd2fb4b315941b2c40a9d6c0c94bb5f21d5208e5080dce4de3f8e06951bac78b01e69b4de5a00bf56c17bd3cf28f6d62b8f5461abe6781662325144e9dca6181ab78ebb3a60cc189be844c7ebb94ec9315f0b1e5a6897342dea27e381fb1e22492db8fdc32b9bead802fa0991e006ab403b9aef2173431b81ea1aef632d163744fcd2338f3328c4c4e04ea79f72fd41305b4efdd74f8b12c5d95c020121f63a94d7fd59c5e05734a9a5fdd39f4284e49bceae1e6c69aa2964e7f4c77038b73a681b8674e92f4d45fd25def00a3ff2f7e7b77a468f72c8c410f8422a582d6",
].join("");

// Demo: decrypt the sample payload whenever this script is executed.
console.log(h(data));
python
import json
import os
import subprocess
from functools import partial

import pandas as pd
import requests

# Make every subprocess.Popen created from here on use UTF-8 text pipes.
# Kept ahead of `import execjs`, as in the original script.
# NOTE(review): presumably a workaround so the output of the JS runtime that
# execjs spawns is decoded as UTF-8 rather than the locale codec - confirm.
subprocess.Popen = partial(subprocess.Popen, encoding="utf-8")
import execjs

from mySaveMoudle import MySaveData


class JZSC:
    """Crawler for the company list API of jzsc.mohurd.gov.cn.

    For each page it downloads the encrypted response text, decrypts it by
    calling the ``h`` function of ``mohurd.js`` through execjs, and stores the
    extracted records as CSV rows and as JSON via ``MySaveData``.
    """

    # Seconds to wait for the server before requests raises a timeout error.
    REQUEST_TIMEOUT = 10

    def __init__(self, total_page):
        """
        :param total_page: number of pages the user asked to crawl.
        """
        self.url = 'https://jzsc.mohurd.gov.cn/APi/webApi/dataservice/query/comp/list'
        self.encrypt_data = ''
        self.dec_data = ''
        self.total_page = total_page
        # Compiled JS context, created lazily on the first decryption.
        self._js_ctx = None

    def get_encrypt_data(self, pg):
        """Download page ``pg`` and store the raw response text in ``self.encrypt_data``."""
        params = {
            'pg': pg,
            'pgsz': 15,
            'total': 450
        }
        headers = {
            # The original value had spaces inside the URL, which made it a
            # malformed Referer.
            'Referer': 'https://jzsc.mohurd.gov.cn/data/company',
            'User-Agent': 'Mozilla/5.0 (Windows NT 10.0; Win64; x64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/114.0.0.0 Safari/537.36'
        }
        # A timeout keeps the crawler from hanging forever on a stalled connection.
        self.encrypt_data = requests.get(
            self.url, headers=headers, params=params, timeout=self.REQUEST_TIMEOUT
        ).text

    def js_decrypt_data(self):
        """Decrypt ``self.encrypt_data`` with the JS function ``h`` into ``self.dec_data``."""
        # Read and compile mohurd.js only once instead of once per page.
        if getattr(self, '_js_ctx', None) is None:
            with open('mohurd.js', 'r', encoding='utf-8') as f:
                self._js_ctx = execjs.compile(f.read())
        self.dec_data = self._js_ctx.call("h", self.encrypt_data)

    def parse_save_data(self):
        """Extract the records from ``self.dec_data`` and save them.

        :return: ``True`` when there is nothing (more) to save - the decrypted
            text is not the expected JSON structure or the record list is
            empty, e.g. because the requested page is past the end - and
            ``False`` after a page has been saved.
        """
        # Only the parsing step is guarded: errors raised while writing the
        # output files must not be mistaken for "crawl finished".
        try:
            data_list = json.loads(self.dec_data)['data']['list']
        except (ValueError, KeyError, TypeError):
            print("hhhhh,结束了")
            return True
        if not data_list:
            print("hhhhh,结束了")
            return True

        tb_header = ['统一社会信用代码', '企业名称', '企业法定代表人', '企业注册属地']
        for data in data_list:
            # Missing or empty fields are stored as None.
            org_code = data.get('QY_ORG_CODE')
            save_list = [
                # A tab is appended to the credit code, as in the original
                # (presumably so spreadsheet tools keep it as text - confirm).
                org_code + '\t' if org_code else None,
                data.get('QY_NAME') or None,
                data.get('QY_FR_NAME') or None,
                data.get('QY_REGION_NAME') or None,
            ]
            MySaveData('22021建筑市场数据', tb_header, save_list, file_encoding='ANSI').csv_save()
        # Also store the raw records of this page in the JSON file.
        MySaveData('22021建筑市场数据', json_list=data_list).json_save()
        return False

    def run(self):
        """Crawl up to ``self.total_page`` pages, stopping early when a page has no data."""
        # The loop index is sent as `pg` and reported as page i + 1, so i = 0
        # is the first page; range(total_page) therefore fetches exactly the
        # requested number of pages (the original fetched one extra).
        for i in range(self.total_page):
            # Request the encrypted data of this page.
            self.get_encrypt_data(i)
            # Decrypt it through the JS code.
            self.js_decrypt_data()
            # Extract and save; a True result means the crawl is finished.
            if self.parse_save_data():
                return
            print(f'恭喜,第{i + 1} 页数据爬取完毕!!!')


# https://jzsc.mohurd.gov.cn/data/company
if __name__ == '__main__':
    total_page = int(input(">>>请输入要爬取的页码总数:"))
    JZSC(total_page).run()
import json
import os

import pandas as pd


class MySaveData:
    """Append rows to a CSV file and accumulate records in a JSON file.

    Both files live in a folder named after ``file_name`` that is created next
    to this module.
    """

    # Shared by all instances: every json_save() call appends to this dict and
    # rewrites the whole JSON file, so data accumulates across instances.
    JSON_DATA_DICT = {'info': []}

    def __init__(self, file_name=None, head_list=None, csv_list=None, json_list=None, file_encoding="utf-8"):
        """
        :param file_name: base name of the output folder and of the CSV/JSON files.
        :param head_list: CSV header columns; may be None when only JSON is saved.
        :param csv_list: one row of values for csv_save().
        :param json_list: data appended by json_save().
        :param file_encoding: encoding used for both output files.
        """
        self.file_name = file_name
        self.head_list = head_list
        self.csv_list = csv_list
        self.json_list = json_list
        self.file_encoding = file_encoding
        # Directory that contains this module.
        base_dir = os.path.dirname(os.path.abspath(__file__))
        # Built with os.path.join instead of hard-coded backslashes so it also
        # works outside Windows; the trailing '' keeps the trailing separator
        # the original value had.
        self.folder_path = os.path.join(base_dir, f'{self.file_name}', '')
        self.judge_folder()
        # Paths of the CSV and JSON output files.
        self.csv_path = os.path.join(self.folder_path, f'{self.file_name}.csv')
        self.json_path = os.path.join(self.folder_path, f'{self.file_name}.json')
        # Create the CSV file with its header row if it does not exist yet.
        self.judge_csv_file()

    def judge_folder(self):
        """Create the output folder if it does not exist."""
        os.makedirs(self.folder_path, exist_ok=True)

    def judge_csv_file(self):
        """Create the CSV file with its header line if it does not exist.

        Nothing is written when no header was supplied (``head_list`` is
        None); the original raised TypeError in that case.
        """
        if os.path.exists(self.csv_path) or self.head_list is None:
            return
        tb_head = ",".join(self.head_list) + '\n'
        with open(self.csv_path, "w", encoding=self.file_encoding) as wf:
            wf.write(tb_head)

    def csv_save(self):
        """Append ``self.csv_list`` as one row to the CSV file."""
        data = pd.DataFrame(data=[self.csv_list])
        # mode='a' appends; index=False omits the row index column;
        # header=False omits the column-name line.
        data.to_csv(self.csv_path, mode='a', index=False, header=False, encoding=self.file_encoding)

    def json_save(self):
        """Append ``self.json_list`` to the shared dict and rewrite the JSON file."""
        MySaveData.JSON_DATA_DICT['info'].append(self.json_list)
        json_data = json.dumps(MySaveData.JSON_DATA_DICT, indent=2, ensure_ascii=False)
        with open(self.json_path, 'w', encoding=self.file_encoding) as w:
            w.write(json_data)