from openpyxl import Workbook
import pandas as pd
import numpy as np
import sys,time,re,csv
# Input CSV locations.
path = "f:/te/qh.csv"
# Raw string: keeps the literal backslash before "行" without relying on an
# invalid escape sequence, which newer Python versions warn about.
path1 = r"F:/BaiduNetdiskDownload\行政许可/行政许可/行政许可.csv"
# Disabled row counts for the two inputs, kept for reference.
# num_rows = sum(1 for row in open(path, encoding="utf-8"))
# num_rows1 = sum(1 for row in open(path1, encoding="utf-8"))
chunksize = 10000
chunk_pointer = 0
# Path of the Excel workbook.
tt = "f:/te/qhv1.xlsx"
# Excel writer for the workbook at `tt`, backed by the openpyxl engine.
writer = pd.ExcelWriter(
    tt,
    engine="openpyxl",
)
# Read the CSV file chunk by chunk.
def read_csv_feature(filePath, chunkSize=100000):
    """Load a CSV file into one DataFrame by parsing it in chunks.

    Args:
        filePath: Path of the UTF-8 encoded, comma-separated file to read.
        chunkSize: Maximum number of rows parsed per chunk.

    Returns:
        A DataFrame with every row of the file and a fresh 0..n-1 index.
    """
    # The context manager closes the file even if parsing fails part-way.
    with open(filePath, encoding='utf-8') as f:
        reader = pd.read_csv(f, sep=',', chunksize=chunkSize, low_memory=False)
        # Iterating the reader yields successive chunks until the data ends.
        chunks = list(reader)
    print('Iteration is END!!!')
    return pd.concat(chunks, axis=0, ignore_index=True)
# Column labels passed to the reader over `path` via `names=`.
cxx = [
    'company_id', 'unified_code', 'ent_name', 'reg_capital', 'real_capital',
    'reg_no', 'legal_person', 'open_status', 'old_ent_name', 'industry',
    'tax_no', 'license_number', 'org_no', 'authority', 'annual_date',
    'start_date', 'ent_type', 'open_time', 'district', 'district_code',
    'reg_addr', 'scope', 'state', 'create_time', 'update_time', '数据来源',
]

# Iterator-style reader over `path1`; the handle stays open for later reads.
f = open(path1, encoding='utf-8')
reader = pd.read_csv(f, sep=',', iterator=True, low_memory=False)

# Iterator-style reader over `path`, with column labels taken from `cxx`.
f1 = open(path, encoding='utf-8')
reader1 = pd.read_csv(
    f1,
    sep=',',
    iterator=True,
    low_memory=False,
    names=cxx,
)

# Loop flags, chunk accumulators and counters.
loop = True
loop1 = True
chunkSize = 5000
chunks = []
chunks1 = []
ab = 0
tff = 0
# Pull chunks of up to 120000 rows from `reader` until it is exhausted.
# NOTE(review): the original indentation was lost; this assumes the
# concat/drop/print/to_csv steps run once after the loop, as in
# read_csv_feature -- confirm.
while loop:
    ab = ab + 1
    try:
        chunk = reader.get_chunk(120000)
    except StopIteration:
        # Only end-of-data ends the loop; any other error propagates instead
        # of silently truncating the exported data.
        loop = False
    else:
        chunks.append(chunk)

df = pd.concat(chunks, axis=0, ignore_index=True)
# drop() returns a new frame, so the result must be assigned for the columns
# to actually be removed before printing and exporting.
df = df.drop(columns=['state', 'create_time', 'update_time', '数据来源'])
print(df)
# `ab` also counts the final attempt that hit end-of-data.
df.to_csv(f"f:/te/qinghai{ab}.csv")