# PYTHON 快速分割CSV (Python: quickly split a CSV file)
#
# myrj / 2023-08-20 / 原文 (original post)

from openpyxl import Workbook
import pandas as pd
import numpy as np
import sys,time,re,csv
# Input CSV locations.
path = "f:/te/qh.csv"
# Raw string: the original literal contained "\行", which is not a valid
# escape sequence (SyntaxWarning on recent Python versions). The raw prefix
# keeps the backslash literally, so the resulting path value is unchanged.
path1 = r"F:/BaiduNetdiskDownload\行政许可/行政许可/行政许可.csv"
##num_rows = sum(1 for row in open(path,encoding="utf-8"))
##num_rows1 = sum(1 for row in open(path1,encoding="utf-8"))
# Chunking state; not referenced by the code visible in this part of the file.
chunksize = 10000
chunk_pointer = 0
# Target workbook for Excel output.
tt = "f:/te/qhv1.xlsx"
# NOTE(review): this writer is never written to or closed in the visible
# code -- confirm it is still needed, and close it once output is done.
writer = pd.ExcelWriter(tt, engine='openpyxl')
# Read the CSV file chunk by chunk in a loop
def read_csv_feature(filePath, chunk_size=100000):
    """Load a UTF-8, comma-separated CSV file into a single DataFrame.

    The file is parsed in pieces of ``chunk_size`` rows and the pieces are
    concatenated at the end with a fresh 0..n-1 index.

    Args:
        filePath: Path of the CSV file to read (opened as UTF-8).
        chunk_size: Number of rows parsed per chunk. Defaults to 100000,
            the value that was previously hard-coded.

    Returns:
        A DataFrame holding every row of the file.

    Errors raised by ``open`` or by pandas while parsing propagate to the
    caller; the file is closed in either case.
    """
    # The context manager guarantees the handle is released even when
    # parsing fails part-way through (the manual close() was skipped then).
    with open(filePath, encoding='utf-8') as f:
        reader = pd.read_csv(
            f, sep=',', chunksize=chunk_size, low_memory=False
        )
        # Iterating the reader yields one DataFrame per chunk until the
        # input is exhausted.
        chunks = list(reader)
    print('Iteration is END!!!')
    return pd.concat(chunks, axis=0, ignore_index=True)

# Column names applied to the header-less file at `path` (see reader1).
cxx = [
    'company_id', 'unified_code', 'ent_name', 'reg_capital', 'real_capital',
    'reg_no', 'legal_person', 'open_status', 'old_ent_name', 'industry',
    'tax_no', 'license_number', 'org_no', 'authority', 'annual_date',
    'start_date', 'ent_type', 'open_time', 'district', 'district_code',
    'reg_addr', 'scope', 'state', 'create_time', 'update_time', '数据来源',
]

# NOTE(review): f1 / reader1 (and loop1, chunkSize, chunks, chunks1, tff)
# are not used by the split loop below. They are kept defined, and f1 is
# left open, in case code further down the file relies on them -- confirm
# and remove or close them if not.
f1 = open(path, encoding='utf-8')
reader1 = pd.read_csv(f1, sep=',', iterator=True, low_memory=False, names=cxx)
loop = loop1 = True
chunkSize = 5000
chunks = []
chunks1 = []
ab = 0
tff = 0

# Split the CSV at `path1` into numbered part files of up to 120000 rows.
# Fixes relative to the previous loop:
#   * each part file now holds only its own chunk; before, every pass
#     re-concatenated all chunks read so far, so file N contained chunks
#     1..N and one extra full copy was written after the input ran out;
#   * the result of drop() is kept (drop returns a new frame, so the
#     columns were never actually removed);
#   * only exhaustion of the reader ends the loop -- the bare `except:`
#     also hid real read/parse errors;
#   * the input file is closed when the loop finishes or fails.
with open(path1, encoding='utf-8') as f:
    reader = pd.read_csv(f, sep=',', chunksize=120000, low_memory=False)
    for ab, chunk in enumerate(reader, start=1):
        df = chunk.drop(columns=['state', 'create_time', 'update_time', '数据来源'])
        print(df)
        # The chunk keeps its running row index from the reader, so the
        # index column of each part continues where the previous one ended.
        df.to_csv("f:/te/qinghai" + str(ab) + ".csv")
loop = False  # same end state of the flag as the former while-loop