把较大的csv文件拆分成多个文件
此方法适用于文本文件特别大的情况,例如csv文件行数特别多,用其他编辑器手动拆分时会出现内存溢出。本人需要将大的csv文件切割成2份小文件;如果想拆分成更多文件,可以修改脚本后多次执行,注意每次使用不同的输出文件名。
此脚本用python编写。
具体内容如下:
import csv
import os
def split_csv(path, total_len, per, out_dir='C:\\WwhWorld\\split_re'):
    """Split the leading rows of a CSV file into ``train.csv`` and ``vali.csv``.

    Rows are streamed one at a time, so the input never has to fit in memory.
    The first ``round(total_len * per / 100)`` rows go to ``train.csv``; the
    rows after that, up to (but excluding) row index ``total_len``, go to
    ``vali.csv``.  Reading stops at the first row that falls outside both
    ranges.

    Any existing ``train.csv`` / ``vali.csv`` in ``out_dir`` is deleted first.
    An output file is only created when at least one row is written to it.

    Args:
        path: Path of the source CSV file (read as UTF-8).
        total_len: Number of rows to distribute across the two outputs.
        per: Percentage of ``total_len`` that goes to ``train.csv``.
        out_dir: Existing directory that receives the two output files.
            Defaults to the directory this script originally hard-coded.

    Returns:
        None.
    """
    train_path = os.path.join(out_dir, 'train.csv')
    vali_path = os.path.join(out_dir, 'vali.csv')

    # Start from a clean slate so rows from a previous run are never mixed in.
    for stale in (train_path, vali_path):
        if os.path.exists(stale):
            os.remove(stale)

    # Loop-invariant: index of the first row that belongs to vali.csv.
    split_at = round(total_len * per / 100)

    # Each output is opened lazily (on its first row) and then kept open,
    # instead of being reopened in append mode for every single row.
    train_file = vali_file = None
    train_writer = vali_writer = None
    try:
        with open(path, 'r', newline='', encoding='UTF-8') as src:
            for i, row in enumerate(csv.reader(src)):
                if i < split_at:
                    print('.')
                    if train_writer is None:
                        train_file = open(train_path, 'w', newline='',
                                          encoding='UTF-8')
                        train_writer = csv.writer(train_file)
                    train_writer.writerow(row)
                elif i < total_len:
                    print('.')
                    if vali_writer is None:
                        vali_file = open(vali_path, 'w', newline='',
                                         encoding='UTF-8')
                        vali_writer = csv.writer(vali_file)
                    vali_writer.writerow(row)
                else:
                    # Everything requested has been written; skip the rest.
                    break
    finally:
        # Close whichever outputs were opened, even if a write failed.
        for handle in (train_file, vali_file):
            if handle is not None:
                handle.close()

    print("切割成功")
    return
if __name__ == '__main__':
    # Source CSV to split.
    path = 'C:\\WwhWorld\\RADIUS_ACCOUNT_CIDALL.20220901.csv'

    # Count the lines by streaming the file: readlines() would load the whole
    # file into memory, which is exactly what this script is meant to avoid.
    # NOTE: this counts physical lines; a quoted field containing a newline
    # would make the count larger than the number of CSV rows.
    with open(path, 'r', encoding='UTF-8') as source:
        total_len = sum(1 for _ in source)

    per = 50  # percentage of the rows that goes to the first output file
    split_csv(path, total_len, per)