读取txt文件
方法一
逐行读取
f = open(file, 'r') ----> 常用的mode有如下几种: ‘r’只读,‘w’写入,‘a’追加,‘t’文本文件
lines = f.readlines()
for line in lines:
first_line = lines[0] #取第一行
last_line = lines[-1] #取最后一行
f.close()
或者用with语句
with open('/tmp/test-file.txt', 'r') as f:
for line in f.readlines():
或者 tempdata = f.readlines() 全部读进去 以下为一实例
with open(os.path.join(mydatapath, 'goes-particle-flux-primary.txt')) as fh:
# this will read EVERYTHING from the file, each row will be a string
tempdata = fh.readlines()
tempdata = [line.strip() for line in tempdata]
#strip() #removes line breaks, trailing blanks, etc.
gheader = [line for line in tempdata if line[0] in [':', '#']]
print('Header (start and end only):\n\n{0}\n{1}'.format(gheader[0], gheader[1]))
print('...\n{0}\n{1}\n{2}'.format(gheader[-3], gheader[-2], gheader[-1]))
gbody = [line.split() for line in tempdata if line[0] not in [':', '#']] #breaks each line into parts, splitting on whitespace
gbody = np.asarray(gbody)
#now let's make a dictionary so we can access by variable name, then we'll put arrays inside it...
goesdata = dict()
goesdata['year'] = gbody[:, 0].astype(int)
goesdata['month'] = gbody[:, 1].astype(int)
goesdata['day'] = gbody[:, 2].astype(int)
goesdata['seconds_of_day'] = gbody[:, 5]
goesdata['flux_p'] = gbody[:, 6:12]
goesdata['flux_e'] = gbody[:, 12:]
用with 语句块的另一个附加好处就是不用写file.close()
方法二 numpy 的loadtxt
data = np.loadtxt(txtdir)
time, height = np.loadtxt('MyData.txt', skiprows=5 ,
usecols = (1,2), unpack=True)
实例
#We'll use numpy's loadtxt function to read the data and ignore the header.
goesdata_np = np.loadtxt(os.path.join(mydatapath, 'goes-particle-flux-primary.txt'), comments=['#',':'])
#now inspect the shape of the data, so we know what array dimensions we are working with
print('The GOES data has dimensions {0}'.format(goesdata_np.shape))
#and we'll inspect the first line, which should be 15 elements long
print('Values in first row:\n {0}'.format(goesdata_np[0]))
方法三 pandas 中的read_csv
df=pd.read_csv(file,delim_whitespace = True )
sep如果不指定参数,默认使用逗号分隔; delim_whitespace = True 表示使用空白字符(空格、制表符)作为分隔 (注意: 较新版本的pandas中delim_whitespace已被弃用, 推荐改用 sep=r'\s+')
sc=df['sc'].astype(str).tolist() ------> dataframe 中的string array 转换成list
mlt=df['mlt'].astype(float).values ------> 转换成numpy array
写入txt 文件
方法一
每一行的写入
f = open(save_dir,'a')
f.writelines(output)
f.close()
with open(file, 'w') as f:
f.write(…+'\n') ——>>> 只能是一个字符串
…
方法二 使用numpy中的savetxt
f='xxx/test.txt'
np.savetxt(f,np.column_stack((a,b,c)),fmt='%1.4e')
加入header
info = 'Data for falling mass experiment'
info += '\nDate: 16-Aug-2013'
info += '\nData taken by Lauren and John'
info += '\n\n data point time (sec) height (mm) '
info += 'uncertainty (mm)'
np.savetxt('MyDataOut.txt', list(zip(dataPt, time, height, error)), header=info, fmt="%12.1f")
或
f='/Users/xxx/fit_psd.txt'
np.savetxt(f,np.column_stack((ek,fit_fpe,fit_fpa)),fmt='%1.4e')
读取其他格式的数据
读取.sav 格式
data=scipy.io.readsav('proton_fit.sav')
alpha0=data['alpha0']
读取mat 格式
import scipy.io
mat = scipy.io.loadmat('file.mat')
event_l_shell=mat['event_l_shell'] ---> 转成float
快速保存读入
保存多个数组
ar2 = np.arange(4)
np.savez(r'C:\python数据分析\arraytest1.npz',my_name1=ar1, my_name2=ar2)
读取
a=np.load('array_save.npz')
ar1=a['my_name1']
保存h5文件
f = h5py.File('../output/Exy.h5','w')
iz_s,iz_e = 468,676
z = p.z[iz_s:iz_e]
ex = ex[iz_s:iz_e,:]
ey = ey[iz_s:iz_e,:]
f.create_dataset('wt',data=wt)
f.create_dataset('z',data=z)
f.create_dataset('ex',data=ex)
f.create_dataset('ey',data=ey)
f.close()