Pandas对DataFrame的部分执行自定义操作

[Name]
STREAM
[Data]
X [ m ] , Y [ m ] , Z [ m ] , Streamline Number, Time [ s ]
9.310345E-01 , 2.027650E+00 , 0.000000E+00, 0.000000E+00 , 0.000000E+00
2.837438E+00 , 1.926267E+00 , 0.000000E+00, 0.000000E+00 , 5.000000E-01
9.310345E-01 , 2.990784E+00 , 0.000000E+00, 1.000000E+00 , 0.000000E+00
3.280788E+00 , 3.903226E+00 , 0.000000E+00, 1.000000E+00 , 2.000000E-01
6.650246E-01 , 6.133641E+00 , 0.000000E+00, 2.000000E+00 , 0.000000E+00
1.463054E+00 , 5.728111E+00 , 0.000000E+00, 2.000000E+00 , 5.000000E-01
7.536946E-01 , 1.008333E+01 , 0.000000E+00, 3.000000E+00 , 0.000000E+00
2.128079E+00 , 1.008333E+01 , 0.000000E+00, 3.000000E+00 , 5.000000E-01
3.546798E-01 , 1.043982E+01 , 0.000000E+00, 4.000000E+00 , 0.000000E+00
3.857143E+00 , 1.043982E+01 , 0.000000E+00, 4.000000E+00 , 1.000000E+01
5.098522E+00 , 1.115207E+00 , 0.000000E+00, 0.000000E+00 , 1.000000E+00
4.832512E+00 , 3.903226E+00 , 0.000000E+00, 1.000000E+00 , 4.000000E-01
6.162561E+00 , 3.142857E+00 , 0.000000E+00, 1.000000E+00 , 6.000000E-01
2.571429E+00 , 5.626728E+00 , 0.000000E+00, 2.000000E+00 , 1.000000E+00
4.300493E+00 , 5.423963E+00 , 0.000000E+00, 2.000000E+00 , 2.000000E+00
4.078818E+00 , 9.930555E+00 , 0.000000E+00, 3.000000E+00 , 7.500000E-01
5.320197E+00 , 9.625000E+00 , 0.000000E+00, 3.000000E+00 , 1.000000E+00
7.980296E+00 , 1.023611E+01 , 0.000000E+00, 4.000000E+00 , 1.500000E+01
8.068966E+00 , 1.165899E+00 , 0.000000E+00, 0.000000E+00 , 1.500000E+00
7.226601E+00 , 3.396313E+00 , 0.000000E+00, 1.000000E+00 , 8.000000E-01
7.581281E+00 , 2.179724E+00 , 0.000000E+00, 1.000000E+00 , 1.000000E+00
5.231527E+00 , 5.373272E+00 , 0.000000E+00, 2.000000E+00 , 3.000000E+00
6.118227E+00 , 5.322581E+00 , 0.000000E+00, 2.000000E+00 , 4.000000E+00
6.783251E+00 , 9.268518E+00 , 0.000000E+00, 3.000000E+00 , 1.500000E+00

"""Resample streamlines at a near-uniform arc-length spacing.

Reads 'streamlines_example.csv' (columns X, Y, Z, streamline number, time),
computes the cumulative distance ("abscissa") travelled along every
streamline, linearly re-interpolates each streamline on an evenly spaced
abscissa grid and writes the result to 'streamline_example_updated.csv'.
"""
import time

import numpy as np
import pandas as pd
from scipy import interpolate

COORD_COLUMNS = ['X', 'Y', 'Z']
OUTPUT_COLUMNS = ['X', 'Y', 'Z', 'num', 'abscissa']


def compute_abscissa(stream):
    """Return the streamlines with a cumulative arc-length column.

    Args:
        stream: DataFrame with columns 'X', 'Y', 'Z', 'num' (streamline
            number) and 'time'. It is not modified.

    Returns:
        A new DataFrame with columns X, Y, Z, num, abscissa, sorted by
        streamline number and then by time, with a fresh 0..n-1 index.
        'abscissa' is 0.0 at the first point of every streamline. Points
        that coincide with their predecessor (zero step length) are dropped,
        and so are streamlines left with fewer than two points.
    """
    ordered = stream.sort_values(by=['num', 'time']).reset_index(drop=True)
    if ordered.empty:
        return ordered.reindex(columns=OUTPUT_COLUMNS)

    coords = ordered[COORD_COLUMNS].to_numpy(dtype=float)
    nums = ordered['num'].to_numpy()

    # Rows are sorted by 'num', so a streamline starts wherever the number
    # differs from the previous row.
    is_first = np.ones(len(ordered), dtype=bool)
    is_first[1:] = nums[1:] != nums[:-1]

    # Distance from each point to the previous one. The first point of a
    # streamline gets an explicit 0.0 instead of a sentinel value that would
    # have to be search-and-replaced later (and could collide with real data).
    step = np.zeros(len(ordered))
    step[1:] = np.sqrt(np.sum((coords[1:] - coords[:-1]) ** 2, axis=1))
    step[is_first] = 0.0

    # Per-streamline running sum, aligned row by row with 'ordered'.
    ordered['abscissa'] = pd.Series(step).groupby(nums).cumsum().to_numpy()

    # Coincident points would make the abscissa non-strictly increasing,
    # so drop them; the first point of a streamline is always kept.
    keep = is_first | (step != 0)
    kept = ordered.loc[keep, OUTPUT_COLUMNS]

    # A streamline needs at least two points to be interpolated.
    points_per_line = kept['num'].map(kept['num'].value_counts())
    return kept[points_per_line >= 2].reset_index(drop=True)


def resample_streamlines(stream, dist_sampling=0.5):
    """Linearly resample every streamline on an evenly spaced abscissa grid.

    Args:
        stream: DataFrame with columns X, Y, Z, num, abscissa, where
            'abscissa' is strictly increasing within each streamline
            (as produced by ``compute_abscissa``).
        dist_sampling: Target spacing between consecutive samples, in the
            units of 'abscissa'. Must be positive.

    Returns:
        DataFrame with columns X, Y, Z, num, abscissa (all float) holding the
        resampled points, streamlines in ascending 'num' order.

    Raises:
        ValueError: If ``dist_sampling`` is not positive.
    """
    if dist_sampling <= 0:
        raise ValueError('dist_sampling must be positive')

    pieces = []
    for _, line in stream.groupby('num', sort=True):
        if len(line) < 2:
            continue  # nothing to interpolate between
        abscissa = line['abscissa'].to_numpy(dtype=float)
        values = line[OUTPUT_COLUMNS].to_numpy(dtype=float)
        length = abscissa[-1] - abscissa[0]
        # N intervals of ~dist_sampling need N + 1 points; never fewer than
        # two so that both end points of a short streamline survive.
        n_points = max(int(length / dist_sampling) + 1, 2)
        grid = np.linspace(abscissa[0], abscissa[-1], n_points)
        pieces.append(interpolate.interp1d(abscissa, values, axis=0)(grid))

    if not pieces:
        return pd.DataFrame(np.zeros(shape=(0, 5)), columns=OUTPUT_COLUMNS)
    # One concatenation at the end instead of a quadratic np.append per loop.
    return pd.DataFrame(np.concatenate(pieces, axis=0), columns=OUTPUT_COLUMNS)


def main():
    """Read the example file, resample it, write the result and time it."""
    stream_orig = pd.read_csv(
        'streamlines_example.csv',
        header=3,
        names=['X', 'Y', 'Z', 'num', 'time'],
    )
    # Streamline numbers are exported as floats; use them as integer labels.
    stream_orig['num'] = stream_orig['num'].astype(int)

    start = time.time()
    stream = compute_abscissa(stream_orig)
    df_out = resample_streamlines(stream, dist_sampling=0.5)
    df_out.to_csv('streamline_example_updated.csv')
    elapsed = time.time() - start

    print('Elapsed time = ', elapsed)


if __name__ == '__main__':
    main()

0条回答

目前没有回答

相关问题更多 >

编程相关推荐

热门问题

热门文章