在pandas中分析时间序列?
我最近发现了pandas,这个工具看起来非常有趣。到目前为止,我通常在Python中使用结构化的掩码数组,而pandas似乎可以提供一个替代方案,特别是因为我经常处理时间序列数据。不过,在每个时间点上,我有一些标量数据和一些数组数据:标量时间、标量纬度、标量经度,以及向量压力和向量剖面:
dtype([('lat', '<f4'), ('ch4_profile', '<f4', (110,)), ('time', '<M8[us]'), ('lon', '<f4'), ('p', '<f4', (110,))])
我会用这些数据进行分析,比如找出与其他数据集的匹配,或者进行可视化,比如单独可视化每个剖面,或者在地图上显示每个剖面的一些统计信息。
如果我理解得没错,pandas的DataFrame是二维的,而Panel是三维的。pandas中有没有类似的数据结构可以用来描述我上面提到的这种数据?还是说我(暂时)应该继续使用(更灵活的?)标准ndarray?
编辑:示例数据
根据评论,这里有一些示例数据,可以直接复制粘贴到代码中(请注意,在实际情况下,我使用的是numpy.ma.MaskedArray)。
array([ (70.00604248046875, [1.8543829917907715, 1.8604300022125244, 1.8616620302200317, 1.8601950407028198, 1.851915955543518, 1.8489810228347778, 1.8222110271453857, 1.8655049800872803, 1.7066600322723389, 1.687608003616333, 1.2694480419158936, -9999.0, -9999.0, -9999.0, -9999.0, -9999.0, -9999.0, -9999.0, -9999.0, -9999.0, -9999.0, -9999.0, -9999.0, -9999.0, -9999.0, -9999.0, -9999.0, -9999.0, -9999.0, -9999.0, -9999.0, -9999.0, -9999.0, -9999.0, -9999.0, -9999.0, -9999.0, -9999.0, -9999.0, -9999.0, -9999.0, -9999.0, -9999.0, -9999.0, -9999.0, -9999.0, -9999.0, -9999.0, -9999.0, -9999.0, -9999.0, -9999.0, -9999.0, -9999.0, -9999.0, -9999.0, -9999.0, -9999.0, -9999.0, -9999.0, -9999.0, -9999.0, -9999.0, -9999.0, -9999.0, -9999.0, -9999.0, -9999.0, -9999.0, -9999.0, -9999.0, -9999.0, -9999.0, -9999.0, -9999.0, -9999.0, -9999.0, -9999.0, -9999.0, -9999.0, -9999.0, -9999.0, -9999.0, -9999.0, -9999.0, -9999.0, -9999.0, -9999.0, -9999.0, -9999.0, -9999.0, -9999.0, -9999.0, -9999.0, -9999.0, -9999.0, -9999.0, -9999.0, -9999.0, -9999.0, -9999.0, -9999.0, -9999.0, -9999.0, -9999.0, -9999.0, -9999.0, -9999.0, -9999.0, -9999.0], datetime.datetime(2010, 3, 23, 2, 24, 54, 210000), -52.05939865112305, [863.52294921875, 669.9515380859375, 583.3341064453125, 507.91802978515625, 442.25689697265625, 385.1185607910156, 335.3567199707031, 292.0868225097656, 246.5142822265625, 183.48988342285156, 72.71968078613281, -9999.0, -9999.0, -9999.0, -9999.0, -9999.0, -9999.0, -9999.0, -9999.0, -9999.0, -9999.0, -9999.0, -9999.0, -9999.0, -9999.0, -9999.0, -9999.0, -9999.0, -9999.0, -9999.0, -9999.0, -9999.0, -9999.0, -9999.0, -9999.0, -9999.0, -9999.0, -9999.0, -9999.0, -9999.0, -9999.0, -9999.0, -9999.0, -9999.0, -9999.0, -9999.0, -9999.0, -9999.0, -9999.0, -9999.0, -9999.0, -9999.0, -9999.0, -9999.0, -9999.0, -9999.0, -9999.0, -9999.0, -9999.0, -9999.0, -9999.0, -9999.0, -9999.0, -9999.0, -9999.0, -9999.0, -9999.0, -9999.0, -9999.0, -9999.0, -9999.0, -9999.0, -9999.0, -9999.0, -9999.0, 
-9999.0, -9999.0, -9999.0, -9999.0, -9999.0, -9999.0, -9999.0, -9999.0, -9999.0, -9999.0, -9999.0, -9999.0, -9999.0, -9999.0, -9999.0, -9999.0, -9999.0, -9999.0, -9999.0, -9999.0, -9999.0, -9999.0, -9999.0, -9999.0, -9999.0, -9999.0, -9999.0, -9999.0, -9999.0, -9999.0, -9999.0, -9999.0, -9999.0, -9999.0, -9999.0]),
(83.92768859863281, [1.8359440565109253, 1.8322449922561646, 1.834841012954712, 1.824031949043274, 1.8172659873962402, 1.8016170263290405, 1.8238199949264526, 1.6459150314331055, 1.6390000581741333, 1.2601679563522339, -9999.0, -9999.0, -9999.0, -9999.0, -9999.0, -9999.0, -9999.0, -9999.0, -9999.0, -9999.0, -9999.0, -9999.0, -9999.0, -9999.0, -9999.0, -9999.0, -9999.0, -9999.0, -9999.0, -9999.0, -9999.0, -9999.0, -9999.0, -9999.0, -9999.0, -9999.0, -9999.0, -9999.0, -9999.0, -9999.0, -9999.0, -9999.0, -9999.0, -9999.0, -9999.0, -9999.0, -9999.0, -9999.0, -9999.0, -9999.0, -9999.0, -9999.0, -9999.0, -9999.0, -9999.0, -9999.0, -9999.0, -9999.0, -9999.0, -9999.0, -9999.0, -9999.0, -9999.0, -9999.0, -9999.0, -9999.0, -9999.0, -9999.0, -9999.0, -9999.0, -9999.0, -9999.0, -9999.0, -9999.0, -9999.0, -9999.0, -9999.0, -9999.0, -9999.0, -9999.0, -9999.0, -9999.0, -9999.0, -9999.0, -9999.0, -9999.0, -9999.0, -9999.0, -9999.0, -9999.0, -9999.0, -9999.0, -9999.0, -9999.0, -9999.0, -9999.0, -9999.0, -9999.0, -9999.0, -9999.0, -9999.0, -9999.0, -9999.0, -9999.0, -9999.0, -9999.0, -9999.0, -9999.0, -9999.0, -9999.0], datetime.datetime(2010, 3, 30, 21, 3, 12, 478000), -66.69522094726562, [850.73779296875, 612.4067993164062, 513.5870361328125, 447.0830993652344, 389.2227478027344, 338.8443298339844, 295.01129150390625, 248.8583984375, 190.45912170410156, 78.76023864746094, -9999.0, -9999.0, -9999.0, -9999.0, -9999.0, -9999.0, -9999.0, -9999.0, -9999.0, -9999.0, -9999.0, -9999.0, -9999.0, -9999.0, -9999.0, -9999.0, -9999.0, -9999.0, -9999.0, -9999.0, -9999.0, -9999.0, -9999.0, -9999.0, -9999.0, -9999.0, -9999.0, -9999.0, -9999.0, -9999.0, -9999.0, -9999.0, -9999.0, -9999.0, -9999.0, -9999.0, -9999.0, -9999.0, -9999.0, -9999.0, -9999.0, -9999.0, -9999.0, -9999.0, -9999.0, -9999.0, -9999.0, -9999.0, -9999.0, -9999.0, -9999.0, -9999.0, -9999.0, -9999.0, -9999.0, -9999.0, -9999.0, -9999.0, -9999.0, -9999.0, -9999.0, -9999.0, -9999.0, -9999.0, -9999.0, -9999.0, -9999.0, -9999.0, -9999.0, 
-9999.0, -9999.0, -9999.0, -9999.0, -9999.0, -9999.0, -9999.0, -9999.0, -9999.0, -9999.0, -9999.0, -9999.0, -9999.0, -9999.0, -9999.0, -9999.0, -9999.0, -9999.0, -9999.0, -9999.0, -9999.0, -9999.0, -9999.0, -9999.0, -9999.0, -9999.0, -9999.0, -9999.0, -9999.0, -9999.0, -9999.0])],
dtype=[('lat', '<f4'), ('ch4_profile', '<f4', (110,)), ('time', '<M8[us]'), ('lon', '<f4'), ('p', '<f4', (110,))])
1 个回答
1
我把你的数据整理成了一种扁平的格式,然后放进了一个数据框里:
import numpy as np
import datetime
import pandas
x = np.array([
(70.00604248046875, [1.8543829917907715, 1.8604300022125244, 1.8616620302200317, 1.8601950407028198, 1.851915955543518, 1.8489810228347778, 1.8222110271453857, 1.8655049800872803, 1.7066600322723389, 1.687608003616333, 1.2694480419158936, -9999.0, -9999.0, -9999.0, -9999.0, -9999.0, -9999.0, -9999.0, -9999.0, -9999.0, -9999.0, -9999.0, -9999.0, -9999.0, -9999.0, -9999.0, -9999.0, -9999.0, -9999.0, -9999.0, -9999.0, -9999.0, -9999.0, -9999.0, -9999.0, -9999.0, -9999.0, -9999.0, -9999.0, -9999.0, -9999.0, -9999.0, -9999.0, -9999.0, -9999.0, -9999.0, -9999.0, -9999.0, -9999.0, -9999.0, -9999.0, -9999.0, -9999.0, -9999.0, -9999.0, -9999.0, -9999.0, -9999.0, -9999.0, -9999.0, -9999.0, -9999.0, -9999.0, -9999.0, -9999.0, -9999.0, -9999.0, -9999.0, -9999.0, -9999.0, -9999.0, -9999.0, -9999.0, -9999.0, -9999.0, -9999.0, -9999.0, -9999.0, -9999.0, -9999.0, -9999.0, -9999.0, -9999.0, -9999.0, -9999.0, -9999.0, -9999.0, -9999.0, -9999.0, -9999.0, -9999.0, -9999.0, -9999.0, -9999.0, -9999.0, -9999.0, -9999.0, -9999.0, -9999.0, -9999.0, -9999.0, -9999.0, -9999.0, -9999.0, -9999.0, -9999.0, -9999.0, -9999.0, -9999.0, -9999.0], datetime.datetime(2010, 3, 23, 2, 24, 54, 210000), -52.05939865112305, [863.52294921875, 669.9515380859375, 583.3341064453125, 507.91802978515625, 442.25689697265625, 385.1185607910156, 335.3567199707031, 292.0868225097656, 246.5142822265625, 183.48988342285156, 72.71968078613281, -9999.0, -9999.0, -9999.0, -9999.0, -9999.0, -9999.0, -9999.0, -9999.0, -9999.0, -9999.0, -9999.0, -9999.0, -9999.0, -9999.0, -9999.0, -9999.0, -9999.0, -9999.0, -9999.0, -9999.0, -9999.0, -9999.0, -9999.0, -9999.0, -9999.0, -9999.0, -9999.0, -9999.0, -9999.0, -9999.0, -9999.0, -9999.0, -9999.0, -9999.0, -9999.0, -9999.0, -9999.0, -9999.0, -9999.0, -9999.0, -9999.0, -9999.0, -9999.0, -9999.0, -9999.0, -9999.0, -9999.0, -9999.0, -9999.0, -9999.0, -9999.0, -9999.0, -9999.0, -9999.0, -9999.0, -9999.0, -9999.0, -9999.0, -9999.0, -9999.0, -9999.0, -9999.0, -9999.0, -9999.0, -9999.0, 
-9999.0, -9999.0, -9999.0, -9999.0, -9999.0, -9999.0, -9999.0, -9999.0, -9999.0, -9999.0, -9999.0, -9999.0, -9999.0, -9999.0, -9999.0, -9999.0, -9999.0, -9999.0, -9999.0, -9999.0, -9999.0, -9999.0, -9999.0, -9999.0, -9999.0, -9999.0, -9999.0, -9999.0, -9999.0, -9999.0, -9999.0, -9999.0, -9999.0, -9999.0]),
(83.92768859863281, [1.8359440565109253, 1.8322449922561646, 1.834841012954712, 1.824031949043274, 1.8172659873962402, 1.8016170263290405, 1.8238199949264526, 1.6459150314331055, 1.6390000581741333, 1.2601679563522339, -9999.0, -9999.0, -9999.0, -9999.0, -9999.0, -9999.0, -9999.0, -9999.0, -9999.0, -9999.0, -9999.0, -9999.0, -9999.0, -9999.0, -9999.0, -9999.0, -9999.0, -9999.0, -9999.0, -9999.0, -9999.0, -9999.0, -9999.0, -9999.0, -9999.0, -9999.0, -9999.0, -9999.0, -9999.0, -9999.0, -9999.0, -9999.0, -9999.0, -9999.0, -9999.0, -9999.0, -9999.0, -9999.0, -9999.0, -9999.0, -9999.0, -9999.0, -9999.0, -9999.0, -9999.0, -9999.0, -9999.0, -9999.0, -9999.0, -9999.0, -9999.0, -9999.0, -9999.0, -9999.0, -9999.0, -9999.0, -9999.0, -9999.0, -9999.0, -9999.0, -9999.0, -9999.0, -9999.0, -9999.0, -9999.0, -9999.0, -9999.0, -9999.0, -9999.0, -9999.0, -9999.0, -9999.0, -9999.0, -9999.0, -9999.0, -9999.0, -9999.0, -9999.0, -9999.0, -9999.0, -9999.0, -9999.0, -9999.0, -9999.0, -9999.0, -9999.0, -9999.0, -9999.0, -9999.0, -9999.0, -9999.0, -9999.0, -9999.0, -9999.0, -9999.0, -9999.0, -9999.0, -9999.0, -9999.0, -9999.0], datetime.datetime(2010, 3, 30, 21, 3, 12, 478000), -66.69522094726562, [850.73779296875, 612.4067993164062, 513.5870361328125, 447.0830993652344, 389.2227478027344, 338.8443298339844, 295.01129150390625, 248.8583984375, 190.45912170410156, 78.76023864746094, -9999.0, -9999.0, -9999.0, -9999.0, -9999.0, -9999.0, -9999.0, -9999.0, -9999.0, -9999.0, -9999.0, -9999.0, -9999.0, -9999.0, -9999.0, -9999.0, -9999.0, -9999.0, -9999.0, -9999.0, -9999.0, -9999.0, -9999.0, -9999.0, -9999.0, -9999.0, -9999.0, -9999.0, -9999.0, -9999.0, -9999.0, -9999.0, -9999.0, -9999.0, -9999.0, -9999.0, -9999.0, -9999.0, -9999.0, -9999.0, -9999.0, -9999.0, -9999.0, -9999.0, -9999.0, -9999.0, -9999.0, -9999.0, -9999.0, -9999.0, -9999.0, -9999.0, -9999.0, -9999.0, -9999.0, -9999.0, -9999.0, -9999.0, -9999.0, -9999.0, -9999.0, -9999.0, -9999.0, -9999.0, -9999.0, -9999.0, -9999.0, -9999.0, -9999.0, 
-9999.0, -9999.0, -9999.0, -9999.0, -9999.0, -9999.0, -9999.0, -9999.0, -9999.0, -9999.0, -9999.0, -9999.0, -9999.0, -9999.0, -9999.0, -9999.0, -9999.0, -9999.0, -9999.0, -9999.0, -9999.0, -9999.0, -9999.0, -9999.0, -9999.0, -9999.0, -9999.0, -9999.0, -9999.0, -9999.0, -9999.0])
],
dtype=[('lat', '<f4'), ('ch4', '<f4', (110,)), ('time', '<M8[us]'), ('lon', '<f4'), ('p', '<f4', (110,))]
)
# Flatten the structured array `x` into "long" form: one output row per
# (profile, level) pair, so the data fits a two-dimensional DataFrame.
#   series      -- position of the profile (record) within `x`
#   measurement -- position of the level within that profile
# The scalar fields (lat, lon, time) are repeated on every level of a profile.
finallist = []
for n, row in enumerate(x):
    for m, (p, ch4) in enumerate(zip(row['p'], row['ch4'])):
        finallist.append([n, m, row['lat'], row['lon'], row['time'], p, ch4])

df = pandas.DataFrame(
    finallist,
    columns=['series', 'measurement', 'lat', 'lon', 'time', 'p', 'ch4'],
)
# Optional: make (series, measurement) the row index instead of plain columns.
# df.set_index(['series', 'measurement'], inplace=True)

# Negative values (the -9999.0 entries in the sample data) are treated as
# missing. Assign through a single .loc call: the chained form
# `df['p'][mask] = ...` may write to a temporary copy and is a silent no-op
# when pandas Copy-on-Write is enabled.
df.loc[df['p'] < 0, 'p'] = np.nan
df.loc[df['ch4'] < 0, 'ch4'] = np.nan

print(df.head().to_string())
接下来:
df.head()
series measurement lat lon time p ch4
0 0 0 70.006042 -52.059399 2010-03-23 02:24:54.210000 863.522949 1.854383
1 0 1 70.006042 -52.059399 2010-03-23 02:24:54.210000 669.951538 1.860430
2 0 2 70.006042 -52.059399 2010-03-23 02:24:54.210000 583.334106 1.861662
3 0 3 70.006042 -52.059399 2010-03-23 02:24:54.210000 507.918030 1.860195
4 0 4 70.006042 -52.059399 2010-03-23 02:24:54.210000 442.256897 1.851916
还有:
df.tail()
series measurement lat lon time p ch4
215 1 105 83.927689 -66.695221 2010-03-30 21:03:12.478000 NaN NaN
216 1 106 83.927689 -66.695221 2010-03-30 21:03:12.478000 NaN NaN
217 1 107 83.927689 -66.695221 2010-03-30 21:03:12.478000 NaN NaN
218 1 108 83.927689 -66.695221 2010-03-30 21:03:12.478000 NaN NaN
219 1 109 83.927689 -66.695221 2010-03-30 21:03:12.478000 NaN NaN
然后:
import matplotlib.pyplot as plt
# Plot pressure (y) against CH4 (x) from the flattened frame on one Axes.
fig, ax = plt.subplots()
plot_options = dict(
    x='ch4',
    y='p',
    use_index=True,
    subplots=True,
    ax=ax,
    legend=True,
)
df.plot(**plot_options)
# Set the axis labels after plotting so they replace any labels pandas applied.
ax.set(ylabel=r'$p$', xlabel=r'$\mathrm{CH}_4$')
结果是:
或者,如果你能安装seaborn(强烈推荐):
# seaborn is an optional third-party package; import it here because the
# snippet above never does.
import seaborn

# Scatter p against ch4, coloured by profile (`series`), without a regression
# fit. x and y are passed by keyword: current seaborn releases no longer
# accept them positionally.
seaborn.lmplot(x='ch4', y='p', data=df, hue='series', fit_reg=False)
结果是