下面是一个简单的例子:我想针对不同的条件(Biofilm、SS_mgL、%MP 等),计算一系列元素(本例中为各粒径区间)在重复测量之间的平均值。正如你们将看到的,我目前的处理方式非常粗糙,但我相信一定有更优雅的方法可以做到这一点。谢谢你们的建议。
# Load the Pandas libraries with alias 'pd'
import pandas as pd
# Load the Numpy libraries with alias 'np'
import numpy as np
# Load the Matplotlib library pyplot with alias 'plt'
from matplotlib import pyplot as plt
# Read the semicolon-separated test data from my public GitHub repository
url = (
    'https://raw.githubusercontent.com/'
    'matt-salter/public/master/test.csv'
)
df = pd.read_csv(url, sep=';')
# Size-bin midpoints used as the x-values of the distribution: sixteen
# unit-spaced values from 1.5 to 16.5, followed by a final value of 58.5.
midpoint = np.append(np.arange(1.5, 17.0, 1.0), 58.5)
# Average size distribution for condition Biofilm=0, SS_mgL=10, %MP=0.
# Build the condition mask once, then average 'dn/dlogDP' over the matching
# rows of every 'Midpoint' value in a single groupby, instead of repeating
# the same filter for each hard-coded midpoint.
condition = (df['Biofilm'] == 0) & (df['SS_mgL'] == 10) & (df['%MP'] == 0)
size_dist = (
    df.loc[condition]
    .groupby('Midpoint')['dn/dlogDP']
    .mean()
    # Align the result with the midpoint array (same order and length);
    # midpoints with no matching rows become NaN, which is also what the
    # mean of an empty selection yields.
    .reindex(midpoint)
    .to_numpy()
)
# Plot the averaged size distribution against the bin midpoints on a
# logarithmic x-axis.
plt.semilogx(midpoint, size_dist)
plt.xlim([1, 100])
# Raw string: '\m' is not a valid Python escape sequence and raises a
# warning on recent interpreters; the label text passed to matplotlib is
# unchanged.
plt.xlabel(r'Particle size ($\mu$m)')
plt.ylabel('dn/dlog$_{Dp}$')
您可以使用 `groupby` 后跟 `agg` 函数来获得所需的输出,例如:`df.groupby(['Biofilm', 'SS_mgL', '%MP', 'Midpoint'])['dn/dlogDP'].agg('mean')`。
如果您希望把 `Midpoint` 限制在特定范围内,也可以在分组之前先对其进行过滤。
相关问题 更多 >
编程相关推荐