擅长:Python、MySQL、Java
<p>如果我正确理解了你的“截断工作表”(sheet breaking)策略(如果理解有误,请见谅),下面是一个可以完成该任务的脚本:</p>
<pre class="lang-py prettyprint-override"><code>import pandas as pd
# Helper that truncates a sheet at the first run of consecutive NaN in any column.
def df_breaker(df, max_nans=3):
    """Return the rows of *df* that precede the first long NaN run.

    The frame is scanned top to bottom. As soon as any single column holds
    ``max_nans`` consecutive NaN values, the result is cut so that it ends on
    the row just before that run started. If no column ever reaches the
    threshold, every row is kept.

    Args:
        df: Sheet to inspect; it is not modified.
        max_nans: Length of the NaN run that triggers the cut (default 3,
            the value that used to be hard-coded).

    Returns:
        A new DataFrame holding the leading rows of *df*, with the original
        index labels, column order and dtypes.

    Raises:
        ValueError: If ``max_nans`` is smaller than 1.
    """
    if 1 > max_nans:
        raise ValueError("max_nans must be at least 1")
    # Too few rows (or an empty axis) to ever contain a full run; this also
    # keeps the rolling window below away from degenerate empty frames.
    if df.empty or max_nans > len(df):
        return df.copy()

    # Count NaN per column over a sliding window of max_nans rows; a count
    # equal to the window size means the whole window is NaN. Incomplete
    # windows at the top yield NaN and therefore never match.
    nan_counts = df.isna().astype(int).rolling(window=max_nans).sum()
    run_ends = nan_counts.eq(max_nans).any(axis=1).to_numpy()
    if not run_ends.any():
        return df.copy()

    # argmax gives the position of the first True, i.e. the last row of the
    # first full run; step back to the row where that run began.
    run_start = int(run_ends.argmax()) - max_nans + 1
    return df.iloc[:run_start].copy()
# Sample sheets for exercising df_breaker.
# dfa: c1 ends with three consecutive NaN, so it is expected to be cut
# after its second row.
dfa = pd.DataFrame({
    'c1': [0, 1, None, None, None],
    'c2': [5, 6, 7, 8, 9],
})
# dfb: NaN values alternate, never three in a row in either column, so it
# is expected to stay whole.
dfb = pd.DataFrame({
    'c1': [10, None, 12, None, 14],
    'c2': [None, 16, None, 18, 19],
})
# dfc and dfd: no NaN at all, expected to stay whole.
dfc = pd.DataFrame({
    'c1': [20, 21, 22, 23, 24],
    'c2': [25, 26, 27, 28, 29],
})
dfd = pd.DataFrame({
    'c1': [30, 31, 32, 33, 34],
    'c2': [35, 36, 37, 38, 39],
})

# Each dict stands in for one input file, mapping sheet name to its frame.
input_files = [
    {'sheet1': dfa, 'sheet2': dfb},
    {'sheet1': dfc, 'sheet2': dfd},
]

# For every sheet name found in the first file, truncate that sheet in each
# file and stack the pieces in file order.
d_sheets = {}
for key in input_files[0]:
    d_sheets[key] = pd.concat(
        [df_breaker(workbook[key]) for workbook in input_files]
    )
</code></pre>