from itertools import groupby
from ast import literal_eval as le
# I'm assuming your input file is called 'input.txt'
# which contains the data you gave in your question
def _group_key(row):
    """Return the part of the row's first field that precedes the first '_'."""
    return row[0].split('_')[0]


# Read the whitespace-separated table from 'input.txt'.
# Blank lines are skipped: they would split into an empty row, and the
# grouping key would then fail with IndexError on row[0].
with open('input.txt', 'r') as fp:
    data = [line.split() for line in fp.read().splitlines() if line.strip()]

# sub maps each key prefix to the tuples of values found in its
# 'x1', 'x2' and 'x3' columns.
sub = {}
# data[0] is skipped (the first non-blank line is treated as a header).
# groupby only merges adjacent items, so the rows are sorted by the same key first.
for k, v in groupby(sorted(data[1:], key=_group_key), _group_key):
    # Transpose the group's rows into columns; the first column (the row
    # labels) is discarded. Each row is expected to have exactly four fields.
    # Remove the 'x3' field if you don't need their results in your code
    _, x1, x2, x3 = zip(*v)
    sub[k] = {'x1': x1, 'x2': x2, 'x3': x3}

for k in sub:
    for j in sub[k]:
        # Print 1 if any value of the field ('x1', 'x2' or 'x3') is truthy
        # (e.g. non-zero) once parsed by literal_eval, otherwise print 0.
        print("{}:{}: {}".format(k, j, 1 if any(le(m) for m in sub[k][j]) else 0))
如果你能负担得起将整个数据集加载到内存中,最好的方法是使用字典按基因名分组:
然后可以使用如下结果:
编辑:如果您想使用 pandas:
又快又脏:
您可以使用来自 `itertools` 模块的 `groupby`,以及来自 `ast` 模块的 `literal_eval`,如下例:
输出:
相关问题 更多 >
编程相关推荐