从TSV文件的列索引生成“特殊”字典结构

9606 1 GO:0002576 TAS - platelet degranulation - Process
9606 1 GO:0003674 ND - molecular_function_z - Function
9606 1 GO:0003674 OOO - molecular_function_z - Function
9606 1 GO:0005576 IDA - extracellular region - Component
9606 1 GO:0005576 TAS - extracellular region - Component
9606 1 GO:0005576 OOO - extracellular region - Component
9606 1 GO:0005615 HDA - extracellular spaces - Component
9606 1 GO:0008150 ND - biological_processes - Process
9606 1 GO:0008150 OOO - biological_processes - Process
9606 1 GO:0008150 HHH - biological_processes - Process
9606 1 GO:0008150 YYY - biological_processes - Process
9606 1 GO:0031012 IDA - extracellular matrix - Component
9606 1 GO:0043312 TAS - neutrophil degranulat - Process

# Build a three-level structure from tab-separated lines:
# value of 1st chosen column -> value of 2nd chosen column -> set of values
# of the 3rd chosen column.
three_userinput = "8:2:3"
# The user supplies 1-based column numbers; convert them to 0-based indices.
# A real list is needed because the indices are subscripted below, and map()
# returns a non-subscriptable iterator on Python 3.
three = [int(x) - 1 for x in three_userinput.split(":")]
DICT3 = {}
for line in file_handle:
    # Drop the line terminator first, otherwise a key taken from the last
    # column would carry a trailing "\n".
    info = line.rstrip("\n").split("\t")
    if info[three[0]] in DICT3:
        if info[three[1]] in DICT3[info[three[0]]]:
            DICT3[info[three[0]]][info[three[1]]].add(info[three[2]])
        else:
            DICT3[info[three[0]]][info[three[1]]] = set([info[three[2]]])
    else:
        DICT3[info[three[0]]] = {info[three[1]]: set([info[three[2]]])}
pprint.pprint(DICT3)

# Build a four-level structure from tab-separated lines:
# 1st chosen column -> 2nd chosen column -> 3rd chosen column -> set of values
# of the 4th chosen column.
four_userinput = "8:2:3:4"
# The user supplies 1-based column numbers; convert them to 0-based indices.
# A real list is needed because the indices are subscripted below, and map()
# returns a non-subscriptable iterator on Python 3.
four = [int(x) - 1 for x in four_userinput.split(":")]
DICT4 = {}
# NOTE(review): if file_handle was already iterated elsewhere it is exhausted
# and this loop sees no lines -- reopen or rewind it first.
for line in file_handle:
    # Drop the line terminator first, otherwise a key taken from the last
    # column would carry a trailing "\n".
    info = line.rstrip("\n").split("\t")
    if info[four[0]] in DICT4:
        if info[four[1]] in DICT4[info[four[0]]]:
            if info[four[2]] in DICT4[info[four[0]]][info[four[1]]]:
                DICT4[info[four[0]]][info[four[1]]][info[four[2]]].add(info[four[3]])
            else:
                DICT4[info[four[0]]][info[four[1]]][info[four[2]]] = set([info[four[3]]])
        else:
            DICT4[info[four[0]]][info[four[1]]] = {info[four[2]]: set([info[four[3]]])}
    else:
        DICT4[info[four[0]]] = {info[four[1]]: {info[four[2]]: set([info[four[3]]])}}
pprint.pprint(DICT4)

{'Component': {'1': {'GO:0005576': set(['IDA', 'OOO', 'TAS']), 'GO:0005615': set(['HDA']), 'GO:0031012': set(['IDA'])}}, 'Function': {'1': {'GO:0003674': set(['ND', 'OOO'])}}, 'Process': {'1': {'GO:0002576': set(['TAS']), 'GO:0008150': set(['HHH', 'ND', 'OOO', 'YYY']), 'GO:0043312': set(['TAS'])}}}

2条回答

网友

1楼 · 编辑于 2024-04-26 10:39:25

你需要的是一个defaultdict。这允许您更新条目，而不必首先测试它们是否存在，即如果条目不存在，则自动添加默认值。由于有多个级别，因此需要使用build_defaultdict(levels)函数递归地创建嵌套的defaultdict。设置值也需要递归，但逻辑更简单：

import pprint
import csv
from operator import itemgetter
from collections import defaultdict


def build_defaultdict(levels):
    """Return an empty nested ``defaultdict`` whose innermost values are sets.

    With ``levels <= 1`` the result is ``defaultdict(set)``. Otherwise every
    missing key lazily creates a ``build_defaultdict(levels - 1)`` mapping, so
    ``levels`` keys can be chained before reaching a set.
    """
    if levels <= 1:
        return defaultdict(set)
    # The factory is called on each missing key, producing a fresh sub-mapping
    # that is one level shallower.
    return defaultdict(lambda: build_defaultdict(levels - 1))


def set_value(d, row):
    """Add the last item of ``row`` to the set reached via its leading items.

    ``row[0]`` is used as the key into ``d``. While more than two items
    remain, the function recurses into ``d[row[0]]`` with the rest of the
    row; with two or fewer items, ``row[1]`` is added to the set stored at
    ``d[row[0]]``. Missing keys are looked up with plain indexing, so ``d``
    must create them on access (as a ``defaultdict`` does).

    Returns ``d`` itself.
    """
    key = row[0]
    if len(row) > 2:
        child = d[key]
        # Recurse one level down with the remaining columns and store the
        # (same) child mapping back under its key.
        d[key] = set_value(child, row[1:])
    else:
        d[key].add(row[1])
    return d


# Zero-based indices of the columns to nest by, outermost key first; the last
# index is the column whose values are collected into the innermost sets.
req_cols = [7, 1, 2, 3]     # counting from col 0

# One mapping level per key column; the final column supplies the set members.
data = build_defaultdict(len(req_cols) - 1)
get_cols = itemgetter(*req_cols)

with open('input.csv', 'r', newline='') as f_input:
    for row in csv.reader(f_input, delimiter='\t'):
        if not row:
            # csv.reader yields an empty list for a blank line (for example a
            # stray trailing newline); indexing it would raise IndexError.
            continue
        set_value(data, get_cols(row))

pprint.pprint(data)
print(data['Component']['1']['GO:0005576'])

这将创建字典，如下所示：

defaultdict(<function <lambda> at 0x000002350F481B70>,
    {
        'Component': defaultdict(<function <lambda>.<locals>.<lambda> at 0x000002350F6EB378>,
            {'1': defaultdict(<class 'set'>,
                {'GO:0005576': {'IDA', 'OOO', 'TAS'},
                 'GO:0005615': {'HDA'},
                 'GO:0031012': {'IDA'}})}),
        'Function': defaultdict(<function <lambda>.<locals>.<lambda> at 0x000002350F6EB400>,
            {'1': defaultdict(<class 'set'>,
                {'GO:0003674': {'ND', 'OOO'}})}),
     'Process': defaultdict(<function <lambda>.<locals>.<lambda> at 0x00000235071BE0D0>,
            {'1': defaultdict(<class 'set'>,
                {'GO:0002576': {'TAS'},
                 'GO:0008150': {'HHH', 'ND', 'OOO', 'YYY'},
                 'GO:0043312': {'TAS'}})})})

{'TAS', 'OOO', 'IDA'}

它的显示方式可能与普通词典不同，但其工作方式与普通词典相同。也可以使用itemgetter()将所需元素从一个列表提取到另一个列表中。

网友

2楼 · 编辑于 2024-04-26 10:39:25

您可以定义一个递归函数来实现这一点。

def update_nested_dict(d, vars):
    """Insert one record into a nested dict-of-dicts whose leaves are sets.

    All items of ``vars`` except the last are used as successive keys; the
    last item is added to the set stored under the final key. Intermediate
    dicts and the leaf set are created on demand.

    Args:
        d: The dictionary to update in place.
        vars: Sequence of at least two items: one or more keys followed by
            the value to store.

    Returns:
        ``d`` itself, so calls can be chained or the result ignored.

    Raises:
        IndexError: If ``vars`` has fewer than two items.
    """
    key = vars[0]
    if len(vars) > 2:
        # setdefault creates the missing level without a try/except wrapped
        # around the recursive call, so errors from deeper levels propagate
        # instead of being mistaken for a missing key.
        update_nested_dict(d.setdefault(key, {}), vars[1:])
    else:
        # Read the value before touching ``d`` so that a too-short ``vars``
        # fails without leaving an empty set behind.
        value = vars[1]
        # Add in place rather than rebuilding the set with union() each time.
        d.setdefault(key, set()).add(value)
    return d

下面的用法尽可能保留了您原有的代码逻辑和变量名：

>>> userinput = "8:2:3:4"
>>> cols = map(lambda x: int(x) - 1, userinput.split(":"))
>>> 
>>> DICT = {}
>>> 
>>> for line in file_handle:
>>>     info = line.replace("\n", "").split("\t")
>>>     names = [info[c] for c in cols]
>>>     _ = update_nested_dict(DICT, names)
>>>
>>> for k, v in DICT.iteritems():
...  print k, v
...
Process {'1': {'GO:0002576': set(['TAS']), 'GO:0008150': set(['YYY', 'OOO', 'HHH', 'ND']), 'GO:0043312': set(['TAS'])}}
Function {'1': {'GO:0003674': set(['OOO', 'ND'])}}
Component {'1': {'GO:0005576': set(['OOO', 'IDA', 'TAS']), 'GO:0005615': set(['HDA']), 'GO:0031012': set(['IDA'])}}

相关问题更多 >

编程相关推荐

热门问题

热门文章