从路径和值创建嵌套Dict

2024-06-17 12:56:30 发布

您现在位置:Python中文网/ 问答频道 /正文

我需要的帮助与《Python: recursively create dictionary from paths》这个问题类似,但我的路径中还包含列表。路径中有时会带有索引,如下所示:

PATH       VALUE
/a/b/c      'Hi'
/a/b/d       1
/a/c/d       1
/b/c/d       1
/a/e[0]/f    1
/a/e[1]/g    2
/b/x/y[1]    'thank'
/b/x/y[2]    'you'
/b/j/b/c     2
/b/j/b/d     1
/a/e[2]/k[0]/s     '2D_1'
/a/e[2]/k[1]/s     '2D_2'

我正在查找的预期输出字典如下:

{
    "a": {
        "b": {
            "c": "Hi",
            "d": 1
        },
        "c": {
            "d": 1
        },
        "e": [
            {
                "f": 1
            },
            {
                "g": 2
            },
            {
                "k": [
                    {
                        "s": "2D_1"
                    },
                    {
                        "s": "2D_2"
                    }
                ]
            }
        ]
    },
    "b": {
        "c": {
            "d": 1
        },
        "x": {
            "y": [
                null,
                "thank",
                "you"
            ]
        },
        "j": {
            "b": {
                "c": 2,
                "d": 1
            }
        }
    }
}

注意:路径可以超过4个部分(数量不限)。希望用递归的方法来处理这个问题,并根据路径和值创建dict。

我尝试了下面的示例代码,但仍然卡在列表的处理上:

import re

def create_dict(data, path, value):
    """Store *value* inside the nested structure *data* at the place named by *path*.

    *path* is a ``/``-separated list of segments. A plain segment such as
    ``b`` addresses a dict key. An indexed segment such as ``e[2]`` addresses
    element 2 of the list stored under key ``e``. Dicts and lists that do not
    exist yet are created on the way down; lists are padded with ``None`` so
    that indices may be supplied in any order (``k[1]`` before ``k[0]``).

    Args:
        data: dict that is updated in place.
        path: for example ``"a/e[2]/k[0]/s"``. Leading and trailing slashes
            are ignored. Only non-negative indices are recognised; any other
            segment is used verbatim as a dict key.
        value: object stored at the final segment.

    Returns:
        The same *data* object that was passed in.
    """
    nodes = path.strip('/').split('/')
    last = len(nodes) - 1
    current = data

    for position, node in enumerate(nodes):
        is_leaf = position == last
        indexed = re.fullmatch(r"(.+?)\[\s*\+?(\d+)\s*\]", node)

        if indexed is None:
            # Plain dict key: either store the value or step one level down.
            if is_leaf:
                current[node] = value
            else:
                current = current.setdefault(node, {})
            continue

        key = indexed.group(1)
        index = int(indexed.group(2))
        items = current.setdefault(key, [])

        # Grow the list so that items[index] exists, whatever order the
        # indices arrive in.
        shortfall = index + 1 - len(items)
        if shortfall > 0:
            items.extend([None] * shortfall)

        if is_leaf:
            items[index] = value
        else:
            # A None slot is only padding; turn it into a dict before descending.
            if items[index] is None:
                items[index] = {}
            current = items[index]

    return data

data = {}
samples = (
    ('/a/b/c[0]/d/e/f', 123456),
    ('/a/b/c[1]/d/e/g', 'ABCDEFG'),
)
for position, (keys, value) in enumerate(samples):
    if position:
        # Separator line between consecutive results.
        print('---------------')
    # Drop the leading "/" before handing the path to create_dict.
    path = keys[1:]
    print(create_dict(data, path, value))

另外又增加了2个路径。路径出现的顺序也可能是k[1]在前、k[0]在后:

/a/e[2]/k[1]/s     '2D_2'
/a/e[2]/k[0]/s     '2D_1'

Tags: path in 路径 node data index if value
2条回答

与链接问题中的情况一样,您给出的预期结果并不是合法的结构,因此我对您的意图做了一两处猜测

首先,

{'b':
    {'c': 'Hi' },
    {'d': 1 }
},

是一个语法错误。一个键不能像这样同时对应两个值:'b'必须是一个列表或一个dict。既然您已经特意在需求中加入了列表,我猜这里应该是一个dict

其次,对于下面的输入,{'y' : ['thank' , 'you']}似乎是一个令人意外的结果:

/b/x/y[1]    'thank'
/b/x/y[2]    'you'

因为它使用的索引是1和2。如果想得到原来的结果,可以改用.append(容易让人困惑!),或者修正输入中的索引(不会让人困惑)

除此之外,解析新的列表需求还需要使用正则表达式挑选索引和元素,并使用索引和键进入下一个嵌套级别

import json
import re

def add_path(d, path, val):
    """Insert *val* into the nested container *d* at the location given by *path*.

    *path* is split on ``/`` and the first piece (the text before the leading
    slash) is discarded. A segment ending in ``[N]`` refers to element ``N``
    of a list stored under the segment's name; any other segment is a plain
    dict key. Lists are created or padded with ``None`` as needed, and a falsy
    list slot is replaced by a new dict before descending into it.

    The function mutates *d* in place and returns ``None``.
    """
    segments = path.split("/")[1:]
    final = len(segments) - 1

    for pos, seg in enumerate(segments):
        at_leaf = pos == final

        if not re.search(r".?\[\d+\]$", seg):
            # Plain key: store the value at the leaf, otherwise step down.
            if at_leaf:
                d[seg] = val
                continue
            if seg not in d:
                d[seg] = {}
            d = d[seg]
            continue

        # Indexed segment: separate the list name from the numeric index.
        name, digits = re.fullmatch(r"(.+)\[(\d+)\]", seg).groups()
        idx = int(digits)

        if name not in d:
            d[name] = [None] * (idx + 1)
        else:
            shortfall = idx - len(d[name]) + 1
            if shortfall > 0:
                # Pad so that position idx exists.
                d[name] += [None] * shortfall

        slots = d[name]
        if at_leaf:
            slots[idx] = val
        elif not slots[idx]:
            slots[idx] = {}

        d = slots[idx]

if __name__ == "__main__":
    # Sample input: one "PATH    VALUE" pair per line, with the columns
    # separated by four or more whitespace characters.
    data = """
    /a/b/c      'Hi'
    /a/b/d       1
    /a/c/d       1
    /b/c/d       1
    /a/e[0]/f    1
    /a/e[1]/g    2
    /b/x/y[1]    'thank'
    /b/x/y[2]    'you'
    /b/j/b/c     2
    /b/j/b/d     1
    """
    d = {}

    def clean(x):
        """Return *x* as an int when possible, else stripped of spaces and single quotes."""
        try:
            number = int(x)
        except ValueError:
            return x.strip(" '")
        return number

    rows = (line for line in data.split("\n") if line.strip())
    for row in rows:
        # Each row starts with indentation, so the first split piece is empty
        # and gets dropped; the remaining two are the path and the value.
        path, val = [clean(cell) for cell in re.split(r"\s{4,}", row)][1:]
        add_path(d, path, val)

    print(json.dumps(d, indent=4))

输出:

{
    "a": {
        "b": {
            "c": "Hi",
            "d": 1
        },
        "c": {
            "d": 1
        },
        "e": [
            {
                "f": 1
            },
            {
                "g": 2
            }
        ]
    },
    "b": {
        "c": {
            "d": 1
        },
        "x": {
            "y": [
                null,
                "thank",
                "you"
            ]
        },
        "j": {
            "b": {
                "c": 2,
                "d": 1
            }
        }
    }
}

把这段代码整理一下,留给读者作为练习

哈哈,和@ggorlen的答案非常相似

import re
import pprint

class Parser:
    """Build a nested dict/list tree from ``/``-separated paths and their values.

    A path segment of the form ``name[N]`` addresses element ``N`` of a list
    stored under ``name``; any other segment is a plain dict key.
    """

    def __init__(self):
        # Captures the (possibly empty) name and the numeric index of "name[N]".
        self.index_pattern = re.compile(r'([^[]*)\[(\d+)\]')

    def Add(self, tree, path, value):
        """Store *value* in *tree* at the location given by the segment list *path*.

        Intermediate containers are created as needed; *tree* is modified in place.
        """
        for segment in path[:-1]:
            found = self.index_pattern.fullmatch(segment)
            if found:
                # Intermediate list: missing slots are filled with fresh dicts.
                items, position = self.AddList(found, tree, dict)
                tree = items[position]
                continue
            if segment not in tree:
                tree[segment] = {}
            tree = tree[segment]

        leaf = path[-1]
        found = self.index_pattern.fullmatch(leaf)
        if not found:
            tree[leaf] = value
            return
        # Leaf list: missing slots are filled with None.
        items, position = self.AddList(found, tree, lambda: None)
        items[position] = value

    @staticmethod
    def AddList(match, tree, ctor):
        """Make sure ``tree[name]`` is a list long enough to hold the matched index.

        *match* supplies the name (group 1) and the index (group 2). Slots that
        do not exist yet are filled by calling *ctor* once per slot.

        Returns:
            A ``(list, index)`` tuple.
        """
        name, digits = match.group(1, 2)
        ix = int(digits)
        if name in tree:
            lst = tree[name]
        else:
            lst = []
        for _ in range(ix - len(lst) + 1):
            lst.append(ctor())
        tree[name] = lst
        return lst, ix

    def Process(self, data):
        """Return a new tree built from *data*, a mapping of path string to value."""
        root = {}
        for raw_path, payload in data.items():
            # The piece before the leading "/" is discarded.
            segments = raw_path.split('/')[1:]
            self.Add(root, segments, payload)
        return root

def Run():
    """Build the tree for the sample paths and pretty-print it."""
    samples = dict([
        ('/a/b/c', 'Hi'),
        ('/a/b/d', 1),
        ('/a/c/d', 1),
        ('/b/c/d', 1),
        ('/a/e[0]/f', 1),
        ('/a/e[1]/g', 2),
        ('/b/x/y[1]', 'thank'),
        ('/b/x/y[2]', 'you'),
        ('/b/j/b/c', 2),
        ('/b/j/b/d', 1),
    ])
    parser = Parser()
    tree = parser.Process(samples)
    pprint.pprint(tree)


Run()

输出:

{'a': {'b': {'c': 'Hi', 'd': 1}, 'c': {'d': 1}, 'e': [{'f': 1}, {'g': 2}]},
 'b': {'c': {'d': 1},
       'j': {'b': {'c': 2, 'd': 1}},
       'x': {'y': [None, 'thank', 'you']}}}

相关问题 更多 >