解析嵌套字典（allen nlp hierplane_树）

from allennlp.predictors.predictor import Predictor
# NOTE(review): imported only for its side effects — presumably it registers
# the dependency-parser model/predictor with AllenNLP; confirm before removing.
import allennlp_models.structured_prediction

# Sentence to be dependency-parsed.
text = "When I was walking to the park yesterday, I saw a man wearing a blue shirt."

# Load the pretrained biaffine dependency parser from its published archive.
predictor = Predictor.from_path("https://storage.googleapis.com/allennlp-public-models/biaffine-dependency-parser-ptb-2020.04.06.tar.gz")

# Parse the sentence and keep only the nested `hierplane_tree` entry of the result.
tree = predictor.predict(sentence=text)
tree = tree['hierplane_tree']
tree  # bare expression: displays the tree when run in a notebook/REPL

# Extract nested values from a JSON tree.
def json_extract(obj, key):
    """Recursively fetch the values stored under ``key`` in nested JSON.

    Walks ``obj`` depth-first in document order. Dict values that are
    themselves dicts or lists are descended into (and are never collected,
    even when they sit under ``key``); every other value whose dict key
    equals ``key`` is appended to the result.

    Args:
        obj: The nested structure to search (dicts/lists; anything else
            yields an empty result).
        key: The dictionary key whose leaf values should be collected.

    Returns:
        A new list with the matching values in traversal order.
    """
    arr = []

    def extract(node):
        """Collect values of ``key`` found at or below ``node`` into ``arr``."""
        if isinstance(node, dict):
            for k, v in node.items():
                if isinstance(v, (dict, list)):
                    # Containers are searched, not collected.
                    extract(v)
                elif k == key:
                    arr.append(v)
        elif isinstance(node, list):
            for item in node:
                extract(item)

    extract(obj)
    return arr

# Find every value stored under the `word` key in the parsed tree (a nested Python dictionary).
children = json_extract(tree, 'word')
print(children)
# Output:
# ['walking', 'When', 'I', 'was', 'to', 'park', 'the', 'yesterday', ',', 'saw', 'I', 'man', 'a', 'wearing', 'shirt', 'a', 'blue', '.']

{'word': 'man', 'nodeType': 'dep', 'attributes': ['NOUN'], 'link': 'dep', 'spans': [{'start': 51, 'end': 55}], 'children': [{'word': 'a', 'nodeType': 'det', 'attributes': ['DET'], 'link': 'det', 'spans': [{'start': 49, 'end': 51}]}, {'word': 'wearing', 'nodeType': 'dep', 'attributes': ['VERB'], 'link': 'dep', 'spans': [{'start': 55, 'end': 63}], 'children': [{'word': 'shirt', 'nodeType': 'dep', 'attributes': ['NOUN'], 'link': 'dep', 'spans': [{'start': 70, 'end': 76}], 'children': [{'word': 'a', 'nodeType': 'dep', 'attributes': ['DET'], 'link': 'dep', 'spans': [{'start': 63, 'end': 65}]}, {'word': 'blue', 'nodeType': 'dep', 'attributes': ['ADJ'], 'link': 'dep', 'spans': [{'start': 65, 'end': 70}]}]}]}]}]}]}

{'text': 'When I was walking to the park yesterday , I saw a man wearing a blue shirt .', 'root': {'word': 'walking', 'nodeType': 'root', 'attributes': ['VERB'], 'link': 'root', 'spans': [{'start': 11, 'end': 19}], 'children': [{'word': 'When', 'nodeType': 'dep', 'attributes': ['ADV'], 'link': 'dep', 'spans': [{'start': 0, 'end': 5}]}, {'word': 'I', 'nodeType': 'nsubj', 'attributes': ['PRON'], 'link': 'nsubj', 'spans': [{'start': 5, 'end': 7}]}, {'word': 'was', 'nodeType': 'aux', 'attributes': ['AUX'], 'link': 'aux', 'spans': [{'start': 7, 'end': 11}]}, {'word': 'to', 'nodeType': 'prep', 'attributes': ['ADP'], 'link': 'prep', 'spans': [{'start': 19, 'end': 22}], 'children': [{'word': 'park', 'nodeType': 'pobj', 'attributes': ['NOUN'], 'link': 'pobj', 'spans': [{'start': 26, 'end': 31}], 'children': [{'word': 'the', 'nodeType': 'det', 'attributes': ['DET'], 'link': 'det', 'spans': [{'start': 22, 'end': 26}]}]}]}, {'word': 'yesterday', 'nodeType': 'tmod', 'attributes': ['NOUN'], 'link': 'tmod', 'spans': [{'start': 31, 'end': 41}]}, {'word': ',', 'nodeType': 'dep', 'attributes': ['PUNCT'], 'link': 'dep', 'spans': [{'start': 41, 'end': 43}], 'children': [{'word': 'saw', 'nodeType': 'dep', 'attributes': ['VERB'], 'link': 'dep', 'spans': [{'start': 45, 'end': 49}], 'children': [{'word': 'I', 'nodeType': 'nsubj', 'attributes': ['PRON'], 'link': 'nsubj', 'spans': [{'start': 43, 'end': 45}]}, {'word': 'man', 'nodeType': 'dep', 'attributes': ['NOUN'], 'link': 'dep', 'spans': [{'start': 51, 'end': 55}], 'children': [{'word': 'a', 'nodeType': 'det', 'attributes': ['DET'], 'link': 'det', 'spans': [{'start': 49, 'end': 51}]}, {'word': 'wearing', 'nodeType': 'dep', 'attributes': ['VERB'], 'link': 'dep', 'spans': [{'start': 55, 'end': 63}], 'children': [{'word': 'shirt', 'nodeType': 'dep', 'attributes': ['NOUN'], 'link': 'dep', 'spans': [{'start': 70, 'end': 76}], 'children': [{'word': 'a', 'nodeType': 'dep', 'attributes': ['DET'], 'link': 'dep', 'spans': [{'start': 63, 'end': 
65}]}, {'word': 'blue', 'nodeType': 'dep', 'attributes': ['ADJ'], 'link': 'dep', 'spans': [{'start': 65, 'end': 70}]}]}]}]}]}]}, {'word': '.', 'nodeType': 'punct', 'attributes': ['PUNCT'], 'link': 'punct', 'spans': [{'start': 76, 'end': 78}]}]}, 'nodeTypeToStyle': {'root': ['color5', 'strong'], 'dep': ['color5', 'strong'], 'nsubj': ['color1'], 'nsubjpass': ['color1'], 'csubj': ['color1'], 'csubjpass': ['color1'], 'pobj': ['color2'], 'dobj': ['color2'], 'iobj': ['color2'], 'mark': ['color2'], 'pcomp': ['color2'], 'xcomp': ['color2'], 'ccomp': ['color2'], 'acomp': ['color2'], 'aux': ['color3'], 'cop': ['color3'], 'det': ['color3'], 'conj': ['color3'], 'cc': ['color3'], 'prep': ['color3'], 'number': ['color3'], 'possesive': ['color3'], 'poss': ['color3'], 'discourse': ['color3'], 'expletive': ['color3'], 'prt': ['color3'], 'advcl': ['color3'], 'mod': ['color4'], 'amod': ['color4'], 'tmod': ['color4'], 'quantmod': ['color4'], 'npadvmod': ['color4'], 'infmod': ['color4'], 'advmod': ['color4'], 'appos': ['color4'], 'nn': ['color4'], 'neg': ['color0'], 'punct': ['color0']}, 'linkToPosition': {'nsubj': 'left', 'nsubjpass': 'left', 'csubj': 'left', 'csubjpass': 'left', 'pobj': 'right', 'dobj': 'right', 'iobj': 'right', 'pcomp': 'right', 'xcomp': 'right', 'ccomp': 'right', 'acomp': 'right'}}

1条回答

网友

1楼 · 发布于 2024-05-15 04:21:05

这段代码无疑还需要优化和清理，但它确实可以让您按感兴趣的词（本例中为 man）解析 AllenNLP 生成的依存关系树。希望这能帮助其他人摆脱困境。

通过提供键/值对（以 word 作为键、man 作为值），你可以从文本中得到该实体的属性（结果见文末输出）。

辅助函数：

def get_entity_attributes(obj, key, value):
    """Recursively collect every dict in a nested structure where ``key`` maps to ``value``.

    The structure is walked depth-first in document order. Every dict that is
    reached — the top-level object, a dict stored as another dict's value
    (e.g. a tree's ``root`` node), or a dict inside a list — is tested, and a
    matching dict is recorded before its own children are searched, so
    enclosing matches precede nested ones.

    Args:
        obj: The nested structure to search (dicts/lists; anything else
            yields an empty result).
        key: Dictionary key to look up in each dict.
        value: Value that ``key`` must map to for the dict to be collected.

    Returns:
        A new list holding the matching dict objects themselves (not copies),
        in traversal order.
    """
    matches = []

    def walk(node):
        """Record ``node`` if it matches, then search everything below it."""
        if isinstance(node, dict):
            # Test every dict, not only list elements: otherwise a node that is
            # stored directly as a dict value would never be found.
            if key in node and value == node[key]:
                matches.append(node)
            for child in node.values():
                if isinstance(child, (dict, list)):
                    walk(child)
        elif isinstance(node, list):
            for item in node:
                walk(item)

    walk(obj)
    return matches

def parse_attributes(obj, key):
    """Return every non-container value stored under ``key`` in a nested structure.

    The structure is traversed depth-first in document order. Dict values
    that are dicts or lists are descended into rather than collected; any
    other value is collected when its dict key equals ``key``.

    Args:
        obj: Nested dicts/lists to search (other types give an empty list).
        key: The dictionary key whose values are wanted.

    Returns:
        A new list of the matching values in traversal order.
    """

    def iter_values(node):
        """Yield the matching values found at or below ``node``."""
        if isinstance(node, list):
            for element in node:
                yield from iter_values(element)
        elif isinstance(node, dict):
            for name, child in node.items():
                if isinstance(child, (dict, list)):
                    # Containers are searched, never yielded themselves.
                    yield from iter_values(child)
                elif name == key:
                    yield child

    return list(iter_values(obj))

# Keep only the word tokens that are neither stopwords nor punctuation.
def get_clean_list(entities):
    """Filter stopwords and punctuation out of a sequence of words.

    Each word is looked up in ``nlp.vocab`` and kept only when the resulting
    entry has both ``is_stop`` and ``is_punct`` false.

    NOTE(review): ``nlp`` is not defined in this snippet — presumably a loaded
    spaCy pipeline available at module level; confirm.

    Args:
        entities: Iterable of word strings.

    Returns:
        A new list with the retained words in their original order.
    """

    def is_content_word(token):
        """Return True when ``token`` is neither a stopword nor punctuation."""
        entry = nlp.vocab[token]
        return not (entry.is_stop or entry.is_punct)

    return [token for token in entities if is_content_word(token)]

查看输出：

# Sentence to analyse and its dependency parse.
text = "When I was walking to the park yesterday, I saw a man wearing a blue shirt."
tree = predictor.predict(sentence=text)

# Look for every tree node whose "word" is the entity of interest.
key = "word"
entity = "man"
entities = get_entity_attributes(tree, key, entity)

for ent in entities:
    # Only the printed label differs between 'dep' nodes and all other nodes.
    label = 'Attributes' if ent['nodeType'] == 'dep' else 'Action Attributes'

    # Collect every word in the entity's subtree, then drop stopwords/punctuation.
    attributes = parse_attributes(ent, key)
    clean_attributes = get_clean_list(attributes)

    # The entity's own word is part of its subtree; remove it from the report.
    # Guarded because the filter above may already have dropped it (e.g. when
    # the entity is itself a stopword), in which case remove() would raise.
    if entity in clean_attributes:
        clean_attributes.remove(entity)

    print(f'entity: {entity} {label}: {clean_attributes}')

给你：

entity: man Attributes: ['wearing', 'shirt', 'blue']

相关问题更多 >

编程相关推荐

热门问题

热门文章