将逻辑字符串转换为JSON

2024-04-23 10:22:10 发布

您现在位置:Python中文网/ 问答频道 /正文

我想把下面的输入字符串转换为对应的JSON输出:

输入:

"#serviceRequest and @charges:getRoamingCharges or @plans:dataplans"

其中“#”-意图 “@”-实体 “:”-值

输出:

{"and":[
{"some" : [ {"var":"intents"}, {"==":[{"var":"intent"}, "serviceRequest"]} ]},
{"or":[
{"and":[
{"some" : [ {"var":"entities"}, {"==":[{"var":"entity"}, "charges"]} ]},
{"some" : [ {"var":"entities"}, {"==":[{"var":"value"}, "getRoamingCharges"]} ]}
]},
{"and":[
{"some" : [ {"var":"entities"}, {"==":[{"var":"entity"}, "plans"]} ]},
{"some" : [ {"var":"entities"}, {"==":[{"var":"value"}, "dataplans"]} ]}
]}
]}
]}

我尝试过:

import pyparsing

# Grammar for a single comparison of the form "<identifier>"<operator>"<value>".
identifier = pyparsing.QuotedString('"')
operator = (
    pyparsing.Literal("==") |
    pyparsing.Literal("≠") |
    pyparsing.Literal("≥") |
    pyparsing.Literal("≤") |
    pyparsing.Literal("<") |
    pyparsing.Literal(">")
)
value = pyparsing.QuotedString('"')


def list_to_dict(pos, tokens):
    """Parse action that prints the matched comparison as a nested mapping.

    pyparsing calls a two-argument parse action with the match location and
    the matched tokens.

    Args:
        pos: Location of the match in the input string (unused).
        tokens: The matched tokens, in the order identifier, operator, value.

    Returns:
        ``tokens`` unchanged, so the parse result itself is not modified.
    """
    print(tokens)
    # NOTE: the inner braces build a *set*, so the operand order is not
    # guaranteed and the structure cannot be serialised with ``json``.
    abc = {tokens[1]: {tokens[2], tokens[0]}}
    print(abc)
    return tokens


match_format = (identifier + operator + value).setParseAction(list_to_dict)

print(match_format.parseString('"intent"=="serviceRequest"'))

这会输出:

{'==': {'intent', 'serviceRequest'}}

请帮我用 pyparsing(Python)或其他任何可行的替代方法实现上述转换。


Tags: and format value request var match service some
1条回答
网友
1楼 · 发布于 2024-04-23 10:22:10

您可以创建一个更简单的标记器来链接解析器:

import re
class Token:
    """One lexical token of the intent/entity expression mini-language.

    Attributes:
        val: The matched text, e.g. ``'#'``, ``'and'`` or ``'charges'``.
        _type: The token category: ``'cond'`` for ``and``/``or``,
            ``'intent'`` for ``#``, ``'entity'`` for ``@``, ``'value'`` for
            ``:`` and ``'label'`` for any other run of word characters.
    """

    # ``grammar`` splits the input into tokens; ``_types`` maps a pattern to
    # the category of a token that matches it completely (first match wins).
    # The word boundaries keep ``and``/``or`` from being cut out of longer
    # words such as ``android`` or ``order``.
    grammar, _types = r'\band\b|\bor\b|#|:|@|\w+', [('and', 'cond'), ('or', 'cond'), ('#', 'intent'), ('@', 'entity'), (':', 'value'), (r'\w+', 'label')]

    def __init__(self, val, _type):
        self.val, self._type = val, _type

    @property
    def is_cond(self):
        """True when the token is a boolean connective (``and``/``or``)."""
        return self._type == 'cond'

    @property
    def is_desc(self):
        """True when the token is one of the markers ``#``, ``@`` or ``:``."""
        return self._type in {'intent', 'entity', 'value'}

    @property
    def var_name(self):
        """Name of the collection to search: ``'intents'`` or ``'entities'``."""
        return f'{self._type}s' if self._type == 'intent' else 'entities'

    @classmethod
    def _classify(cls, text):
        """Return the category of the first ``_types`` pattern matching all of ``text``.

        Raises:
            ValueError: If no pattern in ``_types`` matches ``text``.
        """
        for pattern, kind in cls._types:
            # fullmatch, not a substring search: a label such as ``brand``
            # contains ``and`` but must not be classified as a connective.
            if re.fullmatch(pattern, text):
                return kind
        raise ValueError(f'unrecognised token: {text!r}')

    @classmethod
    def tokenize(cls, _input):
        """Split ``_input`` into a list of ``Token`` objects.

        Characters that match no alternative of ``grammar`` (such as
        whitespace) are skipped.
        """
        return [cls(text, cls._classify(text)) for text in re.findall(cls.grammar, _input)]

    def __repr__(self):
        return f'{self.__class__.__name__}(value={self.val}, type={self._type})'

现在,可以创建一个简单的解析器:

from itertools import groupby
class AST:
    """Fold a grouped token stream into a nested JSON-logic style dict.

    The stream yields ``(is_cond, tokens)`` pairs as produced by ``_group``:
    runs of connective tokens alternate with runs of operand tokens.
    """

    def __init__(self, stream):
        self.stream = iter(stream)

    def p_parse(self, stream):
        """Translate one operand group into ``some`` clauses.

        Each label token produces one clause. A label preceded by a ``#`` or
        ``@`` marker is compared against that marker's type and searched in
        the marker's collection (``intents`` or ``entities``); any other
        label (for example the one following ``:``) is compared against
        ``value`` within ``entities``.

        Args:
            stream: Iterable of the non-connective tokens of one operand.

        Returns:
            ``{'some': clause}`` for a single clause, otherwise
            ``{'and': [{'some': clause}, ...]}`` containing every clause.

        Raises:
            ValueError: If the group contains no label token.
        """
        clauses, marker = [], None
        for tok in stream:
            if tok._type == 'value':
                # ':' discards any pending marker, so the next label is a value.
                marker = None
            elif tok.is_desc:
                marker = tok
            else:
                if marker is None:
                    source, field = 'entities', 'value'
                else:
                    # Use the marker (not the label) to pick the collection,
                    # so that '#' yields 'intents' rather than 'entities'.
                    source, field = marker.var_name, marker._type
                clauses.append([{'var': source}, {'==': [{'var': field}, tok.val]}])
                marker = None
        if not clauses:
            raise ValueError('operand group contains no label')
        if len(clauses) == 1:
            return {'some': clauses[0]}
        return {'and': [{'some': clause} for clause in clauses]}

    def parse(self, seen=None):
        """Consume the remaining groups and return the resulting tree.

        An operand group becomes the new left-hand side; a connective group
        combines the left-hand side seen so far with everything that follows
        it, keyed by the text of the group's first token.

        Args:
            seen: The tree built so far (``None`` at the start).

        Returns:
            The nested dict, or ``seen`` when the stream is exhausted.
        """
        is_cond, group = next(self.stream, (None, None))
        if is_cond is None:
            return seen
        if is_cond:
            return {group[0].val: [seen, self.parse()]}
        return self.parse(self.p_parse(group))

    @classmethod
    def _group(cls, _tokens):
        """Build an ``AST`` from tokens grouped into connective/operand runs."""
        return cls([(flag, list(run)) for flag, run in groupby(_tokens, key=lambda x: x.is_cond)])

现在,组合组件:

# Sample expression: '#' marks an intent, '@' an entity and ':' an entity value.
s = "#serviceRequest and @charges:getRoamingCharges or @plans:dataplans"
# Tokenise the expression, group the tokens into operand/connective runs,
# then fold the groups into a single nested dict.
tokens = Token.tokenize(s)
result = AST._group(tokens).parse()

输出:

{'and': [{'some': [{'var': 'entities'}, {'==': [{'var': 'intent'}, 'serviceRequest']}]}, {'or': [{'and': [{'some': [{'var': 'entities'}, {'==': [{'var': 'entity'}, 'charges']}]}, {'some': [{'var': 'entities'}, {'==': [{'var': 'value'}, 'getRoamingCharges']}]}]}, {'and': [{'some': [{'var': 'entities'}, {'==': [{'var': 'entity'}, 'plans']}]}, {'some': [{'var': 'entities'}, {'==': [{'var': 'value'}, 'dataplans']}]}]}]}]}

毫无疑问,这个问题还有更简短的解决方案;但是,使用标记器和解析器的目的,是让您将来能够更容易地扩展这个方案,以处理那些更“取巧”的方案无法处理的输入。

相关问题 更多 >