Yoctol话语处理实用程序
uttut的Python项目详细描述
UTTUT
对话系统的话语工具。这个包在处理chatbot话语数据时提供了一些通用的实用程序。
BERT 管道
要创建用于 BERT 预处理的管道,请参阅 BERT 相关说明。
安装
$ pip install uttut
用法
让我们创建一个管道来用英语语句预处理数据。
构建管道
>>> from uttut.pipeline.pipe import Pipe
>>> p = Pipe()
>>> p.add('IntTokenWithSpace')
>>> p.add('FloatTokenWithSpace')
>>> p.add('MergeWhiteSpaceCharacters')
>>> p.add('StripWhiteSpaceCharacters')
>>> p.add('EngTokenizer')  # word-level (ref: BERT)
>>> p.add('AddSosEos', checkpoint='result_of_add_sos_eos')
>>> p.add('Pad', {'maxlen': 5})
>>> p.add(
...     'Token2Index',
...     {
...         'token2index': {
...             '<sos>': 0, '<eos>': 1,  # for AddSosEos
...             '<unk>': 2, '<pad>': 3,  # for Pad
...             '_int_': 4,  # for IntTokenWithSpace
...             '_float_': 5,  # for FloatTokenWithSpace
...             'I': 6,
...             'apples': 7,
...         },
...     },
... )
转换
>>> from uttut.elements import Datum, Entity, Intent
>>> datum = Datum(
...     utterance='I like apples.',
...     intents=[Intent(label=1), Intent(label=2)],
...     entities=[Entity(start=7, end=13, value='apples', label=7)],
... )
>>> output_indices, intent_labels, entity_labels, label_aligner, intermediate = p.transform(datum)
>>> output_indices
[0, 6, 2, 7, 1, 3, 3]
>>> intent_labels
[1, 2]
>>> entity_labels
[0, 0, 0, 7, 0, 0, 0]
# intermediate
>>> intermediate.get_from_checkpoint('result_of_add_sos_eos')
["<sos>", "I", "like", "apples", "<eos>"]
# label_aligner
>>> label_aligner.inverse_transform(entity_labels)
[0, 0, 0, 0, 0, 0, 0, 7, 7, 7, 7, 7, 7, 0]
转换序列
>>> output_sequence, label_aligner, intermediate = p.transform_sequence('I like apples.')
>>> output_sequence
[0, 6, 2, 7, 1, 3, 3]
# label_aligner
>>> label_aligner.transform([0, 0, 0, 0, 0, 0, 0, 7, 7, 7, 7, 7, 7, 0])
[0, 0, 0, 7, 0, 0, 0]
>>> label_aligner.inverse_transform([0, 0, 0, 7, 0, 0, 0])
[0, 0, 0, 0, 0, 0, 0, 7, 7, 7, 7, 7, 7, 0]
# intermediate
>>> intermediate.get_from_checkpoint('result_of_add_sos_eos')
["<sos>", "I", "like", "apples", "<eos>"]
序列化与反序列化
序列化
>>> serialized_str = p.serialize()
反序列化
>>> from uttut.pipeline.pipe import Pipe
>>> p = Pipe.deserialize(serialized_str)