从JSON文件中删除标点符号（仅限引号内的内容）

input_str = 'please, remove all the commas between quotes,"like in here, here, here!"'
# Module-level flag from the original snippet; noCommas() keeps its own
# local state and does not read this.
quotes = False


def noCommas(string):
    """Return *string* with commas removed only inside double-quoted sections.

    A double quote toggles the "inside quotes" state, so text after a closing
    quote is treated as unquoted again and keeps its commas. The quote
    characters themselves are always kept.

    Args:
        string: The text to process.

    Returns:
        A new string in which every comma located between a pair of double
        quotes has been dropped.
    """
    inside_quotes = False
    kept = []
    for char in string:
        if char == '"':
            # Toggle (rather than set) so a closing quote ends the section.
            inside_quotes = not inside_quotes
        elif char == ',' and inside_quotes:
            continue
        kept.append(char)
    return ''.join(kept)


print(noCommas(input_str))

1条回答

网友

1楼 · 发布于 2024-04-25 22:08:46

这应该管用。

import re
import json

# Read the JSON document whose phrases are going to be cleaned.
with open('C:/test/data.json') as json_file:
    data = json.load(json_file)

# Clean every phrase in place: hyphens become spaces first, then every
# character that is neither a word character nor whitespace is removed.
phrases = data['data']['phrases']
for position, phrase in enumerate(phrases):
    without_hyphens = re.sub(r'-', ' ', phrase)
    phrases[position] = re.sub(r'[^\w\s]', '', without_hyphens)

# Write the cleaned document back over the same file.
with open('C:/test/data.json', 'w') as outfile:
    json.dump(data, outfile, indent=4)

选项2:

以字符串形式加载json。然后使用regex查找双引号之间的所有子字符串。替换/删除所有子字符串中的标点符号，然后写回文件：

import re
import json
import string




with open('C:/test/data.json') as json_file:
    data = json.load(json_file)

# Re-serialise so the text is in json.dumps' canonical form: double quotes
# then appear only as string delimiters or as escaped \" inside a string.
data = json.dumps(data)

# Matches one complete JSON string literal, including its delimiters.
# Escape sequences (\" \\ \n \uXXXX ...) are consumed as a unit so an escaped
# quote is never mistaken for the end of the string.
_JSON_STRING = re.compile(r'"(?:[^"\\]|\\.)*"')


def _strip_punctuation(match):
    """Return the matched JSON string literal with its punctuation cleaned.

    The literal is decoded to the real text first, so the cleaning never
    touches the backslash of an escape sequence, and it is re-encoded
    afterwards so the result is always a valid JSON string literal.
    """
    text = json.loads(match.group(0))
    text = text.replace('-', ' ')
    # Only leading/trailing punctuation is stripped; interior commas are
    # then removed explicitly.
    text = text.strip(string.punctuation)
    text = text.replace(',', '')
    return json.dumps(text)


# Substitute each literal where it was found instead of doing a global
# str.replace, which could also rewrite unrelated text such as the '": "'
# separator between a key and its value. Object keys are string literals
# too, so they are cleaned the same way as values.
data = _JSON_STRING.sub(_strip_punctuation, data)


with open('C:/test/data_output.json', 'w') as outfile:
    json.dump(json.loads(data), outfile, indent=4)

相关问题更多 >

编程相关推荐

热门问题

热门文章

从JSON文件中删除标点符号（仅限引号内的内容）

相关问题 更多 >

编程相关推荐

热门问题

热门文章

相关问题更多 >