替换以前出现的字符串

s = "(S (S (S (S (Skipper Skipper) ('s 's)) (Inc. Inc.)) (S (Bellevue Bellevue) (Wash. Wash.))) (S (said said) (S (it it) (S (signed signed) (S (a a) (S (definitive definitive) (S (merger merger) (S (agreement agreement) (S (for for) (S (S (a a) (S (National National) (S (Pizza Pizza) (S (Corp. Corp.) (unit unit))))) (S (to to) (S (acquire acquire) (S (S (S (the the) (S (90.6 90.6) (% %))) (S (S (of of) (S (S (Skipper Skipper) ('s 's)) (Inc. Inc.))) (S (it it) (S (does does) (S (n't n't) (own own)))))) (S (for for) (S (S (11.50 11.50) (S (a a) (share share))) (S (or or) (S (about about) (S (28.1 28.1) (million million)))))))))))))))))))"

out = "(S (S (S (S (S Skipper) (S 's)) (S Inc.)) (S (S Bellevue) (S Wash.))) (S (S said) (S (S it) (S (S signed) (S (S a) (S (S definitive) (S (S merger) (S (S agreement) (S (S for) (S (S (S a) (S (S National) (S (S Pizza) (S (S Corp.) (S unit))))) (S (S to) (S (S acquire) (S (S (S (S the) (S (S 90.6) (S %))) (S (S (S of) (S (S (S Skipper) (S 's)) (S Inc.))) (S (S it) (S (S does) (S (S n't) (S own)))))) (S (S for) (S (S (S 11.50) (S (S a) (S share))) (S (S or) (S (S about) (S (S 28.1) (S million)))))))))))))))))))"

from collections import Counter

# Strip every parenthesis and count how often each token occurs in the tree.
lst = s.lstrip("(").rstrip(")").replace("(", "").replace(")", "").split()
d = Counter(lst)

# Map the token repeated `count` times ("word word ...") to "S word".
# NOTE: the count is taken over the whole string, so a token that occurs in
# more than one "(word word)" leaf gets a key longer than two words, which
# does not match any single leaf; those leaves are left unchanged.
mapper = {((k + " ") * v).strip(): "S" + " " + k for k, v in d.items()}

# Apply the replacements cumulatively. Calling `s.replace(k, v)` on every
# pass would restart from the untouched input and keep only the last
# substitution.
out = s
for k, v in mapper.items():
    out = out.replace(k, v)

out = "(S (S (S (S (Skipper Skipper) ('s 's)) (Inc. Inc.)) (S (S Bellevue) (S Wash.))) (S (S said) (S (it it) (S (S signed) (S (a a) (S (S definitive) (S (S merger) (S (S agreement) (S (for for) (S (S (a a) (S (S National) (S (S Pizza) (S (S Corp.) (S unit))))) (S (S to) (S (S acquire) (S (S (S (S the) (S (S 90.6) (S %))) (S (S (S of) (S (S (Skipper Skipper) ('s 's)) (Inc. Inc.))) (S (it it) (S (S does) (S (S n't) (S own)))))) (S (for for) (S (S (S 11.50) (S (a a) (S share))) (S (S or) (S (S about) (S (S 28.1) (S million)))))))))))))))))))"

3条回答

网友

1楼 · 编辑于 2024-05-19 20:54:39

这里可以考虑使用正则表达式。我写了一个示例，它会匹配所有最内层的括号（即内部不再包含括号的括号对）。

有了这些，您可以分析每个匹配项的内容，并根据您的要求进行替换：

import re

s = "(S (S (S (S (Skipper Skipper) ('s 's)) (Inc. Inc.)) (S (Bellevue Bellevue) (Wash. Wash.))) \
     (S (said said) (S (it it) (S (signed signed) (S (a a) (S (definitive definitive) \
     (S (merger merger) (S (agreement agreement) (S (for for) (S (S (a a) \
     (S (National National) (S (Pizza Pizza) (S (Corp. Corp.) (unit unit))))) \
     (S (to to) (S (acquire acquire) (S (S (S (the the) (S (90.6 90.6) (% %))) \
     (S (S (of of) (S (S (Skipper Skipper) ('s 's)) (Inc. Inc.))) (S (it it) \
     (S (does does) (S (n't n't) (own own)))))) (S (for for) (S (S (11.50 11.50) \
     (S (a a) (share share))) (S (or or) (S (about about) (S (28.1 28.1) (million million)))))))))))))))))))"

# A leaf is a parenthesised group with no nested parentheses inside it:
# - (Skipper Skipper)
# - ('s 's)
# - etc.
LEAF = re.compile(r"\(([^()]*)\)")


def relabel_leaves(tree):
    """Return *tree* with every ``(word word)`` leaf rewritten as ``(S word)``.

    Only leaves that consist of exactly two identical whitespace-separated
    tokens are rewritten. Every other leaf (empty, one token, differing
    tokens, more than two tokens) is copied through unchanged.

    Args:
        tree: Bracketed tree string.

    Returns:
        The rewritten string; text outside the matched leaves is untouched.
    """

    def _relabel(match):
        words = match.group(1).split()
        # Guard on the token count so short or long leaves cannot raise
        # IndexError or be collapsed by mistake.
        if len(words) == 2 and words[0] == words[1]:
            # Replace ('s 's) with (S 's)
            return f"(S {words[0]})"
        return match.group(0)

    # A single substitution pass: each leaf is rewritten where it was found,
    # instead of running a whole-string replace once per match.
    return LEAF.sub(_relabel, tree)


s = relabel_leaves(s)

print(s)

输出

(S (S (S (S (S Skipper) (S 's)) (S Inc.)) (S (S Bellevue) (S Wash.)))      (S (S said) (S (S it) (S (S signed) (S (S a) (S (S definitive)      (S (S merger) (S (S agreement) (S (S for) (S (S (S a)      (S (S National) (S (S Pizza) (S (S Corp.) (S unit)))))      (S (S to) (S (S acquire) (S (S (S (S the) (S (S 90.6) (S %)))      (S (S (S of) (S (S (S Skipper) (S 's)) (S Inc.))) (S (S it)      (S (S does) (S (S n't) (S own)))))) (S (S for) (S (S (S 11.50)      (S (S a) (S share))) (S (S or) (S (S about) (S (S 28.1) (S million)))))))))))))))))))

网友

2楼 · 编辑于 2024-05-19 20:54:39

下面这种方案遍历按空白切分后的单词列表，找到相邻的重复单词，并把每对重复单词中的第一个替换为 (S：

s = """(S (S (S (S (Skipper Skipper) ('s 's)) (Inc. Inc.)) (S (Bellevue Bellevue) (Wash. Wash.))) 
     (S (said said) (S (it it) (S (signed signed) (S (a a) (S (definitive definitive) 
     (S (merger merger) (S (agreement agreement) (S (for for) (S (S (a a) 
     (S (National National) (S (Pizza Pizza) (S (Corp. Corp.) (unit unit))))) 
     (S (to to) (S (acquire acquire) (S (S (S (the the) (S (90.6 90.6) (% %))) 
     (S (S (of of) (S (S (Skipper Skipper) ('s 's)) (Inc. Inc.))) (S (it it) 
     (S (does does) (S (n't n't) (own own)))))) (S (for for) (S (S (11.50 11.50) 
     (S (a a) (share share))) (S (or or) (S (about about) (S (28.1 28.1) (million million)))))))))))))))))))"""


def mark_duplicate_leaves(tokens):
    """Return a copy of *tokens* with each duplicated leaf label replaced by ``S``.

    A duplicated leaf is a pair of adjacent tokens ``(word`` and ``word)``:
    the first opens a bracket, the second closes one, and both carry the
    same text once the parentheses are removed. The opening token becomes
    ``(S`` (keeping any extra leading parentheses); the closing token is
    left as is.

    Args:
        tokens: Whitespace-separated tokens of a bracketed tree.

    Returns:
        A new list; the input list is not modified.
    """
    marked = list(tokens)
    for i, (word, next_word) in enumerate(zip(tokens, tokens[1:])):
        label = word.lstrip("(")
        is_leaf_pair = (
            label
            and word.startswith("(")
            and next_word.endswith(")")
            and label == next_word.rstrip(")")
        )
        if is_leaf_pair:
            # Replace by position. list.index() would rewrite the first
            # equal token in the list, which may belong to another leaf.
            marked[i] = word[: len(word) - len(label)] + "S"
    return marked


word_list = mark_duplicate_leaves(s.split())

s_new = " ".join(word_list)

网友

3楼 · 编辑于 2024-05-19 20:54:39

使用re.sub替换它们：

import re

# A two-token leaf such as "(Skipper Skipper)". A token is any run of
# characters other than whitespace and parentheses, so labels containing
# "-", ",", "$" and similar are matched as well.
LEAF_PAIR = re.compile(r"\(([^\s()]+) ([^\s()]+)\)")


def sub(matched):
    """Replacement callback for ``re.sub``: relabel a duplicated leaf.

    Args:
        matched: Match object whose groups 1 and 2 are the two leaf tokens.

    Returns:
        ``"(S token)"`` when both tokens are equal; otherwise the matched
        text itself, so non-duplicate leaves pass through unchanged.
    """
    if matched.group(1) == matched.group(2):
        return f"(S {matched.group(2)})"
    # Return the original text. The tuple repr of the groups would rewrite
    # a leaf like "(S Skipper)" into "('S', 'Skipper')".
    return matched.group(0)


s = '''(S (S (S (S (Skipper Skipper) ('s 's)) (Inc. Inc.)) (S (Bellevue Bellevue) (Wash. Wash.))) 
     (S (said said) (S (it it) (S (signed signed) (S (a a) (S (definitive definitive) 
     (S (merger merger) (S (agreement agreement) (S (for for) (S (S (a a) 
     (S (National National) (S (Pizza Pizza) (S (Corp. Corp.) (unit unit))))) 
     (S (to to) (S (acquire acquire) (S (S (S (the the) (S (90.6 90.6) (% %))) 
     (S (S (of of) (S (S (Skipper Skipper) ('s 's)) (Inc. Inc.))) (S (it it) 
     (S (does does) (S (n't n't) (own own)))))) (S (for for) (S (S (11.50 11.50) 
     (S (a a) (share share))) (S (or or) (S (about about) (S (28.1 28.1) (million million)))))))))))))))))))'''

result = LEAF_PAIR.sub(sub, s)

输出

相关问题更多 >

编程相关推荐

热门问题

热门文章