如何在 Python 中查找并替换区分大小写的完整单词

import re
import textwrap
import traceback
import unittest


def replace_words(content, replacements):
    """Replace whole identifiers in *content* via a case-sensitive lookup.

    Every maximal run matching ``[A-Za-z_]\\w*`` is looked up in
    *replacements*; identifiers that are not keys are left untouched, so
    ``BAR`` is replaced while ``BARycentric`` and ``bar`` are not.

    Args:
        content: The text to process.
        replacements: Mapping from identifier to its replacement text.

    Returns:
        A copy of *content* with every matching identifier replaced.
    """
    rc = re.compile(r"[A-Za-z_]\w*")

    def translate(match):
        word = match.group(0)
        return replacements.get(word, word)

    # The third positional parameter of a compiled pattern's sub() is
    # `count`, not `flags`.  Passing re.IGNORECASE | re.MULTILINE (== 10)
    # there silently stopped after ten substitutions.  Flags are not needed:
    # the pattern already covers both cases and uses no ^/$ anchors.
    return rc.sub(translate, content)


class class_name(unittest.TestCase):
    """Checks replace_words against two small grammar-like fixtures."""

    # NOTE(review): the line breaks and indentation inside the fixture
    # strings were lost when this snippet was pasted; the layout below is a
    # reconstruction.  Inputs and expected outputs use the same layout, so
    # the assertions do not depend on the exact whitespace.

    def setUp(self):
        self.replacements = [
            {
                'PLUS': '"+"',
                'DASH': '"-"',
                'BANG': '"!"',
                'TILDE': '"~"',
                'STAR': '"*"',
                'SLASH': '"/"',
                'PERCENT': '"%"',
                'LEFT_PAREN': '"("',
                'RIGHT_PAREN': '")"'
            },
            {
                "IF": "fi",
                "FOO": "oof",
                "BAR": "rab",
                "OP_FOO": "oof_op"
            }
        ]
        self.texts = [
            textwrap.dedent("""\
                variable_identifier :
                    IDENTIFIER
                primary_expression :
                    foo1
                    foo2
                    foo3
                    LEFT_PAREN expression RIGHT_PAREN
                unary_operator :
                    PLUS
                    DASH
                    BANG
                    TILDE
                multiplicative_expression :
                    unary_expression
                    multiplicative_expression STAR unary_expression
                    multiplicative_expression SLASH unary_expression
                    multiplicative_expression PERCENT unary_expression\
            """),
            textwrap.dedent("""\
                IF identifier IDENTIFIER FOO BAR BARycentric
                OP_FOO
            """)
        ]
        self.expected_results = [
            textwrap.dedent("""\
                variable_identifier :
                    IDENTIFIER
                primary_expression :
                    foo1
                    foo2
                    foo3
                    "(" expression ")"
                unary_operator :
                    "+"
                    "-"
                    "!"
                    "~"
                multiplicative_expression :
                    unary_expression
                    multiplicative_expression "*" unary_expression
                    multiplicative_expression "/" unary_expression
                    multiplicative_expression "%" unary_expression\
            """),
            textwrap.dedent("""\
                fi identifier IDENTIFIER oof rab BARycentric
                oof_op
            """)
        ]

    def _tester(self, f):
        """Run *f* over both fixtures and compare with the expected text."""
        replacements = self.replacements
        expected_results = self.expected_results
        texts = self.texts
        self.assertEqual(f(texts[0], replacements[0]), expected_results[0])
        self.assertEqual(f(texts[1], replacements[1]), expected_results[1])

    def test_replace_words(self):
        self._tester(replace_words)


if __name__ == "__main__":
    unittest.main()

1条回答

网友

1楼 · 发布于 2024-04-20 12:20:42

您可以使用re.sub和re.findall：

import re
def regex_string(d, to_lower = False):
    """Build an alternation pattern that matches each key of *d* as a whole word.

    Each key is wrapped in ``\\b`` word boundaries and the alternatives are
    joined with ``|``.  Keys are passed through ``re.escape`` so that regex
    metacharacters in a key (``.``, ``+``, ``(`` ...) are matched literally
    instead of being interpreted as pattern syntax.

    Args:
        d: Mapping whose keys are the words to match.
        to_lower: When true, the lower-cased form of every key is emitted as
            an extra alternative immediately before the key itself.

    Returns:
        The pattern source as a string (an empty string when *d* is empty).
    """
    if not to_lower:
        return '|'.join(r'\b{}\b'.format(re.escape(key)) for key in d)

    alternatives = []
    for key in d:
        # Lower-cased variant first, then the key as written.
        alternatives.append(r'\b{}\b'.format(re.escape(key.lower())))
        alternatives.append(r'\b{}\b'.format(re.escape(key)))
    return '|'.join(alternatives)

# Case-sensitive token -> replacement text.
replacements = {
    'PLUS': '"+"',
    'DASH': '"-"',
    'BANG': '"!"',
    'TILDE': '"~"',
    'STAR': '"*"',
    'SLASH': '"/"',
    'PERCENT': '"%"',
    'LEFT_PAREN': '"("',
    'RIGHT_PAREN': '")"'
}
# NOTE: `content` (the text to process) must already be defined.
# Build the pattern once; it is needed for both passes below.
pattern = regex_string(replacements, True)
# Pass 1: swap every match for a '{}' placeholder.  Literal braces already
# present in the text are doubled first, otherwise str.format() would read
# them as replacement fields and raise or mis-substitute.
replaced = re.sub(pattern, '{}', content.replace('{', '{{').replace('}', '}}'))
# Pass 2: fill the placeholders in order.  Matches that are not keys (the
# lower-cased variants produced by to_lower=True) are put back unchanged.
final_result = replaced.format(*[replacements.get(i, i) for i in re.findall(pattern, content)])

输出（case 1）：

variable_identifier :
IDENTIFIER
primary_expression :
   foo1
   foo2
   foo3
   "(" expression ")"
unary_operator :
   "+"
   "-"
   "!"
   "~"
multiplicative_expression :
   unary_expression
   multiplicative_expression "*" unary_expression
   multiplicative_expression "/" unary_expression
   multiplicative_expression "%" unary_expression

输出（case 2）：

fi identifier IDENTIFIER oof rab BARycentric
oof_op

或者，更简短的写法（只调用一次 re.sub，并传入一个函数作为替换参数）：

# Single pass: each match is looked up directly in `replacements`; matches
# that are not keys are returned unchanged.
replaced = re.sub(
    regex_string(replacements, True),
    lambda match: replacements.get(match.group(), match.group()),
    content,
)

相关问题更多 >

编程相关推荐

热门问题

热门文章