如何在python中查找和替换区分大小写的整词

2024-04-20 12:20:42 发布

您现在位置:Python中文网/ 问答频道 /正文

考虑以下mcve:

import re
import textwrap

import traceback
import unittest


def replace_words(content, replacements):
    """Replace whole, case-sensitive words in *content* via a lookup table.

    Every identifier-like token (an ASCII letter or underscore followed by
    any number of word characters) is looked up in *replacements*. Tokens
    that are not keys of the mapping are left exactly as they were.

    Args:
        content: Text to process.
        replacements: Mapping from an exact word to its replacement text.

    Returns:
        A copy of *content* with every matching word substituted.
    """
    rc = re.compile(r"[A-Za-z_]\w*")

    def translate(match):
        word = match.group(0)
        return replacements.get(word, word)

    # The third positional argument of Pattern.sub() is ``count``, not
    # ``flags``. Passing flag constants there silently caps the number of
    # substitutions, so no extra argument is given: matching stays
    # case-sensitive and every occurrence is replaced.
    return rc.sub(translate, content)


class class_name(unittest.TestCase):
    """Fixture-driven checks for whole-word, case-sensitive replacement.

    ``setUp`` builds three parallel lists (replacement tables, input texts
    and expected outputs); ``_tester`` runs a candidate function over each
    (text, table, expected) triple.
    """

    def setUp(self):
        """Create the replacement tables, input texts and expected outputs."""
        # Grammar token names mapped to their quoted literal spelling.
        operator_tokens = {
            'PLUS': '"+"',
            'DASH': '"-"',
            'BANG': '"!"',
            'TILDE': '"~"',
            'STAR': '"*"',
            'SLASH': '"/"',
            'PERCENT': '"%"',
            'LEFT_PAREN': '"("',
            'RIGHT_PAREN': '")"',
        }
        # Short words; BAR is also the prefix of a longer word in the second
        # input text, which must be left alone.
        keyword_tokens = {
            "IF": "fi",
            "FOO": "oof",
            "BAR": "rab",
            "OP_FOO": "oof_op",
        }
        self.replacements = [operator_tokens, keyword_tokens]
        self.texts = [
            textwrap.dedent("""\
                variable_identifier :
                    IDENTIFIER
                primary_expression :
                    foo1
                    foo2
                    foo3
                    LEFT_PAREN expression RIGHT_PAREN
                unary_operator :
                    PLUS
                    DASH
                    BANG
                    TILDE
                multiplicative_expression :
                    unary_expression
                    multiplicative_expression STAR unary_expression
                    multiplicative_expression SLASH unary_expression
                    multiplicative_expression PERCENT unary_expression\
            """),
            textwrap.dedent("""\
                IF identifier IDENTIFIER FOO BAR BARycentric
                OP_FOO
            """)
        ]
        self.expected_results = [
            textwrap.dedent("""\
                variable_identifier :
                    IDENTIFIER
                primary_expression :
                    foo1
                    foo2
                    foo3
                    "(" expression ")"
                unary_operator :
                    "+"
                    "-"
                    "!"
                    "~"
                multiplicative_expression :
                    unary_expression
                    multiplicative_expression "*" unary_expression
                    multiplicative_expression "/" unary_expression
                    multiplicative_expression "%" unary_expression\
            """),
            textwrap.dedent("""\
                fi identifier IDENTIFIER oof rab BARycentric
                oof_op
            """)
        ]

    def _tester(self, f):
        """Assert that ``f(text, table)`` yields the expected output per fixture.

        Args:
            f: Callable taking ``(text, replacements)`` and returning the
                substituted text.
        """
        cases = zip(self.texts, self.replacements, self.expected_results)
        for text, table, expected in cases:
            self.assertEqual(f(text, table), expected)

    def test_replace_words(self):
        """Run the shared fixtures against ``replace_words``."""
        self._tester(replace_words)


# Run the test cases in this module only when the file is executed as a
# script, not when it is imported.
if __name__ == "__main__":
    unittest.main()

`replace_words` 函数试图使用上面代码中的替换字典,在给定文本中查找并替换区分大小写的整词,但它在 `self.assertEqual(f(texts[0], replacements[0]), expected_results[0])` 这一行断言失败,我不知道原因。

所以问题是:如何在 Python 中使用替换字典查找并替换区分大小写的整词?


Tags: import self re def results replace expected words
1条回答
网友
1楼 · 发布于 2024-04-20 12:20:42

您可以使用 re.sub 和 re.findall:

import re
def regex_string(d, to_lower=False):
    """Build an alternation pattern matching each key of *d* as a whole word.

    Each key is wrapped in ``\\b`` word-boundary anchors and the results are
    joined with ``|``.

    Args:
        d: Mapping whose keys are the words to match.
        to_lower: When true, the lower-cased form of every key is added as
            an alternative immediately before the key itself.

    Returns:
        The pattern source as a string, suitable for ``re.sub`` or
        ``re.findall``.
    """
    words = []
    for key in d:
        if to_lower:
            words.append(key.lower())
        words.append(key)
    # Escape each word so that regex metacharacters inside a key are matched
    # literally instead of being interpreted as pattern syntax.
    return '|'.join(r'\b{}\b'.format(re.escape(word)) for word in words)

# Token names mapped to the quoted literal that should replace them.
replacements = {
    'PLUS': '"+"',
    'DASH': '"-"',
    'BANG': '"!"',
    'TILDE': '"~"',
    'STAR': '"*"',
    'SLASH': '"/"',
    'PERCENT': '"%"',
    'LEFT_PAREN': '"("',
    'RIGHT_PAREN': '")"'
}
# Build the pattern once so both passes below agree on what counts as a match.
pattern = regex_string(replacements, True)
# `content` is the text to process; it is expected to be defined before this
# snippet runs. Literal braces are doubled so that str.format() treats them
# as plain text rather than as replacement fields.
escaped_content = content.replace('{', '{{').replace('}', '}}')
# Pass 1: swap every match for a positional '{}' placeholder.
replaced = re.sub(pattern, '{}', escaped_content)
# Pass 2: fill the placeholders, in order, with the mapped value of each match
# (falling back to the matched text when it is not a key of `replacements`).
final_result = replaced.format(*[replacements.get(i, i) for i in re.findall(pattern, escaped_content)])

输出(case 1):

variable_identifier :
IDENTIFIER
primary_expression :
   foo1
   foo2
   foo3
   "(" expression ")"
unary_operator :
   "+"
   "-"
   "!"
   "~"
multiplicative_expression :
   unary_expression
   multiplicative_expression "*" unary_expression
   multiplicative_expression "/" unary_expression
   multiplicative_expression "%" unary_expression  

输出(case 2):

fi identifier IDENTIFIER oof rab BARycentric
oof_op

或者,甚至更短:

# Single pass: the callback looks each match up in `replacements` and falls back to the matched text itself.
replaced = re.sub(regex_string(replacements, True), lambda x:replacements.get(x.group(), x.group()), content)

相关问题 更多 >