Python 正则表达式报错：bad character range（错误的字符范围）

#Cleaning Text (RT, Punctuation etc)
#Creating new dataframe and new features
tw_list = pd.DataFrame(tweet_list)
tw_list["text"] = tw_list[0]
#Removing RT, Punctuation etc
remove_rt = lambda x: re.sub('RT @\w+: '," ",x)
rt = lambda x: re.sub("(@[A-Za-z0–9]+)|([°-9A-Za-z \t])|(\w+:\/\/\S+)"," ",x)
tw_list["text"] = tw_list.text.map(remove_rt).map(rt)
tw_list["text"] = tw_list.text.str.lower()
tw_list.head(10)

<ipython-input-15-e640b99d08dd> in <module> 8 remove_rt = lambda x: re.sub('RT @\w+: '," ",x) 9 rt = lambda x: re.sub("(@[A-Za-z0–9]+)|([⁰-9A-Za-z \t])|(\w+:\/\/\S+)"," ",x) ---> 10 tw_list["text"] = tw_list.text.map(remove_rt).map(rt) 11 tw_list["text"] = tw_list.text.str.lower() 12 tw_list.head(10) c:\program files\python39\lib\site-packages\pandas\core\series.py in map(self, arg, na_action) 3907 dtype: object 3908 """ -> 3909 new_values = super()._map_values(arg, na_action=na_action) 3910 return self.constructor(new_values, index=self.index).finalize_( 3911 self, method="map" c:\program files\python39\lib\site-packages\pandas\core\base.py in _map_values(self, mapper, na_action) 935 936 # mapper is a function --> 937 new_values = map_f(values, mapper) 938 939 return new_values pandas\_libs\lib.pyx in pandas._libs.lib.map_infer() <ipython-input-15-e640b99d08dd> in <lambda>(x) 7 #Removing RT, Punctuation etc 8 remove_rt = lambda x: re.sub('RT @\w+: '," ",x) ----> 9 rt = lambda x: re.sub("(@[A-Za-z0–9]+)|([⁰-9A-Za-z \t])|(\w+:\/\/\S+)"," ",x) 10 tw_list["text"] = tw_list.text.map(remove_rt).map(rt) 11 tw_list["text"] = tw_list.text.str.lower() c:\program files\python39\lib\re.py in sub(pattern, repl, string, count, flags) 208 a callable, it's passed the Match object and must return 209 a replacement string to be used.""" --> 210 return _compile(pattern, flags).sub(repl, string, count) 211 212 def subn(pattern, repl, string, count=0, flags=0): c:\program files\python39\lib\re.py in _compile(pattern, flags) 302 if not sre_compile.isstring(pattern): 303 raise TypeError("first argument must be string or compiled pattern") --> 304 p = sre_compile.compile(pattern, flags) 305 if not (flags & DEBUG): 306 if len(_cache) >= _MAXCACHE: c:\program files\python39\lib\sre_compile.py in compile(p, flags) 762 if isstring(p): 763 pattern = p --> 764 p = sre_parse.parse(p, flags) 765 else: 766 pattern = None c:\program files\python39\lib\sre_parse.py in parse(str, flags, state) 946 
947 try: --> 948 p = _parse_sub(source, state, flags & SRE_FLAG_VERBOSE, 0) 949 except Verbose: 950 # the VERBOSE flag was switched on inside the pattern. to be c:\program files\python39\lib\sre_parse.py in _parse_sub(source, state, verbose, nested) 441 start = source.tell() 442 while True: --> 443 itemsappend(_parse(source, state, verbose, nested + 1, 444 not nested and not items)) 445 if not sourcematch("|"): c:\program files\python39\lib\sre_parse.py in _parse(source, state, verbose, nested, first) 832 sub_verbose = ((verbose or (add_flags & SRE_FLAG_VERBOSE)) and 833 not (del_flags & SRE_FLAG_VERBOSE)) --> 834 p = _parse_sub(source, state, sub_verbose, nested + 1) 835 if not source.match(")"): 836 raise source.error("missing ), unterminated subpattern", c:\program files\python39\lib\sre_parse.py in _parse_sub(source, state, verbose, nested) 441 start = source.tell() 442 while True: --> 443 itemsappend(_parse(source, state, verbose, nested + 1, 444 not nested and not items)) 445 if not sourcematch("|"): c:\program files\python39\lib\sre_parse.py in _parse(source, state, verbose, nested, first) 596 if hi < lo: 597 msg = "bad character range %s-%s" % (this, that) --> 598 raise source.error(msg, len(this) + 1 + len(that)) 599 setappend((RANGE, (lo, hi))) 600 else:

1条回答

网友

1楼 · 发布于 2024-04-23 19:42:04

您的正则表达式出错的位置是字符类中的“[°-9…]”这一段：在方括号内，“-”会被解析为字符范围，而“°”（U+00B0）的码位大于“9”（U+0039），范围的起点大于终点，因此抛出“bad character range”错误。如果确实需要同时匹配字符“°”和“-”，则需要用反斜杠转义“-”（写成“\-”），以免正则引擎把它当作范围。

如果您的本意是数字“0”而不是“°”，则只需把“°”替换为“0”即可。

您可以在此处看到您的问题： https://regex101.com/r/hhf27i/1

以下是修复方法： https://regex101.com/r/8d1VxP/1

相关问题更多 >

编程相关推荐

热门问题

热门文章