在 for 循环中多次使用 re.search 提取不同的字段值（Python）

# First attempt: for every ingredient name in ``zz``, look for the extracted
# line that starts with that name (case-insensitively) and record the first
# quantity-with-unit found on it.
#
# Relies on ``re``, ``zz`` and ``extracteddata`` being defined before this runs.
result = []

# Raw strings keep the backslashes literal instead of relying on Python
# passing unknown escapes such as "\." and "\s" through unchanged.
newregex = r"[0-9\.\s]+(?:mg|kg|ml|q.s.|ui|M|g|µg)"
percentregex = r"(\d+(\.\d+)?%)"

# Compile once; the same two patterns are applied to every line.
unit_pattern = re.compile(newregex, flags=re.IGNORECASE | re.MULTILINE)
percent_pattern = re.compile(percentregex, flags=re.IGNORECASE | re.MULTILINE)

for s in zz:
    prefix = s.upper()
    for e in extracteddata:
        v = unit_pattern.search(e)
        xx = percent_pattern.search(e)  # searched, but not stored in result yet
        if not e.upper().startswith(prefix):
            continue
        if v:
            result.append([s, v.group(0), e])
        else:
            result.append([s, e])

# Ingredient names to look for (upper-case).
zz = [
    'HYDROCHLORIC ACID 2M',
    'ROPIVACAINE HYDROCHLORIDE MONOHYDRATE',
    'SODIUM CHLORIDE',
    'SODIUM HYDROXIDE 2M',
    'WATER FOR INJECTIONS',
]

# Raw text lines to search. Each string is kept on one line so that its
# content, including the trailing spaces, stays exactly as extracted.
extracteddata = [
    'Ropivacaine hydrochloride monohydrate for injection (corresponding to 2 mg Ropivacaine hydrochloride anhydrous) 2.12 mg Active ingredient Ph Eur ',
    'Sodium chloride for injection 8.6 mg 28% Tonicity contributor Ph Eur ',
    'Sodium hydroxide 2M q.s. pH-regulator Ph Eur, NF Hydrochloric acid 2M q.s. pH-regulator Ph Eur, NF ',
    'Water for Injections to 1 ml 34% Solvent Ph Eur, USP The product is filled into polypropylene bags sealed with rubber stoppers and aluminium caps with flip-off seals. The primary container is enclosed in a blister. 1(1)',
]

# Second attempt: same matching as before, but each result row should also
# carry the percentage when the line contains one.
#
# Row layout: [name, unit quantity (if found), percentage (if found), line].
# Relies on ``re``, ``zz`` and ``extracteddata`` being defined before this runs.
result = []
mg = []  # kept from the original post, which only used it in commented-out code

# Raw strings keep the backslashes literal instead of relying on Python
# passing unknown escapes such as "\." and "\s" through unchanged.
newregex = r"[0-9\.\s]+(?:mg|kg|ml|q.s.|ui|M|g|µg)"
percentregex = r"(\d+(\.\d+)?%)"
print(type(newregex))  # debugging output from the original post

# Compile once; the same two patterns are applied to every line.
unit_pattern = re.compile(newregex, flags=re.IGNORECASE | re.MULTILINE)
percent_pattern = re.compile(percentregex, flags=re.IGNORECASE | re.MULTILINE)

for s in zz:
    prefix = s.upper()
    for e in extracteddata:
        v = unit_pattern.search(e)
        xx = percent_pattern.search(e)
        if not e.upper().startswith(prefix):
            continue
        # The original chain ``if v: ... elif v is None: ... elif xx: ...``
        # could never reach its percentage branches: a search result is either
        # a match or None, so the first two tests already cover every case.
        # Building the row step by step handles all four combinations.
        row = [s]
        if v:
            row.append(v.group(0))
        if xx:
            row.append(xx.group(0))
        row.append(e)
        result.append(row)

1条回答

网友

1楼 · 发布于 2024-06-09 22:33:02

下面是我们在评论中讨论的Python演示：

每个请求的模式

>>> import re
>>> # Sample lines: the first two are extra test strings, the remaining four are the question's data.
>>> extracteddata = ['"Water 5.5 ml for injections 0.80 and 100 at 2.2 % ','Injections 100 and 0.80', 'Ropivacaine hydrochloride monohydrate for injection (corresponding to 2 mg Ropivacaine hydrochloride anhydrous) 2.12 mg Active ingredient Ph Eur ', 'Sodium chloride for injection 8.6 mg 28% Tonicity contributor Ph Eur ', 'Sodium hydroxide 2M q.s. pH-regulator Ph Eur, NF Hydrochloric acid 2M q.s. pH-regulator Ph Eur, NF ', 'Water for Injections to 1 ml 34% Solvent Ph Eur, USP The product is filled into polypropylene bags sealed with rubber stoppers and aluminium caps with flip-off seals. The primary container is enclosed in a blister. 1(1)']
>>> # Rx: three optional lookaheads capture (1) a number with a unit, (2) a number with %, (3) a number followed by neither; .+ then consumes the line.
>>> Rx = r"(?i)(?=.*?((?:\d+(?:\.\d*)?|\.\d+)\s*(?:mg|kg|ml|q\.s\.|ui|M|g|µg)))?(?=.*?(\d+(?:\.\d+)?\s*%))?(?=.*?((?:\d+(?:\.\d*)?|\.\d+))(?![\d.])(?!\s*(?:%|mg|kg|ml|q\.s\.|ui|M|g|µg)))?.+"
>>> # Each capture group is optional, so every group is checked for a value before it is printed.
>>> for e in extracteddata:
...         match = re.search( Rx, e )
...         print("                      ")  # blank separator between lines of input
...         if match.group(1):  # number followed by a unit
...                 print( "Unit num:  \t\t", match.group(1) )
...         if match.group(2):  # number followed by a percent sign
...                 print( "Percentage num:  \t", match.group(2) )
...         if match.group(3):  # number with neither a unit nor a percent sign
...                 print( "Just a num:  \t\t", match.group(3) )
... 
                      
Unit num:                5.5 ml
Percentage num:          2.2 %
Just a num:              0.80
                      
Just a num:              100
                      
Unit num:                2 mg
                      
Unit num:                8.6 mg
Percentage num:          28%
                      
Unit num:                2M
                      
Unit num:                1 ml
Percentage num:          34%
Just a num:              1

这是正则表达式

 (?i)
 (?=
      .*? 
      (                             # (1 start)
           (?:
                \d+ 
                (?: \. \d* )?
             |  \. \d+ 
           )
           \s* 
           (?: mg | kg | ml | q \. s \. | ui | M | g | µg )
      )                             # (1 end)
 )?
 (?=
      .*? 
      (                             # (2 start)
           \d+ 
           (?: \. \d+ )?
           \s* %
      )                             # (2 end)
 )?
 (?=
      .*? 
      (                             # (3 start)
           (?:
                \d+ 
                (?: \. \d* )?
             |  \. \d+ 
           )
      )                             # (3 end)
      (?! [\d.] )
      (?!
           \s* 
           (?: % | mg | kg | ml | q \. s \. | ui | M | g | µg )
      )
 )?
 .+

如前所述，它使用三个前瞻断言，分别查找行中第一个带单位的数字、第一个百分比数字以及第一个独立数字。
三个值各自独立捕获，互不重叠。

检查每个捕获组是否非空，即可知道该行中是否找到了对应的项。

相关问题更多 >

编程相关推荐

热门问题

热门文章