Python递归和/或动态编程

2024-04-16 23:43:08 发布

您现在位置:Python中文网/ 问答频道 /正文

我偶然发现了一个超出我技能水平的问题

找到任何提供的化学式的唯一原子及其计数,如K4(ON(SO3)2)2。-我的要求就在这里

我发现这个问题可能需要递归和/或动态编程(或至少有用)

以下是我取得的成绩:

class Solution(object):
    def __init__(self):
        self.code = []
        self.atoms = {}

    def encode_string(self,string):
        """
        Return a list of elements where 
        @'a' = alphabetical 
        @'d' = digit
        @int = parentheses, wherre number reflects nesting.

        'k4(on(so3)2)2' --> ['a', 'd', 1, 'a', 'a', 2, 'a', 'a', 'd', 2, 'd', 1, 'd']
        """

        self.string = string

        for char in string:
            if char.isalpha():
                self.code.append('a')
            elif char.isdigit():
                self.code.append('d')
            elif char == '(':
                self.code.append('l') #left parenthesis 
            else:
                self.code.append('r') #right parenthesis 

        self.pars = [elem for elem in self.code if elem in ['r','l']]

        self.par_structure = []
        count = 1
        for idx, elem in enumerate(self.pars):
            if elem == 'l':
                self.par_structure.append(count)
                count += 1
            elif elem == 'r':
                count -= 1
                self.par_structure.append(count)

        count = 0        
        for idx, char in enumerate(self.code):
            if char in ['l','r']:
                self.code[idx] = self.par_structure[count]
                count += 1        


    def id_atoms(self):
        self.indices = [idx for idx,elem in enumerate(self.code) if elem == 0]
        for idx in self.indices:
            atom = self.string[idx]
            self.atoms[atom] = 0

    def parse_code():
        pass

我列出了encode_string方法的用例,该方法通过字母、数字和括号的深度级别来识别字母、数字和括号。我认为这是朝着解决问题的方向取得的进展。下一步是将括号内的字符乘以找到的数字值

任何建议都将不胜感激


Tags: inselfforstringifdefcountcode
3条回答

下面是我将如何着手解决上述问题。我假设原子以大写字母('H')开头,后面可能是小写字母('Mg'):

import re
from collections import defaultdict

def tokenize(string):  # "Mg(OH)2" -> ['Mg', '(', 'O', 'H', ')', '2']
    tokens = re.split(r"([A-Z0-9][a-z]?)", string)
    return [token for token in tokens if token]

def process_multiple(count, entity, dictionary):
    if isinstance(entity, dict):
        for atom, quantity in entity.items():
            dictionary[atom] += quantity * count
    else:
        dictionary[entity] += count

def count_atoms(string):

    def count_atoms_recursive(tokens):
        dictionary = defaultdict(int)
        previous_entity = None

        while tokens:
            token = tokens.pop(0)

            if token == ')':
                return dictionary

            if token.isdigit():
                process_multiple(int(token) - 1, previous_entity, dictionary)
            elif token == '(':
                previous_entity = count_atoms_recursive(tokens)
                process_multiple(1, previous_entity, dictionary)
            else:
                dictionary[token] += 1
                previous_entity = token

        return dictionary

    return dict(count_atoms_recursive(tokenize(string)))

if __name__ == "__main__":

    tests = ["H2O", "Mg(OH)2", "K4(ON(SO3)2)2"]

    for test in tests:
        print(test, "->", count_atoms(test))

对于格式错误的字符串,代码并不健壮,我将此作为练习留给读者。我希望您能在代码中找到一些有用的想法

输出

> python3 test.py
H2O -> {'H': 2, 'O': 1}
Mg(OH)2 -> {'Mg': 1, 'O': 2, 'H': 2}
K4(ON(SO3)2)2 -> {'K': 4, 'O': 14, 'N': 2, 'S': 4}
>

另一次尝试:

import re
from collections import Counter

# valid atoms definition:
atoms = r'(?:K|O|N|S|H|Mg)'

def get_count(expr):
    def flatten(lst):
        for v in lst:
            if isinstance(v, list):
                yield from flatten(v)
            else:
                yield v

    def parse_expression(i):
        stack = []
        for t in i:
            if re.match(atoms, t.group()):
                stack.append(t.group())
            elif re.match(r'\d+', t.group()):
                val = stack.pop()
                stack.append([val] * int(t.group()))
            elif t.group() == '(':
                stack.append( parse_expression(i) )
            else:
                break
        return stack

    l = parse_expression(re.finditer('(' + atoms + '|\d+|[()])', expr))
    return Counter(flatten(l))

tests = ["H2O", "Mg(OH)2", "K4(ON(SO3)2)2"]

for t in tests:
    print('{:<20} {!s}'.format(t, get_count(t)))

印刷品:

H2O                  Counter({'H': 2, 'O': 1})
Mg(OH)2              Counter({'O': 2, 'H': 2, 'Mg': 1})
K4(ON(SO3)2)2        Counter({'O': 14, 'K': 4, 'S': 4, 'N': 2})

这个问题来自726. Number of Atoms

基于aJava Solution on Github to Python平移的算法

class Solution:
  def __init__(self, formula):
    self.formula = formula

  def count_of_atoms(self):
    " Main routine to count the atoms "
    if len(self.formula) <= 1:
      return self.formula

    self.index = 0
    mp = self.recursion()  # use recursive function to count

    # create result for display
    return dict(sorted(mp.items(), key = lambda kv: kv[0]))

  def recursion(self):
    " Recursive counts the atoms (uses recursion to handle parens) "

    # Placing formula count in dictionary
    rst = {}

    while self.index < len(self.formula):
      if self.formula[self.index] == "(":
        # Start paren group (recursive call)
        self.index += 1
        sub_rst = self.recursion()  # dictionary for paren group
        count = self.get_count()    # count of paren group

        # Merge paren group dictionary (sub_rst) with current (rst)
        for k, v in sub_rst.items():
          if not k in rst:
            rst[k] = 0
          rst[k] += v*count
      elif self.formula[self.index] == ")":
        # Ending paren group
        self.index += 1
        return rst

      else:
        name = self.get_name()
        if not name in rst:
          rst[name] = 0
        rst[name] += self.get_count()

    return rst

  def get_name(self):
    " name of atom "
    i, j = self.index, self.index + 1
    while j < len(self.formula) and self.formula[j].islower():
      j += 1

    self.index = j
    return self.formula[i:j]

  def get_count(self):
    " Count of atoms or () group "
    i = j = self.index
    while j < len(self.formula) and self.formula[j].isdigit():
      j += 1

    self.index = j
    return 1 if i == j else int(self.formula[i:j])

测试

使用来自726. Number of Atoms的测试用例

tests = ["H2O", "Mg(OH)2", "K4(ON(SO3)2)2"]

for t in tests:
  s = Solution(t)
  print(f"Formula {t} has atom count {s.count_of_atoms()}")

输出

Formula H2O has atom count {'H': 2, 'O': 1}
Formula Mg(OH)2 has atom count {'H': 2, 'Mg': 1, 'O': 2}
Formula K4(ON(SO3)2)2 has atom count {'K': 4, 'N': 2, 'O': 14, 'S': 4}

相关问题 更多 >