234树 Python
我正在尝试在Python中构建一个2-3-4树。目前为止,插入操作在高度为3的节点上似乎是正常的。但是一旦超过这个高度,数据就好像被丢掉了,而不是被插入到树里。我不太明白为什么会这样,已经仔细检查了我的代码好几遍。我在插入代码附近注释了我参考的插入算法。提前感谢大家对我问题的任何见解。
import re
class Node:
def __init__(self, newInfo = None, p = None, q = None,
r = None, s = None, parent = None):
#initilize parent / child links
self.children = [p, q, r, s]
self.parent = parent
#initialize data
self.info = [ newInfo, None, None ]
class TwoThreeFourTree:
def __init__(self):
self.root = Node()
# Built in sort sinks None types to front, which
# is opposite of how info was structured to work,
# Using this sort will push None values to back.
def sortNode(self, curr):
c = []
for i in curr.info:
if i is not None:
c.append(i)
c.sort()
for i in range(3-len(c)):
c.append(None)
return c
# Built in len counts None, this one doesn't.
def length(self, node):
i = 0
for info in node:
if info is not None:
i = i + 1
return i
def isLeaf(self, node):
for child in node.children:
if child is not None:
return False
return True
def isOrphan(self, node):
if node.parent is None:
return True
return False
def lookup(self, userStr, reg):
curr = self.root
while curr is not None:
#Two Node
if self.length(curr.info) is 1:
if re.match(reg, str(curr.info[0])) is not None:
return curr.info[0]
else:
if userStr < curr.info[0]:
curr = curr.children[0]
else:
curr = curr.children[1]
#Three Node
elif self.length(curr.info) is 2:
for item in curr.info:
if re.match(reg, str(item)) is not None:
return item
if userStr < curr.info[0]:
curr = curr.children[0]
elif userStr < curr.info[1]:
curr = curr.children[1]
else:
curr = curr.children[2]
#Four Node
elif self.length(curr.info) is 3:
for item in curr.info:
if item is not None:
if re.match(reg, str(item)) is not None:
return item
if userStr < curr.info[0]:
curr = curr.children[0]
elif userStr < curr.info[1]:
curr = curr.children[1]
elif userStr < curr.info[2]:
curr = curr.children[2]
else:
curr = curr.children[3]
def inorder(self, node, retlst = None):
if retlst is None:
retlst = []
if node.children[0]:
retlst = self.inorder(node.children[0], retlst)
retlst += [node.info[0]]
if node.children[1]:
retlst = self.inorder(node.children[1], retlst)
retlst += [node.info[1]]
if node.children[2]:
retlst = self.inorder(node.children[2], retlst)
retlst += [node.info[2]]
if node.children[3]:
retlst = self.inorder(node.children[3], retst)
return retlst
## Using Algorithm from: http://www.clear.rice.edu/comp212/01-fall/lectures/33/
def insert(self, info, node):
curr = node
if self.length(curr.info) == 0: # curr is empty
curr.info[0] = info
return True
elif self.length(curr.info) == 1: # curr is two node.
if self.isLeaf(curr):
curr.info[1] = info
curr.info = self.sortNode(curr)
return True
else:
if info < curr.info[0]:
self.insert(info, curr.children[0])
else:
self.insert(info, curr.children[1])
elif self.length(curr.info) == 2: # curr is 3 node
if self.isLeaf(curr):
curr.info[2] = info
curr.info = self.sortNode(curr)
return True
else:
if info < curr.info[0]:
self.insert(info, curr.children[0])
elif info < curr.info[1]:
self.insert(info, curr.children[1])
elif info > curr.info[2]:
self.insert(info, curr.children[2])
elif self.length(curr.info) == 3: # curr is 4 node
if self.isOrphan(curr): # curr has no parent
curr = Node(curr.info[1],
Node(curr.info[0], curr.children[0], curr.children[1]),
Node(curr.info[2], curr.children[2], curr.children[3]))
for child in curr.children:
if child is not None:
child.parent = curr
self.root = curr
self.insert(info, self.root)
else: #curr has a parent
if self.length(curr.parent.info) == 1: # Parent is Two Node:
if curr.parent.children[0] == curr:# cur is lst.
#If P = [curr, M, p-rst], then P becomes [[lst, X, mlst], Y, [mrst, Z, rst], M, p-rst].
curr.parent.info[1] = curr.info[1]
curr.parent.info = self.sortNode(curr.parent)
curr.parent.children[2] = curr.parent.children[1]
curr.parent.children[1] = Node(curr.info[2], curr.children[2], curr.children[3], None, None, curr.parent)
curr.parent.children[0] = Node(curr.info[0], curr.children[0], curr.children[1], None, None, curr.parent)
self.insert(info, self.root)
elif curr.parent.children[1] == curr: # curr is rst.
#If P = [p-lst, M, curr], then P becomes [p-lst, M, [lst, X, mlst], Y, [mrst, Z, rst]].
curr.parent.info[1] = curr.info[1]
curr.parent.info = self.sortNode(curr.parent)
curr.parent.children[2] = Node(curr.info[2], curr.children[2], curr.children[3], None, None, curr.parent)
curr.parent.children[1] = Node(curr.info[0], curr.children[0], curr.children[1], None, None, curr.parent)
self.insert(info, self.root)
elif self.length(curr.parent.info) == 2: # Parent is Three Node:
if curr.parent.children[0] == curr: # curr is lst
#If P = [curr, M1, p-mst, M2, p-rst], then P becomes [[lst, X, mlst], Y, [mrst, Z, rst], M1, p-mst, M2, p-rst].
curr.parent.info[2] = curr.info[1]
curr.parent.info = self.sortNode(curr.parent)
curr.parent.children[3] = curr.parent.children[2]
curr.parent.children[2] = curr.parent.children[1]
curr.parent.children[1] = Node(curr.info[2], curr.children[2], curr.children[3], None, None, curr.parent)
curr.parent.children[0] = Node(curr.info[0], curr.children[0], curr.children[1], None, None, curr.parent)
self.insert(info, self.root)
elif curr.parent.children[1] == curr: # curr is mst
#If P = [p-lst, M1, curr, M2, p-rst], then P becomes [p-lst, M1,[lst, X, mlst], Y, [mrst, Z, rst], M2, p-rst].
curr.parent.info[2] = curr.info[1]
curr.parent.info = self.sortNode(curr.parent)
curr.parent.children[3] = curr.parent.children[2]
curr.parent.children[2] = Node(curr.info[2], curr.children[2], curr.children[3], None, None, curr.parent)
curr.parent.children[1] = Node(curr.info[0], curr.children[0], curr.children[1], None, None, curr.parent)
self.insert(info, self.root)
elif curr.parent.children[2] == curr: # curr is rst
#If P = [p-lst, M1, p-mst, M2, curr], then P becomes [p-lst, M1, p-mst, M2, [lst, X, mlst], Y, [mrst, Z, rst]].
curr.parent.info[2] = curr.info[1]
curr.parent.info = self.sortNode(curr.parent)
curr.parent.children[3] = Node(curr.info[2], curr.children[2], curr.children[3], None, None, curr.parent)
curr.parent.children[2] = Node(curr.info[0], curr.children[0], curr.children[1], None, None, curr.parent)
self.insert(info, self.root)
1 个回答
2
作为部分解答,你可以先减少代码中的重复部分。比如,你不需要为三种不同的节点类型特别处理 lookup()
函数。你可以写成这样:
def lookup(self, s, r, node=None):
if not node: node = self.root
for item in node.info:
if re.match(r, str(item)):
return item
for (i, item) in enumerate(curr.info):
if s < item:
return self.lookup(s, r, curr.children[i])
return self.lookup(s, r, curr.children[-1]) # Assume len(children) == len(info) + 1
另外,遍历操作应该用生成器来实现,同样地,使用循环来替代重复的代码:
def inorder(self, node):
for (i, item) in enumerate(node.info):
if node.children[i]: self.inorder(node.children[i])
yield item
if node.children[-1]: self.inorder(node.children[-1])
这样整理代码,首先会让别人更容易找到问题。其次,你可能会发现问题在这个过程中就消失了。