148 lines
5 KiB
Python
148 lines
5 KiB
Python
|
import logging
|
||
|
|
||
|
class PatriciaTrie:
|
||
|
def __init__(self):
|
||
|
self.root = Node()
|
||
|
|
||
|
def find(self, prefix, node=None, collector=""):
|
||
|
if not node: return self.find(prefix, self.root)
|
||
|
|
||
|
logging.debug(f"Looking for prefix {prefix} at {node.elem}")
|
||
|
if not prefix:
|
||
|
res = []
|
||
|
if node.leaf:
|
||
|
logging.debug(f"Found leaf {node.elem}")
|
||
|
res.append(collector)
|
||
|
for child in node.children:
|
||
|
logging.debug(f"Looking for leafs in {node.elem}")
|
||
|
res.extend(self._find(prefix, child, collector+child.elem))
|
||
|
logging.debug(f"Result for {node.elem}: {res}")
|
||
|
return res
|
||
|
|
||
|
for child in node.children:
|
||
|
if prefix.startswith(child.elem):
|
||
|
return self._find(prefix[len(child.elem):], child, collector+child.elem)
|
||
|
|
||
|
return []
|
||
|
|
||
|
def add(self, elem):
|
||
|
(node, split_idx, elem_rest) = self.find_longest_match(elem, self.root)
|
||
|
|
||
|
def new_child():
|
||
|
return Node(elem=elem_rest, parent=node, leaf=True, children=[])
|
||
|
|
||
|
def split_node(leaf):
|
||
|
(oelem, ochild, oleaf) = (node.elem, node.children, node.leaf)
|
||
|
node.leaf = leaf
|
||
|
node.elem = oelem[:split_idx]
|
||
|
node.children = []
|
||
|
|
||
|
node.children.append(Node(elem=oelem[split_idx:], parent=node, leaf=oleaf, children=ochild))
|
||
|
|
||
|
# elem already found in trie
|
||
|
# just make sure node is marked as leaf
|
||
|
if not split_idx and not elem_rest:
|
||
|
node.leaf = True
|
||
|
return
|
||
|
|
||
|
# - elem not in trie
|
||
|
# - parent node exhausted
|
||
|
# This can happen if parent is root, or elem is larger than
|
||
|
# largest matching elem in trie so far
|
||
|
if not split_idx:
|
||
|
node.children.append(Node(elem=elem_rest, parent=node, leaf=True, children=[]))
|
||
|
return
|
||
|
|
||
|
# - elem already found in trie
|
||
|
# - elem ends in the middle of a node
|
||
|
# This can happen if an existing node up to index and its
|
||
|
# parents make up the entire elem. We need to split
|
||
|
# the node at split_idx and mark it as leaf.
|
||
|
if not elem_rest:
|
||
|
old_elem = node.elem
|
||
|
old_children = node.children
|
||
|
old_leaf = node.leaf
|
||
|
|
||
|
node.leaf = True
|
||
|
node.elem = old_elem[:split_idx]
|
||
|
node.children = []
|
||
|
|
||
|
split_node = Node(elem=old_elem[split_idx:], parent=node, leaf=old_leaf, children=old_children)
|
||
|
|
||
|
node.children.append(split_node)
|
||
|
return
|
||
|
|
||
|
|
||
|
# - elem not found in trie
|
||
|
# - node up to split_idx and its parent make up elem
|
||
|
# Node needs to be split at split_idx (preserving leaf status for split off old node) and
|
||
|
# a new child is added for elem
|
||
|
old_children = node.children
|
||
|
old_leaf = node.leaf
|
||
|
|
||
|
node.leaf = False
|
||
|
node.elem = old_elem[:split_idx]
|
||
|
node.children = []
|
||
|
|
||
|
node_a = Node(elem=old_elem[split_idx:], parent=node, leaf=old_leaf, children=old_children)
|
||
|
node_b = Node(elem=elem_rest, parent=node, leaf=True, children=[])
|
||
|
node.children.append(node_a)
|
||
|
node.children.append(node_b)
|
||
|
|
||
|
def find_longest_match(self, elem, node):
|
||
|
for child in node.children:
|
||
|
if not child.elem or not elem: continue
|
||
|
|
||
|
# child does not match
|
||
|
if child.elem[0] is not elem[0]: continue
|
||
|
|
||
|
# child matches completely
|
||
|
if elem.startswith(child.elem):
|
||
|
# special case: the node already exists
|
||
|
if len(elem) == len(child.elem):
|
||
|
return (child, None, None)
|
||
|
# recourse down the trie
|
||
|
return self.find_longest_match(elem[len(child.elem):], child)
|
||
|
|
||
|
# elem matches completely, implies that elem is shorter
|
||
|
# than child.elem. Split child at len(elem)
|
||
|
if child.elem.startswith(elem):
|
||
|
return (child, len(elem), None)
|
||
|
|
||
|
# child does not match completely but at least first char matches
|
||
|
# find longest split index
|
||
|
for i in range(len(elem)):
|
||
|
if elem[i] == child.elem[i]: continue
|
||
|
else: return (child, i, elem[i:])
|
||
|
|
||
|
# No child(-prefix) matched, create another child
|
||
|
return (node, None, elem)
|
||
|
|
||
|
def to_dot(self):
|
||
|
print("graph {")
|
||
|
self._to_dot(self.root)
|
||
|
print("}")
|
||
|
|
||
|
def _to_dot(self, node):
|
||
|
for child in node.children:
|
||
|
if not node.elem: print(f'root -- "{child.elem}";')
|
||
|
else: print(f'"{node.elem}" -- "{child.elem}";')
|
||
|
|
||
|
if child.leaf:
|
||
|
print(f'"{child.elem}" [color=blue];')
|
||
|
|
||
|
self._to_dot(child)
|
||
|
|
||
|
class Node:
|
||
|
def __init__(self, elem=None, parent=None, children=[],
|
||
|
leaf=False, offset=0, title=None, info=None):
|
||
|
self.elem = elem
|
||
|
self.parent = parent
|
||
|
self.children = children
|
||
|
self.leaf = leaf
|
||
|
|
||
|
# payload
|
||
|
self.offset = offset
|
||
|
self.title = title if title else elem
|
||
|
self.info = info
|