from typing import TypeVar
= TypeVar("DataType")
DataType
class Tree:
"""A tree
Parameters
----------
data
The data contained in this tree
children
The subtrees of this tree
"""
def __init__(self, data: DataType, children: list['Tree']=[]):
self._data = data
self._children = children
self._validate()
def _validate(self) -> None:
try:
assert all(isinstance(c, Tree)
for c in self._children)
except AssertionError:
= 'all children must be trees'
msg raise TypeError(msg)
@property
def data(self) -> DataType:
return self._data
@property
def children(self) -> list['Tree']:
return self._children
def __str__(self):
if self._children:
return ' '.join(c.__str__() for c in self._children)
else:
return str(self._data)
def __repr__(self):
return self.to_string(0)
def to_string(self, depth: int) -> str:
= (depth - 1) * ' ' +\
s int(depth > 0) * '--' +\
self._data + '\n'
+= ''.join(c.to_string(depth+1)
s for c in self._children)
return s
def __contains__(self, data: DataType) -> bool:
# pre-order depth-first search
if self._data == data:
return True
else:
for child in self._children:
if data in child:
return True
return False
Containment Tests
Suppose I’m interested in finding all sentences with a definite determiner in a subject. This means checking whether a particular subtree (corresponding to the subject) contains a particular element.
Let’s figure out how to compute whether a tree contains a particular piece of data, and then we’ll get back to figuring out how to grab the relevant subtree.
Containment tests for a particular class (viewed as a sort of container) are often implemented in the __contains__
magic method, which defines the behavior of in
. The idea is that __contains__
should take a piece of data and tell us whether it matches a piece of data somewhere in the tree. So basically, we want the analogue of asking whether a list contains some element using in
.
For a list, containment could be naturally implemented using a for
-loop. So suppose we’re redefining the list
class, __contains__
could be implemented something like this:
def __contains__(self, data):
for d in self:
if d == data:
return True
return False
The way we’ll implement __contains__
for Tree
is analogous, but we have to take into account that the tree has a more interesting structure to it. We’ll do this by implementing two kinds of tree search algorithms:
- depth-first search
- breath-first search
In both kinds of search, we start at the top of the tree and work our way down, the question is which nodes we look at.
Depth-first search
We’ll look at two ways we can implement depth-first search: pre-order and post-order.
Pre-order depth-first search
To conduct pre-order depth-first search, we start from the left-most child subtree and, moving right, look at the data at the root of that subtree; then do pre-order depth-first search on that subtree.
This search path is visualized in the image below, where the line is our traversal path and the dots are wehn we look at a piece of data in a node.
= Tree('S',
tree 'NP',
[Tree('D',
[Tree('a')]),
[Tree('N',
Tree('greyhound')])]),
[Tree('VP',
Tree('V',
[Tree('loves')]),
[Tree('NP',
Tree('D',
[Tree('a')]),
[Tree('N',
Tree('greyhound')])])])]) [Tree(
"loves" in tree, "hates" in tree
(True, False)
%%timeit
"loves" in tree
581 ns ± 5.9 ns per loop (mean ± std. dev. of 7 runs, 1,000,000 loops each)
%%timeit
"hates" in tree
943 ns ± 10.4 ns per loop (mean ± std. dev. of 7 runs, 1,000,000 loops each)
Post-order depth-first search
In post-order depth-first search, we start from the left-most child subtree and moving right, do post-order depth-first search on that subtree, then look at the data at the root of that subtree.
class Tree(Tree):
def __contains__(self, data: DataType) -> bool:
# post-order depth-first search
if not self._children:
return self._data == data
else:
for c in self._children:
if data in c:
return True
return self._data == data
= Tree('S',
tree 'NP',
[Tree('D',
[Tree('a')]),
[Tree('N',
Tree('greyhound')])]),
[Tree('VP',
Tree('V',
[Tree('loves')]),
[Tree('NP',
Tree('D',
[Tree('a')]),
[Tree('N',
Tree('greyhound')])])])]) [Tree(
"loves" in tree, "hates" in tree
(True, False)
Breadth-first search
Rather than recursing on subtrees, bread-first search looks at the data in all nodes at depth \(i\) then to breadth-first search at depth \(i+1\).
One natural way to implement breadth-first search is using what’s called iteratively deepening depth-first search.
class Tree(Tree):
@property
def depth(self) -> int:
if self._children:
return 1 + max(c.depth for c in self._children)
else:
return 0
def __contains__(self, data: DataType) -> bool:
# breadth-first search
for d in range(self.depth):
if self._iddfs(data, d):
return True
return False
def _iddfs(self, data, depth):
# iterative deepening depth-first search
if depth == 0:
return self._data == data
elif depth > 0:
for c in self._children:
if c._iddfs(data, depth-1):
return True
return False
= Tree('S',
tree 'NP',
[Tree('D',
[Tree('a')]),
[Tree('N',
Tree('greyhound')])]),
[Tree('VP',
Tree('V',
[Tree('loves')]),
[Tree('NP',
Tree('D',
[Tree('a')]),
[Tree('N',
Tree('greyhound')])])])]) [Tree(
"loves" in tree, "hates" in tree
(True, False)