# Definition of Tree up to this point
from typing import TypeVar
from rdflib import Graph, URIRef
# Type variable used to annotate the data held by a Tree.
DataType = TypeVar("DataType")
class Tree:
    """A tree

    Each tree holds one piece of data and an ordered list of subtrees.
    Subtrees are addressed by tuples of non-negative child positions:
    ``()`` is the tree itself, ``(0,)`` its first child, ``(0, 1)`` the
    second child of its first child, and so on.

    Parameters
    ----------
    data
        The data contained in this tree
    children
        The subtrees of this tree; omitted or ``None`` means no children

    Raises
    ------
    TypeError
        If any element of ``children`` is not a ``Tree``
    """

    # Cache mapping a data value to its URIRef. It lives on the class, so it
    # is shared by every tree, and it is filled lazily by ``to_rdf``.
    RDF_TYPES = {}

    # Predicates used for the triples that ``to_rdf`` emits.
    RDF_EDGES = {'is': URIRef('is-a'),
                 'parent': URIRef('is-the-parent-of'),
                 'child': URIRef('is-a-child-of'),
                 'sister': URIRef('is-a-sister-of')}

    def __init__(self, data: DataType,
                 children: 'list[Tree] | None' = None):
        self._data = data
        # Build a fresh list per instance: a literal ``[]`` default would be
        # a single list object shared by every tree created without children.
        self._children = [] if children is None else children

        self._validate()

    def _validate(self) -> None:
        """Check that every child is itself a ``Tree``.

        Raises
        ------
        TypeError
            If any child is not a ``Tree``
        """
        # Explicit check rather than ``assert``: assertions are removed when
        # Python runs with -O, which would silently disable the validation.
        if not all(isinstance(c, Tree) for c in self._children):
            raise TypeError('all children must be trees')

    @property
    def data(self) -> DataType:
        """The data contained in this tree."""
        return self._data

    @property
    def children(self) -> 'list[Tree]':
        """The subtrees of this tree."""
        return self._children

    def __str__(self):
        """Return the data of the leaves, left to right, joined by spaces."""
        if self._children:
            return ' '.join(str(c) for c in self._children)

        return str(self._data)

    def __repr__(self):
        """Return the indented, one-node-per-line rendering of the tree."""
        return self.to_string(0)

    def to_string(self, depth: int) -> str:
        """Render this tree with one node per line.

        Parameters
        ----------
        depth
            How deep this node is below the node being rendered; the
            top-level call uses 0

        Returns
        -------
        str
            This node's line followed by the lines of all its descendants.
            Nodes below the top are prefixed with ``depth - 1`` spaces and
            ``'--'``.
        """
        # For depth 0 the multiplier is negative, which yields ''.
        indent = (depth - 1) * ' '
        branch = '--' if depth > 0 else ''

        # str() keeps this working for non-string data, as __str__ does.
        line = indent + branch + str(self._data) + '\n'

        return line + ''.join(c.to_string(depth + 1)
                              for c in self._children)

    def __contains__(self, data: DataType) -> bool:
        """Report whether ``data`` equals the data of this tree or of any
        descendant."""
        # pre-order depth-first search
        if self._data == data:
            return True

        return any(data in child for child in self._children)

    def __getitem__(self, idx: 'int | tuple[int, ...]') -> 'Tree':
        """Return the subtree at a position.

        Parameters
        ----------
        idx
            A single child position, or a tuple of child positions that is
            followed from this tree downwards; the empty tuple selects this
            tree itself

        Returns
        -------
        Tree
            The subtree found at ``idx``

        Raises
        ------
        IndexError
            If ``idx`` contains something that is not a non-negative int,
            or if a position is beyond the available children
        """
        idx = (idx,) if isinstance(idx, int) else idx

        # Explicit check rather than ``assert`` so that it survives -O.
        # Zero is accepted; the message text is kept as it was.
        if not all(isinstance(i, int) and i >= 0 for i in idx):
            errmsg = 'index must be a positive int or tuple of positive ints'
            raise IndexError(errmsg)

        if not idx:
            return self
        elif len(idx) == 1:
            return self._children[idx[0]]
        else:
            # Descend one level and let the child resolve the remainder.
            return self._children[idx[0]][idx[1:]]

    def to_rdf(self, graph=None, nodes=None, idx=tuple()) -> Graph:
        """Add this tree, and recursively its subtrees, to an RDF graph.

        Every node is identified by a ``URIRef`` whose text is its index
        tuple joined with ``'_'`` (the node at ``idx=()`` gets the empty
        string). For each node the following triples are added:

        - ``(node, 'is-a', type)`` where ``type`` is the ``URIRef`` built
          from the node's data
        - ``(node, 'is-the-parent-of', child)`` and
          ``(child, 'is-a-child-of', node)`` for every child
        - ``(child1, 'is-a-sister-of', child2)`` for every ordered pair of
          children, including a child paired with itself

        Parameters
        ----------
        graph
            The graph to add triples to; a new ``Graph`` is created when
            omitted
        nodes
            Mapping from index tuple to node ``URIRef``, filled in place; a
            new dict is created when omitted
        idx
            The index tuple of this tree relative to the tree on which the
            conversion was started

        Returns
        -------
        Graph
            The graph containing the added triples

        Notes
        -----
        As side effects, ``nodes`` is stored on ``self._rdf_nodes`` and
        previously unseen data values are added to ``Tree.RDF_TYPES``.
        """
        graph = Graph() if graph is None else graph
        # Use ``is None`` rather than truthiness: recursive calls pass a
        # dict that must be filled in place even while it is still empty.
        # A ``{}`` default would be shared across unrelated top-level calls.
        nodes = {} if nodes is None else nodes

        idxstr = '_'.join(str(i) for i in idx)
        nodes[idx] = URIRef(idxstr)

        if self._data not in Tree.RDF_TYPES:
            Tree.RDF_TYPES[self._data] = URIRef(self._data)

        typetriple = (nodes[idx],
                      Tree.RDF_EDGES['is'],
                      Tree.RDF_TYPES[self._data])

        graph.add(typetriple)

        childidxs = [idx + (i,) for i in range(len(self._children))]

        for child, childidx in zip(self._children, childidxs):
            # The recursive call registers nodes[childidx] before it is used.
            child.to_rdf(graph, nodes, childidx)

            partriple = (nodes[idx],
                         Tree.RDF_EDGES['parent'],
                         nodes[childidx])
            chitriple = (nodes[childidx],
                         Tree.RDF_EDGES['child'],
                         nodes[idx])

            graph.add(partriple)
            graph.add(chitriple)

        # Every ordered pair of children, a child with itself included.
        for child1idx in childidxs:
            for child2idx in childidxs:
                sistriple = (nodes[child1idx],
                             Tree.RDF_EDGES['sister'],
                             nodes[child2idx])

                graph.add(sistriple)

        self._rdf_nodes = nodes

        return graph

    @property
    def rdf(self) -> Graph:
        """The RDF graph of this tree, built on first access and then
        reused."""
        if not hasattr(self, "_rdf"):
            self._rdf = self.to_rdf()

        return self._rdf

    def find(self, query: str) -> 'list[tuple[int, ...]]':
        """Run a query against this tree's RDF graph and return the index
        tuples of the matching nodes.

        Parameters
        ----------
        query
            The query handed to ``self.rdf.query`` (presumably SPARQL --
            confirm against the rdflib documentation)

        Returns
        -------
        list[tuple[int, ...]]
            One index tuple per result row, decoded from the first element
            of the row; the empty tuple stands for this tree itself

        Raises
        ------
        ValueError
            If the first element of a result row is not a node identifier,
            i.e. not a sequence of integers joined by ``'_'``
        """
        found = []

        for res in self.rdf.query(query):
            nodeid = str(res[0])
            # The root's identifier is the empty string; ''.split('_') gives
            # [''] and int('') would raise, so map it to () directly.
            if nodeid:
                found.append(tuple(int(i) for i in nodeid.split('_')))
            else:
                found.append(())

        return found