# Definition of Tree up to this point
from typing import TypeVar
from rdflib import Graph, URIRef
# Type variable used to annotate the data stored at a tree node
DataType = TypeVar("DataType")
class Tree:
    """A tree whose nodes hold a piece of data and an ordered list of subtrees

    Parameters
    ----------
    data
        The data contained in this tree
    children
        The subtrees of this tree; a fresh empty list is used when omitted

    Raises
    ------
    TypeError
        If any element of ``children`` is not a ``Tree``
    """

    # Class-level cache mapping node data to the URI created for it; it is
    # shared by every Tree and filled in by ``to_rdf``.
    RDF_TYPES = {}

    # Predicates used for the triples emitted by ``to_rdf``.
    RDF_EDGES = {'is': URIRef('is-a'),
                 'parent': URIRef('is-the-parent-of'),
                 'child': URIRef('is-a-child-of'),
                 'sister': URIRef('is-a-sister-of')}

    def __init__(self, data: DataType,
                 children: 'list[Tree] | None' = None):
        self._data = data
        # A ``[]`` default would be a single list object shared by every
        # tree constructed without children, so build one per instance.
        self._children = [] if children is None else children

        self._validate()

    def _validate(self) -> None:
        """Check that every child is a ``Tree``

        Raises
        ------
        TypeError
            If some child is not a ``Tree``
        """
        # Explicit check instead of ``assert``: assertions are removed when
        # Python runs with -O, which would silently disable the validation.
        if not all(isinstance(c, Tree) for c in self._children):
            raise TypeError('all children must be trees')

    @property
    def data(self) -> DataType:
        """The data contained in this tree's root node"""
        return self._data

    @property
    def children(self) -> list['Tree']:
        """The subtrees of this tree (the stored list itself, not a copy)"""
        return self._children

    def __str__(self):
        """Return the data of the leaves, left to right, joined by spaces"""
        if self._children:
            return ' '.join(str(c) for c in self._children)

        return str(self._data)

    def __repr__(self):
        """Return the indented multi-line rendering from ``to_string``"""
        return self.to_string(0)

    def to_string(self, depth: int = 0) -> str:
        """Render this tree with one node per line

        Parameters
        ----------
        depth
            The depth of this node below the node being rendered; the
            root (depth 0) gets no prefix, deeper nodes get ``depth - 1``
            spaces followed by ``--``

        Returns
        -------
        str
            The rendering, with every line terminated by a newline
        """
        prefix = ' ' * (depth - 1) + ('--' if depth > 0 else '')

        # str() so that non-string data renders instead of raising TypeError
        parts = [prefix + str(self._data) + '\n']
        parts.extend(c.to_string(depth + 1) for c in self._children)

        return ''.join(parts)

    def __contains__(self, data: DataType) -> bool:
        """Return whether some node of this tree holds data equal to ``data``

        The search is a pre-order depth-first traversal.
        """
        if self._data == data:
            return True

        return any(data in child for child in self._children)

    def __getitem__(self, idx: 'int | tuple[int, ...]') -> 'Tree':
        """Return the subtree reached by following ``idx`` from this node

        Parameters
        ----------
        idx
            A single child position, or a sequence of child positions to
            follow from the root downward; an empty sequence selects this
            tree itself

        Raises
        ------
        IndexError
            If any position is not a non-negative int, or is out of range
            for the node it is applied to
        """
        if isinstance(idx, int):
            idx = (idx,)

        # Explicit check instead of ``assert`` so validation survives -O.
        if not all(isinstance(i, int) and i >= 0 for i in idx):
            errmsg = ('index must be a non-negative int or '
                      'tuple of non-negative ints')
            raise IndexError(errmsg)

        # Walk down one level per position; validating once up front
        # avoids re-checking the remaining path at every level.
        node = self
        for i in idx:
            node = node._children[i]

        return node

    def to_rdf(self, graph=None, nodes=None, idx=()) -> Graph:
        """Add this tree to an RDF graph and return the graph

        Each node becomes a URI named by its index path joined with
        underscores (the node at ``idx == ()`` gets the empty string). For
        each node an ``is-a`` triple links it to the URI for its data, and
        parent/child triples link it to each of its children. Distinct
        children of the same node are linked by sister triples in both
        directions.

        Parameters
        ----------
        graph
            The graph to add triples to; a new ``Graph`` is created when
            omitted
        nodes
            Mapping from index path to node URI, filled in during the
            traversal; a new dict is created when omitted
        idx
            The index path of this node relative to the tree being
            converted

        Returns
        -------
        Graph
            The graph that the triples were added to
        """
        graph = Graph() if graph is None else graph
        # A ``{}`` default would be shared by every top-level call, mixing
        # the node maps of unrelated trees.
        nodes = {} if nodes is None else nodes

        nodes[idx] = URIRef('_'.join(str(i) for i in idx))

        if self._data not in Tree.RDF_TYPES:
            # str() keeps string data unchanged and lets other data be
            # turned into a URI as well
            Tree.RDF_TYPES[self._data] = URIRef(str(self._data))

        graph.add((nodes[idx],
                   Tree.RDF_EDGES['is'],
                   Tree.RDF_TYPES[self._data]))

        childidxs = [idx + (i,) for i in range(len(self._children))]

        for child, childidx in zip(self._children, childidxs):
            # The recursive call registers nodes[childidx]
            child.to_rdf(graph, nodes, childidx)

            graph.add((nodes[idx],
                       Tree.RDF_EDGES['parent'],
                       nodes[childidx]))
            graph.add((nodes[childidx],
                       Tree.RDF_EDGES['child'],
                       nodes[idx]))

        for child1idx in childidxs:
            for child2idx in childidxs:
                # A node is not its own sister: skip the reflexive pair
                if child1idx == child2idx:
                    continue

                graph.add((nodes[child1idx],
                           Tree.RDF_EDGES['sister'],
                           nodes[child2idx]))

        self._rdf_nodes = nodes

        return graph

    @property
    def rdf(self) -> Graph:
        """The RDF graph for this tree, built on first access and cached

        The cached graph is not rebuilt on later accesses.
        """
        if not hasattr(self, "_rdf"):
            self._rdf = self.to_rdf()

        return self._rdf

    def find(self, query: str) -> list[tuple[int, ...]]:
        """Run a query against this tree's RDF graph

        Parameters
        ----------
        query
            The query string passed to the graph's ``query`` method; the
            first value of each result is expected to be a node URI
            produced by ``to_rdf``

        Returns
        -------
        list[tuple[int, ...]]
            The index path of each matched node, usable with
            ``__getitem__``; the root is returned as the empty tuple
        """
        # The root's URI is the empty string, and ''.split('_') yields
        # [''], so empty pieces are dropped rather than passed to int().
        return [tuple(int(i) for i in str(res[0]).split('_') if i)
                for res in self.rdf.query(query)]