from copy import copy, deepcopy
from functools import lru_cache
class TransitionFunction:
"""A finite state machine transition function.
Parameters
----------
transition_graph : dict[tuple[str, str], set[str]]
Mapping from (state, symbol) pairs to sets of target states.
Attributes
----------
transition_graph : dict[tuple[str, str], set[str]]
The underlying transition graph.
"""
def __init__(self, transition_graph: dict[tuple[str, str], set[str]]):
self._transition_graph = transition_graph
def __call__(self, state: str, symbol: str) -> set[str]:
"""Return the set of states reachable from state on symbol.
Parameters
----------
state : str
The source state.
symbol : str
The input symbol.
Returns
-------
set[str]
The set of target states, or an empty set if undefined.
"""
try:
return self._transition_graph[(state, symbol)]
except KeyError:
return set()
def __or__(self, other: 'TransitionFunction') -> 'TransitionFunction':
"""Merge two transition functions.
Parameters
----------
other : TransitionFunction
The transition function to merge with.
Returns
-------
TransitionFunction
A new transition function combining both graphs.
"""
return TransitionFunction(dict(self._transition_graph,
**other._transition_graph))
def add_transitions(self, transition_graph: dict[tuple[str, str], set[str]]) -> None:
"""Add new transitions to the graph.
Parameters
----------
transition_graph : dict[tuple[str, str], set[str]]
Transitions to add.
"""
self._transition_graph.update(transition_graph)
def validate(self, alphabet: set[str], states: set[str]) -> None:
"""Validate the transition function against an alphabet and state set.
Parameters
----------
alphabet : set[str]
The valid alphabet symbols.
states : set[str]
The valid states.
"""
self._validate_input_values(alphabet, states)
self._validate_output_types()
self._homogenize_output_types()
self._validate_output_values(states)
def _validate_input_values(self, alphabet, states):
"""Check that all input symbols and states are valid."""
for state, symbol in self._transition_graph.keys():
if symbol not in alphabet:
msg = ('all input symbols in transition function '
'must be in alphabet')
raise ValueError(msg)
if state not in states:
msg = ('all input states in transition function '
'must be in set of states')
raise ValueError(msg)
def _validate_output_types(self):
"""Check that all output values are str or set."""
for states in self._transition_graph.values():
t = type(states)
if t is not str and t is not set:
msg = ('all outputs in transition function '
'must be specified via str or set')
raise ValueError(msg)
def _homogenize_output_types(self):
"""Wrap any bare str outputs as singleton sets."""
for inp, out in self._transition_graph.items():
if type(out) is str:
self._transition_graph[inp] = {out}
def _validate_output_values(self, states):
"""Check that all output states are in the state set."""
for out in self._transition_graph.values():
if not all(state in states for state in out):
msg = ('all output symbols in transition function '
'must be in states')
raise ValueError(msg)
@property
def transition_graph(self) -> dict[tuple[str, str], set[str]]:
"""The underlying transition graph."""
return self._transition_graph
class FiniteStateAutomaton:
"""A finite state automaton.
Parameters
----------
alphabet : set[str]
The input alphabet.
states : set[str]
The set of states.
initial_state : str
The start state.
final_states : set[str]
The set of accepting states.
transition_graph : TransitionFunction
The transition function mapping (state, symbol) to states.
"""
def __init__(self, alphabet: set[str], states: set[str],
initial_state: str, final_states: set[str],
transition_graph: 'TransitionFunction'):
self._alphabet = {''} | alphabet
self._states = states
self._initial_state = initial_state
self._final_states = final_states
self._transition_function = TransitionFunction(transition_graph)
self._validate_initial_state()
self._validate_final_states()
self._transition_function.validate(self._alphabet, states)
self._generator = self._build_generator()
def __iter__(self):
return self
def __next__(self):
return next(self._generator)
def _build_generator(self):
"""Breadth-first enumerate strings accepted by the automaton."""
string_buffer = [(self._initial_state, '')]
if self._initial_state in self._final_states:
stack = ['']
else:
stack = []
while string_buffer:
if stack:
yield stack.pop()
else:
# Inefficient with total transition functions that
# have many transitions to the sink state; could be
# optimized but we don't know the sink state in general
new_buffer = []
for symb in self._alphabet:
for old_state, string in string_buffer:
new_states = self._transition_function(old_state, symb)
for st in new_states:
new_elem = (st, string+symb)
new_buffer.append(new_elem)
stack += [string
for state, string in new_buffer
if state in self._final_states]
string_buffer = new_bufferTwo equivalent definitions
Let’s now formalize the definition of a finite state automaton (FSA). We’re going to consider two possibilities–Deterministic Finite State Automaton (DFA) and Nondeterministic Finite State Automaton (NFA). I’m going to show you that they are at least weakly equivalent. That is, the class of languages generated by the DFAs is exactly equivalent to the one generated by the NFAs.
Deterministic Finite State Automaton (DFA)
A Deterministic Finite State Automaton (DFA) is a grammar with 5 components:
- A set of states \(Q\)
- An alphabet \(\Sigma\)
- A transition function \(\delta : Q \times \Sigma \rightarrow Q\)
- An initial state \(q_0 \in Q\)
- A set of final states \(F \subseteq Q\)
Nondeterministic Finite State Automaton (NFA)
A Nondeterministic Finite State Automaton (NFA) is also a grammar with 5 components:
- A set of states \(Q\)
- An alphabet \(\Sigma\)
- A transition function \(\delta : Q \times (\Sigma\,\color{red}{\cup \{\epsilon\}}) \rightarrow \color{red}{\mathcal{P}(Q)}\)
- An initial state \(q_0 \in Q\)
- A set of final states \(F \subseteq Q\)
What makes NFAs nondeterministic is that the transition function \(\delta\) can map a given state \(q\) and symbol \(\sigma\) to multiple states. That is, there are potentially multiple states to choose from when transitioning from \(q\) on \(\sigma\).
We often define (and draw) FSAs s.t. their transition functions are partial. We can assume all FSA transition functions \(\delta\) are in fact total by adding a sink state \(q_\text{sink} \not\in F\) to the FSA and mapping all \(\langle q, \sigma \rangle\) pairs for which \(\delta\) is undefined to \(q_\text{sink}\).
class TransitionFunction(TransitionFunction):
"""A finite state machine transition function with totalization.
Extends the base transition function with methods to check
totality and to totalize via a sink state.
Parameters
----------
transition_graph : dict[tuple[str, str], set[str]]
Mapping from (state, symbol) pairs to sets of target states.
"""
def istotalfunction(self, states: set[str], alphabet: set[str]) -> bool:
"""Check whether the transition function is total.
Parameters
----------
states : set[str]
The set of states.
alphabet : set[str]
The input alphabet.
Returns
-------
bool
True if every (state, symbol) pair is defined.
"""
return all((s, a) in self._transition_graph
for s in states
for a in alphabet)
def totalize(self, states: set[str], alphabet: set[str]) -> None:
"""Make the transition function total by adding a sink state.
Parameters
----------
states : set[str]
The set of states.
alphabet : set[str]
The input alphabet.
"""
if not self.istotalfunction(states, alphabet):
domain = {(s, a) for s in states for a in alphabet}
sink_state = 'qsink'
while sink_state in states:
sink_state += 'sink'
for inp in domain:
if inp not in self._transition_graph:
self._transition_graph[inp] = {sink_state}As I mentioned, it turns out that these two ways of defining FSAs are at least weakly equivalent. That is, the class of languages generated by NFAs is the same as the class generated by DFAs; therefore, both generate exactly the regular languages.
DFAs are equivalent to NFAs
Every DFA can be converted to a weakly equivalent NFA. A DFA \(G = \langle Q, \Sigma, \delta, q_0, F \rangle\) can be converted to an NFA \(G' = \langle Q', \Sigma, \delta', q'_0, F' \rangle\) by defining the transition function of \(G'\) as \(\delta'(q,\sigma) \equiv \{\delta(q,\sigma)\}\). Basically, the new NFA simulates the DFA by constraining itself to just be that DFA: it gives itself no choice of which states to move to.
class TransitionFunction(TransitionFunction):
"""A finite state machine transition function with determinism check.
Extends the transition function with a property to test whether
the automaton is deterministic.
Parameters
----------
transition_graph : dict[tuple[str, str], set[str]]
Mapping from (state, symbol) pairs to sets of target states.
"""
@property
def isdeterministic(self) -> bool:
"""Whether the transition function is deterministic.
Returns
-------
bool
True if no epsilon transitions exist and every input maps
to at most one state.
"""
has_epsilon = any(symb == '' for _, symb in self._transition_graph.keys())
all_singleton = all(len(v) < 2 for v in self._transition_graph.values())
return all_singleton and not has_epsilon
class FiniteStateAutomaton(FiniteStateAutomaton):
"""A finite state automaton with determinism check.
Parameters
----------
alphabet : set[str]
The input alphabet.
states : set[str]
The set of states.
initial_state : str
The start state.
final_states : set[str]
The set of accepting states.
transition_graph : TransitionFunction
The transition function.
"""
@property
def isdeterministic(self) -> bool:
"""Whether the automaton is deterministic.
Returns
-------
bool
True if the underlying transition function is deterministic.
"""
return self._transition_function.isdeterministicNFAs are equivalent to DFAs
Every NFA can be converted to a weakly equivalent DFA.
The \(\epsilon\)-closure of \(\delta\) is \[\delta_\epsilon(q, \sigma) = \begin{cases} \emptyset & \text{if } \delta(q, \sigma) \text{ is undefined}\\ \delta(q, \sigma) \cup \bigcup_{r \in \delta(q, \sigma)} \delta_\epsilon(r, \epsilon) & \text{otherwise} \\ \end{cases}\]
An NFA \(G = \langle Q, \Sigma, \delta, q_0, F \rangle\) can be converted to a DFA \(G' = \langle Q', \Sigma, \delta', q'_0, F' \rangle\) by defining:
- \(Q' = \mathcal{P}(Q)\)
- \(\delta'(R, \sigma) = \bigcup_{r\in R}\delta_\epsilon(r, \sigma)\)
- \(q'_0 = \{q_0\} \cup \delta_\epsilon(q_0, \epsilon)\)
- \(F' = \{R\;|\; \exists r \in R: r \in F\}\)
Basically, the new DFA simulates the NFA by tracking the set of states that the NFA could be in after producing or reading a given string. This is possible because our notion of state for both DFAs and NFAs is extremely general. We sometimes think of them as atomic things–like characters or numbers–but they can be arbitrarily complex–e.g. sets, strings, even trees–as long as the set of states is itself finite.
from functools import reduce
from itertools import chain, combinations
def powerset(iterable):
"""Generate all subsets of an iterable.
From the itertools recipes:
https://docs.python.org/3/library/itertools.html#recipes
Parameters
----------
iterable
The collection to compute subsets of.
Returns
-------
itertools.chain
All subsets, from empty to full.
"""
s = list(iterable)
return chain.from_iterable(combinations(s, r) for r in range(len(s)+1))
def transitive_closure(edges: set[tuple[str, str]]) -> set[tuple[str, str]]:
"""Compute the transitive closure of a directed graph.
Parameters
----------
edges : set[tuple[str, str]]
The edge set of the graph.
Returns
-------
set[tuple[str, str]]
The transitive closure of the edge set.
"""
while True:
new_edges = {(x, w)
for x, y in edges
for q, w in edges
if q == y}
all_edges = edges | new_edges
if all_edges == edges:
return edges
edges = all_edges
class TransitionFunction(TransitionFunction):
"""A finite state machine transition function with epsilon closure.
Extends the transition function with epsilon-closure computation
and NFA-to-DFA determinization.
Parameters
----------
transition_graph : dict[tuple[str, str], set[str]]
Mapping from (state, symbol) pairs to sets of target states.
"""
def validate(self, alphabet: set[str], states: set[str]) -> None:
"""Validate and compute the epsilon transitive closure.
Parameters
----------
alphabet : set[str]
The valid alphabet symbols.
states : set[str]
The valid states.
"""
self._validate_input_values(alphabet, states)
self._validate_output_types()
self._homogenize_output_types()
self._validate_output_values(states)
self._add_epsilon_transitive_closure()
def _add_epsilon_transitive_closure(self):
"""Expand the transition graph with all epsilon-reachable states."""
# Collect the state graph of all epsilon transitions
transitions = {(instate, outstate)
for (instate, insymb), outs in self._transition_graph.items()
for outstate in outs if not insymb}
# Compute the transitive closure of the epsilon transition
# state graph; requires homogenization beforehand
for instate, outstate in transitive_closure(transitions):
self._transition_graph[(instate, '')] |= {outstate}
new_graph = dict(self._transition_graph)
for (instate1, insymb1), outs1 in self._transition_graph.items():
for (instate2, insymb2), outs2 in self._transition_graph.items():
# Epsilon-after-symbol: if (s1, a) -> s2 and
# (s2, epsilon) -> s3, then add s3 to (s1, a)
if instate2 in outs1 and not insymb2:
try:
new_graph[(instate1, insymb1)] |= outs2
except KeyError:
new_graph[(instate1, insymb1)] = set(outs2)
# Symbol-after-epsilon: if (s1, epsilon) -> s2 and
# (s2, a) -> s3, then add s3 to (s1, a)
if instate2 in outs1 and not insymb1 and insymb2:
try:
new_graph[(instate1, insymb2)] |= outs2
except KeyError:
new_graph[(instate1, insymb2)] = set(outs2)
self._transition_graph = new_graph
def determinize(self, initial_state: str) -> tuple[set[str], 'TransitionFunction']:
"""Prepare the transition function for subset construction.
Computes the new initial state (with epsilon closure) and
strips epsilon transitions from the graph.
Parameters
----------
initial_state : str
The NFA's initial state.
Returns
-------
tuple[set[str], TransitionFunction]
The new initial state set and epsilon-free transition function.
"""
new_initial_state = {initial_state} | self(initial_state, "")
new_transition_graph = {(instate, insymb): outstates
for (instate, insymb), outstates
in self._transition_graph.items()
if insymb}
return new_initial_state, TransitionFunction(new_transition_graph)
class FiniteStateAutomaton(FiniteStateAutomaton):
"""A finite state automaton with subset-construction determinization.
Parameters
----------
alphabet : set[str]
The input alphabet.
states : set[str]
The set of states.
initial_state : str
The start state.
final_states : set[str]
The set of accepting states.
transition_graph : TransitionFunction
The transition function.
"""
def determinize(self) -> 'FiniteStateAutomaton':
"""Determinize the FSA using the standard subset construction.
Returns
-------
FiniteStateAutomaton
A weakly equivalent deterministic FSA.
"""
new_init, new_trans = self._transition_function.determinize(self._initial_state)
alphabet = self._alphabet - {''}
# Each DFA state is a frozenset of NFA states; explore
# reachable states breadth-first from the initial state
new_init_frozen = frozenset(new_init)
worklist = [new_init_frozen]
visited = set()
new_transition = {}
while worklist:
current = worklist.pop()
if current in visited:
continue
visited.add(current)
for a in alphabet:
# Epsilon closure is already baked into new_trans
target = set()
for q in current:
target |= new_trans(q, a)
if target:
target_frozen = frozenset(target)
src_label = '_'.join(sorted(current))
tgt_label = '_'.join(sorted(target_frozen))
new_transition[(src_label, a)] = {tgt_label}
if target_frozen not in visited:
worklist.append(target_frozen)
new_states = {'_'.join(sorted(s)) for s in visited}
new_final = {'_'.join(sorted(s)) for s in visited
if any(t in s for t in self._final_states)}
return FiniteStateAutomaton(alphabet, new_states,
'_'.join(sorted(new_init_frozen)),
new_final, new_transition)
@property
def isdeterministic(self) -> bool:
"""Whether the automaton is deterministic.
Returns
-------
bool
True if the underlying transition function is deterministic.
"""
return self._transition_function.isdeterministic