Some useful definitions

Joint probability

The joint probability \(\mathbb{P}(A, B)\) of two events \(A \in \mathcal{F}\) and \(B \in \mathcal{F}\) is defined as the probability of the intersection of those two events: \(\mathbb{P}(A, B) = \mathbb{P}(A \cap B)\). This probability must be defined, given that \(\mathcal{F}\) is closed under countable intersection.

Define FiniteMeasurableSpace
import math
from collections.abc import Iterable
from functools import reduce
from itertools import chain, combinations

# A sample space is an immutable set of outcomes, each named by a string.
SampleSpace = frozenset[str]
# An event is an immutable set of outcomes.
Event = frozenset[str]
# A sigma-algebra is an immutable collection of events.
SigmaAlgebra = frozenset[Event]

def powerset(iterable: Iterable) -> Iterable:
    """Enumerate every subset of the given elements.

    Follows the ``powerset`` recipe from the itertools documentation:
    https://docs.python.org/3/library/itertools.html#itertools-recipes

    Parameters
    ----------
    iterable : Iterable
        The elements to draw subsets from. It is consumed once, up front.

    Returns
    -------
    Iterable
        A lazy iterator over the subsets, each one a tuple, ordered by size
        from the empty tuple up to the tuple holding every element.
    """
    elements = tuple(iterable)
    # One batch of combinations per subset size, 0 through len(elements).
    by_size = (
        combinations(elements, size) for size in range(len(elements) + 1)
    )
    return chain.from_iterable(by_size)

class FiniteMeasurableSpace:
  """A finite measurable space.

  The pair is validated on construction, so an instance always holds a
  family of events that is a σ-algebra on the atoms.

  Parameters
  ----------
  atoms : SampleSpace
      The atoms of the space.
  sigma_algebra : SigmaAlgebra
      The sigma-algebra of the space.

  Raises
  ------
  ValueError
      If ``sigma_algebra`` is empty, contains an event that is not a subset
      of ``atoms``, or is not closed under complements, unions and
      intersections.
  """
  def __init__(self, atoms: SampleSpace, sigma_algebra: SigmaAlgebra):
    self._atoms = atoms
    self._sigma_algebra = sigma_algebra

    self._validate()

  def _validate(self):
    """Check that the stored family of events is a σ-algebra on the atoms.

    Raises
    ------
    ValueError
        If any of the σ-algebra axioms is violated.
    """
    # An empty family passes every check below vacuously, yet it is not a
    # σ-algebra because it does not contain the sample space.
    if not self._sigma_algebra:
      raise ValueError("The σ-algebra must contain the sample space")

    for subset in self._sigma_algebra:
      if not subset <= self._atoms:
        raise ValueError("All events must be a subset of the atoms")

      if (self._atoms - subset) not in self._sigma_algebra:
        raise ValueError("The σ-algebra must be closed under complements")

    # Closure under pairwise unions and intersections implies closure under
    # every finite union and intersection by induction, so checking pairs is
    # enough. This keeps validation quadratic in the number of events rather
    # than exponential (one pass per subset of the family).
    for left, right in combinations(self._sigma_algebra, 2):
      if (left | right) not in self._sigma_algebra:
        raise ValueError(
            "The σ-algebra must be closed under countable union"
        )

      if (left & right) not in self._sigma_algebra:
        raise ValueError(
            "The σ-algebra must be closed under countable intersection"
        )

  @property
  def atoms(self) -> SampleSpace:
    """The atoms of the space."""
    return self._atoms

  @property
  def sigma_algebra(self) -> SigmaAlgebra:
    """The sigma-algebra of the space."""
    return self._sigma_algebra
Define ProbabilityMeasure
from itertools import combinations

class ProbabilityMeasure:
  """A probability measure with finite support.

  The measure is validated on construction: it must assign a non-negative
  probability to every event of the domain, give the sample space
  probability 1, and be additive on disjoint events.

  Parameters
  ----------
  domain : FiniteMeasurableSpace
      The domain of the probability measure.
  measure : dict[Event, float]
      The graph of the measure.

  Raises
  ------
  ValueError
      If the measure is undefined for some event, assigns a negative
      probability, does not give the sample space probability 1, or is not
      additive.
  """

  # Absolute tolerance used for every float comparison during validation.
  # It corresponds to the four decimal places of precision the additivity
  # check has always worked to.
  _TOLERANCE = 1e-4

  def __init__(self, domain: FiniteMeasurableSpace, measure: dict[Event, float]):
    self._domain = domain
    self._measure = measure

    self._validate()

  def __call__(self, event: Event) -> float:
    """Return the probability of an event.

    Parameters
    ----------
    event : Event
        The event to measure.

    Returns
    -------
    float
        The probability of the event.
    """
    return self._measure[event]

  def _validate(self):
    """Check the probability axioms against the domain's event space.

    Raises
    ------
    ValueError
        If any axiom is violated.
    """
    measure = self._measure
    events = self._domain.sigma_algebra
    tolerance = self._TOLERANCE

    for event in events:
      if event not in measure:
        raise ValueError("Probability measure must be defined for all events.")

      # Additivity and normalization alone do not rule out negative values
      # (e.g. -0.5 and 1.5 on two complementary events), so check explicitly.
      if measure[event] < -tolerance:
        raise ValueError("Probabilities must be non-negative.")

    total = measure[frozenset(self._domain.atoms)]
    if not math.isclose(total, 1, abs_tol=tolerance):
      raise ValueError("The probability of the sample space must be 1.")

    # Additivity on every disjoint pair implies additivity on every finite
    # disjoint family by induction, so pairs are enough; this avoids walking
    # all subsets of the event space.
    for e1, e2 in combinations(events, 2):
      if e1 & e2:
        continue

      prob_union = measure[e1 | e2]
      prob_sum = measure[e1] + measure[e2]

      # Compare with a tolerance rather than by rounding: rounding can split
      # two nearly equal values that straddle a rounding boundary.
      if not math.isclose(prob_union, prob_sum, abs_tol=tolerance):
        raise ValueError("The measure does not satisfy 𝜎-additivity.")

  def are_mutually_exclusive(self, *events: Event) -> bool:
    """Check whether events are pairwise disjoint.

    Parameters
    ----------
    *events : Event
        The events to check.

    Returns
    -------
    bool
        True if no two events overlap.

    Raises
    ------
    ValueError
        If any event is not in the event space.
    """
    self._validate_events(events)
    return not any(e1.intersection(e2) for e1, e2 in combinations(events, 2))

  def _validate_events(self, events: Iterable[Event]):
    """Raise ``ValueError`` naming the first event outside the event space."""
    for i, event in enumerate(events):
      if event not in self._domain.sigma_algebra:
        raise ValueError(f"event{i} is not in the event space.")
class ProbabilityMeasure(ProbabilityMeasure):

  def __call__(self, *events: Event) -> float:
    """Return the joint probability of zero or more events.

    Parameters
    ----------
    *events : Event
        The events whose joint probability to compute. With no events the
        intersection is the whole sample space.

    Returns
    -------
    float
        The probability of the intersection of the events.

    Raises
    ------
    ValueError
        If any event is not in the event space.
    """
    self._validate_events(events)

    # Seed the fold with the sample space: the intersection of no events is
    # then well defined, and a leading plain ``set`` no longer trips the
    # ``frozenset.intersection`` descriptor. Intersecting with the sample
    # space leaves any event of the space unchanged.
    intersection = reduce(
        frozenset.intersection, events, frozenset(self._domain.atoms)
    )

    return self._measure[intersection]

In our running example, the probability of a high back vowel is the joint probability \(\mathbb{P}(H, B)\).

Define generate_sigma_algebra
def generate_sigma_algebra(family: SigmaAlgebra) -> SigmaAlgebra:
  """Generate a sigma-algebra from a family of sets.

  The sample space is taken to be the union of all sets in the family. The
  family is then closed under complement (relative to that sample space),
  union and intersection until nothing new appears.

  Parameters
  ----------
  family : SigmaAlgebra
      The family of sets from which to generate the sigma-algebra.

  Returns
  -------
  SigmaAlgebra
      The smallest sigma-algebra on the union of the family that contains
      the family. For an empty family this is the sigma-algebra holding only
      the empty set.
  """
  sample_space = frozenset().union(*family)

  closed = set(family) | {sample_space}

  # Worklist closure: only sets discovered in the previous round need to be
  # combined again, and pairwise union/intersection is enough because every
  # finite union or intersection is a chain of pairwise ones. This avoids
  # enumerating every subset of the family on each round.
  frontier = set(closed)

  while frontier:
    candidates = set()

    for new in frontier:
      # Without complements the result would not be a sigma-algebra unless
      # the input family already happened to be closed under them.
      candidates.add(sample_space - new)

      for known in closed:
        candidates.add(new | known)
        candidates.add(new & known)

    frontier = candidates - closed
    closed |= frontier

  return frozenset(closed)
Define highness_backness_space
# The impossible event and the full sample space of vowels.
emptyset = frozenset()
vowels = frozenset({'e', 'i', 'o', 'u', 'æ', 'ɑ', 'ɔ', 'ə', 'ɛ', 'ɪ', 'ʊ'})

# Highness: the high vowels and everything else.
high = frozenset({'i', 'u', 'ɪ', 'ʊ'})
nonhigh = vowels.difference(high)
f_highness = frozenset([emptyset, high, nonhigh, vowels])

# Backness: the back vowels and everything else.
back = frozenset({'u', 'ʊ', 'o', 'ɔ'})
nonback = vowels.difference(back)
f_backness = frozenset([emptyset, back, nonback, vowels])

# Each distinction on its own already yields a measurable space.
highness_space = FiniteMeasurableSpace(atoms=vowels, sigma_algebra=f_highness)
backness_space = FiniteMeasurableSpace(atoms=vowels, sigma_algebra=f_backness)

# Combining both distinctions requires generating the event space that
# contains the events of each.
f_highness_backness = generate_sigma_algebra(f_highness.union(f_backness))
highness_backness_space = FiniteMeasurableSpace(
    atoms=vowels,
    sigma_algebra=f_highness_backness,
)

# Uniform measure: an event's probability is its share of the vowels.
measure_highness_backness = ProbabilityMeasure(
    domain=highness_backness_space,
    measure={
        event: len(event)/len(highness_backness_space.atoms)
        for event in highness_backness_space.sigma_algebra
    },
)

# Joint probability of a vowel being both high and back.
measure_highness_backness(high, back)
0.18181818181818182

Conditional probability

The probability of an event \(A \in \mathcal{F}\) conditioned on (or given) an event \(B \in \mathcal{F}\) is defined as \(\mathbb{P}(A \mid B) = \frac{\mathbb{P}(A, B)}{\mathbb{P}(B)}\). Note that \(\mathbb{P}(A \mid B)\) is undefined if \(\mathbb{P}(B) = 0\).

class ProbabilityMeasure(ProbabilityMeasure):

  def __or__(self, conditions: list[Event]) -> ProbabilityMeasure:
    """Condition the measure on a set of events.

    Parameters
    ----------
    conditions : list[Event]
        The events to condition on. An empty list conditions on the whole
        sample space.

    Returns
    -------
    ProbabilityMeasure
        A new measure conditioned on the intersection of the events.

    Raises
    ------
    ValueError
        If any condition is not in the event space.
    ZeroDivisionError
        If the intersection of the conditions has probability 0.
    """
    conditions = list(conditions)

    # Check every condition on its own: the intersection can land inside the
    # event space even when one of the operands is not an event.
    for event in conditions:
      if event not in self._domain.sigma_algebra:
        raise ValueError("The conditions must be in the event space.")

    # Seeding with the sample space makes the empty list well defined and
    # leaves the intersection of actual events unchanged.
    condition = reduce(
        frozenset.intersection, conditions, frozenset(self._domain.atoms)
    )

    self._validate_condition(condition)

    # The normalizer is the same for every event, so compute it once.
    normalizer = self(condition)

    measure = {
        event: self(event, condition)/normalizer
        for event in self._domain.sigma_algebra
    }

    return ProbabilityMeasure(self._domain, measure)

  def _validate_condition(self, condition: Event):
    """Reject a condition that is not an event or that has probability 0.

    Raises
    ------
    ValueError
        If ``condition`` is not in the event space.
    ZeroDivisionError
        If ``condition`` has probability 0.
    """
    if condition not in self._domain.sigma_algebra:
      raise ValueError("The conditions must be in the event space.")

    if self._measure[condition] == 0:
      raise ZeroDivisionError("Conditions cannot have probability 0.")

In our running example, the probability that a vowel is high given that it is back is the conditional probability \(\mathbb{P}(H \mid B) = \frac{\mathbb{P}(H, B)}{\mathbb{P}(B)}\).

# Uniform measure over the combined event space: an event's probability is
# the fraction of vowels it contains.
highness_backness_measure = {
    e: len(e)/len(highness_backness_space.atoms)
    for e in highness_backness_space.sigma_algebra
}

measure_highness_backness = ProbabilityMeasure(
    domain=highness_backness_space,
    measure=highness_backness_measure,
)

# Condition on the vowel being back.
measure_given_back = measure_highness_backness | [back]

# Probability that a vowel is high given that it is back.
measure_given_back(high)
0.5

From this definition, it immediately follows that \(\mathbb{P}(A, B) = \mathbb{P}(A \mid B)\mathbb{P}(B) = \mathbb{P}(B \mid A)\mathbb{P}(A)\), which in turn implies Bayes’ theorem.

\[\mathbb{P}(A \mid B) = \frac{\mathbb{P}(A, B)}{\mathbb{P}(B)} = \frac{\mathbb{P}(B \mid A)\mathbb{P}(A)}{\mathbb{P}(B)}\]

Bayes’ theorem will be very important in this course.

Another important consequence of the definition of conditional probability is the chain rule:

\[\begin{align*}\mathbb{P}(E_1, E_2, E_3, \ldots, E_N) &= \mathbb{P}(E_1)\mathbb{P}(E_2 \mid E_1)\mathbb{P}(E_3 \mid E_1, E_2)\ldots\mathbb{P}(E_N \mid E_1, E_2, \ldots, E_{N-1})\\ &= \mathbb{P}(E_1)\prod_{i=2}^N \mathbb{P}(E_i\mid E_1, \ldots, E_{i-1})\end{align*}\]

The chain rule will also be very important in this course.

Independence

An event \(A \in \mathcal{F}\) is independent of an event \(B \in \mathcal{F}\) (under \(\mathbb{P}\)) if \(\mathbb{P}(A \mid B) = \mathbb{P}(A)\). A theorem that immediately follows from this definition is that \(A\) and \(B\) are independent under \(\mathbb{P}\) if and only if \(\mathbb{P}(A, B) = \mathbb{P}(A \mid B)\mathbb{P}(B) = \mathbb{P}(A)\mathbb{P}(B)\).

class ProbabilityMeasure(ProbabilityMeasure):

  def are_independent(self, *events: Event) -> bool:
    """Check whether events are mutually independent.

    Mutual independence requires the joint probability to factorize for
    every sub-collection of two or more of the events, not only for all of
    them together. For exactly two events this is the single check that
    their joint probability equals the product of their probabilities.

    Parameters
    ----------
    *events : Event
        The events to check. Fewer than two events are trivially
        independent.

    Returns
    -------
    bool
        True if, for every sub-collection of at least two events, the joint
        probability equals the product of the marginals (up to floating-point
        rounding).

    Raises
    ------
    ValueError
        If any event is not in the event space.
    """
    self._validate_events(events)

    for size in range(2, len(events) + 1):
      for subset in combinations(events, size):
        joint = self(*subset)
        product = math.prod(self(e) for e in subset)

        # Probabilities are floats, so an exact == can reject a product that
        # differs from the joint only by rounding error.
        if not math.isclose(joint, product, abs_tol=1e-12):
          return False

    return True

In our running example of an event space structured by high and back vowels, assuming all vowels are equiprobable, none of the nontrivial events (those other than the empty event and the whole sample space) are independent of one another. In the discrete event space, many events will be independent.

# Rebuild the uniform measure so that it picks up the latest class
# definition, which provides the independence check.
measure_highness_backness = ProbabilityMeasure(
    domain=highness_backness_space,
    measure={
        event: len(event)/len(highness_backness_space.atoms)
        for event in highness_backness_space.sigma_algebra
    },
)

# Are "high" and "back" independent under the uniform measure?
measure_highness_backness.are_independent(high, back)
False

Note that independence is not the same as mutual exclusivity; indeed, mutually exclusive events are not independent, since \(\mathbb{P}(A \mid B) = \frac{\mathbb{P}(A, B)}{\mathbb{P}(B)} = \frac{0}{\mathbb{P}(B)} = 0\) (or is undefined if \(\mathbb{P}(B) = 0\)) regardless of \(\mathbb{P}(A)\). Therefore, either \(\mathbb{P}(A \mid B)\) does not equal \(\mathbb{P}(A)\), or \(\mathbb{P}(A) = 0\), in which case \(\mathbb{P}(B \mid A)\) is undefined even when \(\mathbb{P}(A \mid B)\) is defined.