Source code for tt.expressions.bexpr

"""Tools for interacting with Boolean expressions."""

import re

from contextlib import contextmanager

from tt._assertions import (
    assert_all_valid_keys,
    assert_iterable_contains_all_expr_symbols)
from tt.definitions import (
    CONSTANT_VALUES,
    DELIMITERS,
    is_valid_identifier,
    OPERATOR_MAPPING,
    SYMBOLIC_OPERATOR_MAPPING,
    TT_NOT_OP)
from tt.errors import (
    BadParenPositionError,
    EmptyExpressionError,
    ExpressionOrderError,
    InvalidArgumentTypeError,
    InvalidIdentifierError,
    UnbalancedParenError)
from tt.trees import (
    BinaryOperatorExpressionTreeNode,
    BooleanExpressionTree,
    ExpressionTreeNode,
    OperandExpressionTreeNode,
    UnaryOperatorExpressionTreeNode)


[docs]class BooleanExpression(object): """An interface for interacting with a Boolean expression. Instances of ``BooleanExpression`` are meant to be immutable and can be instantiated from a few different representations of expressions. The simplest way to make an expression object is from a string:: >>> from tt import BooleanExpression >>> BooleanExpression('(A or B) iff (C and D)') <BooleanExpression "(A or B) iff (C and D)"> If you already have an instance of :class:`BooleanExpressionTree \ <tt.trees.expr_tree.BooleanExpressionTree>` laying around, you can make a new expression object from that, too:: >>> from tt import BooleanExpressionTree >>> tree = BooleanExpressionTree( ... ['A', 'B', 'or', ... 'C', 'D', 'and', ... 'iff']) >>> BooleanExpression(tree) <BooleanExpression "(A or B) iff (C and D)"> Additionally, any tree node can be used to build an expression object. Continuing from above, let's make a new expression object for each of the sub-expressions wrapped in parentheses:: >>> BooleanExpression(tree.root.l_child) <BooleanExpression "A or B"> >>> BooleanExpression(tree.root.r_child) <BooleanExpression "C and D"> Expressions also implement the equality and inequality operators (``==`` and ``!=``). Equality is determined by the same semantic structure and the same operand names; the string used to represent the operators in two expressions is not taken into account. Here's a few examples:: >>> from tt import BooleanExpression as be >>> be('A or B or C') == be('A or B or C') True >>> be('A or B or C') == be('A || B || C') True >>> be('A or B or C') == be('A or C or B') False :param expr: The expression representation from which this object is derived. :type expr: :class:`str <python:str>`, :class:`BooleanExpressionTree\ <tt.trees.expr_tree.BooleanExpressionTree>`, or \ :class:`ExpressionTreeNode <tt.trees.tree_node.ExpressionTreeNode>` :raises BadParenPositionError: If the passed expression contains a parenthesis in an invalid position. :raises EmptyExpressionError: If the passed expressions contains nothing other than whitespace. :raises ExpressionOrderError: If the expression contains invalid consecutive operators or operands. :raises InvalidArgumentTypeError: If ``expr`` is not an acceptable type. :raises InvalidIdentifierError: If any parsed variable symbols in the expression are invalid identifiers. :raises UnbalancedParenError: If any parenthesis pairs remain unbalanced. It is important to note that aside from :exc:`InvalidArgumentTypeError \ <tt.errors.arguments.InvalidArgumentTypeError>`, all exceptions raised in expression initialization will be descendants of :exc:`GrammarError \ <tt.errors.grammar.GrammarError>`. """ def __init__(self, expr): if isinstance(expr, str): self._init_from_str(expr) elif isinstance(expr, BooleanExpressionTree): self._init_from_expr_node(expr.root) elif isinstance(expr, ExpressionTreeNode): self._init_from_expr_node(expr) else: raise InvalidArgumentTypeError( 'expr must be a str, BooleanExpressionTree, or ' 'ExpressionTreeNode') def _init_from_expr_node(self, expr_node): """Initalize this object from an expression node.""" self._symbols = [] self._symbol_set = set() self._postfix_tokens = [] self._tokens = [] self._raw_expr = '' with self._symbol_set_includes_constant_values(): self._init_from_expr_node_recursive_helper(expr_node) self._tree = BooleanExpressionTree(self._postfix_tokens) def _init_from_expr_node_recursive_helper(self, expr_node, parent=None): """Recursive helper for initializing from an expression node. This method will populate the ``_symbols``, ``_symbol_set``, ``_postfix_tokens``, ``_tokens``, and ``_raw_expr`` attributes of this object. """ if isinstance(expr_node, OperandExpressionTreeNode): operand_str = expr_node.symbol_name self._tokens.append(operand_str) self._postfix_tokens.append(operand_str) self._raw_expr += operand_str if operand_str not in self._symbol_set: self._symbols.append(operand_str) self._symbol_set.add(operand_str) elif isinstance(expr_node, UnaryOperatorExpressionTreeNode): operator_str = expr_node.symbol_name self._tokens.append(operator_str) self._raw_expr += operator_str if operator_str not in SYMBOLIC_OPERATOR_MAPPING: self._raw_expr += ' ' self._init_from_expr_node_recursive_helper( expr_node.l_child, parent=expr_node) self._postfix_tokens.append(operator_str) elif isinstance(expr_node, BinaryOperatorExpressionTreeNode): operator_str = expr_node.symbol_name include_parens = True if parent is None: include_parens = False elif isinstance(parent, BinaryOperatorExpressionTreeNode): this_operator = OPERATOR_MAPPING[operator_str] parent_operator = OPERATOR_MAPPING[parent.symbol_name] if (expr_node is parent.r_child and this_operator == parent_operator): include_parens = False elif (expr_node is parent.l_child and this_operator == parent_operator and (parent.r_child.is_really_unary or this_operator == parent.r_child.operator)): include_parens = False if include_parens: self._tokens.append('(') self._raw_expr += '(' self._init_from_expr_node_recursive_helper( expr_node.l_child, parent=expr_node) self._tokens.append(operator_str) self._raw_expr += (' ' + operator_str + ' ') self._init_from_expr_node_recursive_helper( expr_node.r_child, parent=expr_node) if include_parens: self._tokens.append(')') self._raw_expr += ')' self._postfix_tokens.append(operator_str) def _init_from_str(self, raw_expr_str): """Initalize this object from a raw expression string.""" self._raw_expr = raw_expr_str self._symbols = [] self._symbol_set = set() self._tokens = [] self._postfix_tokens = [] with self._symbol_set_includes_constant_values(): self._tokenize() self._to_postfix() self._tree = BooleanExpressionTree(self._postfix_tokens) @property def is_cnf(self): """Whether this expression is in conjunctive norma form or not. :type: :class:`bool <python:bool>` .. code-block:: python >>> from tt import BooleanExpression >>> b = BooleanExpression('(A or ~B) and (~C or D or E) and F') >>> b.is_cnf True >>> b = BooleanExpression('A nand B') >>> b.is_cnf False """ return self._tree.is_cnf @property def is_dnf(self): """Whether this expression is in conjunctive normal form or not. :type: :class:`bool <python:bool>` .. code-block:: python >>> from tt import BooleanExpression >>> b = BooleanExpression('(A and B) or (~C and D)') >>> b.is_dnf True >>> b = BooleanExpression('(op1 or !op2) and (op3 or op4)') >>> b.is_dnf False """ return self._tree.is_dnf @property def raw_expr(self): """The raw string expression, parsed upon initialization. This is what you pass into the ``BooleanExpression`` constructor; it is kept on the object as an attribute for convenience. :type: :class:`str <python:str>` .. code-block:: python >>> from tt import BooleanExpression >>> b = BooleanExpression('A nand B') >>> b.raw_expr 'A nand B' """ return self._raw_expr @property def symbols(self): """The list of unique symbols present in this expression. The order of the symbols in this list matches the order of symbol appearance in the original expression. :type: List[:class:`str <python:str>`] .. code-block:: python >>> from tt import BooleanExpression >>> b = BooleanExpression('A xor (B or C)') >>> b.symbols ['A', 'B', 'C'] """ return self._symbols @property def tokens(self): """The parsed, non-whitespace tokens of an expression. :type: List[:class:`str <python:str>`] .. code-block:: python >>> from tt import BooleanExpression >>> b = BooleanExpression('A xor (B or C)') >>> b.tokens ['A', 'xor', '(', 'B', 'or', 'C', ')'] """ return self._tokens @property def postfix_tokens(self): """Similar to the ``tokens`` attribute, but in postfix order. :type: List[:class:`str <python:str>`] .. code-block:: python >>> from tt import BooleanExpression >>> b = BooleanExpression('A xor (B or C)') >>> b.postfix_tokens ['A', 'B', 'C', 'or', 'xor'] """ return self._postfix_tokens @property def tree(self): """The expression tree representing this Boolean expression. :type: :class:`BooleanExpressionTree <tt.trees.expr_tree.BooleanExpressionTree>` .. code-block:: python >>> from tt import BooleanExpression >>> b = BooleanExpression('A xor (B or C)') >>> print(b.tree) xor `----A `----or `----B `----C """ return self._tree def __eq__(self, other): if isinstance(other, BooleanExpression): return self._tree.root == other._tree.root else: return NotImplemented def __ne__(self, other): return not (self == other) def __str__(self): return self._raw_expr def __repr__(self): return '<BooleanExpression "{}">'.format(self._raw_expr)
[docs] def evaluate(self, **kwargs): """Evaluate the Boolean expression for the passed keyword arguments. This is a checked wrapper around the :func:`evaluate_unchecked` function. :param kwargs: Keys are names of symbols in this expression; the specified value for each of these keys will be substituted into the expression for evaluation. :returns: The result of evaluating the expression. :rtype: :class:`bool <python:bool>` :raises ExtraSymbolError: If a symbol not in this expression is passed through ``kwargs``. :raises MissingSymbolError: If any symbols in this expression are not passed through ``kwargs``. :raises InvalidBooleanValueError: If any values from ``kwargs`` are not valid Boolean inputs. :raises InvalidIdentifierError: If any symbol names are invalid identifiers. Usage:: >>> from tt import BooleanExpression >>> b = BooleanExpression('A or B') >>> b.evaluate(A=0, B=0) False >>> b.evaluate(A=1, B=0) True """ assert_all_valid_keys(kwargs, self._symbol_set) assert_iterable_contains_all_expr_symbols(kwargs.keys(), self._symbol_set) return self.evaluate_unchecked(**kwargs)
[docs] def evaluate_unchecked(self, **kwargs): """Evaluate the Boolean expression without checking the input. This is used for evaluation by the :func:`evaluate` method, which validates the input ``kwargs`` before passing them to this method. :param kwargs: Keys are names of symbols in this expression; the specified value for each of these keys will be substituted into the expression for evaluation. :returns: The Boolean result of evaluating the expression. :rtype: :class:`bool <python:bool>` """ truthy = self._tree.evaluate(kwargs) return bool(truthy)
[docs] def iter_clauses(self): """Iterate over the clauses in this expression. An expression must be in conjunctive normal form (CNF) or disjunctive normal form (DNF) in order to iterate over its clauses. Here's a simple example:: >>> from tt import BooleanExpression >>> b = BooleanExpression('(~A or B) and (C or D) and (~E or ~F)') >>> for clause in b.iter_clauses(): ... clause ... <BooleanExpression "~A or B"> <BooleanExpression "C or D"> <BooleanExpression "~E or ~F"> In the case of an ambiguous expression form (between CNF and DNF), the clauses will be interpreted to be in CNF form. For example:: >>> b = BooleanExpression('A and ~B and C') >>> b.is_cnf True >>> b.is_dnf True >>> print(', '.join(str(clause) for clause in b.iter_clauses())) A, ~B, C If you want to enforce a specific CNF or DNF interpretation of the clauses, take a look at :func:`iter_cnf_clauses` and :func:`iter_dnf_clauses`. :returns: An iterator of expression objects, each representing a separate clause of this expression. :rtype: Iterator[:class:`BooleanExpression`] :raises RequiresNormalFormError: If this expression is not in conjunctive or disjunctive normal form. """ for node in self._tree.root.iter_clauses(): yield BooleanExpression(node)
[docs] def iter_cnf_clauses(self): """Iterate over the CNF clauses in this expression. .. code-block:: python >>> from tt import BooleanExpression >>> b = BooleanExpression('(A or B) and ~C') >>> for clause in b.iter_cnf_clauses(): ... print(clause) ... A or B ~C :returns: An iterator of expression objects, each representing a separate CNF clause of this expression. :rtype: Iterator[:class:`BooleanExpression`] :raises RequiresNormalFormError: If this expression is not in conjunctive normal form. """ for node in self._tree.root.iter_cnf_clauses(): yield BooleanExpression(node)
[docs] def iter_dnf_clauses(self): """Iterate over the DNF clauses in this expression. .. code-block:: python >>> from tt import BooleanExpression >>> b = BooleanExpression('(A and ~B) or (C and D and E)') >>> for clause in b.iter_dnf_clauses(): ... print(clause) ... A and ~B C and D and E :returns: An iterator of expression objects, each representing a separate DNF clause of this expression. :rtype: Iterator[:class:`BooleanExpression`] :raises RequiresNormalFormError: If this expression is not in disjunctive normal form. """ for node in self._tree.root.iter_dnf_clauses(): yield BooleanExpression(node)
def _tokenize(self): """Make the first pass through the expression, tokenizing it. This method is a helper for initializing an expression object from a string and will populate the ``_symbols``, ``_symbol_set``, and ``_tokens`` attributes of this object. :raises GrammarError: If a malformed expression is received. """ operator_strs = [k for k in OPERATOR_MAPPING.keys()] is_symbolic = {op: not op[0].isalpha() for op in operator_strs} operator_search_list = sorted(operator_strs, key=len, reverse=True) delimiters = DELIMITERS | set(k[0] for k, v in is_symbolic.items() if v) EXPECTING_OPERAND = 1 EXPECTING_OPERATOR = 2 grammar_state = EXPECTING_OPERAND idx = 0 open_paren_count = 0 num_chars = len(self._raw_expr) while idx < num_chars: c = self._raw_expr[idx].strip() if not c: # do nothing idx += 1 elif c == '(': if grammar_state != EXPECTING_OPERAND: raise BadParenPositionError('Unexpected parenthesis', self._raw_expr, idx) open_paren_count += 1 self._tokens.append(c) idx += 1 elif c == ')': if grammar_state != EXPECTING_OPERATOR: raise BadParenPositionError('Unexpected parenthesis', self._raw_expr, idx) elif not open_paren_count: raise UnbalancedParenError('Unbalanced parenthesis', self._raw_expr, idx) open_paren_count -= 1 self._tokens.append(c) idx += 1 else: is_operator = False num_chars_remaining = num_chars - idx matching_operators = [ operator for operator in operator_search_list if len(operator) <= num_chars_remaining and self._raw_expr[idx:(idx+len(operator))] == operator] if matching_operators: match = matching_operators[0] match_length = len(match) next_c_pos = idx + match_length next_c = (None if next_c_pos >= num_chars else self._raw_expr[idx + match_length]) if next_c is None: # trailing operator raise ExpressionOrderError( 'Unexpected operator "{}"'.format(match), self._raw_expr, idx) if next_c in delimiters or is_symbolic[match]: if OPERATOR_MAPPING[match] == TT_NOT_OP: if grammar_state != EXPECTING_OPERAND: raise ExpressionOrderError( 'Unexpected unary operator "{}"'.format( match), self._raw_expr, idx) else: if grammar_state != EXPECTING_OPERATOR: raise ExpressionOrderError( 'Unexpected binary operator "{}"'.format( match), self._raw_expr, idx) grammar_state = EXPECTING_OPERAND is_operator = True self._tokens.append(match) idx += match_length if not is_operator: if grammar_state != EXPECTING_OPERAND: raise ExpressionOrderError('Unexpected operand', self._raw_expr, idx) operand_end_idx = idx + 1 while (operand_end_idx < num_chars and self._raw_expr[operand_end_idx] not in delimiters): operand_end_idx += 1 operand = self._raw_expr[idx:operand_end_idx] if (operand not in CONSTANT_VALUES and not is_valid_identifier(operand)): raise InvalidIdentifierError( 'Invalid operand name "{}"'.format(operand), self._raw_expr, idx) self._tokens.append(operand) if operand not in self._symbol_set: self._symbols.append(operand) self._symbol_set.add(operand) idx = operand_end_idx grammar_state = EXPECTING_OPERATOR if open_paren_count: left_paren_positions = [m.start() for m in re.finditer(r'\(', self._raw_expr)] raise UnbalancedParenError( 'Unbalanced left parenthesis', self._raw_expr, left_paren_positions[open_paren_count-1]) if not self._tokens: raise EmptyExpressionError('Empty expression is invalid') def _to_postfix(self): """Populate the ``_postfix_tokens`` attribute.""" stack = [] for token in self._tokens: if token in self._symbol_set: self._postfix_tokens.append(token) elif token == '(': stack.append(token) elif token in OPERATOR_MAPPING.keys(): if not stack: stack.append(token) else: while (stack and stack[-1] != '(' and OPERATOR_MAPPING[stack[-1]].precedence > OPERATOR_MAPPING[token].precedence): self._postfix_tokens.append(stack.pop()) stack.append(token) elif token == ')': while stack and stack[-1] != '(': self._postfix_tokens.append(stack.pop()) stack.pop() for token in reversed(stack): self._postfix_tokens.append(token) @contextmanager def _symbol_set_includes_constant_values(self): """Context manager to include CONSTANT_VALUES in _symbol_set.""" self._symbol_set |= CONSTANT_VALUES yield self._symbol_set -= CONSTANT_VALUES