Source code for scalib.attacks.factor_graph

from typing import Sequence, Mapping, Union, Optional
from dataclasses import dataclass
from enum import IntEnum
import warnings

import numpy as np
import numpy.typing as npt

from scalib import _scalib_ext
from scalib.config import get_config

__all__ = ["FactorGraph", "BPState", "GenFactor"]


[docs]@dataclass class GenFactor: r"""Generic factor which can implement arbitrary functions between variables. A factor can be declared as `MULTI` if the factor changes between executions. For most use cases, where the implementation of the factor is static, `SINGLE` is sufficient. An example of a "butterfly" factor which computes the sum and difference of the inputs is shown below. >>> # Describe and generate the SASCAGraph >>> graph_desc = ''' ... NC 13 # arbitrary prime number ... VAR MULTI a # inputs ... VAR MULTI b ... VAR MULTI c # outputs ... VAR MULTI d ... GENERIC SINGLE f # declare the factor ... PROPERTY F0: f(a,b,c,d) # declare the relationship c = a + b and d = a - b''' >>> factor = [] >>> nc = 13 >>> # Construct the factor table >>> for a in range(nc): ... for b in range(nc): ... factor.append([a, b, (a + b) % nc, (a - b) % nc]) >>> factor = GenFactor.sparse_functional(np.array(factor, dtype=np.uint32)) >>> graph = FactorGraph(graph_desc) >>> bpstate = BPState(graph, nexec=1, gen_factors={"f": factor}) Notes ----- In general, the computational cost of sparse_functional factors is linear in the number of rows. For dense factors, the cost is exponential in the number of axes. Therefore, one should prefer to use built-in functions (^, &, !, etc) whenever possible to minimize the performance penalty of generic factors, and to keep the number entries in the factor small when they must be used. """
[docs] class GenFactorKind(IntEnum): """Choices of generic factor types""" DENSE = 0 SPARSE_FUNCTIONAL = 1
kind: GenFactorKind factor: npt.NDArray
[docs] @classmethod def dense(cls, factor: npt.NDArray[np.float64]): """A dense factor. ``factor`` is a n-dimensional array, each axis corresponds to one variable, entries in the array are probabilities. """ assert len(set(factor.shape)) == 1 assert factor.dtype == np.float64 return cls(cls.GenFactorKind.DENSE, factor)
[docs] @classmethod def sparse_functional(cls, factor: npt.NDArray[np.uint32]): """A sparse functional factor. ``factor`` is a 2D array, each row corresponding to an entry in the factor, and in each row, the values are the values of the variables. Each row corresponds to setting the factor entry to 1. Omitting a factor entry implicitly assigns that entry to 0. Sparse factors leads to better performance and lower memory usage if the number of rows is much smaller than the total number of entries in the factor. """ assert len(factor.shape) == 2 assert factor.dtype == np.uint32 return cls(cls.GenFactorKind.SPARSE_FUNCTIONAL, factor)
CstValue = Union[int, Sequence[int]] ValsAssign = Mapping[str, CstValue] GenFactors = Mapping[str, Union[GenFactor, Sequence[GenFactor]]]
[docs]class FactorGraph: r"""FactorGraph allows to run Soft Analytical Side-Channel Attacks (SASCA). A SASCA is based on a set of variables, on knowledge of relationships between those variables and information about the values of the variables. Variables have values in a binary finite field of size `nc`, and are viewed as bit vectors. Those values are represented as integers in [0, nc). Variables can be qualified as `SINGLE` or `MULTI`, which relates to the multiple-execution feature of `FactorGraph`: when performing a SASCA, it is useful to acquire multiple execution traces. In these executions, some variables stay the same (e.g. an encryption key), and other change (e.g. plaintext, masked variables, etc.). The `SINGLE` qualifier should be used for variables that remain the same and `MULTI` for variables that change. `FactorGraph` will then build a graph where each `MULTI` variable is replicated `n` times (once for each execution), as well as all the relationships that relate at least on `MULTI` variable. Relationships between variables are bitwise XOR, bitwise AND, bitwise OR, bitiwise negation, modular addition, modular multiplication and lookup table. A lookup table can describe any function that maps a single variable to another variable. Description of `nc`, the variables, and the relationships is given in a text format specified below. Finally, variables are of two kinds: the public values and the variables. The public values have a single known value, while variables have an uncertain value modelled as a probability distribution. The SASCA computes these distributions from the relationships encoded by the graph, and from prior information. The prior distributions are by default uniform, but this can be changed. An attack attempting to recover the secret key byte `k` is shown below. >>> # Describe and generate the SASCAGraph >>> graph_desc = ''' ... # Small unprotected Sbox example ... NC 256 # Graph over GF(256) ... TABLE sbox # Sbox ... VAR SINGLE k # key (to recover !) ... PUB MULTI p # plaintext (known) ... VAR MULTI x # Sbox input ... VAR MULTI y # Sbox output (whose leakage is targeted) ... PROPERTY x = k ^ p # Key addition ... PROPERTY y = sbox[x] # Sbox lookup ... ''' >>> sbox = np.arange(256, dtype=np.uint32) # don't use this S-box ;) >>> graph = FactorGraph(graph_desc, {"sbox": sbox}) >>> # n is the number of traces for our attack. >>> n = 2 >>> plaintexts = np.array([0, 1], dtype=np.uint32) >>> bp = BPState(graph, n, {"p": plaintexts}) >>> y_leakage = np.random.rand(2, 256) # this might come from an LDA >>> y_leakage = y_leakage / y_leakage.sum(axis=1, keepdims=True) >>> bp.set_evidence("y",y_leakage) >>> # Solve graph >>> bp.bp_loopy(it=3, initialize_states=True) >>> # Get key distribution and derive key guess >>> k_distri = bp.get_distribution("k") >>> key_guess = np.argmax(k_distri) By running a belief propagation algorithm (see [1]_), the distributions on all the variables are updated based on their initial distributions. The `SASCAGraph` can be solved by using `run_bp()`. Notes ----- **The graph description format** is a text line-oriented format (each line of text is a statement) that describes a set of variables and relationships between those variables. The ordering of the statements is irrelevant and whitespace is irrelevant except for newlines and around keywords. End-of-line comments start with the `#` symbol. The statements are: - `NC <nc>`: specifies the field size (must be a power of two). There must be one `NC` statement in the description. - `PUB SINGLE|MULTI variable_name`: declares a public variable. `variable_name` is an identifier of the variable (allowed characters are letters, digits and underscore). One of the qualifiers `SINGLE` or `MULTI` must be given. - `VAR SINGLE|MULTI variable_name`: declares a variables. - `PROPERTY w = x ^ y ^ z`: declares a bitwise XOR property. There can be any number of operands. - `PROPERTY z = x & y`: declares a bitwise AND property. - `PROPERTY x = t[y]`: declares a LOOKUP property (`y` is the lookup of the table `t` at index `y`). No public variable is allowed in this property. - `PROPERTY x = !y`: declares a bitwise NOT property. No public variable is allowed in this property. - `PROPERTY x = y + z - w`: declares a modular sum (+ and - are supported, with arbitrary number of terms). - `PROPERTY f(x, y, z)`: declares a "Generic factor" property, f must be declared as a GENERIC. - `TABLE` t = [0, 3, 2, 1]`: Declares a table that can be used in a LOOKUP. The values provided in the table must belong to the interval [0, nc). The initialization expression can be omitted from the graph description (e.g. `TABLE t`) and be given with `tables` parameter. - `GENERIC SINGLE|MULTI f`: declares a "Generic factor" f. A generic factor can be used to define an arbitrary functional relationships between variables. See the docs for ``GenFactor`` for more information and examples. **Note**: if the `MULTI` feature doesn't match your use-case, using only `SINGLE` variables works (or set the number of execution to 1). .. [1] "Soft Analytical Side-Channel Attacks". N. Veyrat-Charvillon, B. Gérard, F.-X. Standaert, ASIACRYPT2014. Parameters ---------- graph: string The graph description. """ def __init__( self, graph_text: str, tables: Optional[Mapping[str, npt.NDArray[np.uint32]]] = None, ): if tables is None: tables = dict() self._inner = _scalib_ext.FactorGraph(graph_text, tables)
[docs] def sanity_check( self, pub_assignment: ValsAssign, var_assignment: ValsAssign, factor_assignment: Optional[GenFactors] = None, ): """Verify that the graph is compatible with example variable assignments. If the graph is not compatible, raise a ``ValueError``. Parameters ---------- pub_assignment: For each public variable its value for all test executions. var_assignment: For each non-public variable its value for all test executions. factor_assignment: The probability tables (i.e., ``GenFactor`` assignments) for each generic factor. Returns ------- None """ if factor_assignment is None: factor_assignment = dict() self._inner.sanity_check(pub_assignment, var_assignment, factor_assignment)
[docs] def vars(self) -> Sequence[str]: """Return the names of the variables in the graph.""" return self._inner.var_names()
[docs] def factors(self) -> Sequence[str]: """Return the names of the factors in the graph.""" return self._inner.factor_names()
[docs]class BPState: """Belief propagation state associated to a :class:`FactorGraph`. This is a stateful object on which belief propagation operations can be run. See :class:`scalib.attacks.FactorGraph` for usage example. """ def __init__( self, factor_graph: FactorGraph, nexec: int, public_values: Optional[ValsAssign] = None, gen_factors: Optional[GenFactors] = None, ): if public_values is None: public_values = dict() if gen_factors is None: gen_factors = dict() self._fg = factor_graph self._inner = factor_graph._inner.new_bp(nexec, public_values, gen_factors) @property def fg(self) -> FactorGraph: """The associated factor graph.""" return self._fg
[docs] def set_evidence(self, var: str, distribution: Optional[npt.NDArray[np.float64]]): r"""Sets prior distribution of a variable. Parameters ---------- var : Identifier of the variable to assign the distribution to. distribution : Distribution to assign. If `var` is SINGLE, must be of shape `(nc,)`. If `var` is MULTI, must be of shape `(nexec,nc)`. If None, sets the prior distribution to uniform. """ if distribution is None: self._inner.drop_evidence(var) else: self._inner.set_evidence(var, distribution)
[docs] def bp_loopy( self, it: int, initialize_states: bool, clear_beliefs: bool = True, ): """Runs belief propagation algorithm on the current state of the graph. This is a shortcut for calls to :meth:`propagate_var` and :meth:`propagate_factor`. It is equivalent to: .. code-block:: python if initialize_states: for var in self.fg.vars(): self.propagate_var(var) for _ in range(it): for factor in self.fg.factors(): self.propagate_factor(factor) for var in self.fg.vars(): self.propagate_var(var) Parameters ---------- it : Number of iterations of belief propagation. initialize_states: Whether to update variable distributions before running the BP iterations. Recommended after using :func:`BPState.set_evidence`. clear_beliefs: Whether to clear beliefs between vars -> factors. Setting to False can help debugging. Default value is True. """ if initialize_states: self._inner.propagate_all_vars(get_config(), clear_beliefs) self._inner.propagate_loopy_step(it, get_config(), clear_beliefs)
[docs] def bp_acyclic( self, dest: str, *, clear_intermediates: bool = True, clear_evidence: bool = False, ): """Runs the non-loopy belief propagation algorithm on the current state of the graph. This only works if the graph is not cyclic. Parameters ---------- dest: Variable for which the belief propagation is computed. clear_intermediates: Drop the intermetidate distributions and beliefs that are computed. clear_evidence: Drop the evidence for the variables, once used in the algorithm. """ self._inner.propagate_acyclic( dest, clear_intermediates, clear_evidence, get_config() )
[docs] def get_distribution(self, var: str) -> Optional[npt.NDArray[np.float64]]: r"""Returns the current distribution of a variable `var`. Parameters ---------- var : string Identifier of the variable for which distribution must be returned. Distribution cannot be obtained for public variables. Returns ------- distribution : array_like, f64 Distribution of `var`. If `var` is SINGLE, distribution has shape `(nc)`. Else, it has shape `(n,nc)`. If the variable has a uniform distribution, None may be returned (but this is not guaranteed). """ return self._inner.get_state(var)
[docs] def is_cyclic(self) -> bool: """Test is the graph is cyclic.""" return self._inner.is_cyclic()
[docs] def set_distribution( self, var: str, distribution: Optional[npt.NDArray[np.float64]] ): r"""Sets current distribution of a variable in the BP. .. deprecated:: 0.6.1 This method only impacts subsequent calls to ``get_distribution`` and does not affect the subsequent behavior of belief propagation, its use is therefore probably incorrect. For dropping state in order to reduce RAM usage, use ``drop_distribution``. Parameters ---------- var : Identifier of the variable to assign the distribution to. distribution : Distribution to assign. If `var` is SINGLE, must be of shape `(nc,)`. If `var` is MULTI, must be of shape `(nexec,nc)`. If None, sets the distribution to uniform. """ warnings.warn( "This method does not affect the subsequent behavior of belief propagation. " + "Usage is probably incorrect. For dropping state (memory usage savings), " + "use drop_distribution.", DeprecationWarning, ) if distribution is None: self._inner.drop_state(var) else: self._inner.set_state(var, distribution)
[docs] def drop_distribution(self, var: str): r"""Delete the current distribution of a variable in the BP. This method only impacts subsequent calls to ``get_distribution`` and does not affect the subsequent behavior of belief propagation, its main use-case is therefore reducing RAM usage. Parameters ---------- var : Identifier of the variable for which the distribution is deleted. """ self._inner.drop_state(var)
[docs] def get_belief_to_var( self, var: str, factor: str ) -> Optional[npt.NDArray[np.float64]]: r"""Returns the current belief from factor to var. Parameters ---------- var : string Identifier of the variable for which distribution must be returned. factor : string Identifier of the factor for which distribution must be returned. Returns ------- distribution : array_like, f64 Belief on the edge from `factor` to `var`. If `factor` is SINGLE, distribution has shape `(nc)`. Else, it has shape `(n,nc)`. If the belief is a uniform distribution, None may be returned (but this is not guaranteed). """ return self._inner.get_belief_to_var(var, factor)
[docs] def get_belief_from_var( self, var: str, factor: str ) -> Optional[npt.NDArray[np.float64]]: r"""Returns the current belief from var to factor. Parameters ---------- var : string Identifier of the variable for which distribution must be returned. factor : string Identifier of the factor for which distribution must be returned. Returns ------- distribution : array_like, f64 Belief on the edge from `var` to `factor`. If `factor` is SINGLE, distribution has shape `(nc)`. Else, it has shape `(n,nc)`. If the belief is a uniform distribution, None may be returned (but this is not guaranteed). """ return self._inner.get_belief_from_var(var, factor)
[docs] def propagate_var( self, var: str, factors: list[str] | None = None, clear_beliefs: bool = True ): """Run belief propagation on variable var. This fetches beliefs from adjacent factors, computes the var distribution, and sends updated beliefs to all adjacent factors. Parameters ---------- var: Identifier of the variable. factors: New beliefs are propagated from the variable to these factors. If ``None``, then we propagate to all adjacent factors. clear_beliefs: Whether to clear beliefs between ``vars -> factors``. Setting to ``False`` can help debugging. Default value is ``True``. """ if factors is None: return self._inner.propagate_var(var, get_config(), clear_beliefs) else: return self._inner.propagate_var_to( var, factors, get_config(), clear_beliefs, clear_evidence=False )
[docs] def propagate_factor(self, factor: str, vars: list[str] | None = None): """Run belief propagation on the given factor. This fetches beliefs from adjacent variables and sends updated beliefs to all adjacent variables. Parameters ---------- factor: Identifier of the variable. vars: New beliefs are propagated from the factor to these variables. If ``None``, then we propagate to all adjacent variables. """ if vars is None: return self._inner.propagate_factor_all(factor, config=get_config()) else: return self._inner.propagate_factor( factor, vars, clear_incoming=False, config=get_config() )
[docs] def debug(self): """Debug-print the current state.""" s = [] s.append("VAR DISTRIBUTION") for var in self._inner.graph().var_names(): s.append(f"\tVar {var}") s.append(repr(self.get_distribution(var))) s.append("VARS TO FACTORS") for factor in self._inner.graph().factor_names(): for var in self._inner.graph().factor_scope(factor): s.append(f"\t{var} -> {factor}") s.append(repr(self.get_belief_from_var(var, factor))) s.append("FACTORS TO VARS") for factor in self._inner.graph().factor_names(): for var in self._inner.graph().factor_scope(factor): s.append(f"\t{factor} -> {var}") s.append(repr(self.get_belief_to_var(var, factor))) return "\n".join(s)