# Source code for scalib.modeling.rldaclassifier

import numpy as np
import numpy.typing as npt

from scalib import _scalib_ext
from scalib.config import get_config
import scalib.utils


class RLDAClassifier:
    r"""Regression-based Linear Discriminant Analysis.

    Models the leakage using a regression-based linear discriminant analysis
    (RLDA) classifier :footcite:p:`RLDA`, which can efficiently handle long
    traces and large number of classes.

    In a nutshell, this model performs LDA with the class means modelled as
    linear regression based on the :math:`n_b` bits of the class value.
    Compared to the :class:`scalib.modeling.LDAClassifier`, this model will
    perform better when the number of classes is large and/or there are few
    profiling traces.

    Internally, it first estimates the coefficients of the linear regression,
    then computes a projection matrix that reduces the dimensionality of the
    gaussian template to :math:`p` dimensions and makes the covariance matrix
    the identity. It is then able to predict the leakage likelihood

    .. math::
        \hat{\mathsf{f}}[\mathbf{l}|X=x] = \alpha \exp\left( -\frac{1}{2} \lVert\mathbf{W}^T\mathbf{l} - \mathbf{A}\mathbf{\beta}(x)\rVert^2 \right).

    Where :math:`\mathbf{W}` is the projection matrix, :math:`\mathbf{A}` the
    projected regression coefficients, and :math:`\mathbf{\beta}(x)` the
    coefficients of :math:`x`. The parameter
    :math:`\alpha = 1/\sqrt{(2\pi)^p\lvert\hat\Sigma_\mathbf{W}\rvert}` does
    not need to be calculated as it will get canceled out when applying
    Bayes' law.

    :class:`RLDAClassifier` provides the probability for each of the
    :math:`2^{n_b}` classes with :meth:`predict_proba`.

    Examples
    --------
    >>> from scalib.modeling import RLDAClassifier
    >>> import numpy as np
    >>> traces_model = np.random.randint(0,256,(5000,10),dtype=np.int16)
    >>> labels_model = np.random.randint(0,256,(5000,1),dtype=np.uint64)
    >>> rlda = RLDAClassifier(8, 3)
    >>> rlda.fit_u(traces_model, labels_model)
    >>> rlda.solve()
    >>> traces_test = np.random.randint(0,256,(5000,10),dtype=np.int16)
    >>> prs = rlda.predict_proba(traces_test, 0)

    References
    ----------

    .. footbibliography::
    """

    def __init__(self, nb: int, p: int):
        """
        Parameters
        ----------
        nb:
            Number of bits of the profiled variables.
        p:
            Number of dimensions in the linear subspace.

        Notes
        -----
        The trace length and the number of profiled variables are not given
        here: they are taken from the arrays passed to the first
        :meth:`fit_u` call.
        """
        self._ns = None
        self._nv = None
        self._p = p
        self._nb = nb
        self._init = False
        self._solved = False
        # Native model object; created by the first call to `fit_u`.
        self._inner = None

    def _fitted_inner(self):
        """Return the native model, or fail clearly if `fit_u` was never called.

        Raises
        ------
        AttributeError
            If no native model exists yet. The exception type matches what an
            access to the missing attribute raised before this guard existed.
        """
        # getattr: tolerate instances created without running __init__.
        inner = getattr(self, "_inner", None)
        if inner is None:
            raise AttributeError(
                "RLDAClassifier has no fitted model: call fit_u() first"
            )
        return inner

    def fit_u(
        self,
        traces: npt.NDArray[np.int16],
        x: npt.NDArray[np.uint64],
        gemm_mode: int = 1,
    ):
        """Update statistical model estimates with additional data.

        This can be called multiple times, the state is accumulated.

        Parameters
        ----------
        traces : array_like, int16
            Array that contains the traces. Shape ``(n,ns)``.
        x : array_like, uint64
            Labels for each trace. Shape ``(n,nv)``.
        gemm_mode : int, default 1
            Forwarded unchanged to the native ``update`` routine.
            NOTE(review): the accepted values and their meaning are defined by
            the native extension and are not visible in this module.
        """
        traces = scalib.utils.clean_traces(traces, self._ns)
        x = scalib.utils.clean_labels(x, self._nv, exp_type=np.uint64)
        if not self._init:
            ns = traces.shape[1]
            nv = x.shape[1]
            # Build the native object first and commit the Python-side state
            # only once that succeeded, so a failed construction does not
            # leave the instance flagged as initialised without a model.
            inner = _scalib_ext.RLDA(self._nb, ns, nv, self._p)
            self._ns = ns
            self._nv = nv
            self._inner = inner
            self._init = True
        # The labels are handed over transposed.
        self._inner.update(traces, x.T, gemm_mode, get_config())

    def solve(self):
        """Solve the RLDA equations.

        Notes
        -----
        Once this has been called, predictions can be performed.

        Raises
        ------
        AttributeError
            If :meth:`fit_u` has not been called yet.
        """
        self._fitted_inner().solve(get_config())
        self._solved = True

    def get_proj(self) -> npt.NDArray[np.float64]:
        """Returns the projection matrix.

        Returns
        -------
        array_like, float64
            Shape ``(nv,p,ns)``.

        Raises
        ------
        AttributeError
            If :meth:`fit_u` has not been called yet.
        """
        return self._fitted_inner().get_norm_proj()

    def get_proj_coefs(
        self,
    ) -> npt.NDArray[np.float64]:
        """The projected regression coefficients.

        Returns
        -------
        array_like, float64
            Shape ``(nv,p,nb+1)``.

        Raises
        ------
        AttributeError
            If :meth:`fit_u` has not been called yet.
        """
        return self._fitted_inner().get_proj_coefs()

    def predict_proba(
        self, traces: npt.NDArray[np.int16], var: int
    ) -> npt.NDArray[np.float64]:
        r"""Computes the probability for each of the classes for the requested
        variables.

        Parameters
        ----------
        traces:
            Array that contains the traces. Shape ``(n,ns)``.
        var:
            Id (position in the ``x`` array) of the variable for which the
            probabilities are computed.

        Returns
        -------
        array_like, f64
            Probabilities. Shape ``(n, nc)``.

        Raises
        ------
        AssertionError
            If :meth:`solve` has not been called yet.
        """
        # Explicit raise rather than an `assert` statement: the check must
        # survive `python -O`. Exception type and message are kept as before.
        if not self._solved:
            raise AssertionError("Model not solved")
        return self._inner.predict_proba(traces, var, get_config())

    class ClusteredModel:
        """Clustered RLDA model, see :func:`RLDAClassifier.get_clustered_model`."""

        # Instances get an `_inner` attribute assigned by
        # `RLDAClassifier.get_clustered_model`.

    def get_clustered_model(
        self,
        var: int,
        t: float,
        max_clusters: int = 10_000_000,
        store_associated_classes: bool = True,
    ) -> ClusteredModel:
        """Generate a simplified model for faster estimation of the information
        content in this model.

        This generates a model with clustered means that can be used to
        estimate the perceived or training information of the model. It
        applies a clustering method on the classes to regroup the closest ones
        up to a threshold distance :math:`t`. Internally, it uses a Kd-tree
        data structure to find the nearest cluster efficiently. Details on the
        clustering algorithm can be found in [1].

        The resulting model can be used with
        :class:`scalib.metrics.RLDAInformationEstimator` (see there for usage
        example).

        Parameters
        ----------
        var:
            Id (position in the ``x`` array) of the variable for which the
            probabilities are computed.
        t:
            Maximum distance between 2 cluster centers. This is a trade-off
            parameter between the tightness of the information bounds (lower
            value of t) and computation (time and memory) efficiency (higher
            value of t).
        max_clusters:
            The maximum number of clusters that can be generated. If during
            generation, this limit is exceeded, an exception is raised.
        store_associated_classes : bool
            If True, the generated model stores the classes associated to each
            cluster. This allows refining the information bounds by
            calculating using the exact class mean (and not the centroid it is
            associated to) for clusters that contribute the most to an untight
            bound. Note that this option requires significantly more RAM for
            high values of :math:`n_b`.

        Returns
        -------
        ClusteredModel
            A clustered model to be used in
            :class:`scalib.metrics.RLDAInformationEstimator`

        Raises
        ------
        AttributeError
            If :meth:`fit_u` has not been called yet.
        """
        inner = self._fitted_inner()
        res = self.ClusteredModel()
        # The native call takes its arguments in a different order than this
        # method's signature.
        res._inner = inner.get_clustered_model(
            var, store_associated_classes, t, max_clusters
        )
        return res