import numpy as np
import numpy.typing as npt
from scalib import _scalib_ext
from scalib.config import get_config
import scalib.utils
[docs]class RLDAClassifier:
r"""Regression-based Linear Discriminant Analysis.
Models the leakage using a regression-based linear discriminant analysis
(RLDA) classifier :footcite:p:`RLDA`, which can efficiently handle long
traces and large number of classes.
In a nutshell, this model performs LDA with the class means modelled as
linear regression based on the :math:`n_b` bits of the class value.
Compared to the :class:`scalib.modeling.LDAClassifier`, this model will
perform better when the number of classes is large and/or there are few
profiling traces.
Internally, it first estimates the coefficients of the linear regression,
then computes a projection matrix that reduces the dimensionality of the
gaussian template to :math:`p` dimensions and makes the covariance matrix
the identity.
It is then able to predict the leakage likelihood
.. math::
\hat{\mathsf{f}}[\mathbf{l}|X=x] =
\alpha
\exp\left(
-\frac{1}{2} \lVert\mathbf{W}^T\mathbf{l} - \mathbf{A}\mathbf\beta(x)\rVert^2
\right).
Where :math:`\mathbf{W}` is the projection matrix, :math:`\mathbf{A}` the projected
regression coefficients, and :math:`\mathbf{\beta(x)}` the coefficients of :math:`x`.
The parameter :math:`\alpha = 1/\sqrt{(2\pi)^p\lvert\hat\Sigma_\mathbf{W}}\rvert` does
not need to be calculated as it will get canceled out when applying Bayes' law.
:class:`RLDAClassifier` provides the probability for each of the :math:`2^{n_b}`
classes with :meth:`predict_proba`.
Examples
--------
>>> from scalib.modeling import RLDAClassifier
>>> import numpy as np
>>> traces_model = np.random.randint(0,256,(5000,10),dtype=np.int16)
>>> labels_model = np.random.randint(0,256,(5000,1),dtype=np.uint64)
>>> rlda = RLDAClassifier(8, 3)
>>> rlda.fit_u(traces_model, labels_model)
>>> rlda.solve()
>>> traces_test = np.random.randint(0,256,(5000,10),dtype=np.int16)
>>> prs = rlda.predict_proba(traces_test, 0)
References
----------
.. footbibliography::
"""
def __init__(self, nb: int, p: int):
"""
Parameters
----------
nb:
Number of bits of the profiled variables.
nv:
Number of variables to profile
p:
Number of dimensions in the linear subspace.
"""
self._ns = None
self._nv = None
self._p = p
self._nb = nb
self._init = False
self._solved = False
[docs] def fit_u(
self, traces: npt.NDArray[np.int16], x: npt.NDArray[np.uint64], gemm_mode=1
):
"""Update statistical model estimates with additional data.
This can be called multiple times, the state is accumulated.
Parameters
----------
traces : array_like, int16
Array that contains the traces. Shape ``(n,ns)``.
x : array_like, uint64
Labels for each trace. Shape ``(n,nv)``.
"""
traces = scalib.utils.clean_traces(traces, self._ns)
x = scalib.utils.clean_labels(x, self._nv, exp_type=np.uint64)
if not self._init:
self._init = True
self._ns = traces.shape[1]
self._nv = x.shape[1]
self._inner = _scalib_ext.RLDA(self._nb, self._ns, self._nv, self._p)
self._inner.update(traces, x.T, gemm_mode, get_config())
[docs] def solve(self):
"""Solve the RLDA equations.
Notes
-----
Once this has been called, predictions can be performed.
"""
self._inner.solve(get_config())
self._solved = True
[docs] def get_proj(self) -> npt.NDArray[np.float64]:
"""Returns the projection matrix.
Returns
-------
array_like, float64
Shape ``(nv,p,ns)``."""
return self._inner.get_norm_proj()
[docs] def get_proj_coefs(
self,
) -> npt.NDArray[np.float64]:
"""The projected regression coefficients.
Returns
-------
array_like, float64
Shape ``(nv,p,nb+1)``.
"""
return self._inner.get_proj_coefs()
[docs] def predict_proba(
self, traces: npt.NDArray[np.int16], var: int
) -> npt.NDArray[np.float64]:
r"""Computes the probability for each of the classes for the requested variables.
Parameters
----------
traces:
Array that contains the traces. Shape ``(n,ns)``.
var:
Id (position in the ``x`` array) of the variable for which the
probabilities are computed.
Returns
-------
array_like, f64
Probabilities. Shape ``(n, nc)``.
"""
assert self._solved, "Model not solved"
return self._inner.predict_proba(traces, var, get_config())
[docs] class ClusteredModel:
"""Clustered RLDA model, see :func:`RLDAClassifier.get_clustered_model`."""
pass
[docs] def get_clustered_model(
self,
var: int,
t: float,
max_clusters: int = 10_000_000,
store_associated_classes: bool = True,
) -> ClusteredModel:
"""Generate a simplified model for faster estimation of the information content in this model.
This generates a model with clustered means that can be used to
estimate the percevied or training information of the model. It applies
a clustering method on the classes to regroup the closest ones up to a
threshold distance :math:`t`.
Internally, it uses a Kd-tree data structure to find the nearest cluster efficiently.
Details on the clustering algorithm can be found in [1].
The resulting model can be used with
:class:`scalib.metrics.RLDAInformationEstimator` (see there for usage
example).
Parameters
----------
var:
Id (position in the ``x`` array) of the variable for which the
probabilities are computed.
t:
Maximum distance between 2 cluster centers. This is a trade-off parameter between
the tightness of the information bounds (lower value of t) and
computation (time and memory) efficiency (higher value of t).
max_clusters:
The maximum number of clusters that can be generated. If during generation, this
limit is exceeded, an exception is raised.
store_associated_classes : bool
If True, the generated model stores the classes associated to each cluster. This
allows refining the information bounds by calculating using the exact class mean (and not the
centroid it is associated to) for clusters that contribute the most to an untight bound.
Note that this option requires significantly more RAM for high values of :math:`n_b`.
Returns
-------
ClusteredModel
A clustered model to be used in :class:`scalib.metrics.RLDAInformationEstimator`
"""
res = self.ClusteredModel()
res._inner = self._inner.get_clustered_model(
var, store_associated_classes, t, max_clusters
)
return res