"""Statistical tests for randomness
This sub-package comprises of implementations of statistical tests laid out in a
comprehensive paper from NIST [1]_ in regards to assessing (pseudo-)random number
generators.
Notes
-----
A copy of the NIST paper can be found at the root of the ``coinflip`` repository as
`SP800-22.pdf <https://github.com/Honno/coinflip/blob/master/SP800-22.pdf>`_.
The test themselves are defined in section 2., "Random Number Generation Tests",
p. 23-62. Further detail of the tests are provided in section 3. "Technical
Descriptions of Tests", p. 63-87.
These tests were implemented in a complimentary program ``sts``, which can be
downloaded from the `NIST website
<https://csrc.nist.gov/projects/random-bit-generation/documentation-and-software>`_.
Note that the paper assumes a great amount of familiarity with certain
concepts in statistics. It also uses some constants and algorithms without any
explanations. Part of the purpose for ``coinflip`` is to "describe" the NIST tests
more wholly than in the paper itself, whilst also reducing the noise of some
non-idiomatic programming conventions used in ``sts``.
.. [1] National Institute of Standards and Technology <Andrew Rukhin, Juan Soto,
James Nechvatal, Miles Smid, Elaine Barker, Stefan Leigh, Mark Levenson, Mark
Vangel, David Banks, Alan Heckert, James Dray, San Vo,
Lawrence E Bassham II>, "A Statistical Test Suite for Random and Pseudorandom
Number Generators for Cryptographic Applications", *Special Publication
800-22 Revision 1a*, April 2010.
"""
from typing import Optional
from typing import Tuple
from coinflip import _randtests
from coinflip._randtests.common import exceptions
__all__ = [
"monobit",
"frequency_within_block",
"runs",
"longest_runs",
"binary_matrix_rank",
"spectral",
"non_overlapping_template_matching",
"overlapping_template_matching",
"maurers_universal",
"linear_complexity",
"serial",
"approximate_entropy",
"cusum",
"random_excursions",
"random_excursions_variant",
"exceptions",
]
[docs]def monobit(sequence):
"""Proportion of values is compared to expected 1:1 ratio
The difference between the occurrences of the two values in the sequence is
found, and referenced to a hypothetically truly random sequence.
Parameters
----------
sequence : array-like with two distinct values
Sequence containing 2 distinct elements
Returns
-------
result : ``MonobitTestResult``
Dataclass that contains the test's statistic and p-value as well as
other relevant information gathered.
"""
return _randtests.monobit(sequence)
[docs]def frequency_within_block(sequence, blocksize: Optional[int] = None):
"""Proportion of values per block is compared to expected 1:1 ratio
The sequence is split into blocks, and the difference between the occurrences
of the two values in the sequence is found. This is referenced to a
hypothetically truly random sequence.
Parameters
----------
sequence : array-like with two distinct values
Sequence containing 2 distinct elements
blocksize : ``int``
Size of the blocks that partition the given sequence
Returns
-------
result : ``FrequencyWithinBlockTestResult``
Dataclass that contains the test's statistic and p-value as well as
other relevant information gathered.
"""
return _randtests.frequency_within_block(sequence, blocksize=blocksize)
[docs]def runs(sequence):
"""Number of runs is compared to expected result
The number of runs (uninterrupted sequence of the same value) is found, and
referenced to a hypothetically truly random sequence.
Parameters
----------
sequence : array-like with two distinct values
Sequence containing 2 distinct elements
Returns
-------
result : ``RunsTestResult``
Dataclass that contains the test's statistic and p-value as well as
other relevant information gathered.
"""
return _randtests.runs(sequence)
[docs]def longest_runs(sequence):
"""Longest runs per block is compared to expected result
The sequence is split into blocks, where the longest number of runs
(uninterrupted sequence of the same value) in each block is found. This is
referenced to a hypothetically truly random sequence.
Parameters
----------
sequence : array-like with two distinct values
Sequence containing 2 distinct elements
Returns
-------
result : ``LongestRunsTestResult``
Dataclass that contains the test's statistic and p-value as well as
other relevant information gathered.
"""
return _randtests.longest_runs(sequence)
[docs]def binary_matrix_rank(sequence, matrix_dimen: Optional[Tuple[int, int]] = None):
"""Independence of neighbouring sequences is compared to expected result
The sequence is split into matrices, where the rank of each matrix is found
to determine independence. The counts of different rank bins is referenced
to a hypothetically truly random sequence.
Parameters
----------
sequence : array-like with two distinct values
Sequence containing 2 distinct elements
matrix_dimen : ``Tuple[int, int]``
Number of rows and columns in each matrix
Returns
-------
result : ``BinaryMatrixRankTestResult``
Dataclass that contains the test's statistic and p-value as well as
other relevant information gathered.
"""
return _randtests.binary_matrix_rank(sequence, matrix_dimen=matrix_dimen)
[docs]def spectral(sequence):
"""Potency of periodic features in sequence is compared to expected result
The sequence is treated as a signal, which is applied a Fourier transform so
as to find constituent periodic features. The strength of these features is
referenced to the expected periodic features present in a hypothetically
truly random sequence.
Parameters
----------
sequence : array-like with two distinct values
Sequence containing 2 distinct elements
Returns
-------
result : ``SpectralTestResult``
Dataclass that contains the test's statistic and p-value as well as
other relevant information gathered.
Raises
------
NonBinaryTruncatedSequenceError
When odd-lengthed sequence is truncated there is only one distinct value
present
"""
return _randtests.spectral(sequence)
[docs]def non_overlapping_template_matching(
sequence, template_size: Optional[int] = None, blocksize: Optional[int] = None
):
"""Matches to template per block is compared to expected result
The sequence is split into blocks, where the number of non-overlapping
pattern matches to the template in each block is found. This is referenced
to the expected mean and variance in matches of a hypothetically truly
random sequence.
Parameters
----------
sequence : array-like with two distinct values
Sequence containing 2 distinct elements
template_size : ``int``
Size of all the templates to be generated
blocksize : ``int``
Size of the blocks that partition the given sequence
Returns
-------
results : ``NonOverlappingTemplateMatchingMultiTestResult``
Dataclass that contains the statistics and p-values of all sub-tests, as
well as other relevant information gathered.
"""
return _randtests.non_overlapping_template_matching(
sequence,
template_size=template_size,
blocksize=blocksize,
)
[docs]def overlapping_template_matching(
sequence,
template_size: Optional[int] = None,
blocksize: Optional[int] = None,
matches_ceil: Optional[int] = None,
):
"""Overlapping matches to template per block is compared to expected result
The sequence is split into blocks, where the number of overlapping patterns
matches to the template in each block is found. This is referenced to the
expected mean and variance in matches of a hypothetically truly random sequence.
Parameters
----------
sequence : array-like with two distinct values
Sequence containing 2 distinct elements
template_size : ``int``
Size of the template to be generated
blocksize : ``int``
Size of the blocks that partition the given sequence
matches_ceil : ``int``
Group matches of this value and higher as one single tally
Returns
-------
result : ``OverlappingTemplateMatchingTestResult``
Dataclass that contains the test's statistic and p-value as well as
other relevant information gathered.
"""
return _randtests.overlapping_template_matching(
sequence,
template_size=template_size,
blocksize=blocksize,
matches_ceil=matches_ceil,
)
[docs]def maurers_universal(
sequence, blocksize: Optional[int] = None, init_nblocks: Optional[int] = None
):
"""Distance between patterns is compared to expected result
The distinct patterns in an initial sequence are identified, and the
distances between subsequent occurences of these patterns in a remaining
sequence are accumulated. The normalised value for the accumulated distances
is referenced to a hypothetically truly random sequence.
Parameters
----------
sequence : array-like with two distinct values
Sequence containing 2 distinct elements
blocksize : ``int``
Size of the patterns
init_nblocks : ``int``
Number of initial blocks to identify patterns
Returns
-------
result : ``UniversalTestResult``
Dataclass that contains the test's statistic and p-value as well as
other relevant information gathered.
"""
return _randtests.maurers_universal(
sequence,
blocksize=blocksize,
init_nblocks=init_nblocks,
)
[docs]def linear_complexity(sequence, blocksize: Optional[int] = None):
"""LSFRs of blocks is compared to expected length
The sequence is split into blocks, where the shortest linear feedback shift
register is found for each block. The difference of each LSFR's length to the
expected mean length is binned, and is referenced to a hypothetically truly
random sequence.
Parameters
----------
sequence : array-like with two distinct values
Sequence containing 2 distinct elements
blocksize : ``int``
Size of the blocks
Returns
-------
results : ``LinearComplexityTestResult``
Dataclass that contains the test's statistic and p-value as well as
other relevant information gathered.
"""
return _randtests.linear_complexity(sequence, blocksize=blocksize)
[docs]def serial(sequence, blocksize: Optional[int] = None):
"""Proportion of all overlapping patterns is compared to expected uniformity
The number of overlapping pattern matches for each distinct pattern is
found. This is referenced to a hypothetically truly random sequence.
Parameters
----------
sequence : array-like with two distinct values
Sequence containing 2 distinct elements
blocksize : ``int``
Size of the patterns
Returns
-------
results : ``SerialMultiTestResult``
Dataclass that contains the statistics and p-values of all sub-tests, as
well as other relevant information gathered.
"""
return _randtests.serial(sequence, blocksize=blocksize)
[docs]def approximate_entropy(sequence, blocksize: Optional[int] = None):
"""Approximate entropy of sequence is compared to expected result
The approximate entropy of the sequence is found and referenced to a truly
random sequence.
Parameters
----------
sequence : array-like with two distinct values
Sequence containing 2 distinct elements
blocksize : ``int``
Size of the patterns
Returns
-------
results : ``ApproximateEntropyTestResult``
Dataclass that contains the test's statistic and p-value as well as
other relevant information gathered.
"""
return _randtests.approximate_entropy(sequence, blocksize=blocksize)
[docs]def cusum(sequence, reverse: bool = False):
"""Furthest detour in a randomn walk is compared to expected result
The sequence is treated as a random walk, where the furthest detour from the
axis is identified and referenced to a hypothetically truly random sequence.
Parameters
----------
sequence : array-like with two distinct values
Sequence containing 2 distinct elements
reverse : ``bool``
Cumulate sums from the end of the sequence first.
Returns
-------
results : ``CusumTestResult``
Dataclass that contains the test's statistic and p-value as well as
other relevant information gathered.
"""
return _randtests.cusum(sequence, reverse=reverse)
[docs]def random_excursions(sequence):
"""Frequency of states per cycle in a random walk is compared to expected results
The sequence is treated as a random walk, where the frequency of states -4
to 4 in each cycle is found. This is referenced to a hypothetically truly
random sequence.
Parameters
----------
sequence : array-like with two distinct values
Sequence containing 2 distinct elements
Returns
-------
results : ``RandomExcursionsMultiTestResult``
Dataclass that contains the statistics and p-values of all sub-tests, as
well as other relevant information gathered.
"""
return _randtests.random_excursions(sequence)
[docs]def random_excursions_variant(sequence):
"""Proportion of states in a random walk is compared to expected results
The sequence is treated as a random walk, where the occurrences of states -9
to 9 is founded and referenced to a hypothetically truly random sequence.
Parameters
----------
sequence : array-like with two distinct values
Sequence containing 2 distinct elements
Returns
-------
results : ``RandomExcursionsVariantMultiTestResult``
Dataclass that contains the statistics and p-values of all sub-tests, as
well as other relevant information gathered.
"""
return _randtests.random_excursions_variant(sequence)