Source code for coinflip.randtests.matrix

from dataclasses import astuple
from dataclasses import dataclass
from math import exp
from math import floor
from math import sqrt
from typing import Iterable
from typing import Tuple

from coinflip.randtests._decorators import elected
from coinflip.randtests._decorators import randtest
from coinflip.randtests._result import TestResult
from coinflip.randtests._result import make_testvars_table
from coinflip.randtests._result import vars_list
from coinflip.randtests._testutils import blocks
from coinflip.randtests._testutils import check_recommendations
from coinflip.randtests._testutils import rawblocks

__all__ = ["binary_matrix_rank", "gf2_rank"]


@dataclass
class RankCounts:
    full: int = 0
    runnerup: int = 0
    remaining: int = 0


[docs]@randtest(min_input=4, rec_input=152) # nblocks=38, blocksize=4 @elected def binary_matrix_rank(series, candidate, matrix_dimen: Tuple[int, int] = None): """Independence of neighbouring sequences is compared to expected result Independence is determined by the matrix rank of a subsequence, where it is split into multiple rows to form a matrix. The counts of different rank bins is referenced to a hypothetically truly random RNG. Parameters ---------- sequence : array-like Output of the RNG being tested candidate : Value present in given sequence The value which is counted in each block matrix_dimen : `Tuple[int, int]` Number of rows and columns in each matrix Returns ------- BinaryMatrixRankTestResult Dataclass that contains the test's statistic and p-value """ n = len(series) if matrix_dimen is None: if n // (32 * 32) > 38: nrows = 32 ncols = 32 else: blocksize = max(n // 38, 4) nrows = floor(sqrt(blocksize)) ncols = nrows else: nrows, ncols = matrix_dimen blocksize = nrows * ncols nblocks = n // blocksize check_recommendations({"n ≥ 38 * blocksize": n >= 38 * blocksize}) fullrank = min(nrows, ncols) # TODO find expressive and performative calculation for constants expected_rankcounts = RankCounts( full=0.2888 * nblocks, runnerup=0.5776 * nblocks, remaining=0.1336 * nblocks, ) noncandidate = next(value for value in series.unique() if value != candidate) rankable_series = series.map({candidate: 1, noncandidate: 0}) matrices = [] for block in blocks(rankable_series, blocksize=blocksize): matrix = [row for row in rawblocks(block, nblocks=nrows)] matrices.append(matrix) ranks = [gf2_rank(matrix) for matrix in matrices] rankcounts = RankCounts() for rank in ranks: if rank == fullrank: rankcounts.full += 1 elif rank == fullrank - 1: rankcounts.runnerup += 1 else: rankcounts.remaining += 1 reality_check = [] for count_expect, count in zip(astuple(expected_rankcounts), astuple(rankcounts)): diff = (count - count_expect) ** 2 / count_expect reality_check.append(diff) statistic = sum(reality_check) p = exp(-statistic / 2) return BinaryMatrixRankTestResult( statistic, p, nrows, ncols, fullrank, expected_rankcounts, rankcounts, )
@dataclass class BinaryMatrixRankTestResult(TestResult): nrows: int ncols: int fullrank: int expected_rankcounts: RankCounts rankcounts: RankCounts def __post_init__(self): expected_counts = astuple(self.expected_rankcounts) counts = astuple(self.rankcounts) self.rankcount_diffs = [] for expect, actual in zip(expected_counts, counts): diff = actual - expect self.rankcount_diffs.append(diff) def __rich_console__(self, console, options): yield self._results_text("chi-square") yield "" yield vars_list( ("nrows", self.nrows), ("ncols", self.ncols), ) runnerup = self.fullrank - 1 remaining = runnerup - 1 f_ranks = [ str(self.fullrank), str(runnerup), "0" if remaining == 0 else f"0-{remaining}", ] table = zip( f_ranks, astuple(self.rankcounts), astuple(self.expected_rankcounts), self.rankcount_diffs, ) f_table = make_testvars_table("rank", "count", "expected", "diff") for f_rank, count, count_expect, diff in table: f_count = str(count) f_count_expect = str(round(count_expect, 1)) f_diff = str(round(diff, 1)) f_table.add_row(f_rank, f_count, f_count_expect, f_diff) yield f_table def bits2int(bits: Iterable[int]) -> int: """Converts a list of bits into a numerical representation""" num = 0 for bit in bits: num = (num << 1) | bit return num
[docs]def gf2_rank(matrix: Iterable[Iterable[int]]) -> int: """Finds the rank of a binary matrix Parameters ---------- matrix : `List[Tuple[int, ...]]` Binary matrix to rank Returns ------- rank : int Rank of `matrix` Notes ----- Implementaton inpisred by a `StackOverflow answer <https://stackoverflow.com/a/56858995/5193926>`_ from Mark Dickinson. """ numbers = [bits2int(bits) for bits in matrix] rank = 0 while len(numbers) > 0: pivot = numbers.pop() if pivot: rank += 1 lsb = pivot & -pivot for i, num in enumerate(numbers): if lsb & num: numbers[i] = num ^ pivot return rank