clustering/rng_8h_source.html

#pragma once


#include <algorithm>

#include <array>

#include <cassert>

#include <cmath>

#include <cstddef>

#include <cstdint>

#include <span>

#include <type_traits>

#include <utility>

#include <vector>


#include "clustering/ndarray.h"


#ifndef __SIZEOF_INT128__

#error "clustering::math::rng requires a compiler with __uint128_t support (GCC/Clang)."

#endif


namespace clustering::math {


/**
 * @brief State for a 128-bit-state, 64-bit-output PCG generator.
 *
 * The struct is a plain aggregate holding the LCG state and its increment. A
 * default-constructed instance has both members at zero; with a zero increment
 * the update `state * mult + inc` never leaves zero, so call seed() before
 * drawing values.
 */
struct pcg64 {
  /// Current 128-bit LCG state.
  __uint128_t m_state = 0;
  /// Additive LCG increment; seed() always stores an odd value here.
  __uint128_t m_inc = 0;

  /**
   * @brief Reset the generator from a 64-bit seed and an optional stream id.
   *
   * @param seedValue Seed mixed into the initial state.
   * @param stream    Stream selector; encoded as the odd increment `2 * stream + 1`.
   */
  void seed(std::uint64_t seedValue, std::uint64_t stream = 0) noexcept {
    constexpr std::uint64_t kMultiplierHigh = 2549297995355413924ULL;
    constexpr std::uint64_t kMultiplierLow = 4865540595714422341ULL;
    constexpr __uint128_t kMultiplier =
        (static_cast<__uint128_t>(kMultiplierHigh) << 64) | kMultiplierLow;

    // Shifting left by one and setting bit 0 guarantees an odd increment.
    m_inc = (static_cast<__uint128_t>(stream) << 1U) | 1U;

    // Seeding is: start at zero, step, add the seed, step again. The first step
    // from zero yields exactly m_inc, so the whole sequence folds into one
    // expression (all arithmetic wraps modulo 2^128).
    m_state = ((m_inc + seedValue) * kMultiplier) + m_inc;
  }
};


/**
 * @brief Step a pcg64 once and return a 64-bit output.
 *
 * The output is derived from the state as it was *before* this call's LCG
 * update: the high and low 64-bit halves are XORed together and the result is
 * rotated right by the top six bits of that state.
 *
 * @param rng Generator to advance; its m_state is overwritten.
 * @return 64-bit output word.
 */
inline std::uint64_t advanceState(pcg64 &rng) noexcept {
  constexpr std::uint64_t kMultiplierHigh = 2549297995355413924ULL;
  constexpr std::uint64_t kMultiplierLow = 4865540595714422341ULL;
  constexpr __uint128_t kMultiplier =
      (static_cast<__uint128_t>(kMultiplierHigh) << 64) | kMultiplierLow;

  const __uint128_t previous = rng.m_state;
  rng.m_state = (previous * kMultiplier) + rng.m_inc;

  // Fold the 128-bit state down to 64 bits.
  const auto upperHalf = static_cast<std::uint64_t>(previous >> 64);
  const auto lowerHalf = static_cast<std::uint64_t>(previous);
  const std::uint64_t folded = upperHalf ^ lowerHalf;

  // Top six bits select the rotation amount (0..63).
  const auto amount = static_cast<std::uint64_t>(previous >> 122);
  // Masking the complementary shift keeps it in range when amount == 0.
  const std::uint64_t complement = (64U - amount) & 63U;
  return (folded >> amount) | (folded << complement);
}


/**
 * @brief State for a xoshiro256** generator: four 64-bit words.
 *
 * The struct is a plain aggregate. A default-constructed instance is all
 * zeros, which the XOR/shift/rotate state update maps back to all zeros, so
 * call seed() before drawing values.
 */
struct xoshiro256ss {
  /// The four state words; filled by seed().
  std::array<std::uint64_t, 4> m_s{0, 0, 0, 0};

  /**
   * @brief Expand a single 64-bit seed into the four state words.
   *
   * Each word is produced by a SplitMix64-style step: bump a running counter
   * by a fixed odd constant, then scramble a copy of it with two
   * xor-shift-multiply rounds and a final xor-shift.
   *
   * @param seedValue Seed the counter starts from.
   */
  void seed(std::uint64_t seedValue) noexcept {
    std::uint64_t counter = seedValue;
    for (std::size_t slot = 0; slot < m_s.size(); ++slot) {
      counter += 0x9E3779B97F4A7C15ULL;

      std::uint64_t mixed = counter;
      mixed ^= mixed >> 30;
      mixed *= 0xBF58476D1CE4E5B9ULL;
      mixed ^= mixed >> 27;
      mixed *= 0x94D049BB133111EBULL;
      mixed ^= mixed >> 31;

      m_s[slot] = mixed;
    }
  }
};


/**
 * @brief Step a xoshiro256ss once and return a 64-bit output.
 *
 * The output is computed from word 1 of the state as it was before the
 * update: multiply by 5, rotate left by 7, multiply by 9. The four state words
 * are then mixed with XORs, a left shift by 17 and a left rotation by 45.
 *
 * @param rng Generator to advance; all four words of m_s are updated.
 * @return 64-bit output word.
 */
inline std::uint64_t advanceState(xoshiro256ss &rng) noexcept {
  auto &s = rng.m_s;

  // Output scrambler: rotl(s[1] * 5, 7) * 9.
  const std::uint64_t scaled = s[1] * 5U;
  const std::uint64_t output = ((scaled << 7U) | (scaled >> 57U)) * 9U;

  // Captured before s[1] is modified below.
  const std::uint64_t shifted = s[1] << 17U;

  // The order of these XORs matters: each one reads words already updated above it.
  s[2] ^= s[0];
  s[3] ^= s[1];
  s[1] ^= s[2];
  s[0] ^= s[3];

  s[2] ^= shifted;
  // rotl(s[3], 45)
  s[3] = (s[3] << 45U) | (s[3] >> 19U);

  return output;
}


/**
 * @brief Draw a 32-bit unsigned value from a generator.
 *
 * Takes one output of advanceState(rng) and keeps its upper 32 bits.
 *
 * @tparam Rng Generator type for which an advanceState overload exists.
 * @param rng  Generator to advance by one step.
 * @return The upper half of the generator's next output.
 */
template <class Rng> inline std::uint32_t randUniformU32(Rng &rng) noexcept {
  const auto raw = advanceState(rng);
  const auto upperHalf = raw >> 32U;
  return static_cast<std::uint32_t>(upperHalf);
}


/**
 * @brief Draw a 64-bit unsigned value from a generator.
 *
 * Thin wrapper that forwards the next advanceState(rng) output unchanged.
 *
 * @tparam Rng Generator type for which an advanceState overload exists.
 * @param rng  Generator to advance by one step.
 * @return The generator's next output.
 */
template <class Rng> inline std::uint64_t randUniformU64(Rng &rng) noexcept {
  const std::uint64_t word = advanceState(rng);
  return word;
}


/**
 * @brief Draw a floating-point value in the half-open interval [0, 1).
 *
 * One advanceState(rng) output is consumed. For `double` the top 53 bits are
 * kept and scaled by 2^-53; for `float` the top 24 bits are kept and scaled by
 * 2^-24. The largest possible result is therefore strictly below 1.
 *
 * @tparam T   `float` or `double` (enforced at compile time).
 * @tparam Rng Generator type for which an advanceState overload exists.
 * @param rng  Generator to advance by one step.
 * @return A value in [0, 1).
 */
template <class T, class Rng> inline T randUnit(Rng &rng) noexcept {
  static_assert(std::is_same_v<T, float> || std::is_same_v<T, double>,
                "randUnit<T> requires T to be float or double");

  const auto raw = advanceState(rng);
  if constexpr (std::is_same_v<T, float>) {
    // Keep 24 bits so the integer converts to float without rounding.
    return static_cast<float>(raw >> 40U) * 0x1.0p-24F;
  } else {
    // Keep 53 bits so the integer converts to double without rounding.
    return static_cast<double>(raw >> 11U) * 0x1.0p-53;
  }
}


/**
 * @brief Pick one index with probability proportional to its weight.
 *
 * Makes two linear passes over @p weights: the first sums them, the second
 * walks a running sum until it exceeds a threshold drawn uniformly from
 * [0, total). Exactly one randUnit<T>(rng) draw is consumed.
 *
 * Preconditions (checked with assert only): at least one weight, every weight
 * non-negative, and a strictly positive sum.
 *
 * @tparam T   `float` or `double` (enforced at compile time).
 * @param weights One-dimensional array of weights.
 * @param rng     Generator used for the single uniform draw.
 * @return The selected index into @p weights.
 */
template <class T, Layout L, class Rng>
inline std::size_t weightedCategorical(const NDArray<T, 1, L> &weights, Rng &rng) noexcept {
  static_assert(std::is_same_v<T, float> || std::is_same_v<T, double>,
                "weightedCategorical<T> requires T to be float or double");

  const std::size_t count = weights.dim(0);
  assert(count > 0 && "weightedCategorical requires at least one weight");

  // Pass 1: total mass, accumulated in index order.
  T mass = T{0};
  for (std::size_t idx = 0; idx != count; ++idx) {
    const T weight = weights(idx);
    assert(weight >= T{0} && "weightedCategorical requires non-negative weights");
    mass += weight;
  }
  assert(mass > T{0} && "weightedCategorical requires at least one positive weight");

  // Pass 2: first index whose running sum strictly exceeds the threshold.
  const T threshold = randUnit<T>(rng) * mass;
  T running = T{0};
  std::size_t fallback = 0;
  for (std::size_t idx = 0; idx != count; ++idx) {
    const T weight = weights(idx);
    running += weight;
    if (running > threshold) {
      return idx;
    }
    if (weight > T{0}) {
      fallback = idx;
    }
  }

  // Rounding can leave the final running sum at or below the threshold. In that
  // case return the last index that carried positive weight, so a zero-weight
  // slot is never selected.
  return fallback;
}


/**
 * @brief Weighted sampling of @p k distinct indices without replacement.
 *
 * Every index i receives the key `log(u_i) / w_i` with u_i uniform in (0, 1);
 * the k indices with the largest keys are written to @p outIdx in descending
 * key order (the log-key form of Efraimidis-Spirakis weighted sampling).
 *
 * Preconditions (checked with assert only): `outIdx.size() == k`,
 * `k <= weights.dim(0)`, and every weight strictly positive.
 *
 * @note The function is `noexcept` but builds a temporary std::vector of
 *       `weights.dim(0)` entries, so an allocation failure terminates the
 *       program.
 *
 * @tparam T   `float` or `double` (enforced at compile time).
 * @param weights One-dimensional array of weights.
 * @param k       Number of indices to select; 0 returns immediately.
 * @param rng     Generator; at least one draw is consumed per weight.
 * @param outIdx  Destination for the k selected indices.
 */
template <class T, Layout L, class Rng>
inline void aExpjReservoir(const NDArray<T, 1, L> &weights, std::size_t k, Rng &rng,
                           std::span<std::size_t> outIdx) noexcept {
  static_assert(std::is_same_v<T, float> || std::is_same_v<T, double>,
                "aExpjReservoir<T> requires T to be float or double");

  const std::size_t count = weights.dim(0);
  assert(outIdx.size() == k && "aExpjReservoir requires outIdx.size() == k");
  assert(k <= count && "aExpjReservoir requires k <= weights.dim(0)");
  if (k == 0) {
    return;
  }

  using Entry = std::pair<T, std::size_t>; // (key, source index)

  // Key every index up front, then let std::partial_sort pull the k largest
  // keys to the front; that call performs roughly count * log(k) comparisons.
  std::vector<Entry> entries;
  entries.reserve(count);
  for (std::size_t idx = 0; idx != count; ++idx) {
    const T weight = weights(idx);
    assert(weight > T{0} && "aExpjReservoir requires strictly positive weights");

    // randUnit yields values in [0, 1); an exact zero would send log() to
    // -infinity, so redraw until the sample is positive.
    T draw{};
    do {
      draw = randUnit<T>(rng);
    } while (draw <= T{0});

    const T key = std::log(draw) / weight;
    entries.emplace_back(key, idx);
  }

  const auto byKeyDescending = [](const Entry &lhs, const Entry &rhs) noexcept {
    return lhs.first > rhs.first;
  };
  const auto topEnd = entries.begin() + static_cast<std::ptrdiff_t>(k);
  std::partial_sort(entries.begin(), topEnd, entries.end(), byKeyDescending);

  for (std::size_t slot = 0; slot != k; ++slot) {
    outIdx[slot] = entries[slot].second;
  }
}


} // namespace clustering::math

clustering::NDArray
Represents a multidimensional array (NDArray) of a fixed number of dimensions N and element type T.
Definition ndarray.h:136

clustering::math
Definition aabb.h:12

clustering::math::randUnit
T randUnit(Rng &rng) noexcept
Draw a uniform variate in the half-open unit interval [0, 1).
Definition rng.h:152

clustering::math::aExpjReservoir
void aExpjReservoir(const NDArray< T, 1, L > &weights, std::size_t k, Rng &rng, std::span< std::size_t > outIdx) noexcept
Efraimidis-Spirakis weighted reservoir sampling (A-Exp variant, log-key form).
Definition rng.h:233

clustering::math::advanceState
std::uint64_t advanceState(pcg64 &rng) noexcept
Advance a pcg64 one step and return the 64-bit XSL-RR output.
Definition rng.h:63

clustering::math::weightedCategorical
std::size_t weightedCategorical(const NDArray< T, 1, L > &weights, Rng &rng) noexcept
Sample one category index proportionally to non-negative weights.
Definition rng.h:179

clustering::math::randUniformU32
std::uint32_t randUniformU32(Rng &rng) noexcept
Draw a 32-bit unsigned integer uniformly at random from the full u32 range.
Definition rng.h:132

clustering::math::randUniformU64
std::uint64_t randUniformU64(Rng &rng) noexcept
Draw a 64-bit unsigned integer uniformly at random from the full u64 range.
Definition rng.h:139

ndarray.h

clustering::math::pcg64
128-bit state for the PCG-XSL-RR 64-bit output generator (Melissa O'Neill).
Definition rng.h:30

clustering::math::pcg64::m_inc
__uint128_t m_inc
Stream-encoded odd increment mixed into the LCG step.
Definition rng.h:34

clustering::math::pcg64::m_state
__uint128_t m_state
128-bit generator state; advanced by every advanceState call.
Definition rng.h:32

clustering::math::pcg64::seed
void seed(std::uint64_t seedValue, std::uint64_t stream=0) noexcept
Initialize the generator per PCG's canonical seeding procedure.
Definition rng.h:46

clustering::math::xoshiro256ss
256-bit state for Vigna & Blackman's xoshiro256** generator.
Definition rng.h:80

clustering::math::xoshiro256ss::seed
void seed(std::uint64_t seedValue) noexcept
Initialize the four state words via SplitMix64 diffusion of a single 64-bit seed.
Definition rng.h:92

clustering::math::xoshiro256ss::m_s
std::array< std::uint64_t, 4 > m_s
Four 64-bit state words; SplitMix64-diffused at seed time.
Definition rng.h:82