30template <
class T, AutoSeederMode Mode = AutoSeederMode::kSingleRun>
class AutoSeeder {
32#ifdef CLUSTERING_KMEANS_AFKMC2_N_THRESHOLD
39 static constexpr std::size_t
afkmc2NThreshold = CLUSTERING_KMEANS_AFKMC2_N_THRESHOLD;
47#ifdef CLUSTERING_KMEANS_AFKMC2_BEST_OF_N_THRESHOLD
54 CLUSTERING_KMEANS_AFKMC2_BEST_OF_N_THRESHOLD;
60#ifdef CLUSTERING_KMEANS_AFKMC2_BEST_OF_D_THRESHOLD
67 CLUSTERING_KMEANS_AFKMC2_BEST_OF_D_THRESHOLD;
73#ifdef CLUSTERING_KMEANS_AFKMC2_BEST_OF_K_FLOOR
79 static constexpr std::size_t
afkmc2BestOfKFloor = CLUSTERING_KMEANS_AFKMC2_BEST_OF_K_FLOOR;
85#ifdef CLUSTERING_KMEANS_AFKMC2_BEST_OF_WORK_THRESHOLD
92 CLUSTERING_KMEANS_AFKMC2_BEST_OF_WORK_THRESHOLD;
104 const std::size_t n = X.
dim(0);
105 const std::size_t d = X.
dim(1);
106 ensureShape(n, d, k);
107 std::visit([&](
auto &s) { s.run(X, k, seed, pool, outCentroids); }, m_held);
111 void ensureShape(std::size_t n, std::size_t d, std::size_t k) {
112 if (n == m_lastN && d == m_lastD && k == m_lastK) {
115 if (shouldUseAfkmc2(n, d, k)) {
116 if (!std::holds_alternative<AfkMc2Seeder<T>>(m_held)) {
117 m_held.template emplace<AfkMc2Seeder<T>>();
120 if (!std::holds_alternative<GreedyKmppSeeder<T>>(m_held)) {
121 m_held.template emplace<GreedyKmppSeeder<T>>();
129 [[nodiscard]]
static constexpr bool shouldUseAfkmc2(std::size_t n, std::size_t d,
130 std::size_t k)
noexcept {
136 return largeKEnvelope || bestOfEnvelope;
139 return largeKEnvelope;
143 std::variant<GreedyKmppSeeder<T>, AfkMc2Seeder<T>> m_held{
144 std::in_place_type<GreedyKmppSeeder<T>>};
145 std::size_t m_lastN = 0;
146 std::size_t m_lastD = 0;
147 std::size_t m_lastK = 0;
Represents a multidimensional array (NDArray) of a fixed number of dimensions N and element type T.
size_t dim(std::size_t index) const noexcept
Returns the size of a specific dimension of the NDArray.
static constexpr std::size_t chainLengthDefault
Default Markov-chain length per centroid pick.
static constexpr std::size_t kFloor
Floor on k: below this value the AFK-MC2 chain's log-k bound is too loose to win.
Seeder that picks between greedy k-means++ and AFK-MC2 against workload shape.
static constexpr std::size_t afkmc2NThreshold
n threshold above which AFK-MC2 is preferred over greedy k-means++.
void run(const NDArray< T, 2 > &X, std::size_t k, std::uint64_t seed, math::Pool pool, NDArray< T, 2 > &outCentroids)
Seed outCentroids with the dispatched seeder; see the class docs for the dispatch rule.
static constexpr std::size_t afkmc2KFloor
Minimum k at which AFK-MC2 is considered; mirrors AfkMc2Seeder::kFloor.
static constexpr std::size_t afkmc2BestOfKFloor
Minimum k for best-of restart AFK-MC2 dispatch.
static constexpr std::size_t afkmc2BestOfNThreshold
Best-of restart threshold on n above which AFK-MC2 is preferred.
static constexpr std::size_t afkmc2BestOfWorkThreshold
Minimum n * d work envelope for best-of restart AFK-MC2 dispatch.
static constexpr std::size_t afkmc2BestOfDThreshold
Best-of restart threshold on d above which AFK-MC2 is preferred.
static constexpr std::size_t afkmc2ChainLengthDefault
Default Markov-chain length passed through to AFK-MC2.
Thin injection wrapper around a BS::light_thread_pool.