3#include <BS_thread_pool.hpp>
38template <
class T,
class Algo = kmeans::LloydFusedGemm<T>,
class Seeder = kmeans::AutoSeeder<T>>
41 static_assert(std::is_same_v<T, float>,
42 "KMeans<T> supports only float; add a double specialization to extend.");
53 explicit KMeans(std::size_t k, std::size_t nJobs = 0)
54 : m_k(k), m_nJobs(
math::clampedJobCount(nJobs)), m_centroids({0, 0}), m_labels({0}) {
83 std::uint64_t seed = 0) {
84 const std::size_t n = X.
dim(0);
85 const std::size_t d = X.
dim(1);
90 ensureOutputShape(n, d);
92 if (n == 0 || d == 0) {
100 m_pool.emplace(m_nJobs);
102 const math::Pool pool{m_pool.has_value() ? &*m_pool :
nullptr};
103 m_seeder.run(X, m_k, seed, pool, m_centroids);
104 m_lloyd.run(X, m_centroids, m_k, maxIter, tol, pool, m_labels, m_inertia, m_nIter, m_converged);
/// @brief Final inertia: Kahan-summed f64 total of per-point squared distance to assignment.
///
/// Plain accessor: returns the stored m_inertia member by value. The member is
/// default-initialized to 0.0 in its declaration.
/// NOTE(review): m_inertia is passed to m_lloyd.run() during a fit, so this
/// presumably reflects the most recent run() call -- confirm against the driver.
///
/// @return The current value of m_inertia.
114 [[nodiscard]]
double inertia() const noexcept {
return m_inertia; }
/// @brief Iterations executed before tol or maxIter fired.
///
/// Plain accessor: returns the stored m_nIter member by value. The member is
/// default-initialized to 0 in its declaration.
/// NOTE(review): m_nIter is passed to m_lloyd.run() during a fit, so this
/// presumably reflects the most recent run() call -- confirm against the driver.
///
/// @return The current value of m_nIter.
116 [[nodiscard]] std::size_t
nIter() const noexcept {
return m_nIter; }
/// @brief True iff the last run stopped because centroid shift fell at or below tol.
///
/// Plain accessor: returns the stored m_converged member by value. The member
/// is default-initialized to false in its declaration.
/// NOTE(review): m_converged is passed to m_lloyd.run() during a fit, so this
/// presumably reflects the most recent run() call -- confirm against the driver.
///
/// @return The current value of m_converged.
118 [[nodiscard]]
bool converged() const noexcept {
return m_converged; }
132 void ensureOutputShape(std::size_t n, std::size_t d) {
133 if (m_centroids.
dim(0) != m_k || m_centroids.
dim(1) != d) {
136 if (m_labels.dim(0) != n) {
137 m_labels = NDArray<std::int32_t, 1>({n});
// Optional worker pool. run() emplaces it with m_nJobs and then hands the seeder
// and the Lloyd driver a math::Pool holding either a pointer to it or nullptr
// when it is empty.
143 std::optional<BS::light_thread_pool> m_pool;
// k x d fitted centroids, contiguous layout. Constructed as a {0, 0} array;
// ensureOutputShape() compares its dims against (m_k, d) before each fit.
144 NDArray<T, 2, Layout::Contig> m_centroids;
// Length-n assignment; each entry is in [0, k). Constructed as a {0} array and
// replaced by a fresh {n} array in ensureOutputShape() when the length differs.
145 NDArray<std::int32_t, 1> m_labels;
// Value reported by inertia(); passed to m_lloyd.run() during a fit.
146 double m_inertia = 0.0;
// Value reported by nIter(); passed to m_lloyd.run() during a fit.
147 std::size_t m_nIter = 0;
// Value reported by converged(); passed to m_lloyd.run() during a fit.
148 bool m_converged =
false;
#define CLUSTERING_ALWAYS_ASSERT(cond)
Release-active assertion: evaluates cond in every build configuration.
double inertia() const noexcept
Final inertia: Kahan-summed f64 total of each point's squared distance to its assigned centroid.
KMeans & operator=(KMeans &&)=delete
void run(const NDArray< T, 2 > &X, std::size_t maxIter=300, T tol=T{1e-4}, std::uint64_t seed=0)
Fit to X.
std::size_t nIter() const noexcept
Number of iterations executed before the tol or maxIter stopping criterion fired.
bool converged() const noexcept
True iff the last run stopped because centroid shift fell at or below tol.
void reset()
Release every scratch buffer. The next run call reallocates against its shape.
KMeans & operator=(const KMeans &)=delete
const NDArray< T, 2, Layout::Contig > & centroids() const noexcept
k x d fitted centroids.
KMeans(const KMeans &)=delete
KMeans(std::size_t k, std::size_t nJobs=0)
Construct a reusable k-means fitter.
const NDArray< std::int32_t, 1 > & labels() const noexcept
Length-n assignment; each entry is in [0, k).
Represents a multidimensional array (NDArray) of a fixed number of dimensions N and element type T.
size_t dim(std::size_t index) const noexcept
Returns the size of a specific dimension of the NDArray.
Contract for the Lloyd driver that KMeans<T> delegates to.
Contract for the seeder that produces initial centroids for the Lloyd driver.
bool shouldSpawnPool(std::size_t totalOps, std::size_t nJobs, std::size_t minOpsPerWorker=std::size_t{1}<< 15) noexcept
Decide whether spawning a pool with nJobs workers is worth it for totalOps of arithmetic work.