3#include <BS_thread_pool.hpp>
36template <
class T,
class QueryModel = index::AutoRangeIndex<T>>
41 static constexpr std::int32_t
NOISY = -1;
52 explicit DBSCAN(T eps, std::size_t minPts, std::size_t nJobs = 0)
53 : m_eps(eps), m_minPts(minPts), m_nJobs(
math::clampedJobCount(nJobs)), m_labels({0}) {
79 const std::size_t n = X.
dim(0);
95 m_pool.emplace(m_nJobs);
97 const math::Pool pool{m_pool.has_value() ? &*m_pool :
nullptr};
99 QueryModel queryModel(X);
100 const auto adj = queryModel.query(m_eps, pool);
102 std::vector<std::uint8_t> isCore(n, 0);
103 for (std::size_t i = 0; i < n; ++i) {
104 isCore[i] = (adj[i].size() >= m_minPts) ? 1U : 0U;
107 std::int32_t *labelPtr = m_labels.data();
110 for (std::size_t i = 0; i < n; ++i) {
112 expandCluster(i, isCore, adj, labelPtr);
124 [[nodiscard]] std::size_t
nClusters() const noexcept {
return m_clusterId; }
133 void ensureLabelsShape(std::size_t n) {
134 if (m_labels.
dim(0) != n) {
150 void expandCluster(std::size_t seed,
const std::vector<std::uint8_t> &isCore,
151 const std::vector<std::vector<std::int32_t>> &adj, std::int32_t *
labels) {
152 const auto clusterLabel =
static_cast<std::int32_t
>(m_clusterId);
153 labels[seed] = clusterLabel;
154 std::vector<std::size_t> frontier;
155 frontier.reserve(adj[seed].size());
156 for (
const std::int32_t n : adj[seed]) {
157 frontier.push_back(
static_cast<std::size_t
>(n));
160 while (!frontier.empty()) {
161 std::vector<std::size_t> next;
162 next.reserve(frontier.size());
163 for (
const std::size_t point : frontier) {
167 labels[point] = clusterLabel;
168 if (isCore[point] == 0) {
171 for (
const std::int32_t n : adj[point]) {
173 next.push_back(
static_cast<std::size_t
>(n));
182 std::size_t m_minPts;
184 std::size_t m_clusterId = 0;
188 std::optional<BS::light_thread_pool> m_pool;
192 NDArray<std::int32_t, 1> m_labels;
#define CLUSTERING_ALWAYS_ASSERT(cond)
Release-active assertion: evaluates cond in every build configuration.
void run(const NDArray< T, 2 > &X)
Fit to X.
std::size_t nClusters() const noexcept
Total number of clusters discovered by the most recent run.
DBSCAN(T eps, std::size_t minPts, std::size_t nJobs=0)
Construct a reusable DBSCAN fitter.
DBSCAN(const DBSCAN &)=delete
const NDArray< std::int32_t, 1 > & labels() const noexcept
Per-point cluster labels after run; NOISY marks outliers.
DBSCAN & operator=(const DBSCAN &)=delete
static constexpr std::int32_t NOISY
Label assigned to points that no cluster claimed.
DBSCAN & operator=(DBSCAN &&)=delete
void reset()
Release every scratch buffer. The next run call reallocates against its shape.
static constexpr std::int32_t UNCLASSIFIED
Sentinel for a point not yet visited.
Represents a multidimensional array (NDArray) of a fixed number of dimensions N and element type T.
size_t dim(std::size_t index) const noexcept
Returns the size of a specific dimension of the NDArray.
Contract for spatial indexes that can surface the radius-neighborhood adjacency over a borrowed point...
bool shouldSpawnPool(std::size_t totalOps, std::size_t nJobs, std::size_t minOpsPerWorker=std::size_t{1}<< 15) noexcept
Decide whether spawning a pool with nJobs workers is worth it for totalOps of arithmetic work.
Thin injection wrapper around a BS::light_thread_pool.