Clustering
C++20 header-only: DBSCAN, HDBSCAN, k-means.
Loading...
Searching...
No Matches
reduce.h
Go to the documentation of this file.
1#pragma once
2
3#include <algorithm>
4#include <cassert>
5#include <cstddef>
6#include <span>
7#include <utility>
8#include <vector>
9
10#include "clustering/ndarray.h"
11
12namespace clustering::math {
13
25template <class T, Layout L> inline T sum(const NDArray<T, 1, L> &x) noexcept {
26 static_assert(std::is_same_v<T, float> || std::is_same_v<T, double>,
27 "sum<T> requires T to be float or double");
28 const std::size_t n = x.dim(0);
29 T s = T{0};
30 for (std::size_t i = 0; i < n; ++i) {
31 s += x(i);
32 }
33 return s;
34}
35
47template <class T, Layout L> inline std::size_t argmin(const NDArray<T, 1, L> &x) noexcept {
48 static_assert(std::is_same_v<T, float> || std::is_same_v<T, double>,
49 "argmin<T> requires T to be float or double");
50 const std::size_t n = x.dim(0);
51 assert(n > 0 && "argmin requires non-empty input");
52 std::size_t bestIdx = 0;
53 T bestVal = x(0);
54 for (std::size_t i = 1; i < n; ++i) {
55 const T v = x(i);
56 if (v < bestVal) {
57 bestVal = v;
58 bestIdx = i;
59 }
60 }
61 return bestIdx;
62}
63
74template <class T, Layout L> inline std::size_t argmax(const NDArray<T, 1, L> &x) noexcept {
75 static_assert(std::is_same_v<T, float> || std::is_same_v<T, double>,
76 "argmax<T> requires T to be float or double");
77 const std::size_t n = x.dim(0);
78 assert(n > 0 && "argmax requires non-empty input");
79 std::size_t bestIdx = 0;
80 T bestVal = x(0);
81 for (std::size_t i = 1; i < n; ++i) {
82 const T v = x(i);
83 if (v > bestVal) {
84 bestVal = v;
85 bestIdx = i;
86 }
87 }
88 return bestIdx;
89}
90
106template <class T, Layout L>
107inline void topk(const NDArray<T, 1, L> &x, std::size_t k, std::span<std::size_t> outIdx) noexcept {
108 static_assert(std::is_same_v<T, float> || std::is_same_v<T, double>,
109 "topk<T> requires T to be float or double");
110 assert(outIdx.size() == k && "topk requires outIdx.size() == k");
111 if (k == 0) {
112 return;
113 }
114 const std::size_t n = x.dim(0);
115 assert(k <= n && "topk requires k <= x.dim(0)");
116
117 std::vector<std::pair<T, std::size_t>> staged;
118 staged.reserve(n);
119 for (std::size_t i = 0; i < n; ++i) {
120 staged.emplace_back(x(i), i);
121 }
122
123 std::vector<std::pair<T, std::size_t>> top(k);
124 const auto cmp = [](const std::pair<T, std::size_t> &a,
125 const std::pair<T, std::size_t> &b) noexcept {
126 // Primary: larger value first. Secondary on ties: smaller index first so the output is
127 // deterministic when duplicate values collide.
128 if (a.first != b.first) {
129 return a.first > b.first;
130 }
131 return a.second < b.second;
132 };
133 std::partial_sort_copy(staged.begin(), staged.end(), top.begin(), top.end(), cmp);
134
135 for (std::size_t i = 0; i < k; ++i) {
136 outIdx[i] = top[i].second;
137 }
138}
139
140} // namespace clustering::math
Represents a multidimensional array (NDArray) of a fixed number of dimensions N and element type T.
Definition ndarray.h:136
std::size_t argmin(const NDArray< T, 1, L > &x) noexcept
Index of the first minimum in a rank-1 array.
Definition reduce.h:47
void topk(const NDArray< T, 1, L > &x, std::size_t k, std::span< std::size_t > outIdx) noexcept
Indices of the top-k largest values, written in descending value order.
Definition reduce.h:107
T sum(const NDArray< T, 1, L > &x) noexcept
Naive single-pass sum of a rank-1 array.
Definition reduce.h:25
std::size_t argmax(const NDArray< T, 1, L > &x) noexcept
Index of the first maximum in a rank-1 array.
Definition reduce.h:74