105 const std::size_t n = X.
dim(0);
106 const std::size_t d = X.
dim(1);
118 out.
edges.reserve(n - 1);
121 const T *xData = X.
data();
122 const bool useDenseCore = shouldUseDenseCore(n, d, minSamples);
128 const bool rowsAligned32 =
129 X.template isAligned<32>() && (d % (std::size_t{32} /
sizeof(T)) == 0);
131 std::vector<T> rowNorms;
134 for (std::size_t i = 0; i < n; ++i) {
135 const T *row = xData + (i * d);
136 rowNorms[i] = rowsAligned32 ? math::detail::dotRowAligned32Ptr(row, row, d)
137 : math::detail::dotRowPtr(row, row, d);
139 computeDenseCoreDistances(X, rowNorms, minSamples, rowsAligned32, pool, coreDistData);
145 const auto kSigned =
static_cast<std::int32_t
>(minSamples);
146 auto [knnIdx, knnSqDist] = tree.
knnQuery(kSigned, pool);
148 for (std::size_t i = 0; i < n; ++i) {
149 coreDistData[i] = knnSqDist(i, minSamples - 1);
158 std::vector<std::uint8_t> visited(n, std::uint8_t{0});
159 std::vector<std::int32_t> parent(n, std::int32_t{0});
160 std::vector<T> edgeWeight(n, std::numeric_limits<T>::max());
162 auto sqDistance = [&](
const T *rowT, std::size_t tIdx, std::size_t v)
noexcept {
164 const T *rowV = xData + (v * d);
165 const T dot = rowsAligned32 ? math::detail::dotRowAligned32Ptr(rowT, rowV, d)
166 : math::detail::dotRowPtr(rowT, rowV, d);
167 return math::detail::sqEuclideanFromDot(rowNorms[tIdx], rowNorms[v], dot);
169 return math::detail::sqEuclideanRowPtr(rowT, xData + (v * d), d);
172 auto relaxRange = [&](std::size_t lo, std::size_t hi, std::int32_t target, std::size_t tIdx,
173 T coreT,
const T *rowT)
noexcept {
174 for (std::size_t v = lo; v < hi; ++v) {
175 if (visited[v] != 0U) {
178 const T sq = sqDistance(rowT, tIdx, v);
183 const T coreV = coreDistData[v];
187 if (w < edgeWeight[v]) {
194 auto relaxRangeAndFindNext = [&](std::size_t lo, std::size_t hi, std::int32_t target,
195 std::size_t tIdx, T coreT,
196 const T *rowT)
noexcept -> std::pair<std::int32_t, T> {
197 std::int32_t bestV = -1;
198 T bestW = std::numeric_limits<T>::max();
199 for (std::size_t v = lo; v < hi; ++v) {
200 if (visited[v] != 0U) {
203 const T sq = sqDistance(rowT, tIdx, v);
208 const T coreV = coreDistData[v];
212 if (w < edgeWeight[v]) {
216 if (edgeWeight[v] < bestW) {
217 bestW = edgeWeight[v];
218 bestV =
static_cast<std::int32_t
>(v);
221 return {bestV, bestW};
224 auto findNext = [&]()
noexcept -> std::pair<std::int32_t, T> {
225 std::int32_t bestV = -1;
226 T bestW = std::numeric_limits<T>::max();
227 for (std::size_t v = 0; v < n; ++v) {
228 if (visited[v] != 0U) {
231 if (edgeWeight[v] < bestW) {
232 bestW = edgeWeight[v];
233 bestV =
static_cast<std::int32_t
>(v);
236 return {bestV, bestW};
239 auto persistentRelaxFrom = [&]() ->
bool {
240 if (!shouldUsePersistentParallelRelax(n, d, useDenseCore, pool)) {
250 struct alignas(64) LocalBest {
251 std::atomic<std::uint32_t> done{0};
252 std::int32_t vertex = -1;
253 T weight = std::numeric_limits<T>::max();
256 const std::size_t workerTasks = pool.
workerCount() - 1;
257 const std::size_t participantCount = workerTasks + 1;
258 std::vector<LocalBest> localBest(participantCount);
260 auto blockBegin = [&](std::size_t id)
noexcept {
return (n *
id) / participantCount; };
261 auto blockEnd = [&](std::size_t id)
noexcept {
return (n * (
id + 1)) / participantCount; };
262 auto relaxBlock = [&](std::size_t id,
263 std::int32_t target)
noexcept -> std::pair<std::int32_t, T> {
264 const auto tIdx =
static_cast<std::size_t
>(target);
265 const T coreT = coreDistData[tIdx];
266 const T *
const rowT = xData + (tIdx * d);
267 std::int32_t bestV = -1;
268 T bestW = std::numeric_limits<T>::max();
269 for (std::size_t v = blockBegin(
id); v < blockEnd(
id); ++v) {
270 if (visited[v] != 0U) {
273 const T sq = sqDistance(rowT, tIdx, v);
278 const T coreV = coreDistData[v];
282 if (w < edgeWeight[v]) {
286 if (edgeWeight[v] < bestW) {
287 bestW = edgeWeight[v];
288 bestV =
static_cast<std::int32_t
>(v);
291 return {bestV, bestW};
294 std::atomic<std::uint32_t> phase{0};
295 std::atomic<std::uint32_t> ready{0};
296 std::atomic<bool> stop{
false};
297 std::int32_t currentTarget = 0;
299 auto workerLoop = [&](std::size_t id) {
300 std::uint32_t seen = phase.load(std::memory_order_acquire);
301 ready.fetch_add(1, std::memory_order_release);
303 std::uint32_t next = phase.load(std::memory_order_acquire);
304 while (next == seen) {
306 next = phase.load(std::memory_order_acquire);
309 if (stop.load(std::memory_order_acquire)) {
312 auto [bv, bw] = relaxBlock(
id, currentTarget);
313 localBest[id].vertex = bv;
314 localBest[id].weight = bw;
315 localBest[id].done.store(seen, std::memory_order_release);
319 std::vector<std::future<void>> futures;
320 futures.reserve(workerTasks);
321 for (std::size_t
id = 1;
id < participantCount; ++id) {
322 futures.emplace_back(pool.
pool->submit_task([&,
id] { workerLoop(id); }));
324 while (ready.load(std::memory_order_acquire) != workerTasks) {
328 auto reduceBest = [&]()
noexcept -> std::pair<std::int32_t, T> {
329 std::int32_t bestV = -1;
330 T bestW = std::numeric_limits<T>::max();
331 for (
const LocalBest &candidate : localBest) {
332 if (candidate.vertex >= 0 && candidate.weight < bestW) {
333 bestW = candidate.weight;
334 bestV = candidate.vertex;
337 return {bestV, bestW};
340 auto relaxRound = [&](std::int32_t target)
noexcept -> std::pair<std::int32_t, T> {
341 currentTarget = target;
342 const std::uint32_t newPhase =
343 phase.fetch_add(1, std::memory_order_acq_rel) + std::uint32_t{1};
344 auto [bv, bw] = relaxBlock(0, target);
345 localBest[0].vertex = bv;
346 localBest[0].weight = bw;
350 for (std::size_t
id = 1;
id < participantCount; ++id) {
351 while (localBest[
id].done.load(std::memory_order_acquire) != newPhase) {
359 edgeWeight[0] = T{0};
360 auto [nextV, nextW] = relaxRound(
static_cast<std::int32_t
>(0));
362 while (out.
edges.size() + 1 < n) {
365 const auto bIdx =
static_cast<std::size_t
>(nextV);
369 if (out.
edges.size() + 1 == n) {
372 auto next = relaxRound(nextV);
377 stop.store(
true, std::memory_order_release);
378 phase.fetch_add(1, std::memory_order_release);
379 for (
auto &future : futures) {
385 if (persistentRelaxFrom()) {
389 auto relaxFrom = [&](std::int32_t target)
noexcept -> std::pair<std::int32_t, T> {
390 const auto tIdx =
static_cast<std::size_t
>(target);
391 const T coreT = coreDistData[tIdx];
392 const T *rowT = xData + (tIdx * d);
397 ->submit_blocks(std::size_t{0}, n,
398 [&](std::size_t lo, std::size_t hi) {
399 relaxRange(lo, hi, target, tIdx, coreT, rowT);
404 return relaxRangeAndFindNext(0, n, target, tIdx, coreT, rowT);
410 edgeWeight[0] = T{0};
411 auto [nextV, nextW] = relaxFrom(
static_cast<std::int32_t
>(0));
413 while (out.
edges.size() + 1 < n) {
419 const auto bIdx =
static_cast<std::size_t
>(nextV);
423 if (out.
edges.size() + 1 == n) {
426 auto next = relaxFrom(nextV);