Skip to content
Open
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
75 changes: 19 additions & 56 deletions include/dbscan/algo.h
Original file line number Diff line number Diff line change
Expand Up @@ -57,7 +57,7 @@ int DBSCAN(intT n, floatT* PF, double epsilon, intT minPts, bool* coreFlagOut, i
intT count = 0;
auto isCore = [&] (pointT *p) {
if(count >= minPts) return true;
if(p->distSqr(P[i]) <= epsSqr) {//todo sqrt opt
if(p->distSqr(P[i]) <= epsSqr) {
count ++;}
return false;};
G->nghPointMap(P[i].coordinate(), isCore);
Expand Down Expand Up @@ -165,67 +165,30 @@ int DBSCAN(intT n, floatT* PF, double epsilon, intT minPts, bool* coreFlagOut, i
delete G;

//improving cluster representation
auto cluster2 = newA(intT, n);
auto flag = newA(intT, n+1);
parallel_for(0, n, [&](intT i){cluster2[i] = cluster[i];});
sampleSort(cluster, n, std::less<intT>());

flag[0] = 1;
parallel_for(1, n, [&](intT i){
if (cluster[i] != cluster[i-1])
flag[i] = 1;
else
flag[i] = 0;
});
flag[n] = sequence::prefixSum(flag, 0, n);

// typedef pair<intT,intT> eType;
struct myPair {
intT first;
intT second;
myPair(intT _first, intT _second): first(_first), second(_second) {}
myPair(): first(-1), second(-1) {}
inline bool operator==(myPair a) {
if(a.first==first && a.second== second)
return true;
else
return false;
}
};

typedef Table<hashSimplePair<myPair>,intT> tableT;
auto T = new tableT(n, hashSimplePair<myPair>());
parallel_for(0, n, [&] (intT i) {
if (flag[i] != flag[i+1]) {
// T->insert(make_pair(cluster[i], flag[i]));
T->insert(myPair(cluster[i], flag[i]));
}
});
// O(n) renumbering: cluster IDs are point indices ∈ [0,n), use prefix sum
// instead of O(n log n) sort + hash table.
auto idMap = newA(intT, n);
parallel_for(0, n, [&](intT i) { idMap[i] = 0; });
parallel_for(0, n, [&](intT i) {
if (cluster[i] >= 0) idMap[cluster[i]] = 1;
});
sequence::prefixSum(idMap, 0, n); // exclusive: idMap[cid] = sequential ID for cid

if(T->find(-1).second < 0) {
parallel_for(0, n, [&](intT i){
cluster2[i] = T->find(cluster2[i]).second;
});
} else {
parallel_for(0, n, [&](intT i){
if (cluster2[i] > 0)
cluster2[i] = T->find(cluster2[i]).second-1;
});
}
// Remap to sequential IDs (separate buffer to avoid read/write conflict)
auto cluster2 = newA(intT, n);
parallel_for(0, n, [&](intT i) {
cluster2[i] = (cluster[i] >= 0) ? idMap[cluster[i]] : cluster[i];
});

//restoring order
parallel_for(0, n, [&](intT i){
cluster[I[i]] = cluster2[i];
});
parallel_for(0, n, [&](intT i){
coreFlagOut[I[i]] = coreFlag[i];
});
parallel_for(0, n, [&](intT i) {
cluster[I[i]] = cluster2[i];
coreFlagOut[I[i]] = coreFlag[i];
});

free(I);
free(cluster2);
free(flag);
T->del(); // Required to clean-up T's internals
delete T;
free(idMap);
free(P);
#ifdef VERBOSE
cout << "output-time = " << tt.stop() << endl;
Expand Down
32 changes: 17 additions & 15 deletions include/dbscan/coreBccp.h
Original file line number Diff line number Diff line change
Expand Up @@ -29,17 +29,18 @@
#include "pbbs/parallel.h"
#include "pbbs/utils.h"

// r holds squared distance; using distSqr and nodeDistanceSqr avoids sqrt in hot path
template<class nodeT, class objT>
inline void compBcpCoreHSerial(nodeT* n1, nodeT* n2, floatT* r, intT* coreFlag, objT* P) {
if (n1->nodeDistance(n2) > *r) return;
if (n1->nodeDistanceSqr(n2) > *r) return;

if (n1->isLeaf() && n2->isLeaf()) {//basecase
for (intT i=0; i<n1->size(); ++i) {
for (intT j=0; j<n2->size(); ++j) {
auto pi = n1->getItem(i);
auto pj = n2->getItem(j);
if (coreFlag[pi - P] && coreFlag[pj - P]) {
floatT dist = pi->dist(*pj);
floatT dist = pi->distSqr(*pj);
r[0] = min(r[0], dist);
}
}
Expand Down Expand Up @@ -78,30 +79,31 @@ inline void compBcpCoreHSerial(nodeT* n1, nodeT* n2, floatT* r, intT* coreFlag,

template<class nodeT, class objT>
inline void compBcpCoreHBase(nodeT* n1, nodeT* n2, floatT* r, intT* coreFlag, objT* P) {
if (n1->nodeDistance(n2) > *r) return;
if (n1->nodeDistanceSqr(n2) > *r) return;

if (n1->isLeaf() && n2->isLeaf()) {//basecase
for (intT i=0; i<n1->size(); ++i) {
for (intT j=0; j<n2->size(); ++j) {
auto pi = n1->getItem(i);
auto pj = n2->getItem(j);
if (coreFlag[pi - P] && coreFlag[pj - P]) {
floatT dist = pi->dist(*pj);
floatT dist = pi->distSqr(*pj);
utils::writeMin(r, dist);
}
}
}
} else {//recursive, todo consider call order, might help
} else {//recursive
if (n1->isLeaf()) {
if (n1->nodeDistance(n2->L()) < n1->nodeDistance(n2->R())) {
// nodeDistanceSqr avoids sqrt; monotonicity preserves ordering
if (n1->nodeDistanceSqr(n2->L()) < n1->nodeDistanceSqr(n2->R())) {
compBcpCoreH(n1, n2->L(), r, coreFlag, P);
compBcpCoreH(n1, n2->R(), r, coreFlag, P);
} else {
compBcpCoreH(n1, n2->R(), r, coreFlag, P);
compBcpCoreH(n1, n2->L(), r, coreFlag, P);
}
} else if (n2->isLeaf()) {
if (n2->nodeDistance(n1->L()) < n2->nodeDistance(n1->R())) {
if (n2->nodeDistanceSqr(n1->L()) < n2->nodeDistanceSqr(n1->R())) {
compBcpCoreH(n2, n1->L(), r, coreFlag, P);
compBcpCoreH(n2, n1->R(), r, coreFlag, P);
} else {
Expand All @@ -115,7 +117,7 @@ inline void compBcpCoreHBase(nodeT* n1, nodeT* n2, floatT* r, intT* coreFlag, ob
ordering[2] = make_pair(n2->L(), n1->R());
ordering[3] = make_pair(n2->R(), n1->R());
auto bbd = [&](pair<nodeT*,nodeT*> p1, pair<nodeT*,nodeT*> p2) {
return p1.first->nodeDistance(p1.second) < p2.first->nodeDistance(p2.second);};
return p1.first->nodeDistanceSqr(p1.second) < p2.first->nodeDistanceSqr(p2.second);};
quickSortSerial(ordering, 4, bbd);
for (intT o=0; o<4; ++o) {
compBcpCoreH(ordering[o].first, ordering[o].second, r, coreFlag, P);}
Expand All @@ -125,21 +127,21 @@ inline void compBcpCoreHBase(nodeT* n1, nodeT* n2, floatT* r, intT* coreFlag, ob

template<class nodeT, class objT>
inline void compBcpCoreH(nodeT* n1, nodeT* n2, floatT* r, intT* coreFlag, objT* P) {
if (n1->nodeDistance(n2) > *r) return;
if (n1->nodeDistanceSqr(n2) > *r) return;

if ((n1->isLeaf() && n2->isLeaf()) || (n1->size()+n2->size() < 2000)) {
return compBcpCoreHBase(n1, n2, r, coreFlag, P);
} else {//recursive, todo consider call order, might help
} else {//recursive
if (n1->isLeaf()) {
if (n1->nodeDistance(n2->L()) < n1->nodeDistance(n2->R())) {
if (n1->nodeDistanceSqr(n2->L()) < n1->nodeDistanceSqr(n2->R())) {
par_do([&](){compBcpCoreH(n1, n2->L(), r, coreFlag, P);},
[&](){compBcpCoreH(n1, n2->R(), r, coreFlag, P);});
} else {
par_do([&](){compBcpCoreH(n1, n2->R(), r, coreFlag, P);},
[&](){compBcpCoreH(n1, n2->L(), r, coreFlag, P);});
}
} else if (n2->isLeaf()) {
if (n2->nodeDistance(n1->L()) < n2->nodeDistance(n1->R())) {
if (n2->nodeDistanceSqr(n1->L()) < n2->nodeDistanceSqr(n1->R())) {
par_do([&](){compBcpCoreH(n2, n1->L(), r, coreFlag, P);},
[&](){compBcpCoreH(n2, n1->R(), r, coreFlag, P);});
} else {
Expand All @@ -153,7 +155,7 @@ inline void compBcpCoreH(nodeT* n1, nodeT* n2, floatT* r, intT* coreFlag, objT*
ordering[2] = make_pair(n2->L(), n1->R());
ordering[3] = make_pair(n2->R(), n1->R());
auto bbd = [&](pair<nodeT*,nodeT*> p1, pair<nodeT*,nodeT*> p2) {
return p1.first->nodeDistance(p1.second) < p2.first->nodeDistance(p2.second);};
return p1.first->nodeDistanceSqr(p1.second) < p2.first->nodeDistanceSqr(p2.second);};
quickSortSerial(ordering, 4, bbd);
parallel_for (0, 4, [&](intT o) {
compBcpCoreH(ordering[o].first, ordering[o].second, r, coreFlag, P);}, 1);
Expand All @@ -179,11 +181,11 @@ inline bool hasEdge(intT n1, intT n2, intT* coreFlag, objT* P, floatT epsilon, c

if (!trees[n1])
trees[n1] = new treeT(cells[n1].getItem(), cells[n1].size(), false);//todo allocation, parallel
if (!trees[n2])
if (!trees[n2])
trees[n2] = new treeT(cells[n2].getItem(), cells[n2].size(), false);//todo allocation, parallel
floatT r = floatMax();
compBcpCoreH(trees[n1]->rootNode(), trees[n2]->rootNode(), &r, coreFlag, P);
return r <= epsilon;
return r <= epsilon * epsilon; // r holds squared distance now
}

#endif
42 changes: 34 additions & 8 deletions include/dbscan/grid.h
Original file line number Diff line number Diff line change
Expand Up @@ -92,21 +92,19 @@ struct grid {
cells = newA(cellT, cellCapacity);
nbrCache = newA(cellBuf*, cellCapacity);
cacheLocks = (std::mutex*) malloc(cellCapacity * sizeof(std::mutex));
parallel_for(0, cellCapacity, [&](intT i) {
new (&cacheLocks[i]) std::mutex();
nbrCache[i] = NULL;
cells[i].init();
});
numCells = 0;

myHash = new cellHashT(pMinn, r);
table = new tableT(cellMax*2, cellHash<dim, objT>(myHash));//todo load
// Hash table sized for expected number of cells, with safety rebuild
// in insertParallel if numCells exceeds the estimate.
intT tableHint = std::max((intT)2048, cellMax / 4);
table = new tableT(tableHint, cellHash<dim, objT>(myHash));
}

~grid() {
free(cells);
free(cacheLocks);
parallel_for(0, cellCapacity, [&](intT i) {
parallel_for(0, numCells, [&](intT i) {
if(nbrCache[i]) delete nbrCache[i];
});
free(nbrCache);
Expand Down Expand Up @@ -205,9 +203,24 @@ struct grid {
freeFlag=true;}

parallel_for(0, nn, [&](intT i){I[i] = i;});

// Pre-compute integer cell coordinates to avoid floor() in every sort comparison
auto cellKeys = newA(intT, nn * dim);
floatT invR = 1.0 / r;
parallel_for(0, nn, [&](intT i) {
for (int d = 0; d < dim; d++) {
cellKeys[i * dim + d] = (intT)floor((P[i][d] - pMin[d]) * invR);
}
});
auto ipLess = [&] (intT a, intT b) {
return pointGridCmp<dim, objT, geoPointT>(P[a], P[b], pMin, r);};
for (int d = 0; d < dim; d++) {
intT ca = cellKeys[a * dim + d];
intT cb = cellKeys[b * dim + d];
if (ca != cb) return ca < cb;
}
return false;};
sampleSort(I, nn, ipLess);
free(cellKeys);
parallel_for(0, nn, [&](intT i){PP[i] = P[I[i]];});

flag[0] = 1;
Expand All @@ -224,6 +237,19 @@ struct grid {
if (numCells > cellCapacity) {
cout << "error, grid insert exceeded cell capacity, abort()" << endl;abort();}

// Rebuild hash table if initial estimate was too small
if (numCells > cellCapacity / 8) {
table->del(); delete table;
table = new tableT(numCells * 2, cellHash<dim, objT>(myHash));
}

// Initialize only the cells that will actually be used
parallel_for(0, numCells, [&](intT i) {
new (&cacheLocks[i]) std::mutex();
nbrCache[i] = NULL;
cells[i].init();
});

parallel_for(0, nn, [&](intT i) {
if (flag[i] != flag[i+1]) {
auto c = &cells[flag[i]];
Expand Down
44 changes: 30 additions & 14 deletions include/dbscan/kdNode.h
Original file line number Diff line number Diff line change
Expand Up @@ -47,11 +47,10 @@ class kdNode {
}}

inline void boundingBoxParallel() {
// intT P = getWorkers()*8;
static const intT P = 36 * 8;
intT P = getWorkers() * 8;
intT blockSize = (n+P-1)/P;
pointT localMin[P];
pointT localMax[P];
auto localMin = newA(pointT, P);
auto localMax = newA(pointT, P);
for (intT i=0; i<P; ++i) {
localMin[i] = pointT(items[0]->coordinate());
localMax[i] = pointT(items[0]->coordinate());}
Expand All @@ -68,6 +67,8 @@ class kdNode {
for(intT p=0; p<P; ++p) {
pMin.minCoords(localMin[p].x);
pMax.maxCoords(localMax[p].x);}
free(localMin);
free(localMax);
}

inline intT splitItemSerial(floatT xM) {
Expand Down Expand Up @@ -264,6 +265,20 @@ class kdNode {
return 0; // intersect
}

// squared bb distance — avoids sqrt, safe to compare against rSqr thresholds
// ternary instead of std::max lets compiler emit maxss/fmax without NaN concerns
inline floatT nodeDistanceSqr(nodeT* n2) {
floatT rsqr = 0;
for (int d = 0; d < dim; ++d) {
floatT gapA = pMin[d] - n2->pMax[d];
floatT gapB = n2->pMin[d] - pMax[d];
floatT gap = gapA > gapB ? gapA : gapB;
gap = gap > 0 ? gap : 0;
rsqr += gap * gap;
}
return rsqr;
}

//return the far bb distance between n1 and n2
inline floatT nodeFarDistance(nodeT* n2) {
floatT result = 0;
Expand Down Expand Up @@ -296,44 +311,45 @@ class kdNode {
}

//vecT need to be vector<objT*>
// rSqr is the squared query radius; use distSqr to avoid sqrt per point
template<class vecT>
void rangeNeighbor(pointT queryPt, floatT r, pointT pMin1, pointT pMax1, vecT* accum) {
void rangeNeighbor(pointT queryPt, floatT rSqr, pointT pMin1, pointT pMax1, vecT* accum) {
int relation = boxCompare(pMin1, pMax1, pMin, pMax);
if (relation == boxInclude) {
for(intT i=0; i<n; ++i) {
if (items[i]->getCoordObj()->dist(queryPt) <= r)
if (items[i]->getCoordObj()->distSqr(queryPt) <= rSqr)
accum->push_back(items[i]);
}
} else if (relation == boxOverlap) {
if (isLeaf()) {
for(intT i=0; i<n; ++i) {
if (items[i]->getCoordObj()->dist(queryPt) <= r &&
if (items[i]->getCoordObj()->distSqr(queryPt) <= rSqr &&
itemInBox(pMin1, pMax1, items[i])) accum->push_back(items[i]);
}
} else {
left->rangeNeighbor(queryPt, r, pMin1, pMax1, accum);
right->rangeNeighbor(queryPt, r, pMin1, pMax1, accum);}
left->rangeNeighbor(queryPt, rSqr, pMin1, pMax1, accum);
right->rangeNeighbor(queryPt, rSqr, pMin1, pMax1, accum);}
}
}

template<class func, class func2>
void rangeNeighbor(pointT queryPt, floatT r, pointT pMin1, pointT pMax1, func term, func2 doTerm) {
void rangeNeighbor(pointT queryPt, floatT rSqr, pointT pMin1, pointT pMax1, func term, func2 doTerm) {
if (term()) return;
int relation = boxCompare(pMin1, pMax1, pMin, pMax);
if (relation == boxInclude) {
for(intT i=0; i<n; ++i) {
if (items[i]->getCoordObj()->dist(queryPt) <= r &&
if (items[i]->getCoordObj()->distSqr(queryPt) <= rSqr &&
doTerm(items[i])) break;
}
} else if (relation == boxOverlap) {
if (isLeaf()) {
for(intT i=0; i<n; ++i) {
if (items[i]->getCoordObj()->dist(queryPt) <= r &&
if (items[i]->getCoordObj()->distSqr(queryPt) <= rSqr &&
doTerm(items[i])) break;
}
} else {
left->rangeNeighbor(queryPt, r, pMin1, pMax1, term, doTerm);
right->rangeNeighbor(queryPt, r, pMin1, pMax1, term, doTerm);}
left->rangeNeighbor(queryPt, rSqr, pMin1, pMax1, term, doTerm);
right->rangeNeighbor(queryPt, rSqr, pMin1, pMax1, term, doTerm);}
}
}

Expand Down
Loading
Loading