Skip to content

Commit 58d7edf

Browse files
committed
got it working
1 parent 3ea65d0 commit 58d7edf

File tree

10 files changed

+176
-73
lines changed

10 files changed

+176
-73
lines changed

include/bucket.h

Lines changed: 6 additions & 6 deletions
Original file line numberDiff line numberDiff line change
@@ -3,6 +3,7 @@
33

44
#include <iostream>
55
#include <vector>
6+
#include <bitset>
67

78
#include "types.h"
89

@@ -42,13 +43,12 @@ inline static Depths get_index_depths(vec_t update_idx, size_t seed, col_hash_t
4243
uint64_t depth_hash = col_hash(&update_idx, sizeof(vec_t), seed);
4344
Depths ret;
4445

45-
depth_hash |= (1ull << max_depth); // assert not > max_depth by ORing
46-
ret[0] = __builtin_ctzll(depth_hash);
46+
// cap the depth at max_depth by ORing in a set bit at position max_depth
47+
ret[0] = __builtin_ctzll(depth_hash | (1ull << max_depth));
4748

48-
// shift hash over and reassert max_depth
49-
depth_hash >>= 32;
50-
depth_hash |= (1ull << max_depth);
51-
ret[1] = __builtin_ctzll(depth_hash);
49+
// shift the hash right by max_depth bits, then compute a second depth capped at max_depth the same way
50+
depth_hash >>= max_depth;
51+
ret[1] = __builtin_ctzll(depth_hash | (1ull << max_depth));
5252

5353
return ret;
5454
}

include/cc_sketch_alg.h

Lines changed: 11 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -187,6 +187,17 @@ class CCSketchAlg {
187187
void apply_update_batch(int thr_id, node_id_t src_vertex,
188188
const std::vector<node_id_t> &dst_vertices);
189189

190+
/**
191+
* Update the sketches for a particular vertex, given a batch of edge indices. These indices must
192+
* be constructed using concat_pairing_fn() and must all be associated with a particular graph
193+
* vertex.
194+
* param: thr_id The id of the thread performing the update [0, num_threads)
195+
* param: src_vertex The vertex where the edges originate.
196+
* param: idxs A vector of edge indices, each constructed using concat_pairing_fn().
197+
*/
198+
void apply_concat_update_batch(int thr_id, node_id_t src_vertex,
199+
const std::vector<edge_id_t> &idxs);
200+
190201
/**
191202
* Return whether we have cached an answer to the query.
192203
* This allows the driver to avoid flushing the gutters before calling query functions.

include/dense_sketch.h

Lines changed: 4 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -13,6 +13,8 @@
1313
#include "bucket.h"
1414
#include "sketch_types.h"
1515

16+
class SparseSketch;
17+
1618
/**
1719
* Sketch for graph processing, either CubeSketch or CameoSketch.
1820
* Sub-linear representation of a vector.
@@ -151,6 +153,7 @@ class DenseSketch {
151153
void zero_contents();
152154

153155
friend bool operator==(const DenseSketch& sketch1, const DenseSketch& sketch2);
156+
friend bool operator==(const SparseSketch& sparse, const DenseSketch& dense);
154157
friend std::ostream& operator<<(std::ostream& os, const DenseSketch& sketch);
155158

156159
/**
@@ -175,7 +178,7 @@ class DenseSketch {
175178

176179
inline const Bucket* get_readonly_bucket_ptr() const { return (const Bucket*) buckets; }
177180
inline uint64_t get_seed() const { return seed; }
178-
inline size_t column_seed(size_t column_idx) const { return seed + column_idx * 5; }
181+
inline size_t column_seed(size_t column_idx) const { return seed + (5 * column_idx); }
179182
inline size_t checksum_seed() const { return seed; }
180183
inline size_t get_columns() const { return num_columns; }
181184
inline size_t get_buckets() const { return num_buckets; }

include/sparse_sketch.h

Lines changed: 22 additions & 18 deletions
Original file line numberDiff line numberDiff line change
@@ -13,11 +13,13 @@
1313
#include "bucket.h"
1414
#include "sketch_types.h"
1515

16+
class DenseSketch;
17+
1618
#pragma pack(push,1)
1719
struct SparseBucket {
18-
uint8_t next; // index of next sparse bucket in this column
19-
uint8_t row; // row of sparse bucket
20-
Bucket bkt; // actual bucket content
20+
uint16_t next; // index of next sparse bucket in this column
21+
uint8_t row; // row of sparse bucket
22+
Bucket bkt; // actual bucket content
2123
};
2224
#pragma pack(pop)
2325

@@ -67,9 +69,10 @@ class SparseSketch {
6769
// TODO: evaluate implications of this constant
6870
static constexpr double sparse_bucket_constant = 3; // constant factor c (see diagram)
6971
SparseBucket* sparse_buckets; // a pointer into the buckets array
70-
uint8_t *ll_metadata; // pointer to heads of column LLs
72+
uint16_t *ll_metadata; // pointer to heads of column LLs
7173
size_t number_of_sparse_buckets = 0; // cur number of sparse buckets
7274
size_t sparse_capacity = sparse_bucket_constant * num_columns; // max number of sparse buckets
75+
static constexpr size_t max_columns = uint16_t(-1) / sparse_bucket_constant - 1;
7376

7477
/**
7578
* Reallocates the bucket array if necessary to either grow or shrink the dense region
@@ -80,43 +83,43 @@ class SparseSketch {
8083
// These variables let us know how many Buckets to allocate to make space for the SparseBuckets
8184
// and the LL metadata that will use that space
8285
size_t sparse_data_size = ceil(double(sparse_capacity) * sizeof(SparseBucket) / sizeof(Bucket));
83-
size_t ll_metadata_size = ceil((double(num_columns) + 1) * sizeof(uint8_t) / sizeof(Bucket));
86+
size_t ll_metadata_size = ceil((double(num_columns) + 1) * sizeof(uint16_t) / sizeof(Bucket));
8487

85-
void update_sparse(uint8_t col, const SparseBucket &to_add);
88+
void update_sparse(uint16_t col, const SparseBucket &to_add);
8689
SketchSample sample_sparse(size_t first_col, size_t end_col);
8790

88-
inline uint8_t remove_ll_head(size_t col) {
89-
uint8_t temp = ll_metadata[col];
91+
inline uint16_t remove_ll_head(size_t col) {
92+
uint16_t temp = ll_metadata[col];
9093
ll_metadata[col] = sparse_buckets[ll_metadata[col]].next;
9194
return temp;
9295
}
93-
inline uint8_t claim_free_bucket() {
94-
assert(ll_metadata[num_columns] != uint8_t(-1));
96+
inline uint16_t claim_free_bucket() {
97+
assert(ll_metadata[num_columns] != uint16_t(-1));
9598
return remove_ll_head(num_columns);
9699
}
97-
inline void insert_to_ll_head(size_t col, uint8_t add_idx) {
100+
inline void insert_to_ll_head(size_t col, uint16_t add_idx) {
98101
sparse_buckets[add_idx].next = ll_metadata[col];
99102
ll_metadata[col] = add_idx;
100103
}
101-
inline void free_bucket(uint8_t bkt_idx) {
104+
inline void free_bucket(uint16_t bkt_idx) {
102105
sparse_buckets[bkt_idx].row = 0;
103106
sparse_buckets[bkt_idx].bkt = {0, 0};
104107
insert_to_ll_head(num_columns, bkt_idx);
105108
}
106-
inline void insert_to_ll(uint8_t add_idx, SparseBucket &prev) {
109+
inline void insert_to_ll(uint16_t add_idx, SparseBucket &prev) {
107110
sparse_buckets[add_idx].next = prev.next;
108111
prev.next = add_idx;
109112
}
110113
inline void remove_from_ll(SparseBucket& bkt_to_remove, SparseBucket &prev) {
111114
prev.next = bkt_to_remove.next;
112115
}
113-
inline bool merge_sparse_bkt(uint8_t our_idx, const SparseBucket& oth, uint8_t prev_idx,
116+
inline bool merge_sparse_bkt(uint16_t our_idx, const SparseBucket& oth, uint16_t prev_idx,
114117
size_t col) {
115118
SparseBucket &ours = sparse_buckets[our_idx];
116119
ours.bkt.alpha ^= oth.bkt.alpha;
117120
ours.bkt.gamma ^= oth.bkt.gamma;
118121
if (SketchBucket::is_empty(ours.bkt)) {
119-
if (prev_idx == uint8_t(-1))
122+
if (prev_idx == uint16_t(-1))
120123
remove_ll_head(col);
121124
else
122125
remove_from_ll(ours, sparse_buckets[prev_idx]);
@@ -162,11 +165,11 @@ class SparseSketch {
162165

163166
void upd_sparse_ptrs() {
164167
sparse_buckets = (SparseBucket *) &buckets[calc_sparse_index(num_dense_rows)];
165-
ll_metadata = (uint8_t *) &buckets[calc_metadata_index(num_dense_rows)];
168+
ll_metadata = (uint16_t *) &buckets[calc_metadata_index(num_dense_rows)];
166169
}
167170

168171
// given another SparseSketch column, merge it into ours
169-
void merge_sparse_column(const SparseBucket* oth_sparse_buckets, const uint8_t* oth_ll_metadata,
172+
void merge_sparse_column(const SparseBucket* oth_sparse_buckets, const uint16_t* oth_ll_metadata,
170173
size_t col);
171174
public:
172175
/**
@@ -274,6 +277,7 @@ class SparseSketch {
274277
void zero_contents();
275278

276279
friend bool operator==(const SparseSketch& sketch1, const SparseSketch& sketch2);
280+
friend bool operator==(const SparseSketch& sparse, const DenseSketch& dense);
277281
friend std::ostream& operator<<(std::ostream& os, const SparseSketch& sketch);
278282

279283
/**
@@ -294,7 +298,7 @@ class SparseSketch {
294298
// return the size of a sketch given vector size n and number of samples s
295299
static size_t estimate_bytes(size_t /*n*/, size_t s) {
296300
size_t num_cols = s * default_cols_per_sample;
297-
size_t metadata_size = ceil(double(num_cols + 1) * sizeof(uint8_t) / sizeof(Bucket)) * sizeof(Bucket);
301+
size_t metadata_size = ceil(double(num_cols + 1) * sizeof(uint16_t) / sizeof(Bucket)) * sizeof(Bucket);
298302
size_t sparse_size =
299303
ceil(double(num_cols) * sparse_bucket_constant * sizeof(SparseBucket) / sizeof(Bucket)) *
300304
sizeof(Bucket);

src/cc_sketch_alg.cpp

Lines changed: 6 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -146,7 +146,7 @@ inline bool CCSketchAlg::sample_supernode(Sketch &skt) {
146146
Edge e = inv_concat_pairing_fn(sample.idx);
147147
SampleResult result_type = sample.result;
148148

149-
// std::cout << " " << result_type << " e:" << e.src << " " << e.dst << std::endl;
149+
// std::cerr << " " << result_type << " e:" << e.src << " " << e.dst << std::endl;
150150

151151
if (result_type == FAIL) {
152152
modified = true;
@@ -495,7 +495,7 @@ void CCSketchAlg::boruvka_emulation() {
495495
// << std::endl;
496496

497497
while (true) {
498-
// std::cout << " Round: " << round_num << std::endl;
498+
// std::cerr << " Round: " << round_num << std::endl;
499499
// start = std::chrono::steady_clock::now();
500500
modified = perform_boruvka_round(round_num, merge_instr, global_merges);
501501
// std::cout << " perform_boruvka_round = "
@@ -637,12 +637,16 @@ std::vector<SpanningForest> CCSketchAlg::calc_disjoint_spanning_forests(size_t k
637637
size_t max_rounds = 0;
638638

639639
for (size_t i = 0; i < k; i++) {
640+
std::cout << " Spanning forest: " << i << std::endl;
640641
compute_dsu();
641642

642643
SFs.emplace_back(num_vertices, spanning_forest);
643644
max_rounds = std::max(last_query_rounds, max_rounds);
644645

645646
filter_sf_edges(SFs[SFs.size() - 1]);
647+
648+
std::cout << "Spanning Forest " << i << " size = " << SFs[SFs.size() - 1].get_edges().size() << std::endl;
649+
std::cout << "Last query rounds = " << last_query_rounds << std::endl;
646650
if (SFs[SFs.size() - 1].get_edges().size() == 0) break;
647651
}
648652

src/dense_sketch.cpp

Lines changed: 15 additions & 4 deletions
Original file line numberDiff line numberDiff line change
@@ -63,10 +63,21 @@ void DenseSketch::update(const vec_t update_idx) {
6363
SketchBucket::update(deterministic_bucket(), update_idx, checksum);
6464

6565
// Update higher depth buckets
66-
for (unsigned i = 0; i < num_columns; ++i) {
67-
col_hash_t depth = SketchBucket::get_index_depth(update_idx, column_seed(i), bkt_per_col);
66+
SketchBucket::Depths depths;
67+
for (size_t i = 0; i < num_columns - 1; i += 2) {
68+
depths = SketchBucket::get_index_depths(update_idx, column_seed(i), bkt_per_col);
69+
for (size_t j = 0; j < 2; j++) {
70+
col_hash_t depth = depths[j];
71+
likely_if(depth < bkt_per_col) {
72+
SketchBucket::update(bucket(i + j, depth), update_idx, checksum);
73+
}
74+
}
75+
}
76+
if ((num_columns & 0x1) == 1) {
77+
size_t col = num_columns - 1;
78+
size_t depth = SketchBucket::get_index_depth(update_idx, column_seed(col), bkt_per_col);
6879
likely_if(depth < bkt_per_col) {
69-
SketchBucket::update(bucket(i, depth), update_idx, checksum);
80+
SketchBucket::update(bucket(col, depth), update_idx, checksum);
7081
}
7182
}
7283
}
@@ -223,7 +234,7 @@ bool operator==(const DenseSketch &sketch1, const DenseSketch &sketch2) {
223234
}
224235

225236
std::ostream &operator<<(std::ostream &os, const DenseSketch &sketch) {
226-
Bucket bkt = sketch.buckets[sketch.num_buckets - 1];
237+
Bucket bkt = sketch.deterministic_bucket();
227238
bool good = SketchBucket::is_good(bkt, sketch.checksum_seed());
228239
vec_t a = bkt.alpha;
229240
vec_hash_t c = bkt.gamma;

src/min_cut_sketch_alg.cpp

Lines changed: 10 additions & 3 deletions
Original file line numberDiff line numberDiff line change
@@ -137,9 +137,16 @@ void MinCutSketchAlg::apply_update_batch(size_t thr_id, node_id_t src_vertex,
137137

138138
std::fill(&num_mapped[0], &num_mapped[max_subgraphs - 1], 0);
139139
for (auto tagged_edge : batch.dsts_data) {
140-
assert(tagged_edge.subgraph < cur_subgraphs);
141-
142-
buffers[tagged_edge.subgraph][num_mapped[tagged_edge.subgraph]++] = tagged_edge.dst;
140+
size_t subgraph = tagged_edge.subgraph;
141+
assert(subgraph < cur_subgraphs);
142+
143+
buffers[subgraph][num_mapped[subgraph]++] = tagged_edge.dst;
144+
assert(num_mapped[subgraph] <= buffers[subgraph].capacity());
145+
146+
unlikely_if (num_mapped[subgraph] >= buffer_elms) {
147+
cc_sketches[subgraph]->apply_update_batch(thr_id, batch.src, buffers[subgraph]);
148+
num_mapped[subgraph] = 0;
149+
}
143150
}
144151

145152
for (size_t i = 1; i < batch.edge_store_subgraph; i++) {

0 commit comments

Comments
 (0)