Skip to content

Commit 6fce9f9

Browse files
authored
Merge pull request #134 from GraphStreamingProject/more_options
Compile time option to run L0 Sampling code and better GraphZeppelin configuration
2 parents 2b2e0f2 + 43f3f10 commit 6fce9f9

File tree

17 files changed

+298
-275
lines changed

17 files changed

+298
-275
lines changed

CMakeLists.txt

Lines changed: 6 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -88,9 +88,13 @@ set(BUILD_SHARED_LIBS "${SAVED_BUILD_SHARED_LIBS}" CACHE BOOL "" FORCE)
8888

8989

9090
# AVAILABLE COMPILATION DEFINITIONS:
91-
# VERIFY_SAMPLES_F Use a deterministic connected-components
91+
# VERIFY_SAMPLES_F Use a deterministic connected-components
9292
# algorithm to verify post-processing.
93-
# USE_EAGER_DSU Use the eager DSU query optimization if this flag is present.
93+
# USE_EAGER_DSU Use the eager DSU query optimization if
94+
# this flag is present.
95+
# L0_SAMPLING Run the CubeSketch l0 sampling algorithm
96+
# to ensure that we sample uniformly.
97+
# Otherwise, run a support finding algorithm.
9498

9599
add_library(GraphZeppelin
96100
src/graph.cpp

example_streaming.conf

Lines changed: 0 additions & 25 deletions
This file was deleted.

include/graph_configuration.h

Lines changed: 8 additions & 9 deletions
Original file line numberDiff line numberDiff line change
@@ -5,7 +5,6 @@
55
// forward declaration
66
class Graph;
77

8-
// TODO: Replace this with an enum defined by GutterTree repo
98
enum GutterSystem {
109
GUTTERTREE,
1110
STANDALONE,
@@ -25,15 +24,15 @@ class GraphConfiguration {
2524
bool _backup_in_mem = true;
2625

2726
// The number of graph workers
28-
size_t _num_groups = 1;
29-
30-
// How many OMP threads each graph worker uses
31-
size_t _group_size = 1;
27+
size_t _num_graph_workers = 1;
3228

3329
// Option to create more sketches than for standard connected components
3430
// Ex: a factor of 1.5 creates 1.5 times the sketches
3531
// A factor of 1 creates the normal quantity of sketches
36-
double _adtl_skts_factor = 1;
32+
double _sketches_factor = 1;
33+
34+
// Size of update batches relative to the size of a Supernode
35+
double _batch_factor = 1;
3736

3837
// Configuration for the guttering system
3938
GutteringConfiguration _gutter_conf;
@@ -50,11 +49,11 @@ class GraphConfiguration {
5049

5150
GraphConfiguration& backup_in_mem(bool backup_in_mem);
5251

53-
GraphConfiguration& num_groups(size_t num_groups);
52+
GraphConfiguration& num_graph_workers(size_t num_groups);
5453

55-
GraphConfiguration& group_size(size_t group_size);
54+
GraphConfiguration& sketches_factor(double factor);
5655

57-
GraphConfiguration& adtl_skts_factor(double factor);
56+
GraphConfiguration& batch_factor(double factor);
5857

5958
GutteringConfiguration& gutter_conf();
6059

include/graph_worker.h

Lines changed: 1 addition & 3 deletions
Original file line numberDiff line numberDiff line change
@@ -32,8 +32,7 @@ class GraphWorker {
3232
// manage configuration
3333
// configuration should be set before calling start_workers
3434
static int get_num_groups() {return num_groups;} // return the number of GraphWorkers
35-
static int get_group_size() {return group_size;} // return the number of threads in each worker
36-
static void set_config(int g, int s) { num_groups = g; group_size = s; }
35+
static void set_config(int g) { num_groups = g; }
3736
private:
3837
/**
3938
* Create a GraphWorker object by setting metadata and spinning up a thread.
@@ -69,7 +68,6 @@ class GraphWorker {
6968

7069
// configuration
7170
static int num_groups;
72-
static int group_size;
7371
static long supernode_size;
7472

7573
// list of all GraphWorkers

include/l0_sampling/sketch.h

Lines changed: 5 additions & 9 deletions
Original file line numberDiff line numberDiff line change
@@ -28,7 +28,6 @@ enum SampleSketchRet {
2828
*/
2929
class Sketch {
3030
private:
31-
static vec_t failure_factor; // Pr(failure) = 1 / factor. Determines number of columns in sketch.
3231
static vec_t n; // Length of the vector this is sketching.
3332
static size_t num_elems; // length of our actual arrays in number of elements
3433
static size_t num_columns; // Portion of array length, number of columns
@@ -49,7 +48,7 @@ class Sketch {
4948
FRIEND_TEST(EXPR_Parallelism, N10kU100k);
5049

5150
// Buckets of this sketch.
52-
// Length is column_gen(failure_factor) * guess_gen(n).
51+
// Length is num_columns * guess_gen(n).
5352
// For buckets[i * guess_gen(n) + j], the bucket has a 1/2^j probability
5453
// of containing an index. The first two are pointers into the buckets array.
5554
alignas(vec_t) char buckets[];
@@ -83,13 +82,12 @@ class Sketch {
8382

8483
/* configure the static variables of sketches
8584
* @param n Length of the vector to sketch. (static variable)
86-
* @param failure_factor 1/factor = Failure rate for sketch (determines column width)
85+
* @param num_columns Number of columns in the sketch, determines its failure probability
8786
* @return nothing
8887
*/
89-
inline static void configure(vec_t _n, vec_t _factor) {
88+
inline static void configure(vec_t _n, vec_t _num_columns) {
9089
n = _n;
91-
failure_factor = _factor;
92-
num_columns = column_gen(failure_factor);
90+
num_columns = _num_columns;
9391
num_guesses = guess_gen(n);
9492
num_elems = num_columns * num_guesses + 1; // +1 for zero bucket optimization
9593
}
@@ -103,8 +101,6 @@ class Sketch {
103101
return num_elems * (sizeof(vec_t) + sizeof(vec_hash_t));
104102
}
105103

106-
inline static vec_t get_failure_factor() { return failure_factor; }
107-
108104
inline void reset_queried() { already_queried = false; }
109105

110106
inline static size_t get_columns() { return num_columns; }
@@ -168,7 +164,7 @@ class Sketch {
168164

169165
// max number of non-zeroes in vector is n/2*n/2=n^2/4
170166
static size_t guess_gen(size_t x) { return double_to_ull(log2(x) - 2); }
171-
static size_t column_gen(size_t d) { return double_to_ull((log2(d) + 1)); }
167+
static size_t column_gen(size_t d) { return double_to_ull(ceil(log2(d))); }
172168
};
173169

174170
class MultipleQueryException : public std::exception {

include/supernode.h

Lines changed: 11 additions & 5 deletions
Original file line numberDiff line numberDiff line change
@@ -88,12 +88,12 @@ class Supernode {
8888

8989
~Supernode();
9090

91-
static inline void configure(uint64_t n, vec_t sketch_fail_factor = default_fail_factor,
91+
static inline void configure(uint64_t n, vec_t sketch_num_columns = default_num_columns,
9292
double skt_factor = 1) {
93-
Sketch::configure(n * n, sketch_fail_factor);
94-
max_sketches = log2(n) / (log2(3) - 1) * skt_factor;
93+
Sketch::configure(n * n, sketch_num_columns);
94+
max_sketches = (log2(n) / num_sketches_div) * skt_factor;
9595
bytes_size = sizeof(Supernode) + max_sketches * Sketch::sketchSizeof();
96-
serialized_size = max_sketches * Sketch::serialized_size();
96+
serialized_size = max_sketches * Sketch::serialized_size() + sizeof(SerialType);
9797
}
9898

9999
static inline size_t get_size() {
@@ -210,7 +210,13 @@ class Supernode {
210210

211211
// void write_sparse_binary_range(std::ostream&binary_out, uint32_t beg, uint32_t end);
212212

213-
static constexpr size_t default_fail_factor = 4;
213+
#ifdef L0_SAMPLING
214+
static constexpr size_t default_num_columns = 7;
215+
static constexpr double num_sketches_div = log2(3) - 1;
216+
#else
217+
static constexpr size_t default_num_columns = 2;
218+
static constexpr double num_sketches_div = log2(3) - 1;
219+
#endif
214220
};
215221

216222

0 commit comments

Comments
 (0)