Skip to content

Commit 4647b2e

Browse files
committed
initial changes for minimum cut
1 parent 38a2c3a commit 4647b2e

File tree

11 files changed

+702
-36
lines changed

11 files changed

+702
-36
lines changed

CMakeLists.txt

Lines changed: 5 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -88,6 +88,8 @@ FetchContent_MakeAvailable(GutterTree StreamingUtilities)
8888

8989
add_library(GraphZeppelin
9090
src/cc_sketch_alg.cpp
91+
src/edge_store.cpp
92+
src/min_cut_sketch_alg.cpp
9193
src/return_types.cpp
9294
src/driver_configuration.cpp
9395
src/cc_alg_configuration.cpp
@@ -102,6 +104,8 @@ target_compile_definitions(GraphZeppelin PUBLIC XXH_INLINE_ALL)
102104

103105
add_library(GraphZeppelinVerifyCC
104106
src/cc_sketch_alg.cpp
107+
src/edge_store.cpp
108+
src/min_cut_sketch_alg.cpp
105109
src/return_types.cpp
106110
src/driver_configuration.cpp
107111
src/cc_alg_configuration.cpp
@@ -119,6 +123,7 @@ if (BUILD_EXE)
119123
add_executable(tests
120124
test/test_runner.cpp
121125
test/cc_alg_test.cpp
126+
test/min_cut_test.cpp
122127
test/sketch_test.cpp
123128
test/dsu_test.cpp
124129
test/util_test.cpp

include/cc_sketch_alg.h

Lines changed: 1 addition & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -60,6 +60,7 @@ struct alignas(64) GlobalMergeData {
6060
enum QueryCode {
6161
CONNECTIVITY, // connected components and spanning forest of graph
6262
KSPANNINGFORESTS, // k disjoint spanning forests
63+
MINIMUMCUT, // minimum cut query
6364
};
6465

6566
/**
@@ -71,8 +72,6 @@ class CCSketchAlg {
7172
node_id_t num_vertices;
7273
size_t seed;
7374
bool update_locked = false;
74-
// a set containing one "representative" from each supernode
75-
std::set<node_id_t> *representatives;
7675
Sketch **sketches;
7776
// DSU representation of supernode relationship
7877
DisjointSetUnion_MT<node_id_t> dsu;

include/edge_store.h

Lines changed: 74 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,74 @@
1+
#pragma once
2+
3+
#include <atomic>
4+
#include <iostream>
5+
#include <mutex>
6+
#include <unordered_map>
7+
#include <vector>
8+
9+
#include "types.h"
10+
11+
// EdgeStore keeps graph edges as per-vertex lists of SubgraphTaggedUpdate (see adjlist).
// Access to each vertex's list is guarded by a per-vertex mutex; a separate lock covers the
// contraction logic. The insert/advance functions return a TaggedUpdateBatch to the caller.
class EdgeStore {
12+
private:
13+
static constexpr size_t store_edge_bytes = sizeof(SubgraphTaggedUpdate); // Bytes of one edge
14+
static constexpr double contract_factor = 2; // switch to sketch when within this factor of max
15+
16+
size_t seed;
17+
node_id_t num_vertices;
18+
size_t num_subgraphs;
19+
// NOTE(review): `volatile` does not provide inter-thread synchronization in C++. If the two
// fields below are read and written from different threads, std::atomic<size_t> is probably
// what is intended -- confirm against edge_store.cpp before changing.
volatile size_t cur_subgraph = 0; // subgraph depth at which edges enter the edge store
20+
volatile size_t true_min_subgraph = 0; // the minimum subgraph of elements in the store
21+
22+
std::atomic<edge_id_t> num_edges;
23+
std::atomic<node_id_t> needs_contraction;
24+
25+
std::vector<std::vector<SubgraphTaggedUpdate>> adjlist;
26+
27+
// This is a vector of booleans BUT we don't want to use vector<bool> because it's not
28+
// multithread friendly
29+
std::vector<char> vertex_contracted;
30+
31+
// NOTE(review): the name says "edges" but the trailing comment says "bytes" -- confirm the unit.
size_t max_edges; // Bytes of sketch graph
32+
size_t default_buffer_allocation; // size we allocate each buffer in adjlist to
33+
34+
// locks that protect the adjacency list
35+
// we have a single lock for each vertex and a lock for handling contraction logic
36+
std::mutex* adj_mutex;
37+
std::mutex contract_lock;
38+
39+
std::vector<SubgraphTaggedUpdate> vertex_contract(node_id_t src);
40+
void check_if_too_big();
41+
42+
// Members below exist only when VERIFY_SAMPLES_F is defined.
#ifdef VERIFY_SAMPLES_F
43+
void verify_contract_complete();
44+
std::atomic<size_t> num_inserted;
45+
std::atomic<size_t> num_duplicate;
46+
std::atomic<size_t> num_returned;
47+
#endif
48+
public:
49+
// Constructor
// start_subgraph defaults to 0.
50+
EdgeStore(size_t seed, node_id_t num_vertices, size_t sketch_bytes, size_t num_subgraphs,
51+
size_t start_subgraph = 0);
52+
~EdgeStore();
53+
54+
// functions for adding data to the edge store
55+
// may return a batch of edges that the caller needs to apply elsewhere
// NOTE(review): the original sentence was cut off after "applied to"; presumably the target is
// the sketch subgraphs -- confirm.
56+
57+
// this first function is only called when there exist no sketch subgraphs
58+
TaggedUpdateBatch insert_adj_edges(node_id_t src, const std::vector<node_id_t>& dst_vertices);
59+
60+
// this function is called when there are some sketch subgraphs.
61+
TaggedUpdateBatch insert_adj_edges(node_id_t src, node_id_t caller_first_es_subgraph,
62+
SubgraphTaggedUpdate* dst_data, size_t dst_data_size);
63+
64+
// contract vertex data by removing all updates bound for lower subgraphs than the store
65+
// is responsible for
66+
TaggedUpdateBatch vertex_advance_subgraph(node_id_t cur_first_es_subgraph);
67+
68+
// Get methods
69+
// current value of the num_edges counter
size_t get_num_edges() { return num_edges; }
70+
// num_edges multiplied by the size in bytes of one stored edge
size_t get_footprint() { return num_edges * store_edge_bytes; }
71+
size_t get_first_store_subgraph() { return cur_subgraph; }
72+
std::vector<Edge> get_edges();
73+
// true while true_min_subgraph is still below cur_subgraph
bool contract_in_progress() { return true_min_subgraph < cur_subgraph; }
74+
};

include/min_cut_sketch_alg.h

Lines changed: 162 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,162 @@
1+
#pragma once
2+
#include <iostream>
3+
#include <vector>
4+
#include <memory>
5+
6+
#include "cc_sketch_alg.h"
7+
#include "edge_store.h"
8+
9+
10+
// Configuration options for the minimum cut sketch algorithm
11+
class MCAlgConfiguration {
  // Builder-style option bag for MinCutSketchAlg. Each setter validates its argument (where a
  // valid range exists), leaves the stored value untouched on invalid input, and returns *this
  // so that calls can be chained.
 private:
  // How large to make update batches as factor of sketch size
  double _batch_factor = 1;

  // Returned min-cut guaranteed to be a (1 +/- epsilon) multiplicative approx of the true min cut.
  double _epsilon = 0.5;

  // Number of subgraphs for which we use a delta sketch
  // When applying sketch updates to other subgraphs, apply updates directly to sketch
  size_t _num_subgraphs_use_delta = 2;

  friend class MinCutSketchAlg;

 public:
  // setters

  // Set the batch factor. Must be > 0; anything else (including NaN) is rejected with a warning
  // and the previously stored value is kept.
  MCAlgConfiguration& batch_factor(double batch_factor) {
    // Written as a negated positive test so that NaN, which fails every comparison, is rejected.
    if (!(batch_factor > 0)) {
      std::cerr << "WARNING: Batch factor in MCAlgConfiguration must be > 0." << std::endl;
      std::cerr << " Setting to default value: " << _batch_factor << std::endl;
    } else {
      _batch_factor = batch_factor;
    }
    return *this;
  }

  // Set epsilon. Must lie in (0, 1]; anything else (including NaN) is rejected with a warning
  // and the previously stored value is kept.
  MCAlgConfiguration& epsilon(double epsilon) {
    // Negated in-range test: NaN fails both comparisons and is therefore rejected.
    if (!(epsilon > 0 && epsilon <= 1)) {
      std::cerr << "WARNING: MCAlgConfiguration epsilon must be in range (0, 1]." << std::endl;
      std::cerr << " Setting to default value: " << _epsilon << std::endl;
    } else {
      _epsilon = epsilon;
    }
    return *this;
  }

  // Set the number of subgraphs that use a delta sketch. No validation is performed.
  MCAlgConfiguration& num_subgraphs_use_delta(size_t num_subgraphs) {
    _num_subgraphs_use_delta = num_subgraphs;
    return *this;
  }

  // getters (const so they can be used through a const reference)
  double get_batch_factor() const { return _batch_factor; }
  double get_epsilon() const { return _epsilon; }
  size_t get_num_subgraphs_use_delta() const { return _num_subgraphs_use_delta; }

  // Print every configuration field, one per line.
  friend std::ostream& operator<< (std::ostream &out, const MCAlgConfiguration &conf) {
    out << "Minimum Cut Algorithm Configuration:" << std::endl;
    out << " batch_factor = " << conf._batch_factor << std::endl;
    out << " epsilon = " << conf._epsilon << std::endl;
    out << " num_subgraphs_use_delta = " << conf._num_subgraphs_use_delta << std::endl;
    return out;
  }
};
60+
61+
// Minimum cut sketch algorithm class
62+
class MinCutSketchAlg {
63+
private:
64+
const node_id_t num_vertices;
65+
const size_t seed;
66+
MCAlgConfiguration config;
67+
const size_t max_subgraphs;
68+
size_t cur_subgraphs;
69+
70+
const double sketch_factor;
71+
const size_t sketch_samples;
72+
73+
// NOTE(review): raw pointer members; ownership and release are presumably handled in the
// constructor/destructor in min_cut_sketch_alg.cpp -- confirm.
CCSketchAlg **cc_sketches;
74+
EdgeStore edge_store;
75+
76+
Sketch *delta_sketches = nullptr;
77+
node_id_t **update_buffers = nullptr;
78+
size_t num_delta_sketches = 0;
79+
size_t num_upd_buffers = 0;
80+
81+
#ifdef VERIFY_SAMPLES_F
82+
std::unique_ptr<GraphVerifier> verifier;
83+
#endif
84+
85+
CCAlgConfiguration cc_config;
86+
public:
87+
/**
88+
* Construct an instance of the Minimum Cut Sketching Algorithm
89+
* @param _num_vertices number of graph vertices
90+
* @param _seed seed to hash functions
91+
* @param _config Configuration options for minimum cut sketch algorithm
92+
*/
93+
MinCutSketchAlg(node_id_t _num_vertices, size_t _seed,
94+
MCAlgConfiguration _config = MCAlgConfiguration());
95+
96+
~MinCutSketchAlg();
97+
98+
/**
99+
* Allocate memory for the worker threads to use when updating this algorithm's sketches
* @param num_workers number of worker threads to allocate memory for
100+
*/
101+
void allocate_worker_memory(size_t num_workers);
102+
103+
/**
104+
* Returns the number of buffered updates we would like to have in the update batches
* Placeholder: currently returns config._batch_factor (a double) implicitly converted to
* size_t, so any fractional part is truncated.
105+
*/
106+
size_t get_desired_updates_per_batch() {
107+
return config._batch_factor; // TODO: Fill in correctly
108+
}
109+
110+
/**
111+
* Action to take on an update before inserting it to the guttering system.
112+
* We use this function to manage the eager dsu.
* @param upd the graph update about to be inserted
* @param thr_id id of the calling thread
113+
*/
114+
void pre_insert(GraphUpdate upd, node_id_t thr_id);
115+
116+
117+
/**
118+
* Update all the sketches for a vertex, given a batch of updates.
119+
* @param thr_id The id of the thread performing the update [0, num_threads)
120+
* @param src_vertex The vertex where the edges originate.
121+
* @param dst_vertices A vector of destinations.
122+
*/
123+
void apply_update_batch(size_t thr_id, node_id_t src_vertex,
124+
const std::vector<node_id_t> &dst_vertices);
125+
126+
/**
127+
* Set the verifier this algorithm will use to check its correctness
* Only available when VERIFY_SAMPLES_F is defined; takes ownership of the verifier.
128+
* TODO: What is the right way to use verifier for minimum cut?
129+
*/
130+
#ifdef VERIFY_SAMPLES_F
131+
void set_verifier(std::unique_ptr<GraphVerifier> verifier) {
132+
this->verifier = std::move(verifier);
133+
}
134+
#endif
135+
136+
/**
137+
* Main query routine of this algorithm.
138+
* Returns an approximation of the minimum cut of the graph defined by the graph stream
139+
* seen thus far. This approximation is guaranteed to be within a (1 +/- epsilon) factor of
140+
* the true minimum cut.
141+
*/
142+
size_t calc_minimum_cut();
143+
144+
/**
145+
* Return if we have cached an answer to query.
146+
* This allows the driver to avoid flushing the gutters before calling query functions.
* Any query type other than MINIMUMCUT is delegated to cc_sketches[0]; a MINIMUMCUT query
* always reports that nothing is cached.
147+
* TODO: Is there something intelligent we can do here for mincut/k-conn
148+
*/
149+
bool has_cached_query(int query_type) {
150+
if (query_type != MINIMUMCUT) return cc_sketches[0]->has_cached_query(query_type);
151+
return false;
152+
}
153+
154+
/**
155+
* Print the configuration of minimum cut graph sketching algorithm.
156+
*/
157+
void print_configuration() {
158+
std::cout << config << std::endl;
159+
}
160+
161+
node_id_t get_num_vertices() { return num_vertices; }
162+
};

0 commit comments

Comments
 (0)