From 497ff2a542d56fb9b82bf30049fc0810f1d2ed28 Mon Sep 17 00:00:00 2001 From: Phil Ratzloff Date: Sun, 31 May 2026 16:37:22 -0400 Subject: [PATCH 1/6] feat(generators): add complete_graph K(n) and erdos_renyi_gnm G(n,m) MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit complete_graph (include/graph/generators/complete.hpp): - Generates all n*(n-1) ordered pairs (u,v), u != v - Output naturally sorted by source_id then target_id - O(n^2) edge count; size warning in header and user-guide erdos_renyi_gnm (include/graph/generators/gnm.hpp): - Fixed edge count variant of Erdos-Renyi: selects exactly m distinct edges uniformly at random via rejection sampling on the G(n,p) position encoding (pos -> (u,v), same scheme as erdos_renyi.hpp) - Clamps m to n*(n-1) when too large - Output sorted by source_id after sampling Both generators: - Follow the established pattern: edge_list return type, weight_dist enum, uint64_t seed, VId template parameter - Added to generators.hpp umbrella header Tests (tests/generators/test_generators.cpp): - New TEST_CASEs for complete_graph and erdos_renyi_gnm covering: count, no self-loops, ids in range, distinctness, sorted, determinism, K(n) full coverage, gnm clamping - uint64_t template test extended to include both new generators Docs: - docs/user-guide/generators.md: TOC entries, individual-include lines, complete_graph and erdos_renyi_gnm sections with parameter tables, weight_dist options, and usage examples - agents/bgl_migration_strategy.md: mark G(n,m) and K(n) done in §9 table and API block; trim Remaining Gaps; update executive summary bullet, Appendix B Done note, scorecard row (67%->83%), and overall coverage (~46%->~47%) --- agents/bgl_migration_strategy.md | 26 ++++--- docs/user-guide/generators.md | 66 ++++++++++++++++++ include/graph/generators.hpp | 12 ++-- include/graph/generators/complete.hpp | 50 ++++++++++++++ include/graph/generators/gnm.hpp | 79 +++++++++++++++++++++ 
tests/generators/test_generators.cpp | 98 +++++++++++++++++++++++++++ 6 files changed, 317 insertions(+), 14 deletions(-) create mode 100644 include/graph/generators/complete.hpp create mode 100644 include/graph/generators/gnm.hpp diff --git a/agents/bgl_migration_strategy.md b/agents/bgl_migration_strategy.md index 678800b..32f5c56 100644 --- a/agents/bgl_migration_strategy.md +++ b/agents/bgl_migration_strategy.md @@ -60,7 +60,7 @@ graph-v3 is a ground-up C++20 redesign targeting ISO standardization (P3126–P3 - Dozens of missing algorithms across flow, matching, coloring, planarity, isomorphism, centrality, layout, and related areas - No `subgraph` hierarchy with descriptor mapping - No DIMACS or METIS I/O -- Graph generators partially implemented (Erdős-Rényi G(n,p), Barabási-Albert, 2D grid, path available; Watts-Strogatz, R-MAT, complete graph still missing) +- Graph generators partially implemented (Erdős-Rényi G(n,p) and G(n,m), Barabási-Albert, 2D grid, path, complete graph available; Watts-Strogatz, R-MAT still missing) - No `adjacency_matrix` container - No `copy_graph` utility with cross-type and property mapping support - No `labeled_graph` adaptor (string labels → vertex mapping) @@ -527,7 +527,7 @@ auto read_graphml(istream& is) -> dynamic_graph; | Generator | BGL Header | graph-v3 | Priority | |-----------|-----------|----------|----------| | **Erdős-Rényi G(n,p)** | `erdos_renyi_generator.hpp` | ✅ `` | ✅ Done | -| **Erdos-Renyi G(n,m)** | (same) | ❌ Not available | 🟡 Medium | +| **Erdos-Renyi G(n,m)** | (same) | ✅ `` | ✅ Done | | **Barabási–Albert (preferential attachment)** | — | ✅ `` | ✅ Done | | **2D Grid (4-connected)** | `mesh_graph_generator.hpp` | ✅ `` | ✅ Done | | **Path graph** | — | ✅ `` | ✅ Done | @@ -535,7 +535,7 @@ auto read_graphml(istream& is) -> dynamic_graph; | **PLOD (Power-Law Out-Degree)** | `plod_generator.hpp` | ❌ Not available (use Barabási–Albert) | 🟡 Medium | | **R-MAT** | `rmat_graph_generator.hpp` | ❌ Not available | 🟡 
Medium | | **SSCA#2** | `ssca_graph_generator.hpp` | ❌ Not available | 🟢 Low | -| **Complete Graph K(n)** | — (manual) | ❌ Not available | 🟢 Low | +| **Complete Graph K(n)** | — (manual) | ✅ `` | ✅ Done | ### graph-v3 Generator API @@ -548,6 +548,9 @@ using namespace graph::generators; // Erdős–Rényi G(n,p) — O(E) geometric-skip algorithm (Batagelj & Brandes 2005) auto er = erdos_renyi(10'000u, 8.0 / 10'000); // ~80K directed edges +// Erdős–Rényi G(n,m) — fixed edge count, distinct edges sampled uniformly +auto erm = erdos_renyi_gnm(10'000u, 80'000u); // exactly 80K directed edges + // 2D grid — bidirectional 4-connected, E/V ≈ 4 auto grid = grid_2d(100u, 100u); // 10K vertices, ~40K edges @@ -557,6 +560,9 @@ auto ba = barabasi_albert(10'000u, 4u); // E/V ≈ 8 // Path — 0 → 1 → 2 → … → (n−1), minimum-traffic baseline auto path = path_graph(1'000u); // 999 edges +// Complete K(n) — all ordered pairs (u,v), u ≠ v; dense stress test +auto kn = complete_graph(100u); // 100*99 = 9'900 edges + // Load into any container: compressed_graph g; g.load_edges(er, std::identity{}, 10'000u); @@ -578,10 +584,10 @@ To achieve full BGL parity, the following generators are still needed: | Generator | Notes | |-----------|-------| -| Erdős-Rényi G(n,m) | Fixed edge count variant; wrap existing G(n,p) with rejection or Fisher-Yates | | Watts-Strogatz small world | Ring lattice + random rewiring | | R-MAT | Recursive matrix; important for Graph500 benchmarks | -| Complete graph K(n) | Trivial to implement | +| PLOD | Power-law out-degree; partially served by Barabási–Albert | +| SSCA#2 | Composite clique-based benchmark generator | --- @@ -1196,7 +1202,7 @@ These items block migration for the largest number of BGL users: | **PageRank** | Algorithm | Low | Widely used iterative algorithm | | **DIMACS read/write** | I/O | Low | Required for max-flow benchmark suites | -> **Done since the previous revision of this plan:** `filtered_graph` adaptor, DOT/GraphML/JSON I/O, Erdős-Rényi / 
Barabási-Albert / 2D grid / path generators, `kosaraju` + `tarjan_scc`, `afforest`, library-shipped BGL adaptor (`include/graph/adaptors/bgl/`), composable visitor toolkit (`visitor_factory.hpp`: `make_visitor`, single-event adaptors, `predecessor_recorder`, `distance_recorder`, `time_stamper`), `valid_visitor` strict concept with `static_assert` diagnostics in BFS/DFS/Dijkstra/Bellman-Ford. +> **Done since the previous revision of this plan:** `filtered_graph` adaptor, DOT/GraphML/JSON I/O, Erdős-Rényi G(n,p)/G(n,m) / Barabási-Albert / 2D grid / path / complete-graph generators, `kosaraju` + `tarjan_scc`, `afforest`, library-shipped BGL adaptor (`include/graph/adaptors/bgl/`), composable visitor toolkit (`visitor_factory.hpp`: `make_visitor`, single-event adaptors, `predecessor_recorder`, `distance_recorder`, `time_stamper`), `valid_visitor` strict concept with `static_assert` diagnostics in BFS/DFS/Dijkstra/Bellman-Ford. ### Phase 2: Common Algorithm Coverage @@ -1328,15 +1334,15 @@ The scores below are directional editorial estimates, not audited counts. | **Layout** | 5 algorithms | 0 | 0% | | **Graph adaptors** | 5 adaptors | 3 (transpose, filtered, BGL adaptor) | 60% | | **Graph I/O** | 5 formats | 3 (DOT, GraphML, JSON) | 60% | -| **Graph generators** | 6 generators | 4 (path, grid, Erdős–Rényi, Barabási–Albert) | 67% | +| **Graph generators** | 6 generators | 6 (path, grid, complete, Erdős–Rényi G(n,p)/G(n,m), Barabási–Albert) | 83% | | **Visitors** | 5 types + composable adaptors | Concept-checked visitors + composable adaptors (`make_visitor`, `on_*` event wrappers, `predecessor_recorder`, `distance_recorder`, `time_stamper`). The remaining unimplemented visitor events are related to colored tranversal not supported in graph-v3. 
| 90% | | **Graph mutation** | Full `MutableGraph` concept (CPOs) | Member-function mutation on both `dynamic_graph` and `undirected_adjacency_list`; no mutating CPOs | 70% | -**Overall estimated BGL API coverage: ~46%** +**Overall estimated BGL API coverage: ~47%** -The unweighted average across all 20 scorecard rows is now ~46%, but the picture splits sharply: +The unweighted average across all 20 scorecard rows is now ~47%, but the picture splits sharply: -- **Core/everyday categories** (graph types, architecture, properties, traversal, MST, connectivity, I/O, adaptors, generators, visitors, mutation — 12 rows): average ~75%. For a BGL user doing graph construction, traversal, shortest paths, MST, or connectivity work, graph-v3 covers the vast majority of the API surface. +- **Core/everyday categories** (graph types, architecture, properties, traversal, MST, connectivity, I/O, adaptors, generators, visitors, mutation — 12 rows): average ~76%. For a BGL user doing graph construction, traversal, shortest paths, MST, or connectivity work, graph-v3 covers the vast majority of the API surface. - **Specialist algorithm domains** (network flow, matching, coloring, planarity, isomorphism, ordering, layout — 7 rows): all at 0%, and these pull the overall figure down significantly. The coverage that exists is architecturally superior (C++20, ranges, concepts, CPOs, zero-config), and the library includes novel features (lazy traversal views, triangle counting, label propagation, Jaccard similarity) not found in BGL. The primary migration barrier is breadth of specialist algorithm coverage. 
diff --git a/docs/user-guide/generators.md b/docs/user-guide/generators.md index 0093903..731700a 100644 --- a/docs/user-guide/generators.md +++ b/docs/user-guide/generators.md @@ -20,7 +20,9 @@ - [Generators](#generators) - [path_graph](#path_graph) - [grid_graph](#grid_graph) + - [complete_graph](#complete_graph) - [erdos_renyi_graph](#erdos_renyi_graph) + - [erdos_renyi_gnm](#erdos_renyi_gnm) - [barabasi_albert_graph](#barabasi_albert_graph) - [Example: Building and Querying a Generated Graph](#example) @@ -42,7 +44,9 @@ All generators are header-only and require no external dependencies. // Or include individually: #include #include +#include #include +#include #include ``` @@ -95,6 +99,36 @@ auto edges = graph::generators::grid_graph(3u, 4u); --- +### `complete_graph` + +Generates a complete graph K(n): every ordered pair `(u, v)` with `u ≠ v`. + +```cpp +template +auto complete_graph(VId n, uint64_t seed = 42, + weight_dist wdist = weight_dist::uniform) + -> std::vector>; +``` + +| Parameter | Description | +|-----------|-------------| +| `n` | Number of vertices | +| `seed` | Random seed for reproducible edge weights | +| `wdist` | Edge-weight distribution: `weight_dist::uniform` (U[1,100], default), `weight_dist::exponential` (Exp(0.1)+1), or `weight_dist::constant_one` (1.0) | + +**Returns:** `n * (n-1)` directed edges — the fully-connected graph — sorted by +source id, then target id. + +> **Warning:** the edge count grows as O(n²); generating K(n) for large `n` is +> memory-intensive (e.g. `n = 10'000` yields ~100M edges). + +```cpp +auto edges = graph::generators::complete_graph(100u); +// 100 * 99 = 9'900 directed edges +``` + +--- + ### `erdos_renyi_graph` Generates a random graph using the Erdős–Rényi G(n, p) model. @@ -121,6 +155,38 @@ auto edges = graph::generators::erdos_renyi_graph(100u, 0.05); --- +### `erdos_renyi_gnm` + +Generates a random graph using the Erdős–Rényi G(n, m) model — the +fixed-edge-count companion to `erdos_renyi`. 
Exactly `m` distinct edges are +selected uniformly at random from the `n * (n-1)` ordered pairs. + +```cpp +template +auto erdos_renyi_gnm(VId n, size_t m, uint64_t seed = 42, + weight_dist wdist = weight_dist::uniform) + -> std::vector>; +``` + +| Parameter | Description | +|-----------|-------------| +| `n` | Number of vertices | +| `m` | Number of edges to generate (clamped to `n * (n-1)` if larger) | +| `seed` | Random seed for reproducibility | +| `wdist` | Edge-weight distribution: `weight_dist::uniform` (U[1,100], default), `weight_dist::exponential` (Exp(0.1)+1), or `weight_dist::constant_one` (1.0) | + +**Returns:** Exactly `m` distinct directed edges (`u ≠ v`; `m` taken after clamping), sorted by source id, then target id. +Use this model when a precise edge count is required (e.g. controlling graph +density for benchmarks); use `erdos_renyi` (G(n, p)) when each edge should exist +independently with a fixed probability. + +```cpp +auto edges = graph::generators::erdos_renyi_gnm(100u, 500u); +// exactly 500 distinct directed edges +``` + +--- + ### `barabasi_albert_graph` Generates a scale-free graph using the Barabási–Albert preferential attachment model. diff --git a/include/graph/generators.hpp b/include/graph/generators.hpp index 3018f58..499209e 100644 --- a/include/graph/generators.hpp +++ b/include/graph/generators.hpp @@ -3,10 +3,12 @@ * @brief Convenience umbrella header for all graph generators. 
* * Include this single header to access all built-in graph generators: - * - erdos_renyi() — Erdős–Rényi G(n, p) random graph - * - grid_2d() — 2D grid with 4-connectivity - * - barabasi_albert() — preferential-attachment (scale-free) - * - path_graph() — simple directed path + * - erdos_renyi() — Erdős–Rényi G(n, p) random graph + * - erdos_renyi_gnm() — Erdős–Rényi G(n, m) fixed-edge-count random graph + * - grid_2d() — 2D grid with 4-connectivity + * - barabasi_albert() — preferential-attachment (scale-free) + * - path_graph() — simple directed path + * - complete_graph() — complete graph K(n) * * All generators return a sorted std::vector> * suitable for loading into any graph container via load_edges(). @@ -16,6 +18,8 @@ #include #include +#include #include #include #include +#include diff --git a/include/graph/generators/complete.hpp b/include/graph/generators/complete.hpp new file mode 100644 index 0000000..37c962c --- /dev/null +++ b/include/graph/generators/complete.hpp @@ -0,0 +1,50 @@ +/** + * @file complete.hpp + * @brief Complete graph generator K(n): every ordered pair (u, v) with u ≠ v. + * + * Produces a fully-connected directed graph with n*(n−1) edges. Useful as a + * dense-graph stress test and as a worst-case input for algorithms whose cost + * scales with edge count. + * + * @warning The edge count grows as O(n²); generating K(n) for large n is + * memory-intensive (e.g. n = 10'000 yields ~100M edges). + */ + +#pragma once + +#include + +#include +#include +#include + +namespace graph::generators { + +/// Generate a complete directed graph K(n): all ordered pairs (u, v), u ≠ v. +/// +/// @tparam VId Vertex id type (default: uint32_t). +/// @param n Number of vertices. +/// @param seed RNG seed passed to sample_weight for the edge weights; the edge set itself does not depend on it. +/// @param wdist Weight distribution family. +/// @return Sorted edge list (ascending by source_id, then target_id). 
+template +edge_list complete_graph(VId n, uint64_t seed = 42, + weight_dist wdist = weight_dist::uniform) { + std::mt19937_64 rng(seed); + edge_list generated_edges; + const size_t total = (n > 0) ? static_cast(n) * (n - 1) : 0; + generated_edges.reserve(total); + + for (VId u = 0; u < n; ++u) { + for (VId v = 0; v < n; ++v) { + if (v == u) { + continue; + } + generated_edges.push_back({u, v, sample_weight(rng, wdist)}); + } + } + // The ascending nested loops emit edges already ordered by (source_id, target_id), so no sort is needed. + return generated_edges; +} + +} // namespace graph::generators diff --git a/include/graph/generators/gnm.hpp b/include/graph/generators/gnm.hpp new file mode 100644 index 0000000..8278278 --- /dev/null +++ b/include/graph/generators/gnm.hpp @@ -0,0 +1,79 @@ +/** + * @file gnm.hpp + * @brief Erdős–Rényi G(n, m) random graph generator (fixed edge count). + * + * The G(n, m) model selects exactly m distinct edges uniformly at random from + * the n*(n−1) ordered (u, v) pairs with u ≠ v. This is the fixed-edge-count + * companion to G(n, p) in @ref erdos_renyi.hpp. + * + * Edges are addressed by the same position encoding used by G(n, p): + * pos ∈ [0, n*(n−1)) + * u = pos / (n−1) + * offset = pos % (n−1) + * v = offset < u ? offset : offset + 1 (skip self-loop) + * + * Distinct positions are drawn by rejection sampling (efficient while + * m ≪ n*(n−1)) and the resulting edge list is sorted by source_id, then target_id. + */ + +#pragma once + +#include + +#include +#include +#include +#include +#include + +namespace graph::generators { + +/// Generate an Erdős–Rényi G(n, m) directed random graph (no self-loops). +/// +/// Selects exactly `m` distinct edges uniformly at random. If `m` exceeds the +/// maximum possible edge count n*(n−1), it is clamped to that maximum. +/// +/// @tparam VId Vertex id type (default: uint32_t). +/// @param n Number of vertices. +/// @param m Number of edges to generate. +/// @param seed RNG seed for reproducibility. 
+/// @param wdist Weight distribution family. 
+/// @return Edge list sorted ascending by source_id, then target_id. +template +edge_list erdos_renyi_gnm(VId n, size_t m, uint64_t seed = 42, + weight_dist wdist = weight_dist::uniform) { + std::mt19937_64 rng(seed); + const size_t total = (n > 1) ? static_cast(n) * (n - 1) : 0; + if (m > total) { + m = total; + } + + edge_list generated_edges; + generated_edges.reserve(m); + if (m == 0) { + return generated_edges; + } + + std::unordered_set chosen; + chosen.reserve(m * 2); + std::uniform_int_distribution pick(0, total - 1); + + while (chosen.size() < m) { + const size_t pos = pick(rng); + if (chosen.insert(pos).second) { + const VId u = static_cast(pos / (n - 1)); + const VId offset = static_cast(pos % (n - 1)); + const VId v = (offset < u) ? offset : offset + 1; + generated_edges.push_back({u, v, sample_weight(rng, wdist)}); + } + } + + std::sort(generated_edges.begin(), generated_edges.end(), + [](const auto& a, const auto& b) { + return (a.source_id != b.source_id) ? a.source_id < b.source_id + : a.target_id < b.target_id; + }); + return generated_edges; +} + +} // namespace graph::generators diff --git a/tests/generators/test_generators.cpp b/tests/generators/test_generators.cpp index ac3e8d5..46b571f 100644 --- a/tests/generators/test_generators.cpp +++ b/tests/generators/test_generators.cpp @@ -225,18 +225,116 @@ TEST_CASE("path_graph: basic properties", "[generators][path]") { } } +// --------------------------------------------------------------------------- +// Erdős–Rényi G(n, m) +// --------------------------------------------------------------------------- + +TEST_CASE("erdos_renyi_gnm: basic properties", "[generators][gnm]") { + constexpr uint32_t N = 100; + constexpr size_t M = 500; + auto edges = erdos_renyi_gnm(N, M); + + SECTION("exactly M edges") { + REQUIRE(edges.size() == M); + } + + SECTION("no self-loops") { + for (const auto& e : edges) { + REQUIRE(e.source_id != e.target_id); + } + } + + SECTION("all vertex ids in range [0, N)") { + for (const auto& e 
: edges) { + REQUIRE(e.source_id < N); + REQUIRE(e.target_id < N); + } + } + + SECTION("edges are distinct") { + std::set> edge_set; + for (const auto& e : edges) { + edge_set.emplace(e.source_id, e.target_id); + } + REQUIRE(edge_set.size() == edges.size()); + } + + SECTION("sorted by source_id") { + REQUIRE(std::is_sorted(edges.begin(), edges.end(), + [](const auto& a, const auto& b) { return a.source_id < b.source_id; })); + } + + SECTION("deterministic with same seed") { + auto edges2 = erdos_renyi_gnm(N, M); + REQUIRE(edges.size() == edges2.size()); + for (size_t i = 0; i < edges.size(); ++i) { + REQUIRE(edges[i].source_id == edges2[i].source_id); + REQUIRE(edges[i].target_id == edges2[i].target_id); + } + } + + SECTION("m clamped to n*(n-1) when too large") { + constexpr uint32_t n = 5; + auto full = erdos_renyi_gnm(n, 1000); + REQUIRE(full.size() == static_cast(n) * (n - 1)); + } +} + +// --------------------------------------------------------------------------- +// Complete graph K(n) +// --------------------------------------------------------------------------- + +TEST_CASE("complete_graph: basic properties", "[generators][complete]") { + constexpr uint32_t N = 12; + auto edges = complete_graph(N); + + SECTION("exactly N*(N-1) edges") { + REQUIRE(edges.size() == static_cast(N) * (N - 1)); + } + + SECTION("no self-loops") { + for (const auto& e : edges) { + REQUIRE(e.source_id != e.target_id); + } + } + + SECTION("every ordered pair (u, v) with u != v is present exactly once") { + std::set> edge_set; + for (const auto& e : edges) { + edge_set.emplace(e.source_id, e.target_id); + } + REQUIRE(edge_set.size() == edges.size()); + for (uint32_t u = 0; u < N; ++u) { + for (uint32_t v = 0; v < N; ++v) { + if (u != v) { + REQUIRE(edge_set.count({u, v}) == 1); + } + } + } + } + + SECTION("sorted by source_id") { + REQUIRE(std::is_sorted(edges.begin(), edges.end(), + [](const auto& a, const auto& b) { return a.source_id < b.source_id; })); + } +} + // 
--------------------------------------------------------------------------- // Template parameter: custom VId type // --------------------------------------------------------------------------- TEST_CASE("generators work with uint64_t vertex ids", "[generators][template]") { auto er_edges = erdos_renyi(uint64_t{50}, 0.1); + auto gnm_edges = erdos_renyi_gnm(uint64_t{50}, 100); auto grid_edges = grid_2d(uint64_t{5}, uint64_t{5}); auto ba_edges = barabasi_albert(uint64_t{50}, uint64_t{2}); auto path_edges = path_graph(uint64_t{20}); + auto kn_edges = complete_graph(uint64_t{8}); REQUIRE(er_edges.size() > 0); + REQUIRE(gnm_edges.size() == 100); REQUIRE(grid_edges.size() > 0); REQUIRE(ba_edges.size() > 0); REQUIRE(path_edges.size() == 19); + REQUIRE(kn_edges.size() == 8 * 7); } From 9b3102c83897796c7930b0e8bdef54e587b25a6a Mon Sep 17 00:00:00 2001 From: Phil Ratzloff Date: Sun, 31 May 2026 16:44:34 -0400 Subject: [PATCH 2/6] feat(generators): add Watts-Strogatz, R-MAT, and PLOD generators MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit watts_strogatz (include/graph/generators/watts_strogatz.hpp): - Ring lattice with k nearest neighbours + probability-beta rewiring - Avoids self-loops and duplicate undirected pairs; bidirectional edges - beta=0 yields pure ring lattice (N*K directed edges) rmat (include/graph/generators/rmat.hpp): - Recursive-matrix (Graph500-style) with (a,b,c,d) quadrant probabilities - 2^scale vertices, descends scale levels per edge - Default params are standard Graph500 (0.57,0.19,0.19,0.05) - Self-loops and duplicate directed edges removed plod (include/graph/generators/plod.hpp): - Palmer-Steffan power-law out-degree model - Per-vertex credit drawn from power law, edges to random targets - No self-loops or duplicates All three follow the established pattern (edge_list return, weight_dist enum, uint64_t seed, VId template param) and are added to the generators.hpp umbrella header. 
Tests: new TEST_CASEs for each covering no-self-loops, id range, distinctness, sorted, determinism, and model-specific invariants (WS ring-lattice count, RMAT m bound). uint64_t template test extended. Docs: - docs/user-guide/generators.md: TOC, includes, and full sections with parameter tables and examples for all three - agents/bgl_migration_strategy.md: mark WS/R-MAT/PLOD done in §9; only SSCA#2 remains; update executive summary, roadmap, Done note, and scorecard (generators 83%->95%, overall ~47%->~48%) --- agents/bgl_migration_strategy.md | 36 +++-- docs/user-guide/generators.md | 105 +++++++++++++ include/graph/generators.hpp | 6 + include/graph/generators/plod.hpp | 86 ++++++++++ include/graph/generators/rmat.hpp | 99 ++++++++++++ include/graph/generators/watts_strogatz.hpp | 94 +++++++++++ tests/generators/test_generators.cpp | 165 ++++++++++++++++++++ 7 files changed, 576 insertions(+), 15 deletions(-) create mode 100644 include/graph/generators/plod.hpp create mode 100644 include/graph/generators/rmat.hpp create mode 100644 include/graph/generators/watts_strogatz.hpp diff --git a/agents/bgl_migration_strategy.md b/agents/bgl_migration_strategy.md index 32f5c56..327ef9d 100644 --- a/agents/bgl_migration_strategy.md +++ b/agents/bgl_migration_strategy.md @@ -60,7 +60,7 @@ graph-v3 is a ground-up C++20 redesign targeting ISO standardization (P3126–P3 - Dozens of missing algorithms across flow, matching, coloring, planarity, isomorphism, centrality, layout, and related areas - No `subgraph` hierarchy with descriptor mapping - No DIMACS or METIS I/O -- Graph generators partially implemented (Erdős-Rényi G(n,p) and G(n,m), Barabási-Albert, 2D grid, path, complete graph available; Watts-Strogatz, R-MAT still missing) +- Graph generators partially implemented (Erdős-Rényi G(n,p) and G(n,m), Barabási-Albert, Watts-Strogatz, R-MAT, PLOD, 2D grid, path, complete graph available; only SSCA#2 still missing) - No `adjacency_matrix` container - No `copy_graph` utility 
with cross-type and property mapping support - No `labeled_graph` adaptor (string labels → vertex mapping) @@ -531,9 +531,9 @@ auto read_graphml(istream& is) -> dynamic_graph; | **Barabási–Albert (preferential attachment)** | — | ✅ `` | ✅ Done | | **2D Grid (4-connected)** | `mesh_graph_generator.hpp` | ✅ `` | ✅ Done | | **Path graph** | — | ✅ `` | ✅ Done | -| **Small World (Watts-Strogatz)** | `small_world_generator.hpp` | ❌ Not available | 🟡 Medium | -| **PLOD (Power-Law Out-Degree)** | `plod_generator.hpp` | ❌ Not available (use Barabási–Albert) | 🟡 Medium | -| **R-MAT** | `rmat_graph_generator.hpp` | ❌ Not available | 🟡 Medium | +| **Small World (Watts-Strogatz)** | `small_world_generator.hpp` | ✅ `` | ✅ Done | +| **PLOD (Power-Law Out-Degree)** | `plod_generator.hpp` | ✅ `` | ✅ Done | +| **R-MAT** | `rmat_graph_generator.hpp` | ✅ `` | ✅ Done | | **SSCA#2** | `ssca_graph_generator.hpp` | ❌ Not available | 🟢 Low | | **Complete Graph K(n)** | — (manual) | ✅ `` | ✅ Done | @@ -563,6 +563,15 @@ auto path = path_graph(1'000u); // 999 edges // Complete K(n) — all ordered pairs (u,v), u ≠ v; dense stress test auto kn = complete_graph(100u); // 100*99 = 9'900 edges +// Watts–Strogatz — small-world ring lattice with random rewiring +auto ws = watts_strogatz(1'000u, 6u, 0.1); // degree 6, 10% rewired + +// R-MAT — recursive-matrix, Graph500-style power-law / community structure +auto rm = rmat(16u, 1u << 18); // 65'536 vertices, ~256K edges + +// PLOD — power-law out-degree (BGL parity; prefer Barabási–Albert) +auto pl = plod(1'000u); // power-law out-degree + // Load into any container: compressed_graph g; g.load_edges(er, std::identity{}, 10'000u); @@ -580,14 +589,11 @@ auto edges = erdos_renyi(1'000'000ULL, 0.00001); ### Remaining Gaps -To achieve full BGL parity, the following generators are still needed: +The one remaining BGL generator is the composite benchmark generator: | Generator | Notes | |-----------|-------| -| Watts-Strogatz small world | Ring lattice + 
random rewiring | -| R-MAT | Recursive matrix; important for Graph500 benchmarks | -| PLOD | Power-law out-degree; partially served by Barabási–Albert | -| SSCA#2 | Composite clique-based benchmark generator | +| SSCA#2 | Composite clique-based HPCS benchmark generator | --- @@ -1202,7 +1208,7 @@ These items block migration for the largest number of BGL users: | **PageRank** | Algorithm | Low | Widely used iterative algorithm | | **DIMACS read/write** | I/O | Low | Required for max-flow benchmark suites | -> **Done since the previous revision of this plan:** `filtered_graph` adaptor, DOT/GraphML/JSON I/O, Erdős-Rényi G(n,p)/G(n,m) / Barabási-Albert / 2D grid / path / complete-graph generators, `kosaraju` + `tarjan_scc`, `afforest`, library-shipped BGL adaptor (`include/graph/adaptors/bgl/`), composable visitor toolkit (`visitor_factory.hpp`: `make_visitor`, single-event adaptors, `predecessor_recorder`, `distance_recorder`, `time_stamper`), `valid_visitor` strict concept with `static_assert` diagnostics in BFS/DFS/Dijkstra/Bellman-Ford. +> **Done since the previous revision of this plan:** `filtered_graph` adaptor, DOT/GraphML/JSON I/O, Erdős-Rényi G(n,p)/G(n,m) / Barabási-Albert / 2D grid / path / complete-graph / Watts-Strogatz / R-MAT / PLOD generators, `kosaraju` + `tarjan_scc`, `afforest`, library-shipped BGL adaptor (`include/graph/adaptors/bgl/`), composable visitor toolkit (`visitor_factory.hpp`: `make_visitor`, single-event adaptors, `predecessor_recorder`, `distance_recorder`, `time_stamper`), `valid_visitor` strict concept with `static_assert` diagnostics in BFS/DFS/Dijkstra/Bellman-Ford. 
### Phase 2: Common Algorithm Coverage @@ -1231,7 +1237,7 @@ These items block migration for the largest number of BGL users: | **Push-Relabel Max Flow** | Algorithm | High | High-performance max flow | | **Max Cardinality Matching** | Algorithm | Medium | Bipartite matching | | **Layout algorithms** | Algorithm | Medium | Graph visualization | -| **Small World / PLOD generators** | Generator | Low | Synthetic graph generation | +| ~~**Small World / PLOD generators**~~ | ~~Generator~~ | ~~Low~~ | ✅ Done — `watts_strogatz.hpp`, `plod.hpp`, `rmat.hpp` | | ~~**Lambda visitor composition**~~ | ~~API~~ | ~~Low~~ | ✅ Done — `visitor_factory.hpp`: `make_visitor`, single-event adaptors, `predecessor_recorder`, `distance_recorder`, `time_stamper` | | **BGL compatibility header** | Migration | Medium | `graph_traits` shim + name aliases for gradual migration | @@ -1334,15 +1340,15 @@ The scores below are directional editorial estimates, not audited counts. | **Layout** | 5 algorithms | 0 | 0% | | **Graph adaptors** | 5 adaptors | 3 (transpose, filtered, BGL adaptor) | 60% | | **Graph I/O** | 5 formats | 3 (DOT, GraphML, JSON) | 60% | -| **Graph generators** | 6 generators | 6 (path, grid, complete, Erdős–Rényi G(n,p)/G(n,m), Barabási–Albert) | 83% | +| **Graph generators** | 6 generators | 9 (path, grid, complete, Erdős–Rényi G(n,p)/G(n,m), Barabási–Albert, Watts–Strogatz, R-MAT, PLOD) | 95% | | **Visitors** | 5 types + composable adaptors | Concept-checked visitors + composable adaptors (`make_visitor`, `on_*` event wrappers, `predecessor_recorder`, `distance_recorder`, `time_stamper`). The remaining unimplemented visitor events are related to colored tranversal not supported in graph-v3. 
| 90% | | **Graph mutation** | Full `MutableGraph` concept (CPOs) | Member-function mutation on both `dynamic_graph` and `undirected_adjacency_list`; no mutating CPOs | 70% | -**Overall estimated BGL API coverage: ~47%** +**Overall estimated BGL API coverage: ~48%** -The unweighted average across all 20 scorecard rows is now ~47%, but the picture splits sharply: +The unweighted average across all 20 scorecard rows is now ~48%, but the picture splits sharply: -- **Core/everyday categories** (graph types, architecture, properties, traversal, MST, connectivity, I/O, adaptors, generators, visitors, mutation — 12 rows): average ~76%. For a BGL user doing graph construction, traversal, shortest paths, MST, or connectivity work, graph-v3 covers the vast majority of the API surface. +- **Core/everyday categories** (graph types, architecture, properties, traversal, MST, connectivity, I/O, adaptors, generators, visitors, mutation — 12 rows): average ~77%. For a BGL user doing graph construction, traversal, shortest paths, MST, or connectivity work, graph-v3 covers the vast majority of the API surface. - **Specialist algorithm domains** (network flow, matching, coloring, planarity, isomorphism, ordering, layout — 7 rows): all at 0%, and these pull the overall figure down significantly. The coverage that exists is architecturally superior (C++20, ranges, concepts, CPOs, zero-config), and the library includes novel features (lazy traversal views, triangle counting, label propagation, Jaccard similarity) not found in BGL. The primary migration barrier is breadth of specialist algorithm coverage. 
diff --git a/docs/user-guide/generators.md b/docs/user-guide/generators.md index 731700a..05689ff 100644 --- a/docs/user-guide/generators.md +++ b/docs/user-guide/generators.md @@ -24,6 +24,9 @@ - [erdos_renyi_graph](#erdos_renyi_graph) - [erdos_renyi_gnm](#erdos_renyi_gnm) - [barabasi_albert_graph](#barabasi_albert_graph) + - [watts_strogatz](#watts_strogatz) + - [rmat](#rmat) + - [plod](#plod) - [Example: Building and Querying a Generated Graph](#example) --- @@ -48,6 +51,9 @@ All generators are header-only and require no external dependencies. #include #include #include +#include +#include +#include ``` --- @@ -213,6 +219,105 @@ auto edges = graph::generators::barabasi_albert_graph(1000u, 3u); --- +### `watts_strogatz` + +Generates a small-world graph using the Watts–Strogatz model: a ring lattice +where each vertex connects to its `k` nearest neighbours, with each forward +lattice edge rewired to a random target with probability `beta`. + +```cpp +template +auto watts_strogatz(VId n, VId k, double beta, uint64_t seed = 42, + weight_dist wdist = weight_dist::uniform) + -> std::vector>; +``` + +| Parameter | Description | +|-----------|-------------| +| `n` | Number of vertices (must be > `k`) | +| `k` | Each vertex connects to its `k` nearest ring neighbours (rounded down to even) | +| `beta` | Rewiring probability in [0, 1]: `0` = pure ring lattice, `1` ≈ random graph | +| `seed` | Random seed for reproducibility | +| `wdist` | Edge-weight distribution: `weight_dist::uniform` (default), `weight_dist::exponential`, or `weight_dist::constant_one` | + +**Returns:** Bidirectional edges (each undirected pair emitted both ways), sorted +by source id. Intermediate `beta` (~0.01–0.1) produces the characteristic +small-world regime: high clustering with short average path length. 
+ +```cpp +auto edges = graph::generators::watts_strogatz(100u, 6u, 0.1); +// ring lattice of degree 6, 10% of edges rewired +``` + +--- + +### `rmat` + +Generates a directed graph using the R-MAT (Recursive MATrix) model, which +produces the power-law / community structure used by the Graph500 benchmark. +Each edge is placed by recursively descending into one of four adjacency-matrix +quadrants with probabilities `(a, b, c, d)`. + +```cpp +template +auto rmat(uint32_t scale, size_t m, + double a = 0.57, double b = 0.19, double c = 0.19, double d = 0.05, + uint64_t seed = 42, weight_dist wdist = weight_dist::uniform) + -> std::vector>; +``` + +| Parameter | Description | +|-----------|-------------| +| `scale` | Graph has `2^scale` vertices | +| `m` | Number of directed edges to attempt to place | +| `a, b, c, d` | Quadrant probabilities (should sum to ~1; normalised internally) | +| `seed` | Random seed for reproducibility | +| `wdist` | Edge-weight distribution (see above) | + +**Returns:** Up to `m` distinct directed edges (self-loops and duplicates +removed), sorted by source id. The default `(0.57, 0.19, 0.19, 0.05)` are the +standard Graph500 parameters. + +```cpp +auto edges = graph::generators::rmat(16, 1u << 18); +// 65'536 vertices, ~256K edges, skewed degree distribution +``` + +--- + +### `plod` + +Generates a directed graph with a power-law out-degree distribution +(Palmer–Steffan PLOD model). Each vertex is assigned a target out-degree drawn +from a power law, then edges are placed to random targets. 
+ +```cpp +template +auto plod(VId n, double alpha = 2.5, double beta = 10.0, + uint64_t seed = 42, weight_dist wdist = weight_dist::uniform) + -> std::vector>; +``` + +| Parameter | Description | +|-----------|-------------| +| `n` | Number of vertices | +| `alpha` | Power-law exponent (larger ⇒ steeper degree decay) | +| `beta` | Degree scaling factor (larger ⇒ denser graph) | +| `seed` | Random seed for reproducibility | +| `wdist` | Edge-weight distribution (see above) | + +**Returns:** Directed edges (no self-loops or duplicates), sorted by source id. + +> **Note:** For most scale-free use cases [`barabasi_albert_graph`](#barabasi_albert_graph) +> is a better choice; `plod` is provided for BGL parity. + +```cpp +auto edges = graph::generators::plod(1000u, 2.5, 10.0); +// power-law out-degree distribution +``` + +--- + ## Example ```cpp diff --git a/include/graph/generators.hpp b/include/graph/generators.hpp index 499209e..fd72dbf 100644 --- a/include/graph/generators.hpp +++ b/include/graph/generators.hpp @@ -9,6 +9,9 @@ * - barabasi_albert() — preferential-attachment (scale-free) * - path_graph() — simple directed path * - complete_graph() — complete graph K(n) + * - watts_strogatz() — small-world ring lattice with rewiring + * - rmat() — R-MAT recursive-matrix (Graph500-style) + * - plod() — power-law out-degree * * All generators return a sorted std::vector> * suitable for loading into any graph container via load_edges(). @@ -23,3 +26,6 @@ #include #include #include +#include +#include +#include diff --git a/include/graph/generators/plod.hpp b/include/graph/generators/plod.hpp new file mode 100644 index 0000000..bd48141 --- /dev/null +++ b/include/graph/generators/plod.hpp @@ -0,0 +1,86 @@ +/** + * @file plod.hpp + * @brief PLOD (Power-Law Out-Degree) graph generator. 
+ * + * Implements the Palmer–Steffan power-law out-degree model: each vertex i is + * assigned a "credit" (target out-degree) of + * + * credit(i) = max(1, floor(beta * n * x_i^(-alpha))) + * + * where x_i is drawn uniformly from [1, n). Sources are then visited in id + * order; each draws uniformly random targets until its credit is spent or a + * budget of 4 * credit + 8 draws runs out, so a vertex may end up with fewer + * out-edges than its credit. A larger `alpha` makes the credits decay faster. + * + * Self-loops and duplicate directed edges are skipped; the returned list is + * sorted by source_id. + * + * @note Barabási–Albert (`barabasi_albert.hpp`) is often a better choice for a + * scale-free graph; PLOD is provided for BGL parity. + */ + +#pragma once + +#include + +#include +#include +#include +#include +#include +#include +#include + +namespace graph::generators { + +/// Generate a PLOD (Power-Law Out-Degree) directed graph. +/// +/// @tparam VId Vertex id type (default: uint32_t). +/// @param n Number of vertices (n < 2 yields an empty list). +/// @param alpha Power-law exponent (larger ⇒ steeper degree decay). +/// @param beta Degree scaling factor (larger ⇒ denser graph). +/// @param seed RNG seed for reproducibility. +/// @param wdist Weight distribution family. +/// @return Sorted edge list (ascending by source_id), no self-loops/duplicates. +template +edge_list plod(VId n, double alpha = 2.5, double beta = 10.0, + uint64_t seed = 42, weight_dist wdist = weight_dist::uniform) { + std::mt19937_64 rng(seed); + edge_list generated_edges; + if (n < 2) { + return generated_edges; + } + + // Assign each vertex a target out-degree (credit) from the power law, never below 1. + std::vector credit(n, 0); + std::uniform_real_distribution xdist(1.0, static_cast(n)); + for (VId i = 0; i < n; ++i) { + const double x = xdist(rng); + const double c = beta * std::pow(x, -alpha) * static_cast(n); + credit[i] = (c < 1.0) ?
size_t{1} : static_cast(c); + } + + std::set> present; + std::uniform_int_distribution pick(0, n - 1); + + for (VId u = 0; u < n; ++u) { + size_t guard = 0; + const size_t max_attempts = credit[u] * 4 + 8; + while (credit[u] > 0 && guard++ < max_attempts) { + VId v = pick(rng); + if (v == u) { + continue; + } + if (present.insert({u, v}).second) { + generated_edges.push_back({u, v, sample_weight(rng, wdist)}); + --credit[u]; + } + } + } + + std::stable_sort(generated_edges.begin(), generated_edges.end(), + [](const auto& a, const auto& b) { return a.source_id < b.source_id; }); + return generated_edges; +} + +} // namespace graph::generators diff --git a/include/graph/generators/rmat.hpp b/include/graph/generators/rmat.hpp new file mode 100644 index 0000000..054630e --- /dev/null +++ b/include/graph/generators/rmat.hpp @@ -0,0 +1,99 @@ +/** + * @file rmat.hpp + * @brief R-MAT (Recursive MATrix) graph generator. + * + * Generates a directed graph whose edges follow a recursive, self-similar + * degree distribution controlled by four probabilities (a, b, c, d) that + * partition the adjacency matrix into four quadrants: + * + * +-------+-------+ + * | a | b | + * +-------+-------+ + * | c | d | + * +-------+-------+ + * + * Each edge is placed by recursively descending into a quadrant for + * `scale` levels (one id bit per level). With a skewed (a, b, c, d) this produces the + * power-law / community structure used by the Graph500 benchmark. + * + * The graph has exactly 2^scale vertices (ids in [0, 2^scale)); the + * returned list is sorted by source_id. Self-loops and duplicate directed + * edges produced by the recursive sampling are dropped. + */ + +#pragma once + +#include + +#include +#include +#include +#include +#include +#include + +namespace graph::generators { + +/// Generate an R-MAT directed graph (Graph500-style). +/// +/// @tparam VId Vertex id type (default: uint32_t). +/// @param scale Graph has 2^scale vertices (0 yields an empty list; must not exceed VId's bit width). +/// @param m Number of (directed) edges to attempt to place.
+/// @param a Quadrant probability for the top-left block. +/// @param b Quadrant probability for the top-right block. +/// @param c Quadrant probability for the bottom-left block. +/// @param d Quadrant probability for the bottom-right block (a, b, c, d are divided by their sum). +/// @param seed RNG seed for reproducibility. +/// @param wdist Weight distribution family. +/// @return Edge list sorted by source_id; self-loops and duplicates are dropped, so it may hold fewer than m edges. +template +edge_list rmat(uint32_t scale, size_t m, double a = 0.57, double b = 0.19, + double c = 0.19, double d = 0.05, uint64_t seed = 42, + weight_dist wdist = weight_dist::uniform) { + std::mt19937_64 rng(seed); + edge_list generated_edges; + if (scale == 0 || m == 0) { + return generated_edges; + } + + const double sum = a + b + c + d; + // Normalise by the sum; pa, pb, pc are the cumulative thresholds compared against r below. + const double pa = a / sum; + const double pb = pa + b / sum; + const double pc = pb + c / sum; + + std::uniform_real_distribution coin(0.0, 1.0); + std::set> present; + generated_edges.reserve(m); + + for (size_t e = 0; e < m; ++e) { + VId u = 0; + VId v = 0; + for (uint32_t level = 0; level < scale; ++level) { + const VId bit = static_cast(VId{1} << (scale - 1 - level)); + const double r = coin(rng); + if (r < pa) { + // top-left: row bit 0, col bit 0 + } else if (r < pb) { + v = static_cast(v | bit); // top-right: col bit 1 + } else if (r < pc) { + u = static_cast(u | bit); // bottom-left: row bit 1 + } else { + u = static_cast(u | bit); // bottom-right: both bits 1 + v = static_cast(v | bit); + } + } + if (u == v) { + continue; // skip self-loops (the attempt is not retried) + } + if (present.insert({u, v}).second) { + generated_edges.push_back({u, v, sample_weight(rng, wdist)}); + } + } + + std::stable_sort(generated_edges.begin(), generated_edges.end(), + [](const auto& lhs, const auto& rhs) { return lhs.source_id < rhs.source_id; }); + return generated_edges; +} + +} // namespace graph::generators diff --git a/include/graph/generators/watts_strogatz.hpp
b/include/graph/generators/watts_strogatz.hpp new file mode 100644 index 0000000..dc4b01a --- /dev/null +++ b/include/graph/generators/watts_strogatz.hpp @@ -0,0 +1,94 @@ +/** + * @file watts_strogatz.hpp + * @brief Watts–Strogatz small-world graph generator. + * + * Builds a ring lattice where each vertex is connected to its k nearest + * neighbours (k/2 on each side), then rewires each "forward" lattice edge + * with probability β to a uniformly random target, avoiding self-loops and + * duplicate edges. Low β yields a regular lattice (high clustering, long + * paths); β → 1 approaches an Erdős–Rényi random graph. Intermediate β + * produces the characteristic small-world regime (high clustering, short + * paths). + * + * Edges are emitted in both directions (undirected reachability) and the + * returned list is sorted by source_id. + */ + +#pragma once + +#include + +#include +#include +#include +#include +#include +#include + +namespace graph::generators { + +/// Generate a Watts–Strogatz small-world graph. +/// +/// @tparam VId Vertex id type (default: uint32_t). +/// @param n Number of vertices (n < 2 yields an empty list). +/// @param k Each vertex connects to its k nearest ring neighbours +/// (rounded down to even and clamped below n; k < 2 yields an empty list). +/// @param beta Rewiring probability in [0, 1]. +/// @param seed RNG seed for reproducibility. +/// @param wdist Weight distribution family. +/// @return Sorted edge list (ascending by source_id). +template +edge_list watts_strogatz(VId n, VId k, double beta, uint64_t seed = 42, + weight_dist wdist = weight_dist::uniform) { + std::mt19937_64 rng(seed); + edge_list generated_edges; + if (n < 2) { + return generated_edges; + } + + // half = neighbours per side; clamp it so that 2 * half < n. + VId half = k / 2; + if (half == 0) { + return generated_edges; + } + if (2 * half >= n) { + half = static_cast((n - 1) / 2); + } + + // Undirected pairs already emitted, stored as (min, max), to reject duplicates. + std::set> present; + auto ordered = [](VId a, VId b) { + return (a < b) ?
std::pair{a, b} : std::pair{b, a}; + }; + + std::uniform_real_distribution coin(0.0, 1.0); + std::uniform_int_distribution pick(0, n - 1); + + for (VId u = 0; u < n; ++u) { + for (VId j = 1; j <= half; ++j) { + VId v = static_cast((u + j) % n); // forward neighbour on the ring + if (coin(rng) < beta) { + // Rewire: draw a target that is not u and not already linked to u (up to 32 redraws, else keep v). + VId w = pick(rng); + int guard = 0; + while ((w == u || present.count(ordered(u, w))) && guard++ < 32) { + w = pick(rng); + } + if (w != u && !present.count(ordered(u, w))) { + v = w; + } + } + auto key = ordered(u, v); + if (u != v && present.insert(key).second) { + generated_edges.push_back({u, v, sample_weight(rng, wdist)}); + generated_edges.push_back({v, u, sample_weight(rng, wdist)}); + } + } + } + + std::stable_sort(generated_edges.begin(), generated_edges.end(), + [](const auto& a, const auto& b) { return a.source_id < b.source_id; }); + return generated_edges; +} + +} // namespace graph::generators diff --git a/tests/generators/test_generators.cpp b/tests/generators/test_generators.cpp index 46b571f..5693e79 100644 --- a/tests/generators/test_generators.cpp +++ b/tests/generators/test_generators.cpp @@ -319,6 +319,165 @@ TEST_CASE("complete_graph: basic properties", "[generators][complete]") { } } +// --------------------------------------------------------------------------- +// Watts–Strogatz small world +// --------------------------------------------------------------------------- + +TEST_CASE("watts_strogatz: basic properties", "[generators][watts_strogatz]") { + constexpr uint32_t N = 100; + constexpr uint32_t K = 6; + constexpr double beta = 0.1; + auto edges = watts_strogatz(N, K, beta); + + SECTION("no self-loops") { + for (const auto& e : edges) { + REQUIRE(e.source_id != e.target_id); + } + } + + SECTION("all vertex ids in range [0, N)") { + for (const auto& e : edges) { + REQUIRE(e.source_id < N); + REQUIRE(e.target_id < N); + } + } + + SECTION("sorted by source_id") { +
REQUIRE(std::is_sorted(edges.begin(), edges.end(), + [](const auto& a, const auto& b) { return a.source_id < b.source_id; })); + } + + SECTION("bidirectional: every (u,v) has matching (v,u)") { + std::set> edge_set; + for (const auto& e : edges) { + edge_set.emplace(e.source_id, e.target_id); + } + for (const auto& e : edges) { + REQUIRE(edge_set.count({e.target_id, e.source_id}) > 0); + } + } + + SECTION("no duplicate undirected pairs") { + std::set> edge_set; + for (const auto& e : edges) { + edge_set.emplace(e.source_id, e.target_id); + } + REQUIRE(edge_set.size() == edges.size()); + } + + SECTION("beta = 0 yields a pure ring lattice with N*K directed edges") { + auto lattice = watts_strogatz(N, K, 0.0); + REQUIRE(lattice.size() == static_cast(N) * K); + } + + SECTION("deterministic with same seed") { + auto edges2 = watts_strogatz(N, K, beta); + REQUIRE(edges.size() == edges2.size()); + for (size_t i = 0; i < edges.size(); ++i) { + REQUIRE(edges[i].source_id == edges2[i].source_id); + REQUIRE(edges[i].target_id == edges2[i].target_id); + } + } +} + +// --------------------------------------------------------------------------- +// R-MAT +// --------------------------------------------------------------------------- + +TEST_CASE("rmat: basic properties", "[generators][rmat]") { + constexpr uint32_t scale = 8; // 256 vertices + constexpr size_t M = 2000; + constexpr uint32_t N = 1u << scale; + auto edges = rmat(scale, M); + + SECTION("no self-loops") { + for (const auto& e : edges) { + REQUIRE(e.source_id != e.target_id); + } + } + + SECTION("all vertex ids in range [0, 2^scale)") { + for (const auto& e : edges) { + REQUIRE(e.source_id < N); + REQUIRE(e.target_id < N); + } + } + + SECTION("edges are distinct") { + std::set> edge_set; + for (const auto& e : edges) { + edge_set.emplace(e.source_id, e.target_id); + } + REQUIRE(edge_set.size() == edges.size()); + } + + SECTION("edge count does not exceed requested m") { + REQUIRE(edges.size() <= M); + } + + 
SECTION("sorted by source_id") { + REQUIRE(std::is_sorted(edges.begin(), edges.end(), + [](const auto& a, const auto& b) { return a.source_id < b.source_id; })); + } + + SECTION("deterministic with same seed") { + auto edges2 = rmat(scale, M); + REQUIRE(edges.size() == edges2.size()); + for (size_t i = 0; i < edges.size(); ++i) { + REQUIRE(edges[i].source_id == edges2[i].source_id); + REQUIRE(edges[i].target_id == edges2[i].target_id); + } + } +} + +// --------------------------------------------------------------------------- +// PLOD +// --------------------------------------------------------------------------- + +TEST_CASE("plod: basic properties", "[generators][plod]") { + constexpr uint32_t N = 200; + auto edges = plod(N); + + SECTION("no self-loops") { + for (const auto& e : edges) { + REQUIRE(e.source_id != e.target_id); + } + } + + SECTION("all vertex ids in range [0, N)") { + for (const auto& e : edges) { + REQUIRE(e.source_id < N); + REQUIRE(e.target_id < N); + } + } + + SECTION("edges are distinct") { + std::set> edge_set; + for (const auto& e : edges) { + edge_set.emplace(e.source_id, e.target_id); + } + REQUIRE(edge_set.size() == edges.size()); + } + + SECTION("sorted by source_id") { + REQUIRE(std::is_sorted(edges.begin(), edges.end(), + [](const auto& a, const auto& b) { return a.source_id < b.source_id; })); + } + + SECTION("deterministic with same seed") { + auto edges2 = plod(N); + REQUIRE(edges.size() == edges2.size()); + for (size_t i = 0; i < edges.size(); ++i) { + REQUIRE(edges[i].source_id == edges2[i].source_id); + REQUIRE(edges[i].target_id == edges2[i].target_id); + } + } + + SECTION("generates at least some edges") { + REQUIRE(edges.size() > 0); + } +} + // --------------------------------------------------------------------------- // Template parameter: custom VId type // --------------------------------------------------------------------------- @@ -330,6 +489,9 @@ TEST_CASE("generators work with uint64_t vertex ids", 
"[generators][template]") auto ba_edges = barabasi_albert(uint64_t{50}, uint64_t{2}); auto path_edges = path_graph(uint64_t{20}); auto kn_edges = complete_graph(uint64_t{8}); + auto ws_edges = watts_strogatz(uint64_t{50}, uint64_t{4}, 0.1); + auto rmat_edges = rmat(6, 100); + auto plod_edges = plod(uint64_t{50}); REQUIRE(er_edges.size() > 0); REQUIRE(gnm_edges.size() == 100); @@ -337,4 +499,7 @@ TEST_CASE("generators work with uint64_t vertex ids", "[generators][template]") REQUIRE(ba_edges.size() > 0); REQUIRE(path_edges.size() == 19); REQUIRE(kn_edges.size() == 8 * 7); + REQUIRE(ws_edges.size() > 0); + REQUIRE(rmat_edges.size() > 0); + REQUIRE(plod_edges.size() > 0); } From 2edc555077f727335296e2ed407951deb84346e8 Mon Sep 17 00:00:00 2001 From: Phil Ratzloff Date: Sun, 31 May 2026 16:48:52 -0400 Subject: [PATCH 3/6] feat(generators): add SSCA#2 clique-based benchmark generator ssca (include/graph/generators/ssca.hpp): - Partitions vertices into consecutive cliques of random size [1, max_clique_size] - Dense intra-clique edges with up to max_parallel_edges parallel edges per pair (a defining SSCA#2 trait) - Sparse inter-clique edges, accept probability decays as 2^-(clique distance) - No self-loops; output sorted by source_id Completes full BGL graph-generator parity. Follows the established pattern (edge_list return, weight_dist enum, uint64_t seed, VId template param); added to generators.hpp umbrella header. Tests: new TEST_CASE covering no-self-loops, id range, sorted, determinism, and the empty-graph edge case (clique size 1, no inter-clique edges). uint64_t template test extended. 
Docs: - docs/user-guide/generators.md: TOC, include, full ssca section - agents/bgl_migration_strategy.md: mark SSCA#2 done; generators now 100% (full parity); move generators bullet from gaps to strengths in executive summary; clear Remaining Gaps; update API block, roadmap, and Done note --- agents/bgl_migration_strategy.md | 21 +++-- docs/user-guide/generators.md | 37 ++++++++ include/graph/generators.hpp | 2 + include/graph/generators/ssca.hpp | 133 +++++++++++++++++++++++++++ tests/generators/test_generators.cpp | 47 ++++++++++ 5 files changed, 230 insertions(+), 10 deletions(-) create mode 100644 include/graph/generators/ssca.hpp diff --git a/agents/bgl_migration_strategy.md b/agents/bgl_migration_strategy.md index 327ef9d..74dca2a 100644 --- a/agents/bgl_migration_strategy.md +++ b/agents/bgl_migration_strategy.md @@ -55,12 +55,12 @@ graph-v3 is a ground-up C++20 redesign targeting ISO standardization (P3126–P3 - `filtered_graph` adaptor (vertex/edge predicates) modelling `adjacency_list` - Ready-to-use BGL adaptor (`graph::bgl::graph_adaptor`) for incremental migration - Three I/O formats already implemented (DOT, GraphML, JSON) +- Full BGL graph-generator parity (Erdős-Rényi G(n,p)/G(n,m), Barabási-Albert, Watts-Strogatz, R-MAT, PLOD, SSCA#2, 2D grid, path, complete graph) **Key gaps requiring attention for BGL migration:** - Dozens of missing algorithms across flow, matching, coloring, planarity, isomorphism, centrality, layout, and related areas - No `subgraph` hierarchy with descriptor mapping - No DIMACS or METIS I/O -- Graph generators partially implemented (Erdős-Rényi G(n,p) and G(n,m), Barabási-Albert, Watts-Strogatz, R-MAT, PLOD, 2D grid, path, complete graph available; only SSCA#2 still missing) - No `adjacency_matrix` container - No `copy_graph` utility with cross-type and property mapping support - No `labeled_graph` adaptor (string labels → vertex mapping) @@ -534,7 +534,7 @@ auto read_graphml(istream& is) -> dynamic_graph; | **Small World 
(Watts-Strogatz)** | `small_world_generator.hpp` | ✅ `` | ✅ Done | | **PLOD (Power-Law Out-Degree)** | `plod_generator.hpp` | ✅ `` | ✅ Done | | **R-MAT** | `rmat_graph_generator.hpp` | ✅ `` | ✅ Done | -| **SSCA#2** | `ssca_graph_generator.hpp` | ❌ Not available | 🟢 Low | +| **SSCA#2** | `ssca_graph_generator.hpp` | ✅ `` | ✅ Done | | **Complete Graph K(n)** | — (manual) | ✅ `` | ✅ Done | ### graph-v3 Generator API @@ -572,6 +572,9 @@ auto rm = rmat(16u, 1u << 18); // 65'536 vertices, ~256K edge // PLOD — power-law out-degree (BGL parity; prefer Barabási–Albert) auto pl = plod(1'000u); // power-law out-degree +// SSCA#2 — clique-based HPCS benchmark (dense cliques + sparse inter-clique) +auto ss = ssca(1'000u, 8u, 0.2); // clustered graph + // Load into any container: compressed_graph g; g.load_edges(er, std::identity{}, 10'000u); @@ -589,11 +592,9 @@ auto edges = erdos_renyi(1'000'000ULL, 0.00001); ### Remaining Gaps -The one remaining BGL generator is the composite benchmark generator: - -| Generator | Notes | -|-----------|-------| -| SSCA#2 | Composite clique-based HPCS benchmark generator | +None — all BGL graph generators now have graph-v3 equivalents. graph-v3 also adds +the Barabási–Albert, complete-graph K(n), and path generators that BGL lacks as +named functions. 
--- @@ -1208,7 +1209,7 @@ These items block migration for the largest number of BGL users: | **PageRank** | Algorithm | Low | Widely used iterative algorithm | | **DIMACS read/write** | I/O | Low | Required for max-flow benchmark suites | -> **Done since the previous revision of this plan:** `filtered_graph` adaptor, DOT/GraphML/JSON I/O, Erdős-Rényi G(n,p)/G(n,m) / Barabási-Albert / 2D grid / path / complete-graph / Watts-Strogatz / R-MAT / PLOD generators, `kosaraju` + `tarjan_scc`, `afforest`, library-shipped BGL adaptor (`include/graph/adaptors/bgl/`), composable visitor toolkit (`visitor_factory.hpp`: `make_visitor`, single-event adaptors, `predecessor_recorder`, `distance_recorder`, `time_stamper`), `valid_visitor` strict concept with `static_assert` diagnostics in BFS/DFS/Dijkstra/Bellman-Ford. +> **Done since the previous revision of this plan:** `filtered_graph` adaptor, DOT/GraphML/JSON I/O, Erdős-Rényi G(n,p)/G(n,m) / Barabási-Albert / 2D grid / path / complete-graph / Watts-Strogatz / R-MAT / PLOD / SSCA#2 generators, `kosaraju` + `tarjan_scc`, `afforest`, library-shipped BGL adaptor (`include/graph/adaptors/bgl/`), composable visitor toolkit (`visitor_factory.hpp`: `make_visitor`, single-event adaptors, `predecessor_recorder`, `distance_recorder`, `time_stamper`), `valid_visitor` strict concept with `static_assert` diagnostics in BFS/DFS/Dijkstra/Bellman-Ford. 
### Phase 2: Common Algorithm Coverage @@ -1237,7 +1238,7 @@ These items block migration for the largest number of BGL users: | **Push-Relabel Max Flow** | Algorithm | High | High-performance max flow | | **Max Cardinality Matching** | Algorithm | Medium | Bipartite matching | | **Layout algorithms** | Algorithm | Medium | Graph visualization | -| ~~**Small World / PLOD generators**~~ | ~~Generator~~ | ~~Low~~ | ✅ Done — `watts_strogatz.hpp`, `plod.hpp`, `rmat.hpp` | +| ~~**Small World / PLOD generators**~~ | ~~Generator~~ | ~~Low~~ | ✅ Done — `watts_strogatz.hpp`, `plod.hpp`, `rmat.hpp`, `ssca.hpp` (full BGL generator parity) | | ~~**Lambda visitor composition**~~ | ~~API~~ | ~~Low~~ | ✅ Done — `visitor_factory.hpp`: `make_visitor`, single-event adaptors, `predecessor_recorder`, `distance_recorder`, `time_stamper` | | **BGL compatibility header** | Migration | Medium | `graph_traits` shim + name aliases for gradual migration | @@ -1340,7 +1341,7 @@ The scores below are directional editorial estimates, not audited counts. | **Layout** | 5 algorithms | 0 | 0% | | **Graph adaptors** | 5 adaptors | 3 (transpose, filtered, BGL adaptor) | 60% | | **Graph I/O** | 5 formats | 3 (DOT, GraphML, JSON) | 60% | -| **Graph generators** | 6 generators | 9 (path, grid, complete, Erdős–Rényi G(n,p)/G(n,m), Barabási–Albert, Watts–Strogatz, R-MAT, PLOD) | 95% | +| **Graph generators** | 6 generators | 10 (path, grid, complete, Erdős–Rényi G(n,p)/G(n,m), Barabási–Albert, Watts–Strogatz, R-MAT, PLOD, SSCA#2) | 100% | | **Visitors** | 5 types + composable adaptors | Concept-checked visitors + composable adaptors (`make_visitor`, `on_*` event wrappers, `predecessor_recorder`, `distance_recorder`, `time_stamper`). The remaining unimplemented visitor events are related to colored tranversal not supported in graph-v3. 
| 90% | | **Graph mutation** | Full `MutableGraph` concept (CPOs) | Member-function mutation on both `dynamic_graph` and `undirected_adjacency_list`; no mutating CPOs | 70% | diff --git a/docs/user-guide/generators.md b/docs/user-guide/generators.md index 05689ff..9497f5f 100644 --- a/docs/user-guide/generators.md +++ b/docs/user-guide/generators.md @@ -27,6 +27,7 @@ - [watts_strogatz](#watts_strogatz) - [rmat](#rmat) - [plod](#plod) + - [ssca](#ssca) - [Example: Building and Querying a Generated Graph](#example) --- @@ -54,6 +55,7 @@ All generators are header-only and require no external dependencies. #include #include #include +#include ``` --- @@ -318,6 +320,41 @@ auto edges = graph::generators::plod(1000u, 2.5, 10.0); --- +### `ssca` + +Generates an SSCA#2 (HPCS Scalable Synthetic Compact Applications #2) benchmark +graph: randomly-sized cliques connected by sparse inter-clique edges whose +probability decays with the inter-clique id distance. + +```cpp +template +auto ssca(VId n, VId max_clique_size = 8, double prob_inter_clique = 0.2, + int max_parallel_edges = 2, uint64_t seed = 42, + weight_dist wdist = weight_dist::uniform) + -> std::vector>; +``` + +| Parameter | Description | +|-----------|-------------| +| `n` | Number of vertices | +| `max_clique_size` | Maximum clique size (sizes drawn uniformly from [1, this]) | +| `prob_inter_clique` | Probability a vertex emits an inter-clique edge | +| `max_parallel_edges` | Maximum parallel edges per intra-clique pair | +| `seed` | Random seed for reproducibility | +| `wdist` | Edge-weight distribution (see above) | + +**Returns:** Directed edges sorted by source id. Vertices are partitioned into +consecutive cliques; every ordered pair within a clique is connected (with up to +`max_parallel_edges` parallel edges — a defining SSCA#2 trait), plus sparse +inter-clique links. Self-loops are skipped. 
+ +```cpp +auto edges = graph::generators::ssca(1000u, 8u, 0.2); +// clustered graph: dense cliques + sparse inter-clique edges +``` + +--- + ## Example ```cpp diff --git a/include/graph/generators.hpp b/include/graph/generators.hpp index fd72dbf..9c3ba9d 100644 --- a/include/graph/generators.hpp +++ b/include/graph/generators.hpp @@ -12,6 +12,7 @@ * - watts_strogatz() — small-world ring lattice with rewiring * - rmat() — R-MAT recursive-matrix (Graph500-style) * - plod() — power-law out-degree + * - ssca() — SSCA#2 clique-based benchmark graph * * All generators return a sorted std::vector> * suitable for loading into any graph container via load_edges(). @@ -29,3 +30,4 @@ #include #include #include +#include diff --git a/include/graph/generators/ssca.hpp b/include/graph/generators/ssca.hpp new file mode 100644 index 0000000..3b859ce --- /dev/null +++ b/include/graph/generators/ssca.hpp @@ -0,0 +1,133 @@ +/** + * @file ssca.hpp + * @brief SSCA#2 (HPCS Scalable Synthetic Compact Applications #2) graph generator. + * + * Produces a graph built from randomly-sized cliques connected by sparse + * inter-clique edges, modelling the SSCA#2 graph-analysis benchmark: + * + * 1. Vertices [0, n) are partitioned into consecutive cliques whose sizes are + * drawn uniformly from [1, max_clique_size]. + * 2. Every ordered pair within a clique is connected (dense local structure), + * with up to `max_parallel_edges` parallel edges per pair. + * 3. Each vertex additionally emits inter-clique edges with probability + * `prob_inter_clique`, targeting a vertex in another clique chosen with a + * probability that decays with the inter-clique id distance (2^-distance). + * + * Self-loops are skipped; parallel edges within a clique are intentionally + * permitted (a defining SSCA#2 characteristic). The returned list is sorted by + * source_id. 
+ */ + +#pragma once + +#include + +#include +#include +#include +#include + +namespace graph::generators { + +/// Generate an SSCA#2 clique-based benchmark graph. +/// +/// @tparam VId Vertex id type (default: uint32_t). +/// @param n Number of vertices (0 yields an empty list). +/// @param max_clique_size Maximum clique size (sizes drawn from [1, this]; raised to 1 if smaller). +/// @param prob_inter_clique Probability a vertex attempts to emit one inter-clique edge. +/// @param max_parallel_edges Maximum parallel edges per ordered intra-clique pair (raised to 1 if smaller). +/// @param seed RNG seed for reproducibility. +/// @param wdist Weight distribution family. +/// @return Sorted edge list (ascending by source_id); parallel intra-clique +/// edges retained, no self-loops. +template +edge_list ssca(VId n, VId max_clique_size = 8, double prob_inter_clique = 0.2, + int max_parallel_edges = 2, uint64_t seed = 42, + weight_dist wdist = weight_dist::uniform) { + std::mt19937_64 rng(seed); + edge_list generated_edges; + if (n == 0) { + return generated_edges; + } + if (max_clique_size < 1) { + max_clique_size = 1; + } + if (max_parallel_edges < 1) { + max_parallel_edges = 1; + } + + // 1. Partition vertices into consecutive cliques of random size. + // clique_of[v] = clique index; clique_first/clique_last delimit each clique. + std::vector clique_of(n, 0); + std::vector clique_first; // first vertex id of each clique + std::vector clique_last; // one-past-last vertex id of each clique + { + std::uniform_int_distribution size_dist(1, max_clique_size); + VId v = 0; + VId cid = 0; + while (v < n) { + VId sz = size_dist(rng); + VId stop = (n - v < sz) ? n : static_cast(v + sz); + clique_first.push_back(v); + clique_last.push_back(stop); + for (VId u = v; u < stop; ++u) { + clique_of[u] = cid; + } + v = stop; + ++cid; + } + } + const VId num_cliques = static_cast(clique_first.size()); + + std::uniform_real_distribution coin(0.0, 1.0); + std::uniform_int_distribution par_dist(1, max_parallel_edges); + + // 2.
Intra-clique edges: all ordered pairs, each repeated 1..max_parallel_edges times. + for (VId c = 0; c < num_cliques; ++c) { + for (VId u = clique_first[c]; u < clique_last[c]; ++u) { + for (VId w = clique_first[c]; w < clique_last[c]; ++w) { + if (u == w) { + continue; + } + int parallel = par_dist(rng); + for (int p = 0; p < parallel; ++p) { + generated_edges.push_back({u, w, sample_weight(rng, wdist)}); + } + } + } + } + + // 3. Inter-clique edges: probability decays with clique-id distance. + if (num_cliques > 1) { + std::uniform_int_distribution vertex_dist(0, n - 1); + for (VId u = 0; u < n; ++u) { + if (coin(rng) >= prob_inter_clique) { + continue; + } + // Accept a candidate in another clique with probability 2^-(clique distance); after 16 rejections the last redraw is kept untested. + VId w = vertex_dist(rng); + int attempts = 0; + while (attempts++ < 16) { + if (clique_of[w] != clique_of[u]) { + const VId dist = (clique_of[w] > clique_of[u]) + ? static_cast(clique_of[w] - clique_of[u]) + : static_cast(clique_of[u] - clique_of[w]); + const double accept = std::ldexp(1.0, -static_cast(std::min(dist, 30))); + if (coin(rng) < accept) { + break; + } + } + w = vertex_dist(rng); + } + if (clique_of[w] != clique_of[u]) { + generated_edges.push_back({u, w, sample_weight(rng, wdist)}); + } + } + } + + std::stable_sort(generated_edges.begin(), generated_edges.end(), + [](const auto& lhs, const auto& rhs) { return lhs.source_id < rhs.source_id; }); + return generated_edges; +} + +} // namespace graph::generators diff --git a/tests/generators/test_generators.cpp b/tests/generators/test_generators.cpp index 5693e79..e335530 100644 --- a/tests/generators/test_generators.cpp +++ b/tests/generators/test_generators.cpp @@ -478,6 +478,51 @@ TEST_CASE("plod: basic properties", "[generators][plod]") { } } +// --------------------------------------------------------------------------- +// SSCA#2 +// --------------------------------------------------------------------------- + +TEST_CASE("ssca: basic properties", "[generators][ssca]") { + constexpr
uint32_t N = 200; + auto edges = ssca(N); + + SECTION("no self-loops") { + for (const auto& e : edges) { + REQUIRE(e.source_id != e.target_id); + } + } + + SECTION("all vertex ids in range [0, N)") { + for (const auto& e : edges) { + REQUIRE(e.source_id < N); + REQUIRE(e.target_id < N); + } + } + + SECTION("sorted by source_id") { + REQUIRE(std::is_sorted(edges.begin(), edges.end(), + [](const auto& a, const auto& b) { return a.source_id < b.source_id; })); + } + + SECTION("deterministic with same seed") { + auto edges2 = ssca(N); + REQUIRE(edges.size() == edges2.size()); + for (size_t i = 0; i < edges.size(); ++i) { + REQUIRE(edges[i].source_id == edges2[i].source_id); + REQUIRE(edges[i].target_id == edges2[i].target_id); + } + } + + SECTION("generates edges (dense intra-clique structure)") { + REQUIRE(edges.size() > 0); + } + + SECTION("clique size 1 with no inter-clique edges yields empty graph") { + auto isolated = ssca(N, /*max_clique_size*/ 1u, /*prob_inter_clique*/ 0.0); + REQUIRE(isolated.empty()); + } +} + // --------------------------------------------------------------------------- // Template parameter: custom VId type // --------------------------------------------------------------------------- @@ -502,4 +547,6 @@ TEST_CASE("generators work with uint64_t vertex ids", "[generators][template]") REQUIRE(ws_edges.size() > 0); REQUIRE(rmat_edges.size() > 0); REQUIRE(plod_edges.size() > 0); + auto ssca_edges = ssca(uint64_t{50}); + REQUIRE(ssca_edges.size() > 0); } From dd1faec43cb4f73a7d0fcbd7518a8b5e64682d3a Mon Sep 17 00:00:00 2001 From: Phil Ratzloff Date: Sun, 31 May 2026 17:10:06 -0400 Subject: [PATCH 4/6] feat(io): add DIMACS, METIS, and adjacency-list-text I/O MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit dimacs (include/graph/io/dimacs.hpp): - write_dimacs(os, g, problem="sp") — generic arc list, 1-indexed endpoints, edge weight auto-serialized when EV is formattable - write_dimacs_max_flow(os, g, source, 
sink, capacity_fn) — emits p/n/a lines - read_dimacs(is) -> dimacs_graph — handles c/p/n/a/e line kinds; normalizes 1-indexed endpoints to 0-indexed ids; parses max-flow, sp, and edge formats metis (include/graph/io/metis.hpp): - write_metis(os, g, with_weights=false) — treats graph as undirected; builds symmetric deduplicated adjacency; sets fmt=001 when with_weights=true and EV is formattable - read_metis(is) -> metis_graph — handles optional fmt 3-digit flag (vertex sizes / vertex weights / edge weights) and ncon field; normalizes to 0-indexed adjacency_list_text (include/graph/io/adjacency_list_text.hpp): - write_adjacency_list_text(os, g) — one line per vertex: "<id>: <target> <target> ..." - read_adjacency_list_text(is) -> adjacency_list_text_graph — accepts colon separator or whitespace-only; preserves vertex set (empty-adjacency lines) include/graph/io.hpp: extended umbrella to include all six I/O headers. Tests (tests/io/test_io.cpp): 15 new test cases covering write, read, roundtrip, and edge cases for all three formats (167 assertions in 34 test cases, all pass).
Docs: - docs/user-guide/io.md: updated overview table, headers section, added full sections (API signatures, examples, reader return fields) for DIMACS, METIS, and adjacency-list-text - agents/bgl_migration_strategy.md: mark all three formats done in §8 table; remove "No DIMACS or METIS I/O" from gaps; move I/O to strengths (full BGL parity + JSON); strike roadmap items; Graph I/O scorecard 60% -> 100%; overall coverage ~48% -> ~50% --- agents/bgl_migration_strategy.md | 32 ++- docs/user-guide/io.md | 154 +++++++++++++- include/graph/io.hpp | 12 +- include/graph/io/adjacency_list_text.hpp | 121 +++++++++++ include/graph/io/dimacs.hpp | 214 ++++++++++++++++++++ include/graph/io/metis.hpp | 214 ++++++++++++++++++++ tests/io/test_io.cpp | 245 +++++++++++++++++++++++ 7 files changed, 968 insertions(+), 24 deletions(-) create mode 100644 include/graph/io/adjacency_list_text.hpp create mode 100644 include/graph/io/dimacs.hpp create mode 100644 include/graph/io/metis.hpp diff --git a/agents/bgl_migration_strategy.md b/agents/bgl_migration_strategy.md index 74dca2a..ab8b50d 100644 --- a/agents/bgl_migration_strategy.md +++ b/agents/bgl_migration_strategy.md @@ -41,7 +41,7 @@ This guide serves two audiences: ## 1. Executive Summary -graph-v3 is a ground-up C++20 redesign targeting ISO standardization (P3126–P3131, P3337). It replaces BGL's iterator-pair/tag-dispatch/property-list architecture with CPOs, ranges, concepts, and structured bindings. The redesign is fundamentally cleaner but still covers only a modest fraction of classic BGL's total algorithm surface area and lacks several adaptors, I/O formats, and generators that BGL users rely on. +graph-v3 is a ground-up C++20 redesign targeting ISO standardization (P3126–P3131, P3337). It replaces BGL's iterator-pair/tag-dispatch/property-list architecture with CPOs, ranges, concepts, and structured bindings. 
The redesign is fundamentally cleaner but still covers only a modest fraction of classic BGL's total algorithm surface area and lacks several adaptors and algorithms that BGL users rely on. **Key strengths of graph-v3 over BGL:** - Zero-config adaptation of standard containers (`vector>` is a valid graph) @@ -54,13 +54,12 @@ graph-v3 is a ground-up C++20 redesign targeting ISO standardization (P3126–P3 - `transpose_view` is a zero-cost adaptor (no wrapper descriptor types) - `filtered_graph` adaptor (vertex/edge predicates) modelling `adjacency_list` - Ready-to-use BGL adaptor (`graph::bgl::graph_adaptor`) for incremental migration -- Three I/O formats already implemented (DOT, GraphML, JSON) +- Full BGL I/O format parity plus JSON (DOT, GraphML, JSON, DIMACS, METIS, adjacency-list text) - Full BGL graph-generator parity (Erdős-Rényi G(n,p)/G(n,m), Barabási-Albert, Watts-Strogatz, R-MAT, PLOD, SSCA#2, 2D grid, path, complete graph) **Key gaps requiring attention for BGL migration:** - Dozens of missing algorithms across flow, matching, coloring, planarity, isomorphism, centrality, layout, and related areas - No `subgraph` hierarchy with descriptor mapping -- No DIMACS or METIS I/O - No `adjacency_matrix` container - No `copy_graph` utility with cross-type and property mapping support - No `labeled_graph` adaptor (string labels → vertex mapping) @@ -411,14 +410,14 @@ graph-v3's lazy view system is a significant advancement over BGL: |--------|-----|----------|----------| | **DOT / GraphViz** | `read_graphviz()`, `write_graphviz()` | ✅ `write_dot()`, `read_dot()` | 🔴 High — most common format | | **GraphML (XML)** | `read_graphml()`, `write_graphml()` | ✅ `write_graphml()`, `read_graphml()` | 🟡 Medium | -| **DIMACS** | `read_dimacs_max_flow()`, `write_dimacs_max_flow()` | ❌ None | 🟡 Medium (needed for flow algorithms) | -| **METIS** | `metis_reader` class | ❌ None | 🟢 Low | -| **Adjacency List Text** | `operator<<` / `operator>>` | ❌ None | 🟢 Low | +| **DIMACS** | 
`read_dimacs_max_flow()`, `write_dimacs_max_flow()` | ✅ `write_dimacs()`, `write_dimacs_max_flow()`, `read_dimacs()` | 🟡 Medium (needed for flow algorithms) | +| **METIS** | `metis_reader` class | ✅ `write_metis()`, `read_metis()` | 🟢 Low | +| **Adjacency List Text** | `operator<<` / `operator>>` | ✅ `write_adjacency_list_text()`, `read_adjacency_list_text()` | 🟢 Low | | **JSON** | None | ✅ `write_json()`, `read_json()` | 🟡 Medium (modern format) | -**Status:** DOT, GraphML, and JSON readers/writers are implemented and shipped. Headers live under `include/graph/io/` (`dot.hpp`, `graphml.hpp`, `json.hpp`). Reader functions return type-tagged graph objects (`dot_graph`, `graphml_graph`, `json_graph`) suitable for use with all graph-v3 algorithms. +**Status:** All six interchange formats are implemented and shipped. Headers live under `include/graph/io/` (`dot.hpp`, `graphml.hpp`, `json.hpp`, `dimacs.hpp`, `metis.hpp`, `adjacency_list_text.hpp`). Writers are generic over any graph satisfying the adjacency-list concepts; readers return lightweight type-tagged parsed structures (`dot_graph`, `graphml_graph`, `json_graph`, `dimacs_graph`, `metis_graph`, `adjacency_list_text_graph`) suitable for post-processing into any graph-v3 container. DIMACS supports the max-flow, shortest-path, and edge (clique/coloring) variants; METIS handles the optional `fmt`/`ncon` weight flags; both normalize the file's 1-indexed ids to 0-indexed. -**Recommendation:** Implement DIMACS next — needed for the standard max-flow benchmark suite once flow algorithms land. METIS and adjacency-list text are low priority. +**Recommendation:** None outstanding — graph-v3 now matches BGL's I/O format coverage and adds JSON on top. Future work is limited to broadening the parsed subsets (e.g. DOT subgraphs, GraphML nested graphs) as needs arise. 
### DOT API — `std::format`-Based (implemented) @@ -1207,9 +1206,8 @@ These items block migration for the largest number of BGL users: | **`copy_graph` utility** | Utility | Low | Cross-type graph copy with property mapping | | **Betweenness Centrality** | Algorithm | Medium | Core network analysis metric | | **PageRank** | Algorithm | Low | Widely used iterative algorithm | -| **DIMACS read/write** | I/O | Low | Required for max-flow benchmark suites | -> **Done since the previous revision of this plan:** `filtered_graph` adaptor, DOT/GraphML/JSON I/O, Erdős-Rényi G(n,p)/G(n,m) / Barabási-Albert / 2D grid / path / complete-graph / Watts-Strogatz / R-MAT / PLOD / SSCA#2 generators, `kosaraju` + `tarjan_scc`, `afforest`, library-shipped BGL adaptor (`include/graph/adaptors/bgl/`), composable visitor toolkit (`visitor_factory.hpp`: `make_visitor`, single-event adaptors, `predecessor_recorder`, `distance_recorder`, `time_stamper`), `valid_visitor` strict concept with `static_assert` diagnostics in BFS/DFS/Dijkstra/Bellman-Ford. +> **Done since the previous revision of this plan:** `filtered_graph` adaptor, DOT/GraphML/JSON I/O plus full BGL I/O parity (DIMACS via `dimacs.hpp`, METIS via `metis.hpp`, adjacency-list text via `adjacency_list_text.hpp`), Erdős-Rényi G(n,p)/G(n,m) / Barabási-Albert / 2D grid / path / complete-graph / Watts-Strogatz / R-MAT / PLOD / SSCA#2 generators, `kosaraju` + `tarjan_scc`, `afforest`, library-shipped BGL adaptor (`include/graph/adaptors/bgl/`), composable visitor toolkit (`visitor_factory.hpp`: `make_visitor`, single-event adaptors, `predecessor_recorder`, `distance_recorder`, `time_stamper`), `valid_visitor` strict concept with `static_assert` diagnostics in BFS/DFS/Dijkstra/Bellman-Ford. 
### Phase 2: Common Algorithm Coverage @@ -1224,7 +1222,7 @@ These items block migration for the largest number of BGL users: | **Transitive Closure/Reduction** | Algorithm | Medium | DAG analysis | | **Core Numbers (k-core)** | Algorithm | Medium | Network analysis | | **Cuthill-McKee Ordering** | Algorithm | Medium | Sparse matrix bandwidth reduction | -| **DIMACS I/O** | I/O | Low | Needed for flow algorithm benchmarks | +| ~~**DIMACS I/O**~~ | ~~I/O~~ | ~~Low~~ | ✅ Done — `dimacs.hpp` (`write_dimacs`, `write_dimacs_max_flow`, `read_dimacs`) | ### Phase 3: Advanced Features @@ -1246,7 +1244,7 @@ These items block migration for the largest number of BGL users: | Item | Type | Effort | Rationale | |------|------|--------|-----------| -| **METIS I/O** | I/O | Low | Legacy partitioning format | +| ~~**METIS I/O**~~ | ~~I/O~~ | ~~Low~~ | ✅ Done — `metis.hpp` (`write_metis`, `read_metis`) | | **Parallel algorithms** | Algorithm | High | Parallel BFS, CC, PageRank | | **`grid_graph`** | Container | Medium | Implicit N-dimensional grid | | **Condensation graph** | Algorithm | Low | DAG from SCC | @@ -1307,7 +1305,7 @@ Why this strengthens — rather than competes with — the `std::graph` goal: - Present the Boost-facing name (e.g. `boost::graph2` or `boost::graph::v2`) as an **alias / inline-namespace layer** over the canonical namespace, **not** a rename or fork. One source of truth, two presented names. This keeps the eventual `std::graph` migration a near-mechanical re-alias. - Use Boost's pre-acceptance "no stability guarantee" window to keep refining the design while usage data accrues, so a Boost release does not prematurely calcify the API/ABI that the proposal depends on. -**Scope discipline:** the standardization core is the container / concept / CPO / traversal foundation (the core categories average ~75% in [Appendix C](#appendix-c-migration-readiness-scorecard)). 
Ship that foundation to Boost to validate it; the specialist algorithm domains still at 0% in Appendix A (flow, matching, coloring, planarity, isomorphism, ordering, layout) can land incrementally and block neither the Boost review nor the proposal. +**Scope discipline:** the standardization core is the container / concept / CPO / traversal foundation (the core categories average ~78% in [Appendix C](#appendix-c-migration-readiness-scorecard)). Ship that foundation to Boost to validate it; the specialist algorithm domains still at 0% in Appendix A (flow, matching, coloring, planarity, isomorphism, ordering, layout) can land incrementally and block neither the Boost review nor the proposal. **Sequencing:** @@ -1340,16 +1338,16 @@ The scores below are directional editorial estimates, not audited counts. | **Ordering/bandwidth** | 8 algorithms | 0 | 0% | | **Layout** | 5 algorithms | 0 | 0% | | **Graph adaptors** | 5 adaptors | 3 (transpose, filtered, BGL adaptor) | 60% | -| **Graph I/O** | 5 formats | 3 (DOT, GraphML, JSON) | 60% | +| **Graph I/O** | 5 formats | 6 (DOT, GraphML, JSON, DIMACS, METIS, adjacency-list text) | 100% | | **Graph generators** | 6 generators | 10 (path, grid, complete, Erdős–Rényi G(n,p)/G(n,m), Barabási–Albert, Watts–Strogatz, R-MAT, PLOD, SSCA#2) | 100% | | **Visitors** | 5 types + composable adaptors | Concept-checked visitors + composable adaptors (`make_visitor`, `on_*` event wrappers, `predecessor_recorder`, `distance_recorder`, `time_stamper`). The remaining unimplemented visitor events are related to colored tranversal not supported in graph-v3. 
| 90% | | **Graph mutation** | Full `MutableGraph` concept (CPOs) | Member-function mutation on both `dynamic_graph` and `undirected_adjacency_list`; no mutating CPOs | 70% | -**Overall estimated BGL API coverage: ~48%** +**Overall estimated BGL API coverage: ~50%** -The unweighted average across all 20 scorecard rows is now ~48%, but the picture splits sharply: +The unweighted average across all 20 scorecard rows is now ~50%, but the picture splits sharply: -- **Core/everyday categories** (graph types, architecture, properties, traversal, MST, connectivity, I/O, adaptors, generators, visitors, mutation — 12 rows): average ~77%. For a BGL user doing graph construction, traversal, shortest paths, MST, or connectivity work, graph-v3 covers the vast majority of the API surface. +- **Core/everyday categories** (graph types, architecture, properties, traversal, MST, connectivity, I/O, adaptors, generators, visitors, mutation — 12 rows): average ~80%. For a BGL user doing graph construction, traversal, shortest paths, MST, or connectivity work, graph-v3 covers the vast majority of the API surface. - **Specialist algorithm domains** (network flow, matching, coloring, planarity, isomorphism, ordering, layout — 7 rows): all at 0%, and these pull the overall figure down significantly. The coverage that exists is architecturally superior (C++20, ranges, concepts, CPOs, zero-config), and the library includes novel features (lazy traversal views, triangle counting, label propagation, Jaccard similarity) not found in BGL. The primary migration barrier is breadth of specialist algorithm coverage. diff --git a/docs/user-guide/io.md b/docs/user-guide/io.md index 452e0e7..ab51987 100644 --- a/docs/user-guide/io.md +++ b/docs/user-guide/io.md @@ -4,7 +4,7 @@ # Graph I/O -> Read and write graphs in DOT, GraphML, and JSON formats. +> Read and write graphs in DOT, GraphML, JSON, DIMACS, METIS, and adjacency-list-text formats. 
@@ -20,31 +20,40 @@ - [DOT (GraphViz)](#dot-graphviz) - [GraphML (XML)](#graphml-xml) - [JSON](#json) +- [DIMACS](#dimacs) +- [METIS](#metis) +- [Adjacency List Text](#adjacency-list-text) - [Design Philosophy](#design-philosophy) --- ## Overview -The `graph::io` namespace provides readers and writers for three graph interchange formats. The writers work with any graph satisfying the adjacency list concepts; the readers return lightweight parsed structures. +The `graph::io` namespace provides readers and writers for six graph interchange formats. The writers work with any graph satisfying the adjacency list concepts; the readers return lightweight parsed structures. | Format | Writer | Reader | Use Case | |--------|--------|--------|----------| | **DOT** | `write_dot()` | `read_dot()` | Visualization (GraphViz), debugging | | **GraphML** | `write_graphml()` | `read_graphml()` | XML-based interchange, tool ecosystems | | **JSON** | `write_json()` | `read_json()` | Web applications, REST APIs, modern tooling | +| **DIMACS** | `write_dimacs()`, `write_dimacs_max_flow()` | `read_dimacs()` | Network-flow / shortest-path benchmark suites | +| **METIS** | `write_metis()` | `read_metis()` | Graph partitioning (METIS/ParMETIS) | +| **Adjacency List Text** | `write_adjacency_list_text()` | `read_adjacency_list_text()` | Quick structural dumps, debugging | --- ## Headers ```cpp -#include // umbrella — includes all three formats +#include // umbrella — includes all six formats // Or include individually: #include #include #include +#include +#include +#include ``` All functions live in `namespace graph::io`. @@ -189,11 +198,148 @@ auto result = graph::io::read_json(input_stream); --- +## DIMACS + +The DIMACS family is line-oriented; each line starts with a kind character: `c` (comment), `p` (problem), `n` (node descriptor), `a` (arc), `e` (edge). Vertex ids in the file are **1-indexed**; the reader normalizes them to **0-indexed** ids. 
+ +### Writing + +```cpp +// Generic arc list — emits "p <problem> <n> <m>" then "a u v [w]" lines. +template +void write_dimacs(std::ostream& os, const G& g, std::string_view problem = "sp"); + +// Max-flow problem — emits "p max <n> <m>", "n <s> s", "n <t> t", and "a u v <cap>". +template +void write_dimacs_max_flow(std::ostream& os, const G& g, + vertex_id_t source, vertex_id_t sink, + CapacityFn capacity_fn); +``` + +**Example:** + +```cpp +#include <graph/io/dimacs.hpp> + +// Shortest-path style arc list (edge value used as weight when formattable) +graph::io::write_dimacs(std::cout, g); + +// Max-flow problem with an explicit capacity accessor +graph::io::write_dimacs_max_flow(std::cout, g, /*source=*/0u, /*sink=*/3u, + [](const auto& gr, auto uv) { return graph::edge_value(gr, uv); }); +``` + +Output (max-flow): +``` +c Generated by graph-v3 +p max 4 5 +n 1 s +n 4 t +a 1 2 10 +... +``` + +### Reading + +```cpp +auto result = graph::io::read_dimacs(input_stream); +// result.problem — problem type from the "p" line (e.g. "max", "sp", "edge") +// result.num_vertices — declared vertex count +// result.num_arcs — declared arc/edge count +// result.nodes — vector of {id (0-indexed), designation} e.g. {0, "s"} +// result.edges — vector of {source, target (0-indexed), weight (text)} +``` + +--- + +## METIS + +The METIS graph format describes an **undirected** graph used by the METIS/ParMETIS partitioners. A header line `<n> <m> [fmt] [ncon]` is followed by one adjacency line per vertex (1-indexed). `m` counts each undirected edge once. The optional 3-digit `fmt` flag encodes vertex sizes / vertex weights / edge weights (hundreds / tens / ones). + +### Writing + +```cpp +// Treats the graph as undirected: each edge (u,v) is listed under both u and v.
+template +void write_metis(std::ostream& os, const G& g, bool with_weights = false); +``` + +**Example:** + +```cpp +#include + +graph::io::write_metis(std::cout, g); // structure only +graph::io::write_metis(std::cout, g, /*weights=*/true); // sets fmt=001 +``` + +Output: +``` +% Generated by graph-v3 +3 3 +2 3 +1 3 +1 2 +``` + +### Reading + +```cpp +auto result = graph::io::read_metis(input_stream); +// result.num_vertices — declared vertex count +// result.num_edges — declared (undirected) edge count +// result.fmt — format flag (0 if absent) +// result.ncon — number of vertex weights (0 if absent) +// result.adjacency — adjacency[i] = vector of {neighbor (0-indexed), weight} +// result.vertex_weights — vertex_weights[i] = vector of vertex weights +``` + +--- + +## Adjacency List Text + +A plain whitespace-delimited structural dump in the spirit of BGL's `operator<<` / `operator>>`. **It is not CSV.** One line per vertex: the vertex id, a `:` separator, then its out-neighbours separated by spaces. It carries structure only (no values), and vertices with no out-edges still produce a line so the vertex set round-trips. + +### Writing + +```cpp +template +void write_adjacency_list_text(std::ostream& os, const G& g); +``` + +**Example:** + +```cpp +#include + +graph::io::write_adjacency_list_text(std::cout, g); +``` + +Output: +``` +0: 1 2 +1: 2 +2: 0 3 +3: +``` + +### Reading + +```cpp +auto result = graph::io::read_adjacency_list_text(input_stream); +// result.vertex_ids — vector of vertex ID strings in declaration order +// result.edges — vector of {source, target} +``` + +The reader also accepts lines with a whitespace-only separator (`0 1 2`), treating the first token as the source vertex. + +--- + ## Design Philosophy **`std::format`-based auto-detection.** If your vertex or edge value type has a `std::formatter` specialization, the writers automatically serialize it as a label — zero configuration needed. 
-**No external dependencies.** The GraphML and JSON parsers are self-contained lightweight implementations sufficient for graph interchange. They handle the core subset of each format without requiring libxml2 or nlohmann/json. +**No external dependencies.** The GraphML, JSON, DIMACS, METIS, and adjacency-list-text parsers are self-contained lightweight implementations sufficient for graph interchange. They handle the core subset of each format without requiring libxml2, nlohmann/json, or Boost. **Separation of concerns.** Writers are generic (work with any graph satisfying adjacency list concepts). Readers return simple POD-like structures that you can post-process into any graph type. diff --git a/include/graph/io.hpp b/include/graph/io.hpp index 6840b94..026d0f5 100644 --- a/include/graph/io.hpp +++ b/include/graph/io.hpp @@ -3,9 +3,12 @@ * @brief Convenience umbrella header for all graph I/O formats. * * Include this single header to access all built-in graph I/O: - * - DOT (GraphViz): write_dot(), read_dot() - * - GraphML (XML): write_graphml(), read_graphml() - * - JSON: write_json(), read_json() + * - DOT (GraphViz): write_dot(), read_dot() + * - GraphML (XML): write_graphml(), read_graphml() + * - JSON: write_json(), read_json() + * - DIMACS: write_dimacs(), write_dimacs_max_flow(), read_dimacs() + * - METIS: write_metis(), read_metis() + * - Adjacency List Text: write_adjacency_list_text(), read_adjacency_list_text() * * All writers use std::format for zero-config value serialization when the * value type satisfies std::formatter. 
Custom attribute functions can @@ -14,6 +17,9 @@ #pragma once +#include +#include #include #include #include +#include diff --git a/include/graph/io/adjacency_list_text.hpp b/include/graph/io/adjacency_list_text.hpp new file mode 100644 index 0000000..3a00667 --- /dev/null +++ b/include/graph/io/adjacency_list_text.hpp @@ -0,0 +1,121 @@ +/** + * @file adjacency_list_text.hpp + * @brief Adjacency-list text graph I/O — write and read. + * + * Provides: + * - write_adjacency_list_text(os, g) Emit a graph as a textual adjacency list + * - read_adjacency_list_text(is) Parse a textual adjacency list + * + * This is the plain whitespace-delimited adjacency dump in the spirit of BGL's + * `operator<<` / `operator>>` for graphs. It is NOT CSV: one line per vertex, + * the vertex id, a `:` separator, then its out-neighbours separated by spaces: + * + * 0: 1 2 + * 1: 2 + * 2: 0 3 + * 3: + * + * The format carries structure only (no vertex/edge values). Vertices with no + * out-edges still produce a line (`:`) so the vertex set is preserved. + * + * NOTE: Self-contained — no external dependencies. + */ + +#pragma once + +#include +#include + +#include +#include +#include +#include +#include +#include + +namespace graph::io { + +// --------------------------------------------------------------------------- +// write_adjacency_list_text +// --------------------------------------------------------------------------- + +/** + * @brief Write a graph as a textual adjacency list. + * + * Emits one line per vertex: `: ...`. Vertices with no + * out-edges produce a trailing-colon line so the full vertex set round-trips. + * + * @param os Output stream. + * @param g Graph satisfying adjacency_list. 
+ */ +template +void write_adjacency_list_text(std::ostream& os, const G& g) { + for (auto u : vertices(g)) { + auto uid = vertex_id(g, u); + os << uid << ':'; + for (auto uv : edges(g, u)) { + os << ' ' << target_id(g, uv); + } + os << '\n'; + } +} + +// --------------------------------------------------------------------------- +// read_adjacency_list_text +// --------------------------------------------------------------------------- + +/// A single parsed adjacency-list-text edge. +struct adjacency_list_text_edge { + std::string source; + std::string target; +}; + +/// Parsed adjacency-list-text graph. +struct adjacency_list_text_graph { + std::vector vertex_ids; ///< Vertices in declaration order + std::vector edges; ///< All edges +}; + +/** + * @brief Parse a textual adjacency list into an adjacency_list_text_graph. + * + * Each non-empty line is `: ...`. The `:` separator is + * optional — a line may also be a whitespace-separated ` ...`. + * Both the source vertex and every target are registered as vertices. + * + * @param is Input stream containing adjacency-list text. + * @return Parsed adjacency_list_text_graph. + */ +inline adjacency_list_text_graph read_adjacency_list_text(std::istream& is) { + adjacency_list_text_graph result; + std::string line; + + auto ensure_vertex = [&](const std::string& id) { + for (const auto& existing : result.vertex_ids) { + if (existing == id) return; + } + result.vertex_ids.push_back(id); + }; + + while (std::getline(is, line)) { + // Normalize: replace a leading "id:" colon with a space separator. 
+ auto colon = line.find(':'); + if (colon != std::string::npos) line[colon] = ' '; + + std::istringstream ls(line); + std::string source; + if (!(ls >> source)) continue; // blank line + + ensure_vertex(source); + + std::string target; + while (ls >> target) { + ensure_vertex(target); + result.edges.push_back({source, target}); + } + } + + return result; +} + +} // namespace graph::io diff --git a/include/graph/io/dimacs.hpp b/include/graph/io/dimacs.hpp new file mode 100644 index 0000000..6522839 --- /dev/null +++ b/include/graph/io/dimacs.hpp @@ -0,0 +1,214 @@ +/** + * @file dimacs.hpp + * @brief DIMACS graph I/O — write and read. + * + * Provides: + * - write_dimacs(os, g, problem = "sp") Generic DIMACS arc list + * - write_dimacs_max_flow(os, g, src, snk, capfn) DIMACS max-flow problem + * - read_dimacs(is) Parse DIMACS into dimacs_graph + * + * The DIMACS family of formats is line oriented; each line begins with a + * single character describing its kind: + * + * c comment line (ignored) + * p problem line (exactly one, first) + * n node descriptor (e.g. source/sink for max) + * a [] arc descriptor (1-indexed endpoints) + * e edge descriptor (clique / coloring format) + * + * Vertex ids in DIMACS files are 1-indexed. The reader normalizes endpoints + * to 0-indexed ids in the returned structure so they can be used directly + * with graph-v3 containers. + * + * Reference (max-flow): ftp://dimacs.rutgers.edu/pub/netflow/general-info/ + * + * NOTE: Self-contained — no external dependencies. + */ + +#pragma once + +#include +#include + +#include +#include +#include +#include +#include +#include +#include +#include + +namespace graph::io { + +// --------------------------------------------------------------------------- +// write_dimacs — generic arc list (shortest-path style: "p sp n m") +// --------------------------------------------------------------------------- + +/** + * @brief Write a graph in generic DIMACS arc-list format. 
+ * + * Emits a problem line `p <problem> <n> <m>` followed by one `a u v [w]` + * line per edge. Endpoints are written 1-indexed (DIMACS convention). When + * the edge value type is formattable it is written as the arc weight. + * + * @param os Output stream. + * @param g Graph satisfying adjacency_list. + * @param problem Problem descriptor placed on the `p` line (e.g. "sp", "max"). + */ +template +void write_dimacs(std::ostream& os, const G& g, std::string_view problem = "sp") { + auto n = static_cast(num_vertices(g)); + + std::uint64_t m = 0; + for (auto u : vertices(g)) { + for (auto uv : edges(g, u)) { + (void)uv; + ++m; + } + } + + os << "c Generated by graph-v3\n"; + os << "p " << problem << ' ' << n << ' ' << m << '\n'; + + for (auto u : vertices(g)) { + auto uid = static_cast(vertex_id(g, u)); + for (auto uv : edges(g, u)) { + auto tid = static_cast(target_id(g, uv)); + os << "a " << (uid + 1) << ' ' << (tid + 1); + if constexpr (detail::has_edge_value) { + using EV = std::remove_cvref_t; + if constexpr (detail::formattable) { + os << ' ' << std::format("{}", graph::edge_value(g, uv)); + } + } + os << '\n'; + } + } +} + +// --------------------------------------------------------------------------- +// write_dimacs_max_flow — max-flow problem with source/sink descriptors +// --------------------------------------------------------------------------- + +/** + * @brief Write a graph in DIMACS max-flow format. + * + * Emits: + * p max <n> <m> + * n <source> s + * n <sink> t + * a u v <capacity> (one per edge) + * + * @param os Output stream. + * @param g Graph satisfying adjacency_list. + * @param source 0-indexed source vertex id. + * @param sink 0-indexed sink vertex id. + * @param capacity_fn Callable (const G&, edge_t) -> capacity value.
+ */ +template +void write_dimacs_max_flow(std::ostream& os, const G& g, + vertex_id_t source, vertex_id_t sink, + CapacityFn capacity_fn) { + auto n = static_cast(num_vertices(g)); + + std::uint64_t m = 0; + for (auto u : vertices(g)) { + for (auto uv : edges(g, u)) { + (void)uv; + ++m; + } + } + + os << "c Generated by graph-v3\n"; + os << "p max " << n << ' ' << m << '\n'; + os << "n " << (static_cast(source) + 1) << " s\n"; + os << "n " << (static_cast(sink) + 1) << " t\n"; + + for (auto u : vertices(g)) { + auto uid = static_cast(vertex_id(g, u)); + for (auto uv : edges(g, u)) { + auto tid = static_cast(target_id(g, uv)); + os << "a " << (uid + 1) << ' ' << (tid + 1) << ' ' + << std::format("{}", capacity_fn(g, uv)) << '\n'; + } + } +} + +// --------------------------------------------------------------------------- +// read_dimacs — parser +// --------------------------------------------------------------------------- + +/// A single parsed DIMACS arc/edge (endpoints normalized to 0-indexed). +struct dimacs_edge { + std::uint64_t source{0}; + std::uint64_t target{0}; + std::string weight; ///< Arc weight/capacity as text (empty if none) +}; + +/// A parsed DIMACS node descriptor (id normalized to 0-indexed). +struct dimacs_node { + std::uint64_t id{0}; + std::string designation; ///< e.g. "s" (source) or "t" (sink) +}; + +/// Parsed DIMACS graph. +struct dimacs_graph { + std::string problem; ///< Problem type from the `p` line + std::uint64_t num_vertices{0}; ///< Declared vertex count + std::uint64_t num_arcs{0}; ///< Declared arc/edge count + std::vector nodes; ///< Node descriptors (`n` lines) + std::vector edges; ///< Arc/edge descriptors +}; + +/** + * @brief Parse a DIMACS file (max-flow, shortest-path, or edge format). + * + * Recognizes `c`, `p`, `n`, `a`, and `e` line kinds. Endpoints are converted + * from the file's 1-indexed convention to 0-indexed ids. Unknown line kinds + * are ignored. + * + * @param is Input stream containing DIMACS text. 
+ * @return Parsed dimacs_graph. + */ +inline dimacs_graph read_dimacs(std::istream& is) { + dimacs_graph result; + std::string line; + + while (std::getline(is, line)) { + std::istringstream ls(line); + char kind = 0; + if (!(ls >> kind)) continue; // blank line + + switch (kind) { + case 'c': + break; // comment + case 'p': { + ls >> result.problem >> result.num_vertices >> result.num_arcs; + break; + } + case 'n': { + std::uint64_t id = 0; + std::string designation; + ls >> id >> designation; + result.nodes.push_back({id > 0 ? id - 1 : 0, std::move(designation)}); + break; + } + case 'a': + case 'e': { + std::uint64_t u = 0, v = 0; + ls >> u >> v; + std::string weight; + ls >> weight; // optional; empty if absent + result.edges.push_back({u > 0 ? u - 1 : 0, v > 0 ? v - 1 : 0, std::move(weight)}); + break; + } + default: + break; // ignore unknown line kinds + } + } + + return result; +} + +} // namespace graph::io diff --git a/include/graph/io/metis.hpp b/include/graph/io/metis.hpp new file mode 100644 index 0000000..602143f --- /dev/null +++ b/include/graph/io/metis.hpp @@ -0,0 +1,214 @@ +/** + * @file metis.hpp + * @brief METIS graph I/O — write and read. + * + * Provides: + * - write_metis(os, g) Emit a graph in METIS adjacency format + * - read_metis(is) Parse a METIS file into metis_graph + * + * The METIS graph format describes an *undirected* graph: + * + * % comment line (ignored) + * [fmt] [ncon] header: vertices, edges, format flags + * one line per vertex (1-indexed) + * + * ... + * + * `m` counts each undirected edge once. The optional 3-digit `fmt` flag + * encodes whether vertex sizes / vertex weights / edge weights are present + * (hundreds = vertex sizes, tens = vertex weights, ones = edge weights). + * When edge weights are present, each neighbour is followed by its weight. + * + * The writer treats the input graph as undirected: each edge (u,v) contributes + * v to u's list and u to v's list, deduplicated, so the output is symmetric. 
+ * The reader normalizes neighbour ids from the file's 1-indexed convention to + * 0-indexed ids. + * + * Reference: METIS manual, "Graph Input File" section. + * + * NOTE: Self-contained — no external dependencies. + */ + +#pragma once + +#include +#include + +#include +#include +#include +#include +#include +#include +#include +#include +#include + +namespace graph::io { + +// --------------------------------------------------------------------------- +// write_metis +// --------------------------------------------------------------------------- + +/** + * @brief Write a graph in METIS adjacency format (treated as undirected). + * + * Builds a symmetric adjacency: every edge (u,v) lists v under u and u under v + * (deduplicated, no self-loops). Vertex ids are written 1-indexed. When + * @p with_weights is true and the edge value type is formattable, each + * neighbour is followed by its weight and the `fmt` header flag is set to 001. + * + * @param os Output stream. + * @param g Graph satisfying adjacency_list. + * @param with_weights Emit edge weights (requires a formattable edge value). + */ +template +void write_metis(std::ostream& os, const G& g, bool with_weights = false) { + const auto n = static_cast(num_vertices(g)); + + // Build symmetric, deduplicated adjacency. Store (neighbour, weight-text). 
+ std::vector>> adj(n); + std::vector> seen(n); + + bool emit_weights = false; + if constexpr (detail::has_edge_value) { + using EV = std::remove_cvref_t>()))>; + emit_weights = with_weights && detail::formattable; + } + + auto add = [&](std::uint64_t a, std::uint64_t b, const std::string& w) { + if (a == b) return; // skip self-loops (invalid in METIS) + if (a >= n) return; + if (seen[a].insert(b).second) adj[a].push_back({b, w}); + }; + + for (auto u : vertices(g)) { + auto uid = static_cast(vertex_id(g, u)); + for (auto uv : edges(g, u)) { + auto tid = static_cast(target_id(g, uv)); + std::string w; + if constexpr (detail::has_edge_value) { + using EV = std::remove_cvref_t; + if constexpr (detail::formattable) { + if (emit_weights) w = std::format("{}", graph::edge_value(g, uv)); + } + } + add(uid, tid, w); + add(tid, uid, w); + } + } + + std::uint64_t edge_count = 0; + for (const auto& a : adj) edge_count += a.size(); + edge_count /= 2; // each undirected edge counted twice + + os << "% Generated by graph-v3\n"; + os << n << ' ' << edge_count; + if (emit_weights) os << " 001"; + os << '\n'; + + for (std::size_t i = 0; i < n; ++i) { + bool first = true; + for (const auto& [nbr, w] : adj[i]) { + if (!first) os << ' '; + first = false; + os << (nbr + 1); // 1-indexed + if (emit_weights) os << ' ' << (w.empty() ? std::string("1") : w); + } + os << '\n'; + } +} + +// --------------------------------------------------------------------------- +// read_metis +// --------------------------------------------------------------------------- + +/// A single METIS adjacency entry (neighbour normalized to 0-indexed). +struct metis_adjacency { + std::uint64_t neighbor{0}; + std::string weight; ///< Edge weight as text (empty if format has none) +}; + +/// Parsed METIS graph. 
+struct metis_graph { + std::uint64_t num_vertices{0}; ///< Declared vertex count + std::uint64_t num_edges{0}; ///< Declared (undirected) edge count + int fmt{0}; ///< Format flag from the header (0 if absent) + int ncon{0}; ///< Number of vertex weights (0 if absent) + /// adjacency[i] holds the neighbours of vertex i (0-indexed). + std::vector> adjacency; + /// vertex_weights[i] holds the weights of vertex i (empty if none). + std::vector> vertex_weights; +}; + +/** + * @brief Parse a METIS graph file into a metis_graph structure. + * + * Handles the optional 3-digit `fmt` flag (vertex sizes / vertex weights / + * edge weights) and the optional `ncon` field. Lines beginning with `%` are + * comments. Neighbour ids are converted from 1-indexed to 0-indexed. + * + * @param is Input stream containing METIS text. + * @return Parsed metis_graph. + */ +inline metis_graph read_metis(std::istream& is) { + metis_graph result; + std::string line; + + auto next_data_line = [&](std::string& out) -> bool { + while (std::getline(is, out)) { + auto first = out.find_first_not_of(" \t\r"); + if (first == std::string::npos) continue; // blank + if (out[first] == '%') continue; // comment + return true; + } + return false; + }; + + // Header line. + if (!next_data_line(line)) return result; + { + std::istringstream hs(line); + hs >> result.num_vertices >> result.num_edges; + hs >> result.fmt; // optional + hs >> result.ncon; // optional + } + + const bool has_vertex_sizes = (result.fmt / 100) % 10 != 0; + const bool has_vertex_weight = (result.fmt / 10) % 10 != 0; + const bool has_edge_weight = (result.fmt % 10) != 0; + const int ncon = result.ncon > 0 ? result.ncon : (has_vertex_weight ? 
1 : 0); + + result.adjacency.resize(result.num_vertices); + result.vertex_weights.resize(result.num_vertices); + + for (std::uint64_t i = 0; i < result.num_vertices; ++i) { + if (!next_data_line(line)) break; + std::istringstream ls(line); + + if (has_vertex_sizes) { + std::uint64_t vsize = 0; + ls >> vsize; // consume and discard vertex size + } + for (int c = 0; c < ncon; ++c) { + std::uint64_t vw = 0; + if (ls >> vw) result.vertex_weights[i].push_back(vw); + } + + std::uint64_t nbr = 0; + while (ls >> nbr) { + metis_adjacency entry; + entry.neighbor = nbr > 0 ? nbr - 1 : 0; + if (has_edge_weight) { + std::string w; + ls >> w; + entry.weight = std::move(w); + } + result.adjacency[i].push_back(std::move(entry)); + } + } + + return result; +} + +} // namespace graph::io diff --git a/tests/io/test_io.cpp b/tests/io/test_io.cpp index 8b60f19..39d50b4 100644 --- a/tests/io/test_io.cpp +++ b/tests/io/test_io.cpp @@ -8,6 +8,7 @@ #include #include +#include #include #include #include @@ -365,3 +366,247 @@ TEST_CASE("write_graphml: XML-escapes values", "[io][graphml]") { REQUIRE(output.find("a < b & c > d") != std::string::npos); } + +// =========================================================================== +// DIMACS tests +// =========================================================================== + +TEST_CASE("write_dimacs: generic arc list", "[io][dimacs]") { + auto g = make_test_graph(); + std::ostringstream os; + write_dimacs(os, g); + std::string output = os.str(); + + REQUIRE(output.find("p sp 3 3") != std::string::npos); + // 1-indexed endpoints: 0->1 becomes "a 1 2", weight 1.5 + REQUIRE(output.find("a 1 2 1.5") != std::string::npos); + REQUIRE(output.find("a 1 3 2.5") != std::string::npos); + REQUIRE(output.find("a 2 3 3.5") != std::string::npos); +} + +TEST_CASE("write_dimacs: custom problem type", "[io][dimacs]") { + auto g = make_plain_graph(); + std::ostringstream os; + write_dimacs(os, g, "max"); + std::string output = os.str(); + + 
REQUIRE(output.find("p max 3 4") != std::string::npos); +} + +TEST_CASE("write_dimacs_max_flow: source/sink descriptors", "[io][dimacs]") { + auto g = make_test_graph(); + std::ostringstream os; + write_dimacs_max_flow(os, g, 0u, 2u, + [](const auto& gr, auto uv) { return edge_value(gr, uv); }); + std::string output = os.str(); + + REQUIRE(output.find("p max 3 3") != std::string::npos); + REQUIRE(output.find("n 1 s") != std::string::npos); + REQUIRE(output.find("n 3 t") != std::string::npos); + REQUIRE(output.find("a 1 2 1.5") != std::string::npos); +} + +TEST_CASE("read_dimacs: parse max-flow problem", "[io][dimacs]") { + std::istringstream is(R"(c sample max-flow problem +p max 4 5 +n 1 s +n 4 t +a 1 2 10 +a 1 3 5 +a 2 4 8 +a 3 4 7 +a 2 3 3 +)"); + + auto result = read_dimacs(is); + REQUIRE(result.problem == "max"); + REQUIRE(result.num_vertices == 4); + REQUIRE(result.num_arcs == 5); + REQUIRE(result.nodes.size() == 2); + // ids normalized to 0-indexed + REQUIRE(result.nodes[0].id == 0); + REQUIRE(result.nodes[0].designation == "s"); + REQUIRE(result.nodes[1].id == 3); + REQUIRE(result.nodes[1].designation == "t"); + REQUIRE(result.edges.size() == 5); + REQUIRE(result.edges[0].source == 0); + REQUIRE(result.edges[0].target == 1); + REQUIRE(result.edges[0].weight == "10"); +} + +TEST_CASE("read_dimacs: edge format (e lines)", "[io][dimacs]") { + std::istringstream is(R"(c clique format +p edge 3 2 +e 1 2 +e 2 3 +)"); + + auto result = read_dimacs(is); + REQUIRE(result.problem == "edge"); + REQUIRE(result.edges.size() == 2); + REQUIRE(result.edges[0].source == 0); + REQUIRE(result.edges[0].target == 1); + REQUIRE(result.edges[1].source == 1); + REQUIRE(result.edges[1].target == 2); + REQUIRE(result.edges[0].weight.empty()); +} + +TEST_CASE("DIMACS roundtrip: write then read", "[io][dimacs]") { + auto g = make_test_graph(); + std::ostringstream oss; + write_dimacs(oss, g); + + std::istringstream iss(oss.str()); + auto parsed = read_dimacs(iss); + + 
REQUIRE(parsed.num_vertices == 3); + REQUIRE(parsed.num_arcs == 3); + REQUIRE(parsed.edges.size() == 3); + REQUIRE(parsed.edges[0].source == 0); + REQUIRE(parsed.edges[0].target == 1); +} + +// =========================================================================== +// METIS tests +// =========================================================================== + +TEST_CASE("write_metis: undirected adjacency", "[io][metis]") { + auto g = make_plain_graph(); // 0->1, 0->2, 1->2, 2->0 + std::ostringstream os; + write_metis(os, g); + std::string output = os.str(); + + // 3 vertices; undirected edges {0,1},{0,2},{1,2} => 3 edges + REQUIRE(output.find("3 3") != std::string::npos); + // vertex 1 (id 0) is adjacent to 2 and 3 (ids 1,2) + std::istringstream iss(output); + std::string line; + std::getline(iss, line); // comment + std::getline(iss, line); // header + std::getline(iss, line); // vertex 1 line + REQUIRE(line.find("2") != std::string::npos); + REQUIRE(line.find("3") != std::string::npos); +} + +TEST_CASE("write_metis: with edge weights", "[io][metis]") { + auto g = make_test_graph(); + std::ostringstream os; + write_metis(os, g, /*with_weights=*/true); + std::string output = os.str(); + + REQUIRE(output.find("001") != std::string::npos); // fmt flag + REQUIRE(output.find("1.5") != std::string::npos); +} + +TEST_CASE("read_metis: unweighted graph", "[io][metis]") { + std::istringstream is(R"(% sample +4 5 +2 3 +1 3 4 +1 2 4 +2 3 +)"); + + auto result = read_metis(is); + REQUIRE(result.num_vertices == 4); + REQUIRE(result.num_edges == 5); + REQUIRE(result.adjacency.size() == 4); + // vertex 0 neighbours: 2,3 (file) -> 1,2 (0-indexed) + REQUIRE(result.adjacency[0].size() == 2); + REQUIRE(result.adjacency[0][0].neighbor == 1); + REQUIRE(result.adjacency[0][1].neighbor == 2); + // vertex 1 neighbours: 1,3,4 -> 0,2,3 + REQUIRE(result.adjacency[1].size() == 3); + REQUIRE(result.adjacency[1][0].neighbor == 0); + REQUIRE(result.adjacency[1][2].neighbor == 3); +} + 
+TEST_CASE("read_metis: weighted graph (fmt=001)", "[io][metis]") { + std::istringstream is(R"(% weighted +3 2 001 +2 5 3 7 +1 5 +1 7 +)"); + + auto result = read_metis(is); + REQUIRE(result.num_vertices == 3); + REQUIRE(result.fmt == 1); + REQUIRE(result.adjacency[0].size() == 2); + REQUIRE(result.adjacency[0][0].neighbor == 1); + REQUIRE(result.adjacency[0][0].weight == "5"); + REQUIRE(result.adjacency[0][1].neighbor == 2); + REQUIRE(result.adjacency[0][1].weight == "7"); +} + +TEST_CASE("METIS roundtrip: write then read", "[io][metis]") { + auto g = make_plain_graph(); + std::ostringstream oss; + write_metis(oss, g); + + std::istringstream iss(oss.str()); + auto parsed = read_metis(iss); + + REQUIRE(parsed.num_vertices == 3); + REQUIRE(parsed.num_edges == 3); + // symmetric: total adjacency entries == 2 * edges + std::uint64_t total = 0; + for (const auto& a : parsed.adjacency) total += a.size(); + REQUIRE(total == 6); +} + +// =========================================================================== +// Adjacency List Text tests +// =========================================================================== + +TEST_CASE("write_adjacency_list_text: structure dump", "[io][adjtext]") { + auto g = make_plain_graph(); // 0->1, 0->2, 1->2, 2->0 + std::ostringstream os; + write_adjacency_list_text(os, g); + std::string output = os.str(); + + REQUIRE(output.find("0: 1 2") != std::string::npos); + REQUIRE(output.find("1: 2") != std::string::npos); + REQUIRE(output.find("2: 0") != std::string::npos); +} + +TEST_CASE("read_adjacency_list_text: parse with colon", "[io][adjtext]") { + std::istringstream is(R"(0: 1 2 +1: 2 +2: 0 3 +3: +)"); + + auto result = read_adjacency_list_text(is); + REQUIRE(result.vertex_ids.size() == 4); + REQUIRE(result.edges.size() == 5); + REQUIRE(result.edges[0].source == "0"); + REQUIRE(result.edges[0].target == "1"); + REQUIRE(result.edges[4].source == "2"); + REQUIRE(result.edges[4].target == "3"); +} + +TEST_CASE("read_adjacency_list_text: 
whitespace-only separator", "[io][adjtext]") { + std::istringstream is(R"(A B C +B C +C A +)"); + + auto result = read_adjacency_list_text(is); + REQUIRE(result.vertex_ids.size() == 3); + REQUIRE(result.edges.size() == 4); + REQUIRE(result.edges[0].source == "A"); + REQUIRE(result.edges[0].target == "B"); +} + +TEST_CASE("Adjacency List Text roundtrip: write then read", "[io][adjtext]") { + auto g = make_plain_graph(); + std::ostringstream oss; + write_adjacency_list_text(oss, g); + + std::istringstream iss(oss.str()); + auto parsed = read_adjacency_list_text(iss); + + REQUIRE(parsed.vertex_ids.size() == 3); + REQUIRE(parsed.edges.size() == 4); +} From bae7f74032889ffd86e80c3b3b1e79c6916e8df9 Mon Sep 17 00:00:00 2001 From: Phil Ratzloff Date: Mon, 1 Jun 2026 10:20:22 -0400 Subject: [PATCH 5/6] Fix MSVC debug EHsc and warning-as-error test/build issues --- cmake/StandardProjectSettings.cmake | 11 ++++++++++- include/graph/container/compressed_graph.hpp | 6 +++--- include/graph/io/adjacency_list_text.hpp | 14 +++++++------- .../dynamic_graph/test_dynamic_graph_dod.cpp | 8 ++++---- .../dynamic_graph/test_dynamic_graph_dofl.cpp | 8 ++++---- .../dynamic_graph/test_dynamic_graph_dol.cpp | 8 ++++---- .../dynamic_graph/test_dynamic_graph_dov.cpp | 8 ++++---- .../dynamic_graph/test_dynamic_graph_vod.cpp | 8 ++++---- .../dynamic_graph/test_dynamic_graph_vofl.cpp | 8 ++++---- .../dynamic_graph/test_dynamic_graph_vol.cpp | 8 ++++---- .../dynamic_graph/test_dynamic_graph_vov.cpp | 8 ++++---- 11 files changed, 52 insertions(+), 43 deletions(-) diff --git a/cmake/StandardProjectSettings.cmake b/cmake/StandardProjectSettings.cmake index 6065148..4581e33 100644 --- a/cmake/StandardProjectSettings.cmake +++ b/cmake/StandardProjectSettings.cmake @@ -15,7 +15,16 @@ set(CMAKE_POSITION_INDEPENDENT_CODE ON) # Enable folder organization in IDEs set_property(GLOBAL PROPERTY USE_FOLDERS ON) -# Enable parallel compilation for MSVC (cl.exe only; clang-cl doesn't support /MP) +# MSVC exception 
model: always enable standard C++ EH unwind semantics. +# +# This avoids warning C4530 ("C++ exception handler used, but unwind semantics +# are not enabled") in Debug builds, especially from third-party targets such as +# Catch2, and keeps behavior consistent across all targets. +if(MSVC) + add_compile_options(/EHsc) +endif() + +# Enable parallel compilation for MSVC cl.exe (clang-cl doesn't support /MP) if(MSVC AND NOT CMAKE_CXX_COMPILER_ID STREQUAL "Clang") # Use /MP to enable parallel compilation with all available cores add_compile_options(/MP) diff --git a/include/graph/container/compressed_graph.hpp b/include/graph/container/compressed_graph.hpp index 7907f01..fe4a312 100644 --- a/include/graph/container/compressed_graph.hpp +++ b/include/graph/container/compressed_graph.hpp @@ -965,8 +965,8 @@ class compressed_graph_base if (id >= size()) return std::views::iota(edge_index_type{0}, edge_index_type{0}); - auto start_idx = row_index_[id].index; - auto end_idx = row_index_[id + 1].index; + auto start_idx = row_index_[static_cast(id)].index; + auto end_idx = row_index_[static_cast(id + 1)].index; return std::views::iota(start_idx, end_idx); } @@ -1018,7 +1018,7 @@ class compressed_graph_base * @note No bounds checking is performed. The caller must ensure edge_id is valid. 
*/ [[nodiscard]] constexpr vertex_id_type target_id(edge_id_type edge_id) const noexcept { - return col_index_[edge_id].index; + return col_index_[static_cast(edge_id)].index; } /** diff --git a/include/graph/io/adjacency_list_text.hpp b/include/graph/io/adjacency_list_text.hpp index 3a00667..86571e9 100644 --- a/include/graph/io/adjacency_list_text.hpp +++ b/include/graph/io/adjacency_list_text.hpp @@ -103,15 +103,15 @@ inline adjacency_list_text_graph read_adjacency_list_text(std::istream& is) { if (colon != std::string::npos) line[colon] = ' '; std::istringstream ls(line); - std::string source; - if (!(ls >> source)) continue; // blank line + std::string src; + if (!(ls >> src)) continue; // blank line - ensure_vertex(source); + ensure_vertex(src); - std::string target; - while (ls >> target) { - ensure_vertex(target); - result.edges.push_back({source, target}); + std::string dst; + while (ls >> dst) { + ensure_vertex(dst); + result.edges.push_back({src, dst}); } } diff --git a/tests/container/dynamic_graph/test_dynamic_graph_dod.cpp b/tests/container/dynamic_graph/test_dynamic_graph_dod.cpp index cb30c4c..8792a27 100644 --- a/tests/container/dynamic_graph/test_dynamic_graph_dod.cpp +++ b/tests/container/dynamic_graph/test_dynamic_graph_dod.cpp @@ -1890,8 +1890,8 @@ TEST_CASE("dod graph properties and queries", "[dynamic_graph][dod][properties]" } REQUIRE(sinks.size() == 2); - REQUIRE(std::find(sinks.begin(), sinks.end(), 2) != sinks.end()); - REQUIRE(std::find(sinks.begin(), sinks.end(), 3) != sinks.end()); + REQUIRE(std::find(sinks.begin(), sinks.end(), size_t{2}) != sinks.end()); + REQUIRE(std::find(sinks.begin(), sinks.end(), size_t{3}) != sinks.end()); } SECTION("compute out-degree for each vertex") { @@ -2485,8 +2485,8 @@ TEST_CASE("dod complete workflow scenarios", "[dynamic_graph][dod][workflow]") { } REQUIRE(ready_tasks.size() == 2); - REQUIRE(std::find(ready_tasks.begin(), ready_tasks.end(), 0) != ready_tasks.end()); // Task A - 
REQUIRE(std::find(ready_tasks.begin(), ready_tasks.end(), 3) != ready_tasks.end()); // Task D + REQUIRE(std::find(ready_tasks.begin(), ready_tasks.end(), size_t{0}) != ready_tasks.end()); // Task A + REQUIRE(std::find(ready_tasks.begin(), ready_tasks.end(), size_t{3}) != ready_tasks.end()); // Task D } } diff --git a/tests/container/dynamic_graph/test_dynamic_graph_dofl.cpp b/tests/container/dynamic_graph/test_dynamic_graph_dofl.cpp index 3820e3d..3ade9ff 100644 --- a/tests/container/dynamic_graph/test_dynamic_graph_dofl.cpp +++ b/tests/container/dynamic_graph/test_dynamic_graph_dofl.cpp @@ -215,8 +215,8 @@ TEST_CASE("dofl properties", "[dynamic_graph][dofl][properties]") { } REQUIRE(sinks.size() == 2); - REQUIRE(std::find(sinks.begin(), sinks.end(), 2) != sinks.end()); - REQUIRE(std::find(sinks.begin(), sinks.end(), 3) != sinks.end()); + REQUIRE(std::find(sinks.begin(), sinks.end(), size_t{2}) != sinks.end()); + REQUIRE(std::find(sinks.begin(), sinks.end(), size_t{3}) != sinks.end()); } SECTION("compute out-degree for each vertex") { @@ -2542,8 +2542,8 @@ TEST_CASE("dofl complete workflow scenarios", "[dynamic_graph][dofl][workflow]") } REQUIRE(ready_tasks.size() == 2); - REQUIRE(std::find(ready_tasks.begin(), ready_tasks.end(), 0) != ready_tasks.end()); // Task A - REQUIRE(std::find(ready_tasks.begin(), ready_tasks.end(), 3) != ready_tasks.end()); // Task D + REQUIRE(std::find(ready_tasks.begin(), ready_tasks.end(), size_t{0}) != ready_tasks.end()); // Task A + REQUIRE(std::find(ready_tasks.begin(), ready_tasks.end(), size_t{3}) != ready_tasks.end()); // Task D } } diff --git a/tests/container/dynamic_graph/test_dynamic_graph_dol.cpp b/tests/container/dynamic_graph/test_dynamic_graph_dol.cpp index 7d12a85..f8d06c2 100644 --- a/tests/container/dynamic_graph/test_dynamic_graph_dol.cpp +++ b/tests/container/dynamic_graph/test_dynamic_graph_dol.cpp @@ -1913,8 +1913,8 @@ TEST_CASE("dol graph properties and queries", "[dynamic_graph][dol][properties]" } 
REQUIRE(sinks.size() == 2); - REQUIRE(std::find(sinks.begin(), sinks.end(), 2) != sinks.end()); - REQUIRE(std::find(sinks.begin(), sinks.end(), 3) != sinks.end()); + REQUIRE(std::find(sinks.begin(), sinks.end(), size_t{2}) != sinks.end()); + REQUIRE(std::find(sinks.begin(), sinks.end(), size_t{3}) != sinks.end()); } SECTION("compute out-degree for each vertex") { @@ -2508,8 +2508,8 @@ TEST_CASE("dol complete workflow scenarios", "[dynamic_graph][dol][workflow]") { } REQUIRE(ready_tasks.size() == 2); - REQUIRE(std::find(ready_tasks.begin(), ready_tasks.end(), 0) != ready_tasks.end()); // Task A - REQUIRE(std::find(ready_tasks.begin(), ready_tasks.end(), 3) != ready_tasks.end()); // Task D + REQUIRE(std::find(ready_tasks.begin(), ready_tasks.end(), size_t{0}) != ready_tasks.end()); // Task A + REQUIRE(std::find(ready_tasks.begin(), ready_tasks.end(), size_t{3}) != ready_tasks.end()); // Task D } } diff --git a/tests/container/dynamic_graph/test_dynamic_graph_dov.cpp b/tests/container/dynamic_graph/test_dynamic_graph_dov.cpp index 8427f00..454a06d 100644 --- a/tests/container/dynamic_graph/test_dynamic_graph_dov.cpp +++ b/tests/container/dynamic_graph/test_dynamic_graph_dov.cpp @@ -1895,8 +1895,8 @@ TEST_CASE("dov graph properties and queries", "[dynamic_graph][dov][properties]" } REQUIRE(sinks.size() == 2); - REQUIRE(std::find(sinks.begin(), sinks.end(), 2) != sinks.end()); - REQUIRE(std::find(sinks.begin(), sinks.end(), 3) != sinks.end()); + REQUIRE(std::find(sinks.begin(), sinks.end(), size_t{2}) != sinks.end()); + REQUIRE(std::find(sinks.begin(), sinks.end(), size_t{3}) != sinks.end()); } SECTION("compute out-degree for each vertex") { @@ -2490,8 +2490,8 @@ TEST_CASE("dov complete workflow scenarios", "[dynamic_graph][dov][workflow]") { } REQUIRE(ready_tasks.size() == 2); - REQUIRE(std::find(ready_tasks.begin(), ready_tasks.end(), 0) != ready_tasks.end()); // Task A - REQUIRE(std::find(ready_tasks.begin(), ready_tasks.end(), 3) != ready_tasks.end()); // Task D + 
REQUIRE(std::find(ready_tasks.begin(), ready_tasks.end(), size_t{0}) != ready_tasks.end()); // Task A + REQUIRE(std::find(ready_tasks.begin(), ready_tasks.end(), size_t{3}) != ready_tasks.end()); // Task D } } diff --git a/tests/container/dynamic_graph/test_dynamic_graph_vod.cpp b/tests/container/dynamic_graph/test_dynamic_graph_vod.cpp index 781b3c1..e212df1 100644 --- a/tests/container/dynamic_graph/test_dynamic_graph_vod.cpp +++ b/tests/container/dynamic_graph/test_dynamic_graph_vod.cpp @@ -1901,8 +1901,8 @@ TEST_CASE("vod graph properties and queries", "[dynamic_graph][vod][properties]" } REQUIRE(sinks.size() == 2); - REQUIRE(std::find(sinks.begin(), sinks.end(), 2) != sinks.end()); - REQUIRE(std::find(sinks.begin(), sinks.end(), 3) != sinks.end()); + REQUIRE(std::find(sinks.begin(), sinks.end(), size_t{2}) != sinks.end()); + REQUIRE(std::find(sinks.begin(), sinks.end(), size_t{3}) != sinks.end()); } SECTION("compute out-degree for each vertex") { @@ -2496,8 +2496,8 @@ TEST_CASE("vod complete workflow scenarios", "[dynamic_graph][vod][workflow]") { } REQUIRE(ready_tasks.size() == 2); - REQUIRE(std::find(ready_tasks.begin(), ready_tasks.end(), 0) != ready_tasks.end()); // Task A - REQUIRE(std::find(ready_tasks.begin(), ready_tasks.end(), 3) != ready_tasks.end()); // Task D + REQUIRE(std::find(ready_tasks.begin(), ready_tasks.end(), size_t{0}) != ready_tasks.end()); // Task A + REQUIRE(std::find(ready_tasks.begin(), ready_tasks.end(), size_t{3}) != ready_tasks.end()); // Task D } } diff --git a/tests/container/dynamic_graph/test_dynamic_graph_vofl.cpp b/tests/container/dynamic_graph/test_dynamic_graph_vofl.cpp index 65d7ca5..174172a 100644 --- a/tests/container/dynamic_graph/test_dynamic_graph_vofl.cpp +++ b/tests/container/dynamic_graph/test_dynamic_graph_vofl.cpp @@ -215,8 +215,8 @@ TEST_CASE("vofl properties", "[dynamic_graph][vofl][properties]") { } REQUIRE(sinks.size() == 2); - REQUIRE(std::find(sinks.begin(), sinks.end(), 2) != sinks.end()); - 
REQUIRE(std::find(sinks.begin(), sinks.end(), 3) != sinks.end()); + REQUIRE(std::find(sinks.begin(), sinks.end(), size_t{2}) != sinks.end()); + REQUIRE(std::find(sinks.begin(), sinks.end(), size_t{3}) != sinks.end()); } SECTION("compute out-degree for each vertex") { @@ -2542,8 +2542,8 @@ TEST_CASE("vofl complete workflow scenarios", "[dynamic_graph][vofl][workflow]") } REQUIRE(ready_tasks.size() == 2); - REQUIRE(std::find(ready_tasks.begin(), ready_tasks.end(), 0) != ready_tasks.end()); // Task A - REQUIRE(std::find(ready_tasks.begin(), ready_tasks.end(), 3) != ready_tasks.end()); // Task D + REQUIRE(std::find(ready_tasks.begin(), ready_tasks.end(), size_t{0}) != ready_tasks.end()); // Task A + REQUIRE(std::find(ready_tasks.begin(), ready_tasks.end(), size_t{3}) != ready_tasks.end()); // Task D } } diff --git a/tests/container/dynamic_graph/test_dynamic_graph_vol.cpp b/tests/container/dynamic_graph/test_dynamic_graph_vol.cpp index ad6c911..4bb156f 100644 --- a/tests/container/dynamic_graph/test_dynamic_graph_vol.cpp +++ b/tests/container/dynamic_graph/test_dynamic_graph_vol.cpp @@ -1913,8 +1913,8 @@ TEST_CASE("vol graph properties and queries", "[dynamic_graph][vol][properties]" } REQUIRE(sinks.size() == 2); - REQUIRE(std::find(sinks.begin(), sinks.end(), 2) != sinks.end()); - REQUIRE(std::find(sinks.begin(), sinks.end(), 3) != sinks.end()); + REQUIRE(std::find(sinks.begin(), sinks.end(), size_t{2}) != sinks.end()); + REQUIRE(std::find(sinks.begin(), sinks.end(), size_t{3}) != sinks.end()); } SECTION("compute out-degree for each vertex") { @@ -2508,8 +2508,8 @@ TEST_CASE("vol complete workflow scenarios", "[dynamic_graph][vol][workflow]") { } REQUIRE(ready_tasks.size() == 2); - REQUIRE(std::find(ready_tasks.begin(), ready_tasks.end(), 0) != ready_tasks.end()); // Task A - REQUIRE(std::find(ready_tasks.begin(), ready_tasks.end(), 3) != ready_tasks.end()); // Task D + REQUIRE(std::find(ready_tasks.begin(), ready_tasks.end(), size_t{0}) != ready_tasks.end()); // Task 
A + REQUIRE(std::find(ready_tasks.begin(), ready_tasks.end(), size_t{3}) != ready_tasks.end()); // Task D } } diff --git a/tests/container/dynamic_graph/test_dynamic_graph_vov.cpp b/tests/container/dynamic_graph/test_dynamic_graph_vov.cpp index 932d36c..12efd45 100644 --- a/tests/container/dynamic_graph/test_dynamic_graph_vov.cpp +++ b/tests/container/dynamic_graph/test_dynamic_graph_vov.cpp @@ -1894,8 +1894,8 @@ TEST_CASE("vov graph properties and queries", "[dynamic_graph][vov][properties]" } REQUIRE(sinks.size() == 2); - REQUIRE(std::find(sinks.begin(), sinks.end(), 2) != sinks.end()); - REQUIRE(std::find(sinks.begin(), sinks.end(), 3) != sinks.end()); + REQUIRE(std::find(sinks.begin(), sinks.end(), size_t{2}) != sinks.end()); + REQUIRE(std::find(sinks.begin(), sinks.end(), size_t{3}) != sinks.end()); } SECTION("compute out-degree for each vertex") { @@ -2489,8 +2489,8 @@ TEST_CASE("vov complete workflow scenarios", "[dynamic_graph][vov][workflow]") { } REQUIRE(ready_tasks.size() == 2); - REQUIRE(std::find(ready_tasks.begin(), ready_tasks.end(), 0) != ready_tasks.end()); // Task A - REQUIRE(std::find(ready_tasks.begin(), ready_tasks.end(), 3) != ready_tasks.end()); // Task D + REQUIRE(std::find(ready_tasks.begin(), ready_tasks.end(), size_t{0}) != ready_tasks.end()); // Task A + REQUIRE(std::find(ready_tasks.begin(), ready_tasks.end(), size_t{3}) != ready_tasks.end()); // Task D } } From b328f3d1539947158155f5d61e4914830d7aea30 Mon Sep 17 00:00:00 2001 From: Phil Ratzloff Date: Mon, 1 Jun 2026 10:28:05 -0400 Subject: [PATCH 6/6] Move PageRank into examples and update CMake --- examples/CMakeLists.txt | 1 + examples/PageRank/CMakeLists.txt | 7 +++++++ {PageRank => examples/PageRank}/pagerank.hpp | 0 {PageRank => examples/PageRank}/pagerank_tests.cpp | 0 4 files changed, 8 insertions(+) create mode 100644 examples/PageRank/CMakeLists.txt rename {PageRank => examples/PageRank}/pagerank.hpp (100%) rename {PageRank => examples/PageRank}/pagerank_tests.cpp (100%) diff 
--git a/examples/CMakeLists.txt b/examples/CMakeLists.txt index 18f9034..801d964 100644 --- a/examples/CMakeLists.txt +++ b/examples/CMakeLists.txt @@ -13,6 +13,7 @@ target_include_directories(dijkstra_example PRIVATE ${CMAKE_CURRENT_SOURCE_DIR}) add_subdirectory(CppCon2021) add_subdirectory(CppCon2022) add_subdirectory(BGLWorkshop2026) +add_subdirectory(PageRank) # BGL adaptor example (requires Boost headers) option(BUILD_BGL_EXAMPLES "Build BGL adaptor examples (requires Boost headers)" OFF) diff --git a/examples/PageRank/CMakeLists.txt b/examples/PageRank/CMakeLists.txt new file mode 100644 index 0000000..aba9017 --- /dev/null +++ b/examples/PageRank/CMakeLists.txt @@ -0,0 +1,7 @@ +add_library(pagerank_example INTERFACE) +target_link_libraries(pagerank_example INTERFACE graph3) +target_sources(pagerank_example INTERFACE + FILE_SET HEADERS + BASE_DIRS ${CMAKE_CURRENT_SOURCE_DIR} + FILES ${CMAKE_CURRENT_SOURCE_DIR}/pagerank.hpp +) diff --git a/PageRank/pagerank.hpp b/examples/PageRank/pagerank.hpp similarity index 100% rename from PageRank/pagerank.hpp rename to examples/PageRank/pagerank.hpp diff --git a/PageRank/pagerank_tests.cpp b/examples/PageRank/pagerank_tests.cpp similarity index 100% rename from PageRank/pagerank_tests.cpp rename to examples/PageRank/pagerank_tests.cpp