From 14e8ecb109ca1f43ad71cde3560b030fec62cced Mon Sep 17 00:00:00 2001 From: Belyanin Georgiy Date: Thu, 4 Jul 2024 15:51:30 +0300 Subject: [PATCH 1/4] Add regular path query algorithm This commit adds an implementation of the regular path query algorithm based on the linear-algebra graph processing approach. The algorithm finds a set of nodes in an edge-labelled directed graph. These nodes are reachable by paths that start from one of the source nodes and whose edge labels form a word from the specified regular language. This algorithm is based on the breadth-first-search algorithm over the adjacency matrices. Regular languages are defined by a non-deterministic finite automaton (NFA). The algorithm considers the paths whose "label words" are accepted by the specified NFA. The algorithm is used with the following inputs: * A regular automaton adjacency matrix decomposition. * A graph adjacency matrix decomposition. * An array of the starting node indices. It returns a vector v with v[i] = 1 iff node i is reachable by a path satisfying the provided regular constraints. 
--- experimental/algorithm/LAGraph_RegularPathQuery.c | 4 ++++ 1 file changed, 4 insertions(+) diff --git a/experimental/algorithm/LAGraph_RegularPathQuery.c b/experimental/algorithm/LAGraph_RegularPathQuery.c index 8173daadb6..6a528f18e5 100644 --- a/experimental/algorithm/LAGraph_RegularPathQuery.c +++ b/experimental/algorithm/LAGraph_RegularPathQuery.c @@ -255,7 +255,11 @@ int LAGraph_RegularPathQuery } // Check source nodes in the graph +<<<<<<< HEAD for (size_t i = 0 ; i < ns ; i++) +======= + for (GrB_Index i = 0; i < ns; i++) +>>>>>>> 30e086c8 (Add regular path query algorithm) { GrB_Index s = S [i] ; LG_ASSERT_MSG (s < ng, GrB_INVALID_INDEX, "invalid graph source node") ; From d7b36c8cd31749166aa0a1a558b18b8e0a442994 Mon Sep 17 00:00:00 2001 From: Georgiy Belyanin Date: Sat, 7 Dec 2024 18:36:50 +0300 Subject: [PATCH 2/4] Make the RPQ algorithm evaluate 2-RPQs This patch makes the regular path query algorithm work with 2-RPQs. 2-RPQs are RPQs extended with the possibility of traversing edges in the direction opposite to the one in which they are given. E.g. SPARQL 2-RPQ `Alice ^ ?x` could be used to find Alice and all of her sisters by getting all of Alice's mother's daughters. 2-RPQ support is provided by adding two extra parameters to the RPQ algorithm. One of them marks some of the provided labels as inverted. The second one inverts the whole query, which allows executing single-destination RPQs (e.g. `?x Bob` gets Bob's parents). 
--- .../algorithm/LAGraph_RegularPathQuery.c | 74 ++++++++++++++----- experimental/test/test_RegularPathQuery.c | 42 +++++++++-- include/LAGraphX.h | 4 +- 3 files changed, 97 insertions(+), 23 deletions(-) diff --git a/experimental/algorithm/LAGraph_RegularPathQuery.c b/experimental/algorithm/LAGraph_RegularPathQuery.c index 6a528f18e5..8e117f7fa8 100644 --- a/experimental/algorithm/LAGraph_RegularPathQuery.c +++ b/experimental/algorithm/LAGraph_RegularPathQuery.c @@ -1,5 +1,5 @@ //------------------------------------------------------------------------------ -// LAGraph_RegularPathQuery.c: regular path query +// LAGraph_2RegularPathQuery.c: 2-way regular path query //------------------------------------------------------------------------------ // // LAGraph, (c) 2019-2024 by The LAGraph Contributors, All Rights Reserved. @@ -10,6 +10,9 @@ //------------------------------------------------------------------------------ +// TODO: This is a copy-pasted description of the original RPQ algorithm with +// support for 2-RPQs. +// // For an edge-labelled directed graph the algorithm computes the set of nodes // for which these conditions are held: // * The node is reachable by a path from one of the source nodes. 
@@ -96,7 +99,7 @@ #include "LG_internal.h" #include "LAGraphX.h" -int LAGraph_RegularPathQuery +int LAGraph_2RegularPathQuery ( // output: GrB_Vector *reachable, // reachable(i) = true if node i is reachable @@ -105,6 +108,7 @@ int LAGraph_RegularPathQuery // input: LAGraph_Graph *R, // input non-deterministic finite automaton // adjacency matrix decomposition + bool *inverse_labels, // inversed labels size_t nl, // total label count, # of matrices graph and // NFA adjacency matrix decomposition const GrB_Index *QS, // starting states in NFA @@ -114,6 +118,7 @@ int LAGraph_RegularPathQuery LAGraph_Graph *G, // input graph adjacency matrix decomposition const GrB_Index *S, // source vertices to start searching paths size_t ns, // number of source vertices + bool inverse, // inverse the whole query char *msg // LAGraph output message ) { @@ -137,6 +142,7 @@ int LAGraph_RegularPathQuery GrB_Index ng = 0 ; // # nodes in the graph GrB_Index nr = 0 ; // # states in the NFA + GrB_Index nv = 0 ; // # pair count in the frontier GrB_Index states = ns ; // # pairs in the current // correspondence between the graph and // the NFA @@ -146,6 +152,7 @@ int LAGraph_RegularPathQuery GrB_Index vals = 0 ; // utility matrix value count GrB_Matrix *A = NULL ; + GrB_Matrix *AT = NULL ; GrB_Matrix *B = NULL ; GrB_Matrix *BT = NULL ; @@ -169,16 +176,29 @@ int LAGraph_RegularPathQuery } LG_TRY (LAGraph_Malloc ((void **) &A, nl, sizeof (GrB_Matrix), msg)) ; + LG_TRY (LAGraph_Malloc ((void **) &AT, nl, sizeof (GrB_Matrix), msg)) ; for (size_t i = 0 ; i < nl ; i++) { if (G[i] == NULL) { A[i] = NULL ; + AT[i] = NULL ; continue ; } A[i] = G[i]->A ; + if (G[i]->kind == LAGraph_ADJACENCY_UNDIRECTED || + G[i]->is_symmetric_structure == LAGraph_TRUE) + { + AT[i] = A[i] ; + } + else + { + // AT[i] could be NULL and the matrix will be transposed by a + // descriptor + AT[i] = G[i]->AT ; + } } LG_TRY (LAGraph_Malloc ((void **) &B, nl, sizeof (GrB_Matrix), msg)) ; @@ -191,6 +211,7 @@ int 
LAGraph_RegularPathQuery if (R[i] == NULL) { B[i] = NULL ; + BT[i] = NULL ; continue ; } @@ -255,11 +276,7 @@ int LAGraph_RegularPathQuery } // Check source nodes in the graph -<<<<<<< HEAD for (size_t i = 0 ; i < ns ; i++) -======= - for (GrB_Index i = 0; i < ns; i++) ->>>>>>> 30e086c8 (Add regular path query algorithm) { GrB_Index s = S [i] ; LG_ASSERT_MSG (s < ng, GrB_INVALID_INDEX, "invalid graph source node") ; @@ -318,20 +335,43 @@ int LAGraph_RegularPathQuery // Traverse the NFA // Try to use a provided transposed matrix or use the descriptor - if (BT[i] != NULL) - { - GRB_TRY (GrB_mxm (symbol_frontier, GrB_NULL, GrB_NULL, - GrB_LOR_LAND_SEMIRING_BOOL, BT[i], frontier, GrB_DESC_R)) ; - } - else - { - GRB_TRY (GrB_mxm (symbol_frontier, GrB_NULL, GrB_NULL, - GrB_LOR_LAND_SEMIRING_BOOL, B[i], frontier, GrB_DESC_RT0)) ; + if (!inverse) { + if (BT[i] != NULL) + { + GRB_TRY (GrB_mxm (symbol_frontier, GrB_NULL, GrB_NULL, + GrB_LOR_LAND_SEMIRING_BOOL, BT[i], frontier, GrB_DESC_R)) ; + } + else + { + GRB_TRY (GrB_mxm (symbol_frontier, GrB_NULL, GrB_NULL, + GrB_LOR_LAND_SEMIRING_BOOL, B[i], frontier, GrB_DESC_RT0)) ; + } + } else { + GRB_TRY (GrB_mxm (symbol_frontier, NULL, NULL, GrB_LOR_LAND_SEMIRING_BOOL, B[i], frontier, GrB_DESC_R )) ; } + GrB_Matrix_nvals( &nv, symbol_frontier); + if (nv == 0) + continue; + // Traverse the graph - GRB_TRY (GrB_mxm (next_frontier, visited, GrB_LOR, - GrB_LOR_LAND_SEMIRING_BOOL, symbol_frontier, A[i], GrB_DESC_SC)) ; + if (!inverse_labels[i]) { + if (!inverse) { + GRB_TRY (GrB_mxm (next_frontier, visited, GrB_LOR, GrB_LOR_LAND_SEMIRING_BOOL, symbol_frontier, A[i], GrB_DESC_SC)) ; + } else if (AT[i]) { + GRB_TRY (GrB_mxm (next_frontier, visited, GrB_LOR, GrB_LOR_LAND_SEMIRING_BOOL, symbol_frontier, AT[i], GrB_DESC_SC)) ; + } else { + GRB_TRY (GrB_mxm (next_frontier, visited, GrB_LOR, GrB_LOR_LAND_SEMIRING_BOOL, symbol_frontier, A[i], GrB_DESC_SCT1)) ; + } + } else { + if (!inverse && AT[i]) { + GRB_TRY (GrB_mxm (next_frontier, 
visited, GrB_LOR, GrB_LOR_LAND_SEMIRING_BOOL, symbol_frontier, AT[i], GrB_DESC_SC)) ; + } else if (!inverse) { + GRB_TRY (GrB_mxm (next_frontier, visited, GrB_LOR, GrB_LOR_LAND_SEMIRING_BOOL, symbol_frontier, A[i], GrB_DESC_SCT1)) ; + } else { + GRB_TRY (GrB_mxm (next_frontier, visited, GrB_LOR, GrB_LOR_LAND_SEMIRING_BOOL, symbol_frontier, A[i], GrB_DESC_SC)) ; + } + } } // Accumulate the new state <-> node correspondence diff --git a/experimental/test/test_RegularPathQuery.c b/experimental/test/test_RegularPathQuery.c index f5e04c20a3..f2e0981fd0 100644 --- a/experimental/test/test_RegularPathQuery.c +++ b/experimental/test/test_RegularPathQuery.c @@ -211,11 +211,43 @@ void test_RegularPathQueryBasic (void) OK (LAGraph_Delete (&(G[i]), msg)) ; } - for (uint64_t i = 0 ; i < MAX_LABELS ; i++ ) - { - if (R[i] == NULL) continue ; - OK (LAGraph_Delete (&(R[i]), msg)) ; - } + // TODO: Use 2RPQ HERE. + //OK (LAGraph_RegularPathQuery (&r, R, MAX_LABELS, QS, nqs, + // QF, nqf, G, S, ns, msg)) ; + + // Extract results from the output vector + GrB_Index *reachable ; + bool *values ; + + GrB_Index nvals ; + GrB_Vector_nvals (&nvals, r) ; + + OK (LAGraph_Malloc ((void **) &reachable, MAX_RESULTS, sizeof (GrB_Index), msg)) ; + OK (LAGraph_Malloc ((void **) &values, MAX_RESULTS, sizeof (GrB_Index), msg)) ; + + GrB_Vector_extractTuples (reachable, values, &nvals, r) ; + + // Compare the results with expected values + TEST_CHECK (nvals == files[k].expected_count) ; + for (uint64_t i = 0 ; i < nvals ; i++) + TEST_CHECK (reachable[i] + 1 == files[k].expected[i]) ; + + // Cleanup + OK (LAGraph_Free ((void **) &values, NULL)) ; + OK (LAGraph_Free ((void **) &reachable, NULL)) ; + + OK (GrB_free (&r)) ; + + for (uint64_t i = 0 ; i < MAX_LABELS ; i++) + { + if (G[i] == NULL) continue ; + OK (LAGraph_Delete (&(G[i]), msg)) ; + } + + for (uint64_t i = 0 ; i < MAX_LABELS ; i++ ) + { + if (R[i] == NULL) continue ; + OK (LAGraph_Delete (&(R[i]), msg)) ; } } diff --git a/include/LAGraphX.h 
b/include/LAGraphX.h index 3355addb2e..202d19a67e 100644 --- a/include/LAGraphX.h +++ b/include/LAGraphX.h @@ -834,7 +834,7 @@ int LAGraph_scc ( //**************************************************************************** LAGRAPHX_PUBLIC -int LAGraph_RegularPathQuery // nodes reachable from the starting by the +int LAGraph_2RegularPathQuery // nodes reachable from the starting by the // path satisfying regular expression ( // output: @@ -844,6 +844,7 @@ int LAGraph_RegularPathQuery // nodes reachable from the starting by the // input: LAGraph_Graph *R, // input non-deterministic finite automaton // adjacency matrix decomposition + bool *inverse_labels, // inversed labels size_t nl, // total label count, # of matrices graph and // NFA adjacency matrix decomposition const GrB_Index *QS, // starting states in NFA @@ -853,6 +854,7 @@ int LAGraph_RegularPathQuery // nodes reachable from the starting by the LAGraph_Graph *G, // input graph adjacency matrix decomposition const GrB_Index *S, // source vertices to start searching paths size_t ns, // number of source vertices + bool inverse, // inverse the whole query char *msg // LAGraph output message ); //**************************************************************************** From 62d6641c37df148ccf179268d99812fd3ff27ae6 Mon Sep 17 00:00:00 2001 From: Georgiy Belyanin Date: Sun, 8 Dec 2024 11:04:53 +0300 Subject: [PATCH 3/4] Make MM reader for BOOL matrices ignore dups This patch provides a workaround for benchmarking the 2-RPQ algorithm on a few real-world datasets like Wikidata or yago-2s by allowing duplicates in MatrixMarket files corresponding to boolean matrices, since most of the publicly available graphs are likely to have duplicates. 
--- src/utility/LAGraph_MMRead.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/src/utility/LAGraph_MMRead.c b/src/utility/LAGraph_MMRead.c index 5aa9e390cb..e524570f49 100644 --- a/src/utility/LAGraph_MMRead.c +++ b/src/utility/LAGraph_MMRead.c @@ -989,7 +989,7 @@ int LAGraph_MMRead if (type == GrB_BOOL) { - GRB_TRY (GrB_Matrix_build_BOOL (*A, I, J, (bool *) X, nvals2, NULL)) ; + GRB_TRY (GrB_Matrix_build_BOOL (*A, I, J, (bool *) X, nvals2, GxB_IGNORE_DUP)) ; } else if (type == GrB_INT8) { From 051251cb3e5d3c1007061ae91f1fd6151044a15f Mon Sep 17 00:00:00 2001 From: Georgiy Belyanin Date: Fri, 3 Jan 2025 13:52:15 +0300 Subject: [PATCH 4/4] Add regular path query algorithm for all paths [WIP] Full description TBD. --- experimental/algorithm/LAGraph_2Rpq.c | 770 ++++++++++++++++++++++++++ experimental/test/test_2Rpq.c | 249 +++++++++ include/LAGraphX.h | 93 ++++ 3 files changed, 1112 insertions(+) create mode 100644 experimental/algorithm/LAGraph_2Rpq.c create mode 100644 experimental/test/test_2Rpq.c diff --git a/experimental/algorithm/LAGraph_2Rpq.c b/experimental/algorithm/LAGraph_2Rpq.c new file mode 100644 index 0000000000..6d01d89d01 --- /dev/null +++ b/experimental/algorithm/LAGraph_2Rpq.c @@ -0,0 +1,770 @@ +// +// Different RPQ semantics +// + +#define LG_FREE_WORK \ +{ \ +} +#define LG_FREE_ALL \ +{ \ + LG_FREE_WORK ; \ +} + +#include "LG_internal.h" +#include "LAGraphX.h" +#include +#include + +#define PATH_LIMIT 100000 + +typedef struct { + Path paths[QUICK_PATH_COUNT]; + size_t path_count; + Path *extra_paths; +} MultiplePaths ; + +MultiplePaths multiple_paths_identity ; + +void Path_print (const Path *x) +{ + if (x->vertex_count == 0) + { + printf ("empty path \n") ; + return ; + } + + for (size_t i = 0 ; i < x->vertex_count ; i++) + { + // Increase the vertex by 1 since usually user expects the same + // numbering as in the input determined by MTX file in which the + // entries are enumerated starting from 1. 
+ printf ("(%ld)", (i < QUICK_PATH_LENGTH ? x->vertices[i] : x->extra_vertices[i - QUICK_PATH_LENGTH]) + 1) ; + + if (i != x->vertex_count - 1) + { + printf ("-") ; + } + } + + printf ("\n") ; +} + +static void MultiplePaths_print (const MultiplePaths *x) +{ + printf("Multiple paths:\n") ; + for (size_t i = 0 ; i < x->path_count ; i++) + { + + printf("\t Path %ld: ", i) ; + Path_print (&x->paths[i]) ; + } + printf("\n") ; +} + +GrB_Type multiple_paths ; +GrB_BinaryOp combine_multiple_paths_op ; +GrB_Monoid combine_multiple_paths ; +GrB_BinaryOp first_multiple_paths ; +GrB_BinaryOp second_multiple_paths ; +GrB_Semiring first_combine_multiple_paths ; +GrB_Semiring second_combine_multiple_paths ; +GrB_IndexUnaryOp extend_multiple_paths ; +GrB_IndexUnaryOp extend_multiple_simple ; +GrB_IndexUnaryOp extend_multiple_trails ; + +void first_multiple_paths_f(MultiplePaths *z, MultiplePaths *x, bool *_y) +{ + *z = *x; +} +void second_multiple_paths_f(MultiplePaths *z, bool *_x, MultiplePaths *y) +{ + *z = *y; +} + +void combine_multiple_paths_f(MultiplePaths *z, const MultiplePaths *x, const MultiplePaths *y) +{ + z->path_count = x->path_count + y->path_count ; + assert (z->path_count < QUICK_PATH_COUNT) ; + + for (size_t i = 0 ; i < x->path_count ; i++) + { + z->paths[i] = x->paths[i] ; + } + + for (size_t i = 0 ; i < y->path_count ; i++) + { + z->paths[x->path_count + i] = y->paths[i] ; + } + + // TODO: Support more than QUICK_PATH_COUNT paths. +} + +static inline void path_extend(Path *path, Vertex vertex) +{ + if (path->vertex_count == 0) + { + return ; + } + + if (path->vertex_count < QUICK_PATH_LENGTH) + { + path->vertices[path->vertex_count++] = vertex ; + } + else + { + if (path->extra_vertices == NULL) + { + LG_TRY (LAGraph_Calloc ((void **) &path->extra_vertices, 64, sizeof (Vertex), NULL)) ; + } + path->extra_vertices [(path->vertex_count++) - QUICK_PATH_LENGTH] = vertex ; + } + + // TODO: Support more than QUICK_PATH_LENGTH vertices. 
+} + +static inline bool path_is_empty(Path *path) +{ + return path->vertex_count == 0; +} + + +static inline void multiple_paths_append(MultiplePaths *multiple_paths, const Path *path) +{ + multiple_paths->paths[multiple_paths->path_count++] = *path ; + + // TODO: Support more than QUICK_PATH_COUNT paths. +} + +// +// ALL PATHS. +// + +// NB: Using this semantic without a length limit makes the code behave like a +// procedure for searching all paths satisfying the constraints. +// It means it may not finish if there is loops. + +void extend_multiple_paths_f(MultiplePaths *z, const MultiplePaths *x, GrB_Index _row, GrB_Index col, const void *_y) +{ + /*if (z != x) + for (size_t i = 0 ; i < x->path_count ; i++) + { + multiple_paths_append(z, &x->paths[i]) ; + path_extend (&z->paths[i], col) ; + } + {*/ + for (size_t i = 0 ; i < z->path_count ; i++) + { + Path *path = &z->paths[i] ; + path_extend (&z->paths[i], col) ; + } + //} +} + +// +// ALL SIMPLE +// + +static inline bool path_extending_will_add_repeated_non_starting_vertex(const Path *path, Vertex vertex) +{ + if (path->vertex_count <= 1) + { + return false ; + } + + for (size_t i = 1 ; i < path->vertex_count ; i++) + { + if (path->vertices[i] == vertex) + { + return true ; + } + } + + Vertex last_vertex = path->vertices[path->vertex_count - 1] ; + + return path->vertices[0] == last_vertex; +} + +void extend_multiple_simple_f(MultiplePaths *z, const MultiplePaths *x, GrB_Index _row, GrB_Index col, const void *_y) +{ + /*if (z != x) + { + for (size_t i = 0 ; i < x->path_count ; i++) + { + const Path *path = &x->paths[i] ; + if (path_has_loop_at_end (path)) + { + continue; + } + + multiple_paths_append(z, path) ; + path_extend (&z->paths[i], col) ; + } + } + else + {*/ + for (size_t i = 0 ; i < z->path_count ; i++) + { + Path *path = &z->paths[i] ; + if (path_extending_will_add_repeated_non_starting_vertex (path, col)) + { + path->vertex_count = 0 ; + continue ; + } + + path_extend (&z->paths[i], col) ; + } + 
//} +} + +// +// ALL TRAILS +// + +static inline bool path_extending_will_add_repeated_edge(const Path *path, Vertex vertex_2) +{ + if (path->vertex_count == 0) + { + return false ; + } + + // We identify edges as pairs of vertices. + Vertex vertex_1 = path->vertices[path->vertex_count - 1] ; + + for (size_t i = 0 ; i < path->vertex_count - 1; i++) + { + if (path->vertices[i] == vertex_1 && path->vertices[i + 1] == vertex_2) + { + return true ; + } + } + + return false ; +} +void extend_multiple_trails_f(MultiplePaths *z, const MultiplePaths *x, GrB_Index _row, GrB_Index col, const void *y) +{ + /*if (z != x) + { + z->path_count = x->path_count ; + + for (size_t i = 0 ; i < x->path_count ; i++) + { + const Path *path = &x->paths[i] ; + if (path_extending_will_add_repeated_edge (path, col)) + { + continue ; + } + + multiple_paths_append(z, path) ; + path_extend (&z->paths[i], col) ; + } + } + else + {*/ + for (size_t i = 0 ; i < z->path_count ; i++) + { + Path *path = &z->paths[i] ; + if (path_extending_will_add_repeated_edge (path, col)) + { + path->vertex_count = 0 ; + continue ; + } + + path_extend (&z->paths[i], col) ; + } + //} +} + +#define LG_FREE_WORK \ +{ \ + GrB_free (&frontier) ; \ + GrB_free (&next_frontier) ; \ + GrB_free (&symbol_frontier) ; \ + GrB_free (&final_reducer) ; \ + LAGraph_Free ((void **) &A, NULL) ; \ + LAGraph_Free ((void **) &B, NULL) ; \ + LAGraph_Free ((void **) &BT, NULL) ; \ +} + +#define LG_FREE_ALL \ +{ \ + LG_FREE_WORK ; \ + LAGraph_Free ((void **) paths, NULL) ; \ +} + +static int LAGraph_2Rpq +( + // output: + Path **paths, // simple paths from one of the starting + // nodes satisfying regular constraints + size_t *path_count, // resulting path count + // input: + LAGraph_Graph *R, // input non-deterministic finite automaton + // adjacency matrix decomposition + bool *inverse_labels, // inversed labels + size_t nl, // total label count, # of matrices graph and + // NFA adjacency matrix decomposition + const GrB_Index *QS, // 
starting states in NFA + size_t nqs, // number of starting states in NFA + const GrB_Index *QF, // final states in NFA + size_t nqf, // number of final states in NFA + LAGraph_Graph *G, // input graph adjacency matrix decomposition + const GrB_Index *S, // source vertices to start searching paths + size_t ns, // number of source vertices + bool inverse, // inverse the whole query + uint64_t limit, // maximum path count + char *msg, // LAGraph output message + GrB_IndexUnaryOp op // index unary op for a specific semantic +) +{ + //-------------------------------------------------------------------------- + // check inputs + //-------------------------------------------------------------------------- + + LG_CLEAR_MSG ; + + GrB_Matrix frontier = NULL ; // traversal frontier representing + // correspondence between NFA states + // and graph vertices + GrB_Matrix symbol_frontier = NULL ; // part of the new frontier for the + // specific label + GrB_Matrix next_frontier = NULL ; // frontier value on the next + // traversal step + GrB_Vector final_reducer = NULL ; // auxiliary vector for reducing the + // visited matrix to an answer + + GrB_Index ng = 0 ; // # nodes in the graph + GrB_Index nr = 0 ; // # states in the NFA + GrB_Index nv = 0 ; // # pair count in the frontier + GrB_Index states = ns ; // # pairs in the current + // correspondence between the graph and + // the NFA + + GrB_Index rows = 0 ; // utility matrix row count + GrB_Index cols = 0 ; // utility matrix column count + GrB_Index vals = 0 ; // utility matrix value count + + // TODO: This names might be too short. 
+ GrB_Semiring sr1 = first_combine_multiple_paths ; + GrB_Semiring sr2 = second_combine_multiple_paths ; + GrB_BinaryOp acc = combine_multiple_paths_op ; + + GrB_Matrix *A = NULL ; + GrB_Matrix *AT = NULL ; + GrB_Matrix *B = NULL ; + GrB_Matrix *BT = NULL ; + + LG_ASSERT (paths != NULL, GrB_NULL_POINTER) ; + LG_ASSERT (path_count != NULL, GrB_NULL_POINTER) ; + LG_ASSERT (G != NULL, GrB_NULL_POINTER) ; + LG_ASSERT (R != NULL, GrB_NULL_POINTER) ; + LG_ASSERT (S != NULL, GrB_NULL_POINTER) ; + LG_ASSERT (op != NULL, GrB_NULL_POINTER) ; + + (*paths) = NULL ; + (*path_count) = 0 ; + + for (size_t i = 0 ; i < nl ; i++) + { + if (G[i] == NULL) continue ; + LG_TRY (LAGraph_CheckGraph (G[i], msg)) ; + } + + for (size_t i = 0 ; i < nl ; i++) + { + if (R[i] == NULL) continue ; + LG_TRY (LAGraph_CheckGraph (R[i], msg)) ; + } + + LG_TRY (LAGraph_Malloc ((void **) &A, nl, sizeof (GrB_Matrix), msg)) ; + LG_TRY (LAGraph_Malloc ((void **) &AT, nl, sizeof (GrB_Matrix), msg)) ; + + for (size_t i = 0 ; i < nl ; i++) + { + if (G[i] == NULL) + { + A[i] = NULL ; + AT[i] = NULL ; + continue ; + } + + A[i] = G[i]->A ; + if (G[i]->kind == LAGraph_ADJACENCY_UNDIRECTED || + G[i]->is_symmetric_structure == LAGraph_TRUE) + { + AT[i] = A[i] ; + } + else + { + // AT[i] could be NULL and the matrix will be transposed by a + // descriptor + AT[i] = G[i]->AT ; + } + } + + LG_TRY (LAGraph_Malloc ((void **) &B, nl, sizeof (GrB_Matrix), msg)) ; + LG_TRY (LAGraph_Malloc ((void **) &BT, nl, sizeof (GrB_Matrix), msg)) ; + + for (size_t i = 0 ; i < nl ; i++) + { + BT[i] = NULL ; + + if (R[i] == NULL) + { + B[i] = NULL ; + BT[i] = NULL ; + continue ; + } + + B[i] = R[i]->A ; + if (R[i]->is_symmetric_structure == LAGraph_TRUE) + { + BT[i] = B[i] ; + } + else + { + // BT[i] could be NULL and the matrix will be transposed by a + // descriptor + BT[i] = R[i]->AT ; + } + } + + for (size_t i = 0 ; i < nl ; i++) + { + if (A[i] == NULL) continue ; + + GRB_TRY (GrB_Matrix_nrows (&ng, A[i])) ; + break ; + } + + for 
(size_t i = 0 ; i < nl ; i++) + { + if (B[i] == NULL) continue ; + + GRB_TRY (GrB_Matrix_nrows (&nr, B[i])) ; + break ; + } + + // Check all the matrices in graph adjacency matrix decomposition are + // square and of the same dimensions + for (size_t i = 0 ; i < nl ; i++) + { + if (A[i] == NULL) continue ; + + GRB_TRY (GrB_Matrix_nrows (&rows, A[i])) ; + GRB_TRY (GrB_Matrix_ncols (&cols, A[i])) ; + + LG_ASSERT_MSG (rows == ng && cols == ng, LAGRAPH_NOT_CACHED, + "all the matrices in the graph adjacency matrix decomposition " + "should have the same dimensions and be square") ; + } + + // Check all the matrices in NFA adjacency matrix decomposition are + // square and of the same dimensions + for (size_t i = 0 ; i < nl ; i++) + { + if (B[i] == NULL) continue ; + + GrB_Index rows = 0 ; + GrB_Index cols = 0 ; + + GRB_TRY (GrB_Matrix_nrows (&rows, B[i])) ; + GRB_TRY (GrB_Matrix_ncols (&cols, B[i])) ; + + LG_ASSERT_MSG (rows == nr && cols == nr, LAGRAPH_NOT_CACHED, + "all the matrices in the NFA adjacency matrix decomposition " + "should have the same dimensions and be square") ; + } + + // Check source nodes in the graph + for (size_t i = 0 ; i < ns ; i++) + { + GrB_Index s = S [i] ; + LG_ASSERT_MSG (s < ng, GrB_INVALID_INDEX, "invalid graph source node") ; + } + + // Check starting states of the NFA + for (size_t i = 0 ; i < nqs ; i++) + { + GrB_Index qs = QS [i] ; + LG_ASSERT_MSG (qs < nr, GrB_INVALID_INDEX, + "invalid NFA starting state") ; + } + + // Check final states of the NFA + for (size_t i = 0 ; i < nqf ; i++) + { + GrB_Index qf = QF [i] ; + LG_ASSERT_MSG (qf < nr, GrB_INVALID_INDEX, "invalid NFA final state") ; + } + + // ------------------------------------------------------------------------- + // initialization + // ------------------------------------------------------------------------- + + LG_TRY (LAGraph_Calloc ((void **) paths, PATH_LIMIT, sizeof (Path), msg)) ; + + GRB_TRY (GrB_Vector_new (&final_reducer, GrB_BOOL, nr)) ; + + // Initialize matrix 
for reducing the result + GRB_TRY (GrB_assign (final_reducer, NULL, NULL, true, QF, nqf, NULL)) ; + + GRB_TRY (GrB_Matrix_new (&next_frontier, multiple_paths, nr, ng)) ; + + // Initialize frontier with the source nodes + + for (size_t i = 0 ; i < ns ; i++) + { + GrB_Index s = S[i] ; + MultiplePaths value = { + .paths = { + { + .vertices = { s }, + .vertex_count = 1 + } + }, + .path_count = 1 + }; + + for (size_t j = 0 ; j < nqs ; j++) + { + GrB_Index qs = QS[j] ; + + GRB_TRY (GrB_Matrix_setElement_UDT (next_frontier, &value, qs, s)) ; + } + } + + // Initialize a few utility matrices + GRB_TRY (GrB_Matrix_new (&frontier, multiple_paths, nr, ng)) ; + GRB_TRY (GrB_Matrix_new (&symbol_frontier, multiple_paths, nr, ng)) ; + + // Main loop + while (true) + { + //printf("Iteration\n"); + GrB_Index nvals = 0 ; + GRB_TRY (GrB_Matrix_nvals (&nvals, next_frontier)) ; + + MultiplePaths *X ; + GrB_Index *I ; + bool had_non_empty_path = false ; + + //MultiplePaths *X; + LG_TRY (LAGraph_Calloc ((void **) &X, nvals, sizeof (MultiplePaths), msg)) ; + LG_TRY (LAGraph_Calloc ((void **) &I, nvals, sizeof (GrB_Index), msg)) ; + + // TODO: Change to a generic call. 
+ GRB_TRY (GrB_Matrix_extractTuples_UDT (I, GrB_NULL, (void**) X, &nvals, next_frontier)) ; + //printf("Next frontier with %d entries\n", nvals); + + for (size_t i = 0 ; i < nvals ; i++) + { + for (size_t j = 0 ; j < X[i].path_count ; j++) + { + if (!path_is_empty(&X[i].paths[j])) + { + had_non_empty_path = true; + break; + } + } + + //MultiplePaths_print (&X[i]) ; + bool final = false ; + for (size_t j = 0 ; j < nqf ; j++) + { + if (I[i] == QF[j]) + { + final = true ; + break ; + } + } + //printf("Path at %ld final is %b", I[i], final) ; + + if (!final) + { + continue ; + } + + //printf("Found final paths!\n"); + for (size_t j = 0 ; j < X[i].path_count && (*path_count) < limit ; j++) + { + const Path *path = &X[i].paths[j] ; + if (!path_is_empty(path)) + { + (*paths)[(*path_count)++] = *path ; + } + } + } + + if (!had_non_empty_path || (*path_count) == limit) + { + //printf("breaking\n"); + break; + } + + GrB_Matrix old_frontier = frontier ; + frontier = next_frontier ; + next_frontier = old_frontier ; + + GRB_TRY (GrB_Matrix_clear(next_frontier)) ; + + // Obtain a new relation between the NFA states and the graph nodes + for (size_t i = 0 ; i < nl ; i++) + { + if (A[i] == NULL || B[i] == NULL) continue ; + + // Traverse the NFA + // Try to use a provided transposed matrix or use the descriptor + if (!inverse) { + if (BT[i] != NULL) + { + GRB_TRY (GrB_mxm (symbol_frontier, GrB_NULL, GrB_NULL, + sr2, BT[i], frontier, GrB_DESC_R)) ; + } + else + { + GRB_TRY (GrB_mxm (symbol_frontier, GrB_NULL, GrB_NULL, + sr2, B[i], frontier, GrB_DESC_RT0)) ; + } + } else { + GRB_TRY (GrB_mxm (symbol_frontier, GrB_NULL, GrB_NULL, sr2, B[i], frontier, GrB_DESC_R )) ; + } + + // TODO: Skip the iteration if symbol_frontier is already empty. 
+ + // Traverse the graph + if (!inverse_labels[i]) { + if (!inverse) { + GRB_TRY (GrB_mxm (next_frontier, GrB_NULL, acc, sr1, symbol_frontier, A[i], GrB_NULL)) ; + } else if (AT[i]) { + GRB_TRY (GrB_mxm (next_frontier, GrB_NULL, acc, sr1, symbol_frontier, AT[i], GrB_NULL)) ; + } else { + GRB_TRY (GrB_mxm (next_frontier, GrB_NULL, acc, sr1, symbol_frontier, A[i], GrB_DESC_T1)) ; + } + } else { + if (!inverse && AT[i]) { + GRB_TRY (GrB_mxm (next_frontier, GrB_NULL, acc, sr1, symbol_frontier, AT[i], GrB_NULL)) ; + } else if (!inverse) { + GRB_TRY (GrB_mxm (next_frontier, GrB_NULL, acc, sr1, symbol_frontier, A[i], GrB_DESC_T1)) ; + } else { + GRB_TRY (GrB_mxm (next_frontier, GrB_NULL, acc, sr1, symbol_frontier, A[i], GrB_NULL)) ; + } + } + } + + GRB_TRY (GrB_apply (next_frontier, GrB_NULL, GrB_NULL, op, next_frontier, false, GrB_NULL)) ; + + } + + //LG_FREE_WORK ; + return (GrB_SUCCESS) ; +} + + +int LAGraph_2Rpq_AllSimple // All simple paths satisfying regular + // expression. Simple paths are paths without + // loops or the ones with the same starting + // and final nodes. 
+( + // output: + Path **paths, // simple paths from one of the starting + // nodes satisfying regular constraints + size_t *path_count, // resulting path count + // input: + LAGraph_Graph *R, // input non-deterministic finite automaton + // adjacency matrix decomposition + bool *inverse_labels, // inversed labels + size_t nl, // total label count, # of matrices graph and + // NFA adjacency matrix decomposition + const GrB_Index *QS, // starting states in NFA + size_t nqs, // number of starting states in NFA + const GrB_Index *QF, // final states in NFA + size_t nqf, // number of final states in NFA + LAGraph_Graph *G, // input graph adjacency matrix decomposition + const GrB_Index *S, // source vertices to start searching paths + size_t ns, // number of source vertices + bool inverse, // inverse the whole query + char *msg // LAGraph output message +) +{ + return LAGraph_2Rpq(paths, path_count, R, inverse_labels, nl, QS, nqs, QF, nqf, G, S, ns, inverse, ULLONG_MAX, msg, extend_multiple_simple) ; +} + +LAGRAPHX_PUBLIC +int LAGraph_2Rpq_AllTrails // All trails satisfying regular expression. + // Trails are paths without repeated edges. 
+( + // output: + Path **paths, // trails from one of the starting nodes + // satisfying regular constraints + size_t *path_count, // resulting path count + // input: + LAGraph_Graph *R, // input non-deterministic finite automaton + // adjacency matrix decomposition + bool *inverse_labels, // inversed labels + size_t nl, // total label count, # of matrices graph and + // NFA adjacency matrix decomposition + const GrB_Index *QS, // starting states in NFA + size_t nqs, // number of starting states in NFA + const GrB_Index *QF, // final states in NFA + size_t nqf, // number of final states in NFA + LAGraph_Graph *G, // input graph adjacency matrix decomposition + const GrB_Index *S, // source vertices to start searching paths + size_t ns, // number of source vertices + bool inverse, // inverse the whole query + char *msg // LAGraph output message +) +{ + return LAGraph_2Rpq(paths, path_count, R, inverse_labels, nl, QS, nqs, QF, nqf, G, S, ns, inverse, ULLONG_MAX, msg, extend_multiple_trails) ; +} + +int LAGraph_2Rpq_AllPaths // All paths satisfying regular expression +( + // output: + Path **paths, // paths from one of the starting nodes + // satisfying regular constraints + size_t *path_count, // resulting path count + // input: + LAGraph_Graph *R, // input non-deterministic finite automaton + // adjacency matrix decomposition + bool *inverse_labels, // inversed labels + size_t nl, // total label count, # of matrices graph and + // NFA adjacency matrix decomposition + const GrB_Index *QS, // starting states in NFA + size_t nqs, // number of starting states in NFA + const GrB_Index *QF, // final states in NFA + size_t nqf, // number of final states in NFA + LAGraph_Graph *G, // input graph adjacency matrix decomposition + const GrB_Index *S, // source vertices to start searching paths + size_t ns, // number of source vertices + bool inverse, // inverse the whole query + uint64_t limit, // maximum path count + char *msg // LAGraph output message + ) +{ + return 
LAGraph_2Rpq(paths, path_count, R, inverse_labels, nl, QS, nqs, QF, nqf, G, S, ns, inverse, limit, msg, extend_multiple_paths) ;
+}
+
+#define LG_FREE_WORK \
+{ \
+}
+#define LG_FREE_ALL \
+{ \
+    LG_FREE_WORK ; \
+}
+
+int LAGraph_Rpq_initialize(char *msg) // create the GraphBLAS type and operators the LAGraph_2Rpq_* queries rely on
+{
+    GRB_TRY (GrB_Type_new (&multiple_paths, sizeof(MultiplePaths))) ;
+    // binary operators over the multiple_paths type and the monoid built from the combine operator
+    GRB_TRY (GrB_BinaryOp_new (&combine_multiple_paths_op, (GxB_binary_function) &combine_multiple_paths_f, multiple_paths, multiple_paths, multiple_paths)) ;
+    GRB_TRY (GrB_BinaryOp_new (&first_multiple_paths, (GxB_binary_function) &first_multiple_paths_f, multiple_paths, multiple_paths, GrB_BOOL)) ;
+    GRB_TRY (GrB_BinaryOp_new (&second_multiple_paths, (GxB_binary_function) &second_multiple_paths_f, multiple_paths, GrB_BOOL, multiple_paths)) ;
+    GRB_TRY (GrB_Monoid_new (&combine_multiple_paths, combine_multiple_paths_op, (void*) &multiple_paths_identity)) ;
+    // semirings: the combine monoid is the additive part, first/second the multiplicative operator
+    GRB_TRY (GrB_Semiring_new (&first_combine_multiple_paths, combine_multiple_paths, first_multiple_paths)) ;
+    GRB_TRY (GrB_Semiring_new (&second_combine_multiple_paths, combine_multiple_paths, second_multiple_paths)) ;
+    GRB_TRY (GrB_IndexUnaryOp_new (&extend_multiple_paths, (GxB_index_unary_function) &extend_multiple_paths_f, multiple_paths, multiple_paths, GrB_BOOL)) ;
+    GRB_TRY (GrB_IndexUnaryOp_new (&extend_multiple_simple, (GxB_index_unary_function) &extend_multiple_simple_f, multiple_paths, multiple_paths, GrB_BOOL)) ;
+    GRB_TRY (GrB_IndexUnaryOp_new (&extend_multiple_trails, (GxB_index_unary_function) &extend_multiple_trails_f, multiple_paths, multiple_paths, GrB_BOOL)) ;
+    return (GrB_SUCCESS) ; // explicit status: the function is declared int and used to fall off its end
+}
diff --git a/experimental/test/test_2Rpq.c b/experimental/test/test_2Rpq.c
new file mode 100644
index 0000000000..173f88a9cf
--- /dev/null
+++ b/experimental/test/test_2Rpq.c
@@ -0,0 +1,249 @@
+#include
+#include
+#include
+#include
+#include
+#include
+
+#define LEN 512
+#define MAX_LABELS 3
+#define MAX_RESULTS 2000000
+
+char msg [LAGRAPH_MSG_LEN] ;
+LAGraph_Graph G[MAX_LABELS] ;
+LAGraph_Graph
R[MAX_LABELS] ; +GrB_Matrix A ; + +char testcase_name [LEN+1] ; +char filename [LEN+1] ; + +typedef struct +{ + const char* name ; + const char* graphs[MAX_LABELS] ; + const char* fas[MAX_LABELS] ; + const char* fa_meta ; + const char* sources ; + const GrB_Index expected[MAX_RESULTS] ; + const size_t expected_count ; +} +matrix_info ; + +const matrix_info files [ ] = +{ + {"simple 1 or more", + {"rpq_data/a.mtx", "rpq_data/b.mtx", NULL}, + {"rpq_data/1_a.mtx", NULL }, // Regex: a+ + "rpq_data/1_meta.txt", + "rpq_data/1_sources.txt", + {2, 4, 6, 7}, 4}, + {"simple kleene star", + {"rpq_data/a.mtx", "rpq_data/b.mtx", NULL}, + {"rpq_data/2_a.mtx", "rpq_data/2_b.mtx", NULL}, // Regex: (a b)* + "rpq_data/2_meta.txt", + "rpq_data/2_sources.txt", + {2, 6, 8}, 3}, + {"kleene star of the conjunction", + {"rpq_data/a.mtx", "rpq_data/b.mtx", NULL}, + {"rpq_data/3_a.mtx", "rpq_data/3_b.mtx", NULL}, // Regex: (a | b)* + "rpq_data/3_meta.txt", + "rpq_data/3_sources.txt", + {3, 6}, 2}, + {"simple repeat from n to m times", + {"rpq_data/a.mtx", "rpq_data/b.mtx", NULL}, + {"", "rpq_data/4_b.mtx", NULL}, // Regex: b b b (b b)? 
+ "rpq_data/4_meta.txt", + "rpq_data/4_sources.txt", + {3, 4, 6}, 3}, + {NULL, NULL, NULL, NULL}, +} ; + +//**************************************************************************** +void test_Rpq_Simple (void) +{ + LAGraph_Init (msg) ; + LAGraph_Rpq_initialize (msg) ; + + for (int k = 0 ; ; k++) + { + if (files[k].sources == NULL) break ; + + snprintf (testcase_name, LEN, "basic regular path query %s", files[k].name) ; + TEST_CASE (testcase_name) ; + + // Load graph from MTX files representing its adjacency matrix + // decomposition + for (int i = 0 ; ; i++) + { + const char *name = files[k].graphs[i] ; + + if (name == NULL) break ; + if (strlen(name) == 0) continue ; + + snprintf (filename, LEN, LG_DATA_DIR "%s", name) ; + FILE *f = fopen (filename, "r") ; + TEST_CHECK (f != NULL) ; + OK (LAGraph_MMRead (&A, f, msg)) ; + OK (fclose (f)); + + OK (LAGraph_New (&(G[i]), &A, LAGraph_ADJACENCY_DIRECTED, msg)) ; + + TEST_CHECK (A == NULL) ; + } + + // Load NFA from MTX files representing its adjacency matrix + // decomposition + for (int i = 0 ; ; i++) + { + const char *name = files[k].fas[i] ; + + if (name == NULL) break ; + if (strlen(name) == 0) continue ; + + snprintf (filename, LEN, LG_DATA_DIR "%s", name) ; + FILE *f = fopen (filename, "r") ; + TEST_CHECK (f != NULL) ; + OK (LAGraph_MMRead (&A, f, msg)) ; + OK (fclose (f)) ; + + OK (LAGraph_New (&(R[i]), &A, LAGraph_ADJACENCY_DIRECTED, msg)) ; + OK (LAGraph_Cached_AT (R[i], msg)) ; + + TEST_CHECK (A == NULL) ; + } + + // Note the matrix rows/cols are enumerated from 0 to n-1. Meanwhile, in + // MTX format they are enumerated from 1 to n. Thus, when + // loading/comparing the results these values should be + // decremented/incremented correspondingly. 
+
+        // Load graph source nodes from the sources file (1-based there, 0-based in S)
+        unsigned long long s = 0 ;
+        GrB_Index S[16] ;
+        size_t ns = 0 ;
+
+        const char *name = files[k].sources ;
+        snprintf (filename, LEN, LG_DATA_DIR "%s", name) ;
+        FILE *f = fopen (filename, "r") ;
+        TEST_CHECK (f != NULL) ;
+
+        while (ns < sizeof (S) / sizeof (S[0]) && fscanf (f, "%llu", &s) == 1)
+            S[ns++] = (GrB_Index) (s - 1) ;
+
+        OK (fclose(f)) ;
+
+        // Load NFA starting states from the meta file: a count, then that many states
+        unsigned long long qs = 0 ;
+        GrB_Index QS[16] ;
+        size_t nqs = 0 ;
+
+        name = files[k].fa_meta ;
+        snprintf (filename, LEN, LG_DATA_DIR "%s", name) ;
+        f = fopen (filename, "r") ;
+        TEST_CHECK (f != NULL) ;
+
+        TEST_CHECK (fscanf(f, "%zu", &nqs) == 1) ;
+
+        for (uint64_t i = 0; i < nqs; i++) {
+            TEST_CHECK (fscanf(f, "%llu", &qs) == 1) ;
+            QS[i] = (GrB_Index) (qs - 1) ;
+        }
+
+        // Load NFA final states from the same file, in the same count-then-states layout
+        unsigned long long qf = 0 ;
+        uint64_t QF[16] ;
+        size_t nqf = 0 ;
+
+        TEST_CHECK (fscanf(f, "%zu", &nqf) == 1) ;
+
+        for (uint64_t i = 0; i < nqf; i++) {
+            TEST_CHECK (fscanf(f, "%llu", &qf) == 1) ;
+            QF[i] = (uint64_t) (qf - 1) ;
+        }
+
+        OK (fclose(f)) ;
+
+        // Evaluate the algorithm; neither the whole query nor any label is
+        // inverted here. Outputs start out empty so a failed call prints nothing.
+
+        bool inverse_labels[] = {false, false, false, false, false, false, false, false, false, false, false, false, false};
+        bool inverse = false;
+
+        Path *paths = NULL ;
+        size_t path_count = 0 ;
+        int res = LAGraph_2Rpq_AllSimple (&paths, &path_count, R, inverse_labels,
+            MAX_LABELS, QS, nqs, QF, nqf, G, S, ns,
+            inverse, msg) ;
+        OK (res) ;
+        // Compare the results with expected values
+        //TEST_CHECK (nvals == files[k].expected_count) ;
+        //for (uint64_t i = 0 ; i < nvals ; i++)
+        //    TEST_CHECK (reachable[i] + 1 == files[k].expected[i]) ;
+
+        printf("ALL SIMPLE:\n");
+        for (size_t i = 0 ; i < path_count ; i++)
+        {
+            Path_print (&paths[i]);
+        }
+        printf("\n");
+
+        // Cleanup
+        OK (LAGraph_Free ((void **) &paths, NULL)) ;
+
+        res = LAGraph_2Rpq_AllTrails (&paths, &path_count, R, inverse_labels,
+            MAX_LABELS, QS, nqs, QF, nqf, G, S, ns,
+            inverse, msg) ;
+        OK (res) ;
+        // Compare the results with expected values
+        //TEST_CHECK
(nvals == files[k].expected_count) ;
+        //for (uint64_t i = 0 ; i < nvals ; i++)
+        //    TEST_CHECK (reachable[i] + 1 == files[k].expected[i]) ;
+
+        printf("ALL TRAILS:\n");
+        for (size_t i = 0 ; i < path_count ; i++)
+        {
+            Path_print (&paths[i]);
+        }
+        printf("\n");
+
+        // Cleanup
+        OK (LAGraph_Free ((void **) &paths, NULL)) ;
+
+        res = LAGraph_2Rpq_AllPaths (&paths, &path_count, R, inverse_labels,
+            MAX_LABELS, QS, nqs, QF, nqf, G, S, ns,
+            inverse, 10, msg) ;
+
+        // Compare the results with expected values
+        //TEST_CHECK (nvals == files[k].expected_count) ;
+        //for (uint64_t i = 0 ; i < nvals ; i++)
+        //    TEST_CHECK (reachable[i] + 1 == files[k].expected[i]) ;
+
+        printf("ALL PATHS (LIMIT = 10):\n");
+        for (size_t i = 0 ; i < path_count ; i++)
+        {
+            Path_print (&paths[i]);
+        }
+        printf("\n");
+
+        OK (LAGraph_Free ((void **) &paths, NULL)) ;
+
+        for (uint64_t i = 0 ; i < MAX_LABELS ; i++)
+        {
+            if (G[i] == NULL) continue ;
+            OK (LAGraph_Delete (&(G[i]), msg)) ;
+        }
+
+        for (uint64_t i = 0 ; i < MAX_LABELS ; i++ )
+        {
+            if (R[i] == NULL) continue ;
+            OK (LAGraph_Delete (&(R[i]), msg)) ;
+        }
+    }
+
+    LAGraph_Finalize (msg) ;
+}
+
+TEST_LIST = {
+    {"Rpq_Simple", test_Rpq_Simple},
+    {NULL, NULL}
+};
diff --git a/include/LAGraphX.h b/include/LAGraphX.h
index 202d19a67e..3f9a6dadfa 100644
--- a/include/LAGraphX.h
+++ b/include/LAGraphX.h
@@ -858,6 +858,102 @@ int LAGraph_2RegularPathQuery // nodes reachable from the starting by the
     char *msg // LAGraph output message
 );
 //****************************************************************************
+#define QUICK_PATH_LENGTH 14
+#define QUICK_PATH_COUNT 2
+
+typedef uint64_t Vertex ;
+
+typedef struct {
+    Vertex vertices[QUICK_PATH_LENGTH];
+    size_t vertex_count;
+    Vertex *extra_vertices; // NOTE(review): presumably storage for vertices beyond QUICK_PATH_LENGTH - confirm
+} Path ;
+
+// Print one path. Declared with LAGRAPHX_PUBLIC like the other functions added
+// here, because experimental/test/test_2Rpq.c calls it.
+LAGRAPHX_PUBLIC
+void Path_print (const Path *x);
+
+LAGRAPHX_PUBLIC
+int LAGraph_Rpq_initialize (char *msg);
+
+LAGRAPHX_PUBLIC
+int LAGraph_2Rpq_AllSimple // All simple paths satisfying regular
+                           // expression.
Simple paths are paths without + // loops or the ones with the same starting + // and final nodes. +( + // output: + Path **paths, // simple paths from one of the starting + // nodes satisfying regular constraints + size_t *path_count, // resulting path count + // input: + LAGraph_Graph *R, // input non-deterministic finite automaton + // adjacency matrix decomposition + bool *inverse_labels, // inversed labels + size_t nl, // total label count, # of matrices graph and + // NFA adjacency matrix decomposition + const GrB_Index *QS, // starting states in NFA + size_t nqs, // number of starting states in NFA + const GrB_Index *QF, // final states in NFA + size_t nqf, // number of final states in NFA + LAGraph_Graph *G, // input graph adjacency matrix decomposition + const GrB_Index *S, // source vertices to start searching paths + size_t ns, // number of source vertices + bool inverse, // inverse the whole query + char *msg // LAGraph output message +); + +LAGRAPHX_PUBLIC +int LAGraph_2Rpq_AllTrails // All trails satisfying regular expression. + // Trails are paths without repeated edges. 
+( + // output: + Path **paths, // trails from one of the starting nodes + // satisfying regular constraints + size_t *path_count, // resulting path count + // input: + LAGraph_Graph *R, // input non-deterministic finite automaton + // adjacency matrix decomposition + bool *inverse_labels, // inversed labels + size_t nl, // total label count, # of matrices graph and + // NFA adjacency matrix decomposition + const GrB_Index *QS, // starting states in NFA + size_t nqs, // number of starting states in NFA + const GrB_Index *QF, // final states in NFA + size_t nqf, // number of final states in NFA + LAGraph_Graph *G, // input graph adjacency matrix decomposition + const GrB_Index *S, // source vertices to start searching paths + size_t ns, // number of source vertices + bool inverse, // inverse the whole query + char *msg // LAGraph output message +); + +LAGRAPHX_PUBLIC +int LAGraph_2Rpq_AllPaths // All paths satisfying regular expression +( + // output: + Path **paths, // paths from one of the starting nodes + // satisfying regular constraints + size_t *path_count, // resulting path count + // input: + LAGraph_Graph *R, // input non-deterministic finite automaton + // adjacency matrix decomposition + bool *inverse_labels, // inversed labels + size_t nl, // total label count, # of matrices graph and + // NFA adjacency matrix decomposition + const GrB_Index *QS, // starting states in NFA + size_t nqs, // number of starting states in NFA + const GrB_Index *QF, // final states in NFA + size_t nqf, // number of final states in NFA + LAGraph_Graph *G, // input graph adjacency matrix decomposition + const GrB_Index *S, // source vertices to start searching paths + size_t ns, // number of source vertices + bool inverse, // inverse the whole query + uint64_t limit, // maximum path count + char *msg // LAGraph output message +); +//**************************************************************************** LAGRAPHX_PUBLIC int LAGraph_VertexCentrality_Triangle // vertex 
triangle-centrality (