From 1b1e7727c6513477aab86c5af7002252e9f4290f Mon Sep 17 00:00:00 2001 From: Phil Gaiser Date: Wed, 13 May 2026 21:50:04 +0700 Subject: [PATCH] Added support for LLC metric in R source files. Processing source files in the R programming language was already supported but the computation of the logical line count was not implemented due to a missing parser implementation. This solution uses the one from the R infrastructure repositories. [CL]: Added support for LLC metric in R source files. Signed-off-by: Phil Gaiser --- Dependencies.cmake | 7 + Dependencies.cmake.sha256 | 2 +- cmake/ConfigTree-sitter-r.cmake | 6 + src/lib/CMakeLists.txt | 1 + src/lib/c/factories.c | 10 +- src/lib/c/lang_r.c | 105 ++++++++ src/lib/tests/CMakeLists.txt | 7 + src/lib/tests/integration/c/test_fileio.c | 3 +- src/lib/tests/res/r/sample.R | 227 ++++++++++++++++++ src/lib/tests/res/r/sample_annotated.R | 227 ++++++++++++++++++ src/lib/tests/res/r/sample_min_formatting.R | 32 +++ src/lib/tests/unit/c/test_lang_r.c | 180 ++++++++++++++ src/lib/tests/unit/c/test_statistics.c | 4 +- src/scount/c/print.c | 1 + .../functionality/res/expected/mixed.txt | 8 +- 15 files changed, 809 insertions(+), 11 deletions(-) create mode 100644 cmake/ConfigTree-sitter-r.cmake create mode 100644 src/lib/c/lang_r.c create mode 100644 src/lib/tests/res/r/sample.R create mode 100644 src/lib/tests/res/r/sample_annotated.R create mode 100644 src/lib/tests/res/r/sample_min_formatting.R create mode 100644 src/lib/tests/unit/c/test_lang_r.c diff --git a/Dependencies.cmake b/Dependencies.cmake index 45e58a7..5989752 100644 --- a/Dependencies.cmake +++ b/Dependencies.cmake @@ -53,6 +53,13 @@ dependency( DEPENDENCY_LINK_TARGETS tree-sitter-typescript ) +dependency( + DEPENDENCY_NAME tree-sitter-r + DEPENDENCY_RESOURCE kilo52/tree-sitter-r + DEPENDENCY_VERSION v1.2.0-patch-cmakeliststxt + DEPENDENCY_LINK_TARGETS tree-sitter-r +) + dependency( DEPENDENCY_NAME utf8proc DEPENDENCY_RESOURCE JuliaStrings/utf8proc diff --git 
a/Dependencies.cmake.sha256 b/Dependencies.cmake.sha256 index 935a344..5e09a0b 100644 --- a/Dependencies.cmake.sha256 +++ b/Dependencies.cmake.sha256 @@ -1 +1 @@ -8648e7957901a3029837287b247ad68b2b2dce86ab816eaa4fc18505449f6701 \ No newline at end of file +a2eff11c1a578a2fa505398b70101f4b92a544c7bb692daaf612f79204a9342f \ No newline at end of file diff --git a/cmake/ConfigTree-sitter-r.cmake b/cmake/ConfigTree-sitter-r.cmake new file mode 100644 index 0000000..fd59379 --- /dev/null +++ b/cmake/ConfigTree-sitter-r.cmake @@ -0,0 +1,6 @@ +# Configuration options for the Tree-Sitter-R dependency + +# Define the program variable to avoid errors when custom command +# is executed since the CLI might not actually be available. +set(TREE_SITTER_CLI "") +set(BUILD_TESTING OFF) diff --git a/src/lib/CMakeLists.txt b/src/lib/CMakeLists.txt index 88bfaf2..eb3fc56 100644 --- a/src/lib/CMakeLists.txt +++ b/src/lib/CMakeLists.txt @@ -38,6 +38,7 @@ target_sources( "c/lang_python.c" "c/lang_javascript.c" "c/lang_typescript.c" + "c/lang_r.c" "c/lang_bash.c" "c/logical.c" "c/physical.c" diff --git a/src/lib/c/factories.c b/src/lib/c/factories.c index e993c72..e909d24 100644 --- a/src/lib/c/factories.c +++ b/src/lib/c/factories.c @@ -27,6 +27,7 @@ TSParser* createParserJava(void); TSParser* createParserPython(void); TSParser* createParserJavaScript(void); TSParser* createParserTypeScript(void); +TSParser* createParserR(void); TSParser* createParserBash(void); void evaluateNodeC(TSNode node, NodeEvalTrace* trace); @@ -34,6 +35,7 @@ void evaluateNodeJava(TSNode node, NodeEvalTrace* trace); void evaluateNodePython(TSNode node, NodeEvalTrace* trace); void evaluateNodeJavaScript(TSNode node, NodeEvalTrace* trace); void evaluateNodeTypeScript(TSNode node, NodeEvalTrace* trace); +void evaluateNodeR(TSNode node, NodeEvalTrace* trace); void evaluateNodeBash(TSNode node, NodeEvalTrace* trace); TSParser* createParser(RcnTextFormat language) { @@ -48,6 +50,8 @@ TSParser* createParser(RcnTextFormat 
language) { return createParserJavaScript(); case RCN_LANG_TYPESCRIPT: return createParserTypeScript(); + case RCN_LANG_R: + return createParserR(); case RCN_LANG_BASH: return createParserBash(); default: @@ -67,6 +71,8 @@ NodeVisitor createEvaluationFunction(RcnTextFormat language) { return evaluateNodeJavaScript; case RCN_LANG_TYPESCRIPT: return evaluateNodeTypeScript; + case RCN_LANG_R: + return evaluateNodeR; case RCN_LANG_BASH: return evaluateNodeBash; default: @@ -77,6 +83,7 @@ NodeVisitor createEvaluationFunction(RcnTextFormat language) { const char* getInlineSourceCommentString(RcnTextFormat language) { switch (language) { case RCN_LANG_PYTHON: + case RCN_LANG_R: case RCN_LANG_BASH: return "#"; case RCN_LANG_C: @@ -158,9 +165,8 @@ SourceFormatDetection detectSourceFormat(const RcnSourceFile* file) { detection.format = RCN_TEXT_YAML; } else if (strcmp(extension, "R") == 0 || strcmp(extension, "r") == 0) { - // isProgrammingLanguage is false for R format due to missing - // support for logical line counting in R files detection.isSupportedFormat = true; + detection.isProgrammingLanguage = true; detection.format = RCN_LANG_R; } else if (strcmp(extension, "txt") == 0) { detection.isSupportedFormat = true; diff --git a/src/lib/c/lang_r.c b/src/lib/c/lang_r.c new file mode 100644 index 0000000..68af9ae --- /dev/null +++ b/src/lib/c/lang_r.c @@ -0,0 +1,105 @@ +/* + * Copyright (C) 2026 Raven Computing + * + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. + * You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. 
+ */ + +#include <stdbool.h> + +#include "tree_sitter/api.h" + +#include "reckon/reckon.h" +#include "reckon_export.h" +#include "evaluation.h" + +RECKON_NO_EXPORT const TSLanguage* tree_sitter_r(void); + +/** + * These are the symbol identifiers as defined by the R language parser + * of tree-sitter. We have only copied the symbol identifiers that we are + * interested in evaluating or counting. Others do not contribute to the weight + * of a node in the AST. + */ +enum SymbolIdentifiersR { + sym_return = 52, + sym_next = 53, + sym_break = 54, + sym_program = 81, + sym_function_definition = 82, + sym_if_statement = 88, + sym_for_statement = 89, + sym_while_statement = 90, + sym_repeat_statement = 91, + sym_braced_expression = 92, + sym_parenthesized_expression = 93, + sym_call = 94, + sym_unary_operator = 104, + sym_binary_operator = 105, +}; + +TSParser* createParserR(void) { + TSParser* parser = ts_parser_new(); + if (parser) { + if (!ts_parser_set_language(parser, tree_sitter_r())) { + // LCOV_EXCL_START + ts_parser_delete(parser); + return NULL; + // LCOV_EXCL_STOP + } + } + return parser; +} + +/** + * Checks whether the given node's immediate parent is the program node + * or a braced expression, i.e. the node is at statement level. 
+ */ +static bool isAtStatementLevel(TSNode node) { + TSNode parent = ts_node_parent(node); + if (ts_node_is_null(parent)) { + return false; + } + TSSymbol sym = ts_node_grammar_symbol(parent); + return sym == sym_program || sym == sym_braced_expression; +} + +static RcnCount evaluateNodeWeightRImpl(TSNode node, NodeEvalTrace* trace) { + RcnCount weight = 0; + TSSymbol sym = ts_node_grammar_symbol(node); + switch (sym) { + case sym_binary_operator: + case sym_call: + case sym_unary_operator: + case sym_function_definition: + case sym_parenthesized_expression: + case sym_if_statement: + case sym_for_statement: + case sym_while_statement: + case sym_repeat_statement: + case sym_next: + case sym_break: + case sym_return: + if (isAtStatementLevel(node)) { + weight += 1; + } + break; + default: + break; + } + return weight; +} + +void evaluateNodeR(TSNode node, NodeEvalTrace* trace) { + trace->result->count += evaluateNodeWeightRImpl(node, trace); + trace->idx++; +} diff --git a/src/lib/tests/CMakeLists.txt b/src/lib/tests/CMakeLists.txt index cdc00dc..f5f6dc2 100644 --- a/src/lib/tests/CMakeLists.txt +++ b/src/lib/tests/CMakeLists.txt @@ -120,6 +120,13 @@ add_test_suite( TEST_SUITE_LINK ${RECKON_TARGET_LIB_OBJ} ) +add_test_suite( + TEST_SUITE_NAME RLanguageUnitTest + TEST_SUITE_TARGET test_lang_r + TEST_SUITE_SOURCE unit/c/test_lang_r.c + TEST_SUITE_LINK ${RECKON_TARGET_LIB_OBJ} +) + add_compile_definitions( RECKON_TEST_PATH_RES_BASE="${CMAKE_CURRENT_SOURCE_DIR}/res" ) diff --git a/src/lib/tests/integration/c/test_fileio.c b/src/lib/tests/integration/c/test_fileio.c index 5b2ca83..43da786 100644 --- a/src/lib/tests/integration/c/test_fileio.c +++ b/src/lib/tests/integration/c/test_fileio.c @@ -239,8 +239,7 @@ void testDetectSourceFormatR(void) { TEST_ASSERT_NOT_NULL(file); SourceFormatDetection detection = detectSourceFormat(file); TEST_ASSERT_TRUE(detection.isSupportedFormat); - // Special case: Missing support for LLC metric. 
- TEST_ASSERT_FALSE(detection.isProgrammingLanguage); + TEST_ASSERT_TRUE(detection.isProgrammingLanguage); TEST_ASSERT_EQUAL_INT(RCN_LANG_R, detection.format); freeSourceFile(file); } diff --git a/src/lib/tests/res/r/sample.R b/src/lib/tests/res/r/sample.R new file mode 100644 index 0000000..6654e19 --- /dev/null +++ b/src/lib/tests/res/r/sample.R @@ -0,0 +1,227 @@ +# Golden sample: Contains R language features and constructs + +# ============================================================================ +# Assignments +# ============================================================================ + +x <- 1 +y <- 2.5 +label <- "Alice" +flag <- FALSE +nums <- c(1L, 2L, 3L, 4L, 5L) +mat <- matrix(1:6, nrow = 2L) +100 -> rhs_val +extra = "extra" + +# ============================================================================ +# Function calls +# ============================================================================ + +print(x) +cat("Label:", label, "\n") +message("Debug info") + +# ============================================================================ +# Function definitions +# ============================================================================ + +square <- function(n) { + n * n +} + +add <- function(a, b) { + result <- a + b + return(result) +} + +greet <- function(nm, prefix = "Hi") { + msg <- paste(prefix, nm) + cat(msg, "\n") +} + +is_positive <- function(n) { + if (n > 0) { + return(TRUE) + } + return(FALSE) +} + +# ============================================================================ +# If statements +# ============================================================================ + +if (x > 0) { + cat("positive\n") +} + +if (flag) { + cat("true\n") +} else { + cat("false\n") +} + +if (x < 0) { + cat("neg\n") +} else if (x == 0) { + cat("zero\n") +} else { + cat("pos\n") +} + +status <- if (x > 0) "high" else "low" + +# ============================================================================ +# For loops +# 
============================================================================ + +for (i in 1:5L) { + cat(i, "\n") +} + +for (val in nums) { + if (val > 3L) { + cat("big:", val, "\n") + } +} + +for (i in 1:3L) { + for (j in 1:3L) { + cat(i * j, " ") + } + cat("\n") +} + +# ============================================================================ +# While loops +# ============================================================================ + +k <- 1L +while (k <= 5L) { + cat(k, "\n") + k <- k + 1L +} + +count <- 0L +while (TRUE) { + count <- count + 1L + if (count >= 3L) { + break + } +} + +# ============================================================================ +# Repeat loops +# ============================================================================ + +rep_count <- 0L +repeat { + rep_count <- rep_count + 1L + if (rep_count >= 5L) { + break + } +} + +# ============================================================================ +# Next keyword +# ============================================================================ + +for (i in 1:10L) { + if (i %% 2L == 0L) { + next + } + cat(i, "\n") +} + +# ============================================================================ +# Higher-order functions +# ============================================================================ + +squares <- sapply(1:5, function(n) n ^ 2) + +doubled_list <- lapply(nums, function(n) { + n * 2L +}) + +sums <- vapply(1:3, function(i) sum(1:i), numeric(1)) + +# ============================================================================ +# String operations +# ============================================================================ + +greeting <- paste("Hello", "World") +upper_label <- toupper(label) +formatted <- sprintf("x = %.2f", x) +nchar(greeting) + +# ============================================================================ +# Vector operations +# ============================================================================ + +filtered <- nums[nums > 2L] 
+nums[1] <- 10L +total <- sum(nums) +avg <- mean(nums) +doubled_v <- nums * 2L +sorted_v <- sort(nums, decreasing = TRUE) + +# ============================================================================ +# Pipe operator +# ============================================================================ + +pipe_result <- 1:10 |> sum() +pipe2 <- nums |> rev() |> cumsum() + +# ============================================================================ +# Error handling +# ============================================================================ + +safe_log <- tryCatch({ + log(10) +}, error = function(e) { + message(conditionMessage(e)) + return(NA_real_) +}) + +withCallingHandlers({ + sqrt(4) +}, warning = function(w) { + message("Warning:", conditionMessage(w)) + invokeRestart("muffleWarning") +}) + +# ============================================================================ +# Environments +# ============================================================================ + +env <- new.env() +env$value <- 42L +assign("g_val", 100L) +rm("g_val") + +# ============================================================================ +# Closures +# ============================================================================ + +make_counter <- function() { + count <- 0L + list( + increment = function() { + count <<- count + 1L + }, + get = function() count + ) +} + +counter <- make_counter() +counter$increment() +val <- counter$get() + +# ============================================================================ +# Unary operators and misc +# ============================================================================ + +!flag +-x +is.numeric(x) +Sys.time() +invisible(NULL) diff --git a/src/lib/tests/res/r/sample_annotated.R b/src/lib/tests/res/r/sample_annotated.R new file mode 100644 index 0000000..e624d8b --- /dev/null +++ b/src/lib/tests/res/r/sample_annotated.R @@ -0,0 +1,227 @@ +# Golden sample: Contains R language features and constructs + +# 
============================================================================ +# Assignments +# ============================================================================ + +x <- 1 # +1 (binary operator) +y <- 2.5 # +1 (binary operator) +label <- "Alice" # +1 (binary operator) +flag <- FALSE # +1 (binary operator) +nums <- c(1L, 2L, 3L, 4L, 5L) # +1 (binary operator) +mat <- matrix(1:6, nrow = 2L) # +1 (binary operator) +100 -> rhs_val # +1 (binary operator) +extra = "extra" # +1 (binary operator) + +# ============================================================================ +# Function calls +# ============================================================================ + +print(x) # +1 (call) +cat("Label:", label, "\n") # +1 (call) +message("Debug info") # +1 (call) + +# ============================================================================ +# Function definitions +# ============================================================================ + +square <- function(n) { # +1 (binary operator) + n * n # +1 (binary operator) +} + +add <- function(a, b) { # +1 (binary operator) + result <- a + b # +1 (binary operator) + return(result) # +1 (call) +} + +greet <- function(nm, prefix = "Hi") { # +1 (binary operator) + msg <- paste(prefix, nm) # +1 (binary operator) + cat(msg, "\n") # +1 (call) +} + +is_positive <- function(n) { # +1 (binary operator) + if (n > 0) { # +1 (if statement) + return(TRUE) # +1 (call) + } + return(FALSE) # +1 (call) +} + +# ============================================================================ +# If statements +# ============================================================================ + +if (x > 0) { # +1 (if statement) + cat("positive\n") # +1 (call) +} + +if (flag) { # +1 (if statement) + cat("true\n") # +1 (call) +} else { + cat("false\n") # +1 (call) +} + +if (x < 0) { # +1 (if statement) + cat("neg\n") # +1 (call) +} else if (x == 0) { + cat("zero\n") # +1 (call) +} else { + cat("pos\n") # +1 (call) +} + +status <- if 
(x > 0) "high" else "low" # +1 (binary operator) + +# ============================================================================ +# For loops +# ============================================================================ + +for (i in 1:5L) { # +1 (for statement) + cat(i, "\n") # +1 (call) +} + +for (val in nums) { # +1 (for statement) + if (val > 3L) { # +1 (if statement) + cat("big:", val, "\n") # +1 (call) + } +} + +for (i in 1:3L) { # +1 (for statement) + for (j in 1:3L) { # +1 (for statement) + cat(i * j, " ") # +1 (call) + } + cat("\n") # +1 (call) +} + +# ============================================================================ +# While loops +# ============================================================================ + +k <- 1L # +1 (binary operator) +while (k <= 5L) { # +1 (while statement) + cat(k, "\n") # +1 (call) + k <- k + 1L # +1 (binary operator) +} + +count <- 0L # +1 (binary operator) +while (TRUE) { # +1 (while statement) + count <- count + 1L # +1 (binary operator) + if (count >= 3L) { # +1 (if statement) + break # +1 (break) + } +} + +# ============================================================================ +# Repeat loops +# ============================================================================ + +rep_count <- 0L # +1 (binary operator) +repeat { # +1 (repeat statement) + rep_count <- rep_count + 1L # +1 (binary operator) + if (rep_count >= 5L) { # +1 (if statement) + break # +1 (break) + } +} + +# ============================================================================ +# Next keyword +# ============================================================================ + +for (i in 1:10L) { # +1 (for statement) + if (i %% 2L == 0L) { # +1 (if statement) + next # +1 (next) + } + cat(i, "\n") # +1 (call) +} + +# ============================================================================ +# Higher-order functions +# ============================================================================ + +squares <- sapply(1:5, function(n) 
n ^ 2) # +1 (binary operator) + +doubled_list <- lapply(nums, function(n) { # +1 (binary operator) + n * 2L # +1 (binary operator) +}) + +sums <- vapply(1:3, function(i) sum(1:i), numeric(1)) # +1 (binary operator) + +# ============================================================================ +# String operations +# ============================================================================ + +greeting <- paste("Hello", "World") # +1 (binary operator) +upper_label <- toupper(label) # +1 (binary operator) +formatted <- sprintf("x = %.2f", x) # +1 (binary operator) +nchar(greeting) # +1 (call) + +# ============================================================================ +# Vector operations +# ============================================================================ + +filtered <- nums[nums > 2L] # +1 (binary operator) +nums[1] <- 10L # +1 (binary operator) +total <- sum(nums) # +1 (binary operator) +avg <- mean(nums) # +1 (binary operator) +doubled_v <- nums * 2L # +1 (binary operator) +sorted_v <- sort(nums, decreasing = TRUE) # +1 (binary operator) + +# ============================================================================ +# Pipe operator +# ============================================================================ + +pipe_result <- 1:10 |> sum() # +1 (binary operator) +pipe2 <- nums |> rev() |> cumsum() # +1 (binary operator) + +# ============================================================================ +# Error handling +# ============================================================================ + +safe_log <- tryCatch({ # +1 (binary operator) + log(10) # +1 (call) +}, error = function(e) { + message(conditionMessage(e)) # +1 (call) + return(NA_real_) # +1 (call) +}) + +withCallingHandlers({ # +1 (call) + sqrt(4) # +1 (call) +}, warning = function(w) { + message("Warning:", conditionMessage(w)) # +1 (call) + invokeRestart("muffleWarning") # +1 (call) +}) + +# ============================================================================ 
+# Environments +# ============================================================================ + +env <- new.env() # +1 (binary operator) +env$value <- 42L # +1 (binary operator) +assign("g_val", 100L) # +1 (call) +rm("g_val") # +1 (call) + +# ============================================================================ +# Closures +# ============================================================================ + +make_counter <- function() { # +1 (binary operator) + count <- 0L # +1 (binary operator) + list( # +1 (call) + increment = function() { + count <<- count + 1L # +1 (binary operator) + }, + get = function() count + ) +} + +counter <- make_counter() # +1 (binary operator) +counter$increment() # +1 (call) +val <- counter$get() # +1 (binary operator) + +# ============================================================================ +# Unary operators and misc +# ============================================================================ + +!flag # +1 (unary operator) +-x # +1 (unary operator) +is.numeric(x) # +1 (call) +Sys.time() # +1 (call) +invisible(NULL) # +1 (call) diff --git a/src/lib/tests/res/r/sample_min_formatting.R b/src/lib/tests/res/r/sample_min_formatting.R new file mode 100644 index 0000000..6eae288 --- /dev/null +++ b/src/lib/tests/res/r/sample_min_formatting.R @@ -0,0 +1,32 @@ +x<-1;y<-2.5;label<-"Alice";flag<-FALSE +nums<-c(1L,2L,3L,4L,5L);mat<-matrix(1:6,nrow=2L);100->rhs_val;extra="extra" +print(x);cat("Label:",label,"\n");message("Debug info") +square<-function(n){n*n} +add<-function(a,b){result<-a+b;return(result)} +greet<-function(nm,prefix="Hi"){msg<-paste(prefix,nm);cat(msg,"\n")} +is_positive<-function(n){if(n>0){return(TRUE)};return(FALSE)} +if(x>0){cat("positive\n")} +if(flag){cat("true\n")}else{cat("false\n")} +if(x<0){cat("neg\n")}else if(x==0){cat("zero\n")}else{cat("pos\n")} +status<-if(x>0)"high"else"low" +for(i in 1:5L){cat(i,"\n")} +for(val in nums){if(val>3L){cat("big:",val,"\n")}} +for(i in 1:3L){for(j in 1:3L){cat(i*j," 
")};cat("\n")} +k<-1L;while(k<=5L){cat(k,"\n");k<-k+1L} +count<-0L;while(TRUE){count<-count+1L;if(count>=3L){break}} +rep_count<-0L;repeat{rep_count<-rep_count+1L;if(rep_count>=5L){break}} +for(i in 1:10L){if(i%%2L==0L){next};cat(i,"\n")} +squares<-sapply(1:5,function(n)n^2) +doubled_list<-lapply(nums,function(n){n*2L}) +sums<-vapply(1:3,function(i)sum(1:i),numeric(1)) +greeting<-paste("Hello","World");upper_label<-toupper(label) +formatted<-sprintf("x = %.2f",x);nchar(greeting) +filtered<-nums[nums>2L];nums[1]<-10L;total<-sum(nums);avg<-mean(nums) +doubled_v<-nums*2L;sorted_v<-sort(nums,decreasing=TRUE) +pipe_result<-1:10|>sum();pipe2<-nums|>rev()|>cumsum() +safe_log<-tryCatch({log(10)},error=function(e){message(conditionMessage(e));return(NA_real_)}) +withCallingHandlers({sqrt(4)},warning=function(w){message("Warning:",conditionMessage(w));invokeRestart("muffleWarning")}) +env<-new.env();env$value<-42L;assign("g_val",100L);rm("g_val") +make_counter<-function(){count<-0L;list(increment=function(){count<<-count+1L},get=function()count)} +counter<-make_counter();counter$increment();val<-counter$get() +!flag;-x;is.numeric(x);Sys.time();invisible(NULL) diff --git a/src/lib/tests/unit/c/test_lang_r.c b/src/lib/tests/unit/c/test_lang_r.c new file mode 100644 index 0000000..d229f91 --- /dev/null +++ b/src/lib/tests/unit/c/test_lang_r.c @@ -0,0 +1,180 @@ +/* + * Copyright (C) 2026 Raven Computing + * + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. + * You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. 
+ */ + +#include + +#include "unity.h" + +#include "reckon/reckon.h" +#include "fileio.h" + +#define TEST_RES_DIR RECKON_TEST_PATH_RES_BASE "/r" +#define TEST_SAMPLE TEST_RES_DIR "/sample.R" +#define TEST_SAMPLE_ANNOTATED TEST_RES_DIR "/sample_annotated.R" +#define TEST_SAMPLE_MIN_FORMATTING TEST_RES_DIR "/sample_min_formatting.R" + +char* rSourceWithSyntaxError = + "x <- 42\n" + "<-\n" + "\n"; + +void setUp(void) { } + +void tearDown(void) { } + +void testRLogicalLineCountIsCorrect(void) { + RcnSourceFile* file = newSourceFile(TEST_SAMPLE); + readSourceFileContent(file); + RcnCountResult result = rcnCountLogicalLines( + RCN_LANG_R, + file->content + ); + freeSourceFile(file); + TEST_ASSERT_TRUE(result.state.ok); + TEST_ASSERT_EQUAL_INT(100, result.count); + TEST_ASSERT_EQUAL_INT(RCN_ERR_NONE, result.state.errorCode); + TEST_ASSERT_NULL(result.state.errorMessage); +} + +void testRLogicalLineCountIsLenientWithSyntaxError(void) { + RcnSourceText source = { + .text = rSourceWithSyntaxError, + .size = strlen(rSourceWithSyntaxError) + }; + RcnCountResult result = rcnCountLogicalLines(RCN_LANG_R, source); + TEST_ASSERT_TRUE(result.state.ok); + TEST_ASSERT_EQUAL_INT(RCN_ERR_NONE, result.state.errorCode); + TEST_ASSERT_NULL(result.state.errorMessage); + TEST_ASSERT_EQUAL_INT(1, result.count); +} + +void testRLogicalLineCountFailsWithSyntaxError(void) { + RcnSourceText source = { + .text = rSourceWithSyntaxError, + .size = strlen(rSourceWithSyntaxError) + }; + RcnCountResult result = rcnCountLogicalLinesStrict(RCN_LANG_R, source); + TEST_ASSERT_FALSE(result.state.ok); + TEST_ASSERT_EQUAL_INT(0, result.count); + TEST_ASSERT_EQUAL_INT(RCN_ERR_SYNTAX_ERROR, result.state.errorCode); + TEST_ASSERT_EQUAL_STRING( + "Syntax error detected in source code", + result.state.errorMessage + ); +} + +void testRPhysicalLineCountIsCorrect(void) { + RcnSourceFile* file = newSourceFile(TEST_SAMPLE); + readSourceFileContent(file); + RcnCountResult result = rcnCountPhysicalLines(file->content); 
+ freeSourceFile(file); + TEST_ASSERT_TRUE(result.state.ok); + TEST_ASSERT_EQUAL_INT(227, result.count); + TEST_ASSERT_EQUAL_INT(RCN_ERR_NONE, result.state.errorCode); + TEST_ASSERT_NULL(result.state.errorMessage); +} + +void testRPhysicalLineCountWithSyntacticallyIncorrectCode(void) { + RcnSourceText source = { + .text = rSourceWithSyntaxError, + .size = strlen(rSourceWithSyntaxError) + }; + RcnCountResult result = rcnCountPhysicalLines(source); + TEST_ASSERT_TRUE(result.state.ok); + TEST_ASSERT_EQUAL_INT(3, result.count); + TEST_ASSERT_EQUAL_INT(RCN_ERR_NONE, result.state.errorCode); + TEST_ASSERT_NULL(result.state.errorMessage); +} + +void testRLogicalLineCountMarksAreCorrect(void) { + RcnSourceFile* file = newSourceFile(TEST_SAMPLE); + RcnSourceFile* goldenSample = newSourceFile(TEST_SAMPLE_ANNOTATED); + readSourceFileContent(file); + readSourceFileContent(goldenSample); + RcnSourceText actual = rcnMarkLogicalLinesInSourceText( + RCN_LANG_R, + file->content + ); + TEST_ASSERT_NOT_NULL(actual.text); + TEST_ASSERT_EQUAL_INT(7127, actual.size); + TEST_ASSERT_EQUAL_STRING(goldenSample->content.text, actual.text); + freeSourceFile(file); + freeSourceFile(goldenSample); + free(actual.text); +} + +void testRLogicalLineCountMarksForFilePathInput(void) { + char* path = TEST_SAMPLE; + RcnSourceFile* goldenSample = newSourceFile(TEST_SAMPLE_ANNOTATED); + readSourceFileContent(goldenSample); + RcnSourceText annotated = rcnMarkLogicalLinesInFile(path); + TEST_ASSERT_NOT_NULL(annotated.text); + TEST_ASSERT_EQUAL_INT(7127, annotated.size); + TEST_ASSERT_EQUAL_STRING(goldenSample->content.text, annotated.text); + freeSourceFile(goldenSample); + rcnFreeSourceText(&annotated); +} + +void testRCountAllIsCorrect(void) { + RcnCountStatistics* stats = rcnCreateCountStatistics(TEST_RES_DIR); + RcnStatOptions options = {0}; + options.formats = RCN_OPT_LANG_R; + rcnCount(stats, options); + TEST_ASSERT_TRUE(stats->state.ok); + TEST_ASSERT_EQUAL_INT(RCN_ERR_NONE, stats->state.errorCode); 
+ TEST_ASSERT_NULL(stats->state.errorMessage); + TEST_ASSERT_EQUAL_INT(300, stats->totalLogicalLines); + TEST_ASSERT_EQUAL_INT(486, stats->totalPhysicalLines); + TEST_ASSERT_EQUAL_INT(14207, stats->totalSourceSize); + TEST_ASSERT_EQUAL_INT(300, stats->logicalLines[RCN_LANG_R]); + TEST_ASSERT_EQUAL_INT(486, stats->physicalLines[RCN_LANG_R]); + TEST_ASSERT_EQUAL_INT(14207, stats->sourceSize[RCN_LANG_R]); + TEST_ASSERT_EQUAL_INT(3, stats->count.size); + TEST_ASSERT_EQUAL_INT(3, stats->count.sizeProcessed); + RcnSourceFile* filelist = stats->count.files; + TEST_ASSERT_EQUAL_STRING("sample.R", filelist[0].name); + TEST_ASSERT_EQUAL_STRING("sample_annotated.R", filelist[1].name); + TEST_ASSERT_EQUAL_STRING("sample_min_formatting.R", filelist[2].name); + rcnFreeCountStatistics(stats); +} + +void testRLogicalLineCountForMinimizedFormattingIsCorrect(void) { + RcnSourceFile* file = newSourceFile(TEST_SAMPLE_MIN_FORMATTING); + readSourceFileContent(file); + RcnCountResult result = rcnCountLogicalLines( + RCN_LANG_R, + file->content + ); + freeSourceFile(file); + TEST_ASSERT_TRUE(result.state.ok); + TEST_ASSERT_EQUAL_INT(100, result.count); + TEST_ASSERT_EQUAL_INT(RCN_ERR_NONE, result.state.errorCode); + TEST_ASSERT_NULL(result.state.errorMessage); +} + +int main(void) { + UNITY_BEGIN(); + RUN_TEST(testRLogicalLineCountIsCorrect); + RUN_TEST(testRLogicalLineCountIsLenientWithSyntaxError); + RUN_TEST(testRLogicalLineCountFailsWithSyntaxError); + RUN_TEST(testRPhysicalLineCountIsCorrect); + RUN_TEST(testRPhysicalLineCountWithSyntacticallyIncorrectCode); + RUN_TEST(testRLogicalLineCountMarksAreCorrect); + RUN_TEST(testRLogicalLineCountMarksForFilePathInput); + RUN_TEST(testRCountAllIsCorrect); + RUN_TEST(testRLogicalLineCountForMinimizedFormattingIsCorrect); + return UNITY_END(); +} diff --git a/src/lib/tests/unit/c/test_statistics.c b/src/lib/tests/unit/c/test_statistics.c index 6e3345f..d6c5e56 100644 --- a/src/lib/tests/unit/c/test_statistics.c +++ 
b/src/lib/tests/unit/c/test_statistics.c @@ -436,8 +436,8 @@ void testCountResultsR(void) { TEST_ASSERT_NULL(result->state.errorMessage); TEST_ASSERT_EQUAL_STRING("sample.R", file->name); TEST_ASSERT_TRUE(result->isProcessed); - TEST_ASSERT_FALSE(result->hasLogicalLines); - TEST_ASSERT_EQUAL_INT(0, result->logicalLines); + TEST_ASSERT_TRUE(result->hasLogicalLines); + TEST_ASSERT_EQUAL_INT(3, result->logicalLines); TEST_ASSERT_EQUAL_INT(4, result->physicalLines); TEST_ASSERT_EQUAL_INT(9, result->words); TEST_ASSERT_EQUAL_INT(28, result->characters); diff --git a/src/scount/c/print.c b/src/scount/c/print.c index 5d6334d..6190873 100644 --- a/src/scount/c/print.c +++ b/src/scount/c/print.c @@ -626,6 +626,7 @@ static void prSummaryRows( break; case RCN_LANG_R: label = "R"; + hasLogicalLines = true; break; case RCN_LANG_C: label = "C"; diff --git a/src/scount/tests/functionality/res/expected/mixed.txt b/src/scount/tests/functionality/res/expected/mixed.txt index 3a5759d..8063cc5 100644 --- a/src/scount/tests/functionality/res/expected/mixed.txt +++ b/src/scount/tests/functionality/res/expected/mixed.txt @@ -6,7 +6,7 @@ Scanned files: 32 | Sample1.cmake | n/a | 5 | 12 | 134 | 134 | | Sample1.java | 3 | 12 | 34 | 233 | 233 | | Sample2.java | 4 | 13 | 38 | 286 | 286 | - | sample1.R | n/a | 4 | 9 | 28 | 28 | + | sample1.R | 3 | 4 | 9 | 28 | 28 | | sample1.c | 4 | 10 | 29 | 180 | 180 | | sample1.css | n/a | 8 | 17 | 98 | 98 | | sample1.html | n/a | 9 | 14 | 121 | 121 | @@ -28,7 +28,7 @@ Scanned files: 32 | sample2.json | n/a | 23 | 35 | 311 | 311 | | sample2.md | n/a | 1 | 8 | 53 | 53 | | sample2.py | 7 | 13 | 32 | 264 | 264 | - | sample2.r | n/a | 4 | 9 | 58 | 58 | + | sample2.r | 3 | 4 | 9 | 58 | 58 | | sample2.sql | n/a | 11 | 31 | 170 | 170 | | sample2.ts | 6 | 13 | 24 | 220 | 220 | | sample2.txt | n/a | 2 | 13 | 75 | 75 | @@ -53,10 +53,10 @@ Summary: | Python | 13 | 25 | 60 | 493 | 493 | | JavaScript | 10 | 19 | 37 | 331 | 331 | | TypeScript | 11 | 27 | 51 | 436 | 436 | 
- | R | n/a | 8 | 18 | 86 | 86 | + | R | 6 | 8 | 18 | 86 | 86 | | Shell | 8 | 18 | 41 | 250 | 250 | o==========================o===========o===========o===========o===========o===========o - | Total: | 58 | 293 | 655 | 5069 | 5069 | + | Total: | 64 | 293 | 655 | 5069 | 5069 | o==========================o===========o===========o===========o===========o===========o