danieeeld2 · danieeeld2 · Apr 8, 2025 · Apr 7, 2025 · Apr 7, 2025 · Apr 7, 2025
diff --git a/R/scores.R b/R/scores.R
@@ -0,0 +1,194 @@
+#' Compute and plot scores.
+#'
+#' @param model A list with model parameters:
+#'   - scores: Matrix [N x A] of data scores
+#'   - lvs: Vector of latent variables to use (e.g., 1:2)
+#'   - type: "PCA" or "PLS" (optional)
+#'
+#' @param obstest Optional [L x M] matrix with external observations
+#' @param plottype "Scatter" (default) or "Bars"
+#' @param plotcal Logical. If TRUE (default), plot both calibration and test. If FALSE, only test
+#' @param tit Plot title (default: "")
+#' @param label Labels for observations
+#' @param classes Classes for visualization
+#' @param blur Label density control (default: 1)
+#' @param color Color palette or vector
+#'
+#' @return List of ggplot objects
+#'
+scores <- function(model,
+                   obstest = NULL,
+                   plottype = "Scatter",
+                   plotcal = TRUE,
+                   tit = "",
+                   blur = 1,
+                   label = NULL,
+                   classes = NULL,
+                   color = NULL) {
+
+  # Basic input validation
+  if (missing(model)) stop("Model argument is required")
+
+  # Extract model components
+  T_cal <- model$scores
+  N <- nrow(T_cal)
+  M <- ifelse(!is.null(model$loads), nrow(model$loads), ncol(T_cal))
+  A <- length(model$lvs)
+
+  # Validate blur parameter
+  if (!is.null(blur)) {
+    if (length(blur) != 1 || !is.numeric(blur)) {
+      stop("Parameter 'BlurIndex' must be a single numeric value")
+    }
+  }
+
+  # Determine data dimensions
+  L <- ifelse(is.null(obstest), 0, nrow(obstest))
+
+  # Check if test data is embedded in the model
+  if (is.null(obstest) && !is.null(model$test)) {
+    obstest <- model$test
+    L <- nrow(obstest)
+  }
+
+  original_plotcal <- plotcal  # Save original value to decide on label initialization
+  if (!plotcal && L == 0) {
+    warning("plotcal=FALSE specified but no test data provided. Using calibration data instead.")
+    plotcal <- TRUE  # Force plotcal=TRUE to show calibration data
+  }
+
+  # K depends on plotcal and the presence of test data
+  if (plotcal) {
+    K <- N + L
+  } else {
+    K <- L # K should be L if plotting only test data
+  }
+
+  # Set default labels if not provided
+  if (is.null(label)) {
+    if (plotcal) {
+      label <- c(1:N, 1:L)
+    } else {
+      label <- 1:L
+    }
+    label <- as.character(label)
+  } else {
+    # Ensure label has the correct length
+    if (length(label) < K) {
+      warning("Not enough labels provided. Extending with numeric indices.")
+      additional_labels <- as.character((length(label) + 1):K)
+      label <- c(label, additional_labels)
+    } else if (length(label) > K) {
+      warning("Too many labels provided. Truncating to match observation count.")
+      label <- label[1:K]
+    }
+  }
+
+  # Set default classes if not provided
+  if (is.null(classes)) {
+    if (plotcal) {
+      classes <- c(rep(1, N), rep(2, L)) # Default to two classes if plotcal=TRUE and no classes provided
+    } else {
+      classes <- rep(1, L) # Use single class for test data if plotcal is FALSE
+    }
+  } else {
+    # Ensure classes has the correct length
+    if (length(classes) < K) {
+      warning("Not enough class labels provided. Repeating pattern cyclically.")
+      classes_pattern <- classes
+      classes <- rep(classes_pattern, length.out = K)
+    } else if (length(classes) > K) {
+      warning("Too many class labels provided. Truncating to match observation count.")
+      classes <- classes[1:K]
+    }
+  }
+
+  # Convert row arrays to column vectors if needed
+  if (length(label) > 0 && !is.vector(label)) {
+    label <- as.vector(label)
+  }
+  if (length(classes) > 0 && !is.vector(classes)) {
+    classes <- as.vector(classes)
+  }
+
+  # Verify dimensions of inputs
+  if (K > 0) {
+    if (length(label) != K) {
+      stop(sprintf("Label length must match number of observations (got %d, need %d)", 
+                  length(label), K))
+    }
+    if (length(classes) != K) {
+      stop(sprintf("Classes length must match number of observations (got %d, need %d)", 
+                  length(classes), K))
+    }
+  }
+
+  # Calculate diagonal of scores variance 
+  d <- diag(t(T_cal) %*% T_cal)
+
+  # Use alternative scores (scoresV) if provided
+  if (!is.null(model$scoresV)) {
+    T_cal <- model$scoresV
+  }
+
+  # Process test data if available
+  if (!is.null(obstest)) {
+    if (!is.null(model$av)) {
+      testcs <- scale(obstest, center = model$av, scale = model$sc)
+    } else {
+      testcs <- obstest
+    }
+    TT <- testcs %*% model$loads
+  } else {
+    TT <- NULL
+  }
+
+  # Combine calibration and test data
+  if (plotcal) {
+    if (is.null(TT)) {
+      Tt <- T_cal
+    } else {
+      Tt <- rbind(T_cal, TT)
+    }
+  } else {
+    Tt <- TT # If plotcal is FALSE, use only test data
+  }
+
+  # Initialize plot list
+  figH <- list()
+
+  # Set dimension label prefix (PC for PCA, LV for PLS)
+  dim <- ifelse(!is.null(model$type) && model$type == "PLS", "LV", "PC")
+
+  # Generate bar plots
+  if (plottype == "Bars" || A == 1) {
+    for (i in seq_along(model$lvs)) {
+      var_explained <- 100 * d[i] / model$var
+      fig <- plotVec(Tt[, i],
+                   EleLabel = label,
+                   ObsClass = classes,
+                   XYLabel = c("", sprintf("Scores %s %d (%.0f%%)", dim, model$lvs[i], var_explained)),
+                   Color = color) + ggplot2::ggtitle(tit) + ggplot2::theme(plot.title = ggplot2::element_text(hjust = 0.5))
+      figH[[length(figH) + 1]] <- fig
+    }
+  } 
+  # Generate scatter plots for multiple LVs
+  else if (plottype == "Scatter") {
+    for (i in 1:(length(model$lvs)-1)) {
+      for (j in (i+1):length(model$lvs)) {
+        var_explained_i <- 100 * d[i] / model$var
+        var_explained_j <- 100 * d[j] / model$var
+        fig <- plotScatter(data.frame(x = Tt[, i], y = Tt[, j]),
+                         EleLabel = label,
+                         ObsClass = classes,
+                         XYLabel = c(sprintf("Scores %s %d (%.0f%%)", dim, model$lvs[i], var_explained_i),
+                                    sprintf("Scores %s %d (%.0f%%)", dim, model$lvs[j], var_explained_j)),
+                         BlurIndex = blur,
+                         Color = color) + ggplot2::ggtitle(tit) + ggplot2::theme(plot.title = ggplot2::element_text(hjust = 0.5))
+        figH[[length(figH) + 1]] <- fig
+      }
+    }
+  }
+
+  return(figH)
+}
diff --git a/datasets/tests_datasets/scores_model_1.json b/datasets/tests_datasets/scores_model_1.json
@@ -0,0 +1,50 @@
+{
+	"lvs":[1,2],
+	"av":[-0.05744121819,-0.04610750026,0.07865259888,-0.09259301104,0.05515053534,0.001296228926,0.1009389346,0.04796459508],
+	"sc":[0.2112785503,0.2141683989,0.2039961381,0.1977713633,0.2119170105,0.2193022523,0.1934461033,0.2137406915],
+	"var":168,
+	"loads":[
+		[-0.2607946191,0.4288078372],
+		[-0.02243169211,0.5682233172],
+		[0.5085789886,0.1407437035],
+		[-0.5073316697,0.0674929658],
+		[0.4332101519,0.3128258958],
+		[0.06369529021,-0.1964959504],
+		[0.4677329879,0.01018372268],
+		[0.07030434876,-0.5764612756]
+	],
+	"scores":[
+		[-2.497934506,-0.5671854997],
+		[1.887156091,-2.224244254],
+		[-1.399984815,-0.6832581672],
+		[-1.196648172,1.137033478],
+		[-1.98555465,-0.3502668725],
+		[-0.4346434839,1.396657429],
+		[2.949917101,-3.34430368],
+		[-3.179809806,-2.040648426],
+		[1.179994668,1.094934691],
+		[-3.816931929,-1.33651409],
+		[0.2244036392,3.316243412],
+		[1.544807628,0.6975247763],
+		[0.008346960911,1.613974351],
+		[0.1329062224,-0.2009980132],
+		[0.9032452814,2.225993709],
+		[-0.7956657735,1.452998642],
+		[1.991181121,0.8245904218],
+		[0.2259775424,-2.354844647],
+		[-1.184983419,0.7576598263],
+		[1.160328624,-1.536987906],
+		[1.41399257,0.9349032557],
+		[2.869899105,-0.8132624366]
+	],
+	"test":[
+		[0.3299458468,0.1281295226,-0.1858099563,0.1337719624,-0.1097119432,-0.2624823971,-0.05895489166,-0.1595546689],
+		[0.0480192052,0.2692993425,0.3078710896,-0.1842345762,0.3539255963,0.003715983995,0.1878482577,-0.2384057934],
+		[0.1718357617,0.1405897995,-0.04684492593,0.1301970939,-0.002903070264,-0.1024467816,-0.08488036198,-0.1617999161],
+		[0.002862538325,-0.1757983248,0.1035200217,-0.2698331006,0.03077749005,-0.2380429948,0.3012845996,0.1591422274]
+	],
+	"info":{
+		"date_created":"07-Apr-2025 18:10:31",
+		"description":"PCA model with 22 observations and 8 variables"
+	}
+}
diff --git a/datasets/tests_datasets/scores_model_2.json b/datasets/tests_datasets/scores_model_2.json
@@ -0,0 +1,51 @@
+{
+	"lvs":[1,2],
+	"av":[0.03329815259,-0.002113769145,-0.00727511607,-0.01791555838,0.0181204887,0.01331695623,-0.03366679838],
+	"sc":[0.2115417818,0.2142531675,0.2141349056,0.2134795874,0.213461504,0.2138309814,0.2114807722],
+	"var":154,
+	"loads":[
+		[-0.1440618338,0.4532651504],
+		[-0.03895062741,0.5655163957],
+		[0.614102781,-0.002283312136],
+		[0.5666107008,0.1702562687],
+		[0.4895182761,0.1878616593],
+		[-0.1937813515,0.5819598056],
+		[0.04878288625,0.2679184217]
+	],
+	"scores":[
+		[-2.410956208,-0.8698731991],
+		[0.06963625603,1.861514174],
+		[0.3537403344,0.4567187175],
+		[1.522447622,0.3324497628],
+		[-2.25753684,-1.170660161],
+		[0.08346270469,3.659113768],
+		[-0.1745475236,-0.8160454549],
+		[-0.2869973488,-2.880836526],
+		[-0.3858618569,0.9231995475],
+		[0.535822078,0.8802162811],
+		[1.010515063,0.1386996778],
+		[-0.3129182742,1.325776459],
+		[-0.8966576005,0.432210226],
+		[0.3166699353,0.9611768881],
+		[1.79325368,-0.8594629929],
+		[-0.297875603,-0.3016892343],
+		[-0.9895936397,0.5036323142],
+		[-2.078034764,-2.052596312],
+		[-1.263624681,1.272504058],
+		[0.1980375324,-0.08282311517],
+		[2.98887064,1.149662781],
+		[4.307796834,-3.191499177],
+		[-1.825648341,-1.671388484]
+	],
+	"test":[
+		[-0.1536011659,-0.33653334,0.04740609233,-0.1134028802,-0.01984467673,-0.2741423952,-0.2136521315],
+		[-0.2888682845,-0.1007280622,0.1124770522,0.1345841267,-0.007537701605,-0.2689572482,0.0633716201],
+		[0.06718533568,-0.1671209958,0.2772714735,0.1632900864,0.3012413467,-0.05401987624,-0.2243734243],
+		[0.2462352376,0.03459811259,0.090983012,0.01714523957,0.2452837678,0.1730081378,-0.1325927209],
+		[0.09711224577,-0.1635025794,-0.2842679596,-0.3414960529,-0.173391529,0.02474644063,-0.2374156993]
+	],
+	"info":{
+		"date_created":"07-Apr-2025 18:10:31",
+		"description":"PCA model with 23 observations and 7 variables"
+	}
+}
diff --git a/datasets/tests_datasets/scores_model_3.json b/datasets/tests_datasets/scores_model_3.json
@@ -0,0 +1,55 @@
+{
+	"lvs":[1,2],
+	"av":[-0.05046516772,0.03744608055,-0.04848813555,0.04265719518,-0.01821361447,0.02955812665,-0.01806126977,-0.0279441978,0.06804018506],
+	"sc":[0.1985711114,0.2015507356,0.1990835794,0.2004691469,0.2042982507,0.2029119194,0.2043123389,0.2031499903,0.1930310133],
+	"var":216,
+	"loads":[
+		[-0.3917719266,-0.252427059],
+		[-0.2673630084,-0.06994225794],
+		[-0.4341751015,-0.1670783448],
+		[0.4322151641,-0.3233258505],
+		[0.2940348356,0.4225249407],
+		[0.147129514,-0.5759132603],
+		[0.3706386582,0.07913846345],
+		[-0.2304053922,0.5313809839],
+		[0.3180417806,0.01009089824]
+	],
+	"scores":[
+		[-2.227004952,-0.2721331418],
+		[-1.672868049,0.3861182653],
+		[1.468293555,-3.679076294],
+		[4.259153212,-0.2488332945],
+		[0.4080508507,2.065251298],
+		[2.250682112,-0.884493507],
+		[-1.010184364,-0.5463821383],
+		[0.6793376701,-0.554051645],
+		[-0.4677036344,0.1563846215],
+		[1.820651232,0.3298167891],
+		[-1.657956496,2.173115092],
+		[-1.88595279,0.9022496586],
+		[-0.4106554368,-3.059528695],
+		[-2.782679026,0.2166793524],
+		[1.917068315,1.000889936],
+		[-0.9639979088,-1.627432048],
+		[3.442137485,2.797194789],
+		[0.5617209557,-1.054596487],
+		[-0.8643939603,-1.085874474],
+		[0.4437128847,0.8331680473],
+		[-0.7540208472,0.5040492752],
+		[-2.005914002,-2.691115138],
+		[0.2111191334,0.7093484136],
+		[-3.16037197,2.897897065],
+		[2.401776031,0.7313542597]
+	],
+	"test":[
+		[-0.0240348172,-0.2240423556,-0.03030852739,0.1846598814,0.1342586633,0.09968444627,0.2122738594,-0.1566664557,-0.05036586519],
+		[0.02704125842,-0.1696841394,-0.04240897382,0.2914281959,-0.03565016184,0.310035516,0.1919103697,-0.3314020981,0.03164838913],
+		[0.1681385098,0.2065525591,0.1540599095,-0.009949859546,-0.2964182558,0.1684475265,-0.2410191364,-0.1301008097,0.002705514291],
+		[-0.3445240939,0.007396544723,-0.3481516369,0.1520302263,0.1625332261,-0.04660418357,0.06702690532,0.006397204408,0.3336469533],
+		[-0.1281603276,0.2088051423,-0.1345464277,0.1445302173,-0.1746089641,0.159866949,-0.1366503244,-0.1202337602,0.2234095608]
+	],
+	"info":{
+		"date_created":"07-Apr-2025 18:10:31",
+		"description":"PCA model with 25 observations and 9 variables"
+	}
+}