From 9f3104ef7503efe2b2c467353f2dc2303da87ee5 Mon Sep 17 00:00:00 2001 From: Pasca Robert-Paul Date: Wed, 18 Feb 2026 14:52:42 +0000 Subject: [PATCH 1/2] Backends/ApMonBackend: update metric payload format --- src/Backends/ApMonBackend.cxx | 68 ++++++++++++++++++++++++++--------- src/Backends/ApMonBackend.h | 6 ++++ 2 files changed, 57 insertions(+), 17 deletions(-) diff --git a/src/Backends/ApMonBackend.cxx b/src/Backends/ApMonBackend.cxx index 80875f48..5eb84f6c 100644 --- a/src/Backends/ApMonBackend.cxx +++ b/src/Backends/ApMonBackend.cxx @@ -17,6 +17,8 @@ #include "ApMonBackend.h" #include #include +#include +#include #include "../MonLogger.h" #include "../Exceptions/MonitoringException.h" @@ -59,9 +61,28 @@ void ApMonBackend::addGlobalTag(std::string_view /*name*/, std::string_view valu mEntity += value; } +std::string ApMonBackend::getNodeName() +{ + const char* env_p = std::getenv("ALIEN_PROC_ID"); + if (env_p) { + return std::string(env_p); + } + + char hostname[HOST_NAME_MAX]; + if (gethostname(hostname, sizeof(hostname)) == 0) { + hostname[sizeof(hostname) - 1] = '\0'; + return std::string(hostname); + } + + MonLogger::Get(Severity::Error) << "Failed to get hostname, using 'unknown'" << MonLogger::End(); + return "unknown"; +} + void ApMonBackend::send(const Metric& metric) { - std::string name = metric.getName(); + std::string clusterName(mClusterName); + std::string metricName = metric.getName(); + std::string nodeName = getNodeName(); std::string entity = mEntity; for (const auto& [key, value] : metric.getTags()) { entity += ','; @@ -72,46 +93,59 @@ void ApMonBackend::send(const Metric& metric) if (mRunNumber != 0) entity += (",run=" + std::to_string(mRunNumber)); int valueSize = metric.getValuesSize(); + int totalParams = valueSize * 2; // each metric value has a source parameter char **paramNames, **paramValues; int* valueTypes; - paramNames = (char**)std::malloc(valueSize * sizeof(char*)); - paramValues = (char**)std::malloc(valueSize * sizeof(char*)); - valueTypes = (int*)std::malloc(valueSize * sizeof(int)); + paramNames = (char**)std::malloc(totalParams * sizeof(char*)); + paramValues = (char**)std::malloc(totalParams * sizeof(char*)); + valueTypes = (int*)std::malloc(totalParams * sizeof(int)); // the scope of values must be the same as sendTimedParameters method int intValue; double doubleValue; std::string stringValue; auto& values = metric.getValues(); + std::string sourceName = metricName + "_src"; - for (int i = 0; i < valueSize; i++) { - paramNames[i] = const_cast(values[i].first.c_str()); + for (int i = 0; i < valueSize; ++i) { + int metricIdx = i * 2; + int sourceIdx = metricIdx + 1; + paramNames[metricIdx] = const_cast(metricName.c_str()); std::visit(overloaded{ [&](int value) { - valueTypes[i] = XDR_INT32; + valueTypes[metricIdx] = XDR_INT32; intValue = value; - paramValues[i] = reinterpret_cast(&intValue); + paramValues[metricIdx] = reinterpret_cast(&intValue); }, [&](double value) { - valueTypes[i] = XDR_REAL64; + valueTypes[metricIdx] = XDR_REAL64; doubleValue = value; - paramValues[i] = reinterpret_cast(&doubleValue); + paramValues[metricIdx] = reinterpret_cast(&doubleValue); }, [&](const std::string& value) { - valueTypes[i] = XDR_STRING; + valueTypes[metricIdx] = XDR_STRING; stringValue = value; - paramValues[i] = const_cast(stringValue.c_str()); + paramValues[metricIdx] = const_cast(stringValue.c_str()); }, [&](uint64_t value) { - valueTypes[i] = XDR_REAL64; + valueTypes[metricIdx] = XDR_REAL64; doubleValue = static_cast(value); - paramValues[i] = reinterpret_cast(&doubleValue); + paramValues[metricIdx] = reinterpret_cast(&doubleValue); }, - }, values[i].second); + }, values[metricIdx].second); + + paramNames[sourceIdx] = const_cast(sourceName.c_str()); + valueTypes[sourceIdx] = XDR_STRING; + stringValue = entity; + paramValues[sourceIdx] = const_cast(stringValue.c_str()); } - mApMon->sendTimedParameters(const_cast(name.c_str()), const_cast(entity.c_str()), - valueSize, paramNames, valueTypes, paramValues, convertTimestamp(metric.getTimestamp())); + mApMon->sendTimedParameters( + const_cast(clusterName.c_str()), + const_cast(nodeName.c_str()), + totalParams, paramNames, valueTypes, paramValues, + convertTimestamp(metric.getTimestamp()) + ); std::free(paramNames); std::free(paramValues); diff --git a/src/Backends/ApMonBackend.h b/src/Backends/ApMonBackend.h index 95a0f356..5ba95d3b 100644 --- a/src/Backends/ApMonBackend.h +++ b/src/Backends/ApMonBackend.h @@ -68,8 +68,14 @@ class ApMonBackend final : public Backend /// \return timestamp as integer (milliseconds from epoch) int convertTimestamp(const std::chrono::time_point& timestamp); + /// Gets node name + /// It looks for environment variable ALIEN_PROC_ID and if it is not set, it uses hostname as node name + /// \return node name as string + std::string getNodeName(); + std::unique_ptr mApMon; ///< ApMon object std::string mEntity; ///< MonALISA entity, created out of global tags + inline static constexpr std::string_view mClusterName = "O2Monitoring_Nodes"; ///< MonALISA cluster name }; } // namespace backends From a1accf5d3a5c3d8a96d01c85bbcd75481aa03c85 Mon Sep 17 00:00:00 2001 From: Pasca Robert-Paul Date: Mon, 23 Feb 2026 22:51:06 +0000 Subject: [PATCH 2/2] Backends/ApMonBackend: aggregate metric array into unified payload --- src/Backends/ApMonBackend.cxx | 153 ++++++++++++++++++++-------------- src/Backends/ApMonBackend.h | 5 ++ 2 files changed, 95 insertions(+), 63 deletions(-) diff --git a/src/Backends/ApMonBackend.cxx b/src/Backends/ApMonBackend.cxx index 5eb84f6c..0158516c 100644 --- a/src/Backends/ApMonBackend.cxx +++ b/src/Backends/ApMonBackend.cxx @@ -17,8 +17,11 @@ #include "ApMonBackend.h" #include #include +#include +#include #include #include +#include #include "../MonLogger.h" #include "../Exceptions/MonitoringException.h" @@ -78,84 +81,108 @@ std::string ApMonBackend::getNodeName() return "unknown"; } -void ApMonBackend::send(const Metric& metric) +void ApMonBackend::sendBatch(const std::vector>& metrics) { std::string clusterName(mClusterName); - std::string metricName = metric.getName(); std::string nodeName = getNodeName(); - std::string entity = mEntity; - for (const auto& [key, value] : metric.getTags()) { - entity += ','; - entity += tags::TAG_KEY[key]; - entity += '='; - (value > 0) ? entity += tags::GetValue(value) : entity += std::to_string(0 - value); + + int totalValues = 0; + for (const auto& metric : metrics) { + totalValues += metric.get().getValuesSize(); } - if (mRunNumber != 0) entity += (",run=" + std::to_string(mRunNumber)); - - int valueSize = metric.getValuesSize(); - int totalParams = valueSize * 2; // each metric value has a source parameter - char **paramNames, **paramValues; - int* valueTypes; - paramNames = (char**)std::malloc(totalParams * sizeof(char*)); - paramValues = (char**)std::malloc(totalParams * sizeof(char*)); - valueTypes = (int*)std::malloc(totalParams * sizeof(int)); - // the scope of values must be the same as sendTimedParameters method - int intValue; - double doubleValue; - std::string stringValue; - - auto& values = metric.getValues(); - std::string sourceName = metricName + "_src"; - - for (int i = 0; i < valueSize; ++i) { - int metricIdx = i * 2; - int sourceIdx = metricIdx + 1; - paramNames[metricIdx] = const_cast(metricName.c_str()); - std::visit(overloaded{ - [&](int value) { - valueTypes[metricIdx] = XDR_INT32; - intValue = value; - paramValues[metricIdx] = reinterpret_cast(&intValue); - }, - [&](double value) { - valueTypes[metricIdx] = XDR_REAL64; - doubleValue = value; - paramValues[metricIdx] = reinterpret_cast(&doubleValue); - }, - [&](const std::string& value) { - valueTypes[metricIdx] = XDR_STRING; - stringValue = value; - paramValues[metricIdx] = const_cast(stringValue.c_str()); - }, - [&](uint64_t value) { - valueTypes[metricIdx] = XDR_REAL64; - doubleValue = static_cast(value); - paramValues[metricIdx] = reinterpret_cast(&doubleValue); - }, - }, values[metricIdx].second); - - paramNames[sourceIdx] = const_cast(sourceName.c_str()); - valueTypes[sourceIdx] = XDR_STRING; - stringValue = entity; - paramValues[sourceIdx] = const_cast(stringValue.c_str()); + const int totalParams = totalValues * 2; + std::vector intValues; + std::vector doubleValues; + std::vector stringValues; + std::vector paramNames; + std::vector paramValues; + std::vector valueTypes; + + intValues.reserve(totalValues); + doubleValues.reserve(totalValues); + stringValues.reserve(metrics.size() * 3 + totalValues); + paramNames.reserve(totalParams); + paramValues.reserve(totalParams); + valueTypes.reserve(totalParams); + + for (const auto& metric : metrics) { + std::string entity = mEntity; + for (const auto& [key, value] : metric.get().getTags()) { + entity += ','; + entity += tags::TAG_KEY[key]; + entity += '='; + (value > 0) ? entity += tags::GetValue(value) : entity += std::to_string(0 - value); + } + if (mRunNumber != 0) entity += (",run=" + std::to_string(mRunNumber)); + + auto& values = metric.get().getValues(); + const int valueSize = metric.get().getValuesSize(); + + const std::string_view metricName = metric.get().getName(); + stringValues.emplace_back(metricName); + const char* metriNamePtr = stringValues.back().c_str(); + stringValues.emplace_back(std::string(metricName) + "_src"); + const char* metriNameSrcPtr = stringValues.back().c_str(); + stringValues.push_back(std::move(entity)); + const char* entityPtr = stringValues.back().c_str(); + + for (int i = 0; i < valueSize; ++i) { + paramNames.push_back(const_cast(metriNamePtr)); + std::visit(overloaded{ + [&](int value) { + valueTypes.push_back(XDR_INT32); + intValues.push_back(value); + paramValues.push_back(reinterpret_cast(&intValues.back())); + }, + [&](double value) { + valueTypes.push_back(XDR_REAL64); + doubleValues.push_back(value); + paramValues.push_back(reinterpret_cast(&doubleValues.back())); + }, + [&](const std::string& value) { + valueTypes.push_back(XDR_STRING); + stringValues.push_back(value); + paramValues.push_back(const_cast(stringValues.back().c_str())); + }, + [&](uint64_t value) { + valueTypes.push_back(XDR_REAL64); + doubleValues.push_back(static_cast(value)); + paramValues.push_back(reinterpret_cast(&doubleValues.back())); + }, + }, values[i].second); + + paramNames.push_back(const_cast(metriNameSrcPtr)); + valueTypes.push_back(XDR_STRING); + paramValues.push_back(const_cast(entityPtr)); + } } mApMon->sendTimedParameters( const_cast(clusterName.c_str()), const_cast(nodeName.c_str()), - totalParams, paramNames, valueTypes, paramValues, - convertTimestamp(metric.getTimestamp()) + totalParams, paramNames.data(), valueTypes.data(), paramValues.data(), + convertTimestamp(metrics[0].get().getTimestamp()) ); +} - std::free(paramNames); - std::free(paramValues); - std::free(valueTypes); +void ApMonBackend::send(const Metric& metric) +{ + sendBatch(std::vector>{std::cref(metric)}); } void ApMonBackend::send(std::vector&& metrics) { - for (auto& metric : metrics) { - send(metric); + if (metrics.empty()) { + return; + } + + std::map>> metricsByTimestamp; + for (const auto& metric : metrics) { + metricsByTimestamp[convertTimestamp(metric.getTimestamp())].push_back(std::cref(metric)); + } + + for (const auto& [timestamp, metricsGroup] : metricsByTimestamp) { + sendBatch(metricsGroup); } } diff --git a/src/Backends/ApMonBackend.h b/src/Backends/ApMonBackend.h index 5ba95d3b..6cea8112 100644 --- a/src/Backends/ApMonBackend.h +++ b/src/Backends/ApMonBackend.h @@ -22,6 +22,7 @@ #include #include #include +#include namespace o2 { @@ -63,6 +64,10 @@ class ApMonBackend final : public Backend void addGlobalTag(std::string_view name, std::string_view value) override; private: + /// Sends batch of metrics + /// \param metrics vector of metrics + void sendBatch(const std::vector>& metrics); + /// Converts timestamp to format supported by ApMonBackend /// \param timestamp timestamp in std::chrono::time_point format /// \return timestamp as integer (milliseconds from epoch)