diff --git a/.github/workflows/releases.yml b/.github/workflows/releases.yml index 75fff78..b6e634d 100644 --- a/.github/workflows/releases.yml +++ b/.github/workflows/releases.yml @@ -29,7 +29,7 @@ jobs: MODULE: ${{ inputs.module_name }} RELEASE_TYPE: ${{ inputs.release_type }} IS_DEV_RELEASE: ${{ inputs.is_dev_release }} - CURRENT_FULL_VERSION: ${{ vars.FULL_VERSION || '0.0.0' }} + CURRENT_FULL_VERSION: ${{ vars.MODULE_VERSION || '0.0.0' }} outputs: version: ${{ steps.version.outputs.NEXT_RELEASE_VERSION }} changelog: ${{ steps.changelog.outputs.NOTES }} @@ -166,15 +166,17 @@ jobs: echo "EOF" >> $GITHUB_OUTPUT - name: Printing out the release information as a step summary for validation + env: + CHANGELOG_NOTES: ${{ steps.changelog.outputs.NOTES }} run: | echo "### Release Preview: ${{ inputs.module_name }}" >> $GITHUB_STEP_SUMMARY echo "**New Version:** \`${{ steps.version.outputs.NEXT_RELEASE_VERSION }}\`" >> $GITHUB_STEP_SUMMARY echo "**Release Type:** ${{ inputs.is_dev_release && 'In Development (Dev)' || 'Stable' }}" >> $GITHUB_STEP_SUMMARY echo "#### Proposed Changelogs:" >> $GITHUB_STEP_SUMMARY - if [ -z "${{ steps.changelog.outputs.NOTES }}" ]; then + if [ -z "$CHANGELOG_NOTES" ]; then echo "*No descriptive changes found (commits may lack the 'Implemented Changes' section).*" >> $GITHUB_STEP_SUMMARY else - echo "${{ steps.changelog.outputs.NOTES }}" >> $GITHUB_STEP_SUMMARY + echo "$CHANGELOG_NOTES" >> $GITHUB_STEP_SUMMARY fi echo "" >> $GITHUB_STEP_SUMMARY echo "> **Note:** Please review the details above. If correct, approve the next job to finalize the release." >> $GITHUB_STEP_SUMMARY diff --git a/modules/helm/README.md b/modules/helm/README.md index 009358a..470ae2b 100644 --- a/modules/helm/README.md +++ b/modules/helm/README.md @@ -5,7 +5,7 @@ OpenTofu Module to deploy the following required helm charts: 2. [Cloudnative PG (including Barman Plugin)](https://cloudnative-pg.io/) 3. [Traefik](https://traefik.io/) 4. 
[Calico CNI](https://www.tigera.io/project-calico/) -5. [NetObserv](https://github.com/netobserv) +5. [External Secrets](https://external-secrets.io) ## Providers @@ -22,7 +22,6 @@ OpenTofu Module to deploy the following required helm charts: | [helm_release.cnpg](https://registry.terraform.io/providers/hashicorp/helm/latest/docs/resources/release) | resource | | [helm_release.cnpg_barman_plugin](https://registry.terraform.io/providers/hashicorp/helm/latest/docs/resources/release) | resource | | [helm_release.external_secrets](https://registry.terraform.io/providers/hashicorp/helm/latest/docs/resources/release) | resource | -| [helm_release.netobserv](https://registry.terraform.io/providers/hashicorp/helm/latest/docs/resources/release) | resource | | [helm_release.traefik](https://registry.terraform.io/providers/hashicorp/helm/latest/docs/resources/release) | resource | ## Inputs @@ -34,7 +33,6 @@ OpenTofu Module to deploy the following required helm charts: | [cnpg\_barman\_configuration](#input\_cnpg\_barman\_configuration) | Dictionary filled with Cloud Native PG Barman Configuration Details | `map(string)` |
{
"chart": "plugin-barman-cloud",
"name": "cnpg-barman",
"namespace": "cnpg-system",
"repository": "https://cloudnative-pg.github.io/charts",
"version": "v0.2.0"
} | no |
| [cnpg\_configuration](#input\_cnpg\_configuration) | Dictionary filled with Cloud Native PG Operator Configuration Details | `map(string)` | {
"chart": "cloudnative-pg",
"create_namespace": true,
"name": "cnpg",
"namespace": "cnpg-system",
"repository": "https://cloudnative-pg.github.io/charts",
"version": "v0.26.0"
} | no |
| [external\_secrets\_configuration](#input\_external\_secrets\_configuration) | Dictionary filled with External Secrets Operator Configuration Details | `map(string)` | {
"chart": "external-secrets",
"create_namespace": true,
"name": "external-secrets",
"namespace": "external-secrets",
"repository": "https://charts.external-secrets.io",
"version": "2.1.0"
} | no |
-| [netobserv\_configuration](#input\_netobserv\_configuration) | Dictionary filled with Netobserv Operator Configuration Details | `map(string)` | {
"chart": "netobserv-operator",
"create_namespace": true,
"name": "netobserv",
"namespace": "netobserv",
"repository": "https://netobserv.io/static/helm",
"version": "1.11.0"
} | no |
| [server\_node\_selector](#input\_server\_node\_selector) | Node Selector Label Value to be used for deploying required foundation components | `string` | n/a | yes |
| [traefik\_configuration](#input\_traefik\_configuration) | Dictionary filled with Traefik Controller Configuration Details | `map(string)` | {
"chart": "traefik",
"create_namespace": "true",
"name": "traefik",
"namespace": "traefik",
"repository": "https://traefik.github.io/charts",
"version": "v39.0.7"
} | no |
diff --git a/modules/helm/calico.tf b/modules/helm/calico.tf
index b5b1084..7636b83 100644
--- a/modules/helm/calico.tf
+++ b/modules/helm/calico.tf
@@ -35,6 +35,11 @@ resource "helm_release" "calico" {
{
name = "installation.calicoNetwork.mtu"
value = "1440"
+ },
+ {
+ name = "kubeletVolumePluginPath"
+ value = "None"
+ type = "string"
}
]
diff --git a/modules/helm/netobserv.tf b/modules/helm/netobserv.tf
deleted file mode 100644
index 2f9081c..0000000
--- a/modules/helm/netobserv.tf
+++ /dev/null
@@ -1,12 +0,0 @@
-# Netobserv Configuration
-resource "helm_release" "netobserv" {
- name = var.netobserv_configuration.name
- namespace = var.netobserv_configuration.namespace
- repository = var.netobserv_configuration.repository
- chart = var.netobserv_configuration.chart
- version = var.netobserv_configuration.version
- create_namespace = var.netobserv_configuration.create_namespace
-
- depends_on = [ helm_release.calico, helm_release.cert-manager ]
- timeout = 1800
-}
diff --git a/modules/helm/variables.tf b/modules/helm/variables.tf
index b485807..cff16ba 100644
--- a/modules/helm/variables.tf
+++ b/modules/helm/variables.tf
@@ -66,20 +66,6 @@ variable "calico_configuration" {
}
}
-# --------------- NETOBSERV VARIABLES --------------- #
-variable "netobserv_configuration" {
- description = "Dictionary filled with Netobserv Operator Configuration Details"
- type = map(string)
- default = {
- "name" = "netobserv"
- "namespace" = "netobserv"
- "repository" = "https://netobserv.io/static/helm"
- "chart" = "netobserv-operator"
- "version" = "1.11.0"
- "create_namespace" = true
- }
-}
-
# --------------- EXTERNAL SECRETS VARIABLES --------------- #
variable "external_secrets_configuration" {
description = "Dictionary filled with External Secrets Operator Configuration Details"
diff --git a/modules/keycloak/deployment.tf b/modules/keycloak/deployment.tf
index 651d622..5323a6c 100644
--- a/modules/keycloak/deployment.tf
+++ b/modules/keycloak/deployment.tf
@@ -2,7 +2,7 @@
resource "kubernetes_stateful_set" "keycloak_cluster" {
metadata {
name = "keycloak-cluster"
- namespace = var.namespace
+ namespace = kubernetes_namespace.namespace.metadata[0].name
labels = {
app = "keycloak"
component = "statefulset"
@@ -238,7 +238,7 @@ resource "kubernetes_stateful_set" "keycloak_cluster" {
limits = {
"cpu" = "500m"
- "memory" = "1Gi"
+ "memory" = "2Gi"
}
}
diff --git a/modules/keycloak/ingress.tf b/modules/keycloak/ingress.tf
index a817d27..2db278e 100644
--- a/modules/keycloak/ingress.tf
+++ b/modules/keycloak/ingress.tf
@@ -2,7 +2,7 @@
resource "kubernetes_ingress_v1" "ingress" {
metadata {
name = "ingress"
- namespace = var.namespace
+ namespace = kubernetes_namespace.namespace.metadata[0].name
labels = {
app = var.app_name
component = "ingress"
diff --git a/modules/keycloak/networkpolicy.tf b/modules/keycloak/networkpolicy.tf
index 35e4fd1..c55fb83 100644
--- a/modules/keycloak/networkpolicy.tf
+++ b/modules/keycloak/networkpolicy.tf
@@ -2,7 +2,7 @@
resource "kubernetes_network_policy" "keycloak_network_access_policy" {
metadata {
name = "keycloak-network-access-policy"
- namespace = var.namespace
+ namespace = kubernetes_namespace.namespace.metadata[0].name
}
spec {
policy_types = ["Ingress", "Egress"]
diff --git a/modules/keycloak/service.tf b/modules/keycloak/service.tf
index 381be97..e39a133 100644
--- a/modules/keycloak/service.tf
+++ b/modules/keycloak/service.tf
@@ -2,7 +2,7 @@
resource "kubernetes_service" "keycloak_discovery" {
metadata {
name = "keycloak-discovery"
- namespace = var.namespace
+ namespace = kubernetes_namespace.namespace.metadata[0].name
}
spec {
@@ -25,7 +25,7 @@ resource "kubernetes_service" "keycloak_discovery" {
resource "kubernetes_service" "keycloak_service" {
metadata {
name = "keycloak-cluster-service"
- namespace = var.namespace
+ namespace = kubernetes_namespace.namespace.metadata[0].name
}
spec {
diff --git a/modules/observability/README.md b/modules/observability/README.md
index 9914221..879a901 100644
--- a/modules/observability/README.md
+++ b/modules/observability/README.md
@@ -7,8 +7,7 @@ These components are being deployed as part of the Observability Module:
2. [VictoriaLogs](https://victoriametrics.com/products/victorialogs/) for logs storage generated from the cluster.
3. [OpenTelemetry Collector](https://opentelemetry.io/docs/collector/) for receiving and processing and export telemetry data to the storage databases.
4. [kube-state-metrics](https://github.com/kubernetes/kube-state-metrics) to generate and expose cluster-level metrics.
-5. [NetObserv](https://github.com/netobserv) for components related to Network Observability in Kubernetes based on eBPF.
-6. [Grafana](https://grafana.com/oss/grafana/?plcmt=oss-nav) for the visual layer for observability.
+5. [Grafana](https://grafana.com/oss/grafana/?plcmt=oss-nav) as the visualization layer for observability.
## Providers
@@ -26,6 +25,8 @@ These components are being deployed as part of the Observability Module:
| [helm_release.logs](https://registry.terraform.io/providers/hashicorp/helm/latest/docs/resources/release) | resource |
| [helm_release.metrics](https://registry.terraform.io/providers/hashicorp/helm/latest/docs/resources/release) | resource |
| [helm_release.otel_collector](https://registry.terraform.io/providers/hashicorp/helm/latest/docs/resources/release) | resource |
+| [kubernetes_config_map.goldmane_api_proto](https://registry.terraform.io/providers/hashicorp/kubernetes/latest/docs/resources/config_map) | resource |
+| [kubernetes_deployment.goldmane_otel_adapter](https://registry.terraform.io/providers/hashicorp/kubernetes/latest/docs/resources/deployment) | resource |
| [kubernetes_ingress_v1.ingress](https://registry.terraform.io/providers/hashicorp/kubernetes/latest/docs/resources/ingress_v1) | resource |
| [kubernetes_manifest.certificate_authority](https://registry.terraform.io/providers/hashicorp/kubernetes/latest/docs/resources/manifest) | resource |
| [kubernetes_manifest.grafana_credentials_sync](https://registry.terraform.io/providers/hashicorp/kubernetes/latest/docs/resources/manifest) | resource |
@@ -34,7 +35,6 @@ These components are being deployed as part of the Observability Module:
| [kubernetes_manifest.issuer](https://registry.terraform.io/providers/hashicorp/kubernetes/latest/docs/resources/manifest) | resource |
| [kubernetes_manifest.middleware_buffering](https://registry.terraform.io/providers/hashicorp/kubernetes/latest/docs/resources/manifest) | resource |
| [kubernetes_manifest.middleware_rewrite](https://registry.terraform.io/providers/hashicorp/kubernetes/latest/docs/resources/manifest) | resource |
-| [kubernetes_manifest.network_observability](https://registry.terraform.io/providers/hashicorp/kubernetes/latest/docs/resources/manifest) | resource |
| [kubernetes_manifest.password_generator](https://registry.terraform.io/providers/hashicorp/kubernetes/latest/docs/resources/manifest) | resource |
| [kubernetes_manifest.public_issuer](https://registry.terraform.io/providers/hashicorp/kubernetes/latest/docs/resources/manifest) | resource |
| [kubernetes_manifest.push_grafana_credentials](https://registry.terraform.io/providers/hashicorp/kubernetes/latest/docs/resources/manifest) | resource |
diff --git a/modules/observability/configmap.tf b/modules/observability/configmap.tf
new file mode 100644
index 0000000..67251ea
--- /dev/null
+++ b/modules/observability/configmap.tf
@@ -0,0 +1,11 @@
+# ConfigMap exposing this module's proto/api.proto file, the proto definition used for querying the Goldmane gRPC API
+resource "kubernetes_config_map" "goldmane_api_proto" {
+ metadata {
+ name = "goldmane-api-proto"
+ namespace = "calico-system" # hard-coded; NOTE(review): presumably the namespace of the workload consuming this proto - confirm
+ }
+
+ data = {
+ "api.proto" = file("${path.module}/proto/api.proto") # file() loads the contents of proto/api.proto from this module's directory
+ }
+}
diff --git a/modules/observability/dashboards/cluster.json b/modules/observability/dashboards/cluster.json
index bb93e0a..ea03265 100644
--- a/modules/observability/dashboards/cluster.json
+++ b/modules/observability/dashboards/cluster.json
@@ -18,7 +18,7 @@
"editable": true,
"fiscalYearStartMonth": 0,
"graphTooltip": 0,
- "id": 0,
+ "id": 9,
"links": [],
"panels": [
{
@@ -415,7 +415,7 @@
"uid": "P4169E866C3094E38"
},
"editorMode": "code",
- "expr": "sum(container_fs_usage_bytes{device=~\"^/dev/[sv]d[a-z][1-9]$\",id=\"/\",instance=~\"^$Node$\"}) / sum(container_fs_limit_bytes{device=~\"^/dev/[sv]d[a-z][1-9]$\",id=\"/\",instance=~\"^$Node$\"}) * 100",
+ "expr": "sum(container_fs_usage_bytes{id=\"/\",instance=~\"^$Node$\"}) / sum(container_fs_limit_bytes{id=\"/\",instance=~\"^$Node$\"}) * 100",
"interval": "10s",
"intervalFactor": 1,
"legendFormat": "",
@@ -780,7 +780,7 @@
}
]
},
- "unit": "bytes"
+ "unit": "decbytes"
},
"overrides": []
},
@@ -817,7 +817,7 @@
"uid": "P4169E866C3094E38"
},
"editorMode": "code",
- "expr": "sum(container_fs_usage_bytes{device=~\"^/dev/[sv]d[a-z][1-9]$\",id=\"/\",instance=~\"^$Node$\"})",
+ "expr": "max(container_fs_usage_bytes{device=~\"^/dev/([sv]d[a-z][1-9]|nvme[0-9]n[0-9]p[0-9])$\",id=\"/\",instance=~\"^$Node$\"})",
"interval": "10s",
"intervalFactor": 1,
"range": true,
@@ -897,7 +897,7 @@
"uid": "P4169E866C3094E38"
},
"editorMode": "code",
- "expr": "sum(container_fs_limit_bytes{device=~\"^/dev/[sv]d[a-z][1-9]$\",id=\"/\",instance=~\"^$Node$\"})",
+ "expr": "max(container_fs_limit_bytes{device=~\"^/dev/([sv]d[a-z][1-9]|nvme[0-9]n[0-9]p[0-9])$\",id=\"/\",instance=~\"^$Node$\"})",
"interval": "10s",
"intervalFactor": 1,
"range": true,
@@ -2123,5 +2123,5 @@
"timezone": "browser",
"title": "Kubernetes Cluster Level Monitoring",
"uid": "c98856d7-3f9c-4ab2-bf54-11fd6c046ef1",
- "version": 4
+ "version": 1
}
\ No newline at end of file
diff --git a/modules/observability/dashboards/network.json b/modules/observability/dashboards/network.json
index a425d62..a3817d5 100644
--- a/modules/observability/dashboards/network.json
+++ b/modules/observability/dashboards/network.json
@@ -36,8 +36,8 @@
},
{
"datasource": {
- "type": "prometheus",
- "uid": "P4169E866C3094E38"
+ "type": "victoriametrics-logs-datasource",
+ "uid": "PD775F2863313E6C7"
},
"fieldConfig": {
"defaults": {
@@ -51,13 +51,13 @@
}
]
},
- "unit": "short"
+ "unit": "bytes"
},
"overrides": []
},
"gridPos": {
"h": 8,
- "w": 6,
+ "w": 13,
"x": 0,
"y": 1
},
@@ -82,78 +82,23 @@
"pluginVersion": "12.3.3",
"targets": [
{
- "editorMode": "code",
- "expr": "sum(rate(netobserv_workload_egress_bytes_total{SrcK8S_Namespace=~\"$SrcNamespace\"}[5m]))",
- "legendFormat": "__auto",
- "range": true,
- "refId": "A"
- }
- ],
- "title": "Total Cluster Egress Rate",
- "type": "stat"
- },
- {
- "datasource": {
- "type": "prometheus",
- "uid": "P4169E866C3094E38"
- },
- "fieldConfig": {
- "defaults": {
- "mappings": [],
- "thresholds": {
- "mode": "absolute",
- "steps": [
- {
- "color": "green",
- "value": 0
- }
- ]
+ "datasource": {
+ "type": "victoriametrics-logs-datasource",
+ "uid": "PD775F2863313E6C7"
},
- "unit": "short"
- },
- "overrides": []
- },
- "gridPos": {
- "h": 8,
- "w": 6,
- "x": 6,
- "y": 1
- },
- "id": 5,
- "options": {
- "colorMode": "background",
- "graphMode": "none",
- "justifyMode": "auto",
- "orientation": "auto",
- "percentChangeColorMode": "standard",
- "reduceOptions": {
- "calcs": [
- "lastNotNull"
- ],
- "fields": "",
- "values": false
- },
- "showPercentChange": false,
- "textMode": "auto",
- "wideLayout": true
- },
- "pluginVersion": "12.3.3",
- "targets": [
- {
"editorMode": "code",
- "expr": "sum(rate(netobserv_workload_ingress_bytes_total{DstK8S_Namespace=~\"$DstNamespace\"}[5m]))",
- "legendFormat": "__auto",
- "range": true,
+ "expr": "service.name:=\"goldmane-otel-adapter\"\n| unpack_json\n| filter flow.Key.reporter:=\"Src\" AND flow.Key.sourceNamespace:~\"^$SrcNamespace$\"\n| stats sum(flow.bytesOut) as Total_Egress_Bytes",
+ "queryType": "statsRange",
"refId": "A"
}
],
- "title": "Total Cluster Ingress Rate",
+ "title": "Total Cluster Egress",
"type": "stat"
},
{
"datasource": {
- "type": "prometheus",
- "uid": "P4169E866C3094E38"
+ "type": "victoriametrics-logs-datasource",
+ "uid": "PD775F2863313E6C7"
},
"fieldConfig": {
"defaults": {
@@ -173,11 +118,11 @@
},
"gridPos": {
"h": 8,
- "w": 6,
- "x": 12,
+ "w": 11,
+ "x": 13,
"y": 1
},
- "id": 6,
+ "id": 5,
"options": {
"colorMode": "background",
"graphMode": "none",
@@ -198,72 +143,17 @@
"pluginVersion": "12.3.3",
"targets": [
{
- "editorMode": "code",
- "expr": "histogram_quantile(0.95, sum(rate(netobserv_workload_rtt_seconds_bucket{SrcK8S_Namespace=~\"$SrcNamespace\"}[5m])) by (le))",
- "legendFormat": "__auto",
- "range": true,
- "refId": "A"
- }
- ],
- "title": "Global TCP Latency (P95)",
- "type": "stat"
- },
- {
- "datasource": {
- "type": "prometheus",
- "uid": "P4169E866C3094E38"
- },
- "fieldConfig": {
- "defaults": {
- "mappings": [],
- "thresholds": {
- "mode": "absolute",
- "steps": [
- {
- "color": "green",
- "value": 0
- }
- ]
+ "datasource": {
+ "type": "victoriametrics-logs-datasource",
+ "uid": "PD775F2863313E6C7"
},
- "unit": "Bps"
- },
- "overrides": []
- },
- "gridPos": {
- "h": 8,
- "w": 6,
- "x": 18,
- "y": 1
- },
- "id": 7,
- "options": {
- "colorMode": "background",
- "graphMode": "none",
- "justifyMode": "auto",
- "orientation": "auto",
- "percentChangeColorMode": "standard",
- "reduceOptions": {
- "calcs": [
- "lastNotNull"
- ],
- "fields": "",
- "values": false
- },
- "showPercentChange": false,
- "textMode": "auto",
- "wideLayout": true
- },
- "pluginVersion": "12.3.3",
- "targets": [
- {
"editorMode": "code",
- "expr": "sum(rate(netobserv_workload_drop_bytes_total{SrcK8S_Namespace=~\"$SrcNamespace\"}[5m]))",
- "legendFormat": "__auto",
- "range": true,
+ "expr": "service.name:=\"goldmane-otel-adapter\"\n| unpack_json\n| filter flow.Key.reporter:=\"Dst\" AND flow.Key.destNamespace:~\"^$DstNamespace$\"\n| stats sum(flow.bytesIn) as Total_Ingress_Bytes",
+ "queryType": "statsRange",
"refId": "A"
}
],
- "title": "Active Network Drops",
+ "title": "Total Cluster Ingress",
"type": "stat"
},
{
@@ -274,74 +164,6 @@
"x": 0,
"y": 9
},
- "id": 2,
- "panels": [],
- "title": "Visual Network Topology",
- "type": "row"
- },
- {
- "datasource": {
- "type": "prometheus",
- "uid": "P4169E866C3094E38"
- },
- "fieldConfig": {
- "defaults": {},
- "overrides": []
- },
- "gridPos": {
- "h": 23,
- "w": 24,
- "x": 0,
- "y": 10
- },
- "id": 1,
- "options": {
- "edges": {},
- "layoutAlgorithm": "force",
- "nodes": {},
- "zoomMode": "greedy"
- },
- "pluginVersion": "12.3.3",
- "targets": [
- {
- "editorMode": "code",
- "exemplar": false,
- "expr": "label_join(\n label_replace(\n label_replace(\n sum by (SrcK8S_Namespace, DstK8S_Namespace) (rate(netobserv_workload_flows_total{SrcK8S_Namespace!=\"\", DstK8S_Namespace!=\"\"}[5m])) > 0,\n \"target\", \"$1\", \"DstK8S_Namespace\", \"(.*)\"\n ),\n \"source\", \"$1\", \"SrcK8S_Namespace\", \"(.*)\"\n ),\n \"id\", \"-\", \"source\", \"target\"\n)",
- "format": "table",
- "instant": true,
- "legendFormat": "__auto",
- "range": false,
- "refId": "A"
- }
- ],
- "title": "Dynamic Namespace Topology Map",
- "transformations": [
- {
- "id": "organize",
- "options": {
- "excludeByName": {
- "DstK8S_Namespace": true,
- "SrcK8S_Namespace": true,
- "Time": true
- },
- "includeByName": {},
- "indexByName": {},
- "renameByName": {
- "Value": "mainStat"
- }
- }
- }
- ],
- "type": "nodeGraph"
- },
- {
- "collapsed": false,
- "gridPos": {
- "h": 1,
- "w": 24,
- "x": 0,
- "y": 33
- },
"id": 8,
"panels": [],
"title": "Cross-Namespace Top Talkers",
@@ -349,8 +171,8 @@
},
{
"datasource": {
- "type": "prometheus",
- "uid": "P4169E866C3094E38"
+ "type": "victoriametrics-logs-datasource",
+ "uid": "PD775F2863313E6C7"
},
"fieldConfig": {
"defaults": {
@@ -382,7 +204,7 @@
},
"showPoints": "auto",
"showValues": false,
- "spanNulls": false,
+ "spanNulls": true,
"stacking": {
"group": "A",
"mode": "none"
@@ -398,10 +220,6 @@
{
"color": "green",
"value": 0
- },
- {
- "color": "red",
- "value": 80
}
]
}
@@ -409,10 +227,10 @@
"overrides": []
},
"gridPos": {
- "h": 8,
- "w": 12,
+ "h": 14,
+ "w": 24,
"x": 0,
- "y": 34
+ "y": 10
},
"id": 9,
"options": {
@@ -431,110 +249,18 @@
"pluginVersion": "12.3.3",
"targets": [
{
- "editorMode": "code",
- "expr": "topk(10, sum by (SrcK8S_Namespace, DstK8S_Namespace) (rate(netobserv_workload_flows_total{SrcK8S_Namespace=~\"$SrcNamespace\", DstK8S_Namespace=~\"$DstNamespace\"}[5m])))",
- "legendFormat": "{{SrcK8S_Namespace}} -> {{DstK8S_Namespace}}",
- "range": true,
- "refId": "A"
- }
- ],
- "title": "Top 10 Cross-Namespace Flows",
- "type": "timeseries"
- },
- {
- "datasource": {
- "type": "prometheus",
- "uid": "P4169E866C3094E38"
- },
- "fieldConfig": {
- "defaults": {
- "color": {
- "fixedColor": "red",
- "mode": "shades"
- },
- "custom": {
- "axisBorderShow": false,
- "axisCenteredZero": false,
- "axisColorMode": "text",
- "axisLabel": "",
- "axisPlacement": "auto",
- "barAlignment": 0,
- "barWidthFactor": 0.6,
- "drawStyle": "line",
- "fillOpacity": 0,
- "gradientMode": "none",
- "hideFrom": {
- "legend": false,
- "tooltip": false,
- "viz": false
- },
- "insertNulls": false,
- "lineInterpolation": "linear",
- "lineWidth": 1,
- "pointSize": 5,
- "scaleDistribution": {
- "type": "linear"
- },
- "showPoints": "auto",
- "showValues": false,
- "spanNulls": false,
- "stacking": {
- "group": "A",
- "mode": "none"
- },
- "thresholdsStyle": {
- "mode": "off"
- }
- },
- "mappings": [],
- "thresholds": {
- "mode": "absolute",
- "steps": [
- {
- "color": "green",
- "value": 0
- },
- {
- "color": "red",
- "value": 80
- }
- ]
+ "datasource": {
+ "type": "victoriametrics-logs-datasource",
+ "uid": "PD775F2863313E6C7"
},
- "unit": "Bps"
- },
- "overrides": []
- },
- "gridPos": {
- "h": 8,
- "w": 12,
- "x": 12,
- "y": 34
- },
- "id": 10,
- "options": {
- "legend": {
- "calcs": [],
- "displayMode": "list",
- "placement": "bottom",
- "showLegend": true
- },
- "tooltip": {
- "hideZeros": false,
- "mode": "single",
- "sort": "none"
- }
- },
- "pluginVersion": "12.3.3",
- "targets": [
- {
"editorMode": "code",
- "expr": "topk(10, sum by (SrcK8S_OwnerName) (rate(netobserv_workload_egress_bytes_total{SrcK8S_Namespace=~\"$SrcNamespace\", DstK8S_Namespace=\"\"}[5m])))",
- "legendFormat": "{{SrcK8S_OwnerName}} -> External",
- "range": true,
+ "expr": "service.name:=\"goldmane-otel-adapter\"\n| unpack_json\n| filter flow.Key.reporter:=\"Src\" \n| stats by (_time: 1m, flow.Key.sourceNamespace, flow.Key.destNamespace) sum(flow.bytesOut) as Bytes\n| sort by (Bytes) desc \n| limit 10",
+ "legendFormat": "{{ flow.Key.sourceNamespace }} -> {{ flow.Key.destNamespace }}",
+ "queryType": "statsRange",
"refId": "A"
}
],
- "title": "Top External Talkers",
+ "title": "Top 10 Cross-Namespace Flows",
"type": "timeseries"
},
{
@@ -543,7 +269,7 @@
"h": 1,
"w": 24,
"x": 0,
- "y": 42
+ "y": 24
},
"id": 11,
"panels": [],
@@ -552,8 +278,8 @@
},
{
"datasource": {
- "type": "prometheus",
- "uid": "P4169E866C3094E38"
+ "type": "victoriametrics-logs-datasource",
+ "uid": "PD775F2863313E6C7"
},
"fieldConfig": {
"defaults": {
@@ -585,7 +311,7 @@
},
"showPoints": "auto",
"showValues": false,
- "spanNulls": false,
+ "spanNulls": true,
"stacking": {
"group": "A",
"mode": "none"
@@ -616,7 +342,7 @@
"h": 8,
"w": 12,
"x": 0,
- "y": 43
+ "y": 25
},
"id": 12,
"options": {
@@ -635,10 +361,14 @@
"pluginVersion": "12.3.3",
"targets": [
{
+ "datasource": {
+ "type": "victoriametrics-logs-datasource",
+ "uid": "PD775F2863313E6C7"
+ },
"editorMode": "code",
- "expr": "topk(10, sum by (SrcK8S_OwnerName) (rate(netobserv_workload_egress_bytes_total{SrcK8S_Namespace=~\"$SrcNamespace\", SrcK8S_OwnerName=~\"$Pod\"}[5m])))",
- "legendFormat": "__auto",
- "range": true,
+ "expr": "service.name:=\"goldmane-otel-adapter\"\n| unpack_json\n| filter flow.Key.reporter:=\"Src\" AND flow.Key.sourceNamespace:~\"^$SrcNamespace$\"\n| stats by (_time: 1m, flow.Key.sourceName) sum(flow.bytesOut) as Egress_Bytes\n| sort by (Egress_Bytes) desc \n| limit 10",
+ "legendFormat": "{{ flow.Key.sourceName }}",
+ "queryType": "statsRange",
"refId": "A"
}
],
@@ -647,8 +377,8 @@
},
{
"datasource": {
- "type": "prometheus",
- "uid": "P4169E866C3094E38"
+ "type": "victoriametrics-logs-datasource",
+ "uid": "PD775F2863313E6C7"
},
"fieldConfig": {
"defaults": {
@@ -680,7 +410,7 @@
},
"showPoints": "auto",
"showValues": false,
- "spanNulls": false,
+ "spanNulls": true,
"stacking": {
"group": "A",
"mode": "none"
@@ -707,7 +437,7 @@
"h": 8,
"w": 12,
"x": 12,
- "y": 43
+ "y": 25
},
"id": 13,
"options": {
@@ -726,218 +456,18 @@
"pluginVersion": "12.3.3",
"targets": [
{
- "editorMode": "code",
- "expr": "topk(10, sum by (DstK8S_OwnerName) (rate(netobserv_workload_ingress_bytes_total{DstK8S_Namespace=~\"$DstNamespace\", DstK8S_OwnerName=~\"$Pod\"}[5m])))",
- "legendFormat": "__auto",
- "range": true,
- "refId": "A"
- }
- ],
- "title": "Top 10 Pods by Ingress",
- "type": "timeseries"
- },
- {
- "collapsed": false,
- "gridPos": {
- "h": 1,
- "w": 24,
- "x": 0,
- "y": 51
- },
- "id": 14,
- "panels": [],
- "title": "TCP Health & Network Drops",
- "type": "row"
- },
- {
- "datasource": {
- "type": "prometheus",
- "uid": "P4169E866C3094E38"
- },
- "fieldConfig": {
- "defaults": {
- "color": {
- "mode": "palette-classic"
- },
- "custom": {
- "axisBorderShow": false,
- "axisCenteredZero": false,
- "axisColorMode": "text",
- "axisLabel": "",
- "axisPlacement": "auto",
- "barAlignment": 0,
- "barWidthFactor": 0.6,
- "drawStyle": "line",
- "fillOpacity": 0,
- "gradientMode": "none",
- "hideFrom": {
- "legend": false,
- "tooltip": false,
- "viz": false
- },
- "insertNulls": false,
- "lineInterpolation": "linear",
- "lineWidth": 1,
- "pointSize": 5,
- "scaleDistribution": {
- "type": "linear"
- },
- "showPoints": "auto",
- "showValues": false,
- "spanNulls": false,
- "stacking": {
- "group": "A",
- "mode": "none"
- },
- "thresholdsStyle": {
- "mode": "off"
- }
- },
- "mappings": [],
- "thresholds": {
- "mode": "absolute",
- "steps": [
- {
- "color": "green",
- "value": 0
- },
- {
- "color": "red",
- "value": 80
- }
- ]
- },
- "unit": "s"
- },
- "overrides": []
- },
- "gridPos": {
- "h": 8,
- "w": 12,
- "x": 0,
- "y": 52
- },
- "id": 15,
- "options": {
- "legend": {
- "calcs": [],
- "displayMode": "list",
- "placement": "bottom",
- "showLegend": true
- },
- "tooltip": {
- "hideZeros": false,
- "mode": "single",
- "sort": "none"
- }
- },
- "pluginVersion": "12.3.3",
- "targets": [
- {
- "editorMode": "code",
- "expr": "histogram_quantile(0.95, sum by (le, SrcK8S_OwnerName) (rate(netobserv_workload_rtt_seconds_bucket{SrcK8S_Namespace=~\"$SrcNamespace\", SrcK8S_OwnerName=~\"$Pod\"}[5m])))",
- "legendFormat": "{{ SrcK8S_OwnerName }}",
- "range": true,
- "refId": "A"
- }
- ],
- "title": "TCP Latency (RTT) by Pod",
- "type": "timeseries"
- },
- {
- "datasource": {
- "type": "prometheus",
- "uid": "P4169E866C3094E38"
- },
- "fieldConfig": {
- "defaults": {
- "color": {
- "fixedColor": "red",
- "mode": "shades"
- },
- "custom": {
- "axisBorderShow": false,
- "axisCenteredZero": false,
- "axisColorMode": "text",
- "axisLabel": "",
- "axisPlacement": "auto",
- "barAlignment": 0,
- "barWidthFactor": 0.6,
- "drawStyle": "line",
- "fillOpacity": 0,
- "gradientMode": "none",
- "hideFrom": {
- "legend": false,
- "tooltip": false,
- "viz": false
- },
- "insertNulls": false,
- "lineInterpolation": "linear",
- "lineWidth": 1,
- "pointSize": 5,
- "scaleDistribution": {
- "type": "linear"
- },
- "showPoints": "auto",
- "showValues": false,
- "spanNulls": false,
- "stacking": {
- "group": "A",
- "mode": "none"
- },
- "thresholdsStyle": {
- "mode": "off"
- }
- },
- "mappings": [],
- "thresholds": {
- "mode": "absolute",
- "steps": [
- {
- "color": "green",
- "value": 0
- },
- {
- "color": "red",
- "value": 80
- }
- ]
+ "datasource": {
+ "type": "victoriametrics-logs-datasource",
+ "uid": "PD775F2863313E6C7"
},
- "unit": "Bps"
- },
- "overrides": []
- },
- "gridPos": {
- "h": 8,
- "w": 12,
- "x": 12,
- "y": 52
- },
- "id": 16,
- "options": {
- "legend": {
- "calcs": [],
- "displayMode": "list",
- "placement": "bottom",
- "showLegend": true
- },
- "tooltip": {
- "hideZeros": false,
- "mode": "single",
- "sort": "none"
- }
- },
- "pluginVersion": "12.3.3",
- "targets": [
- {
"editorMode": "code",
- "expr": "sum by (SrcK8S_OwnerName) (rate(netobserv_workload_drop_bytes_total{SrcK8S_Namespace=~\"$SrcNamespace\", SrcK8S_OwnerName=~\"$Pod\"}[5m]))",
- "legendFormat": "Dropped: {{SrcK8S_OwnerName}}",
- "range": true,
+ "expr": "service.name:=\"goldmane-otel-adapter\"\n| unpack_json\n| filter flow.Key.reporter:=\"Dst\" AND flow.Key.destNamespace:~\"^$DstNamespace$\"\n| stats by (_time: 1m, flow.Key.destName) sum(flow.bytesIn) as Ingress_Bytes\n| sort by (Ingress_Bytes) desc \n| limit 10",
+ "legendFormat": "{{ flow.Key.destName }}",
+ "queryType": "statsRange",
"refId": "A"
}
],
- "title": "Dropped Traffic by Pod",
+ "title": "Top 10 Pods by Ingress",
"type": "timeseries"
},
{
@@ -946,7 +476,7 @@
"h": 1,
"w": 24,
"x": 0,
- "y": 60
+ "y": 33
},
"id": 17,
"panels": [],
@@ -966,18 +496,20 @@
"h": 24,
"w": 24,
"x": 0,
- "y": 61
+ "y": 34
},
"id": 18,
"options": {
"dedupStrategy": "none",
- "enableInfiniteScrolling": false,
+ "detailsMode": "inline",
+ "enableInfiniteScrolling": true,
"enableLogDetails": true,
"prettifyLogMessage": true,
"showControls": true,
"showLabels": true,
"showTime": true,
"sortOrder": "Descending",
+ "syntaxHighlighting": true,
"wrapLogMessage": true
},
"pluginVersion": "12.3.3",
@@ -989,12 +521,12 @@
},
"direction": "desc",
"editorMode": "code",
- "expr": "source.k8s.namespace.name: \"$SrcNamespace\"",
+ "expr": "service.name:=\"goldmane-otel-adapter\"\n| unpack_json\n| filter flow.Key.sourceNamespace:~\"^$SrcNamespace$\" OR flow.Key.destNamespace:~\"^$SrcNamespace$\"",
"queryType": "instant",
"refId": "A"
}
],
- "title": "Real-Time eBPF Network Logs",
+ "title": "Real-Time Network Logs",
"type": "logs"
}
],
@@ -1013,16 +545,21 @@
"postgres"
]
},
- "definition": "label_values(netobserv_workload_egress_bytes_total,SrcK8S_Namespace)",
+ "datasource": {
+ "type": "victoriametrics-logs-datasource",
+ "uid": "PD775F2863313E6C7"
+ },
+ "definition": "service.name:=\"goldmane-otel-adapter\" | unpack_json | stats by (flow.Key.sourceNamespace) count() | keep flow.Key.sourceNamespace",
"includeAll": true,
"label": "Source Namespace",
- "multi": true,
"name": "SrcNamespace",
"options": [],
"query": {
- "qryType": 1,
- "query": "label_values(netobserv_workload_egress_bytes_total,SrcK8S_Namespace)",
- "refId": "PrometheusVariableQueryEditor-VariableQuery"
+ "field": "flow.Key.sourceNamespace",
+ "limit": 25,
+ "query": "service.name:=\"goldmane-otel-adapter\" | unpack_json | stats by (flow.Key.sourceNamespace) count() | keep flow.Key.sourceNamespace",
+ "refId": "VictoriaLogsVariableQueryEditor-VariableQuery",
+ "type": "fieldValue"
},
"refresh": 1,
"regex": "",
@@ -1037,16 +574,21 @@
"garage"
]
},
- "definition": "label_values(netobserv_workload_ingress_bytes_total,DstK8S_Namespace)",
+ "datasource": {
+ "type": "victoriametrics-logs-datasource",
+ "uid": "PD775F2863313E6C7"
+ },
+ "definition": "service.name:=\"goldmane-otel-adapter\" | unpack_json | stats by (flow.Key.destNamespace) count() | keep flow.Key.destNamespace",
"includeAll": true,
"label": "Destination Namespace",
- "multi": true,
"name": "DstNamespace",
"options": [],
"query": {
- "qryType": 1,
- "query": "label_values(netobserv_workload_ingress_bytes_total,DstK8S_Namespace)",
- "refId": "PrometheusVariableQueryEditor-VariableQuery"
+ "field": "flow.Key.destNamespace",
+ "limit": 25,
+ "query": "service.name:=\"goldmane-otel-adapter\" | unpack_json | stats by (flow.Key.destNamespace) count() | keep flow.Key.destNamespace",
+ "refId": "VictoriaLogsVariableQueryEditor-VariableQuery",
+ "type": "fieldValue"
},
"refresh": 1,
"regex": "",
@@ -1061,16 +603,22 @@
"postgresql-cluster"
]
},
- "definition": "label_values(netobserv_workload_egress_bytes_total{SrcK8S_Namespace=\"$SrcNamespace\"},SrcK8S_OwnerName)",
+ "datasource": {
+ "type": "victoriametrics-logs-datasource",
+ "uid": "PD775F2863313E6C7"
+ },
+ "definition": "service.name:=\"goldmane-otel-adapter\" | unpack_json | filter flow.Key.sourceNamespace:=\"$SrcNamespace\" | stats by (flow.Key.sourceName) count() | keep flow.Key.sourceName",
"includeAll": true,
"label": "Pod Name",
"multi": true,
"name": "Pod",
"options": [],
"query": {
- "qryType": 1,
- "query": "label_values(netobserv_workload_egress_bytes_total{SrcK8S_Namespace=\"$SrcNamespace\"},SrcK8S_OwnerName)",
- "refId": "PrometheusVariableQueryEditor-VariableQuery"
+ "field": "flow.Key.sourceName",
+ "limit": 1000,
+ "query": "service.name:=\"goldmane-otel-adapter\" | unpack_json | filter flow.Key.sourceNamespace:=\"$SrcNamespace\" | stats by (flow.Key.sourceName) count() | keep flow.Key.sourceName",
+ "refId": "VictoriaLogsVariableQueryEditor-VariableQuery",
+ "type": "fieldValue"
},
"refresh": 1,
"regex": "",
diff --git a/modules/observability/network_observability.tf b/modules/observability/network_observability.tf
index de6c665..5e74924 100644
--- a/modules/observability/network_observability.tf
+++ b/modules/observability/network_observability.tf
@@ -1,97 +1,91 @@
-resource "kubernetes_manifest" "network_observability" {
- manifest = {
- apiVersion = "flows.netobserv.io/v1beta2"
- kind = "FlowCollector"
- metadata = {
- // The operator expects this specific name
- name = "cluster"
+# Stream network flow logs from the Goldmane API and print them to stdout as JSON
+resource "kubernetes_deployment" "goldmane_otel_adapter" {
+ metadata {
+ name = "goldmane-otel-adapter"
+ namespace = "calico-system"
+ }
+
+ spec {
+ replicas = 1
+
+ selector {
+ match_labels = {
+ app = "goldmane-otel-adapter"
+ }
}
- spec = {
- namespace = kubernetes_namespace.namespace.metadata[0].name
-
- // "Direct" mode sends logs straight to OTel
- deploymentModel = "Direct"
- agent = {
- type = "eBPF"
- ebpf = {
- // Required for "PacketDrop" to read kernel drop reasons
- privileged = true
-
- // Enable drop detection and TCP round trips metrics
- features = ["PacketDrop", "FlowRTT"]
+ template {
+ metadata {
+ labels = {
+ app = "goldmane-otel-adapter"
+ }
+ }
+
+ spec {
+ container {
+ name = "scraper"
+ image = "alpine:latest"
+ command = ["/bin/sh", "-c"]
- // 25 means 1 in 25 packets
- sampling = 25
- cacheActiveTimeout = "15s"
- cacheMaxFlows = 100000
+ args = [
+ <<-EOF
+ # Install jq and curl (the base image ships neither)
+ apk add --no-cache jq curl
+
+ # Download grpcurl into the PATH; -f makes curl fail on HTTP errors instead of piping an error page into tar
+ curl -fsSL https://github.com/fullstorydev/grpcurl/releases/download/v1.9.3/grpcurl_1.9.3_linux_x86_64.tar.gz | tar -xzf - -C /usr/local/bin grpcurl
+
+ # Stream flows from "now" (start_time_gte=0) over mTLS; api.proto requires aggregation_interval to be 15s. jq prints one compact JSON object per line
+ grpcurl -import-path /etc/proto -proto api.proto \
+ -cacert /etc/pki/tls/certs/tigera-ca-bundle.crt \
+ -cert /goldmane-key-pair/tls.crt \
+ -key /goldmane-key-pair/tls.key \
+ -d '{"start_time_gte": 0, "aggregation_interval": 15}' \
+ goldmane.calico-system.svc.cluster.local:7443 goldmane.Flows/Stream \
+ | jq --unbuffered -c '.'
+ EOF
+ ]
+
+ volume_mount {
+ name = "proto-file"
+ mount_path = "/etc/proto"
+ read_only = true
+ }
- // Ignore loopback traffic
- excludeInterfaces = ["lo"]
+ volume_mount {
+ name = "goldmane-ca-bundle"
+ mount_path = "/etc/pki/tls/certs"
+ read_only = true
+ }
- // Resource Constraints
- resources = {
- requests = {
- cpu = "50m"
- memory = "100Mi"
- }
- limits = {
- cpu = "500m"
- memory = "512Mi"
- }
+ volume_mount {
+ name = "goldmane-key-pair"
+ mount_path = "/goldmane-key-pair"
+ read_only = true
}
}
- }
- // Disable default stack for the netobserv instance
- loki = {
- enable = false
- }
- prometheus = {
- querier = {
- enable = false
+ volume {
+ name = "proto-file"
+ config_map {
+ name = kubernetes_config_map.goldmane_api_proto.metadata[0].name
+ }
}
- }
- consolePlugin = {
- enable = false
- }
- // Enrichment settings
- processor = {
- logTypes = "Flows"
- metrics = {
- // Disable agent-side metrics generation to save CPU
- disableAlerts = ["NetObservLokiError", "NetObservNoFlows"]
+ volume {
+ name = "goldmane-ca-bundle"
+ config_map {
+ name = "goldmane-ca-bundle"
+ }
}
- }
- // Pushing metrics to the OTel Collector
- exporters = [
- {
- type = "OpenTelemetry"
- openTelemetry = {
- targetHost = "otel-collector.${kubernetes_namespace.namespace.metadata[0].name}.svc.cluster.local"
- targetPort = 4317
- protocol = "grpc"
-
- logs = {
- enable = true
- pushTimeInterval = "20s"
- expiryTime = "2m"
- }
-
- metrics = {
- enable = true
- }
- tls = {
- enable = false
- insecureSkipVerify = true
- }
+ volume {
+ name = "goldmane-key-pair"
+ secret {
+ secret_name = "goldmane-key-pair"
}
}
- ]
+ }
}
}
-
- depends_on = [ helm_release.otel_collector ]
}
diff --git a/modules/observability/otel-collector.tf b/modules/observability/otel-collector.tf
index 5e9880a..6fc12b9 100644
--- a/modules/observability/otel-collector.tf
+++ b/modules/observability/otel-collector.tf
@@ -68,7 +68,7 @@ resource "helm_release" "otel_collector" {
}
// Scrape Node CPU/RAM/Disk
hostMetrics = {
- enabled = true
+ enabled = false
}
// Scrape Pod CPU/RAM (Kubelet)
kubeletMetrics = {
@@ -85,6 +85,17 @@ resource "helm_release" "otel_collector" {
// Custom Configuration for receivers
config = {
receivers = {
+ // Custom Host Metrics receiver configuration
+ hostmetrics = {
+ collection_interval = "10s"
+ scrapers = {
+ cpu = {}
+ memory = {}
+ disk = {}
+ network = {}
+ load = {}
+ }
+ }
// OTLP Endpoints to send stuff to this collector
otlp = {
protocols = {
@@ -206,17 +217,6 @@ resource "helm_release" "otel_collector" {
limit_mib = 400
spike_limit_mib = 100
}
- // Tag Netobserv logs appropriately
- "resource/netobserv" = {
- attributes = [
- {
- key = "log.source"
- value = "netobserv"
- action = "insert"
- }
- ]
- }
-
transform = {
// If a metric comes in missing its namespace or pod label,
// look at the underlying server/container it came from.
@@ -276,11 +276,6 @@ resource "helm_release" "otel_collector" {
processors = ["memory_limiter", "batch"]
exporters = ["debug"]
}
- "logs/netobserv" = {
- receivers = ["otlp"]
- processors = ["memory_limiter", "resource/netobserv", "batch"]
- exporters = ["otlphttp"]
- }
}
}
}
diff --git a/modules/observability/proto/api.proto b/modules/observability/proto/api.proto
new file mode 100644
index 0000000..36917fb
--- /dev/null
+++ b/modules/observability/proto/api.proto
@@ -0,0 +1,570 @@
+syntax = "proto3";
+
+package goldmane;
+
+option go_package = "./proto";
+
+// Flows provides APIs for querying aggregated Flow data.
+//
+// The returned Flows will be aggregated across cluster nodes, as well as the specified aggregation
+// time interval.
+service Flows {
+ // List is an API call to query for one or more Flows.
+ rpc List(FlowListRequest) returns (FlowListResult);
+
+ // Stream is an API call to return a long running stream of new Flows as they are generated.
+ rpc Stream(FlowStreamRequest) returns (stream FlowResult);
+
+ // FilterHints can be used to discover available filter criteria, such as
+ // Namespaces and source / destination names. It allows progressive filtering of criteria based on
+ // other filters. i.e., return the flow destinations given a source namespace.
+ // Note that this API provides hints to the UI based on past flows and other values may be valid.
+ rpc FilterHints(FilterHintsRequest) returns (FilterHintsResult);
+}
+
+// FlowListRequest defines a message to request a particular selection of aggregated Flow objects.
+message FlowListRequest {
+ // StartTimeGte specifies the beginning of a time window with which to filter Flows. Flows
+ // will be returned only if their start time is greater than or equal to the given value.
+ //
+ // - A value of zero indicates the oldest start time available by the server.
+ // - A value greater than zero indicates an absolute time in seconds since the Unix epoch.
+ // - A value less than zero indicates a relative number of seconds from "now", as determined by the server.
+ int64 start_time_gte = 1;
+
+ // StartTimeLt specifies the end of a time window with which to filter flows. Flows will
+ // be returned only if their start time occurs before the requested time.
+ //
+ // - A value of zero means "now", as determined by the server at the time of request.
+ // - A value greater than zero indicates an absolute time in seconds since the Unix epoch.
+ // - A value less than zero indicates a relative number of seconds from "now", as determined by the server.
+ int64 start_time_lt = 2;
+
+ // Page specifies the page to return. It requires that PageSize is also specified in order
+ // to determine page boundaries. Note that pages may change over time as new flow data is collected or expired.
+ // Querying the same page at different points in time may return different results.
+ int64 page = 3;
+
+ // PageSize configures the maximum number of results to return as part of this query.
+ int64 page_size = 4;
+
+ // SortBy configures how to sort the results of this query. By default flows are sorted by start time.
+ // The returned list is sorted by each sort option, in order, using the next sort option in the list as a tie-breaker.
+ // Note: At the moment, only a single sort option is supported.
+ repeated SortOption sort_by = 5;
+
+ // Filter allows specification of one or more criteria on which to filter the returned Flows.
+ Filter filter = 6;
+
+ // AggregationInterval is the width of the time window in seconds across which to aggregate when generating
+ // Flows to return. This must be a multiple of 15.
+ int64 aggregation_interval = 7;
+}
+
+// FlowListResult is a message containing a list of FlowResults and ListMetadata.
+message FlowListResult {
+ // Meta specifies metadata about the returned flows.
+ ListMetadata meta = 1;
+
+ // Flows is a list of FlowResult objects.
+ repeated FlowResult flows = 2;
+}
+
+// FlowStreamRequest defines a message to request a stream of aggregated Flows.
+message FlowStreamRequest {
+ // StartTimeGte specifies the beginning of a time window from which to stream Flows. Flows
+ // will be streamed only if their start time is greater than or equal to the given value.
+ //
+ // - A value of zero means "now", as determined by the server at the time of request.
+ // - A value greater than zero indicates an absolute time in seconds since the Unix epoch.
+ // - A value less than zero indicates a relative number of seconds from "now", as determined by the server.
+ int64 start_time_gte = 1;
+
+ // Filter allows specification of one or more criteria on which to filter the returned Flows.
+ Filter filter = 2;
+
+ // AggregationInterval defines both the frequency of streamed updates for each Flow, and the amount of time that FlowResult covers.
+ // It must always be 15s.
+ //
+ // Every AggregationInterval the server must send a FlowResult containing the aggregated data for that Flow from a
+ // time interval of width AggregationInterval.
+ //
+ // For a Flow that has continuous traffic, the server should send updates covering the range
+ // [now-2*AggregationInterval, now-AggregationInterval] so that the data is reasonably likely to be complete.
+ int64 aggregation_interval = 3;
+}
+
+message FilterHintsRequest {
+ // Type is type of Filter to query.
+ FilterType type = 1;
+
+ // Filter is a set of filter criteria used to narrow down returned results.
+ Filter filter = 2;
+
+ // StartTimeGte specifies the beginning of a time window with which to filter (inclusive).
+ //
+ // - A value of zero indicates the oldest start time available by the server.
+ // - A value greater than zero indicates an absolute time in seconds since the Unix epoch.
+ // - A value less than zero indicates a relative number of seconds from "now", as determined by the server.
+ int64 start_time_gte = 3;
+
+ // StartTimeLt specifies the end of a time window with which to filter.
+ //
+ // - A value of zero means "now", as determined by the server at the time of request.
+ // - A value greater than zero indicates an absolute time in seconds since the Unix epoch.
+ // - A value less than zero indicates a relative number of seconds from "now", as determined by the server.
+ int64 start_time_lt = 4;
+
+ // Page specifies the page number to return. It requires that PageSize is also specified in order
+ // to determine page boundaries. Note that pages may change over time as new flow data is collected or expired.
+ // Querying the same page at different points in time may return different results.
+ int64 page = 5;
+
+ // PageSize configures the maximum number of results to return as part of this query.
+ int64 page_size = 6;
+}
+
+message FilterHintsResult {
+ // Meta specifies list information about the hints returned.
+ ListMetadata meta = 1;
+
+ // Hints contains the values that flows can be filtered on.
+ repeated FilterHint hints = 2;
+}
+
+// ListMetadata contains information about a returned list of items, such as pagination information (total number of pages
+// and total number of results).
+message ListMetadata {
+ // totalPages is the total number of pages that exist given that a pageSize was specified.
+ int64 totalPages = 1;
+
+ // totalResults is the total number of results that would have been returned if no pagination was specified.
+ int64 totalResults = 2;
+}
+
+message FilterHint {
+ string value = 1;
+}
+
+// FilterType specifies which fields on the underlying Flow data to collect.
+enum FilterType {
+ FilterTypeUnspecified = 0;
+ FilterTypeDestName = 1;
+ FilterTypeSourceName = 2;
+ FilterTypeDestNamespace = 3;
+ FilterTypeSourceNamespace = 4;
+ FilterTypePolicyTier = 5;
+ FilterTypePolicyName = 6;
+ FilterTypePolicyKind = 7;
+ FilterTypePolicyNamespace = 8;
+}
+
+// FlowResult wraps a Flow object with additional metadata.
+message FlowResult {
+ // ID is an opaque integer value ID that can be used to identify a Flow, and is 1:1 with the FlowKey.
+ // Note that this ID is not valid across server restarts. Its primary use-case is for correlating FlowResult
+ // updates from a Stream request.
+ int64 id = 1;
+
+ // The Flow object itself.
+ Flow flow = 2;
+}
+
+enum Action {
+ ActionUnspecified = 0;
+ Allow = 1;
+ Deny = 2;
+ Pass = 3;
+}
+
+// Filter defines criteria for selecting a set of Flows based on their parameters.
+message Filter {
+ // SourceNames allows filtering on the source name field. Combined using logical OR.
+ repeated StringMatch source_names = 1;
+
+ // SourceNamespaces filters on the source namespace field. Combined using logical OR.
+ repeated StringMatch source_namespaces = 2;
+
+ // DestNames filters on the destination name field. Combined using logical OR.
+ repeated StringMatch dest_names = 3;
+
+ // DestNamespaces filters on the destination namespace field. Combined using logical OR.
+ repeated StringMatch dest_namespaces = 4;
+
+ // Protocols filters on the protocol field. Combined using logical OR.
+ repeated StringMatch protocols = 5;
+
+ // DestPorts filters on the port field. Combined using logical OR.
+ repeated PortMatch dest_ports = 6;
+
+ // Actions filters on the action field. Combined using logical OR.
+ repeated Action actions = 7;
+
+ // Policies matches on policy fields. Combined using logical OR.
+ repeated PolicyMatch policies = 8;
+
+ // Reporter filters on the reporter field.
+ Reporter reporter = 9;
+
+ // Pending/Staged Actions filters on the action field. Combined using logical OR.
+ repeated Action pending_actions = 10;
+}
+
+enum MatchType {
+ // Match the value exactly.
+ Exact = 0;
+
+ // Use fuzzy matching on the value.
+ Fuzzy = 1;
+}
+
+message StringMatch {
+ string value = 1;
+ MatchType type = 2;
+}
+
+message PortMatch {
+ int64 port = 1;
+}
+
+message SortOption {
+ // SortBy declares the field by which to sort.
+ SortBy sort_by = 1;
+}
+
+// PolicyMatch defines criteria for matching one or more policy rules within a Flow's
+// policy trace.
+message PolicyMatch {
+ PolicyKind kind = 1;
+ string tier = 2;
+ string namespace = 3;
+ string name = 4;
+ Action action = 5;
+}
+
+enum PolicyKind {
+ // Unspecified
+ KindUnspecified = 0;
+
+ // Calico policy types.
+ CalicoNetworkPolicy = 1;
+ GlobalNetworkPolicy = 2;
+ StagedNetworkPolicy = 3;
+ StagedGlobalNetworkPolicy = 4;
+ StagedKubernetesNetworkPolicy = 5;
+
+ // Native Kubernetes types.
+ NetworkPolicy = 6;
+ ClusterNetworkPolicy = 7;
+
+ // Calico Profiles.
+ Profile = 9;
+ EndOfTier = 10;
+}
+
+enum SortBy {
+ Time = 0;
+ DestName = 1;
+ DestNamespace = 2;
+ DestType = 3;
+ SourceName = 4;
+ SourceNamespace = 5;
+ SourceType = 6;
+}
+
+// FlowCollector provides APIs capable of receiving streams of Flow data from cluster nodes.
+service FlowCollector {
+ // Connect receives a connection that may stream one or more FlowUpdates. A FlowReceipt is returned
+ // to the client by the server after each FlowUpdate.
+ //
+ // Following a connection or reconnection to the server, clients should send duplicates of previously transmitted FlowUpdates
+ // in order to allow the server to rebuild its cache, as well as any new FlowUpdates that have not previously been transmitted.
+ // The server is responsible for deduplicating where needed.
+ rpc Connect(stream FlowUpdate) returns (stream FlowReceipt);
+}
+
+// FlowReceipt is a response from the server to a client after publishing a Flow.
+message FlowReceipt {}
+
+// FlowUpdate wraps a Flow with additional metadata.
+message FlowUpdate {
+ // Flow contains the actual flow being sent.
+ Flow flow = 1;
+}
+
+enum EndpointType {
+ // For queries, unspecified means "do not filter on this field".
+ EndpointTypeUnspecified = 0;
+
+ // WorkloadEndpoint represents an application endpoint with its own network identity. For example,
+ // a Kubernetes Pod.
+ WorkloadEndpoint = 1;
+
+ // HostEndpoint represents a host machine.
+ HostEndpoint = 2;
+
+ // NetworkSet represents an address from within a configured projectcalico.org/v3 NetworkSet or
+ // GlobalNetworkSet.
+ NetworkSet = 3;
+
+ // Network represents an endpoint on a public or private network not known by Calico. For example,
+ // traffic from the public internet or private LAN not covered by a NetworkSet.
+ Network = 4;
+}
+
+enum Reporter {
+ // For queries, unspecified means "do not filter on this field".
+ ReporterUnspecified = 0;
+ Src = 1;
+ Dst = 2;
+}
+
+// FlowKey includes the identifying fields for a Flow.
+// - Source: Name, namespace, type, and labels.
+// - Destination: Name, namespace, type, labels and port
+// - Action taken on the connection.
+// - Reporter (i.e., measured at source or destination).
+// - Protocol of the connection (TCP, UDP, etc.).
+message FlowKey {
+ // SourceName is the name of the source for this Flow.
+ // The value is contextualized by the source_type field:
+ // - For WorkloadEndpoint, this represents a set of pods that share a GenerateName.
+ // - For HostEndpoint, this is the host endpoint name.
+ // - For NetworkSet, it is the name of the network set.
+ // - For Network, this is either "pub" for a public network, or "pvt" for a private network.
+ string source_name = 1;
+
+ // SourceNamespace is the namespace of the source pods for this flow.
+ string source_namespace = 2;
+
+ // SourceType is the type of the source, used to contextualize the source
+ // name and namespace fields.
+ EndpointType source_type = 3;
+
+ // DestName is the name of the destination for this Flow.
+ // The value is contextualized by the dest_type field:
+ // - For WorkloadEndpoint, this represents a set of pods that share a GenerateName.
+ // - For HostEndpoint, this is the host endpoint name.
+ // - For NetworkSet, it is the name of the network set.
+ // - For Network, this is either "pub" for a public network, or "pvt" for a private network.
+ string dest_name = 4;
+
+ // DestNamespace is the namespace of the destination pods for this flow.
+ string dest_namespace = 5;
+
+ // DestType is the type of the destination, used to contextualize the dest
+ // name and namespace fields.
+ EndpointType dest_type = 6;
+
+ // DestPort is the destination port on the specified protocol accessed by this flow.
+ int64 dest_port = 7;
+
+ // DestServiceName is the name of the destination service, if any.
+ string dest_service_name = 8;
+
+ // DestServiceNamespace is the namespace of the destination service, if any.
+ string dest_service_namespace = 9;
+
+ // DestServicePortName is the name of the port on the destination service, if any.
+ string dest_service_port_name = 10;
+
+ // DestServicePort is the port number on the destination service.
+ int64 dest_service_port = 11;
+
+ // Proto is the L4 protocol for this flow. For example, TCP, UDP, SCTP, ICMP.
+ string proto = 12;
+
+ // Reporter is either "src" or "dst", depending on whether this flow was generated
+ // at the initiating or terminating end of the connection attempt.
+ Reporter reporter = 13;
+
+ // Action is the ultimate action taken on the flow.
+ Action action = 14;
+
+ // Policies includes an entry for each policy rule that took an action on the connections
+ // aggregated into this flow.
+ PolicyTrace policies = 15;
+}
+
+// Flow is a message representing statistics gathered about connections that share common fields,
+// aggregated across either time, nodes, or both.
+message Flow {
+ // Key includes the identifying fields for this flow.
+ FlowKey Key = 1;
+
+ // StartTime is the start time for this flow. It is represented as the number of
+ // seconds since the UNIX epoch.
+ int64 start_time = 2;
+
+ // EndTime is the end time for this flow. It is always at least one aggregation
+ // interval after the start time.
+ int64 end_time = 3;
+
+ // SourceLabels contains the intersection of labels that appear on all source
+ // pods that contributed to this flow.
+ repeated string source_labels = 4;
+
+ // DestLabels contains the intersection of labels that appear on all destination
+ // pods that contributed to this flow.
+ repeated string dest_labels = 5;
+
+ // Statistics.
+ int64 packets_in = 6;
+ int64 packets_out = 7;
+ int64 bytes_in = 8;
+ int64 bytes_out = 9;
+
+ // NumConnectionsStarted tracks the total number of new connections recorded for this Flow. It counts each
+ // connection attempt that matches the FlowKey that was made between this Flow's StartTime and EndTime.
+ int64 num_connections_started = 10;
+
+ // NumConnectionsCompleted tracks the total number of completed TCP connections recorded for this Flow. It counts each
+ // connection that matches the FlowKey that was completed between this Flow's StartTime and EndTime.
+ int64 num_connections_completed = 11;
+
+ // NumConnectionsLive tracks the total number of still active connections recorded for this Flow. It counts each
+ // connection that matches the FlowKey that was active at this Flow's EndTime.
+ int64 num_connections_live = 12;
+}
+
+message PolicyTrace {
+ // EnforcedPolicies shows the active dataplane policy rules traversed by this Flow.
+ repeated PolicyHit enforced_policies = 1;
+
+ // PendingPolicies shows the expected policy rules traversed by this Flow when including
+ // staged policies.
+ repeated PolicyHit pending_policies = 2;
+}
+
+// PolicyHit represents a policy rule that was traversed by this flow. It can be either an enforced policy hit
+// from the dataplane, or a staged policy hit that is not yet active.
+message PolicyHit {
+ // Kind corresponds to the resource Kind for the policy.
+ PolicyKind kind = 1;
+
+ // Namespace is the Kubernetes namespace of the Policy, if namespaced. It is empty for global /
+ // cluster-scoped policy kinds.
+ string namespace = 2;
+
+ // Name is the Name of the policy object.
+ string name = 3;
+
+ // Tier is the Tier of the policy object.
+ string tier = 4;
+
+ // Action is the action taken by this policy rule.
+ Action action = 5;
+
+ // PolicyIndex is the order of the Policy among all policies traversed.
+ int64 policy_index = 6;
+
+ // RuleIndex is the order of the Rule within the Policy rules.
+ int64 rule_index = 7;
+
+ // Trigger indicates the first policy that selected this Flow and thus triggered the tier's
+ // end-of-tier action. This is only valid for kind=EndOfTier, and is nil otherwise.
+ PolicyHit trigger = 8;
+}
+
+// Statistics provides APIs for retrieving Flow statistics.
+service Statistics {
+ // List returns statistics data for the given request. One StatisticsResult will be returned for
+ // each matching PolicyHit and direction over the timeframe, containing time-series data covering the
+ // provided time range.
+ rpc List(StatisticsRequest) returns (stream StatisticsResult);
+}
+
+// StatisticType represents the types of data available over the Statistics API endpoint.
+enum StatisticType {
+ PacketCount = 0;
+ ByteCount = 1;
+ LiveConnectionCount = 2;
+}
+
+enum StatisticsGroupBy {
+ // Policy configures statistics groupings on a per-policy basis.
+ Policy = 0;
+
+ // PolicyRule configures statistics groupings on a per-policy-rule basis.
+ PolicyRule = 1;
+}
+
+message StatisticsRequest {
+ // The start time from which to collect statistics (inclusive).
+ //
+ // - A value of zero indicates the oldest start time available by the server.
+ // - A value greater than zero indicates an absolute time in seconds since the Unix epoch.
+ // - A value less than zero indicates a relative number of seconds from "now", as determined by the server.
+ int64 start_time_gte = 1;
+
+ // The end time indicates the end of the window from which to collect statistics.
+ //
+ // - A value of zero means "now", as determined by the server at the time of request.
+ // - A value greater than zero indicates an absolute time in seconds since the Unix epoch.
+ // - A value less than zero indicates a relative number of seconds from "now", as determined by the server.
+ int64 start_time_lt = 2;
+
+ // Type is the type of statistic to return. e.g., packets, bytes, etc.
+ StatisticType type = 3;
+
+ // Configure statistics aggregation.
+ // - Policy: each StatisticsResult will contain statistics for a particular policy.
+ // - PolicyRule: each StatisticsResult will contain statistics for a particular policy rule.
+ // - Any: return both per-Policy and per-PolicyRule results.
+ StatisticsGroupBy group_by = 4;
+
+ // Optionally configure fields to filter results. If provided, any policies not matching the PolicyMatch
+ // will be omitted from the results.
+ PolicyMatch policy_match = 5;
+
+ // TimeSeries configures whether or not to return time-series data in the response. If true,
+ // the response will include multiple datapoints over the given time window. If false, data
+ // across the time window will be aggregated into a single data point.
+ bool time_series = 6;
+}
+
+enum RuleDirection {
+ Any = 0;
+ Ingress = 1;
+ Egress = 2;
+}
+
+message StatisticsResult {
+ // Policy identifies the policy / rule for which this data applies. Its meaning is contextualized
+ // by the GroupBy field.
+ //
+ // - StatisticsGroupBy_Policy: this field represents the specific Policy, and statistics are aggregated across all
+ // rules within that policy. Rule identifiers (Action, RuleID) will be omitted.
+ //
+ // - StatisticsGroupBy_PolicyRule: this field identifies a specific rule within a Policy, and statistics are scoped to
+ // that particular rule.
+ PolicyHit policy = 1;
+
+ // For statistics results targeting a specific policy rule, the direction
+ // contextualizes the rule ID as either an ingress or egress rule.
+ //
+ // For statistics results grouped by policy, both ingress and egress statistics will be included.
+ RuleDirection direction = 2;
+
+ // GroupBy indicates whether the statistics in this result are aggregated for a policy, or for
+ // a specific rule within that policy.
+ StatisticsGroupBy group_by = 3;
+
+ // Type indicates the type of data carried in this result. e.g., PacketCount vs ByteCount.
+ StatisticType type = 4;
+
+ // AllowedIn contains the count of the requested statistic that was allowed for ingress flows.
+ // The semantic meaning (e.g., packets vs bytes) is indicated by the Type field.
+ repeated int64 allowed_in = 5;
+ repeated int64 allowed_out = 6;
+
+ repeated int64 denied_in = 7;
+ repeated int64 denied_out = 8;
+
+ repeated int64 passed_in = 9;
+ repeated int64 passed_out = 10;
+
+ // X is the x axis of the data for time-series data. i.e., the timestamp. For non-timeseries data,
+ // this will be nil.
+ repeated int64 x = 11;
+}