diff --git a/Scripts/check-cortexflow-components.sh b/Scripts/check-cortexflow-components.sh deleted file mode 100755 index 01232cbe..00000000 --- a/Scripts/check-cortexflow-components.sh +++ /dev/null @@ -1,21 +0,0 @@ -echo "Welcome to CortexFlow tools" -echo "Checking CortexFlow components" - -echo "Checking if CortexFlow namespace exists..." -if kubectl get namespace cortexflow >/dev/null 2>&1; then - echo "✅ Namespace 'cortexflow' exists." - - sleep 1 - echo "Checking pods..." - kubectl get pods -n cortexflow - - echo - - sleep 1 - echo "Checking services..." - kubectl get svc -n cortexflow - echo -else - echo "❌ Namespace 'cortexflow' does not exist." - exit 1 -fi diff --git a/Scripts/check-dev-requisites.sh b/Scripts/check-dev-requisites.sh deleted file mode 100755 index c775754c..00000000 --- a/Scripts/check-dev-requisites.sh +++ /dev/null @@ -1,41 +0,0 @@ -#!/bin/bash - -echo "Welcome to the CortexFlow tools" -echo "Checking pre-requisites for developers" -echo - -echo "Checking Docker installation..." -if which docker >/dev/null 2>&1; then - echo "✅ Docker is installed." -else - echo "❌ Docker is NOT installed." -fi -sleep 1 - -echo -echo "Checking Minikube installation..." -if which minikube >/dev/null 2>&1; then - echo "✅ Minikube is installed." -else - echo "❌ Minikube is NOT installed." -fi -sleep 1 - -echo - -echo "Checking Node.js installation..." -if which node >/dev/null 2>&1; then - echo "✅ Node.js is installed." -else - echo "Node.js is NOT installed." -fi -sleep 1 - -echo - -echo "Checking npm installation..." -if which npm >/dev/null 2>&1; then - echo "✅ npm is installed." -else - echo "❌ npm is NOT installed." -fi diff --git a/Scripts/install-debugging-tools.sh b/Scripts/install-debugging-tools.sh deleted file mode 100755 index 9e3ed017..00000000 --- a/Scripts/install-debugging-tools.sh +++ /dev/null @@ -1,45 +0,0 @@ -if ! 
kubectl exec -n cortexflow $1 -c $2 -- which netstat >/dev/null 2>&1; then - echo "🔨 installing netstat" - kubectl exec -n cortexflow $1 -c $2 -- apt update - kubectl exec -n cortexflow $1 -c $2 -- apt install -y net-tools -else - echo "✅ Netstat is installed." -fi - -sleep 1.5 - -if ! kubectl exec -n cortexflow $1 -c $2 -- which nc >/dev/null 2>&1; then - echo "🔨 installing netcat" - kubectl exec -n cortexflow $1 -c $2 -- apt install -y netcat-traditional -else - echo "✅ Netcat is installed." -fi - -sleep 1.5 - -if ! kubectl exec -n cortexflow $1 -c $2 -- which curl >/dev/null 2>&1; then - echo "🔨 installing curl" - kubectl exec -n cortexflow $1 -c $2 -- apt install -y curl -else - echo "✅ Curl is installed." -fi - -sleep 1.5 - -if ! kubectl exec -n cortexflow $1 -c $2 -- which nslookup >/dev/null 2>&1; then - echo "🔨 installing dnsutils" - kubectl exec -n cortexflow $1 -c $2 -- apt install -y dnsutils -else - echo "✅ Nslookup is installed." -fi - -sleep 1.5 - -if ! kubectl exec -n cortexflow $1 -c $2 -- which tcpdump >/dev/null 2>&1; then - echo "🔨 installing tcpdump" - kubectl exec -n cortexflow $1 -c $2 -- apt install -y tcpdump -else - echo "✅ tcpdump is installed." 
-fi - -sleep 1.5 diff --git a/Scripts/test-connections.sh b/Scripts/test-connections.sh deleted file mode 100755 index 95dcc946..00000000 --- a/Scripts/test-connections.sh +++ /dev/null @@ -1,49 +0,0 @@ -#!/bin/bash - -proxy_pod_name=$(kubectl get pods -n cortexflow --no-headers -o custom-columns=":metadata.name" | grep cortexflow-proxy) -proxy_ip=$(kubectl get -o template service/proxy-service -n cortexflow --template='{{.spec.clusterIP}}') -proxy_udp_port=5053 -proxy_tcp_port=5054 -proxy_metrics_port=9090 -proxy_container=$(kubectl get pod $proxy_pod_name -n cortexflow -o jsonpath='{.spec.containers[*].name}') - -echo "🧑🏻‍🔬 Checking cortexflow proxy inside the proxy pod: $proxy_pod_name" - -sleep 1.5 -echo "🔨 checking env variables" -kubectl exec -n cortexflow $proxy_pod_name -- env - -sleep 1.5 - -./install-debugging-tools.sh $proxy_pod_name $proxy_container -echo -./test-proxy-ports.sh $proxy_pod_name $proxy_metrics_port -echo -sleep 1.5 -echo "🔨 Sending a test package with netcat from proxy pod -> proxy pod" -kubectl exec -n cortexflow $proxy_pod_name -- sh -c echo b"Hi CortexFlow" | nc -u -w5 -v 127.0.0.1 $proxy_udp_port - -echo -sleep 1.5 -echo "🔨 Testing the DNS resolution manually with nslookup" -kubectl exec -n cortexflow $proxy_pod_name -- nslookup proxy-service.cortexflow.svc.cluster.local - -sleep 1.5 -echo -./test-proxy-endpoints.sh $proxy_pod_name -echo -echo -echo "🧑🏻‍🔬 Testing outside the proxy pod using a test pod" -echo "🔨 Testing using a temporary test pod and nslookup" -kubectl run -it --rm --image=busybox test-pod --restart=Never -n cortexflow -- nslookup proxy-service.cortexflow.svc.cluster.local - -echo -sleep 1.5 -echo "🔨 Sending a test message using netcat and a temporary test pod" -kubectl run -it --rm --image=busybox test-pod --restart=Never -n cortexflow -- sh -c "echo -n Hi CortexFlow | nc -u -w 3 -v $proxy_ip $proxy_udp_port" - -echo -sleep 1.5 -echo "🔨 Testing the tcp port" -echo "🔨 Sending a test message using netcat and a temporary 
test pod " -kubectl run -it --rm --image=busybox test-pod --restart=Never -n cortexflow -- sh -c "echo -n Hi TCP | nc -w 3 -v $proxy_ip $proxy_tcp_port" diff --git a/Scripts/test-proxy-endpoints.sh b/Scripts/test-proxy-endpoints.sh deleted file mode 100755 index c89e52ef..00000000 --- a/Scripts/test-proxy-endpoints.sh +++ /dev/null @@ -1,45 +0,0 @@ -echo "🔨 Testing curl command" -response=$(kubectl exec -n cortexflow $1 -- curl -s -o /dev/null -w "%{http_code}" http://localhost:9090/) -if [ "$response" -eq 200 ]; then - echo "✅ Server is working" - echo " Checking / endpoint" - kubectl exec -n cortexflow $1 -- curl -v http://localhost:9090/ -else - echo "❌ Error in http response ERROR: $response. Service does not exists or is not exposed" -fi - -echo -sleep 1.5 -echo "🔨 Testing /health endpoint" -response=$(kubectl exec -n cortexflow $1 -- curl -s -o /dev/null -w "%{http_code}" http://localhost:9090/health) -if [ "$response" -eq 200 ]; then - echo "✅ Server is working" - echo " Checking /health endpoint" - kubectl exec -n cortexflow $1 -- curl -v http://localhost:9090/health -else - echo "❌ Error in http response ERROR: $response. Service does not exists or is not exposed" -fi - -echo -sleep 1.5 -echo "🔨 Testing /metrics endpoint" -response=$(kubectl exec -n cortexflow $1 -- curl -s -o /dev/null -w "%{http_code}" http://localhost:9090/metrics) -if [ "$response" -eq 200 ]; then - echo "✅ Server is working" - echo " Checking /metrics endpoint" - kubectl exec -n cortexflow $1 -- curl -v http://localhost:9090/metrics -else - echo "❌ Error in http response ERROR: $response. 
Service does not exists or is not exposed" -fi - -echo -sleep 1.5 -echo "🔨 Testing /status endpoint" -response=$(kubectl exec -n cortexflow $1 -- curl -s -o /dev/null -w "%{http_code}" http://localhost:9090/status) -if [ "$response" -eq 200 ]; then - echo "✅ Server is working" - echo " Checking /status endpoint" - kubectl exec -n cortexflow $1 -- curl -v http://localhost:9090/status -else - echo "❌ Error in http response ERROR: $response. Service does not exists or is not exposed" -fi diff --git a/Scripts/test-proxy-ports.sh b/Scripts/test-proxy-ports.sh deleted file mode 100755 index 33d658dd..00000000 --- a/Scripts/test-proxy-ports.sh +++ /dev/null @@ -1,18 +0,0 @@ -echo "🔨 Testing network connections" -kubectl exec -n cortexflow $1 -- netstat -tulnp | grep $2 - -sleep 1.5 - -echo -echo "🔨 testing if the process is in execution" -kubectl exec -n cortexflow $1 -- ps aux | grep cortexflow-proxy - -sleep 1.5 -echo -echo "🔨 testing using netcat" -kubectl exec -n cortexflow $1 -- nc -zv proxy-service.cortexflow.svc.cluster.local $2 - -sleep 1.5 -echo -echo "🔨 Checking if the proxy is listening in the 5053 port" -kubectl exec -n cortexflow $1 -- netstat -ulnp diff --git a/Scripts/test-sidecar-advanced-tcp.sh b/Scripts/test-sidecar-advanced-tcp.sh deleted file mode 100755 index ec3fce4c..00000000 --- a/Scripts/test-sidecar-advanced-tcp.sh +++ /dev/null @@ -1,67 +0,0 @@ -#!/bin/sh - -./install-debugging-tools.sh test-proxy proxy-sidecar -./install-debugging-tools.sh test-proxy2 proxy-sidecar -./install-debugging-tools.sh test-proxy3 proxy-sidecar -./install-debugging-tools.sh test-proxy4 proxy-sidecar - -# start the tcp listener -kubectl exec test-proxy -c proxy-sidecar -n cortexflow -- sh -c ' - echo "Starting TCP listener on port 5054..." - nohup sh -c "nc -l -p 5054" >/dev/null 2>&1 & -' - -kubectl exec test-proxy2 -c proxy-sidecar -n cortexflow -- sh -c ' - echo "Starting TCP listener on port 5054..." 
- nohup sh -c "nc -l -p 5054" >/dev/null 2>&1 & -' - - -test_proxy_to_proxy2() { - for i in $(seq 1 300); do - sleep $((RANDOM % 5 + 1)) - kubectl exec test-proxy -c proxy-sidecar -n cortexflow -- sh -c ' - printf "{\"service\":\"test-proxy2.cortexflow\",\"direction\":\"Incoming\",\"payload\":\"eyJwYXlsb2FkIjogIkhlbGxvIGZyb20gcHJveHktc2lkZWNhciJ9\"}\n" | nc -w1 test-proxy2 5054 - ' - done -} - -test_proxy2_to_proxy() { - for i in $(seq 1 300); do - sleep $((RANDOM % 5 + 1)) - kubectl exec test-proxy2 -c proxy-sidecar -n cortexflow -- sh -c ' - printf "{\"service\":\"test-proxy.cortexflow\",\"direction\":\"Incoming\",\"payload\":\"eyJwYXlsb2FkIjogIkhlbGxvIGZyb20gcHJveHktc2lkZWNhciJ9\"}\n" | nc -w1 test-proxy 5054 - ' - done -} - -test_proxy3_to_proxy2() { - for i in $(seq 1 300); do - sleep $((RANDOM % 5 + 1)) - kubectl exec test-proxy3 -c proxy-sidecar -n cortexflow -- sh -c ' - printf "{\"service\":\"test-proxy2.cortexflow\",\"direction\":\"Incoming\",\"payload\":\"eyJwYXlsb2FkIjogIkhlbGxvIGZyb20gcHJveHktc2lkZWNhciJ9\"}\n" | nc -w1 test-proxy2 5054 - ' - done -} - -test_proxy4_to_proxy2() { - for i in $(seq 1 300); do - sleep $((RANDOM % 5 + 1)) - kubectl exec test-proxy4 -c proxy-sidecar -n cortexflow -- sh -c ' - printf "{\"service\":\"test-proxy2.cortexflow\",\"direction\":\"Incoming\",\"payload\":\"eyJwYXlsb2FkIjogIkhlbGxvIGZyb20gcHJveHktc2lkZWNhciJ9\"}\n" | nc -w1 test-proxy2 5054 - ' - done -} - -# execute the functions in background -test_proxy_to_proxy2 & -test_proxy2_to_proxy & -test_proxy3_to_proxy2 & -test_proxy4_to_proxy2 & - - -sleep 300 - -# stop the listeners -kubectl exec test-proxy -c proxy-sidecar -n cortexflow -- sh -c 'pkill nc' -kubectl exec test-proxy2 -c proxy-sidecar -n cortexflow -- sh -c 'pkill nc' diff --git a/Scripts/test-sidecar-advanced-udp.sh b/Scripts/test-sidecar-advanced-udp.sh deleted file mode 100755 index d9c52a85..00000000 --- a/Scripts/test-sidecar-advanced-udp.sh +++ /dev/null @@ -1,70 +0,0 @@ -#!/bin/sh 
-./install-debugging-tools.sh test-proxy proxy-sidecar -./install-debugging-tools.sh test-proxy2 proxy-sidecar -./install-debugging-tools.sh test-proxy3 proxy-sidecar -./install-debugging-tools.sh test-proxy4 proxy-sidecar - -# start the udp listener -kubectl exec test-proxy -c proxy-sidecar -n cortexflow -- sh -c ' - echo "Starting UDP listener on port 5053..." - nohup nc -lu 5053 >/dev/null 2>&1 & -' - -kubectl exec test-proxy2 -c proxy-sidecar -n cortexflow -- sh -c ' - echo "Starting UDP listener on port 5053..." - nohup nc -lu 5053 >/dev/null 2>&1 & -' - - -test_proxy_to_proxy2() { - for i in $(seq 1 300); do - sleep $((RANDOM % 5 + 1)) - echo "Sending UDP packet from test-proxy to test-proxy2..." - kubectl exec test-proxy -c proxy-sidecar -n cortexflow -- sh -c ' - printf "{\"service\":\"test-proxy2.cortexflow\",\"direction\":\"Incoming\",\"payload\":\"eyJwYXlsb2FkIjogIkhlbGxvIGZyb20gcHJveHktc2lkZWNhciJ9\"}\n" | nc -u -w1 test-proxy2 5053 - ' - done -} - -test_proxy2_to_proxy() { - for i in $(seq 1 300); do - sleep $((RANDOM % 5 + 1)) - echo "Sending UDP packet from test-proxy2 to test-proxy..." - kubectl exec test-proxy2 -c proxy-sidecar -n cortexflow -- sh -c ' - printf "{\"service\":\"test-proxy.cortexflow\",\"direction\":\"Incoming\",\"payload\":\"eyJwYXlsb2FkIjogIkhlbGxvIGZyb20gcHJveHktc2lkZWNhciJ9\"}\n" | nc -u -w1 test-proxy 5053 - ' - done -} - -test_proxy3_to_proxy2() { - for i in $(seq 1 300); do - sleep $((RANDOM % 5 + 1)) - echo "Sending UDP packet from test-proxy3 to test-proxy2..." - kubectl exec test-proxy3 -c proxy-sidecar -n cortexflow -- sh -c ' - printf "{\"service\":\"test-proxy2.cortexflow\",\"direction\":\"Incoming\",\"payload\":\"eyJwYXlsb2FkIjogIkhlbGxvIGZyb20gcHJveHktc2lkZWNhciJ9\"}\n" | nc -u -w1 test-proxy2 5053 - ' - done -} - -test_proxy4_to_proxy2() { - for i in $(seq 1 300); do - sleep $((RANDOM % 5 + 1)) - echo "Sending UDP packet from test-proxy4 to test-proxy2..." 
- kubectl exec test-proxy4 -c proxy-sidecar -n cortexflow -- sh -c ' - printf "{\"service\":\"test-proxy2.cortexflow\",\"direction\":\"Incoming\",\"payload\":\"eyJwYXlsb2FkIjogIkhlbGxvIGZyb20gcHJveHktc2lkZWNhciJ9\"}\n" | nc -u -w1 test-proxy2 5053 - ' - done -} - -# execute the functions in background -(test_proxy_to_proxy2 &) & -(test_proxy2_to_proxy &) & -(test_proxy3_to_proxy2 &) & -(test_proxy4_to_proxy2 &) & - - -sleep 300 - -# stop the listeners -kubectl exec test-proxy -c proxy-sidecar -n cortexflow -- sh -c 'pkill nc || kill $(pgrep nc)' -kubectl exec test-proxy2 -c proxy-sidecar -n cortexflow -- sh -c 'pkill nc || kill $(pgrep nc)' diff --git a/Scripts/test-sidecar-proxy.sh b/Scripts/test-sidecar-proxy.sh deleted file mode 100755 index fcce42de..00000000 --- a/Scripts/test-sidecar-proxy.sh +++ /dev/null @@ -1,71 +0,0 @@ -#!/bin/bash - -echo "Testing Sidecar proxy injection " - -sleep 1 -echo "Checking pods" -kubectl get pods -o wide -n cortexflow -echo -echo "Checking if the sidecar proxy is present" -kubectl get pods -n cortexflow -o json | jq '.items[].spec.containers[].name' - -echo -sleep 1 -echo "Checking open ports in test-proxy" -kubectl get pods test-proxy -o jsonpath='{.spec.containers[*].ports}' -n cortexflow -echo -kubectl get pods test-proxy2 -o jsonpath='{.spec.containers[*].ports}' -n cortexflow - -echo -echo -echo "Installing debugging tools in test-proxy: (PROXY-SIDECAR container)" -sleep 3 -./install-debugging-tools.sh test-proxy proxy-sidecar -echo -echo -echo "Installing debugging tools in test-proxy2: (PROXY-SIDECAR container)" -sleep 3 -./install-debugging-tools.sh test-proxy2 proxy-sidecar - -echo -echo -echo "Checking network connections in test-proxy pod " -kubectl exec -it test-proxy -c proxy-sidecar -n cortexflow -- netstat -tulnp -echo -echo "Checking network connections in test-proxy2 pod" -kubectl exec -it test-proxy2 -c proxy-sidecar -n cortexflow -- netstat -tulnp - - -echo -sleep 2 -echo "TEST 1: Checking if test-proxy can 
communicate with test-proxy2" -kubectl exec -it test-proxy -c proxy-sidecar -n cortexflow -- nc -zv test-proxy2.cortexflow.svc.cluster.local 5054 -echo - -echo - -echo "TEST 2: Checking if test-proxy can communicate with test-proxy2 (TCP)" - -# 2. Send the message from test-proxy to test-proxy2 -kubectl exec test-proxy -c proxy-sidecar -n cortexflow -- sh -c ' - echo "Test: Incoming Message ⏳" - printf "{\"service\":\"test-proxy2.cortexflow\",\"direction\":\"Incoming\",\"payload\":\"eyJwYXlsb2FkIjogIkhlbGxvIGZyb20gcHJveHktc2lkZWNhciJ9\"}\n" | nc -w3 test-proxy2 5054 && echo "✅ Test completed" -' - -echo -sleep 2 -echo -echo "TEST 2: Sending a message from test-proxy to test-proxy2 (UDP)" - -#Start the UDP listener on test-proxy2 (MUST be before sending the message) -kubectl exec test-proxy2 -c proxy-sidecar -n cortexflow -- sh -c ' - echo "Starting UDP listener on port 5053..." - nohup sh -c "nc -lu -p 5053 > /tmp/received_message.log" >/dev/null 2>&1 & - sleep 2 # Wait for the listener to start -' - -#2. 
Send the message from test-proxy to test-proxy2 -kubectl exec test-proxy -c proxy-sidecar -n cortexflow -- sh -c ' - echo "Test: Incoming Message ⏳" - echo "{\"service\":\"test-proxy2.cortexflow\",\"direction\":\"Incoming\",\"payload\":\"eyJtZXNzYWdlIjogIkhlbGxvIGZyb20gcHJveHktc2lkZWNhciJ9\"}" | nc -u -w3 test-proxy2 5053 && echo "✅ Test completed" -' diff --git a/cli/Cargo.lock b/cli/Cargo.lock index a2d89684..7c843ec0 100644 --- a/cli/Cargo.lock +++ b/cli/Cargo.lock @@ -216,18 +216,15 @@ checksum = "46c5e41b57b8bba42a04676d81cb89e9ee8e859a1a66f80a5a72e1cb76b34d43" [[package]] name = "bytemuck" -version = "1.23.2" +version = "1.25.0" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "3995eaeebcdf32f91f980d360f78732ddc061097ab4e39991ae7a6ace9194677" -dependencies = [ - "bytemuck_derive", -] +checksum = "c8efb64bd706a16a1bdde310ae86b351e4d21550d98d056f22f8a7f7a2183fec" [[package]] name = "bytemuck_derive" -version = "1.10.1" +version = "1.10.2" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "4f154e572231cb6ba2bd1176980827e3d5dc04cc183a75dea38109fbdd672d29" +checksum = "f9abbd1bc6865053c427f7198e6af43bfdedc55ab791faed4fbd361d789575ff" dependencies = [ "proc-macro2", "quote", @@ -236,9 +233,9 @@ dependencies = [ [[package]] name = "bytes" -version = "1.10.1" +version = "1.11.0" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "d71b6127be86fdcfddb610f7182ac57211d4b18a3e9c82eb2d17662f2227ad6a" +checksum = "b35204fbdc0b3f4446b89fc1ac2cf84a8a68971995d0bf2e925ec7cd960f9cb3" [[package]] name = "cc" @@ -256,12 +253,6 @@ version = "1.0.3" source = "registry+https://github.com/rust-lang/crates.io-index" checksum = "2fd1289c04a9ea8cb22300a459a72a385d7c73d3259e2ed7dcb2af674838cfa9" -[[package]] -name = "cfg_aliases" -version = "0.2.1" -source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "613afe47fcd5fac7ccf1db93babcb082c5994d996f20b8b159f2ad1658eb5724" - [[package]] name 
= "chrono" version = "0.4.42" @@ -278,9 +269,9 @@ dependencies = [ [[package]] name = "clap" -version = "4.5.51" +version = "4.5.54" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "4c26d721170e0295f191a69bd9a1f93efcdb0aff38684b61ab5750468972e5f5" +checksum = "c6e6ff9dcd79cff5cd969a17a545d79e84ab086e444102a591e288a8aa3ce394" dependencies = [ "clap_builder", "clap_derive", @@ -288,9 +279,9 @@ dependencies = [ [[package]] name = "clap_builder" -version = "4.5.51" +version = "4.5.54" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "75835f0c7bf681bfd05abe44e965760fea999a5286c6eb2d59883634fd02011a" +checksum = "fa42cf4d2b7a41bc8f663a7cab4031ebafa1bf3875705bfaf8466dc60ab52c00" dependencies = [ "anstream", "anstyle", @@ -359,10 +350,21 @@ checksum = "773648b94d0e5d620f64f280777445740e61fe701025087ec8b57f45c791888b" [[package]] name = "cortexbrain-common" version = "0.1.0" -source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "5daea06747f06e000deaa52b7aceb504ddc309c061badf76e0b4b3d146ebf3a4" dependencies = [ "anyhow", + "aya", + "bytemuck", + "bytemuck_derive", + "bytes", + "k8s-openapi", + "kube", + "opentelemetry", + "opentelemetry-appender-tracing", + "opentelemetry-otlp", + "opentelemetry-semantic-conventions", + "opentelemetry-stdout", + "opentelemetry_sdk", + "tokio", "tracing", "tracing-subscriber", ] @@ -390,8 +392,6 @@ dependencies = [ [[package]] name = "cortexflow_agent_api" version = "0.1.1" -source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "2bfebbb2894a8d2edec3c4f3631952860c34706b798aa8d77ea2806ddd6fc476" dependencies = [ "anyhow", "aya", @@ -399,7 +399,6 @@ dependencies = [ "bytemuck_derive", "chrono", "cortexbrain-common", - "cortexflow_identity", "prost", "tokio", "tokio-stream", @@ -412,27 +411,6 @@ dependencies = [ "tracing-subscriber", ] -[[package]] -name = "cortexflow_identity" -version = "0.1.1" -source = 
"registry+https://github.com/rust-lang/crates.io-index" -checksum = "5725a802e4f494b5fab4c69b1455a32dd3804b52a58c665a7d751eeae93ddfca" -dependencies = [ - "anyhow", - "aya", - "bytemuck", - "bytemuck_derive", - "bytes", - "cortexbrain-common", - "k8s-openapi", - "kube", - "libc", - "nix", - "tokio", - "tracing", - "tracing-subscriber", -] - [[package]] name = "cpufeatures" version = "0.2.17" @@ -512,6 +490,17 @@ dependencies = [ "windows-sys 0.61.1", ] +[[package]] +name = "displaydoc" +version = "0.2.5" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "97369cbbc041bc366949bc74d34658d6cda5621039731c6310521892a3a20ae0" +dependencies = [ + "proc-macro2", + "quote", + "syn", +] + [[package]] name = "either" version = "1.15.0" @@ -603,12 +592,34 @@ version = "0.3.31" source = "registry+https://github.com/rust-lang/crates.io-index" checksum = "05f29059c0c2090612e8d742178b0580d2dc940c837851ad723096f87af6663e" +[[package]] +name = "futures-executor" +version = "0.3.31" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "1e28d1d997f585e54aebc3f97d39e72338912123a67330d723fdbb564d646c9f" +dependencies = [ + "futures-core", + "futures-task", + "futures-util", +] + [[package]] name = "futures-io" version = "0.3.31" source = "registry+https://github.com/rust-lang/crates.io-index" checksum = "9e5c1b78ca4aae1ac06c48a526a655760685149f0d465d21f37abfe57ce075c6" +[[package]] +name = "futures-macro" +version = "0.3.31" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "162ee34ebcb7c64a8abebc059ce0fee27c2262618d7b60ed8faf72fef13c3650" +dependencies = [ + "proc-macro2", + "quote", + "syn", +] + [[package]] name = "futures-sink" version = "0.3.31" @@ -630,6 +641,7 @@ dependencies = [ "futures-channel", "futures-core", "futures-io", + "futures-macro", "futures-sink", "futures-task", "memchr", @@ -822,6 +834,7 @@ version = "0.1.16" source = "registry+https://github.com/rust-lang/crates.io-index" checksum = 
"8d9b05277c7e8da2c93a568989bb6207bef0112e8d17df7a6eda4a3cf143bc5e" dependencies = [ + "base64", "bytes", "futures-channel", "futures-core", @@ -829,7 +842,9 @@ dependencies = [ "http", "http-body", "hyper", + "ipnet", "libc", + "percent-encoding", "pin-project-lite", "socket2", "tokio", @@ -861,6 +876,108 @@ dependencies = [ "cc", ] +[[package]] +name = "icu_collections" +version = "2.1.1" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "4c6b649701667bbe825c3b7e6388cb521c23d88644678e83c0c4d0a621a34b43" +dependencies = [ + "displaydoc", + "potential_utf", + "yoke", + "zerofrom", + "zerovec", +] + +[[package]] +name = "icu_locale_core" +version = "2.1.1" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "edba7861004dd3714265b4db54a3c390e880ab658fec5f7db895fae2046b5bb6" +dependencies = [ + "displaydoc", + "litemap", + "tinystr", + "writeable", + "zerovec", +] + +[[package]] +name = "icu_normalizer" +version = "2.1.1" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "5f6c8828b67bf8908d82127b2054ea1b4427ff0230ee9141c54251934ab1b599" +dependencies = [ + "icu_collections", + "icu_normalizer_data", + "icu_properties", + "icu_provider", + "smallvec", + "zerovec", +] + +[[package]] +name = "icu_normalizer_data" +version = "2.1.1" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "7aedcccd01fc5fe81e6b489c15b247b8b0690feb23304303a9e560f37efc560a" + +[[package]] +name = "icu_properties" +version = "2.1.2" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "020bfc02fe870ec3a66d93e677ccca0562506e5872c650f893269e08615d74ec" +dependencies = [ + "icu_collections", + "icu_locale_core", + "icu_properties_data", + "icu_provider", + "zerotrie", + "zerovec", +] + +[[package]] +name = "icu_properties_data" +version = "2.1.2" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = 
"616c294cf8d725c6afcd8f55abc17c56464ef6211f9ed59cccffe534129c77af" + +[[package]] +name = "icu_provider" +version = "2.1.1" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "85962cf0ce02e1e0a629cc34e7ca3e373ce20dda4c4d7294bbd0bf1fdb59e614" +dependencies = [ + "displaydoc", + "icu_locale_core", + "writeable", + "yoke", + "zerofrom", + "zerotrie", + "zerovec", +] + +[[package]] +name = "idna" +version = "1.1.0" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "3b0875f23caa03898994f6ddc501886a45c7d3d62d04d2d90788d47be1b1e4de" +dependencies = [ + "idna_adapter", + "smallvec", + "utf8_iter", +] + +[[package]] +name = "idna_adapter" +version = "1.2.1" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "3acae9609540aa318d1bc588455225fb2085b9ed0c4f6bd0d9d5bcd86f1a0344" +dependencies = [ + "icu_normalizer", + "icu_properties", +] + [[package]] name = "indexmap" version = "2.11.0" @@ -871,6 +988,12 @@ dependencies = [ "hashbrown", ] +[[package]] +name = "ipnet" +version = "2.11.0" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "469fb0b9cefa57e3ef31275ee7cacb78f2fdca44e4765491884a2b119d4eb130" + [[package]] name = "is_terminal_polyfill" version = "1.70.1" @@ -1019,6 +1142,12 @@ version = "0.9.4" source = "registry+https://github.com/rust-lang/crates.io-index" checksum = "cd945864f07fe9f5371a27ad7b52a172b4b499999f1d97574c9fa68373937e12" +[[package]] +name = "litemap" +version = "0.8.1" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "6373607a59f0be73a39b6fe456b8192fcc3585f602af20751600e974dd455e77" + [[package]] name = "lock_api" version = "0.4.13" @@ -1056,15 +1185,6 @@ version = "2.7.5" source = "registry+https://github.com/rust-lang/crates.io-index" checksum = "32a282da65faaf38286cf3be983213fcf1d2e2a58700e808f83f4ea9a4804bc0" -[[package]] -name = "memoffset" -version = "0.9.1" -source = 
"registry+https://github.com/rust-lang/crates.io-index" -checksum = "488016bfae457b036d996092f6cb448677611ce4449e970ceaf42695203f218a" -dependencies = [ - "autocfg", -] - [[package]] name = "mime" version = "0.3.17" @@ -1088,19 +1208,6 @@ version = "0.10.1" source = "registry+https://github.com/rust-lang/crates.io-index" checksum = "1d87ecb2933e8aeadb3e3a02b828fed80a7528047e68b4f424523a0981a3a084" -[[package]] -name = "nix" -version = "0.30.1" -source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "74523f3a35e05aba87a1d978330aef40f67b0304ac79c1c00b294c9830543db6" -dependencies = [ - "bitflags", - "cfg-if", - "cfg_aliases", - "libc", - "memoffset", -] - [[package]] name = "nu-ansi-term" version = "0.50.1" @@ -1149,6 +1256,112 @@ version = "0.1.6" source = "registry+https://github.com/rust-lang/crates.io-index" checksum = "d05e27ee213611ffe7d6348b942e8f942b37114c00cc03cec254295a4a17852e" +[[package]] +name = "opentelemetry" +version = "0.32.0" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "b0142c63252a9e054e68a4c61a5778f7b14f576274d593f8ce883d191a099682" +dependencies = [ + "futures-core", + "futures-sink", + "js-sys", + "pin-project-lite", + "thiserror 2.0.16", + "tracing", +] + +[[package]] +name = "opentelemetry-appender-tracing" +version = "0.32.0" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "2c0080f0dc1d7c786f467cd85a4e395fcab11ee852004f39a29a18ab7c25d837" +dependencies = [ + "opentelemetry", + "tracing", + "tracing-core", + "tracing-subscriber", +] + +[[package]] +name = "opentelemetry-http" +version = "0.32.0" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "5683015d09e2df236ef005b17f6f196f0d5f6313c4fa43a7b6a53b52776e4331" +dependencies = [ + "async-trait", + "bytes", + "http", + "opentelemetry", + "reqwest", +] + +[[package]] +name = "opentelemetry-otlp" +version = "0.32.0" +source = "registry+https://github.com/rust-lang/crates.io-index" 
+checksum = "9966929966d17620d7c316c643ba62631826e10021409357772d5eea84f62c35" +dependencies = [ + "http", + "opentelemetry", + "opentelemetry-http", + "opentelemetry-proto", + "opentelemetry_sdk", + "prost", + "reqwest", + "thiserror 2.0.16", + "tokio", + "tonic", + "tonic-types", +] + +[[package]] +name = "opentelemetry-proto" +version = "0.32.0" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "56d658ba1faf63f7b9c492cfbe6e0ec365440a16132d3270c1065f7b33f1b638" +dependencies = [ + "opentelemetry", + "opentelemetry_sdk", + "prost", + "tonic", + "tonic-prost", +] + +[[package]] +name = "opentelemetry-semantic-conventions" +version = "0.32.0" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "6ca2f98a0437b427b4b08f19f1caa3c44db885a202bc12cfea13d6c702243d68" + +[[package]] +name = "opentelemetry-stdout" +version = "0.32.0" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "a1b1c6a247d79091f0062a5f4bd058589525cf987a8d4c169440d9c1be72f0ad" +dependencies = [ + "chrono", + "opentelemetry", + "opentelemetry_sdk", +] + +[[package]] +name = "opentelemetry_sdk" +version = "0.32.1" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "9b59f80e1ac4d5ff7a2db8fb6c80badb7f0f3f858211fba08dd9aaec750894f9" +dependencies = [ + "futures-channel", + "futures-executor", + "futures-util", + "opentelemetry", + "percent-encoding", + "portable-atomic", + "rand", + "thiserror 2.0.16", + "tokio", + "tokio-stream", +] + [[package]] name = "option-ext" version = "0.2.0" @@ -1288,6 +1501,30 @@ version = "0.1.0" source = "registry+https://github.com/rust-lang/crates.io-index" checksum = "8b870d8c151b6f2fb93e84a13146138f05d02ed11c7e7c54f8826aaaf7c9f184" +[[package]] +name = "portable-atomic" +version = "1.13.1" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "c33a9471896f1c69cecef8d20cbe2f7accd12527ce60845ff44c153bb2a21b49" + +[[package]] +name = 
"potential_utf" +version = "0.1.4" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "b73949432f5e2a09657003c25bca5e19a0e9c84f8058ca374f49e0ebe605af77" +dependencies = [ + "zerovec", +] + +[[package]] +name = "ppv-lite86" +version = "0.2.21" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "85eae3c4ed2f50dcfe72643da4befc30deadb458a9b590d720cde2f2b1e97da9" +dependencies = [ + "zerocopy", +] + [[package]] name = "prettyplease" version = "0.2.37" @@ -1309,9 +1546,9 @@ dependencies = [ [[package]] name = "prost" -version = "0.14.1" +version = "0.14.3" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "7231bd9b3d3d33c86b58adbac74b5ec0ad9f496b19d22801d773636feaa95f3d" +checksum = "d2ea70524a2f82d518bce41317d0fae74151505651af45faf1ffbd6fd33f0568" dependencies = [ "bytes", "prost-derive", @@ -1341,9 +1578,9 @@ dependencies = [ [[package]] name = "prost-derive" -version = "0.14.1" +version = "0.14.3" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "9120690fafc389a67ba3803df527d0ec9cbbc9cc45e4cc20b332996dfb672425" +checksum = "27c6023962132f4b30eb4c172c91ce92d933da334c59c23cddee82358ddafb0b" dependencies = [ "anyhow", "itertools", @@ -1354,9 +1591,9 @@ dependencies = [ [[package]] name = "prost-types" -version = "0.14.1" +version = "0.14.3" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "b9b4db3d6da204ed77bb26ba83b6122a73aeb2e87e25fbf7ad2e84c4ccbf8f72" +checksum = "8991c4cbdb8bc5b11f0b074ffe286c30e523de90fee5ba8132f1399f23cb3dd7" dependencies = [ "prost", ] @@ -1396,6 +1633,35 @@ version = "5.3.0" source = "registry+https://github.com/rust-lang/crates.io-index" checksum = "69cdb34c158ceb288df11e18b4bd39de994f6657d83847bdffdbd7f346754b0f" +[[package]] +name = "rand" +version = "0.9.2" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "6db2770f06117d490610c7488547d543617b21bfa07796d7a12f6f1bd53850d1" 
+dependencies = [ + "rand_chacha", + "rand_core", +] + +[[package]] +name = "rand_chacha" +version = "0.9.0" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "d3022b5f1df60f26e1ffddd6c66e8aa15de382ae63b3a0c1bfc0e4d3e3f325cb" +dependencies = [ + "ppv-lite86", + "rand_core", +] + +[[package]] +name = "rand_core" +version = "0.9.5" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "76afc826de14238e6e8c374ddcc1fa19e374fd8dd986b0d2af0d02377261d83c" +dependencies = [ + "getrandom 0.3.3", +] + [[package]] name = "redox_syscall" version = "0.5.17" @@ -1445,6 +1711,37 @@ version = "0.8.6" source = "registry+https://github.com/rust-lang/crates.io-index" checksum = "caf4aa5b0f434c91fe5c7f1ecb6a5ece2130b02ad2a590589dda5146df959001" +[[package]] +name = "reqwest" +version = "0.13.4" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "219c5811de6525e5416c7d5d53bb656d3afdbc6c5af816e0802bcfa42dbdc1c3" +dependencies = [ + "base64", + "bytes", + "futures-channel", + "futures-core", + "futures-util", + "http", + "http-body", + "http-body-util", + "hyper", + "hyper-util", + "js-sys", + "log", + "percent-encoding", + "pin-project-lite", + "sync_wrapper", + "tokio", + "tower", + "tower-http", + "tower-service", + "url", + "wasm-bindgen", + "wasm-bindgen-futures", + "web-sys", +] + [[package]] name = "ring" version = "0.17.14" @@ -1701,6 +1998,12 @@ dependencies = [ "windows-sys 0.59.0", ] +[[package]] +name = "stable_deref_trait" +version = "1.2.1" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "6ce2be8dc25455e1f91df71bfa12ad37d7af1092ae736f3a6cd0e37bc7810596" + [[package]] name = "strsim" version = "0.11.1" @@ -1729,6 +2032,20 @@ name = "sync_wrapper" version = "1.0.2" source = "registry+https://github.com/rust-lang/crates.io-index" checksum = "0bf256ce5efdfa370213c1dabab5935a12e49f2c58d15e9eac2870d3b4f27263" +dependencies = [ + "futures-core", +] + +[[package]] +name = 
"synstructure" +version = "0.13.2" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "728a70f3dbaf5bab7f0c4b1ac8d7ae5ea60a4b5549c8a5914361c99147a709d2" +dependencies = [ + "proc-macro2", + "quote", + "syn", +] [[package]] name = "tempfile" @@ -1792,11 +2109,21 @@ dependencies = [ "cfg-if", ] +[[package]] +name = "tinystr" +version = "0.8.2" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "42d3e9c45c09de15d06dd8acf5f4e0e399e85927b7f00711024eb7ae10fa4869" +dependencies = [ + "displaydoc", + "zerovec", +] + [[package]] name = "tokio" -version = "1.48.0" +version = "1.49.0" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "ff360e02eab121e0bc37a2d3b4d4dc622e6eda3a8e5253d5435ecf5bd4c68408" +checksum = "72a2903cd7736441aac9df9d7688bd0ce48edccaadf181c3b90be801e81d3d86" dependencies = [ "bytes", "libc", @@ -1936,6 +2263,17 @@ dependencies = [ "tonic-prost", ] +[[package]] +name = "tonic-types" +version = "0.14.5" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "2a875a902255423d34c1f20838ab374126db8eb41625b7947a1d54113b0b7399" +dependencies = [ + "prost", + "prost-types", + "tonic", +] + [[package]] name = "tower" version = "0.5.2" @@ -1957,20 +2295,23 @@ dependencies = [ [[package]] name = "tower-http" -version = "0.6.6" +version = "0.6.11" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "adc82fd73de2a9722ac5da747f12383d2bfdb93591ee6c58486e0097890f05f2" +checksum = "4cfcf7e2740e6fc6d4d688b4ef00650406bb94adf4731e43c096c3a19fe40840" dependencies = [ "base64", "bitflags", "bytes", + "futures-util", "http", "http-body", "mime", "pin-project-lite", + "tower", "tower-layer", "tower-service", "tracing", + "url", ] [[package]] @@ -1987,9 +2328,9 @@ checksum = "8df9b6e13f2d32c91b9bd719c00d1958837bc7dec474d94952798cc8e69eeec3" [[package]] name = "tracing" -version = "0.1.41" +version = "0.1.44" source = 
"registry+https://github.com/rust-lang/crates.io-index" -checksum = "784e0ac535deb450455cbfa28a6f0df145ea1bb7ae51b821cf5e7927fdcfbdd0" +checksum = "63e71662fa4b2a2c3a26f570f037eb95bb1f85397f3cd8076caed2f026a6d100" dependencies = [ "log", "pin-project-lite", @@ -1999,9 +2340,9 @@ dependencies = [ [[package]] name = "tracing-attributes" -version = "0.1.30" +version = "0.1.31" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "81383ab64e72a7a8b8e13130c49e3dab29def6d0c7d76a03087b3cf71c5c6903" +checksum = "7490cfa5ec963746568740651ac6781f701c9c5ea257c58e057f3ba8cf69e8da" dependencies = [ "proc-macro2", "quote", @@ -2010,9 +2351,9 @@ dependencies = [ [[package]] name = "tracing-core" -version = "0.1.34" +version = "0.1.36" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "b9d12581f227e93f094d3af2ae690a574abb8a2b9b7a96e7cfe9647b2b617678" +checksum = "db97caf9d906fbde555dd62fa95ddba9eecfd14cb388e4f491a66d74cd5fb79a" dependencies = [ "once_cell", "valuable", @@ -2089,6 +2430,24 @@ version = "0.9.0" source = "registry+https://github.com/rust-lang/crates.io-index" checksum = "8ecb6da28b8a351d773b68d5825ac39017e680750f980f3a1a85cd8dd28a47c1" +[[package]] +name = "url" +version = "2.5.8" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "ff67a8a4397373c3ef660812acab3268222035010ab8680ec4215f38ba3d0eed" +dependencies = [ + "form_urlencoded", + "idna", + "percent-encoding", + "serde", +] + +[[package]] +name = "utf8_iter" +version = "1.0.4" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "b6c140620e7ffbb22c2dee59cafe6084a59b5ffc27a8859a5f0d494b5d52b6be" + [[package]] name = "utf8parse" version = "0.2.2" @@ -2158,6 +2517,19 @@ dependencies = [ "wasm-bindgen-shared", ] +[[package]] +name = "wasm-bindgen-futures" +version = "0.4.54" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = 
"7e038d41e478cc73bae0ff9b36c60cff1c98b8f38f8d7e8061e79ee63608ac5c" +dependencies = [ + "cfg-if", + "js-sys", + "once_cell", + "wasm-bindgen", + "web-sys", +] + [[package]] name = "wasm-bindgen-macro" version = "0.2.104" @@ -2190,6 +2562,16 @@ dependencies = [ "unicode-ident", ] +[[package]] +name = "web-sys" +version = "0.3.81" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "9367c417a924a74cae129e6a2ae3b47fabb1f8995595ab474029da749a8be120" +dependencies = [ + "js-sys", + "wasm-bindgen", +] + [[package]] name = "windows-core" version = "0.62.1" @@ -2426,8 +2808,111 @@ version = "0.45.0" source = "registry+https://github.com/rust-lang/crates.io-index" checksum = "052283831dbae3d879dc7f51f3d92703a316ca49f91540417d38591826127814" +[[package]] +name = "writeable" +version = "0.6.2" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "9edde0db4769d2dc68579893f2306b26c6ecfbe0ef499b013d731b7b9247e0b9" + +[[package]] +name = "yoke" +version = "0.8.1" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "72d6e5c6afb84d73944e5cedb052c4680d5657337201555f9f2a16b7406d4954" +dependencies = [ + "stable_deref_trait", + "yoke-derive", + "zerofrom", +] + +[[package]] +name = "yoke-derive" +version = "0.8.1" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "b659052874eb698efe5b9e8cf382204678a0086ebf46982b79d6ca3182927e5d" +dependencies = [ + "proc-macro2", + "quote", + "syn", + "synstructure", +] + +[[package]] +name = "zerocopy" +version = "0.8.37" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "7456cf00f0685ad319c5b1693f291a650eaf345e941d082fc4e03df8a03996ac" +dependencies = [ + "zerocopy-derive", +] + +[[package]] +name = "zerocopy-derive" +version = "0.8.37" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "1328722bbf2115db7e19d69ebcc15e795719e2d66b60827c6a69a117365e37a0" +dependencies = [ + "proc-macro2", + 
"quote", + "syn", +] + +[[package]] +name = "zerofrom" +version = "0.1.6" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "50cc42e0333e05660c3587f3bf9d0478688e15d870fab3346451ce7f8c9fbea5" +dependencies = [ + "zerofrom-derive", +] + +[[package]] +name = "zerofrom-derive" +version = "0.1.6" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "d71e5d6e06ab090c67b5e44993ec16b72dcbaabc526db883a360057678b48502" +dependencies = [ + "proc-macro2", + "quote", + "syn", + "synstructure", +] + [[package]] name = "zeroize" version = "1.8.2" source = "registry+https://github.com/rust-lang/crates.io-index" checksum = "b97154e67e32c85465826e8bcc1c59429aaaf107c1e4a9e53c8d8ccd5eff88d0" + +[[package]] +name = "zerotrie" +version = "0.2.3" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "2a59c17a5562d507e4b54960e8569ebee33bee890c70aa3fe7b97e85a9fd7851" +dependencies = [ + "displaydoc", + "yoke", + "zerofrom", +] + +[[package]] +name = "zerovec" +version = "0.11.5" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "6c28719294829477f525be0186d13efa9a3c602f7ec202ca9e353d310fb9a002" +dependencies = [ + "yoke", + "zerofrom", + "zerovec-derive", +] + +[[package]] +name = "zerovec-derive" +version = "0.11.2" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "eadce39539ca5cb3985590102671f2567e659fca9666581ad3411d59207951f3" +dependencies = [ + "proc-macro2", + "quote", + "syn", +] diff --git a/cli/Cargo.toml b/cli/Cargo.toml index 2a43cb9c..a14b5273 100644 --- a/cli/Cargo.toml +++ b/cli/Cargo.toml @@ -11,18 +11,18 @@ license = "Apache-2.0" readme = "../README.md" [dependencies] -clap = { version = "4.5.51", features = ["derive"] } +clap = { version = "4.5.54", features = ["derive"] } colored = "3.0.0" directories = "6.0.0" serde = { version = "1.0.219", features = ["derive"] } -tracing = "0.1.41" -tokio = {version = "1.47.0",features = 
["macros",'rt-multi-thread']} +tracing = "0.1.44" +tokio = {version = "1.49.0",features = ["macros",'rt-multi-thread']} anyhow = "1.0.100" tonic = "0.14.2" tonic-reflection = "0.14.2" -prost-types = "0.14.1" -prost = "0.14.1" -cortexflow_agent_api = {version = "0.1.1",features = ["client"]} +prost-types = "0.14.3" +prost = "0.14.3" +cortexflow_agent_api = {path = "../core/api",features = ["client"]} kube = "2.0.1" k8s-openapi = {version = "0.26.0", features = ["v1_34"]} diff --git a/cli/src/errors.rs b/cli/src/errors.rs new file mode 100644 index 00000000..b813e35b --- /dev/null +++ b/cli/src/errors.rs @@ -0,0 +1,124 @@ +use colored::Colorize; +use std::{error::Error, fmt}; + +// docs: +// +// CliError enum to group all the errors +// +// Custom error definition +// +// BaseError: +// - used for general errors +// +// InstallerError: +// - used for general installation errors occured during the installation of cortexflow components. Can be used for: +// - Return downloading errors +// - Return unsuccessful file removal during installation +// +// ClientError: +// - used for Kubernetes client errors. Can be used for: +// - Return client connection errors +// +// AgentError: +// - used for cortexflow agent errors. Can be used for: +// - return errors from the reflection server +// - return unavailable agent errors (404) +// +// +// implements fmt::Display for user friendly error messages + +#[derive(Debug)] +pub enum CliError { + InstallerError { reason: String }, + ClientError(kube::Error), + AgentError(tonic_reflection::server::Error), + BaseError { reason: String }, +} +// docs: +// +// The following functions implements the trait From conversions +// +// The From Trait is used to perform a value-to-value conversion while consuming input values. 
+// We use that to return a single error type 'CliError' that incapsulates multiple error types + +impl From for CliError { + fn from(e: kube::Error) -> Self { + CliError::ClientError(e) + } +} +impl From for CliError { + fn from(e: anyhow::Error) -> Self { + CliError::BaseError { + reason: e.to_string(), + } + } +} +impl From for CliError { + fn from(e: prost::DecodeError) -> Self { + return CliError::AgentError(tonic_reflection::server::Error::DecodeError(e)); + } +} +impl From for CliError { + fn from(e: tonic::Status) -> Self { + return CliError::BaseError { + reason: e.to_string(), + }; + } +} + +// docs: +// +// The Trait fmt::Display is used to create a user friendly error message for the CliError type. +// This Trait automatically implements the ToString trait for the type allowing +// the usage of .to_string() method + +impl fmt::Display for CliError { + fn fmt(&self, f: &mut fmt::Formatter<'_>) -> fmt::Result { + match self { + CliError::InstallerError { reason } => { + write!( + f, + "{} {} {}", + "=====>".blue().bold(), + "An error occured while installing cortexflow components. Reason:" + .bold() + .red(), + reason.red().bold() + ) + } + CliError::BaseError { reason } => { + write!( + f, + "{} {} {}", + "=====>".blue().bold(), + "An error occured. 
Reason:" + .bold() + .red(), + reason.red().bold() + ) + } + CliError::ClientError(e) => { + // raw error looks like this + // (ErrorResponse { status: "failed", message: "Failed to connect to kubernetes client", reason: "transport error", code: 404 } + let msg = Error::source(e).unwrap(); // msg = Failed to connect to kubernetes client: transport error + write!( + f, + "{} {} {}", + "=====>".blue().bold(), + "Client Error:".bold().red(), + msg.to_string().red().bold() + ) + } + CliError::AgentError(e) => { + let msg = Error::source(e).unwrap(); + write!( + f, + "{} {} {}", + "=====>".bold().blue(), + "Agent Error:".bold().red(), + msg.to_string().bold().red() + ) + } + } + } +} diff --git a/cli/src/essential.rs b/cli/src/essential.rs index 3f433508..5ca01b9a 100644 --- a/cli/src/essential.rs +++ b/cli/src/essential.rs @@ -1,12 +1,10 @@ +use crate::errors::CliError; use std::borrow::Cow; -use std::process::Output; use std::thread; use std::time::Duration; -use std::{collections::BTreeMap, fmt, process::Command, result::Result::Ok}; +use std::{collections::BTreeMap, process::Command, result::Result::Ok}; -use anyhow::Error; use colored::Colorize; -use k8s_openapi::apimachinery::pkg::version; use kube::core::ErrorResponse; use serde::Serialize; @@ -17,97 +15,6 @@ use kube::client::Client; pub static BASE_COMMAND: &str = "kubectl"; // docs: Kubernetes base command -// docs: -// -// CliError enum to group all the errors -// -// Custom error definition -// InstallerError: -// - used for general installation errors occured during the installation of cortexflow components. Can be used for: -// - Return downloading errors -// - Return unsuccessful file removal during installation -// -// ClientError: -// - used for Kubernetes client errors. Can be used for: -// - Return client connection errors -// -// UninstallError: -// - used for general installation errors occured during the uninstall for cortexflow components. 
Can be used for: -// - Return components removal errors -// -// AgentError: -// - used for cortexflow agent errors. Can be used for: -// - return errors from the reflection server -// - return unavailable agent errors (404) -// -// MonitoringError: -// - used for general monitoring errors. TODO: currently under implementation -// -// implements fmt::Display for user friendly error messages - -#[derive(Debug)] -pub enum CliError { - InstallerError { reason: String }, - ClientError(kube::Error), - UninstallError { reason: String }, - AgentError(tonic_reflection::server::Error), - MonitoringError { reason: String }, -} -// docs: -// error type conversions - -impl From for CliError { - fn from(e: kube::Error) -> Self { - CliError::ClientError(e) - } -} -impl From for CliError { - fn from(e: anyhow::Error) -> Self { - CliError::MonitoringError { - reason: format!("{}", e), - } - } -} -impl From<()> for CliError { - fn from(v: ()) -> Self { - return ().into(); - } -} - -// docs: -// fmt::Display implementation for CliError type. Creates a user friendly message error message. -// TODO: implement colored messages using the colorize crate for better output display - -impl fmt::Display for CliError { - fn fmt(&self, f: &mut fmt::Formatter<'_>) -> fmt::Result { - match self { - CliError::InstallerError { reason } => { - write!( - f, - "An error occured while installing cortexflow components. Reason: {}", - reason - ) - } - CliError::UninstallError { reason } => { - write!( - f, - "An error occured while installing cortexflow components. Reason: {}", - reason - ) - } - CliError::MonitoringError { reason } => { - write!( - f, - "An error occured while installing cortexflow components. 
Reason: {}", - reason - ) - } - CliError::ClientError(e) => write!(f, "Client Error: {}", e), - CliError::AgentError(e) => write!(f, "Agent Error: {}", e), - } - } -} - #[derive(Serialize)] pub struct MetadataConfigFile { blocklist: Vec, @@ -143,7 +50,7 @@ pub async fn connect_to_client() -> Result { // // Returns an error if the command fails -pub fn update_cli() { +pub fn update_cli() -> Result<(), CliError> { let latest_version = get_latest_cfcli_version().expect("Can't get the latest version"); println!("{} {}", "=====>".blue().bold(), "Updating CortexFlow CLI"); println!( @@ -158,10 +65,12 @@ pub fn update_cli() { .expect("error"); if !output.status.success() { - eprintln!( - "Error extracting the version : {}", - String::from_utf8_lossy(&output.stderr) - ); + return Err(CliError::InstallerError { + reason: format!( + "Error extracting the version : {}", + String::from_utf8_lossy(&output.stderr) + ), + }); } else { // extract the cli version: let version = String::from_utf8_lossy(&output.stdout) @@ -199,10 +108,12 @@ pub fn update_cli() { .output() .expect("error"); if !update_command.status.success() { - eprintln!( - "Error updating the CLI: {} ", - String::from_utf8_lossy(&update_command.stderr) - ); + return Err(CliError::InstallerError { + reason: format!( + "Error updating the CLI: {} ", + String::from_utf8_lossy(&update_command.stderr) + ), + }); } else { println!( "{} {}", @@ -212,21 +123,22 @@ pub fn update_cli() { } } } + Ok(()) } // docs: // // This function returns the latest version of the CLI from the crates.io registry -pub fn get_latest_cfcli_version() -> Result { +pub fn get_latest_cfcli_version() -> Result { let output = Command::new("cargo") .args(["search", "cortexflow-cli", "--limit", "1"]) .output() .expect("Error"); if !output.status.success() { - return Err(Error::msg(format!( - "An error occured during the latest version extraction" - ))); + return Err(CliError::InstallerError { + reason: "Cannot extract the latest version".to_string(), 
+ }); } else { let command_stdout = String::from_utf8_lossy(&output.stdout); @@ -323,10 +235,10 @@ pub async fn read_configs() -> Result, CliError> { Ok(Vec::new()) //in case the key fails } - Err(_) => Err(CliError::ClientError(kube::Error::Api(ErrorResponse { + Err(e) => Err(CliError::ClientError(kube::Error::Api(ErrorResponse { status: "failed".to_string(), message: "Failed to connect to kubernetes client".to_string(), - reason: "Your cluster is probably disconnected".to_string(), + reason: e.to_string(), code: 404, }))), } @@ -351,7 +263,7 @@ pub async fn create_config_file(config_struct: MetadataConfigFile) -> Result<(), match connect_to_client().await { Ok(client) => { let namespace = "cortexflow"; - let configmap = "cortexbrain-client-config"; + //let configmap = "cortexbrain-client-config"; let api: Api = Api::namespaced(client, namespace); @@ -373,15 +285,20 @@ pub async fn create_config_file(config_struct: MetadataConfigFile) -> Result<(), println!("Configmap created successfully"); } Err(e) => { - eprintln!("An error occured: {}", e); + return Err(CliError::ClientError(kube::Error::Api(ErrorResponse { + status: "failed".to_string(), + message: "Failed to create configmap".to_string(), + reason: e.to_string(), + code: 404, + }))); } } Ok(()) } - Err(_) => Err(CliError::ClientError(kube::Error::Api(ErrorResponse { + Err(e) => Err(CliError::ClientError(kube::Error::Api(ErrorResponse { status: "failed".to_string(), message: "Failed to connect to kubernetes client".to_string(), - reason: "Your cluster is probably disconnected".to_string(), + reason: e.to_string(), code: 404, }))), } @@ -422,7 +339,9 @@ pub async fn update_config_metadata(input: &str, action: &str) -> Result<(), Cli if let Some(index) = ips.iter().position(|target| target == &input.to_string()) { ips.remove(index); } else { - eprintln!("Index of element not found"); + return Err(CliError::BaseError { + reason: "Index of element not found".to_string(), + }); } // override blocklist parameters 
@@ -472,17 +391,18 @@ pub async fn update_configmap(config_struct: MetadataConfigFile) -> Result<(), C println!("Map updated successfully"); } Err(e) => { - eprintln!("An error occured during the patching process: {}", e); - return Err(e.into()); + return Err(CliError::BaseError { + reason: format!("An error occured during the patching process: {}", e), + }); } } Ok(()) } - Err(_) => Err(CliError::ClientError(kube::Error::Api(ErrorResponse { + Err(e) => Err(CliError::ClientError(kube::Error::Api(ErrorResponse { status: "failed".to_string(), message: "Failed to connect to kubernetes client".to_string(), - reason: "Your cluster is probably disconnected".to_string(), + reason: e.to_string(), code: 404, }))), } diff --git a/cli/src/install.rs b/cli/src/install.rs index a24fc227..105853c0 100644 --- a/cli/src/install.rs +++ b/cli/src/install.rs @@ -1,10 +1,11 @@ -use crate::essential::{ - BASE_COMMAND, CliError, connect_to_client, create_config_file, create_configs, -}; -use clap::{Args, Subcommand, command}; +use crate::errors::CliError; +use crate::essential::{BASE_COMMAND, connect_to_client, create_config_file, create_configs}; +use clap::{Args, Subcommand}; use colored::Colorize; -use kube::Error; +use k8s_openapi::api::core::v1::ConfigMap; use kube::core::ErrorResponse; +use kube::{Api, Client, Error}; +use std::thread::sleep; use std::{process::Command, thread, time::Duration}; // docs: @@ -39,10 +40,12 @@ pub enum InstallCommands { about = "Deploys a simple example contained in deploy-test-pod.yaml" )] TestPods, + #[command(name = "blocklist", about = "Install or Repair blocklist configmap")] + Blocklist, } //install args -#[derive(Args, Debug, Clone)] +#[derive(Args, Debug)] pub struct InstallArgs { #[command(subcommand)] pub install_cmd: InstallCommands, @@ -144,12 +147,16 @@ async fn install_cluster_components() -> Result<(), CliError> { ); Ok(()) } - Err(e) => Err(CliError::ClientError(Error::Api(ErrorResponse { - status: "failed".to_string(), - message: 
"Failed to connect to kubernetes client".to_string(), - reason: "Your cluster is probably disconnected".to_string(), - code: 404, - }))), + Err(e) => { + return { + Err(CliError::ClientError(Error::Api(ErrorResponse { + status: "failed".to_string(), + message: "Failed to connect to kubernetes client".to_string(), + reason: e.to_string(), + code: 404, + }))) + }; + } } } @@ -190,12 +197,94 @@ async fn install_simple_example_component() -> Result<(), CliError> { ); Ok(()) } - Err(e) => Err(CliError::ClientError(Error::Api(ErrorResponse { - status: "failed".to_string(), - message: "Failed to connect to kubernetes client".to_string(), - reason: "Your cluster is probably disconnected".to_string(), - code: 404, - }))), + Err(e) => { + return { + Err(CliError::ClientError(Error::Api(ErrorResponse { + status: "failed".to_string(), + message: "Failed to connect to kubernetes client".to_string(), + reason: e.to_string(), + code: 404, + }))) + }; + } + } +} + +// docs: +pub async fn install_blocklist_configmap() -> Result<(), CliError> { + match connect_to_client().await { + Ok(client) => { + println!( + "{} {}", + "=====>".blue().bold(), + "Checking if the Blocklist configmap exists" + ); + sleep(Duration::from_secs(1)); + let blocklist_exists = check_if_blocklist_exists(client).await?; + if !blocklist_exists { + println!( + "{} {}", + "=====>".blue().bold(), + "Blocklist configmap does not exist".red().bold() + ); + sleep(Duration::from_secs(1)); + println!("{} {}", "=====>".bold().blue(), "Creating configmap"); + let metdata_configs = create_configs(); + sleep(Duration::from_secs(1)); + match create_config_file(metdata_configs).await { + Ok(_) => { + println!( + "{} {}", + "=====>".bold().blue(), + "Configmap created/repaired successfully".bold().green() + ) + } + Err(e) => { + return Err(CliError::InstallerError { + reason: e.to_string(), + }); + } + } + return Ok(()); + } else { + println!() + } + + Ok(()) + } + Err(e) => { + return 
Err(CliError::ClientError(Error::Api(ErrorResponse { + status: "failed".to_string(), + message: "Failed to connect to kubernetes client".to_string(), + reason: e.to_string(), + code: 404, + }))); + } + } +} + +// docs: +async fn check_if_blocklist_exists(client: Client) -> Result { + let namespace = "cortexflow"; + let name = "cortexbrain-client-config"; + let api: Api = Api::namespaced(client, namespace); + match api.get(name).await { + Ok(_) => { + println!( + "{} {}", + "=====>".bold().blue(), + "Blocklist configmap exists".green().bold() + ); + Ok(true) + } + Err(_) => { + println!( + "{} {}", + "=====>".bold().blue(), + "Blocklist configmap doesn not exists".red().bold(), + ); + Ok(false) + } } } @@ -238,7 +327,7 @@ fn install_components(components_type: &str) -> Result<(), CliError> { "Applying", component.to_string().green().bold() ); - apply_component(component); + apply_component(component)?; i = i + 1; } } else if components_type == "simple-example" { @@ -258,7 +347,7 @@ fn install_components(components_type: &str) -> Result<(), CliError> { "Applying", component.to_string().green().bold() ); - apply_component(component); + apply_component(component)?; i = i + 1; } } else { @@ -282,16 +371,18 @@ fn apply_component(file: &str) -> Result<(), CliError> { let output = Command::new(BASE_COMMAND) .args(["apply", "-f", file]) .output() - .map_err(|_| CliError::InstallerError { - reason: "Can't install component from file".to_string(), + .map_err(|e| CliError::InstallerError { + reason: e.to_string(), })?; if !output.status.success() { - eprintln!( - "Error installing file: {}:\n{}", - file, - String::from_utf8_lossy(&output.stderr) - ); + return Err(CliError::InstallerError { + reason: format!( + "Error installing file: {}:\n{}", + file, + String::from_utf8_lossy(&output.stderr) + ), + }); } else { println!("✅ Applied {}", file); } @@ -366,16 +457,18 @@ fn download_file(src: &str) -> Result<(), CliError> { Command::new("wget") .args([src]) .output() - 
.map_err(|_| CliError::InstallerError { - reason: "An error occured: component download failed".to_string(), + .map_err(|e| CliError::InstallerError { + reason: e.to_string(), })?; if !output.status.success() { - eprintln!( - "Error copying file: {}.\n{}", - src, - String::from_utf8_lossy(&output.stderr) - ); + return Err(CliError::InstallerError { + reason: format!( + "Error copying file: {}.\n{}", + src, + String::from_utf8_lossy(&output.stderr) + ), + }); } else { println!("✅ Copied file from {} ", src); } @@ -396,16 +489,18 @@ fn rm_file(file_to_remove: &str) -> Result<(), CliError> { let output = Command::new("rm") .args(["-f", file_to_remove]) .output() - .map_err(|_| CliError::InstallerError { - reason: "cannot remove temporary installation file".to_string(), + .map_err(|e| CliError::InstallerError { + reason: e.to_string(), })?; if !output.status.success() { - eprintln!( - "Error removing file: {}:\n{}", - file_to_remove, - String::from_utf8_lossy(&output.stderr) - ); + return Err(CliError::InstallerError { + reason: format!( + "Error removing file: {}:\n{}", + file_to_remove, + String::from_utf8_lossy(&output.stderr) + ), + }); } else { println!("✅ Removed file {}", file_to_remove); } diff --git a/cli/src/logs.rs b/cli/src/logs.rs index bd819ccd..102d97b6 100644 --- a/cli/src/logs.rs +++ b/cli/src/logs.rs @@ -1,8 +1,9 @@ -use std::{ str, process::Command, result::Result::Ok }; -use colored::Colorize; +use crate::errors::CliError; +use crate::essential::{BASE_COMMAND, connect_to_client}; use clap::Args; -use kube::{ Error, core::ErrorResponse }; -use crate::essential::{ connect_to_client, BASE_COMMAND, CliError }; +use colored::Colorize; +use kube::{Error, core::ErrorResponse}; +use std::{process::Command, result::Result::Ok, str}; #[derive(Args, Debug, Clone)] pub struct LogsArgs { @@ -53,7 +54,7 @@ impl Component { pub async fn logs_command( service: Option, component: Option, - namespace: Option + namespace: Option, ) -> Result<(), CliError> { match 
connect_to_client().await { Ok(_) => { @@ -92,12 +93,18 @@ pub async fn logs_command( .collect() } (Some(service_name), None) => { - println!("Getting logs for service '{}' in namespace '{}'", service_name, ns); + println!( + "Getting logs for service '{}' in namespace '{}'", + service_name, ns + ); get_pods_for_service(&ns, &service_name).await? } (None, Some(component_str)) => { let comp = Component::from(component_str); - println!("Getting logs for component '{:?}' in namespace '{}'", comp, ns); + println!( + "Getting logs for component '{:?}' in namespace '{}'", + comp, ns + ); get_pods_for_component(&ns, &comp).await? } (None, None) => { @@ -117,8 +124,9 @@ pub async fn logs_command( for pod in pods { println!("{} Logs for pod: {:?}", "=====>".blue().bold(), pod); - match - Command::new(BASE_COMMAND).args(["logs", &pod, "-n", &ns, "--tail=50"]).output() + match Command::new(BASE_COMMAND) + .args(["logs", &pod, "-n", &ns, "--tail=50"]) + .output() { Ok(output) => { if output.status.success() { @@ -130,33 +138,34 @@ pub async fn logs_command( } } else { let stderr = str::from_utf8(&output.stderr).unwrap_or("Unknown error"); - eprintln!("Error getting logs for pod '{:?}': {}", pod, stderr); + return Err(CliError::BaseError { + reason: format!( + "Error getting logs for pod '{:?}': {}", + pod, stderr + ), + }); } } Err(err) => { - eprintln!( - "Failed to execute {} logs for pod '{:?}': {}", - BASE_COMMAND, - pod, - err - ); + return Err(CliError::BaseError { + reason: format!( + "Failed to execute {} logs for pod '{:?}': {}", + BASE_COMMAND, pod, err + ), + }); } } } Ok(()) } - Err(_) => { - Err( - CliError::ClientError( - Error::Api(ErrorResponse { - status: "failed".to_string(), - message: "Failed to connect to kubernetes client".to_string(), - reason: "Your cluster is probably disconnected".to_string(), - code: 404, - }) - ) - ) + Err(e) => { + return Err(CliError::ClientError(Error::Api(ErrorResponse { + status: "failed".to_string(), + message: "Failed to 
connect to kubernetes client".to_string(), + reason: e.to_string(), + code: 404, + }))); } } } @@ -174,24 +183,22 @@ pub async fn logs_command( pub async fn check_namespace_exists(namespace: &str) -> Result { match connect_to_client().await { Ok(_) => { - let output = Command::new(BASE_COMMAND).args(["get", "namespace", namespace]).output(); + let output = Command::new(BASE_COMMAND) + .args(["get", "namespace", namespace]) + .output(); match output { Ok(output) => Ok(output.status.success()), Err(_) => Ok(false), } } - Err(_) => { - Err( - CliError::ClientError( - Error::Api(ErrorResponse { - status: "failed".to_string(), - message: "Failed to connect to kubernetes client".to_string(), - reason: "Your cluster is probably disconnected".to_string(), - code: 404, - }) - ) - ) + Err(e) => { + return Err(CliError::ClientError(Error::Api(ErrorResponse { + status: "failed".to_string(), + message: "Failed to connect to kubernetes client".to_string(), + reason: e.to_string(), + code: 404, + }))); } } } @@ -232,17 +239,13 @@ pub async fn get_available_namespaces() -> Result, CliError> { _ => Ok(Vec::new()), } } - Err(_) => { - Err( - CliError::ClientError( - Error::Api(ErrorResponse { - status: "failed".to_string(), - message: "Failed to connect to kubernetes client".to_string(), - reason: "Your cluster is probably disconnected".to_string(), - code: 404, - }) - ) - ) + Err(e) => { + return Err(CliError::ClientError(Error::Api(ErrorResponse { + status: "failed".to_string(), + message: "Failed to connect to kubernetes client".to_string(), + reason: e.to_string(), + code: 404, + }))); } } } @@ -259,7 +262,7 @@ pub async fn get_available_namespaces() -> Result, CliError> { async fn get_pods_for_service( namespace: &str, - service_name: &str + service_name: &str, ) -> Result, CliError> { match connect_to_client().await { Ok(_) => { @@ -290,17 +293,13 @@ async fn get_pods_for_service( _ => Ok(Vec::new()), } } - Err(_) => { - Err( - CliError::ClientError( - Error::Api(ErrorResponse 
{ - status: "failed".to_string(), - message: "Failed to connect to kubernetes client".to_string(), - reason: "Your cluster is probably disconnected".to_string(), - code: 404, - }) - ) - ) + Err(e) => { + return Err(CliError::ClientError(Error::Api(ErrorResponse { + status: "failed".to_string(), + message: "Failed to connect to kubernetes client".to_string(), + reason: e.to_string(), + code: 404, + }))); } } } @@ -318,7 +317,7 @@ async fn get_pods_for_service( async fn get_pods_for_component( namespace: &str, - component: &Component + component: &Component, ) -> Result, CliError> { match connect_to_client().await { Ok(_) => { @@ -349,17 +348,13 @@ async fn get_pods_for_component( _ => Ok(Vec::new()), } } - Err(_) => { - Err( - CliError::ClientError( - Error::Api(ErrorResponse { - status: "failed".to_string(), - message: "Failed to connect to kubernetes client".to_string(), - reason: "Your cluster is probably disconnected".to_string(), - code: 404, - }) - ) - ) + Err(e) => { + return Err(CliError::ClientError(Error::Api(ErrorResponse { + status: "failed".to_string(), + message: "Failed to connect to kubernetes client".to_string(), + reason: e.to_string(), + code: 404, + }))); } } } @@ -402,17 +397,13 @@ async fn get_all_pods(namespace: &str) -> Result, CliError> { _ => Ok(Vec::new()), } } - Err(_) => { - Err( - CliError::ClientError( - Error::Api(ErrorResponse { - status: "failed".to_string(), - message: "Failed to connect to kubernetes client".to_string(), - reason: "Your cluster is probably disconnected".to_string(), - code: 404, - }) - ) - ) + Err(e) => { + return Err(CliError::ClientError(Error::Api(ErrorResponse { + status: "failed".to_string(), + message: "Failed to connect to kubernetes client".to_string(), + reason: e.to_string(), + code: 404, + }))); } } } diff --git a/cli/src/main.rs b/cli/src/main.rs index 272123ff..8d543cd1 100644 --- a/cli/src/main.rs +++ b/cli/src/main.rs @@ -1,4 +1,4 @@ -#![allow(warnings)] +mod errors; mod essential; mod install; mod 
logs; @@ -8,24 +8,23 @@ mod service; mod status; mod uninstall; -use clap::{ Args, Parser, Subcommand }; -use colored::Colorize; +use clap::{Args, Parser, Subcommand}; use std::result::Result::Ok; use tracing::debug; -use crate::essential::{ CliError, info, update_cli }; -use crate::install::{ InstallArgs, InstallCommands, install_cortexflow, install_simple_example }; -use crate::logs::{ LogsArgs, logs_command }; -use crate::monitoring::{ MonitorArgs, MonitorCommands, list_features, monitor_dropped_packets, monitor_identity_events, monitor_latency_metrics }; +use crate::errors::CliError; +use crate::essential::{info, update_cli}; +use crate::install::{InstallArgs, InstallCommands, install_cortexflow, install_simple_example}; +use crate::logs::{LogsArgs, logs_command}; +use crate::monitoring::{ + MonitorArgs, MonitorCommands, list_features, monitor_dropped_packets, monitor_identity_events, + monitor_latency_metrics, monitor_tracked_veth, +}; use crate::policies::{ - PoliciesArgs, - PoliciesCommands, - check_blocklist, - create_blocklist, - remove_ip, + PoliciesArgs, PoliciesCommands, check_blocklist, create_blocklist, remove_ip, }; -use crate::service::{ ServiceArgs, ServiceCommands, describe_service, list_services }; -use crate::status::{ StatusArgs, status_command }; +use crate::service::{ServiceArgs, ServiceCommands, describe_service, list_services}; +use crate::status::{StatusArgs, status_command}; use crate::uninstall::uninstall; use crate::essential::update_config_metadata; @@ -43,89 +42,104 @@ struct Cli { cmd: Option, } -#[derive(Subcommand, Debug, Clone)] +#[derive(Subcommand, Debug)] enum Commands { /* list of available commands */ - #[command(name = "install", about = "Manage installation")] Install(InstallArgs), + #[command(name = "install", about = "Manage installation")] + Install(InstallArgs), #[command(name = "uninstall", about = "Manage uninstallation")] Uninstall, #[command(name = "update", about = "Check for updates")] Update, #[command(name = 
"info", about = "Check core info")] Info, - #[command(name = "service", about = "Manage services")] Service(ServiceArgs), - #[command(name = "status", about = "Check components status")] Status(StatusArgs), - #[command(name = "logs", about = "Check services logs")] Logs(LogsArgs), - #[command(name = "monitoring", about = "Monitoring commands")] Monitor(MonitorArgs), - #[command(name = "policy", about = "Network Policies")] Policies(PoliciesArgs), + #[command(name = "service", about = "Manage services")] + Service(ServiceArgs), + #[command(name = "status", about = "Check components status")] + Status(StatusArgs), + #[command(name = "logs", about = "Check services logs")] + Logs(LogsArgs), + #[command(name = "monitoring", about = "Monitoring commands")] + Monitor(MonitorArgs), + #[command(name = "policy", about = "Network Policies")] + Policies(PoliciesArgs), } -#[derive(Args, Debug, Clone)] +#[derive(Args)] struct SetArgs { val: String, } - +//TODO: add command for monitoring veth interfaces async fn args_parser() -> Result<(), CliError> { let args = Cli::parse(); debug!("Arguments {:?}", args.cmd); match args.cmd { - Some(Commands::Install(installation_args)) => - match installation_args.install_cmd { - InstallCommands::All => { - install_cortexflow().await.map_err(|e| eprintln!("{}",e) )?; - } - InstallCommands::TestPods => { - install_simple_example().await.map_err(|e| eprintln!("{}",e) )?; - } + Some(Commands::Install(installation_args)) => match installation_args.install_cmd { + InstallCommands::All => { + install_cortexflow().await?; } + InstallCommands::TestPods => { + install_simple_example().await?; + } + InstallCommands::Blocklist => { + //install or repair blocklist configmap + let _ = install::install_blocklist_configmap().await?; + } + }, Some(Commands::Uninstall) => { - uninstall().await.map_err(|e| eprintln!("{}",e) )?; + uninstall().await?; } Some(Commands::Update) => { - update_cli(); + update_cli()?; } Some(Commands::Info) => { info(); } - 
Some(Commands::Service(service_args)) => - match service_args.service_cmd { - ServiceCommands::List { namespace } => { - list_services(namespace).await.map_err(|e| eprintln!("{}",e) )?; - } - ServiceCommands::Describe { service_name, namespace } => { - describe_service(service_name, &namespace).await.map_err(|e| eprintln!("{}",e) )?; - } + Some(Commands::Service(service_args)) => match service_args.service_cmd { + ServiceCommands::List { namespace } => { + list_services(namespace).await?; + } + ServiceCommands::Describe { + service_name, + namespace, + } => { + describe_service(service_name, &namespace).await?; } + }, Some(Commands::Status(status_args)) => { - status_command(status_args.output, status_args.namespace).await.map_err(|e| eprintln!("{}",e) )?; + status_command(status_args.output, status_args.namespace).await?; } Some(Commands::Logs(logs_args)) => { - logs_command(logs_args.service, logs_args.component, logs_args.namespace).await.map_err(|e| eprintln!("{}",e) )?; + logs_command(logs_args.service, logs_args.component, logs_args.namespace).await?; } - Some(Commands::Monitor(monitor_args)) => - match monitor_args.monitor_cmd { - MonitorCommands::List => { - let _ = list_features().await.map_err(|e| eprintln!("{}",e) )?; - } - MonitorCommands::Connections => { - let _ = monitor_identity_events().await.map_err(|e| eprintln!("{}",e) )?; - } - MonitorCommands::Latencymetrics => { - let _ = monitor_latency_metrics().await.map_err(|e| eprintln!("{}",e) )?; - } - MonitorCommands::Droppedpackets => { - let _ = monitor_dropped_packets().await.map_err(|e| eprintln!("{}",e) )?; - } + Some(Commands::Monitor(monitor_args)) => match monitor_args.monitor_cmd { + MonitorCommands::List => { + let _ = list_features().await?; + } + MonitorCommands::Connections => { + let _ = monitor_identity_events().await?; } + MonitorCommands::Latencymetrics => { + let _ = monitor_latency_metrics().await?; + } + MonitorCommands::Droppedpackets => { + let _ = 
monitor_dropped_packets().await?; + } + MonitorCommands::Veth => { + let _ = monitor_tracked_veth().await?; + } + }, Some(Commands::Policies(policies_args)) => { match policies_args.policy_cmd { PoliciesCommands::CheckBlocklist => { - let _ = check_blocklist().await.map_err(|e| eprintln!("{}",e) )?; + let _ = check_blocklist().await?; } PoliciesCommands::CreateBlocklist => { // pass the ip as a monitoring flag match policies_args.flags { None => { - eprintln!("{}", "Insert at least one ip to create a blocklist".red()); + return Err(CliError::BaseError { + reason: "Insert at least one ip to create a blocklist".to_string(), + }); } Some(ip) => { println!("inserted ip: {} ", ip); @@ -133,39 +147,44 @@ async fn args_parser() -> Result<(), CliError> { match create_blocklist(&ip).await { Ok(_) => { //update the config metadata - let _ = update_config_metadata(&ip, "add").await.map_err(|e| eprintln!("{}",e) )?; + let _ = update_config_metadata(&ip, "add").await?; } Err(e) => { - eprintln!("{}", e); + return Err(CliError::BaseError { + reason: e.to_string(), + }); } } } } } - PoliciesCommands::RemoveIpFromBlocklist => - match policies_args.flags { - None => { - eprintln!( - "{}", - "Insert at least one ip to remove from the blocklist".red() - ); - } - Some(ip) => { - println!("Inserted ip: {}", ip); - match remove_ip(&ip).await { - Ok(_) => { - let _ = update_config_metadata(&ip, "delete").await.map_err(|e| eprintln!("{}",e) )?; - } - Err(e) => { - eprintln!("{}", e); - } + PoliciesCommands::RemoveIpFromBlocklist => match policies_args.flags { + None => { + return Err(CliError::BaseError { + reason: "Insert at least one ip to remove from the blocklist" + .to_string(), + }); + } + Some(ip) => { + println!("Inserted ip: {}", ip); + match remove_ip(&ip).await { + Ok(_) => { + let _ = update_config_metadata(&ip, "delete").await?; + } + Err(e) => { + return Err(CliError::BaseError { + reason: e.to_string(), + }); } } } + }, } } None => { - eprintln!("CLI unknown argument. 
Cli arguments passed: {:?}", args.cmd); + return Err(CliError::BaseError { + reason: format!("CLI unknown argument. Cli arguments passed: {:?}", args.cmd), + }); } } Ok(()) @@ -173,5 +192,5 @@ async fn args_parser() -> Result<(), CliError> { #[tokio::main] async fn main() { - let _ = args_parser().await; + let _ = args_parser().await.map_err(|e| eprintln!("{}", e)); } diff --git a/cli/src/mod.rs b/cli/src/mod.rs index 2c91fdc5..fe7c8165 100644 --- a/cli/src/mod.rs +++ b/cli/src/mod.rs @@ -5,4 +5,5 @@ pub mod service; pub mod status; pub mod logs; pub mod monitoring; -pub mod policies; \ No newline at end of file +pub mod policies; +pub mod errors; \ No newline at end of file diff --git a/cli/src/monitoring.rs b/cli/src/monitoring.rs index 506cc5ff..eefae1c7 100644 --- a/cli/src/monitoring.rs +++ b/cli/src/monitoring.rs @@ -1,19 +1,20 @@ -#![allow(warnings)] - //monitoring CLI function for identity service -use anyhow::Error; use colored::Colorize; use k8s_openapi::chrono::DateTime; +use kube::core::ErrorResponse; use prost::Message; use prost_types::FileDescriptorProto; use std::result::Result::Ok; use tonic_reflection::pb::v1::server_reflection_response::MessageResponse; use agent_api::client::{connect_to_client, connect_to_server_reflection}; -use agent_api::requests::{get_all_features, send_active_connection_request}; +use agent_api::requests::{ + get_all_features, send_active_connection_request, send_dropped_packets_request, + send_latency_metrics_request, send_tracked_veth_request, send_veth_tracked_hashmap_req, +}; -use clap::command; -use clap::{Args, Parser, Subcommand}; +use crate::errors::CliError; +use clap::{Args, Subcommand}; //monitoring subcommands #[derive(Subcommand, Debug, Clone)] @@ -23,15 +24,23 @@ pub enum MonitorCommands { #[command( name = "connections", about = "Monitor the recent connections detected by the identity service" - )] Connections, + )] + Connections, #[command( name = "latencymetrics", about = "Monitor the latency metrics 
detected by the metrics service" - )] Latencymetrics, + )] + Latencymetrics, #[command( name = "droppedpackets", about = "Monitor the dropped packets metrics detected by the metrics service" - )] Droppedpackets, + )] + Droppedpackets, + #[command( + name = "veth", + about = "Monitor tracked veth interfaces from the identity service" + )] + Veth, } // cfcli monitor @@ -39,11 +48,9 @@ pub enum MonitorCommands { pub struct MonitorArgs { #[command(subcommand)] pub monitor_cmd: MonitorCommands, - //#[arg(long, short)] - //pub flags: Option, } -pub async fn list_features() -> Result<(), Error> { +pub async fn list_features() -> Result<(), CliError> { match connect_to_server_reflection().await { Ok(client) => { println!( @@ -57,9 +64,8 @@ pub async fn list_features() -> Result<(), Error> { //decoding the proto file while let Some(resp) = streaming.message().await? { - if - let Some(MessageResponse::FileDescriptorResponse(fdr)) = - resp.message_response + if let Some(MessageResponse::FileDescriptorResponse(fdr)) = + resp.message_response { println!("Available services:"); for bytes in fdr.file_descriptor_proto { @@ -77,35 +83,38 @@ pub async fn list_features() -> Result<(), Error> { } } Err(e) => { - println!( - "{} {} {} {}", - "=====>".blue().bold(), - "An error occured".red(), - "Error:", - e - ); - return Err(e); + return Err(CliError::AgentError( + tonic_reflection::server::Error::InvalidFileDescriptorSet(e.to_string()), + )); } } } Err(e) => { - println!( - "{} {}", - "=====>".blue().bold(), - "Failed to connect to CortexFlow Server Reflection".red() - ); - return Err(e); + return Err(CliError::ClientError(kube::Error::Api(ErrorResponse { + status: "failed".to_string(), + message: "Failed to connect to kubernetes client".to_string(), + reason: e.to_string(), + code: 404, + }))); } } Ok(()) } -pub async fn monitor_identity_events() -> Result<(), Error> { - println!("{} {}", "=====>".blue().bold(), "Connecting to cortexflow Client".white()); +pub async fn 
monitor_identity_events() -> Result<(), CliError> { + println!( + "{} {}", + "=====>".blue().bold(), + "Connecting to cortexflow Client".white() + ); match connect_to_client().await { Ok(client) => { - println!("{} {}", "=====>".blue().bold(), "Connected to CortexFlow Client".green()); + println!( + "{} {}", + "=====>".blue().bold(), + "Connected to CortexFlow Client".green() + ); match send_active_connection_request(client).await { Ok(response) => { let resp = response.into_inner(); @@ -130,39 +139,42 @@ pub async fn monitor_identity_events() -> Result<(), Error> { } } Err(e) => { - println!( - "{} {} {} {}", - "=====>".blue().bold(), - "An error occured".red(), - "Error:", - e - ); - return Err(e); + return Err(CliError::AgentError( + tonic_reflection::server::Error::InvalidFileDescriptorSet(e.to_string()), + )); } } } Err(e) => { - println!( - "{} {}", - "=====>".blue().bold(), - "Failed to connect to CortexFlow Client".red() - ); - return Err(e); + return Err(CliError::ClientError(kube::Error::Api(ErrorResponse { + status: "failed".to_string(), + message: "Failed to connect to kubernetes client".to_string(), + reason: e.to_string(), + code: 404, + }))); } } Ok(()) } -pub async fn monitor_latency_metrics() -> Result<(), Error> { +pub async fn monitor_latency_metrics() -> Result<(), CliError> { //function to monitor latency metrics - println!("{} {}", "=====>".blue().bold(), "Connecting to cortexflow Client".white()); + println!( + "{} {}", + "=====>".blue().bold(), + "Connecting to cortexflow Client".white() + ); match connect_to_client().await { Ok(client) => { - println!("{} {}", "=====>".blue().bold(), "Connected to CortexFlow Client".green()); + println!( + "{} {}", + "=====>".blue().bold(), + "Connected to CortexFlow Client".green() + ); //send request to get latency metrics - match agent_api::requests::send_latency_metrics_request(client).await { + match send_latency_metrics_request(client).await { Ok(response) => { let resp = response.into_inner(); if 
resp.metrics.is_empty() { @@ -173,9 +185,10 @@ pub async fn monitor_latency_metrics() -> Result<(), Error> { "=====>".blue().bold(), resp.metrics.len() ); - + for (i, metric) in resp.metrics.iter().enumerate() { - let converted_timestamp= convert_timestamp_to_date(metric.timestamp_us); + let converted_timestamp = + convert_timestamp_to_date(metric.timestamp_us); println!( "{} Latency[{}] \n tgid: {} \n process_name: {} \n address_family: {} \n delta(us): {} \n src_address_v4: {} \n dst_address_v4: {} \n src_address_v6: {} \n dst_address_v6: {} \n local_port: {} \n remote_port: {} \n timestamp_us: {}\n", "=====>".blue().bold(), @@ -196,38 +209,41 @@ pub async fn monitor_latency_metrics() -> Result<(), Error> { } } Err(e) => { - println!( - "{} {} {} {}", - "=====>".blue().bold(), - "An error occured".red(), - "Error:", - e - ); - return Err(e); + return Err(CliError::AgentError( + tonic_reflection::server::Error::InvalidFileDescriptorSet(e.to_string()), + )); } } } Err(e) => { - println!( - "{} {}", - "=====>".blue().bold(), - "Failed to connect to CortexFlow Client".red() - ); - return Err(e); + return Err(CliError::ClientError(kube::Error::Api(ErrorResponse { + status: "failed".to_string(), + message: "Failed to connect to kubernetes client".to_string(), + reason: e.to_string(), + code: 404, + }))); } } Ok(()) } -pub async fn monitor_dropped_packets() -> Result<(), Error> { +pub async fn monitor_dropped_packets() -> Result<(), CliError> { //function to monitor dropped packets metrics - println!("{} {}", "=====>".blue().bold(), "Connecting to cortexflow Client".white()); + println!( + "{} {}", + "=====>".blue().bold(), + "Connecting to cortexflow Client".white() + ); match connect_to_client().await { Ok(client) => { - println!("{} {}", "=====>".blue().bold(), "Connected to CortexFlow Client".green()); + println!( + "{} {}", + "=====>".blue().bold(), + "Connected to CortexFlow Client".green() + ); //send request to get dropped packets metrics - match 
agent_api::requests::send_dropped_packets_request(client).await { + match send_dropped_packets_request(client).await { Ok(response) => { let resp = response.into_inner(); if resp.metrics.is_empty() { @@ -242,7 +258,8 @@ pub async fn monitor_dropped_packets() -> Result<(), Error> { resp.metrics.len() ); for (i, metric) in resp.metrics.iter().enumerate() { - let converted_timestamp= convert_timestamp_to_date(metric.timestamp_us); + let converted_timestamp = + convert_timestamp_to_date(metric.timestamp_us); println!( "{} DroppedPackets[{}]\n TGID: {}\n Process: {}\n SK Drops: {}\n Socket Errors: {}\n Soft Errors: {}\n Backlog Length: {}\n Write Memory Queued: {}\n Receive Buffer Size: {}\n ACK Backlog: {}\n Timestamp: {} µs", "=====>".blue().bold(), @@ -262,30 +279,69 @@ pub async fn monitor_dropped_packets() -> Result<(), Error> { } } Err(e) => { - println!( - "{} {} {} {}", - "=====>".blue().bold(), - "An error occured".red(), - "Error:", - e - ); - return Err(e); + return Err(CliError::AgentError( + tonic_reflection::server::Error::InvalidFileDescriptorSet(e.to_string()), + )); } } } Err(e) => { - println!( - "{} {}", - "=====>".blue().bold(), - "Failed to connect to CortexFlow Client".red() - ); - return Err(e); + return Err(CliError::ClientError(kube::Error::Api(ErrorResponse { + status: "failed".to_string(), + message: "Failed to connect to kubernetes client".to_string(), + reason: e.to_string(), + code: 404, + }))); } } Ok(()) } -fn convert_timestamp_to_date(timestamp:u64)->String{ - let datetime = DateTime::from_timestamp_micros(timestamp as i64).unwrap(); - datetime.to_string() +pub async fn monitor_tracked_veth() -> Result<(), CliError> { + println!( + "{} {}", + "=====>".blue().bold(), + "Connecting to cortexflow Client".white() + ); + match connect_to_client().await { + Ok(client) => match send_veth_tracked_hashmap_req(client).await { + Ok(response) => { + let veth_response = response.into_inner(); + // if veth_response.tot_monitored_veth == 0 { + // 
println!("{} {} ", "=====>".blue().bold(), "No tracked veth found"); + // Ok(()) + // } else { + // println!( + // "{} {} {} {} ", + // "=====>".blue().bold(), + // "Found:", + // &veth_response.tot_monitored_veth, + // "tracked veth" + // ); + for veth in veth_response.veths.iter() { + println!("{} {:?}", "=====>".blue().bold(), &veth); + } + Ok(()) + } + Err(e) => { + return Err(CliError::AgentError( + tonic_reflection::server::Error::InvalidFileDescriptorSet(e.to_string()), + )); + } + }, + Err(e) => { + return Err(CliError::ClientError(kube::Error::Api(ErrorResponse { + status: "failed".to_string(), + message: "Failed to connect to kubernetes client".to_string(), + reason: e.to_string(), + code: 404, + }))); + } + } +} + +fn convert_timestamp_to_date(timestamp: u64) -> String { + DateTime::from_timestamp_micros(timestamp as i64) + .map(|dt| dt.to_string()) + .unwrap_or_else(|| "Cannot convert timestamp to date".to_string()) } diff --git a/cli/src/service.rs b/cli/src/service.rs index b66ed7e1..8cfebf10 100644 --- a/cli/src/service.rs +++ b/cli/src/service.rs @@ -1,19 +1,22 @@ -use std::{ str, process::Command }; +use clap::{Args, Subcommand}; use colored::Colorize; -use clap::{ Args, Subcommand }; -use kube::{ core::ErrorResponse, Error }; +use kube::{Error, core::ErrorResponse}; +use std::{process::Command, str}; -use crate::essential::{ BASE_COMMAND, connect_to_client, CliError }; -use crate::logs::{ get_available_namespaces, check_namespace_exists }; +use crate::errors::CliError; +use crate::essential::{BASE_COMMAND, connect_to_client}; +use crate::logs::{check_namespace_exists, get_available_namespaces}; //service subcommands #[derive(Subcommand, Debug, Clone)] pub enum ServiceCommands { - #[command(name = "list", about = "Check services list")] List { + #[command(name = "list", about = "Check services list")] + List { #[arg(long)] namespace: Option, }, - #[command(name = "describe", about = "Describe service")] Describe { + #[command(name = "describe", 
about = "Describe service")] + Describe { service_name: String, #[arg(long)] namespace: Option, @@ -44,7 +47,12 @@ pub async fn list_services(namespace: Option) -> Result<(), CliError> { Ok(_) => { let ns = namespace.unwrap_or_else(|| "cortexflow".to_string()); - println!("{} {} {}", "=====>".blue().bold(), "Listing services in namespace:", ns); + println!( + "{} {} {}", + "=====>".blue().bold(), + "Listing services in namespace:", + ns + ); // Check if namespace exists first if !check_namespace_exists(&ns).await? { @@ -72,7 +80,9 @@ pub async fn list_services(namespace: Option) -> Result<(), CliError> { Ok(output) => { if !output.status.success() { let error = str::from_utf8(&output.stderr).unwrap_or("Unknown error"); - eprintln!("Error executing {}: {}", BASE_COMMAND, error); + return Err(CliError::BaseError { + reason: format!("Error executing {}: {}", BASE_COMMAND, error), + }); } let stdout = str::from_utf8(&output.stdout).unwrap_or(""); @@ -87,7 +97,10 @@ pub async fn list_services(namespace: Option) -> Result<(), CliError> { } // header for Table - println!("{:<40} {:<20} {:<10} {:<10}", "NAME", "STATUS", "RESTARTS", "AGE"); + println!( + "{:<40} {:<20} {:<10} {:<10}", + "NAME", "STATUS", "RESTARTS", "AGE" + ); println!("{}", "-".repeat(80)); // Display Each Pod. 
@@ -108,40 +121,33 @@ pub async fn list_services(namespace: Option) -> Result<(), CliError> { println!( "{:<40} {:<20} {:<10} {:<10}", - name, - full_status, - restarts, - age + name, full_status, restarts, age ); } } Ok(()) } Err(err) => { - Err( - CliError::ClientError( - Error::Api(ErrorResponse { - status: "failed".to_string(), - message: "Failed to execute the kubectl command".to_string(), - reason: "Your cluster is probably disconnected".to_string(), - code: 404, - }) - ) - ) + return { + Err(CliError::ClientError(Error::Api(ErrorResponse { + status: "failed".to_string(), + message: "Failed to execute the kubectl command".to_string(), + reason: err.to_string(), + code: 404, + }))) + }; } } } - Err(_) => { - Err( - CliError::ClientError( - Error::Api(ErrorResponse { - status: "failed".to_string(), - message: "Failed to connect to kubernetes client".to_string(), - reason: "Your cluster is probably disconnected".to_string(), - code: 404, - }) - ) - ) + Err(e) => { + return { + Err(CliError::ClientError(Error::Api(ErrorResponse { + status: "failed".to_string(), + message: "Failed to connect to kubernetes client".to_string(), + reason: e.to_string(), + code: 404, + }))) + }; } } } @@ -157,19 +163,19 @@ pub async fn list_services(namespace: Option) -> Result<(), CliError> { // - else return an empty Vector // // -// Returns a CliError if the connection fails +// Returns a CliError if the connection failsss pub async fn describe_service( service_name: String, - namespace: &Option + namespace: &Option, ) -> Result<(), CliError> { match connect_to_client().await { Ok(_) => { match list_services(namespace.clone()).await { Ok(_) => { - //let file_path = get_config_directory().unwrap().1; - - let ns = namespace.clone().unwrap_or_else(|| "cortexflow".to_string()); + let ns = namespace + .clone() + .unwrap_or_else(|| "cortexflow".to_string()); println!( "{} {} {} {} {}", @@ -193,7 +199,10 @@ pub async fn describe_service( for available_ns in &available_namespaces { 
println!(" • {}", available_ns); } - println!("\nTry: cortex service describe {} --namespace ", service_name); + println!( + "\nTry: cortexflow service describe {} --namespace ", + service_name + ); } else { println!("No namespaces found in the cluster."); } @@ -207,15 +216,14 @@ pub async fn describe_service( match output { Ok(output) => { if !output.status.success() { - let error = str - ::from_utf8(&output.stderr) - .unwrap_or("Unknown error"); - eprintln!("Error executing kubectl describe: {}", error); - eprintln!( - "Make sure the pod '{}' exists in namespace '{}'", - service_name, - ns - ); + let error = + str::from_utf8(&output.stderr).unwrap_or("Unknown error"); + return Err(CliError::BaseError { + reason: format!( + "Error executing kubectl describe: {}.Make sure the pod '{}' exists in namespace '{}'", + error, service_name, ns + ), + }); } let stdout = str::from_utf8(&output.stdout).unwrap_or(""); @@ -229,33 +237,33 @@ pub async fn describe_service( Ok(()) } Err(err) => { - Err( - CliError::ClientError( - Error::Api(ErrorResponse { - status: "failed".to_string(), - message: "Failed to execute the kubectl command ".to_string(), - reason: "Your cluster is probably disconnected".to_string(), - code: 404, - }) - ) - ) + return { + Err(CliError::ClientError(Error::Api(ErrorResponse { + status: "failed".to_string(), + message: "Failed to execute the kubectl command ".to_string(), + reason: err.to_string(), + code: 404, + }))) + }; } } } - Err(e) => todo!(), + Err(e) => { + return Err(CliError::BaseError { + reason: format!("Cannot list services: {}", e), + }); + } } } - Err(_) => { - Err( - CliError::ClientError( - Error::Api(ErrorResponse { - status: "failed".to_string(), - message: "Failed to connect to kubernetes client".to_string(), - reason: "Your cluster is probably disconnected".to_string(), - code: 404, - }) - ) - ) + Err(e) => { + return { + Err(CliError::ClientError(Error::Api(ErrorResponse { + status: "failed".to_string(), + message: "Failed to 
connect to kubernetes client".to_string(), + reason: e.to_string(), + code: 404, + }))) + }; } } } diff --git a/cli/src/status.rs b/cli/src/status.rs index 2680781f..ca5d43aa 100644 --- a/cli/src/status.rs +++ b/cli/src/status.rs @@ -4,7 +4,8 @@ use clap::Args; use kube::{ Error, core::ErrorResponse }; use crate::logs::{ get_available_namespaces, check_namespace_exists }; -use crate::essential::{ BASE_COMMAND, connect_to_client, CliError }; +use crate::essential::{ BASE_COMMAND, connect_to_client }; +use crate::errors::CliError; #[derive(Debug)] pub enum OutputFormat { @@ -130,13 +131,13 @@ pub async fn status_command( } } } - Err(_) => { - Err( + Err(e) => { + return Err( CliError::ClientError( Error::Api(ErrorResponse { status: "failed".to_string(), message: "Failed to connect to kubernetes client".to_string(), - reason: "Your cluster is probably disconnected".to_string(), + reason: e.to_string(), code: 404, }) ) @@ -185,13 +186,13 @@ async fn get_pods_status(namespace: &str) -> Result Ok(Vec::new()), } } - Err(_) => { - Err( + Err(e) => { + return Err( CliError::ClientError( Error::Api(ErrorResponse { status: "failed".to_string(), message: "Failed to connect to kubernetes client".to_string(), - reason: "Your cluster is probably disconnected".to_string(), + reason: e.to_string(), code: 404, }) ) @@ -240,13 +241,13 @@ async fn get_services_status(namespace: &str) -> Result Ok(Vec::new()), } } - Err(_) => { - Err( + Err(e) => { + return Err( CliError::ClientError( Error::Api(ErrorResponse { status: "failed".to_string(), message: "Failed to connect to kubernetes client".to_string(), - reason: "Your cluster is probably disconnected".to_string(), + reason: e.to_string(), code: 404, }) ) diff --git a/cli/src/uninstall.rs b/cli/src/uninstall.rs index 0d71cfa9..b9558ddc 100644 --- a/cli/src/uninstall.rs +++ b/cli/src/uninstall.rs @@ -1,7 +1,8 @@ use colored::Colorize; -use std::{io::stdin, process::Command, thread, time::Duration}; +use std::{io::stdin, 
process::Command}; -use crate::essential::{BASE_COMMAND, CliError, connect_to_client}; +use crate::errors::CliError; +use crate::essential::{BASE_COMMAND, connect_to_client}; use kube::{Error, core::ErrorResponse}; //docs: @@ -38,12 +39,16 @@ pub async fn uninstall() -> Result<(), CliError> { } Ok(()) } - Err(_) => Err(CliError::ClientError(Error::Api(ErrorResponse { - status: "failed".to_string(), - message: "Failed to connect to kubernetes client".to_string(), - reason: "Your cluster is probably disconnected".to_string(), - code: 404, - }))), + Err(e) => { + return { + Err(CliError::ClientError(Error::Api(ErrorResponse { + status: "failed".to_string(), + message: "Failed to connect to kubernetes client".to_string(), + reason: e.to_string(), + code: 404, + }))) + }; + } } } @@ -85,18 +90,21 @@ async fn uninstall_all() -> Result<(), CliError> { Ok(()) } else { let stderr = String::from_utf8_lossy(&output.stderr); - eprintln!("Error deleting cortexflow namespace. Error: {} ", stderr); - Err(CliError::InstallerError { + return Err(CliError::InstallerError { reason: format!("Failed to delete cortexflow namespace. 
Error: {}", stderr), - }) + }); } } - Err(_) => Err(CliError::ClientError(Error::Api(ErrorResponse { - status: "failed".to_string(), - message: "Failed to connect to kubernetes client".to_string(), - reason: "Your cluster is probably disconnected".to_string(), - code: 404, - }))), + Err(e) => { + return { + Err(CliError::ClientError(Error::Api(ErrorResponse { + status: "failed".to_string(), + message: "Failed to connect to kubernetes client".to_string(), + reason: e.to_string(), + code: 404, + }))) + }; + } } } @@ -131,44 +139,20 @@ async fn uninstall_component(component_type: &str, component: &str) -> Result<() Ok(()) } else { let stderr = String::from_utf8_lossy(&output.stderr); - eprintln!("Error deleting {}:\n{}", component, stderr); - Err(CliError::InstallerError { + return Err(CliError::InstallerError { reason: format!("Failed to delete component '{}': {}", component, stderr), - }) + }); } } - Err(_) => Err(CliError::ClientError(Error::Api(ErrorResponse { - status: "failed".to_string(), - message: "Failed to connect to kubernetes client".to_string(), - reason: "Your cluster is probably disconnected".to_string(), - code: 404, - }))), - } -} - -// -// -//docs: -// -// This function is deprecated and will be removed in the next version -// -// Do not include or refactor this function -#[deprecated(since = "0.1.4")] -fn rm_dir(directory_to_remove: &str) { - let output = Command::new("rm") - .args(["-rf", directory_to_remove]) - .output() - .expect("cannot remove directory"); - - if !output.status.success() { - eprintln!( - "Error removing directory: {}:\n{}", - directory_to_remove, - String::from_utf8_lossy(&output.stderr) - ); - } else { - println!("✅ Removed directory {}", directory_to_remove); + Err(e) => { + return { + Err(CliError::ClientError(Error::Api(ErrorResponse { + status: "failed".to_string(), + message: "Failed to connect to kubernetes client".to_string(), + reason: e.to_string(), + code: 404, + }))) + }; + } } - - 
thread::sleep(Duration::from_secs(2)); -} +} \ No newline at end of file diff --git a/core/Cargo.lock b/core/Cargo.lock index 506d5dc4..6ae4f98d 100644 --- a/core/Cargo.lock +++ b/core/Cargo.lock @@ -275,9 +275,9 @@ checksum = "46c5e41b57b8bba42a04676d81cb89e9ee8e859a1a66f80a5a72e1cb76b34d43" [[package]] name = "bytemuck" -version = "1.24.0" +version = "1.25.0" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "1fbdf580320f38b612e485521afda1ee26d10cc9884efaaa750d383e13e3c5f4" +checksum = "c8efb64bd706a16a1bdde310ae86b351e4d21550d98d056f22f8a7f7a2183fec" dependencies = [ "bytemuck_derive", ] @@ -295,9 +295,9 @@ dependencies = [ [[package]] name = "bytes" -version = "1.10.1" +version = "1.11.0" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "d71b6127be86fdcfddb610f7182ac57211d4b18a3e9c82eb2d17662f2227ad6a" +checksum = "b35204fbdc0b3f4446b89fc1ac2cf84a8a68971995d0bf2e925ec7cd960f9cb3" [[package]] name = "camino" @@ -406,17 +406,19 @@ name = "cortexbrain-common" version = "0.1.0" dependencies = [ "anyhow", - "tracing", - "tracing-subscriber", -] - -[[package]] -name = "cortexbrain-common" -version = "0.1.0" -source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "5daea06747f06e000deaa52b7aceb504ddc309c061badf76e0b4b3d146ebf3a4" -dependencies = [ - "anyhow", + "aya", + "bytemuck", + "bytemuck_derive", + "bytes", + "k8s-openapi", + "kube", + "opentelemetry", + "opentelemetry-appender-tracing", + "opentelemetry-otlp", + "opentelemetry-semantic-conventions", + "opentelemetry-stdout", + "opentelemetry_sdk", + "tokio", "tracing", "tracing-subscriber", ] @@ -430,8 +432,7 @@ dependencies = [ "bytemuck", "bytemuck_derive", "chrono", - "cortexbrain-common 0.1.0 (registry+https://github.com/rust-lang/crates.io-index)", - "cortexflow_identity 0.1.1 (registry+https://github.com/rust-lang/crates.io-index)", + "cortexbrain-common", "prost", "tokio", "tokio-stream", @@ -453,35 +454,12 @@ dependencies = [ 
"bytemuck", "bytemuck_derive", "bytes", - "cortexbrain-common 0.1.0 (registry+https://github.com/rust-lang/crates.io-index)", - "k8s-openapi", - "kube", - "libc", - "nix", - "tokio", - "tracing", - "tracing-subscriber", -] - -[[package]] -name = "cortexflow_identity" -version = "0.1.1" -source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "5725a802e4f494b5fab4c69b1455a32dd3804b52a58c665a7d751eeae93ddfca" -dependencies = [ - "anyhow", - "aya", - "bytemuck", - "bytemuck_derive", - "bytes", - "cortexbrain-common 0.1.0 (registry+https://github.com/rust-lang/crates.io-index)", + "cortexbrain-common", "k8s-openapi", "kube", - "libc", "nix", "tokio", "tracing", - "tracing-subscriber", ] [[package]] @@ -542,6 +520,17 @@ dependencies = [ "crypto-common", ] +[[package]] +name = "displaydoc" +version = "0.2.5" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "97369cbbc041bc366949bc74d34658d6cda5621039731c6310521892a3a20ae0" +dependencies = [ + "proc-macro2", + "quote", + "syn", +] + [[package]] name = "either" version = "1.15.0" @@ -639,12 +628,34 @@ version = "0.3.31" source = "registry+https://github.com/rust-lang/crates.io-index" checksum = "05f29059c0c2090612e8d742178b0580d2dc940c837851ad723096f87af6663e" +[[package]] +name = "futures-executor" +version = "0.3.31" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "1e28d1d997f585e54aebc3f97d39e72338912123a67330d723fdbb564d646c9f" +dependencies = [ + "futures-core", + "futures-task", + "futures-util", +] + [[package]] name = "futures-io" version = "0.3.31" source = "registry+https://github.com/rust-lang/crates.io-index" checksum = "9e5c1b78ca4aae1ac06c48a526a655760685149f0d465d21f37abfe57ce075c6" +[[package]] +name = "futures-macro" +version = "0.3.31" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "162ee34ebcb7c64a8abebc059ce0fee27c2262618d7b60ed8faf72fef13c3650" +dependencies = [ + "proc-macro2", + "quote", + 
"syn", +] + [[package]] name = "futures-sink" version = "0.3.31" @@ -666,6 +677,7 @@ dependencies = [ "futures-channel", "futures-core", "futures-io", + "futures-macro", "futures-sink", "futures-task", "memchr", @@ -864,6 +876,7 @@ version = "0.1.17" source = "registry+https://github.com/rust-lang/crates.io-index" checksum = "3c6995591a8f1380fcb4ba966a252a4b29188d51d2b89e3a252f5305be65aea8" dependencies = [ + "base64", "bytes", "futures-channel", "futures-core", @@ -871,7 +884,9 @@ dependencies = [ "http", "http-body", "hyper", + "ipnet", "libc", + "percent-encoding", "pin-project-lite", "socket2", "tokio", @@ -903,6 +918,108 @@ dependencies = [ "cc", ] +[[package]] +name = "icu_collections" +version = "2.1.1" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "4c6b649701667bbe825c3b7e6388cb521c23d88644678e83c0c4d0a621a34b43" +dependencies = [ + "displaydoc", + "potential_utf", + "yoke", + "zerofrom", + "zerovec", +] + +[[package]] +name = "icu_locale_core" +version = "2.1.1" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "edba7861004dd3714265b4db54a3c390e880ab658fec5f7db895fae2046b5bb6" +dependencies = [ + "displaydoc", + "litemap", + "tinystr", + "writeable", + "zerovec", +] + +[[package]] +name = "icu_normalizer" +version = "2.1.1" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "5f6c8828b67bf8908d82127b2054ea1b4427ff0230ee9141c54251934ab1b599" +dependencies = [ + "icu_collections", + "icu_normalizer_data", + "icu_properties", + "icu_provider", + "smallvec", + "zerovec", +] + +[[package]] +name = "icu_normalizer_data" +version = "2.1.1" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "7aedcccd01fc5fe81e6b489c15b247b8b0690feb23304303a9e560f37efc560a" + +[[package]] +name = "icu_properties" +version = "2.1.2" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = 
"020bfc02fe870ec3a66d93e677ccca0562506e5872c650f893269e08615d74ec" +dependencies = [ + "icu_collections", + "icu_locale_core", + "icu_properties_data", + "icu_provider", + "zerotrie", + "zerovec", +] + +[[package]] +name = "icu_properties_data" +version = "2.1.2" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "616c294cf8d725c6afcd8f55abc17c56464ef6211f9ed59cccffe534129c77af" + +[[package]] +name = "icu_provider" +version = "2.1.1" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "85962cf0ce02e1e0a629cc34e7ca3e373ce20dda4c4d7294bbd0bf1fdb59e614" +dependencies = [ + "displaydoc", + "icu_locale_core", + "writeable", + "yoke", + "zerofrom", + "zerotrie", + "zerovec", +] + +[[package]] +name = "idna" +version = "1.1.0" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "3b0875f23caa03898994f6ddc501886a45c7d3d62d04d2d90788d47be1b1e4de" +dependencies = [ + "idna_adapter", + "smallvec", + "utf8_iter", +] + +[[package]] +name = "idna_adapter" +version = "1.2.1" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "3acae9609540aa318d1bc588455225fb2085b9ed0c4f6bd0d9d5bcd86f1a0344" +dependencies = [ + "icu_normalizer", + "icu_properties", +] + [[package]] name = "indexmap" version = "2.12.0" @@ -913,6 +1030,12 @@ dependencies = [ "hashbrown 0.16.0", ] +[[package]] +name = "ipnet" +version = "2.11.0" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "469fb0b9cefa57e3ef31275ee7cacb78f2fdca44e4765491884a2b119d4eb130" + [[package]] name = "itertools" version = "0.14.0" @@ -1045,6 +1168,12 @@ version = "0.11.0" source = "registry+https://github.com/rust-lang/crates.io-index" checksum = "df1d3c3b53da64cf5760482273a98e575c651a67eec7f77df96b5b642de8f039" +[[package]] +name = "litemap" +version = "0.8.1" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "6373607a59f0be73a39b6fe456b8192fcc3585f602af20751600e974dd455e77" + 
[[package]] name = "lock_api" version = "0.4.14" @@ -1099,9 +1228,12 @@ dependencies = [ "aya-log", "bytemuck", "bytes", - "cortexbrain-common 0.1.0", + "cortexbrain-common", "libc", "nix", + "opentelemetry", + "opentelemetry-otlp", + "opentelemetry_sdk", "tokio", "tracing", "tracing-subscriber", @@ -1216,6 +1348,112 @@ version = "0.1.6" source = "registry+https://github.com/rust-lang/crates.io-index" checksum = "d05e27ee213611ffe7d6348b942e8f942b37114c00cc03cec254295a4a17852e" +[[package]] +name = "opentelemetry" +version = "0.32.0" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "b0142c63252a9e054e68a4c61a5778f7b14f576274d593f8ce883d191a099682" +dependencies = [ + "futures-core", + "futures-sink", + "js-sys", + "pin-project-lite", + "thiserror 2.0.17", + "tracing", +] + +[[package]] +name = "opentelemetry-appender-tracing" +version = "0.32.0" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "2c0080f0dc1d7c786f467cd85a4e395fcab11ee852004f39a29a18ab7c25d837" +dependencies = [ + "opentelemetry", + "tracing", + "tracing-core", + "tracing-subscriber", +] + +[[package]] +name = "opentelemetry-http" +version = "0.32.0" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "5683015d09e2df236ef005b17f6f196f0d5f6313c4fa43a7b6a53b52776e4331" +dependencies = [ + "async-trait", + "bytes", + "http", + "opentelemetry", + "reqwest", +] + +[[package]] +name = "opentelemetry-otlp" +version = "0.32.0" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "9966929966d17620d7c316c643ba62631826e10021409357772d5eea84f62c35" +dependencies = [ + "http", + "opentelemetry", + "opentelemetry-http", + "opentelemetry-proto", + "opentelemetry_sdk", + "prost", + "reqwest", + "thiserror 2.0.17", + "tokio", + "tonic", + "tonic-types", +] + +[[package]] +name = "opentelemetry-proto" +version = "0.32.0" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = 
"56d658ba1faf63f7b9c492cfbe6e0ec365440a16132d3270c1065f7b33f1b638" +dependencies = [ + "opentelemetry", + "opentelemetry_sdk", + "prost", + "tonic", + "tonic-prost", +] + +[[package]] +name = "opentelemetry-semantic-conventions" +version = "0.32.0" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "6ca2f98a0437b427b4b08f19f1caa3c44db885a202bc12cfea13d6c702243d68" + +[[package]] +name = "opentelemetry-stdout" +version = "0.32.0" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "a1b1c6a247d79091f0062a5f4bd058589525cf987a8d4c169440d9c1be72f0ad" +dependencies = [ + "chrono", + "opentelemetry", + "opentelemetry_sdk", +] + +[[package]] +name = "opentelemetry_sdk" +version = "0.32.1" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "9b59f80e1ac4d5ff7a2db8fb6c80badb7f0f3f858211fba08dd9aaec750894f9" +dependencies = [ + "futures-channel", + "futures-executor", + "futures-util", + "opentelemetry", + "percent-encoding", + "portable-atomic", + "rand", + "thiserror 2.0.17", + "tokio", + "tokio-stream", +] + [[package]] name = "ordered-float" version = "2.10.1" @@ -1349,6 +1587,30 @@ version = "0.1.0" source = "registry+https://github.com/rust-lang/crates.io-index" checksum = "8b870d8c151b6f2fb93e84a13146138f05d02ed11c7e7c54f8826aaaf7c9f184" +[[package]] +name = "portable-atomic" +version = "1.13.1" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "c33a9471896f1c69cecef8d20cbe2f7accd12527ce60845ff44c153bb2a21b49" + +[[package]] +name = "potential_utf" +version = "0.1.4" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "b73949432f5e2a09657003c25bca5e19a0e9c84f8058ca374f49e0ebe605af77" +dependencies = [ + "zerovec", +] + +[[package]] +name = "ppv-lite86" +version = "0.2.21" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "85eae3c4ed2f50dcfe72643da4befc30deadb458a9b590d720cde2f2b1e97da9" +dependencies = [ + 
"zerocopy", +] + [[package]] name = "prettyplease" version = "0.2.37" @@ -1469,6 +1731,35 @@ version = "5.3.0" source = "registry+https://github.com/rust-lang/crates.io-index" checksum = "69cdb34c158ceb288df11e18b4bd39de994f6657d83847bdffdbd7f346754b0f" +[[package]] +name = "rand" +version = "0.9.2" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "6db2770f06117d490610c7488547d543617b21bfa07796d7a12f6f1bd53850d1" +dependencies = [ + "rand_chacha", + "rand_core", +] + +[[package]] +name = "rand_chacha" +version = "0.9.0" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "d3022b5f1df60f26e1ffddd6c66e8aa15de382ae63b3a0c1bfc0e4d3e3f325cb" +dependencies = [ + "ppv-lite86", + "rand_core", +] + +[[package]] +name = "rand_core" +version = "0.9.5" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "76afc826de14238e6e8c374ddcc1fa19e374fd8dd986b0d2af0d02377261d83c" +dependencies = [ + "getrandom 0.3.4", +] + [[package]] name = "redox_syscall" version = "0.5.18" @@ -1507,6 +1798,37 @@ version = "0.8.8" source = "registry+https://github.com/rust-lang/crates.io-index" checksum = "7a2d987857b319362043e95f5353c0535c1f58eec5336fdfcf626430af7def58" +[[package]] +name = "reqwest" +version = "0.13.4" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "219c5811de6525e5416c7d5d53bb656d3afdbc6c5af816e0802bcfa42dbdc1c3" +dependencies = [ + "base64", + "bytes", + "futures-channel", + "futures-core", + "futures-util", + "http", + "http-body", + "http-body-util", + "hyper", + "hyper-util", + "js-sys", + "log", + "percent-encoding", + "pin-project-lite", + "sync_wrapper", + "tokio", + "tower", + "tower-http", + "tower-service", + "url", + "wasm-bindgen", + "wasm-bindgen-futures", + "web-sys", +] + [[package]] name = "ring" version = "0.17.14" @@ -1773,6 +2095,12 @@ dependencies = [ "windows-sys 0.60.2", ] +[[package]] +name = "stable_deref_trait" +version = "1.2.1" +source = 
"registry+https://github.com/rust-lang/crates.io-index" +checksum = "6ce2be8dc25455e1f91df71bfa12ad37d7af1092ae736f3a6cd0e37bc7810596" + [[package]] name = "subtle" version = "2.6.1" @@ -1795,6 +2123,20 @@ name = "sync_wrapper" version = "1.0.2" source = "registry+https://github.com/rust-lang/crates.io-index" checksum = "0bf256ce5efdfa370213c1dabab5935a12e49f2c58d15e9eac2870d3b4f27263" +dependencies = [ + "futures-core", +] + +[[package]] +name = "synstructure" +version = "0.13.2" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "728a70f3dbaf5bab7f0c4b1ac8d7ae5ea60a4b5549c8a5914361c99147a709d2" +dependencies = [ + "proc-macro2", + "quote", + "syn", +] [[package]] name = "tempfile" @@ -1858,11 +2200,21 @@ dependencies = [ "cfg-if", ] +[[package]] +name = "tinystr" +version = "0.8.2" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "42d3e9c45c09de15d06dd8acf5f4e0e399e85927b7f00711024eb7ae10fa4869" +dependencies = [ + "displaydoc", + "zerovec", +] + [[package]] name = "tokio" -version = "1.48.0" +version = "1.50.0" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "ff360e02eab121e0bc37a2d3b4d4dc622e6eda3a8e5253d5435ecf5bd4c68408" +checksum = "27ad5e34374e03cfffefc301becb44e9dc3c17584f414349ebe29ed26661822d" dependencies = [ "bytes", "libc", @@ -2002,6 +2354,17 @@ dependencies = [ "tonic-prost", ] +[[package]] +name = "tonic-types" +version = "0.14.5" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "2a875a902255423d34c1f20838ab374126db8eb41625b7947a1d54113b0b7399" +dependencies = [ + "prost", + "prost-types", + "tonic", +] + [[package]] name = "tower" version = "0.5.2" @@ -2023,20 +2386,23 @@ dependencies = [ [[package]] name = "tower-http" -version = "0.6.6" +version = "0.6.11" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "adc82fd73de2a9722ac5da747f12383d2bfdb93591ee6c58486e0097890f05f2" +checksum = 
"4cfcf7e2740e6fc6d4d688b4ef00650406bb94adf4731e43c096c3a19fe40840" dependencies = [ "base64", "bitflags", "bytes", + "futures-util", "http", "http-body", "mime", "pin-project-lite", + "tower", "tower-layer", "tower-service", "tracing", + "url", ] [[package]] @@ -2155,6 +2521,24 @@ version = "0.9.0" source = "registry+https://github.com/rust-lang/crates.io-index" checksum = "8ecb6da28b8a351d773b68d5825ac39017e680750f980f3a1a85cd8dd28a47c1" +[[package]] +name = "url" +version = "2.5.8" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "ff67a8a4397373c3ef660812acab3268222035010ab8680ec4215f38ba3d0eed" +dependencies = [ + "form_urlencoded", + "idna", + "percent-encoding", + "serde", +] + +[[package]] +name = "utf8_iter" +version = "1.0.4" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "b6c140620e7ffbb22c2dee59cafe6084a59b5ffc27a8859a5f0d494b5d52b6be" + [[package]] name = "valuable" version = "0.1.1" @@ -2204,6 +2588,19 @@ dependencies = [ "wasm-bindgen-shared", ] +[[package]] +name = "wasm-bindgen-futures" +version = "0.4.55" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "551f88106c6d5e7ccc7cd9a16f312dd3b5d36ea8b4954304657d5dfba115d4a0" +dependencies = [ + "cfg-if", + "js-sys", + "once_cell", + "wasm-bindgen", + "web-sys", +] + [[package]] name = "wasm-bindgen-macro" version = "0.2.105" @@ -2236,6 +2633,16 @@ dependencies = [ "unicode-ident", ] +[[package]] +name = "web-sys" +version = "0.3.82" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "3a1f95c0d03a47f4ae1f7a64643a6bb97465d9b740f0fa8f90ea33915c99a9a1" +dependencies = [ + "js-sys", + "wasm-bindgen", +] + [[package]] name = "which" version = "7.0.3" @@ -2475,8 +2882,111 @@ version = "0.46.0" source = "registry+https://github.com/rust-lang/crates.io-index" checksum = "f17a85883d4e6d00e8a97c586de764dabcc06133f7f1d55dce5cdc070ad7fe59" +[[package]] +name = "writeable" +version = "0.6.2" +source = 
"registry+https://github.com/rust-lang/crates.io-index" +checksum = "9edde0db4769d2dc68579893f2306b26c6ecfbe0ef499b013d731b7b9247e0b9" + +[[package]] +name = "yoke" +version = "0.8.1" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "72d6e5c6afb84d73944e5cedb052c4680d5657337201555f9f2a16b7406d4954" +dependencies = [ + "stable_deref_trait", + "yoke-derive", + "zerofrom", +] + +[[package]] +name = "yoke-derive" +version = "0.8.1" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "b659052874eb698efe5b9e8cf382204678a0086ebf46982b79d6ca3182927e5d" +dependencies = [ + "proc-macro2", + "quote", + "syn", + "synstructure", +] + +[[package]] +name = "zerocopy" +version = "0.8.33" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "668f5168d10b9ee831de31933dc111a459c97ec93225beb307aed970d1372dfd" +dependencies = [ + "zerocopy-derive", +] + +[[package]] +name = "zerocopy-derive" +version = "0.8.33" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "2c7962b26b0a8685668b671ee4b54d007a67d4eaf05fda79ac0ecf41e32270f1" +dependencies = [ + "proc-macro2", + "quote", + "syn", +] + +[[package]] +name = "zerofrom" +version = "0.1.6" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "50cc42e0333e05660c3587f3bf9d0478688e15d870fab3346451ce7f8c9fbea5" +dependencies = [ + "zerofrom-derive", +] + +[[package]] +name = "zerofrom-derive" +version = "0.1.6" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "d71e5d6e06ab090c67b5e44993ec16b72dcbaabc526db883a360057678b48502" +dependencies = [ + "proc-macro2", + "quote", + "syn", + "synstructure", +] + [[package]] name = "zeroize" version = "1.8.2" source = "registry+https://github.com/rust-lang/crates.io-index" checksum = "b97154e67e32c85465826e8bcc1c59429aaaf107c1e4a9e53c8d8ccd5eff88d0" + +[[package]] +name = "zerotrie" +version = "0.2.3" +source = 
"registry+https://github.com/rust-lang/crates.io-index" +checksum = "2a59c17a5562d507e4b54960e8569ebee33bee890c70aa3fe7b97e85a9fd7851" +dependencies = [ + "displaydoc", + "yoke", + "zerofrom", +] + +[[package]] +name = "zerovec" +version = "0.11.5" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "6c28719294829477f525be0186d13efa9a3c602f7ec202ca9e353d310fb9a002" +dependencies = [ + "yoke", + "zerofrom", + "zerovec-derive", +] + +[[package]] +name = "zerovec-derive" +version = "0.11.2" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "eadce39539ca5cb3985590102671f2567e659fca9666581ad3411d59207951f3" +dependencies = [ + "proc-macro2", + "quote", + "syn", +] diff --git a/core/api/Cargo.toml b/core/api/Cargo.toml index 988ac46d..0070430d 100644 --- a/core/api/Cargo.toml +++ b/core/api/Cargo.toml @@ -3,7 +3,13 @@ name = "cortexflow_agent_api" version = "0.1.1" edition = "2024" description = "CortexFlow agent API" -authors = ["Lorenzo Tettamanti", "Pranav Verma", "Lorenzo Bradanini","Siddharth Sutar","Andrea Bozzo"] +authors = [ + "Lorenzo Tettamanti", + "Pranav Verma", + "Lorenzo Bradanini", + "Siddharth Sutar", + "Andrea Bozzo", +] documentation = "https://docs.cortexflow.org" homepage = "https://docs.cortexflow.org" repository = "https://github.com/CortexFlow/CortexBrain" @@ -23,14 +29,18 @@ tonic = "0.14.0" tonic-prost = "0.14.0" tracing = "0.1.41" aya = "0.13.1" -cortexbrain-common = "0.1.0" +cortexbrain-common = { path = "../common", features = [ + "map-handlers", + "network-structs", + "buffer-reader", + "monitoring-structs" +] } tonic-reflection = "0.14.0" tonic-build = "0.14.0" tracing-subscriber = "0.3.19" tokio-stream = "0.1.17" -bytemuck = {version ="1.23.0"} +bytemuck = { version = "1.23.0" } bytemuck_derive = "1.10.1" -cortexflow_identity = {version = "0.1.1", features = ["enums"]} chrono = "0.4.42" [build-dependencies] diff --git a/core/api/protos/agent.proto b/core/api/protos/agent.proto index 
3cd236b3..e2b1500a 100644 --- a/core/api/protos/agent.proto +++ b/core/api/protos/agent.proto @@ -68,8 +68,29 @@ message DroppedPacketsResponse { uint32 total_drops = 3; // Total drops across all connections } +// Veth Info + +message VethResponse{ + string status = 1; + repeated string veth_names = 2; // List of active veth interface names + int32 tot_monitored_veth = 3; +} +message VethEvent{ + string name = 1; // Virtual Ethernet Interface Name + uint64 state = 2; // Veth State + string dev_addr = 3; // Veth device Address + uint32 event_type = 4; // Event type + uint32 netns = 5; // Network Namespace + uint32 pid = 6; // Process ID +} + +message VethHashMapResponse{ // returns tracked veth from the tracked_veth hashmap + string status = 1; + map veths = 2; +} + +// Agent Service -//declare agent api service Agent{ // active connections endpoint rpc ActiveConnections(RequestActiveConnections) returns (ActiveConnectionResponse); @@ -81,13 +102,21 @@ service Agent{ // remove ip from blocklist endpoint rpc RmIpFromBlocklist(RmIpFromBlocklistRequest) returns (RmIpFromBlocklistResponse); - // metrics data + // metrics data endpoint rpc GetLatencyMetrics(google.protobuf.Empty) returns (LatencyMetricsResponse); - // dropped packets + // dropped packets endpoint rpc GetDroppedPacketsMetrics(google.protobuf.Empty) returns (DroppedPacketsResponse); + + // TODO: can i combine this 2 endpoints? 
+ // active veth info endpoint + rpc GetTrackedVeth(google.protobuf.Empty) returns (VethResponse); + // get tracked veth from blocklist + rpc GetTrackedVethFromHashMap(google.protobuf.Empty) returns (VethHashMapResponse); } +// Blocklist + message AddIpToBlocklistRequest{ optional string ip = 1 ; } diff --git a/core/api/src/agent.rs b/core/api/src/agent.rs index c6f5126d..8d004b90 100644 --- a/core/api/src/agent.rs +++ b/core/api/src/agent.rs @@ -121,6 +121,48 @@ pub struct DroppedPacketsResponse { pub total_drops: u32, } #[derive(Clone, PartialEq, Eq, Hash, ::prost::Message)] +pub struct VethResponse { + #[prost(string, tag = "1")] + pub status: ::prost::alloc::string::String, + /// List of active veth interface names + #[prost(string, repeated, tag = "2")] + pub veth_names: ::prost::alloc::vec::Vec<::prost::alloc::string::String>, + #[prost(int32, tag = "3")] + pub tot_monitored_veth: i32, +} +#[derive(Clone, PartialEq, Eq, Hash, ::prost::Message)] +pub struct VethEvent { + /// Virtual Ethernet Interface Name + #[prost(string, tag = "1")] + pub name: ::prost::alloc::string::String, + /// Veth State + #[prost(uint64, tag = "2")] + pub state: u64, + /// Veth device Address + #[prost(string, tag = "3")] + pub dev_addr: ::prost::alloc::string::String, + /// Event type + #[prost(uint32, tag = "4")] + pub event_type: u32, + /// Network Namespace + #[prost(uint32, tag = "5")] + pub netns: u32, + /// Process ID + #[prost(uint32, tag = "6")] + pub pid: u32, +} +/// returns tracked veth from the tracked_veth hashmap +#[derive(Clone, PartialEq, ::prost::Message)] +pub struct VethHashMapResponse { + #[prost(string, tag = "1")] + pub status: ::prost::alloc::string::String, + #[prost(map = "string, string", tag = "2")] + pub veths: ::std::collections::HashMap< + ::prost::alloc::string::String, + ::prost::alloc::string::String, + >, +} +#[derive(Clone, PartialEq, Eq, Hash, ::prost::Message)] pub struct AddIpToBlocklistRequest { #[prost(string, optional, tag = "1")] pub ip: 
::core::option::Option<::prost::alloc::string::String>, @@ -161,7 +203,6 @@ pub mod agent_client { )] use tonic::codegen::*; use tonic::codegen::http::Uri; - /// declare agent api #[derive(Debug, Clone)] pub struct AgentClient { inner: tonic::client::Grpc, @@ -341,7 +382,7 @@ pub mod agent_client { .insert(GrpcMethod::new("agent.Agent", "RmIpFromBlocklist")); self.inner.unary(req, path, codec).await } - /// metrics data + /// metrics data endpoint pub async fn get_latency_metrics( &mut self, request: impl tonic::IntoRequest<()>, @@ -366,7 +407,7 @@ pub mod agent_client { .insert(GrpcMethod::new("agent.Agent", "GetLatencyMetrics")); self.inner.unary(req, path, codec).await } - /// dropped packets + /// dropped packets endpoint pub async fn get_dropped_packets_metrics( &mut self, request: impl tonic::IntoRequest<()>, @@ -391,6 +432,54 @@ pub mod agent_client { .insert(GrpcMethod::new("agent.Agent", "GetDroppedPacketsMetrics")); self.inner.unary(req, path, codec).await } + /// TODO: can i combine this 2 endpoints? 
+ /// active veth info endpoint + pub async fn get_tracked_veth( + &mut self, + request: impl tonic::IntoRequest<()>, + ) -> std::result::Result, tonic::Status> { + self.inner + .ready() + .await + .map_err(|e| { + tonic::Status::unknown( + format!("Service was not ready: {}", e.into()), + ) + })?; + let codec = tonic_prost::ProstCodec::default(); + let path = http::uri::PathAndQuery::from_static( + "/agent.Agent/GetTrackedVeth", + ); + let mut req = request.into_request(); + req.extensions_mut() + .insert(GrpcMethod::new("agent.Agent", "GetTrackedVeth")); + self.inner.unary(req, path, codec).await + } + /// get tracked veth from blocklist + pub async fn get_tracked_veth_from_hash_map( + &mut self, + request: impl tonic::IntoRequest<()>, + ) -> std::result::Result< + tonic::Response, + tonic::Status, + > { + self.inner + .ready() + .await + .map_err(|e| { + tonic::Status::unknown( + format!("Service was not ready: {}", e.into()), + ) + })?; + let codec = tonic_prost::ProstCodec::default(); + let path = http::uri::PathAndQuery::from_static( + "/agent.Agent/GetTrackedVethFromHashMap", + ); + let mut req = request.into_request(); + req.extensions_mut() + .insert(GrpcMethod::new("agent.Agent", "GetTrackedVethFromHashMap")); + self.inner.unary(req, path, codec).await + } } } /// Generated server implementations. @@ -437,7 +526,7 @@ pub mod agent_server { tonic::Response, tonic::Status, >; - /// metrics data + /// metrics data endpoint async fn get_latency_metrics( &self, request: tonic::Request<()>, @@ -445,7 +534,7 @@ pub mod agent_server { tonic::Response, tonic::Status, >; - /// dropped packets + /// dropped packets endpoint async fn get_dropped_packets_metrics( &self, request: tonic::Request<()>, @@ -453,8 +542,21 @@ pub mod agent_server { tonic::Response, tonic::Status, >; + /// TODO: can i combine this 2 endpoints? 
+ /// active veth info endpoint + async fn get_tracked_veth( + &self, + request: tonic::Request<()>, + ) -> std::result::Result, tonic::Status>; + /// get tracked veth from blocklist + async fn get_tracked_veth_from_hash_map( + &self, + request: tonic::Request<()>, + ) -> std::result::Result< + tonic::Response, + tonic::Status, + >; } - /// declare agent api #[derive(Debug)] pub struct AgentServer { inner: Arc, @@ -787,6 +889,90 @@ pub mod agent_server { }; Box::pin(fut) } + "/agent.Agent/GetTrackedVeth" => { + #[allow(non_camel_case_types)] + struct GetTrackedVethSvc(pub Arc); + impl tonic::server::UnaryService<()> + for GetTrackedVethSvc { + type Response = super::VethResponse; + type Future = BoxFuture< + tonic::Response, + tonic::Status, + >; + fn call(&mut self, request: tonic::Request<()>) -> Self::Future { + let inner = Arc::clone(&self.0); + let fut = async move { + ::get_tracked_veth(&inner, request).await + }; + Box::pin(fut) + } + } + let accept_compression_encodings = self.accept_compression_encodings; + let send_compression_encodings = self.send_compression_encodings; + let max_decoding_message_size = self.max_decoding_message_size; + let max_encoding_message_size = self.max_encoding_message_size; + let inner = self.inner.clone(); + let fut = async move { + let method = GetTrackedVethSvc(inner); + let codec = tonic_prost::ProstCodec::default(); + let mut grpc = tonic::server::Grpc::new(codec) + .apply_compression_config( + accept_compression_encodings, + send_compression_encodings, + ) + .apply_max_message_size_config( + max_decoding_message_size, + max_encoding_message_size, + ); + let res = grpc.unary(method, req).await; + Ok(res) + }; + Box::pin(fut) + } + "/agent.Agent/GetTrackedVethFromHashMap" => { + #[allow(non_camel_case_types)] + struct GetTrackedVethFromHashMapSvc(pub Arc); + impl tonic::server::UnaryService<()> + for GetTrackedVethFromHashMapSvc { + type Response = super::VethHashMapResponse; + type Future = BoxFuture< + tonic::Response, + 
tonic::Status, + >; + fn call(&mut self, request: tonic::Request<()>) -> Self::Future { + let inner = Arc::clone(&self.0); + let fut = async move { + ::get_tracked_veth_from_hash_map( + &inner, + request, + ) + .await + }; + Box::pin(fut) + } + } + let accept_compression_encodings = self.accept_compression_encodings; + let send_compression_encodings = self.send_compression_encodings; + let max_decoding_message_size = self.max_decoding_message_size; + let max_encoding_message_size = self.max_encoding_message_size; + let inner = self.inner.clone(); + let fut = async move { + let method = GetTrackedVethFromHashMapSvc(inner); + let codec = tonic_prost::ProstCodec::default(); + let mut grpc = tonic::server::Grpc::new(codec) + .apply_compression_config( + accept_compression_encodings, + send_compression_encodings, + ) + .apply_max_message_size_config( + max_decoding_message_size, + max_encoding_message_size, + ); + let res = grpc.unary(method, req).await; + Ok(res) + }; + Box::pin(fut) + } _ => { Box::pin(async move { let mut response = http::Response::new( diff --git a/core/api/src/api.rs b/core/api/src/api.rs index 27641b40..ba25101b 100644 --- a/core/api/src/api.rs +++ b/core/api/src/api.rs @@ -1,19 +1,19 @@ -#![allow(warnings)] use anyhow::Context; +use anyhow::anyhow; +use aya::maps::perf::PerfEventArrayBuffer; use chrono::Local; -use cortexbrain_common::{ - formatters::{format_ipv4, format_ipv6}, -}; -use prost::bytes::BytesMut; -use std::{str::FromStr, sync::Arc}; +use cortexbrain_common::buffer_type::IpProtocols; +use cortexbrain_common::buffer_type::NetworkMetrics; +use cortexbrain_common::buffer_type::PacketLog; +use cortexbrain_common::buffer_type::TimeStampMetrics; +use cortexbrain_common::formatters::{format_ipv4, format_ipv6}; +use cortexbrain_common::map_handlers::load_perf_event_array_from_mapdata; +use std::str::FromStr; use std::sync::Mutex; use tonic::{Request, Response, Status}; use tracing::info; -use aya::{ - maps::{MapData, PerfEventArray}, - 
util::online_cpus, -}; +use aya::maps::MapData; use std::result::Result::Ok; use tonic::async_trait; @@ -22,131 +22,70 @@ use std::collections::HashMap; use tokio::sync::mpsc; use tokio::task; -use crate::{ - agent::{ - ConnectionEvent, DroppedPacketMetric, DroppedPacketsResponse, - LatencyMetric, LatencyMetricsResponse, - }, +use crate::agent::{ + ConnectionEvent, DroppedPacketMetric, DroppedPacketsResponse, LatencyMetric, + LatencyMetricsResponse, VethEvent, }; -use crate::structs::{NetworkMetrics, PacketLog, TimeStampMetrics}; +use cortexbrain_common::buffer_type::VethLog; // * contains agent api configuration use crate::agent::{ - agent_server::Agent, ActiveConnectionResponse, AddIpToBlocklistRequest, BlocklistResponse, - RequestActiveConnections, RmIpFromBlocklistRequest, RmIpFromBlocklistResponse, + ActiveConnectionResponse, AddIpToBlocklistRequest, BlocklistResponse, RequestActiveConnections, + RmIpFromBlocklistRequest, RmIpFromBlocklistResponse, VethHashMapResponse, VethResponse, + agent_server::Agent, }; use crate::constants::PIN_BLOCKLIST_MAP_PATH; use crate::helpers::comm_to_string; use aya::maps::Map; -use cortexbrain_common::constants::BPF_PATH; -use cortexflow_identity::enums::IpProtocols; use std::net::Ipv4Addr; use tracing::warn; +use cortexbrain_common::buffer_type::BufferSize; +use cortexbrain_common::buffer_type::fill_buffers; + pub struct AgentApi { //* event_rx is an istance of a mpsc receiver. //* is used to receive the data from the transmitter (tx) active_connection_event_rx: Mutex, Status>>>, - active_connection_event_tx: mpsc::Sender, Status>>, + pub(crate) active_connection_event_tx: mpsc::Sender, Status>>, latency_metrics_rx: Mutex, Status>>>, - latency_metrics_tx: mpsc::Sender, Status>>, + pub(crate) latency_metrics_tx: mpsc::Sender, Status>>, dropped_packet_metrics_rx: Mutex, Status>>>, - dropped_packet_metrics_tx: mpsc::Sender, Status>>, -} - -//* Event sender trait. 
Takes an event from a map and send that to the mpsc channel -//* using the send_map function -#[async_trait] -pub trait EventSender: Send + Sync + 'static { - async fn send_active_connection_event(&self, event: Vec); - async fn send_active_connection_event_map( - &self, - map: Vec, - tx: mpsc::Sender, Status>>, - ) { - let status = Status::new(tonic::Code::Ok, "success"); - let event = Ok(map); - - let _ = tx.send(event).await; - } - - async fn send_latency_metrics_event(&self, event: Vec); - async fn send_latency_metrics_event_map( - &self, - map: Vec, - tx: mpsc::Sender, Status>>, - ) { - let status = Status::new(tonic::Code::Ok, "success"); - let event = Ok(map); - let _ = tx.send(event).await; - } - - async fn send_dropped_packet_metrics_event(&self, event: Vec); - async fn send_dropped_packet_metrics_event_map( - &self, - map: Vec, - tx: mpsc::Sender, Status>>, - ) { - let status = Status::new(tonic::Code::Ok, "success"); - let event = Ok(map); - let _ = tx.send(event).await; - } - -} - -// send event function. takes an HashMap and send that using mpsc event_tx -#[async_trait] -impl EventSender for AgentApi { - async fn send_active_connection_event(&self, event: Vec) { - self.send_active_connection_event_map(event, self.active_connection_event_tx.clone()) - .await; - } - - async fn send_latency_metrics_event(&self, event: Vec) { - self.send_latency_metrics_event_map(event, self.latency_metrics_tx.clone()) - .await; - } - - async fn send_dropped_packet_metrics_event(&self, event: Vec) { - self.send_dropped_packet_metrics_event_map(event, self.dropped_packet_metrics_tx.clone()) - .await; - } + pub(crate) dropped_packet_metrics_tx: mpsc::Sender, Status>>, + tracked_veth_rx: Mutex, Status>>>, + pub(crate) tracked_veth_tx: mpsc::Sender, Status>>, } //initialize a default trait for AgentApi. Loads a name and a bpf istance. //this trait is essential for init the Agent. 
impl Default for AgentApi { - //TODO:this part needs a better error handling fn default() -> Self { - // load connections maps mapdata - let active_connection_mapdata = MapData::from_pin("/sys/fs/bpf/maps/events_map") - .expect("cannot open events_map Mapdata"); - let active_connection_map = Map::PerfEventArray(active_connection_mapdata); //creates a PerfEventArray from the mapdata - - let mut active_connection_events_array = PerfEventArray::try_from(active_connection_map) - .expect("Error while initializing events array"); - - // load network metrics maps mapdata - let network_metrics_mapdata = MapData::from_pin("/sys/fs/bpf/trace_maps/net_metrics") - .expect("cannot open net_metrics Mapdata"); - let network_metrics_map = Map::PerfEventArray(network_metrics_mapdata); //creates a PerfEventArray from the mapdata - let mut network_metrics_events_array = PerfEventArray::try_from(network_metrics_map) - .expect("Error while initializing network metrics array"); - - // load time stamp events maps mapdata - let time_stamp_events_mapdata = MapData::from_pin("/sys/fs/bpf/trace_maps/time_stamp_events") - .expect("cannot open time_stamp_events Mapdata"); - let time_stamp_events_map = Map::PerfEventArray(time_stamp_events_mapdata); // - let mut time_stamp_events_array = PerfEventArray::try_from(time_stamp_events_map) - .expect("Error while initializing time stamp events array"); - - //init a mpsc channel + // + // init MapData from the kernel space + // + + // TODO: in the future will be better to not use .unwrap() + let active_connection_events_array = + load_perf_event_array_from_mapdata("/sys/fs/bpf/maps/events_map").unwrap(); + let network_metrics_events_array = + load_perf_event_array_from_mapdata("/sys/fs/bpf/trace_maps/net_metrics").unwrap(); + let time_stamp_events_array = + load_perf_event_array_from_mapdata("/sys/fs/bpf/trace_maps/time_stamp_events").unwrap(); + let tracked_veth_events_array = + 
load_perf_event_array_from_mapdata("/sys/fs/bpf/maps/veth_identity_map").unwrap(); + + // + // init a mpsc channels with TX (transmission) and RX(Receiver) components + // + let (conn_tx, conn_rx) = mpsc::channel(1024); let (lat_tx, lat_rx) = mpsc::channel(2048); let (drop_tx, drop_rx) = mpsc::channel(2048); + let (veth_tx, tracked_veth_rx) = mpsc::channel(1024); + // init the API to send the events from the agent to the CLI let api = AgentApi { active_connection_event_rx: conn_rx.into(), active_connection_event_tx: conn_tx.clone(), @@ -154,37 +93,49 @@ impl Default for AgentApi { latency_metrics_tx: lat_tx.clone(), dropped_packet_metrics_rx: Mutex::new(drop_rx), dropped_packet_metrics_tx: drop_tx.clone(), + tracked_veth_rx: Mutex::new(tracked_veth_rx), + tracked_veth_tx: veth_tx.clone(), }; + // init map manager + //let map_manager = map_manager(maps)? + + // init the buffers + let mut net_events_buffers = BufferSize::TcpEvents.set_buffer(); + let mut net_metrics_buffers = BufferSize::NetworkMetricsEvents.set_buffer(); + let mut ts_metrics_buffers = BufferSize::TimeMetricsEvents.set_buffer(); + let mut veth_metrics_buffers = BufferSize::VethEvents.set_buffer(); + + // init the Vec of Buffers + + let mut net_events_vec_buffer = Vec::>::new(); + let mut net_metrics_vec_buffer = Vec::>::new(); + let mut ts_events_vec_buffer = Vec::>::new(); + let mut veth_events_vec_buffer = Vec::>::new(); + + // fill the Vec of Buffers + + net_events_vec_buffer = fill_buffers(net_events_vec_buffer, active_connection_events_array); + net_metrics_vec_buffer = fill_buffers(net_metrics_vec_buffer, network_metrics_events_array); + + ts_events_vec_buffer = fill_buffers(ts_events_vec_buffer, time_stamp_events_array); + + veth_events_vec_buffer = fill_buffers(veth_events_vec_buffer, tracked_veth_events_array); + // For network metrics //spawn an event readers task::spawn(async move { - let mut net_events_buffer = Vec::new(); - //scan the cpus to read the data - - for cpu_id in online_cpus() 
- .map_err(|e| anyhow::anyhow!("Error {:?}", e)) - .unwrap() - { - let buf = active_connection_events_array - .open(cpu_id, None) - .expect("Error during the creation of net_events_buf structure"); - - let buffers = vec![BytesMut::with_capacity(4096); 8]; - net_events_buffer.push((buf, buffers)); - } - info!("Starting event listener"); //send the data through a mpsc channel loop { - for (buf, buffers) in net_events_buffer.iter_mut() { - match buf.read_events(buffers) { + for buf in net_events_vec_buffer.iter_mut() { + match buf.read_events(&mut net_events_buffers) { Ok(events) => { //read the events, this function is similar to the one used in identity/helpers.rs/display_events if events.read > 0 { for i in 0..events.read { - let data = &buffers[i]; + let data = &net_events_buffers[i]; if data.len() >= std::mem::size_of::() { let pl: PacketLog = unsafe { std::ptr::read(data.as_ptr() as *const _) }; @@ -198,12 +149,7 @@ impl Default for AgentApi { Ok(proto) => { info!( "Event Id: {} Protocol: {:?} SRC: {}:{} -> DST: {}:{}", - event_id, - proto, - src, - src_port, - dst, - dst_port + event_id, proto, src, src_port, dst, dst_port ); info!("creating vector for the aggregated data"); let mut evt = Vec::new(); @@ -234,12 +180,12 @@ impl Default for AgentApi { ); } } - } else if events.read == 0 { - info!("[Agent/API] 0 Events found"); + } else if events.lost > 0 { + info!("[Agent/API] Lost {} events", events.lost); } } Err(e) => { - eprintln!("Errore nella lettura eventi: {}", e); + eprintln!("Error while reading events: {}", e); tokio::time::sleep(tokio::time::Duration::from_millis(100)).await; } } @@ -250,32 +196,17 @@ impl Default for AgentApi { }); task::spawn(async move { - let mut net_metrics_buffer = Vec::new(); - - //scan the cpus to read the data - for cpu_id in online_cpus() - .map_err(|e| anyhow::anyhow!("Error {:?}", e)) - .unwrap() - { - let buf = network_metrics_events_array - .open(cpu_id, None) - .expect("Error during the creation of net_metrics_buf 
structure"); - - let buffers = vec![BytesMut::with_capacity(4096); 8]; - net_metrics_buffer.push((buf, buffers)); - } - info!("Starting network metrics listener"); //send the data through a mpsc channel loop { - for (buf, buffers) in net_metrics_buffer.iter_mut() { - match buf.read_events(buffers) { + for buf in net_metrics_vec_buffer.iter_mut() { + match buf.read_events(&mut net_metrics_buffers) { Ok(events) => { //read the events, this function is similar to the one used in identity/helpers.rs/display_events if events.read > 0 { for i in 0..events.read { - let data = &buffers[i]; + let data = &net_metrics_buffers[i]; if data.len() >= std::mem::size_of::() { let nm: NetworkMetrics = unsafe { std::ptr::read(data.as_ptr() as *const _) }; @@ -296,18 +227,18 @@ impl Default for AgentApi { if dropped_packet_metrics.sk_drops > 0 { let mut evt = Vec::new(); info!( - "Dropped Packet Metric - tgid: {}, process_name: {}, sk_drops: {}, sk_err: {}, sk_err_soft: {}, sk_backlog_len: {}, sk_wmem_queued: {}, sk_rcvbuf: {}, sk_ack_backlog: {}, timestamp_us: {}", - dropped_packet_metrics.tgid, - dropped_packet_metrics.process_name, - dropped_packet_metrics.sk_drops, - dropped_packet_metrics.sk_err, - dropped_packet_metrics.sk_err_soft, - dropped_packet_metrics.sk_backlog_len, - dropped_packet_metrics.sk_wmem_queued, - dropped_packet_metrics.sk_rcvbuf, - dropped_packet_metrics.sk_ack_backlog, - dropped_packet_metrics.timestamp_us - ); + "Dropped Packet Metric - tgid: {}, process_name: {}, sk_drops: {}, sk_err: {}, sk_err_soft: {}, sk_backlog_len: {}, sk_wmem_queued: {}, sk_rcvbuf: {}, sk_ack_backlog: {}, timestamp_us: {}", + dropped_packet_metrics.tgid, + dropped_packet_metrics.process_name, + dropped_packet_metrics.sk_drops, + dropped_packet_metrics.sk_err, + dropped_packet_metrics.sk_err_soft, + dropped_packet_metrics.sk_backlog_len, + dropped_packet_metrics.sk_wmem_queued, + dropped_packet_metrics.sk_rcvbuf, + dropped_packet_metrics.sk_ack_backlog, + 
dropped_packet_metrics.timestamp_us + ); evt.push(dropped_packet_metrics.clone()); let _ = drop_tx.send(Ok(evt)).await; } @@ -332,34 +263,22 @@ impl Default for AgentApi { }); task::spawn(async move { - let mut ts_events_buffer = Vec::new(); - //scan the cpus to read the data - for cpu_id in online_cpus() - .map_err(|e| anyhow::anyhow!("Error {:?}", e)) - .unwrap() - { - let buf = time_stamp_events_array - .open(cpu_id, None) - .expect("Error during the creation of time stamp events buf structure"); - - let buffers = vec![BytesMut::with_capacity(4096); 8]; - ts_events_buffer.push((buf, buffers)); - } - info!("Starting time stamp events listener"); //send the data through a mpsc channel loop { - for (buf, buffers) in ts_events_buffer.iter_mut() { - match buf.read_events(buffers) { + for buf in ts_events_vec_buffer.iter_mut() { + match buf.read_events(&mut ts_metrics_buffers) { Ok(events) => { //read the events, this function is similar to the one used in identity/helpers.rs/display_events if events.read > 0 { for i in 0..events.read { - let data = &buffers[i]; + let data = &ts_metrics_buffers[i]; if data.len() >= std::mem::size_of::() { let tsm: TimeStampMetrics = unsafe { std::ptr::read(data.as_ptr() as *const _) }; + let saddr_v6 = tsm.saddr_v6; + let daddr_v6 = tsm.daddr_v6; let latency_metric = LatencyMetric { delta_us: tsm.delta_us, timestamp_us: tsm.ts_us, @@ -370,8 +289,8 @@ impl Default for AgentApi { address_family: tsm.af as u32, src_address_v4: format_ipv4(tsm.saddr_v4), dst_address_v4: format_ipv4(tsm.daddr_v4), - src_address_v6: format_ipv6(&tsm.saddr_v6), - dst_address_v6: format_ipv6(&tsm.daddr_v6), + src_address_v6: format_ipv6(&saddr_v6), + dst_address_v6: format_ipv6(&daddr_v6), }; info!( "Latency Metric - tgid: {}, process_name: {}, delta_us: {}, timestamp_us: {}, local_port: {}, remote_port: {}, address_family: {}, src_address_v4: {}, dst_address_v4: {}, src_address_v6: {}, dst_address_v6: {}", @@ -408,6 +327,75 @@ impl Default for AgentApi { } 
}); + task::spawn(async move { + info!("Starting time stamp events listener"); + + //send the data through a mpsc channel + loop { + for buf in veth_events_vec_buffer.iter_mut() { + match buf.read_events(&mut veth_metrics_buffers) { + Ok(events) => { + //read the events, this function is similar to the one used in identity/helpers.rs/display_events + if events.read > 0 { + for i in 0..events.read { + info!("Found veth events {}", events.read); + let data = &veth_metrics_buffers[i]; + if data.len() >= std::mem::size_of::() { + let veth: VethLog = + unsafe { std::ptr::read(data.as_ptr() as *const _) }; + let veth_event = VethEvent { + name: String::from_utf8_lossy(unsafe { + std::slice::from_raw_parts( + veth.name.as_ptr() as *const u8, + veth.name.len() * std::mem::size_of::(), + ) + }) + .trim_end_matches('\0') + .to_string(), + state: veth.state, + dev_addr: String::from_utf8_lossy(unsafe { + std::slice::from_raw_parts( + veth.dev_addr.as_ptr() as *const u8, + veth.dev_addr.len() + * std::mem::size_of::(), + ) + }) + .trim_end_matches('\0') + .to_string(), + event_type: veth.event_type.into(), + netns: veth.netns, + pid: veth.pid, + }; + info!( + "Veth Event - name: {}, state: {}, dev_addr: {}, event_type: {}, netns: {}, pid: {}", + veth_event.name, + veth_event.state, + veth_event.dev_addr, + veth_event.event_type, + veth_event.netns, + veth_event.pid + ); + let mut evt = Vec::new(); + evt.push(veth_event.clone()); + let _ = veth_tx.send(Ok(evt)).await; + } else { + warn!( + "Received time stamp metrics data too small: {} bytes", + data.len() + ); + } + } + } + } + Err(e) => { + eprintln!("Errore nella lettura time stamp eventi: {}", e); + tokio::time::sleep(tokio::time::Duration::from_millis(100)).await; + } + } + } + } + }); + api } } @@ -423,7 +411,7 @@ impl Agent for AgentApi { request: Request, ) -> Result, Status> { //read request - let req = request.into_inner(); + let _req = request.into_inner(); //create the hashmap to process events from the mpsc 
channel queue let mut aggregated_events: Vec = Vec::new(); @@ -470,22 +458,25 @@ impl Agent for AgentApi { } else { // add ip to the blocklist // log blocklist event - let datetime = Local::now().to_string(); + let _datetime = Local::now().to_string(); let ip = req.ip.unwrap(); //convert ip from string to [u8;4] type and insert into the bpf map let u8_4_ip = Ipv4Addr::from_str(&ip).unwrap().octets(); //TODO: convert datetime in a kernel compatible format - blocklist_map.insert(u8_4_ip, u8_4_ip, 0); + blocklist_map + .insert(u8_4_ip, u8_4_ip, 0) + .map_err(|e| anyhow!("Cannot insert address in the blocklist. Reason: {}", e)) + .unwrap(); info!("CURRENT BLOCKLIST: {:?}", blocklist_map); } - let path = std::env::var(PIN_BLOCKLIST_MAP_PATH) + let _path = std::env::var(PIN_BLOCKLIST_MAP_PATH) .context("Blocklist map path not found!") .unwrap(); //convert the maps with a buffer to match the protobuffer types let mut converted_blocklist_map: HashMap = HashMap::new(); for item in blocklist_map.iter() { - let (k, v) = item.unwrap(); + let (k, _v) = item.unwrap(); // convert keys and values from [u8;4] to String let key = Ipv4Addr::from(k).to_string(); let value = Ipv4Addr::from(k).to_string(); @@ -501,7 +492,7 @@ impl Agent for AgentApi { async fn check_blocklist( &self, - request: Request<()>, + _request: Request<()>, ) -> Result, Status> { info!("Returning blocklist hashmap"); //open blocklist map @@ -516,7 +507,7 @@ impl Agent for AgentApi { let mut converted_blocklist_map: HashMap = HashMap::new(); for item in blocklist_map.iter() { - let (k, v) = item.unwrap(); + let (k, _v) = item.unwrap(); // convert keys and values from [u8;4] to String let key = Ipv4Addr::from(k).to_string(); let value = Ipv4Addr::from(k).to_string(); @@ -543,7 +534,7 @@ impl Agent for AgentApi { //remove the address let ip_to_remove = req.ip; let u8_4_ip_to_remove = Ipv4Addr::from_str(&ip_to_remove).unwrap().octets(); - blocklist_map.remove(&u8_4_ip_to_remove); + let _ = 
blocklist_map.remove(&u8_4_ip_to_remove); //convert the maps with a buffer to match the protobuffer types let mut converted_blocklist_map: HashMap = HashMap::new(); @@ -566,7 +557,7 @@ impl Agent for AgentApi { request: Request<()>, ) -> Result, Status> { // Extract the request parameters - let req = request.into_inner(); + let _req = request.into_inner(); info!("Getting latency metrics"); // Here you would typically query your data source for the latency metrics @@ -629,7 +620,7 @@ impl Agent for AgentApi { request: Request<()>, ) -> Result, Status> { // Extract the request parameters - let req = request.into_inner(); + let _req = request.into_inner(); info!("Getting dropped packets metrics"); let mut aggregated_dropped_packet_metrics: Vec = Vec::new(); @@ -659,4 +650,63 @@ impl Agent for AgentApi { Ok(Response::new(response)) } + + async fn get_tracked_veth( + &self, + request: Request<()>, + ) -> Result, Status> { + let _req = request.into_inner(); + info!("Getting tracked veth metrics"); + let mut tracked_veth = Vec::::new(); + let mut tot_veth = 0 as i32; + + while let Ok(evt) = self.tracked_veth_rx.lock().unwrap().try_recv() { + if let Ok(vec) = evt { + tracked_veth.extend(vec); + } + } + tot_veth = tracked_veth.len() as i32; + + info!("Total tracked veth events: {}", tot_veth); + info!("Tracked veth: {:?}", &tracked_veth); + + let veth_names: Vec = tracked_veth.iter().map(|v| v.name.clone()).collect(); + + let response = VethResponse { + status: "success".to_string(), + veth_names, + tot_monitored_veth: tot_veth, + }; + + Ok(Response::new(response)) + } + + async fn get_tracked_veth_from_hash_map( + &self, + _request: Request<()>, + ) -> Result, Status> { + info!("Returning veth hashmap"); + //open blocklist map + let mapdata = MapData::from_pin("/sys/fs/bpf/maps/tracked_veth") + .expect("cannot open tracked_veth Mapdata"); + let tracked_veth_mapdata = Map::HashMap(mapdata); //load mapdata + + let tracked_veth_map: ayaHashMap = + 
ayaHashMap::try_from(tracked_veth_mapdata).unwrap(); + + //convert the maps with a buffer to match the protobuffer types + + let mut converted_tracked_veth_map: HashMap = HashMap::new(); + for item in tracked_veth_map.iter() { + let (k, v) = item.unwrap(); + // convert keys and values from [u8;4] to String + let key = String::from_utf8(k.to_vec()).unwrap(); + let value = String::from_utf8(v.to_vec()).unwrap(); + converted_tracked_veth_map.insert(key, value); + } + Ok(Response::new(VethHashMapResponse { + status: "success".to_string(), + veths: converted_tracked_veth_map, + })) + } } diff --git a/core/api/src/batcher.rs b/core/api/src/batcher.rs new file mode 100644 index 00000000..12d92784 --- /dev/null +++ b/core/api/src/batcher.rs @@ -0,0 +1,87 @@ +// This module is experimental and may be subject to major changes. + + +// Do not use any of these functions +// FIXME: this module will be deprecated in the next version probably + + +use tokio::sync::mpsc; +use tonic::{Status, async_trait}; + +use crate::{ + agent::{ConnectionEvent, DroppedPacketMetric, LatencyMetric, VethEvent}, + api::AgentApi, +}; + +// Event sender trait. 
Takes an event from a map and send that to the mpsc channel +// using the send_map function +#[async_trait] +pub trait EventSender: Send + Sync + 'static { + async fn send_active_connection_event(&self, event: Vec); + async fn send_active_connection_event_map( + &self, + map: Vec, + tx: mpsc::Sender, Status>>, + ) { + let status = Status::new(tonic::Code::Ok, "success"); + let event = Ok(map); + + let _ = tx.send(event).await; + } + + async fn send_latency_metrics_event(&self, event: Vec); + async fn send_latency_metrics_event_map( + &self, + map: Vec, + tx: mpsc::Sender, Status>>, + ) { + let status = Status::new(tonic::Code::Ok, "success"); + let event = Ok(map); + let _ = tx.send(event).await; + } + + async fn send_dropped_packet_metrics_event(&self, event: Vec); + async fn send_dropped_packet_metrics_event_map( + &self, + map: Vec, + tx: mpsc::Sender, Status>>, + ) { + let status = Status::new(tonic::Code::Ok, "success"); + let event = Ok(map); + let _ = tx.send(event).await; + } + + async fn send_tracked_veth_event(&self, event: Vec); + async fn send_tracked_veth_event_map( + &self, + map: Vec, + tx: mpsc::Sender, Status>>, + ) { + let status = Status::new(tonic::Code::Ok, "success"); + let event = Ok(map); + let _ = tx.send(event).await; + } +} + +// send event function. 
takes an HashMap and send that using mpsc event_tx +#[async_trait] +impl EventSender for AgentApi { + async fn send_active_connection_event(&self, event: Vec) { + self.send_active_connection_event_map(event, self.active_connection_event_tx.clone()) + .await; + } + + async fn send_latency_metrics_event(&self, event: Vec) { + self.send_latency_metrics_event_map(event, self.latency_metrics_tx.clone()) + .await; + } + + async fn send_dropped_packet_metrics_event(&self, event: Vec) { + self.send_dropped_packet_metrics_event_map(event, self.dropped_packet_metrics_tx.clone()) + .await; + } + async fn send_tracked_veth_event(&self, event: Vec) { + self.send_tracked_veth_event_map(event, self.tracked_veth_tx.clone()) + .await; + } +} diff --git a/core/api/src/client.rs b/core/api/src/client.rs index 844ea75e..096b176c 100644 --- a/core/api/src/client.rs +++ b/core/api/src/client.rs @@ -1,29 +1,23 @@ +use crate::agent::agent_client::AgentClient; use anyhow::Error; use std::result::Result::Ok; -use tonic::{transport::Channel}; -use tonic_reflection::pb::v1::{ - server_reflection_client::ServerReflectionClient, -}; -use crate::agent::agent_client::AgentClient; +use tonic::transport::Channel; +use tonic_reflection::pb::v1::server_reflection_client::ServerReflectionClient; -const AGENT_IP : &str = "http://127.0.0.1:9090"; +const AGENT_IP: &str = "http://127.0.0.1:9090"; -#[cfg(feature="client")] +#[cfg(feature = "client")] pub async fn connect_to_client() -> Result, Error> { //this methods force a HTTP/2 connection from a static string //FIXME: this will require an update to ensure a protected connection - let channel = Channel::from_static(AGENT_IP) - .connect() - .await?; + let channel = Channel::from_static(AGENT_IP).connect().await?; let client = AgentClient::new(channel); Ok(client) } -#[cfg(feature="client")] +#[cfg(feature = "client")] pub async fn connect_to_server_reflection() -> Result, Error> { //this methods force a HTTP/2 connection from a static string - let 
channel = Channel::from_static(AGENT_IP) - .connect() - .await?; + let channel = Channel::from_static(AGENT_IP).connect().await?; let client = ServerReflectionClient::new(channel); Ok(client) } diff --git a/core/api/src/lib.rs b/core/api/src/lib.rs index 03ecd684..e0939202 100644 --- a/core/api/src/lib.rs +++ b/core/api/src/lib.rs @@ -2,6 +2,6 @@ pub mod api; pub mod agent; pub mod client; pub mod requests; -pub mod structs; pub mod constants; pub mod helpers; +pub mod batcher; diff --git a/core/api/src/main.rs b/core/api/src/main.rs index 30fe5506..87478f54 100644 --- a/core/api/src/main.rs +++ b/core/api/src/main.rs @@ -6,7 +6,6 @@ mod agent; mod api; mod constants; mod helpers; -mod structs; mod agent_proto { use tonic::include_file_descriptor_set; diff --git a/core/api/src/requests.rs b/core/api/src/requests.rs index a518f4af..7c9f447d 100644 --- a/core/api/src/requests.rs +++ b/core/api/src/requests.rs @@ -1,26 +1,26 @@ use anyhow::Error; use std::result::Result::Ok; -use tonic::{ Request, Response, Streaming, transport::Channel }; +use tonic::{Request, Response, Streaming, transport::Channel}; use tonic_reflection::pb::v1::{ - ServerReflectionRequest, - ServerReflectionResponse, - server_reflection_client::ServerReflectionClient, - server_reflection_request::MessageRequest, + ServerReflectionRequest, ServerReflectionResponse, + server_reflection_client::ServerReflectionClient, server_reflection_request::MessageRequest, }; -use crate::agent::agent_client::AgentClient; use crate::agent::ActiveConnectionResponse; -use crate::agent::RequestActiveConnections; -use crate::agent::BlocklistResponse; use crate::agent::AddIpToBlocklistRequest; -use crate::agent::RmIpFromBlocklistRequest; -use crate::agent::RmIpFromBlocklistResponse; +use crate::agent::BlocklistResponse; use crate::agent::DroppedPacketsResponse; use crate::agent::LatencyMetricsResponse; +use crate::agent::RequestActiveConnections; +use crate::agent::RmIpFromBlocklistRequest; +use 
crate::agent::RmIpFromBlocklistResponse; +use crate::agent::VethHashMapResponse; +use crate::agent::VethResponse; +use crate::agent::agent_client::AgentClient; #[cfg(feature = "client")] pub async fn send_active_connection_request( - mut client: AgentClient + mut client: AgentClient, ) -> Result, Error> { let request = Request::new(RequestActiveConnections { pod_ip: None }); let response = client.active_connections(request).await?; @@ -29,13 +29,17 @@ pub async fn send_active_connection_request( #[cfg(feature = "client")] pub async fn get_all_features( - mut client: ServerReflectionClient + mut client: ServerReflectionClient, ) -> Result>, Error> { let request = ServerReflectionRequest { host: "".to_string(), - message_request: Some(MessageRequest::FileContainingSymbol("agent.Agent".to_string())), + message_request: Some(MessageRequest::FileContainingSymbol( + "agent.Agent".to_string(), + )), }; - let response = client.server_reflection_info(tokio_stream::iter(vec![request])).await?; + let response = client + .server_reflection_info(tokio_stream::iter(vec![request])) + .await?; Ok(response) } @@ -43,7 +47,7 @@ pub async fn get_all_features( #[cfg(feature = "client")] pub async fn send_create_blocklist_request( mut client: AgentClient, - ip: &str + ip: &str, ) -> Result, Error> { let ip = Some(ip.to_string()); let request = Request::new(AddIpToBlocklistRequest { ip }); @@ -53,7 +57,7 @@ pub async fn send_create_blocklist_request( #[cfg(feature = "client")] pub async fn send_check_blocklist_request( - mut client: AgentClient + mut client: AgentClient, ) -> Result, Error> { let request = Request::new(()); let response = client.check_blocklist(request).await?; @@ -63,7 +67,7 @@ pub async fn send_check_blocklist_request( #[cfg(feature = "client")] pub async fn remove_ip_from_blocklist_request( mut client: AgentClient, - ip: &str + ip: &str, ) -> Result, Error> { let ip = ip.to_string(); let request = Request::new(RmIpFromBlocklistRequest { ip }); @@ -76,9 +80,7 @@ pub 
async fn send_dropped_packets_request( mut client: AgentClient, ) -> Result, Error> { let request = Request::new(()); - let response = client.get_dropped_packets_metrics( - request - ).await?; + let response = client.get_dropped_packets_metrics(request).await?; Ok(response) } @@ -87,8 +89,24 @@ pub async fn send_latency_metrics_request( mut client: AgentClient, ) -> Result, Error> { let request = Request::new(()); - let response = client.get_latency_metrics( - request - ).await?; + let response = client.get_latency_metrics(request).await?; + Ok(response) +} + +#[cfg(feature = "client")] +pub async fn send_tracked_veth_request( + mut client: AgentClient, +) -> Result, Error> { + let request = Request::new(()); + let response = client.get_tracked_veth(request).await?; + Ok(response) +} + +#[cfg(feature = "client")] +pub async fn send_veth_tracked_hashmap_req( + mut client: AgentClient, +) -> Result, Error> { + let request = Request::new(()); + let response = client.get_tracked_veth_from_hash_map(request).await?; Ok(response) } diff --git a/core/api/src/structs.rs b/core/api/src/structs.rs deleted file mode 100644 index b15fa225..00000000 --- a/core/api/src/structs.rs +++ /dev/null @@ -1,48 +0,0 @@ -use bytemuck::Zeroable; -use crate::constants::TASK_COMM_LEN; - - -#[repr(C)] -#[derive(Clone, Copy, Zeroable)] -pub struct PacketLog { - pub proto: u8, - pub src_ip: u32, - pub src_port: u16, - pub dst_ip: u32, - pub dst_port: u16, - pub pid: u32, -} -unsafe impl aya::Pod for PacketLog {} - -#[repr(C, packed)] -#[derive(Clone, Copy, Zeroable)] -pub struct NetworkMetrics { - pub tgid: u32, - pub comm: [u8; TASK_COMM_LEN], - pub ts_us: u64, - pub sk_err: i32, - pub sk_err_soft: i32, - pub sk_backlog_len: i32, - pub sk_write_memory_queued: i32, - pub sk_receive_buffer_size: i32, - pub sk_ack_backlog: u32, - pub sk_drops: i32, -} -unsafe impl aya::Pod for NetworkMetrics {} - -#[repr(C)] -#[derive(Clone, Copy, Zeroable)] -pub struct TimeStampMetrics { - pub delta_us: u64, - 
pub ts_us: u64, - pub tgid: u32, - pub comm: [u8; TASK_COMM_LEN], - pub lport: u16, - pub dport_be: u16, - pub af: u16, - pub saddr_v4: u32, - pub daddr_v4: u32, - pub saddr_v6: [u32; 4], - pub daddr_v6: [u32; 4], -} -unsafe impl aya::Pod for TimeStampMetrics {} diff --git a/core/common/Cargo.toml b/core/common/Cargo.toml index 70545781..e1c39c5c 100644 --- a/core/common/Cargo.toml +++ b/core/common/Cargo.toml @@ -10,6 +10,27 @@ homepage = "https://docs.cortexflow.org" repository = "https://github.com/CortexFlow/CortexBrain" [dependencies] -tracing = "0.1" +tracing = { version = "0.1", features = ["std"] } tracing-subscriber = { version = "0.3", features = ["env-filter", "fmt"] } anyhow = "1.0" +kube = { version = "2.0.1", features = ["client"] } +k8s-openapi = { version = "0.26.0", features = ["v1_34"] } +aya = "0.13.1" +opentelemetry = "0.32.0" +opentelemetry_sdk = { version = "0.32.0", features = ["logs", "rt-tokio"] } +opentelemetry-stdout = { version = "0.32.0", features = ["logs"] } +opentelemetry-appender-tracing = "0.32.0" +opentelemetry-otlp = { version = "0.32.0", features = ["logs", "grpc-tonic"] } +bytemuck = "1.25.0" +bytes = "1.11.0" +bytemuck_derive = "1.10.2" +tokio = "1.49.0" +opentelemetry-semantic-conventions = "0.32.0" + +[features] +map-handlers = [] +program-handlers = [] +network-structs = [] +monitoring-structs = [] +buffer-reader = [] +experimental = [] diff --git a/core/common/src/buffer_type.rs b/core/common/src/buffer_type.rs new file mode 100644 index 00000000..45d82c81 --- /dev/null +++ b/core/common/src/buffer_type.rs @@ -0,0 +1,655 @@ +#[cfg(feature = "monitoring-structs")] +use crate::otel_metrics::Metrics; +#[cfg(feature = "buffer-reader")] +use aya::maps::{MapData, PerfEventArray}; +use aya::{maps::perf::PerfEventArrayBuffer, util::online_cpus}; +use bytemuck_derive::Zeroable; +use bytes::BytesMut; +use std::net::Ipv4Addr; +#[cfg(feature = "buffer-reader")] +#[cfg(feature = "monitoring-structs")] +use std::sync::Arc; +use 
tracing::{error, info, warn}; + +// +// IpProtocols enum to reconstruct the packet protocol based on the +// IPV4 Header Protocol code +// + +#[derive(Debug)] +#[repr(u8)] +pub enum IpProtocols { + ICMP = 1, + TCP = 6, + UDP = 17, +} + +// +// TryFrom Trait implementation for IpProtocols enum +// This is used to reconstruct the packet protocol based on the +// IPV4 Header Protocol code +// + +impl TryFrom for IpProtocols { + type Error = (); + fn try_from(proto: u8) -> Result { + match proto { + 1 => Ok(IpProtocols::ICMP), + 6 => Ok(IpProtocols::TCP), + 17 => Ok(IpProtocols::UDP), + _ => Err(()), + } + } +} + +// +// Structure PacketLog +//This structure is used to store the packet information +// +#[cfg(feature = "network-structs")] +#[repr(C)] +#[derive(Clone, Copy, Zeroable)] +pub struct PacketLog { + pub proto: u8, + pub src_ip: u32, + pub src_port: u16, + pub dst_ip: u32, + pub dst_port: u16, + pub pid: u32, +} +#[cfg(feature = "network-structs")] +unsafe impl aya::Pod for PacketLog {} + +#[cfg(feature = "network-structs")] +#[repr(C, packed)] +#[derive(Clone, Copy, Zeroable)] +pub struct VethLog { + pub name: [u8; 16], // 16 bytes: veth interface name + pub state: u64, // 8 bytes: state variable (unsigned long in kernel) + pub dev_addr: [u8; 6], // 6 bytes: device address + pub event_type: u8, // 1 byte: 1 for veth creation, 2 for veth destruction + pub netns: u32, // 4 bytes: network namespace inode number + pub pid: u32, // 4 bytes: PID that triggered the event +} +#[cfg(feature = "network-structs")] +unsafe impl aya::Pod for VethLog {} + +#[cfg(feature = "network-structs")] +#[repr(C)] +#[derive(Clone, Copy, Zeroable)] +pub struct TcpPacketRegistry { + pub proto: u8, + pub src_ip: u32, + pub dst_ip: u32, + pub src_port: u16, + pub dst_port: u16, + pub pid: u32, + pub command: [u8; 16], + pub cgroup_id: u64, +} +#[cfg(feature = "network-structs")] +unsafe impl aya::Pod for TcpPacketRegistry {} + +#[cfg(feature = "monitoring-structs")] +pub const 
TASK_COMM_LEN: usize = 16; // linux/sched.h +#[cfg(feature = "monitoring-structs")] +#[repr(C, packed)] +#[derive(Clone, Copy, Zeroable)] +pub struct NetworkMetrics { + pub tgid: u32, + pub comm: [u8; TASK_COMM_LEN], + pub ts_us: u64, + pub sk_err: i32, // Offset 284 + pub sk_err_soft: i32, // Offset 600 + pub sk_backlog_len: i32, // Offset 196 + pub sk_write_memory_queued: i32, // Offset 376 + pub sk_receive_buffer_size: i32, // Offset 244 + pub sk_ack_backlog: u32, // Offset 604 + pub sk_drops: i32, // Offset 136 +} +#[cfg(feature = "monitoring-structs")] +unsafe impl aya::Pod for NetworkMetrics {} + +#[cfg(feature = "monitoring-structs")] +#[repr(C, packed)] +#[derive(Clone, Copy, Zeroable)] +pub struct TimeStampMetrics { + pub delta_us: u64, + pub ts_us: u64, + pub tgid: u32, + pub comm: [u8; TASK_COMM_LEN], + pub lport: u16, + pub dport_be: u16, + pub af: u16, + pub saddr_v4: u32, + pub daddr_v4: u32, + pub saddr_v6: [u32; 4], + pub daddr_v6: [u32; 4], +} +#[cfg(feature = "monitoring-structs")] +unsafe impl aya::Pod for TimeStampMetrics {} + +// docs: +// This function perform a byte swap from little-endian to big-endian +// It's used to reconstruct the correct IPv4 address from the u32 representation +// +// Takes a u32 address in big-endian format and returns a Ipv4Addr with reversed octets +// +#[inline(always)] +pub fn reverse_be_addr(addr: u32) -> Ipv4Addr { + let octects = addr.to_be_bytes(); + let [a, b, c, d] = [octects[3], octects[2], octects[1], octects[0]]; + let reversed_ip = Ipv4Addr::new(a, b, c, d); + reversed_ip +} + +// enum BuffersType +#[cfg(feature = "buffer-reader")] +pub enum BufferType { + #[cfg(feature = "network-structs")] + PacketLog, + #[cfg(feature = "network-structs")] + TcpPacketRegistry, + #[cfg(feature = "network-structs")] + VethLog, + #[cfg(feature = "monitoring-structs")] + NetworkMetrics, + #[cfg(feature = "monitoring-structs")] + TimeStampMetrics, +} + +// IDEA: this is an experimental implementation to centralize buffer 
reading logic +// TODO: add variant for cortexflow API exporter +#[cfg(feature = "buffer-reader")] +impl BufferType { + #[cfg(feature = "network-structs")] + pub async fn read_packet_log(buffers: &mut [BytesMut], tot_events: i32, offset: i32) { + for i in offset..tot_events { + let vec_bytes = &buffers[i as usize]; + if vec_bytes.len() < std::mem::size_of::() { + error!( + "Corrupted Packet log data. Raw data: {}. Readed {} bytes expected {} bytes", + vec_bytes + .iter() + .map(|b| format!("{:02x}", b)) + .collect::>() + .join(" "), + vec_bytes.len(), + std::mem::size_of::() + ); + continue; + } + if vec_bytes.len() >= std::mem::size_of::() { + let pl: PacketLog = + unsafe { std::ptr::read_unaligned(vec_bytes.as_ptr() as *const _) }; // reading raw bytes + + // extracting struct info from bytes + let src_ip = reverse_be_addr(pl.src_ip); + let dst_ip = reverse_be_addr(pl.dst_ip); + let src_port = u16::from_be(pl.src_port); + let dst_port = u16::from_be(pl.dst_port); + let event_id = pl.pid; + let protocol = pl.proto; + + // protocol extraction + match IpProtocols::try_from(protocol) { + Ok(proto) => { + info!( + "Event Id: {} Protocol: {:?} SRC: {}:{} -> DST: {}:{}", + event_id, proto, src_ip, src_port, dst_ip, dst_port + ); + } + Err(e) => { + error!("Unknown protocol. Data maybe corrupted. Reason:{:?}", e); + } + } + } + } + } + #[cfg(feature = "network-structs")] + pub async fn read_tcp_registry_log(buffers: &mut [BytesMut], tot_events: i32, offset: i32) { + for i in offset..tot_events { + let vec_bytes = &buffers[i as usize]; + if vec_bytes.len() < std::mem::size_of::() { + error!( + "Corrupted data Tcp Registry data. Raw data: {}. 
Readed {} bytes expected {} bytes", + vec_bytes + .iter() + .map(|b| format!("{:02x}", b)) + .collect::>() + .join(" "), + vec_bytes.len(), + std::mem::size_of::() + ); + continue; + } + if vec_bytes.len() >= std::mem::size_of::() { + let pl: TcpPacketRegistry = + unsafe { std::ptr::read_unaligned(vec_bytes.as_ptr() as *const _) }; // reading raw bytes + + // extracting struct info from bytes + let src = reverse_be_addr(pl.src_ip); + let dst = reverse_be_addr(pl.dst_ip); + let src_port = u16::from_be(pl.src_port); + let dst_port = u16::from_be(pl.dst_port); + let event_id = pl.pid; + let command = pl.command.to_vec(); + let end = command + .iter() + .position(|&x| x == 0) + .unwrap_or(command.len()); + let command_str = String::from_utf8_lossy(&command[..end]).to_string(); + let cgroup_id = pl.cgroup_id; + let protocol = pl.proto; + + // protocol extraction + match IpProtocols::try_from(protocol) { + Ok(proto) => { + info!( + "Event Id: {} Protocol: {:?} SRC: {}:{} -> DST: {}:{} Command: {} Cgroup_id: {}", + event_id, + proto, + src, + src_port, + dst, + dst_port, + command_str, + cgroup_id //proc_content + ); + } + Err(e) => { + error!("Unknown protocol. Data maybe corrupted. Reason:{:?}", e); + } + } + } + } + } + #[cfg(feature = "network-structs")] + pub async fn read_and_handle_veth_log(buffers: &mut [BytesMut], tot_events: i32, offset: i32) { + for i in offset..tot_events { + let vec_bytes = &buffers[i as usize]; + if vec_bytes.len() < std::mem::size_of::() { + error!( + "Corrupted data VethLog data. Raw data: {}. 
Readed {} bytes expected {} bytes", + vec_bytes + .iter() + .map(|b| format!("{:02x}", b)) + .collect::>() + .join(" "), + vec_bytes.len(), + std::mem::size_of::() + ); + continue; + } + if vec_bytes.len() >= std::mem::size_of::() { + let vthl: VethLog = + unsafe { std::ptr::read_unaligned(vec_bytes.as_ptr() as *const _) }; // reading raw bytes + + // extracting struct info from bytes + let name_bytes = vthl.name; + let dev_addr_bytes = vthl.dev_addr; + let name = std::str::from_utf8(&name_bytes); + let state = vthl.state; + + let dev_addr = dev_addr_bytes; + let netns = vthl.netns; + let mut event_type = String::new(); + + // event_type extraction + match vthl.event_type { + 1 => { + event_type = "creation".to_string(); + match name { + Ok(veth_name) => { + info!( + "[{}] Veth Event: Type: {} Name: {} Dev_addr: {:x?} State: {}", + netns, + event_type, + veth_name.trim_end_matches("\0"), + dev_addr, + state + ); + } + Err(e) => { + error!( + "Failed to extract veth name during event_type = creation (1).Reason:{}", + e + ); + } + } + } + 2 => { + event_type = "deletion".to_string(); + match name { + Ok(veth_name) => { + info!( + "[{}] Veth Event: Type: {} Name: {} Dev_addr: {:x?} State: {}", + netns, + event_type, + veth_name.trim_end_matches("\0"), + dev_addr, + state + ); + } + Err(e) => { + error!( + "Failed to extract veth name during event_type = deletion (2).Reason:{}", + e + ); + } + } + } + _ => { + warn!("Unknown event type") + } + } + } + } + } + #[cfg(feature = "monitoring-structs")] + /// Continuously read [`NetworkMetrics`] events and record OpenTelemetry + /// observations. + /// + /// This helper mirrors the core behaviour of + /// [`cortexbrain_common::buffer_type::read_perf_buffer`] but adds the OTel + /// instrumentation layer. + /// + /// # Loop + /// + /// 1. For every CPU buffer call `read_events`. + /// 2. Parse each raw [`BytesMut`] into [`NetworkMetrics`] using an + /// unaligned read (the struct is `#[repr(C, packed)]` and `Pod`). + /// 3. 
Call [`Metrics::record_network_metrics`]. + /// 4. Retain the legacy `tracing::info!` log for human-readable local output. + /// 5. Sleep 100 ms between polls. + /// + /// # Safety + /// + /// `std::ptr::read_unaligned` is safe here because the eBPF program writes + /// exactly the `NetworkMetrics` layout into the ring buffer and the struct + /// implements [`aya::Pod`]. + /// Continuously read [`TimeStampMetrics`] events and record OpenTelemetry + /// observations. + /// + /// Counterpart to [`read_network_buffer`] for the `time_stamp_events` map. + + pub async fn read_network_metrics( + buffers: &mut [BytesMut], + tot_events: i32, + offset: i32, + exporter: &str, + metrics: Arc, + ) { + for i in offset..tot_events { + let vec_bytes = &buffers[i as usize]; + if vec_bytes.len() < std::mem::size_of::() { + error!( + "Corrupted Network Metrics data. Raw data: {}. Readed {} bytes expected {} bytes", + vec_bytes + .iter() + .map(|b| format!("{:02x}", b)) + .collect::>() + .join(" "), + vec_bytes.len(), + std::mem::size_of::() + ); + continue; + } + if vec_bytes.len() >= std::mem::size_of::() { + let net_metrics: NetworkMetrics = + unsafe { std::ptr::read_unaligned(vec_bytes.as_ptr() as *const _) }; + + match exporter { + "otlp" => metrics.record_network_metrics(&net_metrics), + _ => continue, // skip + } + let tgid = net_metrics.tgid; + let comm = String::from_utf8_lossy(&net_metrics.comm); + let ts_us = net_metrics.ts_us; + let sk_drop_count = net_metrics.sk_drops; + let sk_err = net_metrics.sk_err; + let sk_err_soft = net_metrics.sk_err_soft; + let sk_backlog_len = net_metrics.sk_backlog_len; + let sk_write_memory_queued = net_metrics.sk_write_memory_queued; + let sk_ack_backlog = net_metrics.sk_ack_backlog; + let sk_receive_buffer_size = net_metrics.sk_receive_buffer_size; + + info!( + "tgid: {}, comm: {}, ts_us: {}, sk_drops: {}, sk_err: {}, sk_err_soft: {}, sk_backlog_len: {}, sk_write_memory_queued: {}, sk_ack_backlog: {}, sk_receive_buffer_size: {}", + tgid, + 
comm, + ts_us, + sk_drop_count, + sk_err, + sk_err_soft, + sk_backlog_len, + sk_write_memory_queued, + sk_ack_backlog, + sk_receive_buffer_size + ); + } + } + } + #[cfg(feature = "monitoring-structs")] + pub async fn read_timestamp_metrics( + buffers: &mut [BytesMut], + tot_events: i32, + offset: i32, + exporter: &str, + metrics: Arc, + ) { + for i in offset..tot_events { + let vec_bytes = &buffers[i as usize]; + if vec_bytes.len() < std::mem::size_of::() { + error!( + "Corrupted Network Metrics data. Raw data: {}. Readed {} bytes expected {} bytes", + vec_bytes + .iter() + .map(|b| format!("{:02x}", b)) + .collect::>() + .join(" "), + vec_bytes.len(), + std::mem::size_of::() + ); + continue; + } + if vec_bytes.len() >= std::mem::size_of::() { + let time_stamp_event: TimeStampMetrics = + unsafe { std::ptr::read_unaligned(vec_bytes.as_ptr() as *const _) }; + + match exporter { + "otlp" => metrics.record_timestamp_metrics(&time_stamp_event), + _ => continue, + } + + let delta_us = time_stamp_event.delta_us; + let ts_us = time_stamp_event.ts_us; + let tgid = time_stamp_event.tgid; + let comm = String::from_utf8_lossy(&time_stamp_event.comm); + let lport = time_stamp_event.lport; + let dport_be = time_stamp_event.dport_be; + let af = time_stamp_event.af; + info!( + "TimeStampEvent - delta_us: {}, ts_us: {}, tgid: {}, comm: {}, lport: {}, dport_be: {}, af: {}", + delta_us, ts_us, tgid, comm, lport, dport_be, af + ); + } + } + } +} + +// docs: read buffer function: +// template function that take a mut perf_event_array_buffer of type T and a mutable buffer of Vec +#[cfg(feature = "buffer-reader")] +pub async fn read_perf_buffer>( + mut array_buffers: Vec>, + mut buffers: Vec, + buffer_type: BufferType, + #[cfg(feature = "monitoring-structs")] metrics: Option>, +) { + // loop over the buffers + loop { + for buf in array_buffers.iter_mut() { + match buf.read_events(&mut buffers) { + Ok(events) => { + // triggered if some events are lost + if events.lost > 0 { + 
tracing::debug!("Lost events: {} ", events.lost); + } + // triggered if some events are readed + if events.read > 0 { + tracing::debug!("Readed events: {}", events.read); + let offset = 0; + let tot_events = events.read as i32; + + //read the events in the buffer + match buffer_type { + #[cfg(feature = "network-structs")] + BufferType::PacketLog => { + BufferType::read_packet_log(&mut buffers, tot_events, offset).await + } + #[cfg(feature = "network-structs")] + BufferType::TcpPacketRegistry => { + BufferType::read_tcp_registry_log(&mut buffers, tot_events, offset) + .await + } + #[cfg(feature = "network-structs")] + BufferType::VethLog => { + BufferType::read_and_handle_veth_log( + &mut buffers, + tot_events, + offset, + ) + .await + } + #[cfg(feature = "monitoring-structs")] + BufferType::NetworkMetrics => { + BufferType::read_network_metrics( + &mut buffers, + tot_events, + offset, + "otlp", + metrics + .clone() + .expect("Metrics required for NetworkMetrics"), + ) + .await + } + #[cfg(feature = "monitoring-structs")] + BufferType::TimeStampMetrics => { + BufferType::read_timestamp_metrics( + &mut buffers, + tot_events, + offset, + "otlp", + metrics + .clone() + .expect("Metric required for TimeStampMetrics"), + ) + .await + } + } + } + } + Err(e) => { + error!("Cannot read events from buffer. 
Reason: {} ", e); + } + } + } + tokio::time::sleep(std::time::Duration::from_millis(100)).await; // small sleep + } +} + +#[cfg(feature = "buffer-reader")] +pub enum BufferSize { + #[cfg(feature = "network-structs")] + ClassifierNetEvents, + #[cfg(feature = "network-structs")] + VethEvents, + #[cfg(feature = "network-structs")] + TcpEvents, + #[cfg(feature = "monitoring-structs")] + NetworkMetricsEvents, + #[cfg(feature = "monitoring-structs")] + TimeMetricsEvents, +} +#[cfg(feature = "buffer-reader")] +impl BufferSize { + pub fn get_size(&self) -> usize { + match self { + #[cfg(feature = "network-structs")] + BufferSize::ClassifierNetEvents => std::mem::size_of::(), + #[cfg(feature = "network-structs")] + BufferSize::VethEvents => std::mem::size_of::(), + #[cfg(feature = "network-structs")] + BufferSize::TcpEvents => std::mem::size_of::(), + #[cfg(feature = "monitoring-structs")] + BufferSize::NetworkMetricsEvents => std::mem::size_of::(), + #[cfg(feature = "monitoring-structs")] + BufferSize::TimeMetricsEvents => std::mem::size_of::(), + } + } + pub fn set_buffer(&self) -> Vec { + // iter returns and iterator of cpu ids, + // we need only the total number of cpus to set the buffer size so we use .len() to get + // the count of total cpus and then we allocate a buffer for each cpu with a capacity + // based on the structure size * a factor to have a bigger buffer to avoid overflows and lost events + + // Old buffers where 1024 bytes long. Now we set different buffer size based on + // the frequence of the events. + // ClassifierNetEvents are triggered by the TC classifier program, events has high frequency + // VethEvents are triggered by the creation and deletion of veth interfaces, events has small frequency compared to classifier events + // TcpEvents are triggered by TCP events and connections. Events has similar frequency to ClassifierNetEvents. 
+ + let tot_cpu = online_cpus().iter().len(); // total number of cpus + + // TODO: finish to do all the calculations for the buffer sizes + match self { + #[cfg(feature = "network-structs")] + BufferSize::ClassifierNetEvents => { + let capacity = self.get_size() * 200; + return vec![BytesMut::with_capacity(capacity); tot_cpu]; + } + #[cfg(feature = "network-structs")] + BufferSize::VethEvents => { + let capacity = self.get_size() * 100; // Allocates 4Kb of memory for the buffers + return vec![BytesMut::with_capacity(capacity); tot_cpu]; + } + #[cfg(feature = "network-structs")] + BufferSize::TcpEvents => { + let capacity = self.get_size() * 200; + return vec![BytesMut::with_capacity(capacity); tot_cpu]; + } + #[cfg(feature = "monitoring-structs")] + BufferSize::NetworkMetricsEvents => { + let capacity = self.get_size() * 1024; + return vec![BytesMut::with_capacity(capacity); tot_cpu]; + } + #[cfg(feature = "monitoring-structs")] + BufferSize::TimeMetricsEvents => { + let capacity = self.get_size() * 1024; + return vec![BytesMut::with_capacity(capacity); tot_cpu]; + } + } + } +} + +#[cfg(feature = "buffer-reader")] +pub fn fill_buffers( + //buf: PerfEventArrayBuffer, + mut vec_of_buffers: Vec>, + //buffers: Vec, + mut events_array: PerfEventArray, +) -> Vec> { + for cpu_id in online_cpus() + .map_err(|e| anyhow::anyhow!("Error {:?}", e)) + .unwrap() + { + let buf = events_array + .open(cpu_id, None) + .expect("Error during the creation of net_events_buf structure"); + + vec_of_buffers.push(buf); + } + vec_of_buffers +} diff --git a/core/common/src/lib.rs b/core/common/src/lib.rs index f8fadc66..15c4ad70 100644 --- a/core/common/src/lib.rs +++ b/core/common/src/lib.rs @@ -1,3 +1,15 @@ +#[cfg(any( + feature = "buffer-reader", + feature = "network-structs", + feature = "monitoring-structs", +))] +pub mod buffer_type; pub mod constants; +pub mod formatters; pub mod logger; -pub mod formatters; \ No newline at end of file +#[cfg(feature = "map-handlers")] +pub mod 
map_handlers; +#[cfg(feature = "monitoring-structs")] +pub mod otel_metrics; +#[cfg(feature = "program-handlers")] +pub mod program_handlers; diff --git a/core/common/src/logger.rs b/core/common/src/logger.rs index 5a1b8906..ab06f79b 100644 --- a/core/common/src/logger.rs +++ b/core/common/src/logger.rs @@ -1,4 +1,7 @@ -use tracing_subscriber::{fmt::format::FmtSpan, EnvFilter}; +use tracing_subscriber::Layer; +use tracing_subscriber::layer::SubscriberExt; +use tracing_subscriber::util::SubscriberInitExt; +use tracing_subscriber::{EnvFilter, fmt::format::FmtSpan}; /// Initialize the default logger configuration used across CortexBrain components. /// @@ -35,3 +38,47 @@ pub fn init_logger_without_time() { .with_line_number(false) .init(); } + +use opentelemetry_appender_tracing::layer::OpenTelemetryTracingBridge; +use opentelemetry_otlp::{LogExporter, WithExportConfig}; +use opentelemetry_sdk::Resource; +use opentelemetry_sdk::logs::SdkLoggerProvider; + +pub fn otlp_logger_init(service_name: String) -> SdkLoggerProvider { + //exporter and provider initialization + let otlp_endpoint = std::env::var("OTEL_EXPORTER_OTLP_ENDPOINT") + .unwrap_or_else(|_| "http://localhost:4317".to_string()); + + let exporter = LogExporter::builder() + .with_tonic() + .with_endpoint(otlp_endpoint) + .build() + .expect("Failed to create OTLP exporter"); + + //needs a service name + let provider = SdkLoggerProvider::builder() + .with_resource(Resource::builder().with_service_name(service_name).build()) + .with_batch_exporter(exporter) + .build(); + + //maybe we will need some filter later + //init otel_filter and layer + let otel_layer = OpenTelemetryTracingBridge::new(&provider); + + // init fmt filter and layer + let fmt_filter = EnvFilter::new("info").add_directive("opentelemetry=debug".parse().unwrap()); + let fmt_layer = tracing_subscriber::fmt::layer() + .with_thread_names(true) + .with_line_number(false) + .with_target(false) + .pretty() + .with_filter(fmt_filter); + + //init tracing 
subscriber with otel layer + tracing_subscriber::registry() + .with(otel_layer) + .with(fmt_layer) + .init(); + + provider +} diff --git a/core/common/src/map_handlers.rs b/core/common/src/map_handlers.rs new file mode 100644 index 00000000..b246b701 --- /dev/null +++ b/core/common/src/map_handlers.rs @@ -0,0 +1,214 @@ +use anyhow::Error; +use anyhow::Ok; +use aya::Ebpf; +use aya::maps::HashMap; +use aya::maps::Map; +use k8s_openapi::api::core::v1::ConfigMap; +use kube::{Api, Client}; +use std::net::Ipv4Addr; +use std::path::PathBuf; +use std::str::FromStr; +use std::sync::Arc; +use std::sync::Mutex; +use tracing::warn; +use tracing::{error, info}; + +// docs +// +// this function init the bpfs maps used in the main program +// +// + +#[cfg(feature = "map-handlers")] +pub struct BpfMapsData { + pub bpf_obj_names: Vec, + pub bpf_obj_map: Vec, +} + +#[cfg(feature = "map-handlers")] +pub fn init_bpf_maps( + bpf: Arc>, + map_names: Vec, +) -> Result { + let mut bpf_new = bpf + .lock() + .map_err(|e| anyhow::anyhow!("Cannot get value from lock. Reason: {}", e))?; + + let mut maps = Vec::new(); // stores bpf_maps_objects + + for name in &map_names { + let bpf_map_init = bpf_new + .take_map(&name) + .ok_or_else(|| anyhow::anyhow!("{} map not found", &name))?; + maps.push(bpf_map_init); + } + Ok(BpfMapsData { + bpf_obj_names: map_names.clone(), + bpf_obj_map: maps, + }) +} + +//TODO: save bpf maps path in the cli metadata + +//takes an array of bpf maps and pin them to persist session data + +#[cfg(feature = "map-handlers")] +pub fn map_pinner(maps: BpfMapsData, path: &PathBuf) -> Result { + if !path.exists() { + info!("Pin path {:?} does not exist. 
Creating it...", path); + std::fs::create_dir_all(&path)?; + #[cfg(unix)] + { + use std::os::unix::fs::PermissionsExt; + std::fs::set_permissions(&path, std::fs::Permissions::from_mode(0o755))?; + } + } + + //let mut owned_maps = Vec::new(); // aya::Maps does not implement the clone trait i need to create a raw copy of the vec map + let mut owned_bpf_maps_data = BpfMapsData { + bpf_obj_names: Vec::new(), + bpf_obj_map: Vec::new(), + }; + // an iterator that iterates two iterators simultaneously + for (map_obj, name) in maps + .bpf_obj_map + .into_iter() + .zip(maps.bpf_obj_names.into_iter()) + { + let map_path = path.join(&name); + if map_path.exists() { + warn!("Path {} already exists", name); + warn!("Removing path {}", name); + std::fs::remove_file(&map_path)?; + } + info!("Trying to pin map {:?} in map path: {:?}", name, &map_path); + map_obj.pin(&map_path)?; + //owned_maps.push(map_obj); + owned_bpf_maps_data.bpf_obj_names.push(name); + owned_bpf_maps_data.bpf_obj_map.push(map_obj); + } + + Ok(owned_bpf_maps_data) // return a BpfMapsData type +} + +#[cfg(feature = "map-handlers")] +pub async fn populate_blocklist() -> Result<(), Error> { + use aya::maps::MapData; + // load mapdata from path + + let mapdata = MapData::from_pin("/sys/fs/bpf/maps/Blocklist") + .map_err(|e| anyhow::anyhow!("Failed to load blocklist_map: {}", e))?; + + let map = Map::HashMap(mapdata); + let mut blocklist_map = HashMap::<_, [u8; 4], [u8; 4]>::try_from(map)?; + + let client = Client::try_default() + .await + .expect("Cannot connect to Kubernetes Client"); + let namespace = "cortexflow"; + let configmap = "cortexbrain-client-config"; + + let api: Api = Api::namespaced(client, namespace); + match api.get(configmap).await { + std::result::Result::Ok(configs) => { + info!("Configmap : {} loaded correctly ", configmap); + info!("[CONFIGMAP]: {:?} ", configs); + if let Some(data) = configs.data { + if let Some(blocklist) = data.get("blocklist") { + let addresses: Vec = blocklist + .lines() 
+ .map(|s| s.trim().to_string()) + .filter(|s| !s.is_empty()) + .collect(); + //String parsing from "x y" to ["x","y"] + if addresses.is_empty() { + warn!("No addresses found in the blocklist. Skipping load"); + } + for item in &addresses { + info!("Inserting addresses: {:?}", &item); + let addr = Ipv4Addr::from_str(&item)?.octets(); + let _ = blocklist_map.insert(addr, addr, 0); + } + } + } + Ok(()) + } + std::result::Result::Err(e) => { + error!("An error occured while reading configmap: {}", e); + return Err(e.into()); + } + } +} + +#[cfg(feature = "map-handlers")] +// TODO: modify this to accept also HashMap types +pub fn load_perf_event_array_from_mapdata( + path: &'static str, +) -> Result, Error> { + use aya::maps::MapData; + use aya::maps::PerfEventArray; + + let map_data = MapData::from_pin(path) + .map_err(|e| anyhow::anyhow!("Cannot load mapdata from pin {:?} .Reason: {}", &path, e))?; + + let map = Map::PerfEventArray(map_data); + + let perf_event_array = PerfEventArray::try_from(map).map_err(|e| { + anyhow::anyhow!("Cannot initialize perf_event_array from map. Reason: {}", e) + })?; + Ok(perf_event_array) +} + +#[cfg(feature = "map-handlers")] +pub fn map_manager( + maps: BpfMapsData, +) -> Result< + std::collections::HashMap< + String, + ( + aya::maps::PerfEventArray, + Vec>, + ), + >, + Error, +> { + use aya::maps::PerfEventArray; + use aya::maps::{MapData, perf::PerfEventArrayBuffer}; + use tracing::debug; + + let mut map_manager = std::collections::HashMap::< + String, // this will store the bpf map name + (PerfEventArray, Vec>), // this will manage the BPF_MAP_TYPE_PERF_EVENT_ARRAY and its buffer + >::new(); + + // map_manager creates an hashmap that contains: + // MAP NAME as String (KEY) + // + // VALUES (tuple) + // a PERF_EVENT_ARRAY + // a vector of PERF_EVENT_ARRAY_BUFFER + // + // the map manager helps the event listener to specifically call a map by its pinned name + // e.g. 
veth_identity_map and returns the associated PERF_EVENT_ARRAY and PERF_EVENT_ARRAY_BUFFERS (1 per CPU) + // also the map manager helps to write a more complete debug context by linking map names with arrays and buffers. + // actually i cannot return the extact information using only the Aya library + + // create the PerfEventArrays and the buffers from the BpfMapsData Objects + for (map, name) in maps + .bpf_obj_map + .into_iter() + .zip(maps.bpf_obj_names.into_iter()) + // zip two iterators at the same time for map object and map names + { + debug!("Debugging map type:{:?} for map name {:?}", map, &name); + info!("Creating PerfEventArray for map name {:?}", &name); + + // save the map in a registry if is a PerfEventArray to access them by name + if let std::result::Result::Ok(perf_event_array) = PerfEventArray::try_from(map) { + map_manager.insert(name.clone(), (perf_event_array, Vec::new())); + } else { + warn!("Map {:?} is not a PerfEventArray, skipping load", &name); + } + } + Ok(map_manager) +} diff --git a/core/common/src/otel_metrics.rs b/core/common/src/otel_metrics.rs new file mode 100644 index 00000000..ae8c9dbe --- /dev/null +++ b/core/common/src/otel_metrics.rs @@ -0,0 +1,133 @@ +//! OpenTelemetry metric instruments for eBPF perf-buffer events. +//! +//! This module centralises every [`Meter`]-backed instrument that the +//! `metrics` crate uses to observe raw eBPF events. It provides a single +//! [`Metrics`] handle that is cheap to [`Arc`]-clone and safe to use from +//! multiple asynchronous tasks concurrently. +//! +//! - An [`Arc`] is moved into each Tokio +//! task that reads a perf buffer. All instrument operations are lock-free. +//! - Every observation is tagged with `tgid` and `comm` +//! extracted from the eBPF struct, allowing downstream collectors to group +//! telemetry by process. 
+ +use crate::buffer_type::{NetworkMetrics, TimeStampMetrics}; +use opentelemetry::KeyValue; +use opentelemetry::metrics::{Counter, Gauge, Histogram, Meter}; +pub struct Metrics { + /// Total number of eBPF events processed across all perf buffers. + pub events_total: Counter, + + /// Total number of network-related events produced by the `net_metrics` + /// eBPF map. + pub packets_total: Counter, + + /// Observed socket drop count (`sk_drops`) from the kernel sock struct. + pub sk_drops: Gauge, + + /// Observed socket error count (`sk_err`) from the kernel sock struct. + pub sk_err: Gauge, + + /// Histogram of `delta_us` values supplied by the `time_stamp_events` + /// perf buffer. + pub delta_us: Histogram, + + /// Histogram of `ts_us` values seen in both `net_metrics` and + /// `time_stamp_events`. + pub ts_us: Histogram, +} + +impl Metrics { + /// Initialise all instruments backed by the supplied [`Meter`]. + pub fn new(meter: &Meter) -> Self { + // total events + let events_total = meter + .u64_counter("cortexbrain_events_total") + .with_description("Total number of eBPF events processed") + .build(); + + // total packets + let packets_total = meter + .u64_counter("cortexbrain_packets_total") + .with_description("Total number of network events processed") + .build(); + + // socket drops + let sk_drops = meter + .i64_gauge("cortexbrain_sk_drops") + .with_description("Socket drop count per event") + .build(); + + // socket errors + let sk_err = meter + .i64_gauge("cortexbrain_sk_err") + .with_description("Socket error count per event") + .build(); + + // delta microseconds + let delta_us = meter + .u64_histogram("cortexbrain_delta_us") + .with_description("Distribution of delta_us values from timestamp events") + .build(); + + // timestamp microseconds grouped + let ts_us = meter + .u64_histogram("cortexbrain_ts_us") + .with_description("Distribution of timestamp values from eBPF events") + .build(); + + Self { + events_total, + packets_total, + sk_drops, + 
sk_err, + delta_us, + ts_us, + } + } + + /// Record a single [`NetworkMetrics`] event. + /// + /// Increments `events_total` and `packets_total`, records `sk_drops` and + /// `sk_err` as gauges, and observes `ts_us` in the timestamp histogram. + /// + /// Every observation carries: + /// + /// -`tgid` – task group ID. + /// - `comm` – command name (null-terminated bytes converted to a UTF-8 + /// string and trimmed). + pub fn record_network_metrics(&self, m: &NetworkMetrics) { + let comm = String::from_utf8_lossy(&m.comm); + let comm_trimmed = comm.trim_end_matches('\0').to_string(); + let attrs = &[ + KeyValue::new("tgid", m.tgid as i64), + KeyValue::new("comm", comm_trimmed), + ]; + + self.events_total.add(1, attrs); + self.packets_total.add(1, attrs); + self.sk_drops.record(m.sk_drops as i64, attrs); + self.sk_err.record(m.sk_err as i64, attrs); + self.ts_us.record(m.ts_us, attrs); + } + + /// Record a single [`TimeStampMetrics`] event. + /// + /// Increments `events_total`, and records `delta_us` and `ts_us` in their + /// respective histograms. + /// + /// Every observation carries `tgid` and `comm` (see + /// [`record_network_metrics`]). 
+ pub fn record_timestamp_metrics(&self, m: &TimeStampMetrics) { + let comm = String::from_utf8_lossy(&m.comm); + let comm_trimmed = comm.trim_end_matches('\0').to_string(); + let attrs = &[ + KeyValue::new("tgid", m.tgid as i64), + KeyValue::new("comm", comm_trimmed), + ]; + + self.events_total.add(1, attrs); + self.delta_us.record(m.delta_us, attrs); + self.ts_us.record(m.ts_us, attrs); + } +} diff --git a/core/common/src/program_handlers.rs b/core/common/src/program_handlers.rs new file mode 100644 index 00000000..347be51f --- /dev/null +++ b/core/common/src/program_handlers.rs @@ -0,0 +1,50 @@ +use aya::{Ebpf, programs::KProbe}; +use std::convert::TryInto; +use std::sync::{Arc, Mutex}; +use tracing::{error, info}; + +#[cfg(feature = "program-handlers")] +pub fn load_program( + bpf: Arc>, + program_name: &str, + kernel_symbol: &str, +) -> Result<(), anyhow::Error> { + let mut bpf_new = bpf + .lock() + .map_err(|e| anyhow::anyhow!("Cannot get value from lock. Reason: {}", e))?; + + // Load and attach the eBPF program + let program: &mut KProbe = bpf_new + .program_mut(program_name) + .ok_or_else(|| anyhow::anyhow!("Program {} not found", program_name))? + .try_into() + .map_err(|e| anyhow::anyhow!("Failed to convert program: {:?}", e))?; + + // STEP 1: load program + + program + .load() + .map_err(|e| anyhow::anyhow!("Cannot load program: {}. Error: {}", &program_name, e))?; + + // STEP 2: Attach the loaded program to kernel symbol + match program.attach(kernel_symbol, 0) { + Ok(_) => info!( + "{} program attached successfully to kernel symbol {}", + &program_name, &kernel_symbol + ), + Err(e) => { + error!( + "Error attaching {} program to kernel symbol {}. Reason: {:?}", + &program_name, &kernel_symbol, e + ); + return Err(anyhow::anyhow!( + "Failed to attach program {} to kernel symbol {}. 
Reason {:?}", + &program_name, + &kernel_symbol, + e + )); + } + }; + + Ok(()) +} diff --git a/core/src/components/conntracker/src/data_structures.rs b/core/src/components/conntracker/src/data_structures.rs index 35861a84..c55cd3f4 100644 --- a/core/src/components/conntracker/src/data_structures.rs +++ b/core/src/components/conntracker/src/data_structures.rs @@ -1,20 +1,19 @@ use aya_ebpf::{ macros::map, - maps::{LruPerCpuHashMap, PerfEventArray,HashMap}, + maps::{HashMap, LruPerCpuHashMap, PerfEventArray}, }; // docs: // PacketLog structure used to track an incoming network packet -// +// // proto: packet protol (ex. TCP,UDP,ICMP) -// src_ip: source address ip +// src_ip: source address ip // src_port: source address port // dst_ip: destination ip // dst_port: destination port // pid: kernel process ID // - #[repr(C)] #[derive(Clone, Copy)] pub struct PacketLog { @@ -37,75 +36,71 @@ pub struct ConnArray { pub proto: u8, } - // docs: // VethLog structure used to track virtual ethernet interfaces creation and deletion -// +// // name: veth name -// state: socket state +// state: socket state // dev_addr: veth device addresses // event_type: creation or deletion // netns: veth network namespace // pid: kernel process ID // -#[repr(C)] -#[derive(Clone, Copy, Debug)] +#[repr(C, packed)] +#[derive(Clone, Copy)] pub struct VethLog { - pub name: [u8; 16], - pub state: u64, // state var type: long unsigned int - pub dev_addr: [u32; 8], - pub event_type: u8, // i choose 1 for veth creation or 2 for veth destruction - pub netns: u32, - pub pid: u32 - + pub name: [u8; 16], // 16 bytes: veth interface name + pub state: u64, // 8 bytes: state variable (unsigned long in kernel) + pub dev_addr: [u8; 6], // 6 bytes: device address + pub event_type: u8, // 1 byte: 1 for veth creation, 2 for veth destruction + pub netns: u32, // 4 bytes: network namespace inode number + pub pid: u32, // 4 bytes: PID that triggered the event } // TODO: write documentation about this structure 
#[repr(C)] -#[derive(Clone,Copy,Debug)] -pub struct TcpPacketRegistry{ +#[derive(Clone, Copy, Debug)] +pub struct TcpPacketRegistry { pub proto: u8, pub src_ip: u32, pub dst_ip: u32, pub src_port: u16, pub dst_port: u16, pub pid: u32, - pub command: [u8;16], + pub command: [u8; 16], pub cgroup_id: u64, - } // docs: // -// BPF maps used in the conntracker programs -// +// BPF maps used in the conntracker programs +// // VETH_EVENTS: PerfEventArray used in the veth_tracer functions (veth_tracer.rs module) // -// BLOCKLIST: an hashmap used to block addresses -----> TODO: key and values are the same for semplicity but we need to +// BLOCKLIST: an hashmap used to block addresses -----> TODO: key and values are the same for semplicity but we need to // investigate the possibility to save the service name or the timestamp registered when the command was executed or a simple int index // - -#[map(name = "EventsMap", pinning = "by_name")] +#[map(name = "events_map", pinning = "by_name")] pub static mut EVENTS: PerfEventArray = PerfEventArray::new(0); -// FIXME: this might be useless -#[map(name = "ConnectionMap")] -pub static mut ACTIVE_CONNECTIONS: LruPerCpuHashMap = - LruPerCpuHashMap::with_max_entries(65536, 0); - // FIXME: this might be useless #[map(name = "ConnectionTrackerMap")] pub static mut CONNTRACKER: LruPerCpuHashMap = LruPerCpuHashMap::with_max_entries(65536, 0); -#[map(name = "veth_identity_map")] +#[map(name = "veth_identity_map", pinning = "by_name")] pub static mut VETH_EVENTS: PerfEventArray = PerfEventArray::new(0); -#[map(name = "Blocklist")] -pub static mut BLOCKLIST: HashMap<[u8;4], [u8;4]> = HashMap::<[u8;4], [u8;4]>::with_max_entries(1024, 0); +#[map(name = "Blocklist", pinning = "by_name")] +pub static mut BLOCKLIST: HashMap<[u8; 4], [u8; 4]> = HashMap::with_max_entries(1024, 0); //here i need to pass an address like this: [135,171,168,192] -#[map(name = "TcpPacketRegistry",pinning = "by_name")] -pub static mut PACKET_REGISTRY: PerfEventArray = 
PerfEventArray::new(0); \ No newline at end of file +#[map(name = "TcpPacketRegistry", pinning = "by_name")] +pub static mut PACKET_REGISTRY: PerfEventArray = PerfEventArray::new(0); + +#[map(name = "tracked_veth", pinning = "by_name")] +// This map takes a registry of tracked veth interfaces +// The maximum number of characters is 16 of type u8 +pub static mut TRACKED_VETH: HashMap<[u8; 16], [u8; 8]> = HashMap::with_max_entries(1024, 0); diff --git a/core/src/components/conntracker/src/main.rs b/core/src/components/conntracker/src/main.rs index 7a12642d..8438838f 100644 --- a/core/src/components/conntracker/src/main.rs +++ b/core/src/components/conntracker/src/main.rs @@ -29,14 +29,17 @@ use aya_ebpf::{ }; use crate::tc::try_identity_classifier; -use crate::veth_tracer::try_veth_tracer; use crate::tcp_analyzer::try_tcp_analyzer; +use crate::veth_tracer::try_veth_tracer; +// TODO: add function to track +// 1. kprobe:tcp_enter_memory_pressure +// 2. kprobe:tcp_create_openreq_child (https://elixir.bootlin.com/linux/v6.18.6/source/net/ipv4/tcp_ipv4.c#L1776) [function: *tcp_v4_syn_recv_sock] // docs: // // virtual ethernet (veth) interface tracer: -// This function is triggered when a virtual ethernet interface is created +// This function is triggered when a virtual ethernet interface is created // #[kprobe] @@ -50,7 +53,7 @@ pub fn veth_creation_trace(ctx: ProbeContext) -> u32 { // docs: // // virtual ethernet (veth) interface tracer: -// This function is triggered when a virtual ethernet interface is deleted +// This function is triggered when a virtual ethernet interface is deleted // #[kprobe] @@ -94,14 +97,29 @@ pub fn identity_classifier(ctx: TcContext) -> i32 { // // this kprobe retrieves pid data and task id of an incoming packet +// this kprobe separation is needed because every kprobe program can be attached only one time. 
+// if you try to attach the same program the kernel returns this error: "Program is already attached" +// this is the reason why we have tcp_message_tracer_connect and tcp_message_tracer_rcv that are essentially the same functions +// but in the kernel space one is attached to the tcp_v4_connect kprobe and one to the tcp_v4_rcv kprobe +// TODO: a good addition to the library will be a function that check if the program is already attached: +// if the program is attached it creates a safe copy of the program to attach a second kernel symbol (kprobes) +// if the program is not attached we have the traditional behaviour (load the program + attach the program to the kernel symbol (kprobes)) + #[kprobe] -pub fn tcp_message_tracer(ctx: ProbeContext) -> u32 { +pub fn tcp_message_tracer_connect(ctx: ProbeContext) -> u32 { match try_tcp_analyzer(ctx) { Ok(ret_val) => ret_val, Err(ret_val) => ret_val.try_into().unwrap_or(1), } } +#[kprobe] +pub fn tcp_message_tracer_rcv(ctx: ProbeContext) -> u32 { + match try_tcp_analyzer(ctx) { + Ok(ret_val) => ret_val, + Err(ret_val) => ret_val.try_into().unwrap_or(1), + } +} //ref:https://elixir.bootlin.com/linux/v6.15.1/source/include/uapi/linux/ethtool.h#L536 //https://elixir.bootlin.com/linux/v6.15.1/source/drivers/net/veth.c#L268 diff --git a/core/src/components/conntracker/src/veth_tracer.rs b/core/src/components/conntracker/src/veth_tracer.rs index e2f07e7c..cf66a749 100644 --- a/core/src/components/conntracker/src/veth_tracer.rs +++ b/core/src/components/conntracker/src/veth_tracer.rs @@ -25,7 +25,7 @@ pub fn try_veth_tracer(ctx: ProbeContext, mode: u8) -> Result { } let mut name_buf = [0u8; 16]; - let mut dev_addr_buf = [0u32; 8]; + let mut dev_addr_buf = [0u8; 6]; // name field let name_field_offset = 304; // reading the name field offset @@ -35,12 +35,12 @@ pub fn try_veth_tracer(ctx: ProbeContext, mode: u8) -> Result { // state field let state_offset = 168; - let state: u8 = read_linux_inner_value::(net_device_pointer as 
*const u8, state_offset)?; + let state: u64 = read_linux_inner_value::(net_device_pointer as *const u8, state_offset)?; // dev_addr let dev_addr_offset = 1080; - let dev_addr_array: [u32; 8] = - read_linux_inner_value::<[u32; 8]>(net_device_pointer as *const u8, dev_addr_offset)?; + let dev_addr_array: [u8; 6] = + read_linux_inner_value::<[u8; 6]>(net_device_pointer as *const u8, dev_addr_offset)?; let inum: u32 = extract_netns_inum(net_device_pointer as *const u8)?; let pid: u32 = bpf_get_current_pid_tgid() as u32; // extracting lower 32 bit corresponding to the PID @@ -52,7 +52,7 @@ pub fn try_veth_tracer(ctx: ProbeContext, mode: u8) -> Result { // compose the structure let veth_data = VethLog { name: name_buf, - state: state.into(), + state: state, dev_addr: dev_addr_buf, event_type: mode, netns: inum, diff --git a/core/src/components/identity/Cargo.toml b/core/src/components/identity/Cargo.toml index 08d753eb..1e96cc9a 100644 --- a/core/src/components/identity/Cargo.toml +++ b/core/src/components/identity/Cargo.toml @@ -10,12 +10,10 @@ homepage = "https://docs.cortexflow.org" repository = "https://github.com/CortexFlow/CortexBrain" [features] -default = ["map-handlers", "struct", "enums"] -map-handlers = [] +default = ["struct", "enums"] struct = [] enums = [] -experimental = ["map-handlers", "struct", "enums"] - +experimental = ["struct", "enums"] [dependencies] aya = "0.13.1" @@ -28,13 +26,16 @@ tokio = { version = "1.48.0", features = [ "time", "macros", ] } -anyhow = "1.0" tracing = "0.1.41" -tracing-subscriber = { version = "0.3.19", features = ["env-filter"] } -libc = "0.2.172" bytemuck = { version = "1.23.0", features = ["derive"] } -bytemuck_derive = "1.10.1" -cortexbrain-common = "0.1.0" +cortexbrain-common = { path = "../../../common/", features = [ + "map-handlers", + "program-handlers", + "network-structs", + "buffer-reader", +] } nix = { version = "0.30.1", features = ["net"] } kube = { version = "2.0.1", features = ["client"] } k8s-openapi = { 
version = "0.26.0", features = ["v1_34"] } +bytemuck_derive = "1.10.2" +anyhow = "1.0.100" diff --git a/core/src/components/identity/src/enums.rs b/core/src/components/identity/src/enums.rs deleted file mode 100644 index b0b271ba..00000000 --- a/core/src/components/identity/src/enums.rs +++ /dev/null @@ -1,12 +0,0 @@ -/* - * IpProtocols enum to reconstruct the packet protocol based on the - * IPV4 Header Protocol code - */ -#[cfg(feature="enums")] -#[derive(Debug)] -#[repr(u8)] -pub enum IpProtocols { - ICMP = 1, - TCP = 6, - UDP = 17, -} \ No newline at end of file diff --git a/core/src/components/identity/src/helpers.rs b/core/src/components/identity/src/helpers.rs index 7855edc4..50414bfd 100644 --- a/core/src/components/identity/src/helpers.rs +++ b/core/src/components/identity/src/helpers.rs @@ -1,191 +1,23 @@ -#![allow(warnings)] -use crate::enums::IpProtocols; -use crate::structs::{PacketLog, TcpPacketRegistry, VethLog}; -use anyhow::Error; -use aya::programs::tc::SchedClassifierLinkId; -use aya::{ - Bpf, - maps::{MapData, perf::PerfEventArrayBuffer}, - programs::{SchedClassifier, TcAttachType}, -}; -use bytes::BytesMut; -use k8s_openapi::api::core::v1::Pod; -use kube::api::ObjectList; -use kube::{Api, Client}; use nix::net::if_::if_nameindex; -use std::collections::HashMap; -use std::fs; use std::result::Result::Ok; -use std::sync::Mutex; -use std::{ - borrow::BorrowMut, - net::Ipv4Addr, - sync::{ - Arc, - atomic::{AtomicBool, Ordering}, - }, -}; -use tokio::time; -use tracing::{debug, error, info, warn}; - -/* - * TryFrom Trait implementation for IpProtocols enum - * This is used to reconstruct the packet protocol based on the - * IPV4 Header Protocol code - */ - -impl TryFrom for IpProtocols { - type Error = (); - fn try_from(proto: u8) -> Result { - match proto { - 1 => Ok(IpProtocols::ICMP), - 6 => Ok(IpProtocols::TCP), - 17 => Ok(IpProtocols::UDP), - _ => Err(()), - } - } -} - -/* helper functions to read and log net events in the container */ -pub 
async fn display_events>( - mut perf_buffers: Vec>, - running: Arc, - mut buffers: Vec, -) { - while running.load(Ordering::SeqCst) { - for buf in perf_buffers.iter_mut() { - match buf.read_events(&mut buffers) { - std::result::Result::Ok(events) => { - for i in 0..events.read { - let data = &buffers[i]; - if data.len() >= std::mem::size_of::() { - let pl: PacketLog = - unsafe { std::ptr::read(data.as_ptr() as *const _) }; - let src = reverse_be_addr(pl.src_ip); - let dst = reverse_be_addr(pl.dst_ip); - let src_port = u16::from_be(pl.src_port); - let dst_port = u16::from_be(pl.dst_port); - let event_id = pl.pid; - - match IpProtocols::try_from(pl.proto) { - std::result::Result::Ok(proto) => { - info!( - "Event Id: {} Protocol: {:?} SRC: {}:{} -> DST: {}:{}", - event_id, proto, src, src_port, dst, dst_port - ); - } - Err(_) => { - info!( - "Event Id: {} Protocol: Unknown ({})", - event_id, pl.proto - ); - } - }; - } else { - warn!("Received packet data too small: {} bytes", data.len()); - } - } - } - Err(e) => { - error!("Error reading events: {:?}", e); - } - } - } - tokio::time::sleep(std::time::Duration::from_millis(100)).await; - } -} - -pub fn reverse_be_addr(addr: u32) -> Ipv4Addr { - let mut octects = addr.to_be_bytes(); - let [a, b, c, d] = [octects[3], octects[2], octects[1], octects[0]]; - let reversed_ip = Ipv4Addr::new(a, b, c, d); - reversed_ip -} - -pub async fn display_veth_events>( - bpf: Arc>, - mut perf_buffers: Vec>, - running: Arc, - mut buffers: Vec, - mut link_ids: Arc>>, -) { - while running.load(Ordering::SeqCst) { - for buf in perf_buffers.iter_mut() { - match buf.read_events(&mut buffers) { - std::result::Result::Ok(events) => { - for i in 0..events.read { - let data = &buffers[i]; - if data.len() >= std::mem::size_of::() { - let vethlog: VethLog = - unsafe { std::ptr::read(data.as_ptr() as *const _) }; - - let name_bytes = vethlog.name; - - let dev_addr_bytes = vethlog.dev_addr.to_vec(); - let name = std::str::from_utf8(&name_bytes); - let 
state = vethlog.state; - - let dev_addr = dev_addr_bytes; - let netns = vethlog.netns; - let mut event_type = String::new(); - match vethlog.event_type { - 1 => { - event_type = "creation".to_string(); - } - 2 => { - event_type = "deletion".to_string(); - } - _ => warn!("unknown event_type"), - } - match name { - std::result::Result::Ok(veth_name) => { - info!( - "[{}] Triggered action: register_netdevice event_type:{:?} Manipulated veth: {:?} state:{:?} dev_addr:{:?}", - netns, - event_type, - veth_name.trim_end_matches("\0").to_string(), - state, - dev_addr - ); - match attach_detach_veth( - bpf.clone(), - vethlog.event_type, - veth_name, - link_ids.clone(), - ) - .await - { - std::result::Result::Ok(_) => { - info!("Attach/Detach veth function attached correctly"); - } - Err(e) => error!( - "Error attaching Attach/Detach function. Error : {}", - e - ), - } - } - Err(_) => info!("Unknown name or corrupted field"), - } - } else { - warn!("Corrupted data"); - } - } - } - Err(e) => { - error!("Error reading veth events: {:?}", e); - } - } - } - tokio::time::sleep(std::time::Duration::from_millis(100)).await; - } -} - +use tracing::info; + +// docs: +// This function checks if the given interface name is in the list of ignored interfaces +// Takes a interface name (iface) as &str and returns true if the interface should be ignored +// Typically we want to ignore eth0,docker0,tunl0,lo interfaces because they are not relevant for the internal monitoring +// +#[inline(always)] pub fn ignore_iface(iface: &str) -> bool { let ignored_interfaces = ["eth0", "docker0", "tunl0", "lo"]; ignored_interfaces.contains(&iface) } -//filter the interfaces,exclude docker0,eth0,lo interfaces +// docs: +// This function retrieves the list of veth interfaces on the system, filtering out ignored interfaces with +// the ignore_iface function. 
+// +#[inline(always)] pub fn get_veth_channels() -> Vec { //filter interfaces and save the output in the let mut interfaces: Vec = Vec::new(); @@ -204,408 +36,30 @@ pub fn get_veth_channels() -> Vec { interfaces } -async fn attach_detach_veth( - bpf: Arc>, - event_type: u8, - iface: &str, - link_ids: Arc>>, -) -> Result<(), anyhow::Error> { - info!( - "attach_detach_veth called: event_type={}, iface={}", - event_type, iface - ); - match event_type { - 1 => { - let mut bpf = bpf.lock().unwrap(); - let program: &mut SchedClassifier = bpf - .program_mut("identity_classifier") - .ok_or_else(|| anyhow::anyhow!("program 'identity_classifier' not found"))? - .try_into()?; - - let iface = iface.trim_end_matches('\0'); - - if ignore_iface(iface) { - info!("Skipping ignored interface: {}", iface); - return Ok(()); - } - - let mut link_ids = link_ids.lock().unwrap(); - match program.attach(iface, TcAttachType::Ingress) { - std::result::Result::Ok(link_id) => { - info!( - "Program 'identity_classifier' attached to interface {}", - iface - ); - link_ids.insert(iface.to_string(), link_id); - } - Err(e) => error!("Error attaching program to interface {}: {:?}", iface, e), - } - } - 2 => { - // INFO: Detaching occurs automatically when veth is deleted by kernel itself - let mut link_ids = link_ids.lock().unwrap(); - match link_ids.remove(iface) { - Some(_) => { - info!("Successfully detached program from interface {}", iface); - } - None => { - error!("Interface {} not found in link_ids", iface); - return Err(anyhow::anyhow!("Interface {} not found in link_ids", iface)); - } - } - } - _ => { - error!("Unknown event type: {}", event_type); - } - } - Ok(()) -} - -// CHECK THIS DIR: /sys/fs/cgroup/kubelet.slice/kubelet-kubepods.slice/kubelet-kubepods-besteffort.slice -/* helper functions to display events from the TcpPacketRegistry structure */ -pub async fn display_tcp_registry_events>( - mut perf_buffers: Vec>, - running: Arc, - mut buffers: Vec, -) { - while 
running.load(Ordering::SeqCst) { - for buf in perf_buffers.iter_mut() { - match buf.read_events(&mut buffers) { - std::result::Result::Ok(events) => { - for i in 0..events.read { - let data = &buffers[i]; - if data.len() >= std::mem::size_of::() { - let tcp_pl: TcpPacketRegistry = - unsafe { std::ptr::read(data.as_ptr() as *const _) }; - let src = reverse_be_addr(tcp_pl.src_ip); - let dst = reverse_be_addr(tcp_pl.dst_ip); - let src_port = u16::from_be(tcp_pl.src_port); - let dst_port = u16::from_be(tcp_pl.dst_port); - let event_id = tcp_pl.pid; - let command = tcp_pl.command.to_vec(); - let end = command - .iter() - .position(|&x| x == 0) - .unwrap_or(command.len()); - let command_str = String::from_utf8_lossy(&command[..end]).to_string(); - let cgroup_id = tcp_pl.cgroup_id; - - match IpProtocols::try_from(tcp_pl.proto) { - std::result::Result::Ok(proto) => { - info!( - "Event Id: {} Protocol: {:?} SRC: {}:{} -> DST: {}:{} Command: {} Cgroup_id: {}", - event_id, - proto, - src, - src_port, - dst, - dst_port, - command_str, - cgroup_id //proc_content - ); - } - Err(_) => { - info!( - "Event Id: {} Protocol: Unknown ({})", - event_id, tcp_pl.proto - ); - } - }; - } else { - warn!("Received packet data too small: {} bytes", data.len()); - } - } - } - Err(e) => { - error!("Error reading events: {:?}", e); - } - } - } - tokio::time::sleep(std::time::Duration::from_millis(100)).await; - } -} - -#[cfg(feature = "experimental")] -pub async fn scan_cgroup_paths(path: String) -> Result, Error> { - let mut cgroup_paths: Vec = Vec::new(); - let default_path = "/sys/fs/cgroup/kubepods.slice".to_string(); - - let target_path = if fs::metadata(&path).is_err() { - error!("Using default path: {}", &default_path); - default_path - } else { - path - }; - let entries = match fs::read_dir(&target_path) { - Ok(entries) => entries, - Err(e) => { - error!( - "Error reading cgroup directory {:?}: {}", - &target_path.clone(), - e - ); - return Ok(cgroup_paths); - } - }; - for entry in 
entries { - if let Ok(entry) = entry { - let path = entry.path(); - if path.is_dir() { - if let Some(path_str) = path.to_str() { - cgroup_paths.push(path_str.to_string()); - } - } - } - } - - Ok(cgroup_paths) -} - -#[cfg(feature = "experimental")] -struct ServiceIdentity { - uid: String, - container_id: String, -} - -#[cfg(feature = "experimental")] -pub async fn scan_cgroup_cronjob(time_delta: u64) -> Result<(), Error> { - let interval = std::time::Duration::from_secs(time_delta); - loop { - let scanned_paths = scan_cgroup_paths("/sys/fs/cgroup/kubelet.slice".to_string()) - .await - .expect("An error occured during the cgroup scan"); - //--> this should return : - // /sys/fs/cgroup/kubelet.slice/kubelet-kubepods.slice - // /sys/fs/cgroup/kubelet.slice/kubelet.service - let mut scanned_subpaths = Vec::::new(); - for path in scanned_paths { - //info!("Scanned cgroup path: {}", path); - // scan the subgroups - let subpaths = scan_cgroup_paths(path.to_string()).await; - match subpaths { - Ok(paths) => { - for subpath in paths { - scanned_subpaths.push(subpath); - } - // ---> this should return the cgroups files and also : - // kubelet-kubepods-burstable.slice - // kubelet-kubepods-besteffort.slice - - // this directories needs to be scanned again to get further information about the pods - // for example: - // kubelet-kubepods-besteffort-pod088f8704_24f0_4636_a8e2_13f75646f370.slice - // where pod088f8704_24f0_4636_a8e2_13f75646f370 is the pod UID - } - Err(e) => { - error!("An error occured during the cgroup subpath scan: {}", e); - continue; - } - } - } - - let mut scanned_subpaths_v2 = Vec::::new(); - // second cgroup scan level to get the pod UIDs - for scanned_subpath in &scanned_subpaths { - let subpaths_v2 = scan_cgroup_paths(scanned_subpath.to_string()).await; - match subpaths_v2 { - Ok(paths) => { - for sub2 in paths { - info!("Debugging sub2: {}", &sub2); //return e.g. 
/sys/fs/cgroup/kubepods.slice/kubepods-besteffort.slice/kubepods-besteffort-podb8701d38_3791_422d_ad15_890ad1a0844b.slice/docker-f2e265659293676231ecb38fafccc97b1a42b75be192c32a602bc8ea579dc866.scope - scanned_subpaths_v2.push(sub2); - // this contains the addressed like this - //kubelet-kubepods-besteffort-pod088f8704_24f0_4636_a8e2_13f75646f370.slice - } - } - Err(e) => { - error!("An error occured during the cgroup subpath v2 scan: {}", e); - continue; - } - } - } - - let mut uids = Vec::::new(); - let mut identites = Vec::::new(); - - //read the subpaths to extract the pod uid - for subpath in scanned_subpaths_v2 { - let uid = extract_pod_uid(subpath.clone()) - .expect("An error occured during the extraction of pod UIDs"); - let container_id = extract_container_id(subpath.clone()) - .expect("An error occured during the extraction of the docker container id"); - debug!("Debugging extracted UID: {:?}", &uid); - // create a linked list for each service - let service_identity = ServiceIdentity { uid, container_id }; - identites.push(service_identity); //push the linked list in a vector of ServiceIdentity structure. Each struct contains the uid and the container id - } - - // get pod information from UID and store the info in an HashMqp for O(1) access - let service_map = get_pod_info().await?; - - //info!("Debugging Identites vector: {:?}", identites); - for service in identites { - let name = service_cache(service_map.clone(), service.uid.clone()); - let uid = service.uid; - let id = service.container_id; - info!( - "[Identity]: name: {:?} uid: {:?} docker container id {:?} ", - name, uid, id - ); - } - - info!( - "Cronjob completed a cgroup scan cycle. 
Next scan will be in {} seconds", - time_delta - ); - time::sleep(interval).await; - } -} -#[cfg(feature = "experimental")] -fn service_cache(service_map: HashMap, uid: String) -> String { - service_map.get(&uid).cloned().unwrap_or_else(|| { - error!("Service not found for uid: {}", uid); - "unknown".to_string() - }) -} -#[cfg(feature = "experimental")] -fn extract_container_id(cgroup_path: String) -> Result { - let splits: Vec<&str> = cgroup_path.split("/").collect(); - - let index = extract_target_from_splits(splits.clone(), "docker-")?; - let docker_id_split = splits[index] - .trim_start_matches("docker-") - .trim_end_matches(".scope"); - Ok(docker_id_split.to_string()) -} - -// IDEA: add cgroup docker process mapping in ServiceIdentity structure -#[cfg(feature = "experimental")] -fn extract_pod_uid(cgroup_path: String) -> Result { - // example of cgroup path: - // /sys/fs/cgroup/kubelet.slice/kubelet-kubepods.slice/kubelet-kubepods-besteffort.slice/kubelet-kubepods-besteffort-pod93580201_87d5_44e6_9779_f6153ca17637.slice - // or - // /sys/fs/cgroup/kubelet.slice/kubelet-kubepods.slice/kubelet-kubepods-burstable.slice/kubelet-kubepods-burstable-poddd3a1c6b_af40_41b1_8e1c_9e31fe8d96cb.slice - - // split the path by "/" - let splits: Vec<&str> = cgroup_path.split("/").collect(); - debug!("Debugging splits: {:?}", &splits); - - let index = extract_target_from_splits(splits.clone(), "-pod")?; - - let pod_split = splits[index] - .trim_start_matches("kubelet-kubepods-besteffort-") - .trim_start_matches("kubelet-kubepods-burstable-") - .trim_start_matches("kubepods-besteffort-") - .trim_start_matches("kubepods-burstable-"); - - let uid_ = pod_split - .trim_start_matches("pod") - .trim_end_matches(".slice"); //return uids with underscore (_) [ex.dd3a1c6b_af40_41b1_8e1c_9e31fe8d96cb] - - let uid = uid_.replace("_", "-"); - Ok(uid.to_string()) -} -#[cfg(feature = "experimental")] -fn extract_target_from_splits(splits: Vec<&str>, target: &str) -> Result { - for (index, 
split) in splits.iter().enumerate() { - // find the split that contains the word 'pod' - if split.contains(target) { - debug!("Target index; {}", index); - return Ok(index); - } - } - Err(Error::msg("'-pod' word not found in split")) -} - -/* unfortunately you cannot query the pods using the uids directly from ListParams */ -#[cfg(feature = "experimental")] -async fn query_all_pods() -> Result, Error> { - let client = Client::try_default() - .await - .expect("Cannot connect to kubernetes client"); - let pods: Api = Api::all(client); - let lp = kube::api::ListParams::default(); // default list params - let pod_list = pods - .list(&lp) - .await - .expect("An error occured during the pod list extraction"); - - Ok(pod_list) -} - -// fast pod caching system -#[cfg(feature = "experimental")] -async fn get_pod_info() -> Result, Error> { - let all_pods = query_all_pods().await?; - - let mut service_map = HashMap::::new(); - - for pod in all_pods { - if let (Some(name), Some(uid)) = (pod.metadata.name, pod.metadata.uid) { - service_map.insert(uid, name); - } - } // insert the pod name and uid from the KubeAPI - - Ok(service_map) -} - -#[cfg(feature = "experimental")] +#[cfg(test)] mod tests { - use tracing_subscriber::fmt::format; - - use crate::helpers::{extract_container_id, extract_pod_uid, extract_target_from_splits}; - - #[test] - fn extract_uid_from_string() { - let cgroup_paths = vec!["/sys/fs/cgroup/kubepods.slice/kubepods-besteffort.slice/kubepods-besteffort-pod231bd2d7_0f09_4781_a4e1_e4ea026342dd.slice".to_string(), - "/sys/fs/cgroup/kubelet.slice/kubelet-kubepods.slice/kubelet-kubepods-besteffort.slice/kubelet-kubepods-besteffort-pod231bd2d7_0f09_4781_a4e1_e4ea026342dd.slice".to_string()]; - - let mut uid_vec = Vec::::new(); - - for cgroup_path in cgroup_paths { - let uid = extract_pod_uid(cgroup_path) - .map_err(|e| format!("An error occured {}", e)) - .unwrap(); - uid_vec.push(uid); - } - - let check = vec![ - "231bd2d7-0f09-4781-a4e1-e4ea026342dd".to_string(), 
- "231bd2d7-0f09-4781-a4e1-e4ea026342dd".to_string(), - ]; - - assert_eq!(uid_vec, check); - } - + use cortexbrain_common::buffer_type::VethLog; #[test] - fn test_extract_target_index() { - let cgroup_paths = vec!["/sys/fs/cgroup/kubepods.slice/kubepods-besteffort.slice/kubepods-besteffort-pod231bd2d7_0f09_4781_a4e1_e4ea026342dd.slice".to_string(), - "/sys/fs/cgroup/kubelet.slice/kubelet-kubepods.slice/kubelet-kubepods-besteffort.slice/kubelet-kubepods-besteffort-pod231bd2d7_0f09_4781_a4e1_e4ea026342dd.slice".to_string()]; - - let mut index_vec = Vec::::new(); - for cgroup_path in cgroup_paths { - let splits: Vec<&str> = cgroup_path.split("/").collect(); - - let target_index = extract_target_from_splits(splits, "-pod").unwrap(); - index_vec.push(target_index); - } - let index_check = vec![6, 7]; - assert_eq!(index_vec, index_check); + fn check_veth_log_struct_mem() { + let mem_test = std::mem::size_of::(); + assert_eq!(mem_test, 39); } - #[test] - fn extract_docker_id() { - let cgroup_paths = vec!["/sys/fs/cgroup/kubepods.slice/kubepods-besteffort.slice/kubepods-besteffort-pod17fd3f7c_37e4_4009_8c38_e58b30691af3.slice/docker-13abd64c0ba349975a762476c9703b642d18077eabeb3aa1d941132048afc861.scope".to_string(), - "/sys/fs/cgroup/kubelet.slice/kubelet-kubepods.slice/kubelet-kubepods-besteffort.slice/kubelet-kubepods-besteffort-pod17fd3f7c_37e4_4009_8c38_e58b30691af3.slice/docker-13abd64c0ba349975a762476c9703b642d18077eabeb3aa1d941132048afc861.scope".to_string()]; - - let mut id_vec = Vec::::new(); - for cgroup_path in cgroup_paths { - let id = extract_container_id(cgroup_path).unwrap(); - id_vec.push(id); - } - let id_check = vec![ - "13abd64c0ba349975a762476c9703b642d18077eabeb3aa1d941132048afc861".to_string(), - "13abd64c0ba349975a762476c9703b642d18077eabeb3aa1d941132048afc861".to_string(), - ]; - assert_eq!(id_vec, id_check); + fn test_vethlog_buffer_len() { + let vethlog = VethLog { + name: [0; 16], + dev_addr: [0; 6], + state: 1, + netns: 123, + event_type: 1, + 
pid: 1, + }; + let buffer = unsafe { + std::slice::from_raw_parts( + (&vethlog as *const VethLog) as *const u8, + std::mem::size_of::(), + ) + }; + assert_eq!(buffer.len(), 39); } } diff --git a/core/src/components/identity/src/lib.rs b/core/src/components/identity/src/lib.rs index e3bb59e0..ceaedc26 100644 --- a/core/src/components/identity/src/lib.rs +++ b/core/src/components/identity/src/lib.rs @@ -1,4 +1,3 @@ pub mod helpers; -pub mod structs; -pub mod enums; -pub mod map_handlers; \ No newline at end of file +#[cfg(feature = "experimental")] +pub mod service_discovery; \ No newline at end of file diff --git a/core/src/components/identity/src/main.rs b/core/src/components/identity/src/main.rs index 56887158..8d13e223 100644 --- a/core/src/components/identity/src/main.rs +++ b/core/src/components/identity/src/main.rs @@ -7,59 +7,50 @@ * 4. [Experimental]: cgroup scanner * */ -#![allow(warnings)] -mod enums; mod helpers; -mod map_handlers; -mod structs; +mod service_discovery; +use crate::helpers::get_veth_channels; use aya::{ Ebpf, - maps::{ - Map, MapData, - perf::{PerfEventArray, PerfEventArrayBuffer}, - }, - programs::{KProbe, SchedClassifier, TcAttachType, tc::SchedClassifierLinkId}, + maps::{Map, MapData}, + programs::{SchedClassifier, TcAttachType}, util::online_cpus, }; -use crate::helpers::{ - display_events, display_tcp_registry_events, display_veth_events, get_veth_channels, -}; - #[cfg(feature = "experimental")] use crate::helpers::scan_cgroup_cronjob; -use crate::map_handlers::{init_bpf_maps, map_pinner, populate_blocklist}; - -use bytes::BytesMut; +use cortexbrain_common::{ + buffer_type::{BufferSize, BufferType, read_perf_buffer}, + constants, logger, + map_handlers::BpfMapsData, + map_handlers::{init_bpf_maps, map_manager, map_pinner, populate_blocklist}, + program_handlers::load_program, +}; use std::{ convert::TryInto, path::Path, - sync::{ - Arc, Mutex, - atomic::{AtomicBool, Ordering}, - }, + sync::{Arc, Mutex}, }; -use anyhow::{Context, Ok}; 
-use cortexbrain_common::{constants, logger}; +use anyhow::{Context, Ok, anyhow}; + +//use std::collections::HashMap; use tokio::{fs, signal}; use tracing::{error, info}; -use std::collections::HashMap; - #[tokio::main] async fn main() -> Result<(), anyhow::Error> { - //init tracing subscriber - logger::init_default_logger(); + //init otlè tracing subscriber + let otlp_provider = logger::otlp_logger_init("identity_service-OTLP".to_string()); info!("Starting identity service..."); info!("fetching data"); // To Store link_ids they can be used to detach tc - let link_ids = Arc::new(Mutex::new(HashMap::::new())); + //let mut link_ids = HashMap::::new(); //init conntracker data path let bpf_path = @@ -72,14 +63,20 @@ async fn main() -> Result<(), anyhow::Error> { let bpf = Arc::new(Mutex::new(Ebpf::load(&data)?)); let bpf_map_save_path = std::env::var(constants::PIN_MAP_PATH) .context("PIN_MAP_PATH environment variable required")?; - - match init_bpf_maps(bpf.clone()) { - std::result::Result::Ok(mut bpf_maps) => { + let map_data = vec![ + "events_map".to_string(), + "veth_identity_map".to_string(), + "TcpPacketRegistry".to_string(), + "Blocklist".to_string(), + "tracked_veth".to_string(), + ]; + match init_bpf_maps(bpf.clone(), map_data) { + std::result::Result::Ok(bpf_maps) => { info!("Successfully loaded bpf maps"); let pin_path = std::path::PathBuf::from(&bpf_map_save_path); info!("About to call map_pinner with path: {:?}", pin_path); - match map_pinner(&bpf_maps, &pin_path) { - std::result::Result::Ok(_) => { + match map_pinner(bpf_maps, &pin_path) { + std::result::Result::Ok(maps) => { info!("maps pinned successfully"); //load veth_trace program ref veth_trace.rs { @@ -91,12 +88,12 @@ async fn main() -> Result<(), anyhow::Error> { info!("Found interfaces: {:?}", interfaces); { - populate_blocklist(&mut bpf_maps.2).await; + populate_blocklist().await?; } { - init_tc_classifier(bpf.clone(), interfaces, link_ids.clone()).await.context( - "An error occured during the 
execution of attach_bpf_program function" + init_tc_classifier(bpf.clone(), interfaces).await.context( + "An error occured during the execution of attach_bpf_program function", )?; } { @@ -105,9 +102,9 @@ async fn main() -> Result<(), anyhow::Error> { )?; } - event_listener(bpf_maps, link_ids.clone(), bpf.clone()) - .await - .context("Error initializing event_listener")?; + event_listener(maps).await.map_err(|e| { + anyhow::anyhow!("Error inizializing event_listener. Reason: {}", e) + })?; } Err(e) => { error!("Error while pinning bpf_maps: {}", e); @@ -116,7 +113,8 @@ async fn main() -> Result<(), anyhow::Error> { } Err(e) => { error!("Error while loading bpf maps {}", e); - signal::ctrl_c(); + let _ = signal::ctrl_c().await; + let _ = otlp_provider.shutdown(); } } @@ -124,15 +122,17 @@ async fn main() -> Result<(), anyhow::Error> { } //attach the tc classifier program to a vector of interfaces +// TODO: consider to create a load schedule classifier in the common functions async fn init_tc_classifier( bpf: Arc>, ifaces: Vec, - link_ids: Arc>>, ) -> Result<(), anyhow::Error> { //this funtion initialize the tc classifier program info!("Loading programs"); - let mut bpf_new = bpf.lock().unwrap(); + let mut bpf_new = bpf + .lock() + .map_err(|e| anyhow::anyhow!("Cannot get value from lock. Reason: {}", e))?; let program: &mut SchedClassifier = bpf_new .program_mut("identity_classifier") @@ -140,10 +140,33 @@ async fn init_tc_classifier( .try_into() .context("Failed to init SchedClassifier program")?; + // load classifier program + program .load() .context("Failed to load identity_classifier program")?; + // attach program only to desired interfaces. We can skip the dock0,tunl0,lo and eth0 interface + // we also save the interfaces to a BPF_HASH_MAP to easily monitor the interfaces using the agent + + // decleare link_ids HashMap which is a shared hashmap between kernel and userspace + // Link_ids hashmap has type of HashMap<[u8; 16], [u8; 8]>. 
The key is the program name and the value is the state + + // at this point the pinning is already successfull so we can invoque the maps from the pin + + let link_ids_mapdata = MapData::from_pin("/sys/fs/bpf/maps/tracked_veth") + .map_err(|e| anyhow!("Cannot return link_ids_mapdata. Reason: {}", e))?; + + let link_ids_map = Map::HashMap(link_ids_mapdata); + + let mut link_ids: aya::maps::HashMap = + aya::maps::HashMap::try_from(link_ids_map).map_err(|e| { + anyhow!( + "Cannot create link_ids HashMap from link_ids_map. Reason:{}", + e + ) + })?; + for interface in ifaces { match program.attach(&interface, TcAttachType::Ingress) { std::result::Result::Ok(link_id) => { @@ -151,8 +174,34 @@ async fn init_tc_classifier( "Program 'identity_classifier' attached to interface {}", interface ); - let mut map = link_ids.lock().unwrap(); - map.insert(interface.clone(), link_id); + let interface_bytes = interface.as_bytes(); + + let mut if_bytes = [0u8; 16]; + + // to set the len compare the interface_bytes.len() with the if_bytes.len() [16] and take the minimum + // if we have interface_bytes.len() < than 16 we set the len + let len = interface_bytes.len().min(if_bytes.len()); + + // now we can copy the bytes from the slice into the if_bytes variable + if_bytes[..len].copy_from_slice(&interface_bytes[..len]); + + // we compute the same process for the state_bytes + let mut state_bytes = [0u8; 8]; + let state = b"attached"; // prints "attached" as [u8;8] sequence of bytes + let state_len = state.len().min(state_bytes.len()); + state_bytes[..state_len].copy_from_slice(&state[..state_len]); + + match link_ids.insert(if_bytes, state_bytes, 0) { + std::result::Result::Ok(_) => { + info!("Veth interface {} added into map", &interface); + } + Err(e) => { + error!( + "Cannot add Veth interface {} into map. 
Reason: {}", + &interface, e + ); + } + } } Err(e) => error!( "Error attaching program to interface {}: {:?}", @@ -166,161 +215,101 @@ async fn init_tc_classifier( async fn init_veth_tracer(bpf: Arc>) -> Result<(), anyhow::Error> { //this functions init the veth_tracer used to make the InterfacesRegistry - - let mut bpf_new = bpf.lock().unwrap(); - //creation tracer - let veth_creation_tracer: &mut KProbe = bpf_new - .program_mut("veth_creation_trace") - .ok_or_else(|| anyhow::anyhow!("program 'veth_creation_trace' not found"))? - .try_into()?; - veth_creation_tracer.load()?; - - match veth_creation_tracer.attach("register_netdevice", 0) { - std::result::Result::Ok(_) => info!("veth_creation_tracer program attached successfully"), - Err(e) => error!("Error attaching veth_creation_tracer program {:?}", e), - } - //deletion tracer - let veth_deletion_tracer: &mut KProbe = bpf_new - .program_mut("veth_deletion_trace") - .ok_or_else(|| anyhow::anyhow!("program 'veth_deletion_trace' not found"))? - .try_into()?; - veth_deletion_tracer - .load() - .context("Failed to load deletetion_tracer program")?; + load_program(bpf.clone(), "veth_creation_trace", "register_netdevice")?; - match veth_deletion_tracer.attach("unregister_netdevice_queue", 0) { - std::result::Result::Ok(_) => info!("veth_deletion_trace program attached successfully"), - Err(e) => error!("Error attaching veth_deletetion_trace program {:?}", e), - } + //deletion tracer + load_program(bpf, "veth_deletion_trace", "unregister_netdevice_queue")?; Ok(()) } async fn init_tcp_registry(bpf: Arc>) -> Result<(), anyhow::Error> { - let mut bpf_new = bpf.lock().unwrap(); - // init tcp registry - let tcp_analyzer: &mut KProbe = bpf_new - .program_mut("tcp_message_tracer") - .ok_or_else(|| anyhow::anyhow!("program 'tcp_message_tracer' not found"))? - .try_into()?; - tcp_analyzer - .load() - .context("Failed to load tcp_message_tracer")?; + // .clone() increments the reference count of the shared Ebpf instance. 
+ load_program(bpf.clone(), "tcp_message_tracer_rcv", "tcp_v4_rcv")?; info!("initializing tcp tracing functions"); - match tcp_analyzer.attach("tcp_v4_rcv", 0) { - std::result::Result::Ok(_) => { - info!("tcp_message_tracer attached successfully to the tcp_v4_rcv function ") - } - Err(e) => error!( - "Error attaching tcp_message_tracer to the tcp_v4_rcv function. Error: {:?}", - e - ), - } - - match tcp_analyzer.attach("tcp_v4_connect", 0) { - std::result::Result::Ok(_) => { - info!("tcp_message_tracer attached successfully to the tcp_v4_connect function ") - } - Err(e) => error!( - "Error attaching tcp_message_tracer to the tcp_v4_connect function. Error: {:?}", - e - ), - } + load_program(bpf, "tcp_message_tracer_connect", "tcp_v4_connect")?; Ok(()) } -async fn event_listener( - bpf_maps: (Map, Map, Map, Map), - link_ids: Arc>>, - bpf: Arc>, -) -> Result<(), anyhow::Error> { - // this function init the event listener. Listens for veth events (creation/deletion) and network events (pod to pod communications) - /* Doc: - - perf_net_events_array: contains is associated with the network events stored in the events_map (EventsMap) - perf_veth_array: contains is associated with the network events stored in the veth_map (veth_identity_map) - - */ - +// this function init the event listener. 
Listens for veth events (creation/deletion) and network events (pod to pod communications) +// Doc: +// +// perf_net_events_array: contains is associated with the network events stored in the events_map (EventsMap) +// perf_veth_array: contains is associated with the network events stored in the veth_map (veth_identity_map) +// +// +async fn event_listener(bpf_maps: BpfMapsData) -> Result<(), anyhow::Error> { info!("Preparing perf_buffers and perf_arrays"); //TODO: try to change from PerfEventArray to a RingBuffer data structure - //let m0=bpf_maps[0]; - //let m1 = bpf_maps[1]; - //let mut ring1=RingBuf::try_from(m0)?; - //let mut ring2=RingBuf::try_from(m1)?; - - //TODO:create an helper function that initialize the data structures and the running - // init PerfEventArrays - let mut perf_veth_array: PerfEventArray = PerfEventArray::try_from(bpf_maps.1)?; - let mut perf_net_events_array: PerfEventArray = PerfEventArray::try_from(bpf_maps.0)?; - let mut tcp_registry_array: PerfEventArray = PerfEventArray::try_from(bpf_maps.3)?; - // init PerfEventArrays buffers - let mut perf_veth_buffer: Vec> = Vec::new(); - let mut perf_net_events_buffer: Vec> = Vec::new(); - let mut tcp_registry_buffer: Vec> = Vec::new(); + let mut maps = map_manager(bpf_maps)?; - // fill the input buffers - - for cpu_id in online_cpus().map_err(|e| anyhow::anyhow!("Error {:?}", e))? { - let veth_buf: PerfEventArrayBuffer = perf_veth_array.open(cpu_id, None)?; - perf_veth_buffer.push(veth_buf); - } + // fill the input buffers with data from the PerfEventArrays for cpu_id in online_cpus().map_err(|e| anyhow::anyhow!("Error {:?}", e))? { - let events_buf: PerfEventArrayBuffer = perf_net_events_array.open(cpu_id, None)?; - perf_net_events_buffer.push(events_buf); - } - for cpu_id in online_cpus().map_err(|e| anyhow::anyhow!("Error {:?}", e))? 
{ - let tcp_registry_buf: PerfEventArrayBuffer = - tcp_registry_array.open(cpu_id, None)?; - tcp_registry_buffer.push(tcp_registry_buf); + for (name, (perf_evt_array, perf_evt_array_buffer)) in maps.iter_mut() { + let buf = perf_evt_array.open(cpu_id, None).map_err(|e| { + anyhow!( + "Cannot create perf_event_array buffer from perf_event_array. Reason: {}", + e + ) + })?; + info!( + "Buffer created for map {:?} on cpu_id {:?}. Buffer size: {}", + name, + cpu_id, + std::mem::size_of_val(&buf) + ); + perf_evt_array_buffer.push(buf); + } } info!("Listening for events..."); - // init runnings - let veth_running = Arc::new(AtomicBool::new(true)); - let net_events_running = Arc::new(AtomicBool::new(true)); - let tcp_registry_running = Arc::new(AtomicBool::new(true)); + // i need to use remove to move the values from the Map Manager to the the async tasks + let (perf_veth_array, perf_veth_buffers) = maps + .remove("veth_identity_map") + .expect("Cannot create perf_veth buffer"); + let (perf_net_events_array, perf_net_events_buffers) = maps + .remove("events_map") + .expect("Cannot create perf_net_events buffer"); + let (tcp_registry_array, tcp_registry_buffers) = maps + .remove("TcpPacketRegistry") + .expect("Cannot create tcp_registry buffer"); // init output buffers - let mut veth_buffers = vec![BytesMut::with_capacity(1024); 10]; - let mut events_buffers = vec![BytesMut::with_capacity(1024); online_cpus().iter().len()]; - let mut tcp_buffers = vec![BytesMut::with_capacity(1024); online_cpus().iter().len()]; - - // init running signals - let veth_running_signal = veth_running.clone(); - let net_events_running_signal = net_events_running.clone(); - let tcp_registry_running_signal = tcp_registry_running.clone(); - - let veth_link_ids = link_ids.clone(); + let veth_buffers = BufferSize::VethEvents.set_buffer(); + let events_buffers = BufferSize::ClassifierNetEvents.set_buffer(); + let tcp_buffers = BufferSize::TcpEvents.set_buffer(); + // spawn async tasks let 
veth_events_displayer = tokio::spawn(async move { - display_veth_events( - bpf.clone(), - perf_veth_buffer, - veth_running, - veth_buffers, - veth_link_ids, - ) - .await; + read_perf_buffer(perf_veth_buffers, veth_buffers, BufferType::VethLog).await; }); - // IDEA: Maybe we don't need to display all this events let net_events_displayer = tokio::spawn(async move { - display_events(perf_net_events_buffer, net_events_running, events_buffers).await; + read_perf_buffer( + perf_net_events_buffers, + events_buffers, + BufferType::PacketLog, + ) + .await; }); let tcp_registry_events_displayer: tokio::task::JoinHandle<()> = tokio::spawn(async move { - display_tcp_registry_events(tcp_registry_buffer, tcp_registry_running, tcp_buffers).await; + read_perf_buffer( + tcp_registry_buffers, + tcp_buffers, + BufferType::TcpPacketRegistry, + ) + .await; }); #[cfg(feature = "experimental")] @@ -330,12 +319,6 @@ async fn event_listener( #[cfg(not(feature = "experimental"))] tokio::select! { - /* result = scan_cgroup_cronjob=>{ - match result{ - Err(e)=>error!("scan_cgroup_cronjob panicked {:?}",e), - std::result::Result::Ok(_) => info!("cgroup scan cronjob exited"), - } - } */ result = veth_events_displayer=>{ match result{ Err(e)=>error!("veth_event_displayer panicked {:?}",e), @@ -359,9 +342,6 @@ async fn event_listener( _= signal::ctrl_c()=>{ info!("Triggered Exiting..."); - veth_running_signal.store(false, Ordering::SeqCst); - net_events_running_signal.store(false, Ordering::SeqCst); - tcp_registry_running_signal.store(false, Ordering::SeqCst); } } @@ -396,9 +376,6 @@ async fn event_listener( _= signal::ctrl_c()=>{ info!("Triggered Exiting..."); - veth_running_signal.store(false, Ordering::SeqCst); - net_events_running_signal.store(false, Ordering::SeqCst); - tcp_registry_running_signal.store(false, Ordering::SeqCst); } } diff --git a/core/src/components/identity/src/map_handlers.rs b/core/src/components/identity/src/map_handlers.rs deleted file mode 100644 index 
a225a470..00000000 --- a/core/src/components/identity/src/map_handlers.rs +++ /dev/null @@ -1,113 +0,0 @@ -use anyhow::Error; -use anyhow::Ok; -use aya::Ebpf; -use aya::maps::HashMap; -use aya::maps::Map; -use k8s_openapi::api::core::v1::ConfigMap; -use kube::{Api, Client}; -use std::net::Ipv4Addr; -use std::path::PathBuf; -use std::str::FromStr; -use std::sync::Arc; -use std::sync::Mutex; -use tracing::warn; -use tracing::{error, info}; - -pub fn init_bpf_maps(bpf: Arc>) -> Result<(Map, Map, Map, Map), anyhow::Error> { - // this function init the bpfs maps used in the main program - /* - index 0: events_map - index 1: veth_map - index 2: blocklist map - */ - let mut bpf_new = bpf.lock().unwrap(); - - let events_map = bpf_new - .take_map("EventsMap") - .ok_or_else(|| anyhow::anyhow!("EventsMap map not found"))?; - - let veth_map = bpf_new - .take_map("veth_identity_map") - .ok_or_else(|| anyhow::anyhow!("veth_identity_map map not found"))?; - - let blocklist_map = bpf_new - .take_map("Blocklist") - .ok_or_else(|| anyhow::anyhow!("Blocklist map not found"))?; - - let tcp_registry_map = bpf_new - .take_map("TcpPacketRegistry") - .ok_or_else(|| anyhow::anyhow!("TcpPacketRegistry map not found"))?; - - Ok((events_map, veth_map, blocklist_map, tcp_registry_map)) -} - -//TODO: save bpf maps path in the cli metadata -//takes an array of bpf maps and pin them to persiste session data -//TODO: change maps type with a Vec instead of (Map,Map). This method is only for fast development and it's not optimized -//TODO: add bpf mounts during cli installation -pub fn map_pinner(maps: &(Map, Map, Map, Map), path: &PathBuf) -> Result<(), Error> { - if !path.exists() { - info!("Pin path {:?} does not exist. 
Creating it...", path); - std::fs::create_dir_all(&path)?; - #[cfg(unix)] - { - use std::os::unix::fs::PermissionsExt; - std::fs::set_permissions(&path, std::fs::Permissions::from_mode(0o755))?; - } - } - - let configs = [ - (&maps.0, "events_map"), - (&maps.1, "veth_map"), - (&maps.2, "blocklist_map"), - (&maps.3, "tcp_packet_registry"), - ]; - - for (name, paths) in configs { - let map_path = path.join(paths); - if map_path.exists() { - warn!("Path {} already exists", paths); - warn!("Removing path {}", paths); - let _ = std::fs::remove_file(&map_path); - } - info!("Trying to pin map {:?} in map path: {:?}", name, &map_path); - name.pin(&map_path)?; - } - - Ok(()) -} -pub async fn populate_blocklist(map: &mut Map) -> Result<(), Error> { - let client = Client::try_default().await.unwrap(); - let namespace = "cortexflow"; - let configmap = "cortexbrain-client-config"; - - let mut blocklist_map = HashMap::<_, [u8; 4], [u8; 4]>::try_from(map)?; - - let api: Api = Api::namespaced(client, namespace); - match api.get(configmap).await { - std::result::Result::Ok(configs) => { - info!("Configmap : {} loaded correctly ", configmap); - info!("[CONFIGMAP]: {:?} ", configs); - if let Some(data) = configs.data { - if let Some(blocklist) = data.get("blocklist") { - let addresses: Vec = blocklist - .lines() - .map(|s| s.trim().to_string()) - .filter(|s| !s.is_empty()) - .collect(); - //String parsing from "x y" to ["x","y"] - info!("Inserting addresses: {:?}", addresses); - for item in addresses { - let addr = Ipv4Addr::from_str(&item)?.octets(); - let _ = blocklist_map.insert(addr, addr, 0); - } - } - } - Ok(()) - } - std::result::Result::Err(e) => { - error!("An error occured while reading configmap: {}", e); - return Err(e.into()); - } - } -} diff --git a/core/src/components/identity/src/mod.rs b/core/src/components/identity/src/mod.rs index e3bb59e0..ceaedc26 100644 --- a/core/src/components/identity/src/mod.rs +++ b/core/src/components/identity/src/mod.rs @@ -1,4 +1,3 @@ 
pub mod helpers; -pub mod structs; -pub mod enums; -pub mod map_handlers; \ No newline at end of file +#[cfg(feature = "experimental")] +pub mod service_discovery; \ No newline at end of file diff --git a/core/src/components/identity/src/service_discovery.rs b/core/src/components/identity/src/service_discovery.rs new file mode 100644 index 00000000..bc43f3d7 --- /dev/null +++ b/core/src/components/identity/src/service_discovery.rs @@ -0,0 +1,297 @@ +#[cfg(feature = "experimental")] +use anyhow::Error; +#[cfg(feature = "experimental")] +use k8s_openapi::api::core::v1::Pod; +#[cfg(feature = "experimental")] +use kube::api::ObjectList; +#[cfg(feature = "experimental")] +use kube::{Api, Client}; +#[cfg(feature = "experimental")] +use std::fs; +#[cfg(feature = "experimental")] +use tokio::time; + +#[cfg(feature = "experimental")] +pub async fn scan_cgroup_paths(path: String) -> Result, Error> { + let mut cgroup_paths: Vec = Vec::new(); + let default_path = "/sys/fs/cgroup/kubepods.slice".to_string(); + + let target_path = if fs::metadata(&path).is_err() { + error!("Using default path: {}", &default_path); + default_path + } else { + path + }; + let entries = match fs::read_dir(&target_path) { + Ok(entries) => entries, + Err(e) => { + error!( + "Error reading cgroup directory {:?}: {}", + &target_path.clone(), + e + ); + return Ok(cgroup_paths); + } + }; + for entry in entries { + if let Ok(entry) = entry { + let path = entry.path(); + if path.is_dir() { + if let Some(path_str) = path.to_str() { + cgroup_paths.push(path_str.to_string()); + } + } + } + } + + Ok(cgroup_paths) +} + +#[cfg(feature = "experimental")] +struct ServiceIdentity { + uid: String, + container_id: String, +} + +#[cfg(feature = "experimental")] +pub async fn scan_cgroup_cronjob(time_delta: u64) -> Result<(), Error> { + let interval = std::time::Duration::from_secs(time_delta); + loop { + let scanned_paths = scan_cgroup_paths("/sys/fs/cgroup/kubelet.slice".to_string()) + .await + .expect("An error 
occured during the cgroup scan"); + //--> this should return : + // /sys/fs/cgroup/kubelet.slice/kubelet-kubepods.slice + // /sys/fs/cgroup/kubelet.slice/kubelet.service + let mut scanned_subpaths = Vec::::new(); + for path in scanned_paths { + //info!("Scanned cgroup path: {}", path); + // scan the subgroups + let subpaths = scan_cgroup_paths(path.to_string()).await; + match subpaths { + Ok(paths) => { + for subpath in paths { + scanned_subpaths.push(subpath); + } + // ---> this should return the cgroups files and also : + // kubelet-kubepods-burstable.slice + // kubelet-kubepods-besteffort.slice + + // this directories needs to be scanned again to get further information about the pods + // for example: + // kubelet-kubepods-besteffort-pod088f8704_24f0_4636_a8e2_13f75646f370.slice + // where pod088f8704_24f0_4636_a8e2_13f75646f370 is the pod UID + } + Err(e) => { + error!("An error occured during the cgroup subpath scan: {}", e); + continue; + } + } + } + + let mut scanned_subpaths_v2 = Vec::::new(); + // second cgroup scan level to get the pod UIDs + for scanned_subpath in &scanned_subpaths { + let subpaths_v2 = scan_cgroup_paths(scanned_subpath.to_string()).await; + match subpaths_v2 { + Ok(paths) => { + for sub2 in paths { + info!("Debugging sub2: {}", &sub2); //return e.g. 
/sys/fs/cgroup/kubepods.slice/kubepods-besteffort.slice/kubepods-besteffort-podb8701d38_3791_422d_ad15_890ad1a0844b.slice/docker-f2e265659293676231ecb38fafccc97b1a42b75be192c32a602bc8ea579dc866.scope + scanned_subpaths_v2.push(sub2); + // this contains the addressed like this + //kubelet-kubepods-besteffort-pod088f8704_24f0_4636_a8e2_13f75646f370.slice + } + } + Err(e) => { + error!("An error occured during the cgroup subpath v2 scan: {}", e); + continue; + } + } + } + + let mut uids = Vec::::new(); + let mut identites = Vec::::new(); + + //read the subpaths to extract the pod uid + for subpath in scanned_subpaths_v2 { + let uid = extract_pod_uid(subpath.clone()) + .expect("An error occured during the extraction of pod UIDs"); + let container_id = extract_container_id(subpath.clone()) + .expect("An error occured during the extraction of the docker container id"); + debug!("Debugging extracted UID: {:?}", &uid); + // create a linked list for each service + let service_identity = ServiceIdentity { uid, container_id }; + identites.push(service_identity); //push the linked list in a vector of ServiceIdentity structure. Each struct contains the uid and the container id + } + + // get pod information from UID and store the info in an HashMqp for O(1) access + let service_map = get_pod_info().await?; + + //info!("Debugging Identites vector: {:?}", identites); + for service in identites { + let name = service_cache(service_map.clone(), service.uid.clone()); + let uid = service.uid; + let id = service.container_id; + info!( + "[Identity]: name: {:?} uid: {:?} docker container id {:?} ", + name, uid, id + ); + } + + info!( + "Cronjob completed a cgroup scan cycle. 
Next scan will be in {} seconds", + time_delta + ); + time::sleep(interval).await; + } +} +#[cfg(feature = "experimental")] +fn service_cache(service_map: HashMap, uid: String) -> String { + service_map.get(&uid).cloned().unwrap_or_else(|| { + error!("Service not found for uid: {}", uid); + "unknown".to_string() + }) +} +#[cfg(feature = "experimental")] +fn extract_container_id(cgroup_path: String) -> Result { + let splits: Vec<&str> = cgroup_path.split("/").collect(); + + let index = extract_target_from_splits(splits.clone(), "docker-")?; + let docker_id_split = splits[index] + .trim_start_matches("docker-") + .trim_end_matches(".scope"); + Ok(docker_id_split.to_string()) +} + +// IDEA: add cgroup docker process mapping in ServiceIdentity structure +#[cfg(feature = "experimental")] +fn extract_pod_uid(cgroup_path: String) -> Result { + // example of cgroup path: + // /sys/fs/cgroup/kubelet.slice/kubelet-kubepods.slice/kubelet-kubepods-besteffort.slice/kubelet-kubepods-besteffort-pod93580201_87d5_44e6_9779_f6153ca17637.slice + // or + // /sys/fs/cgroup/kubelet.slice/kubelet-kubepods.slice/kubelet-kubepods-burstable.slice/kubelet-kubepods-burstable-poddd3a1c6b_af40_41b1_8e1c_9e31fe8d96cb.slice + + // split the path by "/" + let splits: Vec<&str> = cgroup_path.split("/").collect(); + debug!("Debugging splits: {:?}", &splits); + + let index = extract_target_from_splits(splits.clone(), "-pod")?; + + let pod_split = splits[index] + .trim_start_matches("kubelet-kubepods-besteffort-") + .trim_start_matches("kubelet-kubepods-burstable-") + .trim_start_matches("kubepods-besteffort-") + .trim_start_matches("kubepods-burstable-"); + + let uid_ = pod_split + .trim_start_matches("pod") + .trim_end_matches(".slice"); //return uids with underscore (_) [ex.dd3a1c6b_af40_41b1_8e1c_9e31fe8d96cb] + + let uid = uid_.replace("_", "-"); + Ok(uid.to_string()) +} +#[cfg(feature = "experimental")] +fn extract_target_from_splits(splits: Vec<&str>, target: &str) -> Result { + for (index, 
split) in splits.iter().enumerate() { + // find the split that contains the word 'pod' + if split.contains(target) { + debug!("Target index; {}", index); + return Ok(index); + } + } + Err(Error::msg("'-pod' word not found in split")) +} + +/* unfortunately you cannot query the pods using the uids directly from ListParams */ +#[cfg(feature = "experimental")] +async fn query_all_pods() -> Result, Error> { + let client = Client::try_default() + .await + .expect("Cannot connect to kubernetes client"); + let pods: Api = Api::all(client); + let lp = kube::api::ListParams::default(); // default list params + let pod_list = pods + .list(&lp) + .await + .expect("An error occured during the pod list extraction"); + + Ok(pod_list) +} + +// fast pod caching system +#[cfg(feature = "experimental")] +async fn get_pod_info() -> Result, Error> { + let all_pods = query_all_pods().await?; + + let mut service_map = HashMap::::new(); + + for pod in all_pods { + if let (Some(name), Some(uid)) = (pod.metadata.name, pod.metadata.uid) { + service_map.insert(uid, name); + } + } // insert the pod name and uid from the KubeAPI + + Ok(service_map) +} + +#[cfg(feature = "experimental")] +mod tests { + use tracing_subscriber::fmt::format; + + use crate::helpers::{extract_container_id, extract_pod_uid, extract_target_from_splits}; + + #[test] + fn extract_uid_from_string() { + let cgroup_paths = vec!["/sys/fs/cgroup/kubepods.slice/kubepods-besteffort.slice/kubepods-besteffort-pod231bd2d7_0f09_4781_a4e1_e4ea026342dd.slice".to_string(), + "/sys/fs/cgroup/kubelet.slice/kubelet-kubepods.slice/kubelet-kubepods-besteffort.slice/kubelet-kubepods-besteffort-pod231bd2d7_0f09_4781_a4e1_e4ea026342dd.slice".to_string()]; + + let mut uid_vec = Vec::::new(); + + for cgroup_path in cgroup_paths { + let uid = extract_pod_uid(cgroup_path) + .map_err(|e| format!("An error occured {}", e)) + .unwrap(); + uid_vec.push(uid); + } + + let check = vec![ + "231bd2d7-0f09-4781-a4e1-e4ea026342dd".to_string(), + 
"231bd2d7-0f09-4781-a4e1-e4ea026342dd".to_string(), + ]; + + assert_eq!(uid_vec, check); + } + + #[test] + fn test_extract_target_index() { + let cgroup_paths = vec!["/sys/fs/cgroup/kubepods.slice/kubepods-besteffort.slice/kubepods-besteffort-pod231bd2d7_0f09_4781_a4e1_e4ea026342dd.slice".to_string(), + "/sys/fs/cgroup/kubelet.slice/kubelet-kubepods.slice/kubelet-kubepods-besteffort.slice/kubelet-kubepods-besteffort-pod231bd2d7_0f09_4781_a4e1_e4ea026342dd.slice".to_string()]; + + let mut index_vec = Vec::::new(); + for cgroup_path in cgroup_paths { + let splits: Vec<&str> = cgroup_path.split("/").collect(); + + let target_index = extract_target_from_splits(splits, "-pod").unwrap(); + index_vec.push(target_index); + } + let index_check = vec![6, 7]; + assert_eq!(index_vec, index_check); + } + + #[test] + fn extract_docker_id() { + let cgroup_paths = vec!["/sys/fs/cgroup/kubepods.slice/kubepods-besteffort.slice/kubepods-besteffort-pod17fd3f7c_37e4_4009_8c38_e58b30691af3.slice/docker-13abd64c0ba349975a762476c9703b642d18077eabeb3aa1d941132048afc861.scope".to_string(), + "/sys/fs/cgroup/kubelet.slice/kubelet-kubepods.slice/kubelet-kubepods-besteffort.slice/kubelet-kubepods-besteffort-pod17fd3f7c_37e4_4009_8c38_e58b30691af3.slice/docker-13abd64c0ba349975a762476c9703b642d18077eabeb3aa1d941132048afc861.scope".to_string()]; + + let mut id_vec = Vec::::new(); + for cgroup_path in cgroup_paths { + let id = extract_container_id(cgroup_path).unwrap(); + id_vec.push(id); + } + let id_check = vec![ + "13abd64c0ba349975a762476c9703b642d18077eabeb3aa1d941132048afc861".to_string(), + "13abd64c0ba349975a762476c9703b642d18077eabeb3aa1d941132048afc861".to_string(), + ]; + assert_eq!(id_vec, id_check); + } +} diff --git a/core/src/components/identity/src/structs.rs b/core/src/components/identity/src/structs.rs deleted file mode 100644 index d8cff939..00000000 --- a/core/src/components/identity/src/structs.rs +++ /dev/null @@ -1,56 +0,0 @@ -use bytemuck_derive::Zeroable; - -/* - * 
Structure PacketLog - * This structure is used to store the packet information - */ -#[repr(C)] -#[derive(Clone, Copy, Zeroable)] -pub struct PacketLog { - pub proto: u8, - pub src_ip: u32, - pub src_port: u16, - pub dst_ip: u32, - pub dst_port: u16, - pub pid: u32, -} -unsafe impl aya::Pod for PacketLog {} - -/* - * Connection Array that contains the hash_id associated with an active connection - */ -#[repr(C)] -#[derive(Clone, Copy, Zeroable)] -pub struct ConnArray { - pub src_ip: u32, - pub dst_ip: u32, - pub src_port: u16, - pub dst_port: u16, - pub proto: u8, -} - -unsafe impl aya::Pod for ConnArray {} - -#[repr(C)] -#[derive(Clone, Copy)] -pub struct VethLog { - pub name: [u8; 16], - pub state: u64, - pub dev_addr: [u32; 8], - pub event_type: u8, - pub netns: u32, - pub pid: u32, -} - -#[repr(C)] -#[derive(Clone, Copy)] -pub struct TcpPacketRegistry{ - pub proto: u8, - pub src_ip: u32, - pub dst_ip: u32, - pub src_port: u16, - pub dst_port: u16, - pub pid: u32, - pub command: [u8;16], - pub cgroup_id: u64, -} \ No newline at end of file diff --git a/core/src/components/metrics/Cargo.toml b/core/src/components/metrics/Cargo.toml index 112872e8..1c7d420a 100644 --- a/core/src/components/metrics/Cargo.toml +++ b/core/src/components/metrics/Cargo.toml @@ -7,11 +7,27 @@ edition = "2024" aya = "0.13.1" aya-log = "0.2.1" bytes = "1.4" -tokio = { version = "1.48.0", features = ["rt","macros","time","fs","signal","rt-multi-thread"] } +tokio = { version = "1.48.0", features = [ + "rt", + "macros", + "time", + "fs", + "signal", + "rt-multi-thread", +] } anyhow = "1.0" tracing = "0.1.41" tracing-subscriber = { version = "0.3.19", features = ["env-filter"] } libc = "0.2.172" bytemuck = "1.23.0" -cortexbrain-common = { path = "../../../common" } -nix ={version="0.30.1",features=["net"]} +cortexbrain-common = { path = "../../../common/", features = [ + "map-handlers", + "program-handlers", + "buffer-reader", + "monitoring-structs", + "network-structs" +] } +nix = { version 
= "0.30.1", features = ["net"] } +opentelemetry = "0.32.0" +opentelemetry_sdk = "0.32.0" +opentelemetry-otlp = { version = "0.32.0", features = ["grpc-tonic"] } diff --git a/core/src/components/metrics/src/helpers.rs b/core/src/components/metrics/src/helpers.rs index 1b4628e4..804e9306 100644 --- a/core/src/components/metrics/src/helpers.rs +++ b/core/src/components/metrics/src/helpers.rs @@ -1,179 +1,124 @@ -use aya::{maps::{ - perf::PerfEventArrayBuffer, Map, MapData, PerfEventArray - }, util::online_cpus}; - -use bytes::BytesMut; +use anyhow::anyhow; +use aya::util::online_cpus; +use cortexbrain_common::map_handlers::map_manager; +use cortexbrain_common::{buffer_type::BufferSize, map_handlers::BpfMapsData}; +use opentelemetry::metrics::Meter; +use std::sync::Arc; use tokio::signal; -use std::{ - sync::{ - Arc, - atomic::{AtomicBool, Ordering}, - }, -}; - use tracing::{error, info}; -use crate::structs::NetworkMetrics; -use crate::structs::TimeStampMetrics; - -pub async fn display_metrics_map( - mut perf_buffers: Vec>, - running: Arc, // Changed to Arc - mut buffers: Vec, -) { - info!("Starting metrics event listener..."); - while running.load(Ordering::SeqCst) { - for buf in perf_buffers.iter_mut() { - match buf.read_events(&mut buffers) { - std::result::Result::Ok(events) => { - if events.read > 0 { - info!("Read {} metric events", events.read); - } - for i in 0..events.read { - let data = &buffers[i]; - if data.len() >= std::mem::size_of::() { - let net_metrics: NetworkMetrics = - unsafe { std::ptr::read_unaligned(data.as_ptr() as *const _) }; - let tgid = net_metrics.tgid; - let comm = String::from_utf8_lossy(&net_metrics.comm); - let ts_us = net_metrics.ts_us; - let sk_drop_count = net_metrics.sk_drops; - let sk_err = net_metrics.sk_err; - let sk_err_soft = net_metrics.sk_err_soft; - let sk_backlog_len = net_metrics.sk_backlog_len; - let sk_write_memory_queued = net_metrics.sk_write_memory_queued; - let sk_ack_backlog = net_metrics.sk_ack_backlog; - let 
sk_receive_buffer_size = net_metrics.sk_receive_buffer_size; - info!( - "tgid: {}, comm: {}, ts_us: {}, sk_drops: {}, sk_err: {}, sk_err_soft: {}, sk_backlog_len: {}, sk_write_memory_queued: {}, sk_ack_backlog: {}, sk_receive_buffer_size: {}", - tgid, comm, ts_us, sk_drop_count, sk_err, sk_err_soft, sk_backlog_len, sk_write_memory_queued, sk_ack_backlog, sk_receive_buffer_size - ); - } else { - info!("Received data too small: {} bytes, expected: {}", data.len(), std::mem::size_of::()); - } - } - } - Err(e) => { - error!("Error reading events: {:?}", e); - } - } - } - tokio::time::sleep(std::time::Duration::from_millis(100)).await; - } - info!("Metrics event listener stopped"); -} +use cortexbrain_common::buffer_type::{BufferType, read_perf_buffer}; +use cortexbrain_common::otel_metrics::Metrics; -pub async fn display_time_stamp_events_map( - mut perf_buffers: Vec>, - running: Arc, // Changed to Arc - mut buffers: Vec, -) { - info!("Starting timestamp event listener..."); - while running.load(Ordering::SeqCst) { - for buf in perf_buffers.iter_mut() { - match buf.read_events(&mut buffers) { - std::result::Result::Ok(events) => { - if events.read > 0 { - info!("Read {} timestamp events", events.read); - } - for i in 0..events.read { - let data = &buffers[i]; - if data.len() >= std::mem::size_of::() { - let time_stamp_event: TimeStampMetrics = - unsafe { std::ptr::read_unaligned(data.as_ptr() as *const _) }; - let delta_us = time_stamp_event.delta_us; - let ts_us = time_stamp_event.ts_us; - let tgid = time_stamp_event.tgid; - let comm = String::from_utf8_lossy(&time_stamp_event.comm); - let lport = time_stamp_event.lport; - let dport_be = time_stamp_event.dport_be; - let af = time_stamp_event.af; - info!( - "TimeStampEvent - delta_us: {}, ts_us: {}, tgid: {}, comm: {}, lport: {}, dport_be: {}, af: {}", - delta_us, ts_us, tgid, comm, lport, dport_be, af - ); - } else { - info!("Received timestamp data too small: {} bytes", data.len()); - } - } - } - Err(e) => { - 
error!("Error reading timestamp events: {:?}", e); - } - } +/// Listen for eBPF perf-buffer events and record OpenTelemetry metrics. +/// +/// This function bridges the eBPF perf-buffer layer with the OpenTelemetry +/// metrics pipeline. It opens per-CPU buffers for the two maps of interest +/// (`net_metrics` and `time_stamp_events`), spawns asynchronous consumers, +/// and parks until a `Ctrl-C` signal is received or one of the consumers +/// terminates. +/// +/// # Arguments +/// +/// -`bpf_maps` – handles for the pinned BPF maps produced by +/// [`cortexbrain_common::map_handlers::map_pinner`]. +/// - `meter` – an initialised OpenTelemetry [`Meter`]. +/// +/// # Errors +/// +/// Returns `Err` if the map manager or CPU enumeration fails. +/// +pub async fn event_listener(bpf_maps: BpfMapsData, meter: Meter) -> Result<(), anyhow::Error> { + info!("Getting CPU count..."); + + let mut maps = map_manager(bpf_maps)?; + + let cpu_count = online_cpus().map_err(|e| anyhow::anyhow!("Error {:?}", e))?; + + for cpu_id in cpu_count { + for (name, (perf_event_array, perf_event_buffer)) in maps.iter_mut() { + let buf = perf_event_array.open(cpu_id, None).map_err(|e| { + anyhow!( + "Cannot create perf_event_array buffer from perf_event_array. Reason: {}", + e + ) + })?; + info!( + "Buffer created for map {:?} on cpu_id {:?}. 
Buffer size: {}", + name, + cpu_id, + std::mem::size_of_val(&buf) + ); + perf_event_buffer.push(buf); } - tokio::time::sleep(std::time::Duration::from_millis(100)).await; } - info!("Timestamp event listener stopped"); -} -pub async fn event_listener(bpf_maps: (Map, Map)) -> Result<(), anyhow::Error> { - info!("Getting CPU count..."); - let cpu_count = online_cpus().map_err(|e| anyhow::anyhow!("Error {:?}", e))?.len(); - info!("CPU count: {}", cpu_count); - - info!("Creating perf buffers..."); - let mut net_perf_buffer: Vec> = Vec::new(); - let mut net_perf_array: PerfEventArray = PerfEventArray::try_from(bpf_maps.0)?; - let mut time_stamp_events_perf_buffer: Vec> = Vec::new(); - let mut time_stamp_events_perf_array: PerfEventArray = - PerfEventArray::try_from(bpf_maps.1)?; - - info!("Opening perf buffers for {} CPUs...", cpu_count); - for cpu_id in online_cpus().map_err(|e| anyhow::anyhow!("Error {:?}", e))? { - let buf: PerfEventArrayBuffer = net_perf_array.open(cpu_id, None)?; - net_perf_buffer.push(buf); - } - for cpu_id in online_cpus().map_err(|e| anyhow::anyhow!("Error {:?}", e))? 
{ - let buf: PerfEventArrayBuffer = time_stamp_events_perf_array.open(cpu_id, None)?; - time_stamp_events_perf_buffer.push(buf); - } info!("Perf buffers created successfully"); - // Create shared running flags - let net_metrics_running = Arc::new(AtomicBool::new(true)); - let time_stamp_events_running = Arc::new(AtomicBool::new(true)); - - // Create proper sized buffers - let net_metrics_buffers = vec![BytesMut::with_capacity(1024); cpu_count]; - let time_stamp_events_buffers = vec![BytesMut::with_capacity(1024); cpu_count]; - - // Clone for the signal handler - let net_metrics_running_signal = net_metrics_running.clone(); - let time_stamp_events_running_signal = time_stamp_events_running.clone(); - + let (_time_stamp_events_array, time_stamp_events_perf_buffer) = maps + .remove("time_stamp_events") + .expect("Cannot create time_stamp_events_buffer"); + let (_net_perf_array, net_perf_buffer) = maps + .remove("net_metrics") + .expect("Cannot create net_perf_buffer"); + + // Allocate byte-buffers sized for each structure type + let net_metrics_buffers = BufferSize::NetworkMetricsEvents.set_buffer(); + let time_stamp_events_buffers = BufferSize::TimeMetricsEvents.set_buffer(); + + let metrics = Arc::new(Metrics::new(&meter)); + info!("Starting event listener tasks..."); - let metrics_map_displayer = tokio::spawn(async move { - display_metrics_map(net_perf_buffer, net_metrics_running, net_metrics_buffers).await; - }); - let time_stamp_events_displayer = tokio::spawn(async move { - display_time_stamp_events_map(time_stamp_events_perf_buffer, time_stamp_events_running, time_stamp_events_buffers).await - }); + let net_metrics_handle = { + let metrics = Arc::clone(&metrics); + let mut array_buffers = net_perf_buffer; + let mut buffers = net_metrics_buffers; + tokio::spawn(async move { + read_perf_buffer( + array_buffers, + buffers, + BufferType::NetworkMetrics, + Some(metrics), + ) + .await; + }) + }; + + let time_stamp_handle = { + let metrics = Arc::clone(&metrics); + 
let mut array_buffers = time_stamp_events_perf_buffer; + let mut buffers = time_stamp_events_buffers; + tokio::spawn(async move { + read_perf_buffer( + array_buffers, + buffers, + BufferType::TimeStampMetrics, + Some(metrics), + ) + .await; + }) + }; info!("Event listeners started, entering main loop..."); tokio::select! { - result = metrics_map_displayer => { + result = net_metrics_handle => { if let Err(e) = result { - error!("Metrics map displayer task failed: {:?}", e); + error!("Network metrics task failed: {:?}", e); } } - result = time_stamp_events_displayer => { + result = time_stamp_handle => { if let Err(e) = result { - error!("Time stamp events displayer task failed: {:?}", e); + error!("Timestamp events task failed: {:?}", e); } } _ = signal::ctrl_c() => { info!("Ctrl-C received, shutting down..."); - // Stop the event loops - net_metrics_running_signal.store(false, std::sync::atomic::Ordering::SeqCst); - time_stamp_events_running_signal.store(false, std::sync::atomic::Ordering::SeqCst); } } - // return success Ok(()) -} \ No newline at end of file +} diff --git a/core/src/components/metrics/src/main.rs b/core/src/components/metrics/src/main.rs index 6b22a865..0211be68 100644 --- a/core/src/components/metrics/src/main.rs +++ b/core/src/components/metrics/src/main.rs @@ -1,86 +1,106 @@ -use aya::{ - Ebpf -}; - +//! CortexBrain metrics service – eBPF-based telemetry with OpenTelemetry export. +//! +//! This binary is the node-level metrics agent for CortexBrain. It: +//! +//! 1. Initialises an OpenTelemetry metrics pipeline (OTLP / gRPC). +//! 2. Loads a compiled eBPF object and pins its maps to the BPF filesystem. +//! 3. Attaches a set of kernel kprobe programs. +//! 4. Starts asynchronous consumers that read per-CPU perf buffers and +//! emit OpenTelemetry instruments for every event. +//! 5. Blocks until `Ctrl-C` is received, then shuts down cleanly. 
+ +use anyhow::Context; +use aya::Ebpf; use std::{ env, fs, path::Path, - sync::{ - Arc, Mutex, - }, + sync::{Arc, Mutex}, }; - -use anyhow::{Context, Ok}; use tracing::{error, info}; -use cortexbrain_common::{constants, logger}; - mod helpers; -use crate::{helpers::event_listener, maps_handlers::map_pinner, program_handlers::load_and_attach_tcp_programs}; - -mod maps_handlers; -use crate::maps_handlers::init_ebpf_maps; - -mod program_handlers; -use crate::program_handlers::load_program; - -mod structs; +mod otel_init; +use crate::helpers::event_listener; +use crate::otel_init::{init_opentelemetry, shutdown_opentelemetry}; + +use cortexbrain_common::{ + constants, + logger::otlp_logger_init, + map_handlers::{init_bpf_maps, map_pinner}, + program_handlers::load_program, +}; #[tokio::main] async fn main() -> Result<(), anyhow::Error> { - //init tracing subscriber - logger::init_default_logger(); + let _otlp_log_provider = otlp_logger_init("metrics-service".to_string()); info!("Starting metrics service..."); info!("fetching data"); - let bpf_path = env::var(constants::BPF_PATH).context("BPF_PATH environment variable required")?; + let meter = + init_opentelemetry().context("Failed to initialise OpenTelemetry metrics pipeline")?; + + let bpf_path = + env::var(constants::BPF_PATH).context("BPF_PATH environment variable required")?; let data = fs::read(Path::new(&bpf_path)).context("Failed to load file from path")?; let bpf = Arc::new(Mutex::new(Ebpf::load(&data)?)); let tcp_bpf = bpf.clone(); let tcp_rev_bpf = bpf.clone(); + let tcp_v6_bpf = bpf.clone(); info!("Running Ebpf logger"); info!("loading programs"); + let bpf_map_save_path = - std::env::var(constants::PIN_MAP_PATH).context("PIN_MAP_PATH environment variable required")?; + env::var(constants::PIN_MAP_PATH).context("PIN_MAP_PATH environment variable required")?; - match init_ebpf_maps(bpf.clone()) { - std::result::Result::Ok(maps) => { + let map_data = vec!["time_stamp_events".to_string(), 
"net_metrics".to_string()]; + + match init_bpf_maps(bpf.clone(), map_data) { + Ok(bpf_maps) => { info!("BPF maps loaded successfully"); let pin_path = std::path::PathBuf::from(&bpf_map_save_path); info!("About to call map_pinner with path: {:?}", pin_path); - match map_pinner(&maps, &pin_path).await { - std::result::Result::Ok(_) => { + + match map_pinner(bpf_maps, &pin_path) { + Ok(maps) => { info!("BPF maps pinned successfully to {}", bpf_map_save_path); { load_program(bpf.clone(), "metrics_tracer", "tcp_identify_packet_loss") - .context("An error occured during the execution of load_program function")?; - } - - { - load_and_attach_tcp_programs(tcp_bpf.clone()) - .context("An error occured during the execution of load_and_attach_tcp_programs function")?; + .context( + "An error occurred during the execution of load_program function", + )?; + + load_program(tcp_bpf, "tcp_v4_connect", "tcp_v4_connect") + .context("An error occurred during the execution of load_and_attach_tcp_programs function")?; + + load_program(tcp_v6_bpf, "tcp_v6_connect", "tcp_v6_connect") + .context("An error occurred during the execution of load_and_attach_tcp_programs function")?; + + load_program( + tcp_rev_bpf, + "tcp_rcv_state_process", + "tcp_rcv_state_process", + ) + .context( + "An error occurred during the execution of load_program function", + )?; } - { - load_program(tcp_rev_bpf.clone(), "tcp_rcv_state_process", "tcp_rcv_state_process") - .context("An error occured during the execution of load_program function")?; - } - - event_listener(maps).await?; + // Hand off to the async event consumer + event_listener(maps, meter).await } Err(e) => { error!("Error pinning BPF maps: {:?}", e); - return Err(e); + shutdown_opentelemetry(); + Err(e) } } } Err(e) => { error!("Error initializing BPF maps: {:?}", e); - return Err(e); + shutdown_opentelemetry(); + Err(e) } } - - Ok(()) -} \ No newline at end of file +} diff --git a/core/src/components/metrics/src/maps_handlers.rs 
b/core/src/components/metrics/src/maps_handlers.rs deleted file mode 100644 index 12c3d0a2..00000000 --- a/core/src/components/metrics/src/maps_handlers.rs +++ /dev/null @@ -1,48 +0,0 @@ -use std::{path::PathBuf, sync::{Arc, Mutex}}; -use tokio::fs; -use anyhow::Error; -use aya::{maps::Map, Ebpf}; -use tracing::info; - - - -pub fn init_ebpf_maps(bpf: Arc>) -> Result<(Map, Map), anyhow::Error> { - // this function init the bpfs maps used in the main program - /* - index 0: net_metrics - index 1: time_stamp_events - */ - let mut bpf_new = bpf.lock().unwrap(); - - let net_metrics_map = bpf_new - .take_map("net_metrics") - .ok_or_else(|| anyhow::anyhow!("net_metrics map not found"))?; - - let time_stamps_events_map = bpf_new - .take_map("time_stamp_events") - .ok_or_else(|| anyhow::anyhow!("time_stamp_events map not found"))?; - - Ok((net_metrics_map, time_stamps_events_map)) -} - -pub async fn map_pinner(maps: &(Map, Map), path: &PathBuf) -> Result<(), Error> { - // check if the map exists - if !path.exists() { - info!("Pin path {:?} does not exist. 
Creating it...", path); - fs::create_dir_all(&path).await?; - #[cfg(unix)] - { - use std::os::unix::fs::PermissionsExt; - fs::set_permissions(&path, std::fs::Permissions::from_mode(0o755)).await?; - } - } - - let map1_path = path.join("net_metrics"); - let map2_path = path.join("time_stamp_events"); - - // maps pinning - maps.0.pin(&map1_path)?; - maps.1.pin(&map2_path)?; - - Ok(()) -} diff --git a/core/src/components/metrics/src/mod.rs b/core/src/components/metrics/src/mod.rs index 8c4a839a..c5e28062 100644 --- a/core/src/components/metrics/src/mod.rs +++ b/core/src/components/metrics/src/mod.rs @@ -1,5 +1,2 @@ -mod structs; -mod enums; -mod map_handlers; mod helpers; -mod program_handlers; \ No newline at end of file +mod otel_init; diff --git a/core/src/components/metrics/src/otel_init.rs b/core/src/components/metrics/src/otel_init.rs new file mode 100644 index 00000000..e472c7ec --- /dev/null +++ b/core/src/components/metrics/src/otel_init.rs @@ -0,0 +1,120 @@ +//! docs +//! This module configures and bootstraps the OpenTelemetry SDK (OTel SDK) +//! within the `metrics` binary. Its goal is to expose a [`Meter`] --- the +//! primary entry-point for creating counters, gauges and histograms --- +//! backed by an **OTLP/gRPC** metric exporter. +//! +//! # Relationship to the rest of the crate +//! +//! `otel_init::init_opentelemetry()` is invoked **once** in [`main`], before +//! any eBPF program is loaded. The returned [`Meter`] is then passed through +//! the call chain into [`event_listener`](crate::helpers::event_listener) +//! where it is used by the async tasks that read eBPF perf-buffers. See +//! [`crate::helpers`] for the consumption side. +//! +//! When the application exits (either because `Ctrl-C` was received or because +//! an error bubbled up), [`shutdown_opentelemetry`] is called. This flushes +//! every remaining aggregated metric to the OTLP collector before the process +//! terminates. +//! 
+ +use opentelemetry::global; +use opentelemetry::metrics::{Meter, MeterProvider}; +use opentelemetry_otlp::{MetricExporter, WithExportConfig}; +use opentelemetry_sdk::metrics::{PeriodicReader, SdkMeterProvider}; +use std::env; +use std::sync::OnceLock; +use std::time::Duration; + +/// Environment variable that holds the OTLP collector endpoint. +/// +/// Expected format: `"http://collector:4317"` (gRPC transport). +/// +pub const OTEL_EXPORTER_OTLP_ENDPOINT: &str = "OTEL_EXPORTER_OTLP_ENDPOINT"; + +/// Default OTLP endpoint used when [`OTEL_EXPORTER_OTLP_ENDPOINT`] is not +/// present in the environment. +/// +/// Points to a locally-running OpenTelemetry Collector on the standard +/// **gRPC** port `4317`. Note that OTLP over HTTP typically uses `4318` --- +/// make sure your Collector is actually listening for **gRPC** traffic on the +/// port you configure. +pub const DEFAULT_OTLP_ENDPOINT: &str = "http://localhost:4317"; + +/// Singleton that owns the concrete `SdkMeterProvider` instance. +/// `OnceLock` guarantees single initialisation, so we avoid accidentally creating two providers (and +/// two background export tasks) if `init_opentelemetry()` were ever called +/// twice. +/// +/// # Thread safety +/// +/// `OnceLock` is `Sync`, so the static can be read safely from any thread +/// or Tokio task once populated. +static METER_PROVIDER: OnceLock = OnceLock::new(); +/// docs: +/// Initialise the OpenTelemetry SDK, wire up the OTLP/gRPC exporter, and +/// return a [`Meter`] ready for instrumenting the `metrics` crate. +/// +/// 1. Read the endpoint from [`OTEL_EXPORTER_OTLP_ENDPOINT`], falling back to the +/// hard-coded default [`DEFAULT_OTLP_ENDPOINT`]. +/// 2. Build a `MetricExporter` using the Tonic / gRPC transport: +/// - `with_tonic()` enables the Tonic-based gRPC client. +/// - `with_endpoint()` sets the target Collector URL.
+/// - `with_timeout(Duration::from_secs(10))` caps each export RPC to 10 +/// seconds; if the Collector is unreachable the RPC aborts instead of +/// hanging indefinitely. +/// 3. Wrap the exporter in a `PeriodicReader`. The reader collects +/// aggregated metrics from every instrument every 5 seconds and hands +/// them to the exporter. This is the "push" model --- metrics leave the +/// process automatically without an external scraper. +/// 4. Construct an `SdkMeterProvider` and register it as the global +/// meter provider (`global::set_meter_provider`). The global handle is +/// needed for instrumenting code spawned in other Tokio tasks (see +/// [`helpers::event_listener`](crate::helpers::event_listener)). +/// 5. Keep a clone of the concrete provider in `METER_PROVIDER` so that +/// [`shutdown_opentelemetry`] can later call `SdkMeterProvider::shutdown()`. +/// 6. Create a `Meter` named `"cortexbrain-metrics"` and return it. +/// +/// Potential causes of errors: +/// +/// * An invalid endpoint URL (malformed string). +/// * Network-level failure during exporter construction. +/// * The provider already having been initialised. +/// +pub fn init_opentelemetry() -> Result { + let endpoint = + env::var(OTEL_EXPORTER_OTLP_ENDPOINT).unwrap_or_else(|_| DEFAULT_OTLP_ENDPOINT.to_string()); + + let exporter = MetricExporter::builder() + .with_tonic() + .with_endpoint(endpoint) + .with_timeout(Duration::from_secs(10)) + .build()?; + + let reader = PeriodicReader::builder(exporter) + .with_interval(Duration::from_secs(5)) + .build(); + + let provider = SdkMeterProvider::builder().with_reader(reader).build(); + + // Make the provider globally discoverable. This clone is cheap because + // SdkMeterProvider is an Arc-backed handle. + global::set_meter_provider(provider.clone()); + + // Stash the concrete handle so shutdown_opentelemetry can flush.
+ METER_PROVIDER + .set(provider.clone()) + .map_err(|_| anyhow::anyhow!("OpenTelemetry meter provider already initialised"))?; + + let meter = provider.meter("cortexbrain-metrics"); + Ok(meter) +} +/// docs: +/// Flush every buffered metric to the OTLP collector and shut down the SDK. +pub fn shutdown_opentelemetry() { + if let Some(provider) = METER_PROVIDER.get() + && let Err(e) = provider.shutdown() + { + tracing::error!("Failed to shut down OpenTelemetry meter provider: {:?}", e); + } +} diff --git a/core/src/components/metrics/src/program_handlers.rs b/core/src/components/metrics/src/program_handlers.rs deleted file mode 100644 index 24d18cbd..00000000 --- a/core/src/components/metrics/src/program_handlers.rs +++ /dev/null @@ -1,59 +0,0 @@ -use std::sync::{Arc, Mutex}; - -use aya::{programs::KProbe, Ebpf}; -use tracing::{info, error}; -use std::convert::TryInto; - -pub fn load_program(bpf: Arc>, program_name: &str, actual_program: &str) -> Result<(), anyhow::Error> { - let mut bpf_new = bpf.lock().unwrap(); - - // Load and attach the eBPF programs - let program: &mut KProbe = bpf_new - .program_mut(program_name) - .ok_or_else(|| anyhow::anyhow!("Program {} not found", program_name))? - .try_into() - .map_err(|e| anyhow::anyhow!("Failed to convert program: {:?}", e))?; - - program.load()?; - - match program.attach(actual_program, 0) { - Ok(_) => info!("{} program attached successfully", actual_program), - Err(e) => { - error!("Error attaching {} program {:?}", actual_program, e); - return Err(anyhow::anyhow!("Failed to attach {}: {:?}", actual_program, e)); - } - }; - - info!("eBPF program {} loaded and attached successfully", program_name); - Ok(()) -} - -pub fn load_and_attach_tcp_programs(bpf: Arc>) -> Result<(), anyhow::Error> { - let mut bpf_new = bpf.lock().unwrap(); - - // Load and attach the eBPF programs - let tcp_prog: &mut KProbe = bpf_new - .program_mut("tcp_connect") - .ok_or_else(|| anyhow::anyhow!("Program tcp_connect not found"))? 
- .try_into() - .map_err(|e| anyhow::anyhow!("Failed to convert program tcp_connect: {:?}", e))?; - tcp_prog.load()?; - - match tcp_prog.attach("tcp_v4_connect", 0) { - Ok(_) => info!("tcp_v4_connect program attached successfully"), - Err(e) => { - error!("Error attaching tcp_v4_connect: {:?}", e); - return Err(anyhow::anyhow!("Failed to attach tcp_v4_connect: {:?}", e)); - } - }; - - match tcp_prog.attach("tcp_v6_connect", 0) { - Ok(_) => info!("tcp_v6_connect program attached successfully"), - Err(e) => { - error!("Error attaching tcp_v6_connect: {:?}", e); - return Err(anyhow::anyhow!("Failed to attach tcp_v6_connect: {:?}", e)); - } - }; - - Ok(()) -} \ No newline at end of file diff --git a/core/src/components/metrics/src/structs.rs b/core/src/components/metrics/src/structs.rs deleted file mode 100644 index dc63ace3..00000000 --- a/core/src/components/metrics/src/structs.rs +++ /dev/null @@ -1,33 +0,0 @@ - -pub const TASK_COMM_LEN: usize = 16; // linux/sched.h - -#[repr(C, packed)] -#[derive(Clone, Copy)] -pub struct NetworkMetrics { - pub tgid: u32, - pub comm: [u8; TASK_COMM_LEN], - pub ts_us: u64, - pub sk_err: i32, // Offset 284 - pub sk_err_soft: i32, // Offset 600 - pub sk_backlog_len: i32, // Offset 196 - pub sk_write_memory_queued: i32, // Offset 376 - pub sk_receive_buffer_size: i32, // Offset 244 - pub sk_ack_backlog: u32, // Offset 604 - pub sk_drops: i32, // Offset 136 -} - -#[repr(C)] -#[derive(Clone, Copy)] -pub struct TimeStampMetrics { - pub delta_us: u64, - pub ts_us: u64, - pub tgid: u32, - pub comm: [u8; TASK_COMM_LEN], - pub lport: u16, - pub dport_be: u16, - pub af: u16, - pub saddr_v4: u32, - pub daddr_v4: u32, - pub saddr_v6: [u32; 4], - pub daddr_v6: [u32; 4], -} \ No newline at end of file diff --git a/core/src/components/metrics_tracer/src/data_structures.rs b/core/src/components/metrics_tracer/src/data_structures.rs index f6d7afed..e9866a83 100644 --- a/core/src/components/metrics_tracer/src/data_structures.rs +++ 
b/core/src/components/metrics_tracer/src/data_structures.rs @@ -2,7 +2,7 @@ use aya_ebpf::{macros::map, maps::{LruPerCpuHashMap, HashMap, PerfEventArray}}; pub const TASK_COMM_LEN: usize = 16; - +#[repr(C,packed)] pub struct NetworkMetrics { pub tgid: u32, pub comm: [u8; TASK_COMM_LEN], @@ -16,7 +16,7 @@ pub struct NetworkMetrics { pub sk_drops: i32, // Offset 136 } -#[repr(C)] +#[repr(C,packed)] #[derive(Copy, Clone)] pub struct TimeStampStartInfo { pub comm: [u8; TASK_COMM_LEN], @@ -25,7 +25,7 @@ pub struct TimeStampStartInfo { } // Event we send to userspace when latency is computed -#[repr(C)] +#[repr(C,packed)] #[derive(Copy, Clone)] pub struct TimeStampEvent { pub delta_us: u64, diff --git a/core/src/components/metrics_tracer/src/main.rs b/core/src/components/metrics_tracer/src/main.rs index 2f5e5a14..216a6aca 100644 --- a/core/src/components/metrics_tracer/src/main.rs +++ b/core/src/components/metrics_tracer/src/main.rs @@ -78,7 +78,13 @@ fn try_metrics_tracer(ctx: ProbeContext) -> Result { // Monitor on tcp_sendmsg, tcp_v4_connect #[kprobe] -fn tcp_connect(ctx: ProbeContext) -> u32 { +fn tcp_v6_connect(ctx: ProbeContext) -> u32 { + match on_connect(ctx) { Ok(_) => 0, Err(e) => e as u32 } +} + +// Monitor on tcp_sendmsg, tcp_v4_connect +#[kprobe] +fn tcp_v4_connect(ctx: ProbeContext) -> u32 { match on_connect(ctx) { Ok(_) => 0, Err(e) => e as u32 } } diff --git a/core/src/testing/agent.yaml b/core/src/testing/agent.yaml index e5c54f0e..4633408e 100644 --- a/core/src/testing/agent.yaml +++ b/core/src/testing/agent.yaml @@ -19,7 +19,7 @@ spec: hostNetwork: true containers: - name: agent - image: lorenzotettamanti/cortexflow-agent:latest + image: lorenzotettamanti/cortexflow-agent:veth-command-test4 command: ["/bin/bash", "-c"] args: - | diff --git a/core/src/testing/identity.yaml b/core/src/testing/identity.yaml index 44fc5b99..aeb9ebcc 100644 --- a/core/src/testing/identity.yaml +++ b/core/src/testing/identity.yaml @@ -28,7 +28,6 @@ spec: echo "checking 
permissions" ls -ld /sys/fs/bpf - volumeMounts: - name: bpf mountPath: /sys/fs/bpf @@ -53,7 +52,7 @@ spec: - SYS_PTRACE containers: - name: identity - image: lorenzotettamanti/cortexflow-identity:latest + image: lorenzotettamanti/cortexflow-identity:0.1.2 command: ["/bin/bash", "-c"] args: - | @@ -70,6 +69,16 @@ spec: echo "Running application..." exec /usr/local/bin/cortexflow-identity-service || echo "Application exited with code $?" + env: + - name: OTEL_SERVICE_NAME + value: cortexflow-identity + - name: OTEL_EXPORTER_OTLP_ENDPOINT + value: http://localhost:4317 + - name: OTEL_EXPORTER_OTLP_PROTOCOL + value: grpc + - name: OTEL_RESOURCE_ATTRIBUTES + value: service.namespace=cortexflow,service.version=0.1.5 + resources: limits: cpu: "1" diff --git a/core/src/testing/metrics.yaml b/core/src/testing/metrics.yaml index 3f74c71e..a106c4e8 100644 --- a/core/src/testing/metrics.yaml +++ b/core/src/testing/metrics.yaml @@ -19,7 +19,7 @@ spec: hostNetwork: true containers: - name: metrics - image: lorenzotettamanti/cortexflow-metrics:latest + image: lorenzotettamanti/cortexflow-metrics:otel-test-1 command: ["/bin/bash", "-c"] args: - | diff --git a/core/src/testing/otel_agent.yaml b/core/src/testing/otel_agent.yaml new file mode 100644 index 00000000..c5165ac4 --- /dev/null +++ b/core/src/testing/otel_agent.yaml @@ -0,0 +1,217 @@ +--- +apiVersion: v1 +kind: ConfigMap +metadata: + name: otel-agent-conf + namespace: cortexflow + labels: + app: opentelemetry + component: otel-agent-conf +data: + otel-agent-config: | + receivers: + otlp: + protocols: + grpc: + endpoint: 0.0.0.0:4317 + http: + endpoint: 0.0.0.0:4318 + + exporters: + otlp: + endpoint: otel-collector.cortexflow.svc.cluster.local:4317 + tls: + insecure: true + logging: + loglevel: info + + service: + pipelines: + traces: + receivers: [otlp] + exporters: [otlp, logging] + logs: + receivers: [otlp] + exporters: [otlp, logging] + metrics: + receivers: [otlp] + exporters: [otlp, logging] + +--- +apiVersion: apps/v1 
+kind: DaemonSet +metadata: + name: otel-agent + namespace: cortexflow + labels: + app: opentelemetry + component: otel-agent +spec: + selector: + matchLabels: + app: opentelemetry + component: otel-agent + template: + metadata: + labels: + app: opentelemetry + component: otel-agent + spec: + hostNetwork: true + dnsPolicy: ClusterFirstWithHostNet + containers: + - name: otel-agent + image: otel/opentelemetry-collector:0.95.0 + command: + - "/otelcol" + - "--config=/conf/otel-agent-config.yaml" + resources: + limits: + cpu: 500m + memory: 500Mi + requests: + cpu: 100m + memory: 100Mi + ports: + - containerPort: 4317 + hostPort: 4317 + protocol: TCP + - containerPort: 4318 + hostPort: 4318 + protocol: TCP + env: + - name: GOMEMLIMIT + value: 400MiB + volumeMounts: + - name: otel-agent-config-vol + mountPath: /conf + volumes: + - name: otel-agent-config-vol + configMap: + name: otel-agent-conf + items: + - key: otel-agent-config + path: otel-agent-config.yaml + +--- +apiVersion: v1 +kind: ConfigMap +metadata: + name: otel-collector-conf + namespace: cortexflow + labels: + app: opentelemetry + component: otel-collector-conf +data: + otel-collector-config: | + receivers: + otlp: + protocols: + grpc: + endpoint: 0.0.0.0:4317 + http: + endpoint: 0.0.0.0:4318 + + processors: + memory_limiter: + limit_mib: 1500 + spike_limit_mib: 512 + check_interval: 5s + + exporters: + # otlp: + # endpoint: otel-collector.cortexflow.svc.cluster.local:4317 + # tls: + # insecure: true + logging: {} + + service: + pipelines: + traces: + receivers: [otlp] + processors: [memory_limiter] + exporters: [logging] + logs: + receivers: [otlp] + processors: [memory_limiter] + exporters: [logging] + metrics: + receivers: [otlp] + processors: [memory_limiter] + exporters: [logging] + +--- +apiVersion: v1 +kind: Service +metadata: + name: otel-collector + namespace: cortexflow + labels: + app: opentelemetry + component: otel-collector +spec: + selector: + app: opentelemetry + component: otel-collector + 
ports: + - name: otlp-grpc + port: 4317 + targetPort: 4317 + - name: otlp-http + port: 4318 + targetPort: 4318 + - name: metrics + port: 8888 + targetPort: 8888 + +--- +apiVersion: apps/v1 +kind: Deployment +metadata: + name: otel-collector + namespace: cortexflow + labels: + app: opentelemetry + component: otel-collector +spec: + replicas: 1 + selector: + matchLabels: + app: opentelemetry + component: otel-collector + template: + metadata: + labels: + app: opentelemetry + component: otel-collector + spec: + containers: + - name: otel-collector + image: otel/opentelemetry-collector:0.95.0 + command: + - "/otelcol" + - "--config=/conf/otel-collector-config.yaml" + resources: + limits: + cpu: "1" + memory: 2Gi + requests: + cpu: 200m + memory: 400Mi + ports: + - containerPort: 4317 + - containerPort: 4318 + - containerPort: 8888 + env: + - name: GOMEMLIMIT + value: 1600MiB + volumeMounts: + - name: otel-collector-config-vol + mountPath: /conf + volumes: + - name: otel-collector-config-vol + configMap: + name: otel-collector-conf + items: + - key: otel-collector-config + path: otel-collector-config.yaml \ No newline at end of file