diff --git a/serverless-fleets/README.md b/serverless-fleets/README.md index 9d6a358a..7562c023 100644 --- a/serverless-fleets/README.md +++ b/serverless-fleets/README.md @@ -19,6 +19,7 @@ Using both the intuitive graphical user interface and the command line, you will - [Tutorial: Docling](./tutorials/docling/README.md) - [Tutorial: Batch Inferencing](./tutorials/inferencing/README.md) - [Tutorial: Monte Carlo Simulation](./tutorials/simulation/README.md) +- [Example: HTTP egress control](#http-egress-control) - [HowTo](#howto) - [Troubleshooting](#troubleshooting) @@ -522,6 +523,13 @@ Download the results from the output COS bucket to `./data/output` - [Tutorial: Simulation](./tutorials/simulation/README.md) +## Examples + +### HTTP egress control + +To control, monitor, or capture the HTTP egress traffic of your tasks in cases where security-group-based network controls are not sufficient (for example, when you need hostname-based controls), you can use an after-startup hook to deploy an HTTP proxy on each worker node that intercepts all HTTP traffic of the tasks running on that worker node. An example that uses a Squid proxy to allow traffic to a single domain only can be found in `run_hook_squid_http_proxy`. + + ## HowTo ### How to use your own container and image @@ -551,7 +559,6 @@ Once the push is complete, you can run the fleet by modifying `./run` and replac - the environment variables, e.g. `--env foo=bar` - ### How to access logs An IBM Cloud Logs instance is being setup and enabled by default during the automated One Time Setup. Each fleet worker will ingest logs to the IBM Cloud Logs instance by default. [Navigating to the UI](https://cloud.ibm.com/docs/cloud-logs?topic=cloud-logs-instance-launch) and use [Using Livetail](https://cloud.ibm.com/docs/cloud-logs?topic=cloud-logs-livetail) or [Filtering log data](https://cloud.ibm.com/docs/cloud-logs?topic=cloud-logs-query-data-filter) to view the logs. 
diff --git a/serverless-fleets/run_hook_squid_http_proxy b/serverless-fleets/run_hook_squid_http_proxy new file mode 100755 index 00000000..b3dd0e99 --- /dev/null +++ b/serverless-fleets/run_hook_squid_http_proxy @@ -0,0 +1,109 @@ +uuid=$(uuidgen | tr '[:upper:]' '[:lower:]' | awk -F- '{print $1}') + +PREHOOK=$(cat <<'OUTER' +#!/usr/bin/env bash +set -Eeuo pipefail + +### ===== User-tunable variables ===== + +export NETWORK_NAME="podman" +export SUBNET_CIDR="10.88.0.0/16" +export GATEWAY_IP="10.88.0.1" + +export SQUID_IP="10.88.0.10" +export SQUID_CONTAINER="http-proxy" +export SQUID_IMAGE="docker.io/ubuntu/squid:latest" +export SQUID_HTTP_PORT="3129" + +export WORKDIR="$PWD/podman-transparent-proxy-lab" + +### ===== Derived variables ===== + +SQUID_CONF_DIR="/etc/squid" + +mkdir -p "$SQUID_CONF_DIR" + +echo "==> Checking dependencies" +command -v podman >/dev/null +command -v sudo >/dev/null +command -v iptables >/dev/null + +echo "==> Writing squid.conf" +cat > "${SQUID_CONF_DIR}/squid.conf" < Creating Podman network if needed" +if ! 
podman network exists "${NETWORK_NAME}"; then + podman network create \ + --subnet "${SUBNET_CIDR}" \ + --gateway "${GATEWAY_IP}" \ + "${NETWORK_NAME}" +fi + +echo "==> Removing old containers if present" +podman rm -f "${SQUID_CONTAINER}" 2>/dev/null || true + +echo "==> Preparing Squid cache and logs" +mkdir -p "${SQUID_CONF_DIR}/cache" "${SQUID_CONF_DIR}/logs" + +echo "==> Starting Squid HTTP transparent proxy" +podman run -d \ + --name "${SQUID_CONTAINER}" \ + --network host \ + --entrypoint sh \ + -v "${SQUID_CONF_DIR}/:/etc/squid/:Z,rw,rbind" \ + "${SQUID_IMAGE}" \ + -c 'exec squid -N -f /etc/squid/squid.conf' + +echo "==> Enabling IPv4 forwarding on host" +sudo sysctl -w net.ipv4.ip_forward=1 >/dev/null + +echo "==> Preparing iptables chain" +sudo iptables -t nat -N SQUID_PROXY 2>/dev/null || true +sudo iptables -t nat -F SQUID_PROXY + +sudo iptables -t nat -A SQUID_PROXY -d 127.0.0.0/8 -j RETURN +sudo iptables -t nat -A SQUID_PROXY -d ${SUBNET_CIDR} -j RETURN +sudo iptables -t nat -A SQUID_PROXY -p tcp --dport 80 -j REDIRECT --to-ports 3129 + +sudo iptables -t nat -A PREROUTING -s ${SUBNET_CIDR} -p tcp -j SQUID_PROXY +OUTER +) + +ibmcloud ce fleet create --name "fleet-${uuid}" \ + --tasks-state-store fleet-task-store \ + --image registry.access.redhat.com/ubi10/ubi-minimal \ + --cpu "2" \ + --memory "4G" \ + --tasks-from-local-file run_hook_squid_http_proxy_commands.jsonl \ + --max-scale 2 \ + --retrylimit 0 \ + --subnetpool-name fleet-subnetpool \ + --env __CE_INTERNAL_HOOK_AFTER_STARTUP="${PREHOOK}" + diff --git a/serverless-fleets/run_hook_squid_http_proxy_commands.jsonl b/serverless-fleets/run_hook_squid_http_proxy_commands.jsonl new file mode 100644 index 00000000..a54e7897 --- /dev/null +++ b/serverless-fleets/run_hook_squid_http_proxy_commands.jsonl @@ -0,0 +1,2 @@ +{ "command":"curl", "args": ["--silent", "--write-out", "%{url} %{http_code}", "--output", "/dev/null", "http://example.com"]} +{ "command":"curl", "args": ["--silent", "--write-out", 
"%{url} %{http_code}", "--output", "/dev/null", "http://google.com"]}