Skip to content

Commit b791ffb

Browse files
authored
Merge pull request #456 from unity-sds/453-smce-deployment-keycloak
453 smce deployment keycloak
2 parents 189e0a8 + 017e4a4 commit b791ffb

5 files changed

Lines changed: 323 additions & 6 deletions

File tree

Lines changed: 219 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,219 @@
1+
# Keycloak Direct OIDC Authentication for Airflow
2+
# Airflow authenticates directly with Keycloak (no proxy layer)
3+
4+
import os
5+
import logging
6+
from airflow.www.security import AirflowSecurityManager
7+
from flask_appbuilder.security.manager import AUTH_OAUTH
8+
9+
log = logging.getLogger(__name__)
10+
11+
# Enable OAuth authentication
AUTH_TYPE = AUTH_OAUTH

# Keycloak OIDC Configuration (the placeholders are filled in when Terraform
# renders this template)
OIDC_ISSUER = "${keycloak_provider_url}"
OIDC_CLIENT_ID = "${keycloak_client_id}"

# Client secret must be provided via environment variable.
# "CHANGE_ME" is only a placeholder that keeps this module importable when the
# variable is missing; it is not a usable credential, so say so in the logs
# instead of failing silently at the first login attempt.
OIDC_CLIENT_SECRET = os.getenv("OIDC_CLIENT_SECRET", "CHANGE_ME")
if OIDC_CLIENT_SECRET == "CHANGE_ME":
    log.warning(
        "OIDC_CLIENT_SECRET is not set; using a placeholder value, Keycloak logins are expected to fail"
    )

# OAuth provider configuration
OAUTH_PROVIDERS = [
    {
        "name": "keycloak",
        "icon": "fa-key",
        "token_key": "access_token",
        "remote_app": {
            "client_id": OIDC_CLIENT_ID,
            "client_secret": OIDC_CLIENT_SECRET,
            "api_base_url": OIDC_ISSUER,
            "client_kwargs": {
                "scope": "openid email profile groups",
            },
            "access_token_url": f"{OIDC_ISSUER}/protocol/openid-connect/token",
            "authorize_url": f"{OIDC_ISSUER}/protocol/openid-connect/auth",
            "request_token_url": None,
            "server_metadata_url": f"{OIDC_ISSUER}/.well-known/openid-configuration",
        },
    }
]

# Auto-register users on first login (only if they have approved Keycloak groups)
# Users without approved groups will be rejected during authentication
AUTH_USER_REGISTRATION = True

# Flask-AppBuilder grants the registration role to every OAuth user in
# addition to the mapped roles, so it must be the least-privileged role;
# otherwise it would override a lower role chosen by the group mapping.
AUTH_USER_REGISTRATION_ROLE = "Public"

# Flask-AppBuilder only applies the "role_keys" returned by oauth_user_info()
# when AUTH_ROLES_MAPPING is non-empty. oauth_user_info() already returns
# Airflow role names, so each role simply maps to itself.
AUTH_ROLES_MAPPING = {
    "Admin": ["Admin"],
    "Op": ["Op"],
    "User": ["User"],
    "Viewer": ["Viewer"],
    "Public": ["Public"],
}

# Recompute roles at every login so Keycloak group changes take effect for
# existing users. Note: roles assigned by hand in the Airflow UI are replaced
# at the user's next login.
AUTH_ROLES_SYNC_AT_LOGIN = True
46+
47+
# Role mapping configuration
48+
class CustomSecurityManager(AirflowSecurityManager):
49+
"""
50+
Custom security manager to map Keycloak groups to Airflow roles.
51+
52+
IMPORTANT: Users must have at least one approved Keycloak group to access Airflow.
53+
Users without approved groups will be denied access during authentication.
54+
"""
55+
56+
def oauth_user_info(self, provider, response):
57+
"""
58+
Get user info from OAuth provider and map groups to roles.
59+
60+
Args:
61+
provider: OAuth provider name
62+
response: OAuth response containing tokens
63+
64+
Returns:
65+
Dictionary with user information
66+
"""
67+
if provider == "keycloak":
68+
import json
69+
import base64
70+
71+
# Log the OAuth response structure (without sensitive token values)
72+
log.info(f"OAuth callback from provider: {provider}")
73+
log.info(f"OAuth response keys: {list(response.keys())}")
74+
75+
# Get access token
76+
access_token = response.get("access_token")
77+
if not access_token:
78+
log.error(f"No access token in OAuth response. Response keys: {list(response.keys())}")
79+
log.error(f"Full response (for debugging): {response}")
80+
return {}
81+
82+
try:
83+
# Decode JWT to get user info and groups
84+
# JWT structure: header.payload.signature
85+
parts = access_token.split('.')
86+
if len(parts) != 3:
87+
log.error(f"Invalid JWT format. Expected 3 parts, got {len(parts)}")
88+
return {}
89+
90+
payload = parts[1]
91+
# Add padding if needed
92+
payload += '=' * (4 - len(payload) % 4)
93+
decoded = json.loads(base64.urlsafe_b64decode(payload))
94+
95+
# Log what we received from Keycloak (useful for debugging)
96+
log.info(f"JWT payload keys: {list(decoded.keys())}")
97+
log.info(f"Available claims: username={decoded.get('preferred_username')}, email={decoded.get('email')}")
98+
log.info(f"Groups in token: {decoded.get('groups', [])}")
99+
100+
# Extract user information (with fallbacks for different claim names)
101+
username = decoded.get("preferred_username") or decoded.get("username") or decoded.get("sub")
102+
email = decoded.get("email", f"{username}@example.com")
103+
first_name = decoded.get("given_name") or decoded.get("first_name") or username
104+
last_name = decoded.get("family_name") or decoded.get("last_name") or ""
105+
106+
# Groups might be in different formats depending on Keycloak mapper config
107+
groups = decoded.get("groups", [])
108+
if isinstance(groups, str):
109+
groups = [groups]
110+
111+
# Some Keycloak configs put groups in realm_access or resource_access
112+
if not groups and "realm_access" in decoded:
113+
groups = decoded["realm_access"].get("roles", [])
114+
if not groups and "resource_access" in decoded:
115+
client_access = decoded["resource_access"].get(OIDC_CLIENT_ID, {})
116+
groups = client_access.get("roles", [])
117+
118+
user_info = {
119+
"username": username,
120+
"email": email,
121+
"first_name": first_name,
122+
"last_name": last_name,
123+
"groups": groups,
124+
}
125+
126+
log.info(f"Keycloak user login: username={user_info['username']}, email={user_info['email']}, groups={user_info['groups']}")
127+
128+
# Map groups to roles
129+
user_info["role_keys"] = self._map_groups_to_roles(user_info["groups"])
130+
log.info(f"Mapped to Airflow roles: {user_info['role_keys']}")
131+
132+
return user_info
133+
134+
except Exception as e:
135+
log.error(f"Error decoding access token: {e}", exc_info=True)
136+
log.error(f"Token (first 50 chars): {access_token[:50]}...")
137+
return {}
138+
139+
return {}
140+
141+
def _map_groups_to_roles(self, keycloak_groups):
142+
"""
143+
Map Keycloak groups to Airflow roles.
144+
145+
Role mapping (configured via Terraform):
146+
%{ for group, roles in keycloak_role_mapping ~}
147+
- ${group} → ${join(", ", roles)}
148+
%{ endfor ~}
149+
150+
Users with multiple groups get the highest priority role.
151+
Priority: Admin > Op > User > Viewer > Public
152+
153+
IMPORTANT: Users without any approved Keycloak groups will be rejected.
154+
155+
Args:
156+
keycloak_groups: List of Keycloak group names from OIDC token
157+
158+
Returns:
159+
List of Airflow role names
160+
161+
Raises:
162+
Exception: If user has no approved Keycloak groups (access denied)
163+
"""
164+
# Keycloak group to Airflow role mapping (from Terraform configuration)
165+
group_role_mapping = {
166+
%{ for group, roles in keycloak_role_mapping ~}
167+
'${group}': '${roles[0]}',
168+
%{ endfor ~}
169+
}
170+
171+
log.debug(f"Group role mapping: {group_role_mapping}")
172+
log.debug(f"User's Keycloak groups: {keycloak_groups}")
173+
174+
# Role priority (higher index = higher priority)
175+
role_priority = ['Public', 'Viewer', 'User', 'Op', 'Admin']
176+
177+
# Find highest priority role from user's groups
178+
highest_role_name = None
179+
highest_priority = -1
180+
181+
for group in keycloak_groups:
182+
# Handle group paths (e.g., "/airflow/admin" or "airflow_admin")
183+
group_name = group.split('/')[-1] # Get last part of path
184+
185+
if group_name in group_role_mapping:
186+
role_name = group_role_mapping[group_name]
187+
if role_name in role_priority:
188+
priority = role_priority.index(role_name)
189+
if priority > highest_priority:
190+
highest_priority = priority
191+
highest_role_name = role_name
192+
log.info(f"Group '{group}' maps to role '{role_name}' (priority {priority})")
193+
194+
# Return the highest priority role
195+
if highest_role_name:
196+
return [highest_role_name]
197+
else:
198+
# Reject users who don't have any approved Keycloak groups
199+
log.error(f"Access denied: User has no approved Keycloak groups. User groups: {keycloak_groups}")
200+
log.error("User must be assigned to one of these Keycloak groups to access Airflow:")
201+
log.error(f" Approved groups: {list(group_role_mapping.keys())}")
202+
raise Exception(
203+
"Access denied: You are not assigned to any approved Keycloak groups. "
204+
"Please contact your administrator to request access."
205+
)
206+
207+
# Use the Keycloak-aware security manager for the webserver
SECURITY_MANAGER_CLASS = CustomSecurityManager

# Session configuration: 8 hours, expressed in seconds
PERMANENT_SESSION_LIFETIME = 8 * 60 * 60

# Security settings: CSRF protection stays on, with no separate token time limit
WTF_CSRF_TIME_LIMIT = None
WTF_CSRF_ENABLED = True

# Record the effective (non-secret) OIDC settings at startup
log.info("Airflow webserver configured for direct Keycloak OIDC authentication")
log.info(f"Keycloak provider: {OIDC_ISSUER}")
log.info(f"Keycloak client: {OIDC_CLIENT_ID}")

airflow/helm/values.tmpl.yaml

Lines changed: 11 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -165,6 +165,10 @@ webserverSecretKeySecretName: ${webserver_secret_name}
165165
webserver:
166166
replicas: 3
167167

168+
# Keycloak OIDC Authentication Configuration
169+
webserverConfig: |-
170+
${webserver_config}
171+
168172
startupProbe:
169173
timeoutSeconds: 20
170174
failureThreshold: 60 # Number of tries before giving up (10 minutes with periodSeconds of 10)
@@ -368,3 +372,10 @@ extraEnv: |
368372
value: "1024"
369373
- name: AIRFLOW__WEBSERVER__EXPOSE_CONFIG
370374
value: "True"
375+
- name: AIRFLOW__WEBSERVER__BASE_URL
376+
value: "${airflow_base_url}"
377+
- name: OIDC_CLIENT_SECRET
378+
valueFrom:
379+
secretKeyRef:
380+
name: airflow-oidc-secret
381+
key: client-secret

terraform-unity/modules/terraform-unity-sps-airflow/locals.tf

Lines changed: 2 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -26,4 +26,6 @@ locals {
2626
"dev" = "#58cc35"
2727
"sbg-dev" = "#58cc35"
2828
}[var.venue]
29+
# BASE_URL uses placeholder initially, updated by null_resource after LB is created
30+
airflow_base_url = "http://placeholder:${local.load_balancer_port}"
2931
}

terraform-unity/modules/terraform-unity-sps-airflow/main.tf

Lines changed: 55 additions & 6 deletions
Original file line numberDiff line numberDiff line change
@@ -49,6 +49,23 @@ resource "kubernetes_secret" "airflow_webserver" {
4949
}
5050
}
5151

52+
# Keycloak OIDC client secret for direct authentication.
# The value is copied from the SSM parameter read by
# data.aws_ssm_parameter.keycloak_client_secret; both exist only when
# var.enable_oidc_auth is true.
# NOTE(review): the Helm values reference "airflow-oidc-secret" through a
# secretKeyRef - confirm the pods can still start when OIDC is disabled.
resource "kubernetes_secret" "airflow_oidc" {
  count = var.enable_oidc_auth ? 1 : 0

  metadata {
    namespace = data.kubernetes_namespace.service_area.metadata[0].name
    name      = "airflow-oidc-secret"
  }

  data = {
    "client-secret" = data.aws_ssm_parameter.keycloak_client_secret[0].value
  }
}
63+
64+
# Reads the Keycloak client secret from SSM; only looked up when OIDC is enabled.
data "aws_ssm_parameter" "keycloak_client_secret" {
  count = var.enable_oidc_auth ? 1 : 0

  name = var.keycloak_client_secret_ssm_param
}
68+
5269
# TODO evaluate if this role is still necessary
5370
resource "kubernetes_role" "airflow_pod_creator" {
5471
metadata {
@@ -413,6 +430,13 @@ resource "helm_release" "airflow" {
413430
unity_cluster_name = data.aws_eks_cluster.cluster.name
414431
karpenter_node_pools = join(",", var.karpenter_node_pools)
415432
cwl_dag_ecr_uri = "${data.aws_caller_identity.current.account_id}.dkr.ecr.us-west-2.amazonaws.com"
433+
airflow_base_url = local.airflow_base_url
434+
# Keycloak Direct OIDC authentication configuration
435+
webserver_config = indent(4, templatefile("${path.module}/../../../airflow/config/webserver_config.py.tpl", {
436+
keycloak_role_mapping = var.keycloak_role_mapping
437+
keycloak_provider_url = var.keycloak_provider_url
438+
keycloak_client_id = var.keycloak_client_id
439+
}))
416440
})
417441
]
418442
set_sensitive {
@@ -431,6 +455,30 @@ resource "helm_release" "airflow" {
431455
]
432456
}
433457

458+
# Update Airflow BASE_URL after LoadBalancer is created.
#
# The Helm values are rendered with a placeholder BASE_URL, so every Helm
# upgrade puts the placeholder back on the deployments. The triggers therefore
# include the Helm release revision (not just the LB hostname) so the real URL
# is re-applied after each upgrade.
resource "null_resource" "update_airflow_base_url" {
  triggers = {
    lb_hostname   = data.kubernetes_service.airflow_ingress_internal.status[0].load_balancer[0].ingress[0].hostname
    lb_port       = local.load_balancer_port
    helm_revision = helm_release.airflow.metadata[0].revision
  }

  provisioner "local-exec" {
    # "set -e" makes the provisioner fail if either kubectl call fails; without
    # it only the exit status of the last command is reported.
    command = <<-EOT
      set -e
      kubectl set env deployment/airflow-webserver \
        -n ${data.kubernetes_namespace.service_area.metadata[0].name} \
        AIRFLOW__WEBSERVER__BASE_URL=http://${data.kubernetes_service.airflow_ingress_internal.status[0].load_balancer[0].ingress[0].hostname}:${local.load_balancer_port}
      kubectl set env deployment/airflow-scheduler \
        -n ${data.kubernetes_namespace.service_area.metadata[0].name} \
        AIRFLOW__WEBSERVER__BASE_URL=http://${data.kubernetes_service.airflow_ingress_internal.status[0].load_balancer[0].ingress[0].hostname}:${local.load_balancer_port}
    EOT
  }

  depends_on = [
    helm_release.airflow,
    kubernetes_service.airflow_ingress_internal,
    time_sleep.wait_for_airflow_lb
  ]
}
481+
434482
/* Note: re-enable this to allow access via the JPL network
435483
resource "aws_security_group" "airflow_ingress_sg" {
436484
name = "${var.project}-${var.venue}-airflow-ingress-sg"
@@ -563,9 +611,10 @@ resource "kubernetes_service" "airflow_ingress_internal" {
563611
}
564612
}
565613
wait_for_load_balancer = true
566-
lifecycle { # this is necessary or terraform will try to recreate this every run
567-
ignore_changes = all
568-
}
614+
# Temporarily disabled to allow updating load balancer scheme to internet-facing
615+
# lifecycle { # this is necessary or terraform will try to recreate this every run
616+
# ignore_changes = all
617+
# }
569618
depends_on = [helm_release.airflow]
570619
}
571620

@@ -662,8 +711,8 @@ resource "aws_ssm_parameter" "airflow_ui_url" {
662711
name = format("/%s", join("/", compact(["", var.project, var.venue, var.service_area, "processing", "airflow", "ui_url"])))
663712
description = "The URL of the Airflow UI."
664713
type = "String"
665-
# Updated to use LoadBalancer instead of shared services domain
666-
value = "http://${data.kubernetes_service.airflow_ingress_internal.status[0].load_balancer[0].ingress[0].hostname}:${local.load_balancer_port}/"
714+
# Updated to use LoadBalancer instead of shared services domain (no trailing slash)
715+
value = "http://${data.kubernetes_service.airflow_ingress_internal.status[0].load_balancer[0].ingress[0].hostname}:${local.load_balancer_port}"
667716
tags = merge(local.common_tags, {
668717
Name = format(local.resource_name_prefix, "endpoints-airflow_ui")
669718
Component = "SSM"
@@ -738,7 +787,7 @@ resource "aws_ssm_parameter" "airflow_api_health_check_endpoint" {
738787

739788
resource "aws_ssm_parameter" "unity_proxy_airflow_ui" {
740789
name = format("/%s", join("/", compact(["unity", var.project, var.venue, "cs", "management", "proxy", "configurations", "015-sps-airflow-ui"])))
741-
description = "The unity-proxy configuration for the Airflow UI."
790+
description = "The unity-proxy configuration for the Airflow UI"
742791
type = "String"
743792
value = <<-EOT
744793

terraform-unity/modules/terraform-unity-sps-airflow/variables.tf

Lines changed: 36 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -81,3 +81,39 @@ variable "karpenter_node_pools" {
8181
description = "Names of the Karpenter node pools"
8282
type = list(string)
8383
}
84+
85+
# Realm URL that is rendered into the Airflow webserver config as the OIDC issuer.
# NOTE(review): the default points at a specific test realm - confirm each
# deployment overrides it.
variable "keycloak_provider_url" {
  type        = string
  description = "Keycloak OIDC provider URL including realm (e.g., https://keycloak.example.com/realms/MAAP)"
  default     = "https://dit.kc-test-maap.xyz/realms/MAAP"
}
90+
91+
# Client ID rendered into the Airflow webserver config for the Keycloak OAuth provider.
variable "keycloak_client_id" {
  type        = string
  description = "Keycloak OIDC client ID for Airflow authentication"
  default     = "airflow"
}
96+
97+
# Name of the SSM parameter the client secret is read from (the path, not the secret itself).
variable "keycloak_client_secret_ssm_param" {
  type        = string
  description = "SSM parameter path containing Keycloak OIDC client secret"
  default     = "/sps/keycloak/client_secret"
}
102+
103+
# Gates creation of the OIDC client-secret lookup and Kubernetes secret.
# NOTE(review): the webserver config template appears to be rendered
# regardless of this flag - confirm that "false" is a supported setting.
variable "enable_oidc_auth" {
  type        = bool
  description = "Enable Keycloak OIDC authentication for Airflow"
  default     = true
}
108+
109+
# Keycloak group name => Airflow roles. The webserver config template uses the
# first role of each list as the group's role, so no list may be empty.
variable "keycloak_role_mapping" {
  description = "Mapping of Keycloak groups to Airflow roles"
  type        = map(list(string))
  default = {
    "airflow_admin"  = ["Admin"]
    "airflow_op"     = ["Op"]
    "airflow_user"   = ["User"]
    "airflow_viewer" = ["Viewer"]
    "airflow_public" = ["Public"]
  }

  # Fail at plan time with a clear message instead of an index error while
  # rendering the template.
  validation {
    condition     = alltrue([for roles in values(var.keycloak_role_mapping) : length(roles) > 0])
    error_message = "Every Keycloak group in keycloak_role_mapping must list at least one Airflow role."
  }
}

0 commit comments

Comments
 (0)