-
-
Notifications
You must be signed in to change notification settings - Fork 6
Expand file tree
/
Copy pathDockerfile
More file actions
50 lines (40 loc) · 1.56 KB
/
Dockerfile
File metadata and controls
50 lines (40 loc) · 1.56 KB
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
# syntax=docker/dockerfile:1.20.0@sha256:26147acbda4f14c5add9946e2fd2ed543fc402884fd75146bd342a7f6271dc1d
# spark-builder stage: extends the local spark-k8s image with the client
# libraries for spark-connect.
FROM local-image/spark-k8s AS spark-builder

# Build arguments (no defaults are declared here, so the build must supply them).
# PRODUCT_VERSION: written to the "version" label and used to pin pyspark[connect].
ARG PRODUCT_VERSION
# PYTHON_VERSION: selects which python<version>-setuptools package is installed.
ARG PYTHON_VERSION
# RELEASE_VERSION: written to the "release" label.
ARG RELEASE_VERSION
# STACKABLE_USER_UID: owner of /stackable and the user the image switches to.
ARG STACKABLE_USER_UID

# Image metadata.
LABEL name="Stackable Spark Connect Client" \
      maintainer="info@stackable.tech" \
      vendor="Stackable GmbH" \
      version="${PRODUCT_VERSION}" \
      release="${RELEASE_VERSION}" \
      summary="Spark Connect Examples" \
      description="Spark Connect client libraries for Python and the JVM."
# Root is needed to install setuptools and to fix ownership of /stackable.
USER root

# Jupyter configuration, owned by the Stackable user and the root group (0).
COPY --chown=${STACKABLE_USER_UID}:0 spark-connect-client/stackable/.jupyter /stackable/.jupyter

RUN <<EOF
# Commands in a heredoc are not implicitly chained with &&. Without "set -e"
# only the exit status of the last command decides whether this step fails,
# so a failed microdnf update/install would be silently ignored.
set -e

microdnf update

# python{version}-setuptools: needed to build the pyspark[connect] package
microdnf install --nodocs \
  "python${PYTHON_VERSION}-setuptools"

# Remove package manager caches in the same layer that created them.
microdnf clean all
rm -rf /var/cache/yum

# All files and folders owned by root group to support running as arbitrary users.
# This is best practice as all container users will belong to the root group (0).
chown -R "${STACKABLE_USER_UID}:0" /stackable
chmod -R g=u /stackable
EOF
# Drop root privileges: everything from here on runs as the Stackable user.
USER ${STACKABLE_USER_UID}

# Python packages are installed with --user ("user mode") on purpose, to reduce
# the container attack surface. All versions are pinned:
# - pyspark[connect]: spark connect client libs, pinned to PRODUCT_VERSION
# - jupyterlab:       notebook client used in demos
# - scikit-learn, matplotlib: presumably used by the demo notebooks (TODO confirm)
RUN pip install --no-cache-dir --user \
      "pyspark[connect]==${PRODUCT_VERSION}" \
      "jupyterlab==4.4.1" \
      "scikit-learn==1.3.1" \
      "matplotlib==3.10.1"

WORKDIR /stackable