Skip to content

Commit 9efce66

Browse files
TEZ-4682: [Cloud] Tez AM docker image
1 parent 1fc0035 commit 9efce66

9 files changed

Lines changed: 572 additions & 1 deletion

File tree

tez-dag/src/main/java/org/apache/tez/dag/app/DAGAppMaster.java

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -2429,7 +2429,7 @@ public static void main(String[] args) {
24292429
Objects.requireNonNull(appSubmitTimeStr,
24302430
ApplicationConstants.APP_SUBMIT_TIME_ENV + " is null");
24312431

2432-
Configuration conf = new Configuration();
2432+
Configuration conf = new TezConfiguration();
24332433

24342434
AMExtensions amExtensions = getFrameworkService(conf).getAMExtensions();
24352435
DAGProtos.ConfigurationProto confProto = amExtensions.loadConfigurationProto();

tez-dist/pom.xml

Lines changed: 32 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -118,6 +118,38 @@
118118
</dependency>
119119
</dependencies>
120120
</profile>
121+
<profile>
  <!--
    Builds the Tez AM Docker image. When this profile is active (-Pdocker),
    src/docker/build-docker.sh is executed in the package phase and receives
    the Hadoop version, the Tez (project) version and the Docker repository
    name as command-line options.
  -->
  <id>docker</id>
  <build>
    <plugins>
      <plugin>
        <groupId>org.codehaus.mojo</groupId>
        <artifactId>exec-maven-plugin</artifactId>
        <executions>
          <execution>
            <id>build-docker-image</id>
            <phase>package</phase>
            <goals>
              <goal>exec</goal>
            </goals>
            <configuration>
              <!--
                Resolve bash from PATH rather than assuming /bin/bash; this
                matches the script's own "#!/usr/bin/env bash" shebang.
              -->
              <executable>bash</executable>
              <arguments>
                <argument>${project.basedir}/src/docker/build-docker.sh</argument>
                <argument>-hadoop</argument>
                <argument>${hadoop.version}</argument>
                <argument>-tez</argument>
                <argument>${project.version}</argument>
                <argument>-repo</argument>
                <argument>apache</argument>
              </arguments>
            </configuration>
          </execution>
        </executions>
      </plugin>
    </plugins>
  </build>
</profile>
121153
</profiles>
122154

123155
<build>

tez-dist/src/docker/Dockerfile

Lines changed: 89 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,89 @@
1+
#
2+
# Licensed to the Apache Software Foundation (ASF) under one or more
3+
# contributor license agreements. See the NOTICE file distributed with
4+
# this work for additional information regarding copyright ownership.
5+
# The ASF licenses this file to You under the Apache License, Version 2.0
6+
# (the "License"); you may not use this file except in compliance with
7+
# the License. You may obtain a copy of the License at
8+
#
9+
# http://www.apache.org/licenses/LICENSE-2.0
10+
#
11+
# Unless required by applicable law or agreed to in writing, software
12+
# distributed under the License is distributed on an "AS IS" BASIS,
13+
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
14+
# See the License for the specific language governing permissions and
15+
# limitations under the License.
16+
#
17+
18+
# Name of the stage (or image) the 'env' stage is built from.
ARG BUILD_ENV=unarchive

# Parent of the extraction stage. The ONBUILD triggers run when a later stage
# uses this one as its base (FROM ${BUILD_ENV}) and copy the Hadoop and Tez
# tarballs from the build context into /opt. The tag is pinned so that builds
# are reproducible.
FROM ubuntu:24.04 AS unarchive
ONBUILD COPY hadoop-*.tar.gz /opt/
# Matches e.g. "tez-1.0.0-SNAPSHOT.tar.gz"
ONBUILD COPY tez-*.tar.gz /opt/

# Extraction stage: unpacks both tarballs into fixed directory names
# (/opt/hadoop and /opt/tez) so the final stage does not depend on versions.
FROM ${BUILD_ENV} AS env
ARG HADOOP_VERSION
ARG TEZ_VERSION

# Hadoop is unpacked without docs, jdiff, sources, test jars and webapps, and
# its top-level directory is stripped. The Tez tarball is unpacked as-is.
# tar runs without -v to keep the build log readable.
RUN mkdir -p /opt/hadoop \
    && tar -xz \
        --exclude="hadoop-$HADOOP_VERSION/share/doc" \
        --exclude="*/jdiff" \
        --exclude="*/sources" \
        --exclude="*tests.jar" \
        --exclude="*/webapps" \
        -f "/opt/hadoop-$HADOOP_VERSION.tar.gz" \
        -C /opt/hadoop --strip-components 1 \
    && mkdir -p /opt/tez \
    && tar -xz \
        -f "/opt/tez-$TEZ_VERSION.tar.gz" \
        -C /opt/tez \
    && rm -rf "/opt/hadoop-$HADOOP_VERSION.tar.gz" "/opt/tez-$TEZ_VERSION.tar.gz"

# Runtime stage
FROM eclipse-temurin:21.0.3_9-jre-ubi9-minimal AS run

ARG UID=1000
ARG HADOOP_VERSION
ARG TEZ_VERSION

# Install dependencies and create the unprivileged 'tez' user
# hadolint ignore=DL3041
RUN set -ex; \
    microdnf update -y; \
    microdnf -y install findutils gettext procps; \
    microdnf clean all; \
    useradd --no-create-home -s /sbin/nologin -c "" --uid "$UID" tez

# Set necessary environment variables.
# Hadoop and Tez read their configuration from the same directory.
ENV HADOOP_HOME=/opt/hadoop \
    TEZ_HOME=/opt/tez \
    TEZ_CONF_DIR=/opt/tez/conf \
    HADOOP_CONF_DIR=/opt/tez/conf

ENV TEZ_CLIENT_VERSION=$TEZ_VERSION

ENV PATH=$TEZ_HOME/bin:$HADOOP_HOME/bin:$PATH

# Copy the unpacked distributions from the normalized directories of 'env'
COPY --from=env --chown=tez /opt/hadoop $HADOOP_HOME
COPY --from=env --chown=tez /opt/tez $TEZ_HOME

RUN mkdir -p "$TEZ_CONF_DIR" && chown tez:tez "$TEZ_CONF_DIR"

COPY --chown=tez entrypoint.sh /
COPY --chown=tez conf $TEZ_CONF_DIR

# Create Extension Point Directory
RUN mkdir -p "$TEZ_HOME/plugins" \
    && chown tez:tez "$TEZ_HOME/plugins" \
    && chmod 755 "$TEZ_HOME/plugins"

RUN chmod +x /entrypoint.sh

# Drop root privileges for the running container
USER tez
WORKDIR $TEZ_HOME

# Expose AM ports via -p flag in docker command
# EXPOSE 10001 10002 10003 8042

ENTRYPOINT ["/entrypoint.sh"]

tez-dist/src/docker/README.md

Lines changed: 52 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,52 @@
1+
<!--
2+
Licensed to the Apache Software Foundation (ASF) under one
3+
or more contributor license agreements. See the NOTICE file
4+
distributed with this work for additional information
5+
regarding copyright ownership. The ASF licenses this file
6+
to you under the Apache License, Version 2.0 (the
7+
"License"); you may not use this file except in compliance
8+
with the License. You may obtain a copy of the License at
9+
10+
http://www.apache.org/licenses/LICENSE-2.0
11+
12+
Unless required by applicable law or agreed to in writing, software
13+
distributed under the License is distributed on an "AS IS" BASIS,
14+
WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
15+
See the License for the specific language governing permissions and
16+
limitations under the License.
17+
-->
18+
19+
# Tez AM Docker
20+
21+
1. Building the docker image:
22+
23+
```bash
24+
mvn clean install -DskipTests -Pdocker,tools
25+
```
26+
27+
2. Install and start ZooKeeper (on macOS, using Homebrew):
28+
29+
```bash
30+
brew install zookeeper
31+
zkServer start
32+
```
33+
34+
3. Running the Tez AM container:
35+
36+
```bash
37+
docker run \
38+
-p 10001:10001 -p 8042:8042 \
39+
--name tez-am \
40+
apache/tez-am:1.0.0-SNAPSHOT
41+
```
42+
43+
4. Debugging the Tez AM container:
44+
45+
```bash
46+
docker run \
47+
-p 10001:10001 -p 8042:8042 -p 5005:5005 \
48+
-e TEZ_FRAMEWORK_MODE="STANDALONE_ZOOKEEPER" \
49+
-e JAVA_TOOL_OPTIONS='-agentlib:jdwp=transport=dt_socket,server=y,suspend=y,address=*:5005' \
50+
--name tez-am \
51+
apache/tez-am:1.0.0-SNAPSHOT
52+
```
tez-dist/src/docker/build-docker.sh

Lines changed: 128 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,128 @@
1+
#!/usr/bin/env bash
2+
3+
#
4+
# Licensed to the Apache Software Foundation (ASF) under one or more
5+
# contributor license agreements. See the NOTICE file distributed with
6+
# this work for additional information regarding copyright ownership.
7+
# The ASF licenses this file to You under the Apache License, Version 2.0
8+
# (the "License"); you may not use this file except in compliance with
9+
# the License. You may obtain a copy of the License at
10+
#
11+
# http://www.apache.org/licenses/LICENSE-2.0
12+
#
13+
# Unless required by applicable law or agreed to in writing, software
14+
# distributed under the License is distributed on an "AS IS" BASIS,
15+
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
16+
# See the License for the specific language governing permissions and
17+
# limitations under the License.
18+
#
19+
20+
# -x: trace commands, -e: exit on error, -u: unset variables are errors,
# -o pipefail: a pipeline fails if any of its commands fails.
set -xeuo pipefail

# Values filled in by the command-line options below. Empty means "not given".
HADOOP_VERSION=
TEZ_VERSION=
REPO=

# Prints the command-line help to stderr.
usage() {
  cat <<EOF 1>&2
Usage: $0 [-h] [-hadoop <Hadoop version>] [-tez <Tez version>] [-repo <Docker repo>]
Build the Apache Tez AM Docker image
-h, -help Display help
-hadoop Build image with the specified Hadoop version
-tez Build image with the specified Tez version
-repo Docker repository
EOF
}

# Fails with a readable message when option $1 is not followed by a value.
# Call as: require_value "$@"
require_value() {
  if [ "$#" -lt 2 ]; then
    echo "Error: option $1 requires a value" >&2
    usage
    exit 1
  fi
}

while [ $# -gt 0 ]; do
  case "$1" in
    # -help is what the usage text advertises; -h is kept for compatibility.
    -h | -help | --help)
      usage
      exit 0
      ;;
    -hadoop)
      require_value "$@"
      HADOOP_VERSION=$2
      shift 2
      ;;
    -tez)
      require_value "$@"
      TEZ_VERSION=$2
      shift 2
      ;;
    -repo)
      require_value "$@"
      REPO=$2
      shift 2
      ;;
    *)
      # Unknown arguments are still ignored, but no longer silently.
      echo "Warning: ignoring unknown argument: $1" >&2
      shift
      ;;
  esac
done
63+
64+
# Absolute path of the directory containing this script.
SCRIPT_DIR=$(cd "$(dirname "$0")" && pwd)

# Both locations can be overridden from the environment.
DIST_DIR=${DIST_DIR:-"$SCRIPT_DIR/../.."}
PROJECT_ROOT=${PROJECT_ROOT:-"$SCRIPT_DIR/../../.."}

repo=${REPO:-apache}

# Temporary Docker build context. The EXIT trap removes it on every exit
# path, including failures that abort the script through 'set -e'.
WORK_DIR="$(mktemp -d)"
trap 'rm -rf "$WORK_DIR"' EXIT

# Downloaded Hadoop tarballs are kept here and reused by later runs.
CACHE_DIR="$SCRIPT_DIR/cache"
mkdir -p "$CACHE_DIR"

# Defaults Hadoop and Tez versions from pom.xml if not provided
HADOOP_VERSION=${HADOOP_VERSION:-$(mvn -f "$PROJECT_ROOT/pom.xml" -q help:evaluate -Dexpression=hadoop.version -DforceStdout)}
TEZ_VERSION=${TEZ_VERSION:-$(mvn -f "$PROJECT_ROOT/pom.xml" -q help:evaluate -Dexpression=project.version -DforceStdout)}

######################
# HADOOP FETCH LOGIC #
######################
HADOOP_FILE_NAME="hadoop-$HADOOP_VERSION.tar.gz"
HADOOP_URL=${HADOOP_URL:-"https://archive.apache.org/dist/hadoop/core/hadoop-$HADOOP_VERSION/$HADOOP_FILE_NAME"}
if [ ! -f "$CACHE_DIR/$HADOOP_FILE_NAME" ]; then
  echo "Downloading Hadoop from $HADOOP_URL..."
  # Download to a .tmp name first so that an interrupted transfer is never
  # mistaken for a complete cached tarball.
  if ! curl --fail -L "$HADOOP_URL" -o "$CACHE_DIR/$HADOOP_FILE_NAME.tmp"; then
    rm -f "$CACHE_DIR/$HADOOP_FILE_NAME.tmp"
    echo "Fail to download Hadoop, exiting...." >&2
    exit 1
  fi
  mv "$CACHE_DIR/$HADOOP_FILE_NAME.tmp" "$CACHE_DIR/$HADOOP_FILE_NAME"
fi

#####################################
# Pick tez tarball from local build #
#####################################
TEZ_FILE_NAME="tez-$TEZ_VERSION.tar.gz"
LOCAL_DIST_PATH="$DIST_DIR/target/$TEZ_FILE_NAME"

if [ -f "$LOCAL_DIST_PATH" ]; then
  echo "--> Found local Tez build artifact at: $LOCAL_DIST_PATH"
  cp "$LOCAL_DIST_PATH" "$WORK_DIR/"
else
  echo "--> Error: Local Tez artifact not found at $LOCAL_DIST_PATH" >&2
  echo "--> Please build the project first (e.g., mvn clean install -DskipTests)." >&2
  exit 1
fi

# -------------------------------------------------------------------------
# BUILD CONTEXT PREPARATION
# -------------------------------------------------------------------------
cp "$CACHE_DIR/$HADOOP_FILE_NAME" "$WORK_DIR/"
# The Dockerfile copies a 'conf' directory, so provide an empty one when the
# source tree has none.
cp -R "$SCRIPT_DIR/conf" "$WORK_DIR/" 2>/dev/null || mkdir -p "$WORK_DIR/conf"
cp "$SCRIPT_DIR/entrypoint.sh" "$WORK_DIR/"
cp "$SCRIPT_DIR/Dockerfile" "$WORK_DIR/"

echo "Building Docker image..."
docker build \
  -f "$WORK_DIR/Dockerfile" \
  -t "$repo/tez-am:$TEZ_VERSION" \
  --build-arg "BUILD_ENV=unarchive" \
  --build-arg "HADOOP_VERSION=$HADOOP_VERSION" \
  --build-arg "TEZ_VERSION=$TEZ_VERSION" \
  "$WORK_DIR"

# WORK_DIR is removed by the EXIT trap installed above.
echo "Docker image $repo/tez-am:$TEZ_VERSION built successfully."
Lines changed: 25 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,25 @@
1+
#
2+
# Licensed to the Apache Software Foundation (ASF) under one or more
3+
# contributor license agreements. See the NOTICE file distributed with
4+
# this work for additional information regarding copyright ownership.
5+
# The ASF licenses this file to You under the Apache License, Version 2.0
6+
# (the "License"); you may not use this file except in compliance with
7+
# the License. You may obtain a copy of the License at
8+
#
9+
# http://www.apache.org/licenses/LICENSE-2.0
10+
#
11+
# Unless required by applicable law or agreed to in writing, software
12+
# distributed under the License is distributed on an "AS IS" BASIS,
13+
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
14+
# See the License for the specific language governing permissions and
15+
# limitations under the License.
16+
#
17+
18+
# Root logger: INFO and above, routed to the appender named "console".
rootLogger.level = INFO
rootLogger.appenderRef.console.ref = console

# Console appender writing to standard error.
appender.console.name = console
appender.console.type = Console
appender.console.target = SYSTEM_ERR

# Line format: ISO-8601 timestamp, level, thread, shortened logger name, message.
appender.console.layout.type = PatternLayout
appender.console.layout.pattern = %d{ISO8601} %5p [%t] %c{2}: %m%n

0 commit comments

Comments
 (0)