Skip to content
Draft
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension


Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
9 changes: 7 additions & 2 deletions dev/spark-test-image-util/docs/build-docs
Original file line number Diff line number Diff line change
Expand Up @@ -17,6 +17,8 @@
# limitations under the License.
#

set -e

if ! [ -x "$(command -v docker)" ]; then
echo "Error: Docker is not installed." >&2
exit 1
Expand All @@ -29,7 +31,7 @@ IMG_TAG=$(date +%s)
IMG_NAME="${REPOSITORY}:${IMG_TAG}"
IMG_URL="$REPO_OWNER/$IMG_NAME"
DOCKER_MOUNT_SPARK_HOME="/__w/spark/spark"
BUILD_DOCS_SCRIPT_PATH="${DOCKER_MOUNT_SPARK_HOME}/dev/spark-test-image-util/docs/run-in-container"
BUILD_DOCS_SCRIPT_PATH="${DOCKER_MOUNT_SPARK_HOME}/dev/spark-test-image-util/docs/run-in-container.sh"

FWDIR="$(cd "`dirname "${BASH_SOURCE[0]}"`"; pwd)"
SPARK_HOME="$(cd "`dirname "${BASH_SOURCE[0]}"`"/../../..; pwd)"
Expand All @@ -41,10 +43,12 @@ build/sbt -Phive -Pkinesis-asl clean unidoc package
docker buildx build \
--cache-from type=registry,ref="${DOCKER_CACHE_IMG}" \
--tag "${IMG_URL}" "${FWDIR}" \
--file "${SPARK_HOME}/dev/spark-test-image/docs/Dockerfile"
--file "${SPARK_HOME}/dev/spark-test-image/docs/Dockerfile" \
--load

# 3.Build docs on container: `error docs`, `scala doc`, `python doc`, `sql doc`.
docker run \
--user "$(id -u):$(id -g)" \
--mount type=bind,source="${SPARK_HOME}",target="${DOCKER_MOUNT_SPARK_HOME}" \
--interactive --tty "${IMG_URL}" \
/bin/bash -c "sh ${BUILD_DOCS_SCRIPT_PATH}"
Expand All @@ -58,6 +62,7 @@ if [[ "$SKIP_RDOC" != "1" ]]; then
# and when writing to subsequent files, will throw an error as:
# `! [EACCES] Failed to copy '/usr/local/lib/R/site-library/pkgdown/BS5/assets/katex-auto.js'
# to '/__w/spark/spark/R/pkg/docs/katex-auto.js': permission denied`
echo "Building final docs *OUTSIDE* of container."
export SKIP_ERRORDOC=1
export SKIP_SCALADOC=1
export SKIP_PYTHONDOC=1
Expand Down
Original file line number Diff line number Diff line change
@@ -1,3 +1,4 @@
#!/bin/bash
#
# Licensed to the Apache Software Foundation (ASF) under one or more
# contributor license agreements. See the NOTICE file distributed with
Expand All @@ -16,20 +17,29 @@
#

# 1.Set env variable.
export JAVA_HOME=/usr/lib/jvm/java-17-openjdk-arm64
export PATH=$JAVA_HOME/bin:$PATH
set -ex
_arch="$(uname -m)"
case "$_arch" in
"aarch64") export JAVA_HOME=/usr/lib/jvm/java-17-openjdk-arm64 ;;
"x86_64") export JAVA_HOME=/usr/lib/jvm/java-17-openjdk-amd64 ;;
*) echo "Unexpected arch $_arch picking first java-17-openjdk in /usr/lib/jvm";
export JAVA_HOME=$(ls /usr/lib/jvm/java-17-openjdk-* | head -n 1);;
esac
export PATH="$HOME/.bin:$JAVA_HOME/bin:$PATH"
export SPARK_DOCS_IS_BUILT_ON_HOST=1
# The R documentation is built on the host, so skip it here.
export SKIP_RDOC=1
mkdir -p ~/.bin
mkdir -p ~/.gem

# 2.Install bundler.
gem install bundler -v 2.4.22
gem install bundler -v 2.4.22 --install-dir ~/.gem --bindir ~/.bin
cd /__w/spark/spark/docs
bundle install

# 3.Build docs, including `error docs`, `scala doc`, `python doc` and `sql doc`, but excluding `r doc`.
# We need this link to make sure `python3` points to `python3.11` which contains the prerequisite packages.
ln -s "$(which python3.11)" "/usr/local/bin/python3"
# Activate the venv with mkdocs and friends.
. $VIRTUAL_ENV/bin/activate

# Build docs first with SKIP_API to ensure they are buildable without requiring any
# language docs to be built beforehand.
Expand Down
12 changes: 2 additions & 10 deletions dev/spark-test-image/docs/Dockerfile
Original file line number Diff line number Diff line change
Expand Up @@ -70,15 +70,6 @@ RUN apt-get update && apt-get install -y \
&& apt-get clean \
&& rm -rf /var/lib/apt/lists/*

# See more in SPARK-39959, roxygen2 < 7.2.1
RUN Rscript -e "install.packages(c('devtools', 'knitr', 'markdown', 'rmarkdown', 'testthat', 'remotes'), repos='https://cloud.r-project.org/')" && \
Rscript -e "devtools::install_version('roxygen2', version='7.2.0', repos='https://cloud.r-project.org')" && \
Rscript -e "devtools::install_version('pkgdown', version='2.0.1', repos='https://cloud.r-project.org')" && \
Rscript -e "devtools::install_version('preferably', version='0.4', repos='https://cloud.r-project.org')"

# See more in SPARK-39735
ENV R_LIBS_SITE="/usr/local/lib/R/site-library:${R_LIBS_SITE}:/usr/lib/R/library"

# Setup virtual environment
ENV VIRTUAL_ENV=/opt/spark-venv
RUN python3.12 -m venv $VIRTUAL_ENV
Expand All @@ -87,7 +78,8 @@ ENV PATH="$VIRTUAL_ENV/bin:$PATH"
# Should unpin 'sphinxcontrib-*' after upgrading sphinx>5
# See 'ipython_genutils' in SPARK-38517
# See 'docutils<0.18.0' in SPARK-39421
RUN python3.12 -m pip install 'sphinx==4.5.0' mkdocs 'pydata_sphinx_theme>=0.13' sphinx-copybutton nbsphinx numpydoc jinja2 markupsafe \
RUN . $VIRTUAL_ENV/bin/activate \
&& python3.12 -m pip install 'sphinx==4.5.0' mkdocs 'pydata_sphinx_theme>=0.13' sphinx-copybutton nbsphinx numpydoc jinja2 markupsafe \
ipython ipython_genutils sphinx_plotly_directive 'numpy>=1.22' 'pyarrow>=23.0.0' 'pandas==2.3.3' 'plotly>=4.8' 'docutils<0.18.0' \
'flake8==3.9.0' 'mypy==1.19.1' 'pytest==7.1.3' 'pytest-mypy-plugins==1.9.3' 'black==26.3.1' \
'pandas-stubs==1.2.0.53' 'grpcio==1.76.0' 'grpcio-status==1.76.0' 'protobuf==6.33.5' 'grpc-stubs==1.24.11' 'googleapis-common-protos-stubs==2.2.0' \
Expand Down
14 changes: 4 additions & 10 deletions docs/Gemfile.lock
Original file line number Diff line number Diff line change
Expand Up @@ -4,7 +4,6 @@ GEM
addressable (2.8.7)
public_suffix (>= 2.0.2, < 7.0)
base64 (0.3.0)
bigdecimal (3.2.2)
colorator (1.1.0)
concurrent-ruby (1.3.5)
csv (3.3.5)
Expand All @@ -14,9 +13,6 @@ GEM
eventmachine (1.2.7)
ffi (1.17.2)
forwardable-extended (2.6.0)
google-protobuf (4.31.1)
bigdecimal
rake (>= 13)
http_parser.rb (0.8.0)
i18n (1.14.7)
concurrent-ruby (~> 1.0)
Expand All @@ -41,8 +37,8 @@ GEM
webrick (~> 1.7)
jekyll-redirect-from (0.16.0)
jekyll (>= 3.3, < 5.0)
jekyll-sass-converter (3.1.0)
sass-embedded (~> 1.75)
jekyll-sass-converter (2.2.0)
sassc (> 2.0.1, < 3.0)
jekyll-watch (2.2.1)
listen (~> 3.0)
json (2.12.2)
Expand All @@ -58,16 +54,14 @@ GEM
pathutil (0.16.2)
forwardable-extended (~> 2.6)
public_suffix (6.0.2)
rake (13.3.0)
rb-fsevent (0.11.2)
rb-inotify (0.11.1)
ffi (~> 1.0)
rexml (3.4.4)
rouge (4.5.2)
safe_yaml (1.0.5)
sass-embedded (1.89.2)
google-protobuf (~> 4.31)
rake (>= 13)
sassc (2.4.0)
ffi (~> 1.9)
terminal-table (3.0.2)
unicode-display_width (>= 1.1.1, < 3)
unicode-display_width (2.6.0)
Expand Down
4 changes: 2 additions & 2 deletions docs/README.md
Original file line number Diff line number Diff line change
Expand Up @@ -60,7 +60,7 @@ To generate the Python or R API docs, you'll also need to [install Pandoc](https
If you'd like to generate R API documentation, install these libraries:

```sh
$ sudo Rscript -e 'install.packages(c("knitr", "devtools", "testthat", "rmarkdown"), repos="https://cloud.r-project.org/")'
$ sudo Rscript -e 'install.packages(c("knitr", "devtools", "testthat", "rmarkdown", "remotes"), repos="https://cloud.r-project.org/")'
$ sudo Rscript -e 'devtools::install_version("roxygen2", version = "7.1.2", repos="https://cloud.r-project.org/")'
$ sudo Rscript -e "devtools::install_version('pkgdown', version='2.0.1', repos='https://cloud.r-project.org')"
$ sudo Rscript -e "devtools::install_version('preferably', version='0.4', repos='https://cloud.r-project.org')"
Expand Down Expand Up @@ -136,6 +136,6 @@ Note: Before running it, you need to have `docker` installed.
$ dev/spark-test-image-util/docs/build-docs
```

It will generate all documents on the `container` and `host`.
It will generate all documents in the `container`, except for the R documentation, which is generated on the `host`.
Especially when there are conflicts between the libraries required by Python development environment
and the libraries required by generating Python docs environment, this is a good choice.