Skip to content
Closed
Show file tree
Hide file tree
Changes from 4 commits
Commits
File filter

Filter by extension

Filter by extension


Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
10 changes: 0 additions & 10 deletions .bazelrc

This file was deleted.

87 changes: 87 additions & 0 deletions .github/workflows/build_and_publish_template.yml
Original file line number Diff line number Diff line change
@@ -0,0 +1,87 @@
# This workflow builds ArrayRecord wheels and uploads them as artifacts.
#
# It is a reusable template (`workflow_call`). The `build-and-test` job builds
# and tests one wheel per Python version / OS combination; the `publish-wheel`
# job then gathers every wheel and publishes them to PyPI.

name: Build & Publish Template

on:
  workflow_call:
    inputs:
      # URL attached to the `pypi` deployment environment of the publish job.
      pypi_project_url:
        required: true
        type: string
      # NOTE(review): declared and required, but not referenced anywhere in
      # this workflow yet - confirm whether the build is meant to consume it.
      is_nightly:
        required: true
        type: boolean

defaults:
  run:
    shell: bash

permissions:
  contents: read

jobs:
  build-and-test:
    name: "Python ${{ matrix.python-version }} on ${{ matrix.os }}"
    runs-on: "${{ matrix.os }}"

    strategy:
      # Let the remaining matrix cells finish when one of them fails.
      fail-fast: false
      matrix:
        # Quoted so that "3.10" is not parsed as the float 3.1.
        python-version: ["3.10", "3.11", "3.12", "3.13"]
        os: [ubuntu-22.04, ubuntu-22.04-arm, macos-14]

    env:
      # Single place where the Bazel version is pinned; the build step below
      # derives BAZEL_VERSION from it instead of repeating the literal.
      USE_BAZEL_VERSION: "7.2.1"
    steps:
      - name: Set up Bazel
        uses: bazel-contrib/setup-bazel@0.15.0
      - name: Check Bazel installation
        run: |
          which bazel
          bazel version
      - name: Set up Python
        uses: actions/setup-python@v5
        with:
          python-version: ${{ matrix.python-version }}
      # v4 runs on a supported Node runtime (v3 used the deprecated Node 16).
      - uses: actions/checkout@v4
      - name: Create directory
        run: |
          mkdir -p /tmp/array_record
          cp -r . /tmp/array_record
      - name: Build package
        env:
          # Passed through the environment rather than being interpolated
          # into the script text.
          PYTHON_VERSION: "${{ matrix.python-version }}"
        run: |
          set -xe
          export PYTHON_MAJOR_VERSION=$(echo "${PYTHON_VERSION}" | cut -d. -f1)
          export PYTHON_MINOR_VERSION=$(echo "${PYTHON_VERSION}" | cut -d. -f2)
          export BAZEL_VERSION="${USE_BAZEL_VERSION}"
          export OUTPUT_DIR="/tmp/array_record"
          export SOURCE_DIR="/tmp/array_record"
          . "${SOURCE_DIR}/oss/runner_common.sh"
          build_and_test_array_record
      - name: Upload ArrayRecord artifacts
        uses: actions/upload-artifact@v4
        with:
          name: built-array-record-wheels-${{ matrix.os }}-${{ matrix.python-version }}
          path: /tmp/array_record/all_dist/*.whl
          # A build that produced no wheel must fail here instead of letting
          # the publish job run with an incomplete set of artifacts.
          if-no-files-found: error

  publish-wheel:
    runs-on: ubuntu-22.04
    # Runs only after every matrix cell of build-and-test has succeeded.
    needs: build-and-test
    permissions:
      id-token: write
    environment:
      name: pypi
      url: ${{ inputs.pypi_project_url }}
    steps:
      - name: Download ArrayRecord artifacts
        uses: actions/download-artifact@v4
        with:
          pattern: built-array-record-wheels-*
          path: dist/
          # Flatten all per-platform artifacts into the single dist/ directory.
          merge-multiple: true
      - name: Publish package distributions to PyPI
        uses: pypa/gh-action-pypi-publish@release/v1
        with:
          attestations: false
          verbose: true
13 changes: 13 additions & 0 deletions .github/workflows/publish_release.yml
Original file line number Diff line number Diff line change
@@ -0,0 +1,13 @@
# Manually triggered release workflow: delegates the whole build, test and
# publish pipeline to the shared template workflow.
name: Build and Publish Release

on: workflow_dispatch

jobs:
  call-workflow:
    # The called workflow's publish job requests `id-token: write`, so the
    # caller has to grant it here.
    permissions:
      contents: read
      id-token: write
    uses: ./.github/workflows/build_and_publish_template.yml
    with:
      pypi_project_url: https://pypi.org/project/array-record
      is_nightly: false
2 changes: 1 addition & 1 deletion .github/workflows/python-tests.yml
Original file line number Diff line number Diff line change
Expand Up @@ -19,7 +19,7 @@ jobs:
run: |
docker build --progress=plain --no-cache \
--build-arg PYTHON_VERSION=${{ matrix.python-version }} \
-t array_record:latest - < oss/build.Dockerfile
-t array_record:latest - < oss/Dockerfile
- name: Build wheels and test
run: |
docker run --rm -a stdin -a stdout -a stderr \
Expand Down
12 changes: 12 additions & 0 deletions .gitignore
Original file line number Diff line number Diff line change
@@ -0,0 +1,12 @@
# Byte-compiled / optimized / DLL files
__pycache__/
*.py[cod]
*$py.class

# Bazel outputs
bazel-array_record
bazel-bin
bazel-out
bazel-testlogs

# Bazel module lockfile
MODULE.bazel.lock
32 changes: 26 additions & 6 deletions BUILD
Original file line number Diff line number Diff line change
@@ -1,8 +1,10 @@
# ArrayRecord is a new file format for IO intensive applications.
# It supports efficient random access and various compression algorithms.

load("@rules_python//python:pip.bzl", "compile_pip_requirements")

load("@python//3.10:defs.bzl", compile_pip_requirements_3_10 = "compile_pip_requirements")
load("@python//3.11:defs.bzl", compile_pip_requirements_3_11 = "compile_pip_requirements")
load("@python//3.12:defs.bzl", compile_pip_requirements_3_12 = "compile_pip_requirements")
load("@python//3.13:defs.bzl", compile_pip_requirements_3_13 = "compile_pip_requirements")

package(default_visibility = ["//visibility:public"])

Expand All @@ -15,8 +17,26 @@ py_library(
srcs = ["setup.py"],
)

compile_pip_requirements(
name = "requirements",
requirements_in = "requirements.in",
requirements_txt = "requirements_lock.txt",
compile_pip_requirements_3_10(
name = "requirements_3_10",
requirements_in = "test_requirements.in",
requirements_txt = "test_requirements_lock_3_10.txt",
)

compile_pip_requirements_3_11(
name = "requirements_3_11",
requirements_in = "test_requirements.in",
requirements_txt = "test_requirements_lock_3_11.txt",
)

compile_pip_requirements_3_12(
name = "requirements_3_12",
requirements_in = "test_requirements.in",
requirements_txt = "test_requirements_lock_3_12.txt",
)

compile_pip_requirements_3_13(
name = "requirements_3_13",
requirements_in = "test_requirements.in",
requirements_txt = "test_requirements_lock_3_13.txt",
)
56 changes: 42 additions & 14 deletions MODULE.bazel
Original file line number Diff line number Diff line change
Expand Up @@ -12,12 +12,9 @@
# See the License for the specific language governing permissions and
# limitations under the License.

# TODO(fchern): automate version string alignment with setup.py
VERSION = "0.6.0"

module(
name = "array_record",
version = VERSION,
version = "0.7.3",
repo_name = "com_google_array_record",
)

Copy link
Copy Markdown
Contributor Author

@mtsokol mtsokol Jul 9, 2025

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Let's use an actual bazel_dep(name = "protobuf", version = ...) here and make the build fail if a different version gets picked:

write_to_bazelrc "common --check_direct_dependencies=error"

Expand All @@ -32,21 +29,52 @@ bazel_dep(name = "eigen", version = "3.4.0.bcr.3")
bazel_dep(name = "riegeli", version = "0.0.0-20241218-3385e3c")
bazel_dep(name = "pybind11_bazel", version = "2.12.0")

PYTHON_VERSION = "3.10"
http_archive = use_repo_rule("@bazel_tools//tools/build_defs/repo:http.bzl", "http_archive")

python = use_extension("@rules_python//python/extensions:python.bzl", "python")
python.toolchain(
ignore_root_user_error = True, # Required for our containerized CI environments.
python_version = PYTHON_VERSION,
http_archive(
name = "pybind11",
Copy link
Copy Markdown
Contributor

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Why add an extra pybind11 rule here?

Copy link
Copy Markdown
Contributor Author

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

TBH I mostly followed the Grain setup, as that's the repo we're mainly working with: https://github.com/google/grain/blob/3bc1d2582dffdddf00b0d436a24b6313b69c617d/MODULE.bazel#L31
I can try the build without that rule and remove it if it still builds successfully.

Copy link
Copy Markdown
Contributor

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

AR already uses pybind11 through pybind11_bazel, so it should work without the pybind11 pulled in via http_archive.

Copy link
Copy Markdown
Contributor Author

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Done! Removed pybind11 rule from here.

build_file = "@pybind11_bazel//:pybind11.BUILD",
sha256 = "201966a61dc826f1b1879a24a3317a1ec9214a918c8eb035be2f30c3e9cfbdcb",
strip_prefix = "pybind11-2.10.3",
urls = ["https://github.com/pybind/pybind11/archive/refs/tags/v2.10.3.zip"],
)

SUPPORTED_PYTHON_VERSIONS = [
"3.10",
"3.11",
"3.12",
"3.13",
]

DEFAULT_PYTHON_VERSION = "3.10"

python_configure = use_extension("@pybind11_bazel//:python_configure.bzl", "extension")
use_repo(python_configure, "local_config_python")

python = use_extension("@rules_python//python/extensions:python.bzl", "python")

[
python.toolchain(
ignore_root_user_error = True,
is_default = python_version == DEFAULT_PYTHON_VERSION,
python_version = python_version,
)
for python_version in SUPPORTED_PYTHON_VERSIONS
]

use_repo(python, python = "python_versions")

pip = use_extension("@rules_python//python/extensions:pip.bzl", "pip")

# requirements_lock.txt is generated by
# bazel run //:requirements.update
pip.parse(
hub_name = "pypi",
python_version = PYTHON_VERSION,
requirements_lock = "//:requirements_lock.txt",
)
[
pip.parse(
hub_name = "pypi",
python_version = version,
requirements_lock = "test_requirements_lock_" + version.replace(".", "_") + ".txt",
)
for version in SUPPORTED_PYTHON_VERSIONS
]

use_repo(pip, "pypi")
2 changes: 1 addition & 1 deletion cpp/array_record_reader.cc
Original file line number Diff line number Diff line change
Expand Up @@ -80,7 +80,7 @@ class ChunkOffset {
public:
virtual ~ChunkOffset() {}
virtual uint64_t operator[](size_t idx) const = 0;
virtual size_t size() const = 0;
virtual uint64_t size() const = 0;
bool empty() const { return size() == 0; }
};

Expand Down
10 changes: 6 additions & 4 deletions oss/build.Dockerfile → oss/Dockerfile
Original file line number Diff line number Diff line change
Expand Up @@ -6,13 +6,15 @@ ARG AUDITWHEEL_PLATFORM
FROM quay.io/pypa/${AUDITWHEEL_PLATFORM}

ARG PYTHON_VERSION
ARG PYTHON_BIN
ARG BAZEL_VERSION

ENV DEBIAN_FRONTEND=noninteractive

RUN ulimit -n 1024 && yum install -y rsync
ENV PATH="${PYTHON_BIN}:${PATH}"
ENV PYTHON_BIN_PATH=/opt/python/cp${PYTHON_VERSION}-cp${PYTHON_VERSION}/bin
ENV PATH="${PYTHON_BIN_PATH}:${PATH}"

ENV PYTHON_BIN=${PYTHON_BIN_PATH}/python

# Download the correct bazel version and make sure it's on path.
RUN BAZEL_ARCH_SUFFIX="$(uname -m | sed s/aarch64/arm64/)" \
Expand All @@ -21,7 +23,7 @@ RUN BAZEL_ARCH_SUFFIX="$(uname -m | sed s/aarch64/arm64/)" \

# Install dependencies needed for array_record.
RUN --mount=type=cache,target=/root/.cache \
${PYTHON_BIN}/python -m pip install -U \
$PYTHON_BIN -m pip install -U \
absl-py \
auditwheel \
etils[epath] \
Expand All @@ -30,4 +32,4 @@ RUN --mount=type=cache,target=/root/.cache \
twine \
wheel;

WORKDIR "/tmp/array_record"
WORKDIR "/tmp/array_record"
48 changes: 30 additions & 18 deletions oss/build_whl.sh
Original file line number Diff line number Diff line change
@@ -1,22 +1,12 @@
#!/bin/bash

# Build wheel for the python version specified by $PYTHON_VERSION.
# Optionally, can set the environment variable $PYTHON_BIN to refer to a
# specific python interpreter.

set -e -x

if [ -z ${PYTHON_BIN} ]; then
if [ -z ${PYTHON_VERSION} ]; then
PYTHON_BIN=$(which python3)
else
PYTHON_BIN=$(which python${PYTHON_VERSION})
fi
fi

PYTHON_MAJOR_VERSION=$(${PYTHON_BIN} -c 'import sys; print(sys.version_info.major)')
PYTHON_MINOR_VERSION=$(${PYTHON_BIN} -c 'import sys; print(sys.version_info.minor)')
PYTHON_VERSION="${PYTHON_MAJOR_VERSION}.${PYTHON_MINOR_VERSION}"
export PYTHON_VERSION="${PYTHON_VERSION}"
OUTPUT_DIR="${OUTPUT_DIR:-/tmp/array_record}"

function write_to_bazelrc() {
echo "$1" >> .bazelrc
Expand All @@ -26,11 +16,18 @@ function main() {
# Remove .bazelrc if it already exists
[ -e .bazelrc ] && rm .bazelrc

write_to_bazelrc "build --incompatible_default_to_explicit_init_py"
write_to_bazelrc "build --enable_platform_specific_config"
write_to_bazelrc "build --@rules_python//python/config_settings:python_version=${PYTHON_VERSION}"
write_to_bazelrc "test --@rules_python//python/config_settings:python_version=${PYTHON_VERSION}"
write_to_bazelrc "test --action_env PYTHON_VERSION=${PYTHON_VERSION}"
write_to_bazelrc "test --test_timeout=300"

write_to_bazelrc "build -c opt"
write_to_bazelrc "build --cxxopt=-std=c++17"
write_to_bazelrc "build --host_cxxopt=-std=c++17"
write_to_bazelrc "build --experimental_repo_remote_exec"
write_to_bazelrc "build --python_path=\"${PYTHON_BIN}\""
PLATFORM="$(uname)"

if [ -n "${CROSSTOOL_TOP}" ]; then
write_to_bazelrc "build --crosstool_top=${CROSSTOOL_TOP}"
Expand All @@ -39,8 +36,8 @@ function main() {

export USE_BAZEL_VERSION="${BAZEL_VERSION}"
bazel clean
bazel build ...
bazel test --verbose_failures --test_output=errors ...
bazel build ... --action_env MACOSX_DEPLOYMENT_TARGET='11.0' --action_env PYTHON_BIN_PATH="${PYTHON_BIN}"
bazel test --verbose_failures --test_output=errors ... --action_env PYTHON_BIN_PATH="${PYTHON_BIN}"

DEST="/tmp/array_record/all_dist"
# Create the directory, then do dirname on a non-existent file inside it to
Expand Down Expand Up @@ -68,19 +65,34 @@ function main() {

pushd ${TMPDIR}
echo $(date) : "=== Building wheel"
${PYTHON_BIN} setup.py bdist_wheel --python-tag py3${PYTHON_MINOR_VERSION}
if [ "$(uname)" = "Darwin" ]; then
"$PYTHON_BIN" setup.py bdist_wheel --python-tag py3"${PYTHON_MINOR_VERSION}" --plat-name macosx_11_0_"$(uname -m)"
else
"$PYTHON_BIN" setup.py bdist_wheel --python-tag py3"${PYTHON_MINOR_VERSION}"
fi

if [ -n "${AUDITWHEEL_PLATFORM}" ]; then
echo $(date) : "=== Auditing wheel"
auditwheel repair --plat ${AUDITWHEEL_PLATFORM} -w dist dist/*.whl
cp dist/*manylinux*.whl "${DEST}"
else
cp dist/*.whl "${DEST}"
fi

echo $(date) : "=== Listing wheel"
ls -lrt dist/*.whl
cp dist/*.whl "${DEST}"
ls -lrt "${DEST}"/*.whl
popd

echo $(date) : "=== Output wheel file is in: ${DEST}"

# Install ArrayRecord from the wheel and run smoke tests.
# TF is not available on Python 3.13 and above.
if (( "${PYTHON_MINOR_VERSION}" < 13 )); then
$PYTHON_BIN -m pip install --find-links="${DEST}" --pre array-record
$PYTHON_BIN -m pip install jax tensorflow grain
$PYTHON_BIN oss/test_with_grain.py
$PYTHON_BIN oss/test_with_tf.py
fi
}

main
Loading