From 7dbf0426352e3e59d77a655f8d5cb1bb2faf795b Mon Sep 17 00:00:00 2001 From: Cesar Canassa Date: Fri, 20 Feb 2026 23:51:38 +0100 Subject: [PATCH 1/2] GH-49351: [C++] Check TZDIR environment variable in vendored date library The vendored Howard Hinnant date library hardcodes /usr/share/zoneinfo as the timezone database path. This adds a TZDIR check in discover_tz_dir() before falling back to platform-specific defaults, consistent with POSIX conventions. This fixes timezone operations on non-FHS Linux distributions (e.g. NixOS) where zoneinfo lives under a non-standard path. --- cpp/src/arrow/public_api_test.cc | 29 +++++++++++++++++++++++ cpp/src/arrow/vendored/datetime/README.md | 5 +++- cpp/src/arrow/vendored/datetime/tz.cpp | 6 +++++ python/pyarrow/conftest.py | 3 ++- 4 files changed, 41 insertions(+), 2 deletions(-) diff --git a/cpp/src/arrow/public_api_test.cc b/cpp/src/arrow/public_api_test.cc index ccc80dc93a5..dbda1863a4b 100644 --- a/cpp/src/arrow/public_api_test.cc +++ b/cpp/src/arrow/public_api_test.cc @@ -15,6 +15,7 @@ // specific language governing permissions and limitations // under the License. 
+#include <filesystem> #include <sstream> #include <string> @@ -122,6 +123,34 @@ TEST(Misc, BuildInfo) { ASSERT_THAT(info.full_so_version, ::testing::HasSubstr(info.so_version)); } +#ifndef _WIN32 +TEST(Misc, TZDIREnvironmentVariable) { + // Find a valid zoneinfo directory + std::string tz_dir; + const char* env_tzdir = std::getenv("TZDIR"); + if (env_tzdir != nullptr && + std::filesystem::is_directory(env_tzdir)) { + tz_dir = env_tzdir; + } else if (std::filesystem::is_directory( + "/usr/share/zoneinfo")) { + tz_dir = "/usr/share/zoneinfo"; + } else { + GTEST_SKIP() << "No system zoneinfo directory found"; + } + + // Set TZDIR and verify timezone resolution works + EnvVarGuard guard("TZDIR", tz_dir); + auto arr = ArrayFromJSON( + timestamp(TimeUnit::SECOND, "UTC"), "[0]"); + ASSERT_OK_AND_ASSIGN( + auto result, + compute::Cast( + arr, + timestamp(TimeUnit::SECOND, "America/New_York"))); + ASSERT_NE(result.make_array(), nullptr); +} +#endif + TEST(Misc, SetTimezoneConfig) { #ifndef _WIN32 GTEST_SKIP() << "Can only set the Timezone database on Windows"; diff --git a/cpp/src/arrow/vendored/datetime/README.md b/cpp/src/arrow/vendored/datetime/README.md index 4399191b737..88f7a9d17a7 100644 --- a/cpp/src/arrow/vendored/datetime/README.md +++ b/cpp/src/arrow/vendored/datetime/README.md @@ -25,7 +25,10 @@ The following changes are made: - enclose the `date` namespace inside the `arrow_vendored` namespace - fix 4 declarations like `CONSTCD11 date::day operator "" _d(unsigned long long d) NOEXCEPT;` to not have offending whitespace for modern clang: - `CONSTCD11 date::day operator ""_d(unsigned long long d) NOEXCEPT;` + `CONSTCD11 date::day operator ""_d(unsigned long long d) NOEXCEPT;` +- check the TZDIR environment variable in `discover_tz_dir()` (tz.cpp) before + falling back to hardcoded paths, for compatibility with non-FHS Linux + distributions (e.g. 
NixOS) ## How to update diff --git a/cpp/src/arrow/vendored/datetime/tz.cpp b/cpp/src/arrow/vendored/datetime/tz.cpp index 2cf6c62a84d..fdc1c593dd2 100644 --- a/cpp/src/arrow/vendored/datetime/tz.cpp +++ b/cpp/src/arrow/vendored/datetime/tz.cpp @@ -486,6 +486,12 @@ discover_tz_dir() { struct stat sb; using namespace std; + // Check TZDIR environment variable first (POSIX standard) + const char* tz_dir_env = std::getenv("TZDIR"); + if (tz_dir_env != nullptr + && stat(tz_dir_env, &sb) == 0 + && S_ISDIR(sb.st_mode)) + return tz_dir_env; # if defined(ANDROID) || defined(__ANDROID__) CONSTDATA auto tz_dir_default = "/apex/com.android.tzdata/etc/tz"; CONSTDATA auto tz_dir_fallback = "/system/usr/share/zoneinfo"; diff --git a/python/pyarrow/conftest.py b/python/pyarrow/conftest.py index 41beaa14041..2ccb52b4578 100644 --- a/python/pyarrow/conftest.py +++ b/python/pyarrow/conftest.py @@ -111,7 +111,8 @@ if sys.platform == "win32": defaults['timezone_data'] = windows_has_tzdata() elif sys.platform == "emscripten": - defaults['timezone_data'] = os.path.exists("/usr/share/zoneinfo") + tz_dir = os.environ.get("TZDIR", "/usr/share/zoneinfo") + defaults['timezone_data'] = os.path.exists(tz_dir) try: import cython # noqa From 6d31f411f6a909c0b29bdac16574623837f76d1f Mon Sep 17 00:00:00 2001 From: Cesar Canassa Date: Sat, 21 Feb 2026 09:30:49 +0100 Subject: [PATCH 2/2] Fix linting error --- cpp/src/arrow/public_api_test.cc | 14 ++++---------- 1 file changed, 4 insertions(+), 10 deletions(-) diff --git a/cpp/src/arrow/public_api_test.cc b/cpp/src/arrow/public_api_test.cc index dbda1863a4b..1f105e95420 100644 --- a/cpp/src/arrow/public_api_test.cc +++ b/cpp/src/arrow/public_api_test.cc @@ -128,11 +128,9 @@ TEST(Misc, TZDIREnvironmentVariable) { // Find a valid zoneinfo directory std::string tz_dir; const char* env_tzdir = std::getenv("TZDIR"); - if (env_tzdir != nullptr && - std::filesystem::is_directory(env_tzdir)) { + if (env_tzdir != nullptr && 
std::filesystem::is_directory(env_tzdir)) { tz_dir = env_tzdir; - } else if (std::filesystem::is_directory( - "/usr/share/zoneinfo")) { + } else if (std::filesystem::is_directory("/usr/share/zoneinfo")) { tz_dir = "/usr/share/zoneinfo"; } else { GTEST_SKIP() << "No system zoneinfo directory found"; @@ -140,13 +138,9 @@ TEST(Misc, TZDIREnvironmentVariable) { // Set TZDIR and verify timezone resolution works EnvVarGuard guard("TZDIR", tz_dir); - auto arr = ArrayFromJSON( - timestamp(TimeUnit::SECOND, "UTC"), "[0]"); + auto arr = ArrayFromJSON(timestamp(TimeUnit::SECOND, "UTC"), "[0]"); ASSERT_OK_AND_ASSIGN( - auto result, - compute::Cast( - arr, - timestamp(TimeUnit::SECOND, "America/New_York"))); + auto result, compute::Cast(arr, timestamp(TimeUnit::SECOND, "America/New_York"))); ASSERT_NE(result.make_array(), nullptr); } #endif