From d89ea407821e685ba6c23387786fd6faec662975 Mon Sep 17 00:00:00 2001 From: jameswillis Date: Tue, 9 Jun 2026 13:41:42 -0700 Subject: [PATCH 1/2] feat(raster): add ViewEntries view-spec module MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit Introduce the ViewEntries newtype over Vec with the view machinery — validation, identity check, visible-shape derivation, and composition — plus the view_entries! construction macro. Move ViewEntry into the module and re-export it from traits so existing import paths keep working. Pure, allocation-light spec logic with unit tests; no behavior change to band construction or reads yet. --- rust/sedona-raster/src/lib.rs | 1 + rust/sedona-raster/src/traits.rs | 25 +- rust/sedona-raster/src/view_entries.rs | 594 +++++++++++++++++++++++++ 3 files changed, 600 insertions(+), 20 deletions(-) create mode 100644 rust/sedona-raster/src/view_entries.rs diff --git a/rust/sedona-raster/src/lib.rs b/rust/sedona-raster/src/lib.rs index d5398804d..d22ef798c 100644 --- a/rust/sedona-raster/src/lib.rs +++ b/rust/sedona-raster/src/lib.rs @@ -21,3 +21,4 @@ pub mod builder; pub mod display; pub mod raster_loader; pub mod traits; +pub mod view_entries; diff --git a/rust/sedona-raster/src/traits.rs b/rust/sedona-raster/src/traits.rs index 22f5fbccf..3ead932d5 100644 --- a/rust/sedona-raster/src/traits.rs +++ b/rust/sedona-raster/src/traits.rs @@ -118,26 +118,11 @@ impl<'a> NdBuffer<'a> { } } -/// One per-dimension entry of a band's logical view. Describes how a -/// visible axis maps onto an axis of the underlying source buffer. -/// -/// - `source_axis`: index into the band's `source_shape` that this visible -/// axis reads from. Across a band's full view, `source_axis` values must -/// form a permutation of `0..ndim` — axis-dropping and axis-introducing -/// views are not supported today. -/// - `start`: starting index along the source axis (in elements, not bytes). -/// - `step`: stride between consecutive visible elements along the source -/// axis. `step == 0` means broadcast (the same source element is -/// exposed `steps` times); negative `step` means reverse iteration. -/// - `steps`: number of visible elements along this axis. `steps == 0` is -/// allowed (empty axis). -#[derive(Debug, Clone, Copy, PartialEq, Eq)] -pub struct ViewEntry { - pub source_axis: i64, - pub start: i64, - pub step: i64, - pub steps: i64, -} +// `ViewEntry` (and the `ViewEntries` newtype with its validation / +// composition / visible-shape machinery) lives in `view_entries.rs`. +// Re-exported here so the `crate::traits::ViewEntry` import path keeps +// working. +pub use crate::view_entries::ViewEntry; /// Concrete raster metadata returned by `RasterRef::metadata()`. /// diff --git a/rust/sedona-raster/src/view_entries.rs b/rust/sedona-raster/src/view_entries.rs new file mode 100644 index 000000000..8411704f6 --- /dev/null +++ b/rust/sedona-raster/src/view_entries.rs @@ -0,0 +1,594 @@ +// Licensed to the Apache Software Foundation (ASF) under one +// or more contributor license agreements. See the NOTICE file +// distributed with this work for additional information +// regarding copyright ownership. The ASF licenses this file +// to you under the Apache License, Version 2.0 (the +// "License"); you may not use this file except in compliance +// with the License. You may obtain a copy of the License at +// +// http://www.apache.org/licenses/LICENSE-2.0 +// +// Unless required by applicable law or agreed to in writing, +// software distributed under the License is distributed on an +// "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY +// KIND, either express or implied. See the License for the +// specific language governing permissions and limitations +// under the License. + +//! View entries — per-axis slice / broadcast / permutation specs for an +//! N-D raster band. The [`ViewEntries`] newtype owns a `Vec` +//! and is the entry point for all view-machinery operations +//! (validation, identity check, visible-shape derivation, composition). + +use arrow_schema::ArrowError; + +/// One per-dimension entry of a band's logical view. Describes how a +/// visible axis maps onto an axis of the underlying source buffer. +/// +/// - `source_axis`: index into the band's `source_shape` that this visible +/// axis reads from. Across a band's full view, `source_axis` values must +/// form a permutation of `0..ndim` — axis-dropping and axis-introducing +/// views are not supported today. +/// - `start`: starting index along the source axis (in elements, not bytes). +/// - `step`: stride between consecutive visible elements along the source +/// axis. `step == 0` means broadcast (the same source element is +/// exposed `steps` times); negative `step` means reverse iteration. +/// - `steps`: number of visible elements along this axis. `steps == 0` is +/// allowed (empty axis). +#[derive(Debug, Clone, Copy, PartialEq, Eq)] +pub struct ViewEntry { + pub source_axis: i64, + pub start: i64, + pub step: i64, + pub steps: i64, +} + +/// A band's full view, one [`ViewEntry`] per visible axis. +/// +/// Use [`ViewEntries::new`] to wrap an existing `Vec` or +/// [`ViewEntries::identity_for_shape`] to build the canonical +/// no-op view over a given source shape. Operations on the view live +/// as methods on this type — `validate`, `visible_shape`, +/// `is_identity`, `compose`. The newtype gives Vec a name +/// and a single place to attach helpers and tests. +#[derive(Debug, Clone, PartialEq, Eq, Default)] +pub struct ViewEntries(Vec); + +impl ViewEntries { + /// Wrap a pre-built vector of entries. The view is not validated + /// here — call [`Self::validate`] before relying on it. + pub fn new(inner: Vec) -> Self { + Self(inner) + } + + /// Build the canonical identity view over `source_shape`: + /// `[(source_axis=k, start=0, step=1, steps=source_shape[k])]` for + /// each k. Always self-validates. + pub fn identity_for_shape(source_shape: &[i64]) -> Self { + Self( + source_shape + .iter() + .enumerate() + .map(|(k, &s)| ViewEntry { + source_axis: k as i64, + start: 0, + step: 1, + steps: s, + }) + .collect(), + ) + } + + pub fn len(&self) -> usize { + self.0.len() + } + + pub fn is_empty(&self) -> bool { + self.0.is_empty() + } + + pub fn as_slice(&self) -> &[ViewEntry] { + &self.0 + } + + pub fn iter(&self) -> std::slice::Iter<'_, ViewEntry> { + self.0.iter() + } + + /// Visible shape derived from `[v.steps for v in self]`. `validate` + /// guarantees `steps >= 0`, so callers can treat the result as + /// non-negative after a successful validation. + pub fn visible_shape(&self) -> Vec { + self.0.iter().map(|v| v.steps).collect() + } + + /// True iff this view is the canonical identity over a C-order + /// source buffer: every visible axis `k` is a no-op (no + /// permutation, no slice offset, no stride/reverse, full coverage + /// of the corresponding source axis). + /// + /// Identity views are always contiguous, so the reader borrows the + /// underlying `data` column directly through `NdBuffer::as_contiguous()` + /// with no allocation or copy. Non-identity views may still be + /// contiguous (e.g. an outer-axis slice); strided ones are rejected by + /// `as_contiguous()` and must be repacked via an explicit plan node. + pub fn is_identity(&self, source_shape: &[i64]) -> bool { + if self.0.len() != source_shape.len() { + return false; + } + self.0.iter().enumerate().all(|(k, v)| { + v.source_axis == k as i64 && v.start == 0 && v.step == 1 && v.steps == source_shape[k] + }) + } + + /// Validate against a band's `source_shape`. `Ok(())` iff: + /// + /// - `self.len() == source_shape.len()`. + /// - `source_axis` values across `self` form a permutation of + /// `0..source_shape.len()` (no axis duplicated, none missing). + /// - Every `source_shape[k] >= 0`. + /// - Every `steps >= 0`. + /// - When `steps > 0`: `start ∈ [0, source_shape[source_axis])`, + /// and when `step != 0` the last addressed element + /// `start + (steps - 1) * step` is also in that range. + pub fn validate(&self, source_shape: &[i64]) -> Result<(), ArrowError> { + let ndim = source_shape.len(); + if self.0.len() != ndim { + return Err(ArrowError::InvalidArgumentError(format!( + "view length ({}) must equal source_shape length ({ndim})", + self.0.len() + ))); + } + let mut seen = vec![false; ndim]; + for (k, v) in self.0.iter().enumerate() { + if v.source_axis < 0 || (v.source_axis as usize) >= ndim { + return Err(ArrowError::InvalidArgumentError(format!( + "view[{k}].source_axis = {} is out of range [0, {ndim})", + v.source_axis + ))); + } + let sa = v.source_axis as usize; + if seen[sa] { + return Err(ArrowError::InvalidArgumentError(format!( + "view source_axis values must be a permutation of 0..{ndim}; \ + axis {sa} appears more than once" + ))); + } + seen[sa] = true; + + if v.steps < 0 { + return Err(ArrowError::InvalidArgumentError(format!( + "view[{k}].steps = {} must be >= 0", + v.steps + ))); + } + if source_shape[sa] < 0 { + return Err(ArrowError::InvalidArgumentError(format!( + "source_shape[{sa}] = {} must be >= 0", + source_shape[sa] + ))); + } + if v.steps > 0 { + let s = source_shape[sa]; + if v.start < 0 || v.start >= s { + return Err(ArrowError::InvalidArgumentError(format!( + "view[{k}].start = {} is out of range [0, {s}) for source axis {sa}", + v.start + ))); + } + if v.step != 0 { + // Checked arithmetic: a malicious or corrupted view can't + // wrap (steps-1)*step or start+… into an in-range value and + // bypass the bound check. Any overflow is a validation + // error. + let last = (v.steps - 1) + .checked_mul(v.step) + .and_then(|d| v.start.checked_add(d)) + .ok_or_else(|| { + ArrowError::InvalidArgumentError(format!( + "view[{k}] last-element index overflows i64 for \ + start={}, step={}, steps={} on source axis {sa}", + v.start, v.step, v.steps + )) + })?; + if last < 0 || last >= s { + return Err(ArrowError::InvalidArgumentError(format!( + "view[{k}] addresses element {last} which is out of range \ + [0, {s}) for source axis {sa}" + ))); + } + } + } + } + Ok(()) + } + + /// Compose `next` (a view spec over `self`'s visible axes) with + /// `self` (a view spec over a band's `source_shape`), producing a + /// single view over the same `source_shape` that represents the + /// result of viewing through `next`. + /// + /// `next[k].source_axis` indexes into `self`'s visible axes + /// (`0..self.len()`), NOT into the underlying source axes. This + /// lets callers (e.g. lazy slicing) describe the new view in terms + /// of what they see, without knowing the input's internal layout. + /// + /// Math: for each output axis `k`, walking through `self` resolves + /// the source axis and translates start/step: + /// - `out.source_axis = self[next.source_axis].source_axis` + /// - `out.start = self.start + next.start * self.step` + /// - `out.step = next.step * self.step` + /// - `out.steps = next.steps` + /// + /// Uses checked arithmetic at every step. The caller must + /// [`validate`] the result against the source shape before use. + /// + /// [`validate`]: Self::validate + pub fn compose(&self, next: &Self) -> Result { + let mut out = Vec::with_capacity(next.0.len()); + for (k, next_entry) in next.0.iter().enumerate() { + if next_entry.source_axis < 0 || (next_entry.source_axis as usize) >= self.0.len() { + return Err(ArrowError::InvalidArgumentError(format!( + "compose: next[{k}].source_axis ({}) is out of range \ + for input with {} visible axes", + next_entry.source_axis, + self.0.len() + ))); + } + let input = &self.0[next_entry.source_axis as usize]; + let step = next_entry.step.checked_mul(input.step).ok_or_else(|| { + ArrowError::InvalidArgumentError(format!( + "compose: step product overflows i64 at axis {k} \ + (next.step={}, input.step={})", + next_entry.step, input.step + )) + })?; + let start_offset = next_entry.start.checked_mul(input.step).ok_or_else(|| { + ArrowError::InvalidArgumentError(format!( + "compose: next.start * input.step overflows i64 at axis {k} \ + (next.start={}, input.step={})", + next_entry.start, input.step + )) + })?; + let start = input.start.checked_add(start_offset).ok_or_else(|| { + ArrowError::InvalidArgumentError(format!( + "compose: composed start overflows i64 at axis {k} \ + (input.start={}, offset={})", + input.start, start_offset + )) + })?; + out.push(ViewEntry { + source_axis: input.source_axis, + start, + step, + steps: next_entry.steps, + }); + } + Ok(Self(out)) + } +} + +impl From> for ViewEntries { + fn from(inner: Vec) -> Self { + Self(inner) + } +} + +impl AsRef<[ViewEntry]> for ViewEntries { + fn as_ref(&self) -> &[ViewEntry] { + &self.0 + } +} + +impl std::ops::Index for ViewEntries { + type Output = ViewEntry; + fn index(&self, i: usize) -> &ViewEntry { + &self.0[i] + } +} + +impl<'a> IntoIterator for &'a ViewEntries { + type Item = &'a ViewEntry; + type IntoIter = std::slice::Iter<'a, ViewEntry>; + fn into_iter(self) -> Self::IntoIter { + self.0.iter() + } +} + +/// Build a [`ViewEntries`] from numpy-style slice syntax. Each `a:b` +/// becomes a `ViewEntry { source_axis: k, start: a, step: 1, steps: b - a }` +/// on the next axis. Step ≠ 1 isn't supported by the macro — use +/// the struct constructor directly for that. +/// +/// ```ignore +/// let v = view_entries![0:4, 1:5]; +/// // → axis 0: start=0, step=1, steps=4 +/// // → axis 1: start=1, step=1, steps=4 +/// ``` +#[macro_export] +macro_rules! view_entries { + ($($start:literal : $stop:literal),* $(,)?) => { + $crate::view_entries::ViewEntries::new({ + #[allow(unused_mut)] + let mut entries: Vec<$crate::view_entries::ViewEntry> = Vec::new(); + // Pre-increment so the last write into `axis` is always read by + // the following push — no dangling assignment on the final entry, + // and no warning on single-entry uses. + #[allow(unused_mut)] + let mut axis: i64 = -1; + $( + axis += 1; + entries.push($crate::view_entries::ViewEntry { + source_axis: axis, + start: $start, + step: 1, + steps: ($stop as i64) - ($start as i64), + }); + )* + entries + }) + }; +} + +#[cfg(test)] +mod tests { + use super::*; + + fn ve(source_axis: i64, start: i64, step: i64, steps: i64) -> ViewEntry { + ViewEntry { + source_axis, + start, + step, + steps, + } + } + + fn entries(v: &[ViewEntry]) -> ViewEntries { + ViewEntries::new(v.to_vec()) + } + + // ---- macro ---- + + #[test] + fn macro_expands_python_style_slice_to_view_entries() { + let v = view_entries![0:4, 1:5]; + assert_eq!(v.len(), 2); + assert_eq!(v[0], ve(0, 0, 1, 4)); + assert_eq!(v[1], ve(1, 1, 1, 4)); + } + + // ---- visible_shape ---- + + #[test] + fn visible_shape_pulls_steps_from_each_entry() { + let v = entries(&[ve(0, 0, 1, 4), ve(1, 0, 1, 5)]); + assert_eq!(v.visible_shape(), vec![4, 5]); + } + + // ---- identity_for_shape ---- + + #[test] + fn identity_for_shape_round_trips_through_is_identity() { + let v = ViewEntries::identity_for_shape(&[4, 5, 6]); + assert!(v.is_identity(&[4, 5, 6])); + } + + // ---- validate ---- + + #[test] + fn validate_accepts_identity() { + let v = entries(&[ve(0, 0, 1, 4), ve(1, 0, 1, 5)]); + v.validate(&[4, 5]).unwrap(); + } + + #[test] + fn validate_rejects_length_mismatch() { + let v = entries(&[ve(0, 0, 1, 4)]); + let err = v.validate(&[4, 5]).unwrap_err(); + assert!(err.to_string().contains("must equal"), "got {err}"); + } + + #[test] + fn validate_rejects_negative_source_axis() { + let v = entries(&[ve(-1, 0, 1, 4)]); + let err = v.validate(&[4]).unwrap_err(); + assert!(err.to_string().contains("source_axis"), "got {err}"); + } + + #[test] + fn validate_rejects_oob_source_axis() { + let v = entries(&[ve(2, 0, 1, 4)]); + let err = v.validate(&[4]).unwrap_err(); + assert!(err.to_string().contains("source_axis"), "got {err}"); + } + + #[test] + fn validate_rejects_duplicate_source_axis() { + let v = entries(&[ve(0, 0, 1, 2), ve(0, 0, 1, 2)]); + let err = v.validate(&[2, 3]).unwrap_err(); + assert!(err.to_string().contains("permutation"), "got {err}"); + } + + #[test] + fn validate_rejects_negative_steps() { + let v = entries(&[ve(0, 0, 1, -1)]); + let err = v.validate(&[4]).unwrap_err(); + assert!(err.to_string().contains("steps"), "got {err}"); + } + + #[test] + fn validate_rejects_negative_start() { + let v = entries(&[ve(0, -1, 1, 1)]); + let err = v.validate(&[4]).unwrap_err(); + assert!(err.to_string().contains("start"), "got {err}"); + } + + #[test] + fn validate_rejects_start_at_source_size() { + let v = entries(&[ve(0, 4, 1, 1)]); + let err = v.validate(&[4]).unwrap_err(); + assert!(err.to_string().contains("start"), "got {err}"); + } + + #[test] + fn validate_rejects_negative_step_underrun() { + // start=0, step=-1, steps=2 addresses element 0 then -1 → underrun. + let v = entries(&[ve(0, 0, -1, 2)]); + let err = v.validate(&[4]).unwrap_err(); + assert!(err.to_string().contains("out of range"), "got {err}"); + } + + #[test] + fn validate_accepts_negative_step_full_reverse() { + // start=3, step=-1, steps=4 addresses 3,2,1,0 — all in range. + let v = entries(&[ve(0, 3, -1, 4)]); + v.validate(&[4]).unwrap(); + } + + #[test] + fn validate_accepts_steps_zero_with_unconstrained_start() { + let v = entries(&[ve(0, 999, 1, 0)]); + v.validate(&[4]).unwrap(); + } + + #[test] + fn validate_steps_one_only_checks_start() { + let v = entries(&[ve(0, 3, 999, 1)]); + v.validate(&[4]).unwrap(); + } + + #[test] + fn validate_step_zero_broadcast_within_bounds() { + let v_ok = entries(&[ve(0, 3, 0, 100)]); + v_ok.validate(&[4]).unwrap(); + let v_bad = entries(&[ve(0, 4, 0, 1)]); + let err = v_bad.validate(&[4]).unwrap_err(); + assert!(err.to_string().contains("start"), "got {err}"); + } + + #[test] + fn validate_permutation_with_slice_ok() { + let v = entries(&[ve(1, 0, 1, 3), ve(0, 1, 1, 1)]); + v.validate(&[2, 3]).unwrap(); + } + + #[test] + fn validate_rejects_i64_overflow_in_last_element() { + let v = entries(&[ve(0, 10, i64::MAX, 3)]); + let err = v.validate(&[100]).unwrap_err(); + assert!(err.to_string().contains("overflow"), "got: {err}"); + } + + #[test] + fn validate_rejects_i64_overflow_in_start_plus_offset() { + let v = entries(&[ve(0, 2, 1, i64::MAX)]); + let err = v.validate(&[100]).unwrap_err(); + assert!(err.to_string().contains("overflow"), "got: {err}"); + } + + // ---- is_identity ---- + + #[test] + fn is_identity_accepts_canonical_identity() { + let v = entries(&[ve(0, 0, 1, 4), ve(1, 0, 1, 5)]); + assert!(v.is_identity(&[4, 5])); + } + + #[test] + fn is_identity_rejects_axis_permutation_even_with_identity_step() { + // start/step/steps all look identity-shaped, but source_axis is + // permuted. Indistinguishable from identity in start/step terms — + // the source_axis check is the only thing that catches it. + let v = entries(&[ve(1, 0, 1, 5), ve(0, 0, 1, 4)]); + assert!(!v.is_identity(&[4, 5])); + } + + #[test] + fn is_identity_rejects_partial_axis_coverage() { + let v = entries(&[ve(0, 0, 1, 3), ve(1, 0, 1, 5)]); + assert!(!v.is_identity(&[4, 5])); + } + + #[test] + fn is_identity_rejects_non_zero_start() { + let v = entries(&[ve(0, 1, 1, 3), ve(1, 0, 1, 5)]); + assert!(!v.is_identity(&[4, 5])); + } + + #[test] + fn is_identity_rejects_non_unit_step() { + let v = entries(&[ve(0, 0, 2, 2), ve(1, 0, 1, 5)]); + assert!(!v.is_identity(&[4, 5])); + } + + #[test] + fn is_identity_rejects_length_mismatch() { + let v = entries(&[ve(0, 0, 1, 4)]); + assert!(!v.is_identity(&[4, 5])); + } + + // ---- compose ---- + + #[test] + fn compose_identity_input_returns_next_unchanged() { + let identity = entries(&[ve(0, 0, 1, 8)]); + let next = entries(&[ve(0, 2, 3, 2)]); + let composed = identity.compose(&next).unwrap(); + assert_eq!(composed.len(), 1); + assert_eq!(composed[0], ve(0, 2, 3, 2)); + } + + #[test] + fn compose_slice_of_slice_collapses_into_one_view() { + // Input view samples source[0..8] at indices 1, 3, 5. + // Next view samples that visible region at index 1 only. + // Composed: samples source axis 0 at index 3 (= 1 + 1*2), step=2, steps=1. + let input = entries(&[ve(0, 1, 2, 3)]); + let next = entries(&[ve(0, 1, 1, 1)]); + let composed = input.compose(&next).unwrap(); + assert_eq!(composed.as_slice(), &[ve(0, 3, 2, 1)]); + } + + #[test] + fn compose_preserves_source_axis_through_permutation() { + // Input transposes axes: visible 0 = source 1, visible 1 = source 0. + // Next picks axis 1 (which is source axis 0), so the composed + // view's sole entry must have source_axis = 0. + let input = entries(&[ve(1, 0, 1, 5), ve(0, 0, 1, 4)]); + let next = entries(&[ve(1, 2, 1, 2)]); + let composed = input.compose(&next).unwrap(); + assert_eq!(composed.len(), 1); + assert_eq!(composed[0].source_axis, 0); + assert_eq!(composed[0].start, 2); + assert_eq!(composed[0].step, 1); + assert_eq!(composed[0].steps, 2); + } + + #[test] + fn compose_step_multiplies() { + // Input step=3, next step=2 → composed step=6. + let input = entries(&[ve(0, 0, 3, 5)]); + let next = entries(&[ve(0, 1, 2, 2)]); + let composed = input.compose(&next).unwrap(); + assert_eq!(composed.as_slice(), &[ve(0, 3, 6, 2)]); + } + + #[test] + fn compose_rejects_next_source_axis_out_of_range() { + let input = entries(&[ve(0, 0, 1, 4)]); + let next = entries(&[ve(2, 0, 1, 1)]); + let err = input.compose(&next).unwrap_err(); + assert!(err.to_string().contains("out of range"), "got {err}"); + } + + #[test] + fn compose_rejects_step_product_overflow() { + let input = entries(&[ve(0, 0, i64::MAX, 2)]); + let next = entries(&[ve(0, 0, 2, 1)]); + let err = input.compose(&next).unwrap_err(); + assert!( + err.to_string().contains("step product overflows"), + "got {err}" + ); + } +} From 29e9d8941bc83cb3b0c319aad2e267ab44575241 Mon Sep 17 00:00:00 2001 From: jameswillis Date: Tue, 9 Jun 2026 13:47:49 -0700 Subject: [PATCH 2/2] feat(raster): wire ViewEntries into band construction and reads Integrate the view-spec layer into the raster type. The reader's band() composes a band's view into byte strides + offset (with overflow and buffer-bounds checks) so a non-identity view decodes instead of being rejected; nd_buffer() exposes the strided region and as_contiguous() borrows it zero-copy when packed. The builder gains start_band_with_view and with_view to construct sliced / broadcast / permuted / stacked views. Stacked on the ViewEntries module PR. --- rust/sedona-raster/src/array.rs | 734 +++++++++++++++++++----- rust/sedona-raster/src/builder.rs | 922 +++++++++++++++++++++++++++++- rust/sedona-raster/src/traits.rs | 160 +++++- 3 files changed, 1637 insertions(+), 179 deletions(-) diff --git a/rust/sedona-raster/src/array.rs b/rust/sedona-raster/src/array.rs index 0bc42607e..ca3a454ff 100644 --- a/rust/sedona-raster/src/array.rs +++ b/rust/sedona-raster/src/array.rs @@ -22,13 +22,15 @@ use arrow_array::{ use arrow_schema::ArrowError; use crate::traits::{BandRef, Bands, NdBuffer, RasterRef, ViewEntry}; -use sedona_schema::raster::{band_indices, raster_indices, BandDataType}; +use crate::view_entries::ViewEntries; +use sedona_schema::raster::{band_indices, band_view_indices, raster_indices, BandDataType}; /// Arrow-backed implementation of BandRef for a single band within a raster. /// -/// Today this handles only the canonical identity view: `view_entries` is -/// synthesised from `source_shape`, `visible_shape == source_shape`, -/// and `byte_strides` are plain C-order strides with `byte_offset = 0`. +/// View-derived layout (`visible_shape`, `byte_strides`, `byte_offset`, +/// `is_identity_view`) is computed once at construction and reused by every +/// accessor. Source-shape and dim-name slices are borrowed directly from +/// the underlying Arrow buffers. struct BandRefImpl<'a> { dim_names_list: &'a ListArray, dim_names_values: &'a StringArray, @@ -42,14 +44,20 @@ struct BandRefImpl<'a> { band_row: usize, /// Resolved at construction so accessors don't re-decode the discriminant. data_type: BandDataType, - /// Per-visible-axis view, length = ndim. Always identity today. - view_entries: Vec, - /// Visible shape, length = ndim. Equals `source_shape` today. + /// Per-visible-axis view, length = ndim + view_entries: ViewEntries, + /// Visible shape (== `[v.steps for v in view_entries]`), length = ndim. + /// `i64` to match `BandRef::shape()`'s return type and the surrounding + /// view-machinery arithmetic (strides, offsets). `validate_view` + /// guarantees entries are non-negative. visible_shape: Vec, - /// Byte strides per visible axis. C-order over `source_shape` today. + /// Byte strides per visible axis. May be 0 (broadcast) or negative. byte_strides: Vec, /// Byte offset into `data` of the visible region's `[0,...,0]` element. - byte_offset: u64, + /// Typed `i64` to match the surrounding stride arithmetic + /// (`byte_strides` are `i64` to allow negative steps). Always non-negative + /// by construction — `RasterRefImpl::band` asserts `>= 0` before storing. + byte_offset: i64, } impl<'a> BandRef for BandRefImpl<'a> { @@ -76,7 +84,7 @@ impl<'a> BandRef for BandRefImpl<'a> { } fn view(&self) -> &[ViewEntry] { - &self.view_entries + self.view_entries.as_slice() } fn data_type(&self) -> BandDataType { @@ -135,6 +143,80 @@ impl<'a> BandRef for BandRefImpl<'a> { } } +/// Verify that every byte the view can address lies within `buffer_len` +/// and that every stride × index product (and their accumulations) fits +/// in i64. +/// +/// **Load-bearing**: this is the *only* bound check between the view's +/// byte-stride description and the data buffer. Stride-aware consumers walk +/// the buffer with plain-arithmetic indexing and rely on this precheck +/// having proven every addressed byte is in range. Two corruption modes it +/// catches: +/// +/// 1. A writer that lies about `source_shape` (Arrow column shorter +/// than the view promises). +/// 2. A composed view whose stride × index product or accumulated +/// offset overflows i64 even though `validate_view` accepted the +/// per-entry bounds. +/// +/// Empty visible regions (any axis with `steps == 0`) address no bytes +/// and skip the check. +fn check_view_buffer_bounds( + buffer_len: usize, + visible_shape: &[i64], + byte_strides: &[i64], + byte_offset: i64, + dtype_size: usize, +) -> Result<(), ArrowError> { + if visible_shape.contains(&0) { + return Ok(()); + } + let mut min_offset = byte_offset; + let mut max_offset = byte_offset; + for (k, &stride) in byte_strides.iter().enumerate() { + // `validate_view` guarantees `steps >= 0`, so `visible_shape[k] >= 0` + // and `visible_shape[k] - 1` is in-range for any non-empty axis. + let last_idx = visible_shape[k] - 1; + let contribution = last_idx.checked_mul(stride).ok_or_else(|| { + ArrowError::InvalidArgumentError(format!( + "max addressable offset on axis {k} overflows i64" + )) + })?; + if contribution > 0 { + max_offset = max_offset.checked_add(contribution).ok_or_else(|| { + ArrowError::InvalidArgumentError( + "max addressable offset accumulation overflows i64".to_string(), + ) + })?; + } else if contribution < 0 { + min_offset = min_offset.checked_add(contribution).ok_or_else(|| { + ArrowError::InvalidArgumentError( + "min addressable offset accumulation overflows i64".to_string(), + ) + })?; + } + } + let last_byte = max_offset + .checked_add(dtype_size as i64 - 1) + .ok_or_else(|| { + ArrowError::InvalidArgumentError("max addressable byte overflows i64".to_string()) + })?; + if min_offset < 0 { + return Err(ArrowError::InvalidArgumentError(format!( + "view addresses out-of-bounds negative byte offset {min_offset}" + ))); + } + let buffer_len_i64 = i64::try_from(buffer_len).map_err(|_| { + ArrowError::InvalidArgumentError(format!("buffer length {buffer_len} exceeds i64::MAX")) + })?; + if last_byte >= buffer_len_i64 { + return Err(ArrowError::InvalidArgumentError(format!( + "view addresses byte {last_byte} but buffer is only {buffer_len} bytes" + ))); + } + Ok(()) +} + /// Arrow-backed implementation of RasterRef for a single raster row. /// /// Holds flat references to the underlying Arrow arrays so the impl does @@ -159,6 +241,10 @@ pub struct RasterRefImpl<'a> { band_datatype_array: &'a UInt32Array, band_nodata_array: &'a BinaryArray, band_view_list: &'a ListArray, + band_view_source_axis: &'a Int64Array, + band_view_start: &'a Int64Array, + band_view_step: &'a Int64Array, + band_view_steps: &'a Int64Array, band_outdb_uri_array: &'a StringArray, band_outdb_format_array: &'a StringViewArray, band_data_array: &'a BinaryViewArray, @@ -174,6 +260,124 @@ impl<'a> RasterRefImpl<'a> { Some(self.crs_array.value(self.raster_index)) } } + + /// Read the band's source_shape and convert u64 → i64 with overflow check. + /// + /// Rejects 0-D bands (empty source_shape) at the read boundary: the schema + /// doesn't forbid them outright but every consumer assumes ndim >= 1. Every + /// downstream consumer in the view machinery wants i64 (matches ViewEntry's + /// signed fields and the stride arithmetic); converting once here keeps the + /// rest of band() free of mixed-signedness gymnastics. + fn read_band_source_shape(&self, band_row: usize) -> Result, ArrowError> { + let ss_start = self.band_source_shape_list.value_offsets()[band_row] as usize; + let ss_end = self.band_source_shape_list.value_offsets()[band_row + 1] as usize; + let source_shape: &[i64] = &self.band_source_shape_values.values()[ss_start..ss_end]; + + if source_shape.is_empty() { + return Err(ArrowError::ExternalError(Box::new( + sedona_common::sedona_internal_datafusion_err!( + "band {band_row} has empty source_shape; ndim must be >= 1" + ), + ))); + } + + Ok(source_shape.to_vec()) + } + + /// Resolve the band's data-type discriminant or fail. An unknown + /// discriminant is schema-corruption, not user data. + fn read_band_data_type_or_err(&self, band_row: usize) -> Result { + let data_type_value = self.band_datatype_array.value(band_row); + BandDataType::try_from_u32(data_type_value).ok_or_else(|| { + ArrowError::ExternalError(Box::new(sedona_common::sedona_internal_datafusion_err!( + "band {band_row} has unknown data_type discriminant {data_type_value}" + ))) + }) + } + + /// Read the band's view-entry list. Identity is encoded exclusively as a + /// NULL row — an empty (non-null, zero-length) list is malformed and + /// rejected later by [`ViewEntries::validate`]. The schema (see + /// `RasterSchema::view_type`) documents this contract. + fn read_band_view_entries( + &self, + band_row: usize, + source_shape: &[i64], + ) -> Result { + if self.band_view_list.is_null(band_row) { + return Ok(ViewEntries::identity_for_shape(source_shape)); + } + let v_start = self.band_view_list.value_offsets()[band_row] as usize; + let v_end = self.band_view_list.value_offsets()[band_row + 1] as usize; + Ok(ViewEntries::new( + (v_start..v_end) + .map(|i| ViewEntry { + source_axis: self.band_view_source_axis.value(i), + start: self.band_view_start.value(i), + step: self.band_view_step.value(i), + steps: self.band_view_steps.value(i), + }) + .collect(), + )) + } +} + +/// Compose a validated view against a source shape into C-order byte strides +/// and a byte offset. +/// +/// C-order source strides are dtype-scaled cumulative products of `source_shape`, +/// then each visible axis's stride/offset is composed as `view.step * +/// src_stride` / `view.start * src_stride`. All arithmetic is checked: even +/// after `ViewEntries::validate`, the cumulative byte product can overflow +/// `i64` for cosmically large shapes, and a corrupt source_shape whose product +/// wraps would otherwise silently pass downstream bound checks. The returned +/// `byte_offset` is non-negative by construction (start >= 0, src_stride > 0); +/// the defensive sign check guards future refactors that might break that +/// invariant before we cross the i64 → u64 boundary in `nd_buffer()`. +fn compose_byte_strides( + band_row: usize, + source_shape: &[i64], + view_entries: &ViewEntries, + dtype_byte_size: usize, +) -> Result<(Vec, i64), ArrowError> { + let overflow_err = |msg: &str| { + ArrowError::ExternalError(Box::new(sedona_common::sedona_internal_datafusion_err!( + "band {band_row}: {msg}" + ))) + }; + + let dtype_size = dtype_byte_size as i64; + + let mut source_strides_bytes = vec![0i64; source_shape.len()]; + source_strides_bytes[source_shape.len() - 1] = dtype_size; + for k in (0..source_shape.len() - 1).rev() { + source_strides_bytes[k] = source_strides_bytes[k + 1] + .checked_mul(source_shape[k + 1]) + .ok_or_else(|| overflow_err("source-stride product overflows i64"))?; + } + + let mut byte_strides = vec![0i64; view_entries.len()]; + let mut byte_offset: i64 = 0; + for (k, v) in view_entries.iter().enumerate() { + let src_stride = source_strides_bytes[v.source_axis as usize]; + byte_strides[k] = v + .step + .checked_mul(src_stride) + .ok_or_else(|| overflow_err("view step × source-stride overflows i64"))?; + let start_off = v + .start + .checked_mul(src_stride) + .ok_or_else(|| overflow_err("view start × source-stride overflows i64"))?; + byte_offset = byte_offset + .checked_add(start_off) + .ok_or_else(|| overflow_err("view offset accumulation overflows i64"))?; + } + + if byte_offset < 0 { + return Err(overflow_err("composed byte_offset is negative")); + } + + Ok((byte_strides, byte_offset)) } impl<'a> RasterRef for RasterRefImpl<'a> { @@ -195,67 +399,44 @@ impl<'a> RasterRef for RasterRefImpl<'a> { let start = self.bands_list.value_offsets()[self.raster_index] as usize; let band_row = start + index; - // Read source shape slice. - let ss_start = self.band_source_shape_list.value_offsets()[band_row] as usize; - let ss_end = self.band_source_shape_list.value_offsets()[band_row + 1] as usize; - let source_shape: &[i64] = &self.band_source_shape_values.values()[ss_start..ss_end]; - - // Reject 0-D bands at the read boundary. Schema doesn't forbid them - // outright but every consumer assumes ndim >= 1. - if source_shape.is_empty() { - return Err(ArrowError::ExternalError(Box::new( - sedona_common::sedona_internal_datafusion_err!( - "band {band_row} has empty source_shape; ndim must be >= 1" - ), - ))); - } - - // Resolve data type up front; an unknown discriminant is a - // schema-corruption bug, not user data, so failing the band loudly - // here is appropriate. - let data_type_value = self.band_datatype_array.value(band_row); - let data_type = BandDataType::try_from_u32(data_type_value).ok_or_else(|| { + let source_shape = self.read_band_source_shape(band_row)?; + let data_type = self.read_band_data_type_or_err(band_row)?; + let view_entries = self.read_band_view_entries(band_row, &source_shape)?; + view_entries.validate(&source_shape).map_err(|e| { ArrowError::ExternalError(Box::new(sedona_common::sedona_internal_datafusion_err!( - "band {band_row} has unknown data_type discriminant {data_type_value}" + "band {band_row} has malformed view: {e}" ))) })?; - // Only the canonical identity view (null view row) is written today. - // A non-null view row would require the view → byte-stride composition - // path, which is not yet implemented. Surface it loudly here rather - // than silently rejecting the band, so callers see the standardised - // SedonaDB-internal-error framing. - // - // This rejection is also the guardrail keeping `RS_EnsureLoaded` - // correct: it drops `view()` on rebuild, so it would corrupt a - // viewed band. When this comes off (view composition), the loader - // request/response must round-trip the view — tracked in - // . - if !self.band_view_list.is_null(band_row) { - return Err(ArrowError::ExternalError(Box::new( - sedona_common::sedona_internal_datafusion_err!( - "non-null view row at band {band_row}: view composition is not yet implemented" - ), - ))); - } - let view_entries: Vec = source_shape - .iter() - .enumerate() - .map(|(i, &s)| ViewEntry { - source_axis: i as i64, - start: 0, - step: 1, - steps: s, - }) - .collect(); - - let visible_shape: Vec = source_shape.to_vec(); - - let dtype_size = data_type.byte_size() as i64; - let mut byte_strides = vec![0i64; source_shape.len()]; - byte_strides[source_shape.len() - 1] = dtype_size; - for k in (0..source_shape.len() - 1).rev() { - byte_strides[k] = byte_strides[k + 1] * source_shape[k + 1]; + let visible_shape = view_entries.visible_shape(); + let (byte_strides, byte_offset) = compose_byte_strides( + band_row, + &source_shape, + &view_entries, + data_type.byte_size(), + )?; + + // For InDb bands, verify the underlying data column is long enough + // to cover every byte the view can address. The view-machinery + // validation above doesn't know the actual `data` BinaryView + // length — a writer that lies about source_shape vs the bytes + // written would otherwise slip through and panic later when a + // consumer walks the strided buffer. OutDb bands skip this check: + // their data column is empty by design. + let data_bytes = self.band_data_array.value(band_row); + if !data_bytes.is_empty() { + check_view_buffer_bounds( + data_bytes.len(), + &visible_shape, + &byte_strides, + byte_offset, + data_type.byte_size(), + ) + .map_err(|e| { + ArrowError::ExternalError(Box::new(sedona_common::sedona_internal_datafusion_err!( + "band {band_row}: view-buffer bounds check failed: {e}" + ))) + })?; } Ok(Box::new(BandRefImpl { @@ -272,7 +453,7 @@ impl<'a> RasterRef for RasterRefImpl<'a> { view_entries, visible_shape, byte_strides, - byte_offset: 0, + byte_offset, })) } @@ -394,6 +575,10 @@ pub struct RasterStructArray<'a> { band_datatype_array: &'a UInt32Array, band_nodata_array: &'a BinaryArray, band_view_list: &'a ListArray, + band_view_source_axis: &'a Int64Array, + band_view_start: &'a Int64Array, + band_view_step: &'a Int64Array, + band_view_steps: &'a Int64Array, band_outdb_uri_array: &'a StringArray, band_outdb_format_array: &'a StringViewArray, band_data_array: &'a BinaryViewArray, @@ -493,6 +678,31 @@ impl<'a> RasterStructArray<'a> { .as_any() .downcast_ref::() .unwrap(); + let band_view_struct = band_view_list + .values() + .as_any() + .downcast_ref::() + .unwrap(); + let band_view_source_axis = band_view_struct + .column(band_view_indices::SOURCE_AXIS) + .as_any() + .downcast_ref::() + .unwrap(); + let band_view_start = band_view_struct + .column(band_view_indices::START) + .as_any() + .downcast_ref::() + .unwrap(); + let band_view_step = band_view_struct + .column(band_view_indices::STEP) + .as_any() + .downcast_ref::() + .unwrap(); + let band_view_steps = band_view_struct + .column(band_view_indices::STEPS) + .as_any() + .downcast_ref::() + .unwrap(); let band_outdb_uri_array = bands_struct .column(band_indices::OUTDB_URI) .as_any() @@ -527,6 +737,10 @@ impl<'a> RasterStructArray<'a> { band_datatype_array, band_nodata_array, band_view_list, + band_view_source_axis, + band_view_start, + band_view_step, + band_view_steps, band_outdb_uri_array, band_outdb_format_array, band_data_array, @@ -570,6 +784,10 @@ impl<'a> RasterStructArray<'a> { band_datatype_array: self.band_datatype_array, band_nodata_array: self.band_nodata_array, band_view_list: self.band_view_list, + band_view_source_axis: self.band_view_source_axis, + band_view_start: self.band_view_start, + band_view_step: self.band_view_step, + band_view_steps: self.band_view_steps, band_outdb_uri_array: self.band_outdb_uri_array, band_outdb_format_array: self.band_outdb_format_array, band_data_array: self.band_data_array, @@ -603,11 +821,11 @@ impl<'a> RasterStructArray<'a> { #[cfg(test)] mod tests { use super::*; - use crate::builder::RasterBuilder; - use crate::traits::{BandMetadata, RasterMetadata}; - use arrow_array::{ArrayRef, ListArray, StructArray, UInt32Array}; - use arrow_buffer::{OffsetBuffer, ScalarBuffer}; - use arrow_schema::{DataType, Fields}; + use crate::builder::{RasterBuilder, StartBandWithViewArgs}; + use crate::traits::{BandMetadata, RasterMetadata, ViewEntry}; + use arrow_array::{types::Int64Type, ArrayRef, ListArray, StructArray, UInt32Array}; + use arrow_buffer::{NullBuffer, OffsetBuffer, ScalarBuffer}; + use arrow_schema::{DataType, Field, Fields}; use sedona_schema::raster::{ band_indices, raster_indices, BandDataType, RasterSchema, StorageType, }; @@ -775,20 +993,36 @@ mod tests { assert!(rasters.is_null(1)); } - /// Build a single-raster, single-band raster StructArray with the - /// canonical identity view. Used as the baseline input to the surgery - /// helpers below; callers replace one band-level column to simulate - /// schema corruption on non-view fields. - fn build_identity_raster() -> StructArray { + /// Build a single-raster, single-band raster StructArray with an explicit + /// view. Used as the input to the surgery helpers below; callers replace + /// one band-level column to simulate schema corruption. + fn build_explicit_view_raster() -> StructArray { let mut builder = RasterBuilder::new(1); let transform = [0.0, 1.0, 0.0, 0.0, 0.0, -1.0]; builder .start_raster_nd(&transform, &["x"], &[3], None) .unwrap(); + let view = [ViewEntry { + source_axis: 0, + start: 1, + step: 2, + steps: 3, + }]; builder - .start_band_nd(None, &["x"], &[3], BandDataType::UInt8, None, None, None) + .start_band_with_view(StartBandWithViewArgs { + name: None, + dim_names: &["x"], + source_shape: &[8], + view: &view, + data_type: BandDataType::UInt8, + nodata: None, + outdb_uri: None, + outdb_format: None, + }) .unwrap(); - builder.band_data_writer().append_value(vec![0u8, 1, 2]); + builder + .band_data_writer() + .append_value(vec![0u8, 1, 2, 3, 4, 5, 6, 7]); builder.finish_band().unwrap(); builder.finish_raster().unwrap(); builder.finish().unwrap() @@ -841,11 +1075,73 @@ mod tests { ) } - // bad data_type discriminant + /// Rebuild the band view list with hand-rolled entries. `entries[i]` + /// supplies all four `(source_axis, start, step, steps)` Int64 values + /// for band-row `i`. `nulls` controls per-row validity bits — `None` + /// means every row is non-null. + fn make_band_view_list( + entries: Vec>, + nulls: Option>, + ) -> ArrayRef { + let mut offsets: Vec = vec![0]; + let mut sa: Vec = vec![]; + let mut start: Vec = vec![]; + let mut step: Vec = vec![]; + let mut steps: Vec = vec![]; + for row in &entries { + for &(a, s, k, n) in row { + sa.push(a); + start.push(s); + step.push(k); + steps.push(n); + } + offsets.push(sa.len() as i32); + } + let view_struct_fields = Fields::from(vec![ + Field::new("source_axis", DataType::Int64, false), + Field::new("start", DataType::Int64, false), + Field::new("step", DataType::Int64, false), + Field::new("steps", DataType::Int64, false), + ]); + let view_struct = StructArray::new( + view_struct_fields, + vec![ + Arc::new(arrow_array::PrimitiveArray::::from(sa)) as ArrayRef, + Arc::new(arrow_array::PrimitiveArray::::from(start)) as ArrayRef, + Arc::new(arrow_array::PrimitiveArray::::from(step)) as ArrayRef, + Arc::new(arrow_array::PrimitiveArray::::from(steps)) as ArrayRef, + ], + None, + ); + let DataType::List(view_field) = RasterSchema::view_type() else { + unreachable!() + }; + let null_buf = nulls.map(NullBuffer::from); + Arc::new(ListArray::new( + view_field, + OffsetBuffer::new(ScalarBuffer::from(offsets)), + Arc::new(view_struct), + null_buf, + )) + } + + // ---- Critical #1: malformed view entries ---- + + #[test] + fn band_returns_none_when_view_length_mismatches_source_shape() { + // source_shape has 1 dim but view encodes 2 entries. + let array = build_explicit_view_raster(); + let bad_view = make_band_view_list(vec![vec![(0, 0, 1, 3), (0, 0, 1, 3)]], None); + let mutated = replace_band_column(&array, band_indices::VIEW, bad_view); + let rasters = RasterStructArray::new(&mutated); + assert!(rasters.get(0).unwrap().band(0).is_err()); + } + + // ---- Critical #2: bad data_type discriminant ---- #[test] fn band_and_band_data_type_surface_corruption_for_unknown_discriminant() { - let array = build_identity_raster(); + let array = build_explicit_view_raster(); let bad_dtype: ArrayRef = Arc::new(UInt32Array::from(vec![0xFFu32])); let mutated = replace_band_column(&array, band_indices::DATA_TYPE, bad_dtype); let rasters = RasterStructArray::new(&mutated); @@ -867,7 +1163,7 @@ mod tests { #[test] fn band_surfaces_internal_error_when_source_shape_is_empty() { - let array = build_identity_raster(); + let array = build_explicit_view_raster(); // Replace source_shape with a single empty list row. let DataType::List(ss_field) = RasterSchema::source_shape_type() else { unreachable!() @@ -889,7 +1185,100 @@ mod tests { assert!(err.to_string().contains("empty source_shape")); } - // direct fast-path tests + #[test] + fn band_surfaces_internal_error_when_data_column_shorter_than_view() { + // build_explicit_view_raster writes 8 UInt8 source bytes with view + // (start=1, step=2, steps=3) which addresses bytes 1, 3, 5. + // Inflate source_shape to [16] and the view to cover steps=10 along + // the (now nominally-larger) source axis: the byte range jumps past + // the actual 8-byte data column and the precheck must fire. + let array = build_explicit_view_raster(); + // source_shape := [16] + let new_source_shape = make_band_source_shape_list(vec![vec![16i64]]); + let mutated_ss = replace_band_column(&array, band_indices::SOURCE_SHAPE, new_source_shape); + // view := (source_axis=0, start=0, step=1, steps=10) — addresses + // bytes 0..10 but the underlying data column only has 8 bytes. + let new_view = make_band_view_list(vec![vec![(0, 0, 1, 10)]], None); + let mutated = replace_band_column(&mutated_ss, band_indices::VIEW, new_view); + let rasters = RasterStructArray::new(&mutated); + let err = rasters.get(0).unwrap().band(0).err().unwrap(); + assert!(err.to_string().contains("SedonaDB internal error")); + assert!(err.to_string().contains("view-buffer bounds check failed")); + } + + #[test] + fn band_rejects_empty_non_null_view_row() { + // The identity view is encoded exclusively as a NULL row; a + // non-null zero-length list is malformed and must error rather + // than silently fall back to identity. (Pre-rev behaviour + // accepted it — see `RasterSchema::view_type` for the contract.) + let array = build_explicit_view_raster(); + let empty_non_null_view = make_band_view_list(vec![vec![]], Some(vec![true])); + let mutated = replace_band_column(&array, band_indices::VIEW, empty_non_null_view); + let rasters = RasterStructArray::new(&mutated); + let err = rasters.get(0).unwrap().band(0).err().unwrap(); + assert!(err.to_string().contains("view length"), "got: {err}"); + } + + // ---- Stride composition overflow guards ---- + + /// Build a band source_shape list with hand-rolled i64 entries so tests + /// can inject values that the builder's writer-side checks would refuse. + fn make_band_source_shape_list(rows: Vec>) -> ArrayRef { + let mut offsets: Vec = vec![0]; + let mut values: Vec = vec![]; + for row in &rows { + values.extend_from_slice(row); + offsets.push(values.len() as i32); + } + let DataType::List(field) = RasterSchema::source_shape_type() else { + unreachable!() + }; + Arc::new(ListArray::new( + field, + OffsetBuffer::new(ScalarBuffer::from(offsets)), + Arc::new(Int64Array::from(values)), + None, + )) + } + + #[test] + fn band_returns_none_when_source_strides_product_overflows() { + // dtype_size × Π source_shape[j>k] must not silently wrap. With a + // 3-D source_shape of `[1, 1<<32, 1<<32]` the product (1<<32) × + // (1<<32) = 1<<64 overflows i64 in the source-stride build. + let array = build_explicit_view_raster(); + let new_source_shape = + make_band_source_shape_list(vec![vec![1i64, 1i64 << 32, 1i64 << 32]]); + let mutated_ss = replace_band_column(&array, band_indices::SOURCE_SHAPE, new_source_shape); + // Pad the view to 3 entries; steps=0 on the giant axes keeps + // validate_view's start/last checks out of the casts-from-u64 path. + let new_view = + make_band_view_list(vec![vec![(0, 0, 1, 1), (1, 0, 1, 0), (2, 0, 1, 0)]], None); + let mutated = replace_band_column(&mutated_ss, band_indices::VIEW, new_view); + let rasters = RasterStructArray::new(&mutated); + assert!(rasters.get(0).unwrap().band(0).is_err()); + } + + #[test] + fn band_returns_none_when_view_step_times_source_stride_overflows() { + // `validate_view` bounds (steps-1)*step + start on the SOURCE axis + // but doesn't bound v.step × cumulative_byte_stride. A view with a + // small visible region but a step large enough to wrap the byte + // stride must be rejected at construction. + // + // Source `[3, 1<<60]`, dtype_size=1 (UInt8) → src_stride[0] = 1<<60. + // View on axis 0 with step=8 makes byte_strides[0] = 8 × (1<<60) = + // 1<<63 which overflows i64. The view itself only walks 1 step on + // that axis so validate_view's (steps-1)*step bound holds. + let array = build_explicit_view_raster(); + let new_source_shape = make_band_source_shape_list(vec![vec![3i64, 1i64 << 60]]); + let mutated_ss = replace_band_column(&array, band_indices::SOURCE_SHAPE, new_source_shape); + let new_view = make_band_view_list(vec![vec![(0, 0, 8, 1), (1, 0, 1, 1)]], None); + let mutated = replace_band_column(&mutated_ss, band_indices::VIEW, new_view); + let rasters = RasterStructArray::new(&mutated); + assert!(rasters.get(0).unwrap().band(0).is_err()); + } #[test] fn raster_ref_fast_paths_return_expected_values() { @@ -990,18 +1379,17 @@ mod tests { assert!(bm0.outdb_band_id().is_none()); } - // multi-band, multi-raster identity + // ---- Important #9: multi-band, multi-raster mixed identity/explicit ---- #[test] - fn multi_raster_identity_views() { - // Two rasters with multiple identity bands each. Exercises the - // `bands_list.value_offsets()` routing for every per-band lookup — - // a naive reader that forgets to add the per-raster offset would - // hand back data from the wrong band. + fn multi_raster_mixed_identity_and_explicit_views() { + // Two rasters. Raster 0 has 3 bands (identity, explicit slice, + // identity). Raster 1 has 2 bands (explicit broadcast, identity). + // bands_list.value_offsets() must correctly route each band. let mut builder = RasterBuilder::new(2); let transform = [0.0, 1.0, 0.0, 0.0, 0.0, -1.0]; - // Raster 0: three identity bands. + // Raster 0 builder .start_raster_nd(&transform, &["x"], &[3], None) .unwrap(); @@ -1011,9 +1399,25 @@ mod tests { builder.band_data_writer().append_value(vec![10u8, 20, 30]); builder.finish_band().unwrap(); builder - .start_band_nd(None, &["x"], &[3], BandDataType::UInt8, None, None, None) + .start_band_with_view(StartBandWithViewArgs { + name: None, + dim_names: &["x"], + source_shape: &[8], + view: &[ViewEntry { + source_axis: 0, + start: 1, + step: 2, + steps: 3, + }], + data_type: BandDataType::UInt8, + nodata: None, + outdb_uri: None, + outdb_format: None, + }) .unwrap(); - builder.band_data_writer().append_value(vec![40u8, 50, 60]); + builder + .band_data_writer() + .append_value(vec![0u8, 1, 2, 3, 4, 5, 6, 7]); builder.finish_band().unwrap(); builder .start_band_nd(None, &["x"], &[3], BandDataType::UInt8, None, None, None) @@ -1024,16 +1428,28 @@ mod tests { builder.finish_band().unwrap(); builder.finish_raster().unwrap(); - // Raster 1: two identity bands of a different shape. + // Raster 1 builder .start_raster_nd(&transform, &["x"], &[4], None) .unwrap(); builder - .start_band_nd(None, &["x"], &[4], BandDataType::UInt8, None, None, None) + .start_band_with_view(StartBandWithViewArgs { + name: None, + dim_names: &["x"], + source_shape: &[1], + view: &[ViewEntry { + source_axis: 0, + start: 0, + step: 0, + steps: 4, + }], + data_type: BandDataType::UInt8, + nodata: None, + outdb_uri: None, + outdb_format: None, + }) .unwrap(); - builder - .band_data_writer() - .append_value(vec![42u8, 43, 44, 45]); + builder.band_data_writer().append_value(vec![42u8]); builder.finish_band().unwrap(); builder .start_band_nd(None, &["x"], &[4], BandDataType::UInt8, None, None, None) @@ -1045,61 +1461,39 @@ mod tests { let array = builder.finish().unwrap(); let rasters = RasterStructArray::new(&array); + // Raster 0 bands: identity (3), slice (3), identity (3). The identity + // bands are contiguous and borrow zero-copy; the step=2 slice is + // strided so `as_contiguous` rejects it. let r0 = rasters.get(0).unwrap(); assert_eq!(r0.num_bands(), 3); assert_eq!(r0.band(0).unwrap().shape(), &[3]); - assert_eq!( - r0.band(0) - .unwrap() - .nd_buffer() - .unwrap() - .as_contiguous() - .unwrap(), - &[10u8, 20, 30] - ); + let b0 = r0.band(0).unwrap(); + let nd0 = b0.nd_buffer().unwrap(); + assert_eq!(nd0.as_contiguous().unwrap(), &[10u8, 20, 30]); assert_eq!(r0.band(1).unwrap().shape(), &[3]); - assert_eq!( - r0.band(1) - .unwrap() - .nd_buffer() - .unwrap() - .as_contiguous() - .unwrap(), - &[40u8, 50, 60] - ); + let b1 = r0.band(1).unwrap(); + let nd1 = b1.nd_buffer().unwrap(); + assert!(!nd1.is_contiguous()); + assert!(nd1.as_contiguous().is_err()); assert_eq!(r0.band(2).unwrap().shape(), &[3]); - assert_eq!( - r0.band(2) - .unwrap() - .nd_buffer() - .unwrap() - .as_contiguous() - .unwrap(), - &[100u8, 101, 102] - ); + let b2 = r0.band(2).unwrap(); + let nd2 = b2.nd_buffer().unwrap(); + assert_eq!(nd2.as_contiguous().unwrap(), &[100u8, 101, 102]); + // Raster 1 bands: broadcast (4 copies of 42), identity (4). The + // broadcast band has a zero stride so it is non-contiguous and + // rejected; the identity band borrows zero-copy. let r1 = rasters.get(1).unwrap(); assert_eq!(r1.num_bands(), 2); assert_eq!(r1.band(0).unwrap().shape(), &[4]); - assert_eq!( - r1.band(0) - .unwrap() - .nd_buffer() - .unwrap() - .as_contiguous() - .unwrap(), - &[42u8, 43, 44, 45] - ); + let r1b0 = r1.band(0).unwrap(); + let r1nd0 = r1b0.nd_buffer().unwrap(); + assert!(!r1nd0.is_contiguous()); + assert!(r1nd0.as_contiguous().is_err()); assert_eq!(r1.band(1).unwrap().shape(), &[4]); - assert_eq!( - r1.band(1) - .unwrap() - .nd_buffer() - .unwrap() - .as_contiguous() - .unwrap(), - &[1u8, 2, 3, 4] - ); + let r1b1 = r1.band(1).unwrap(); + let r1nd1 = r1b1.nd_buffer().unwrap(); + assert_eq!(r1nd1.as_contiguous().unwrap(), &[1u8, 2, 3, 4]); // Fast paths must honour the same offsets. assert_eq!(r0.band_data_type(1), Some(BandDataType::UInt8)); @@ -1157,6 +1551,60 @@ mod tests { assert!(r1.band_nodata(0).is_none()); } + // ---- Fast-path / band(i) divergence on a corrupt view ---- + + #[test] + fn fast_paths_return_columnar_values_when_band_is_corrupt() { + // band(i) goes through validate_view and returns None for a + // malformed view; the columnar fast paths read their fields + // directly without consulting the view at all. Pin down that + // contract so a future reader doesn't accidentally couple them + // (or "fix" the divergence in either direction without us + // noticing). Also catches a regression where a fast path would + // panic instead of returning the underlying value. + let mut builder = RasterBuilder::new(1); + let transform = [0.0, 1.0, 0.0, 0.0, 0.0, -1.0]; + builder + .start_raster_nd(&transform, &["x"], &[3], None) + .unwrap(); + builder + .start_band_with_view(StartBandWithViewArgs { + name: Some("a"), + dim_names: &["x"], + source_shape: &[8], + view: &[ViewEntry { + source_axis: 0, + start: 1, + step: 2, + steps: 3, + }], + data_type: BandDataType::UInt32, + nodata: Some(&[0u8, 0, 0, 0]), + outdb_uri: Some("s3://bucket/a.tif"), + outdb_format: Some("GTiff"), + }) + .unwrap(); + builder.band_data_writer().append_value(vec![0u8; 32]); + builder.finish_band().unwrap(); + builder.finish_raster().unwrap(); + let array = builder.finish().unwrap(); + + let bad_view = make_band_view_list(vec![vec![(0, 0, 1, -1)]], None); + let mutated = replace_band_column(&array, band_indices::VIEW, bad_view); + let rasters = RasterStructArray::new(&mutated); + let r = rasters.get(0).unwrap(); + + // band(i) rejects on validate_view. + assert!(r.band(0).is_err()); + + // Fast paths still surface the underlying columnar values — + // they don't validate the view, by design. Locking that in. + assert_eq!(r.band_data_type(0), Some(BandDataType::UInt32)); + assert_eq!(r.band_outdb_uri(0), Some("s3://bucket/a.tif")); + assert_eq!(r.band_outdb_format(0), Some("GTiff")); + assert_eq!(r.band_nodata(0), Some(&[0u8, 0, 0, 0][..])); + } + #[test] fn zero_element_indb_band_classifies_as_indb() { // A band with a 0-size dim (here `time = 0`) legitimately holds 0 bytes. diff --git a/rust/sedona-raster/src/builder.rs b/rust/sedona-raster/src/builder.rs index 874791708..a6e163303 100644 --- a/rust/sedona-raster/src/builder.rs +++ b/rust/sedona-raster/src/builder.rs @@ -29,7 +29,8 @@ use std::sync::Arc; use sedona_schema::raster::{BandDataType, RasterSchema}; -use crate::traits::{BandMetadata, MetadataRef}; +use crate::traits::{BandMetadata, BandRef, MetadataRef, ViewEntry}; +use crate::view_entries::ViewEntries; /// Maximum byte length of an inline `BinaryViewArray` view. Views this short /// store their bytes in the 16-byte view itself; longer views reference a data @@ -84,6 +85,36 @@ const MAX_INLINE_VIEW_LEN: u32 = 12; /// // Get the final StructArray /// let raster_array = builder.finish().unwrap(); /// ``` +/// Arguments to [`RasterBuilder::start_band_with_view`]. Bundled into a +/// struct to keep the call site readable — eight slots is enough that +/// positional args invite mis-ordering bugs. +pub(crate) struct StartBandWithViewArgs<'a> { + pub name: Option<&'a str>, + pub dim_names: &'a [&'a str], + pub source_shape: &'a [i64], + pub view: &'a [ViewEntry], + pub data_type: BandDataType, + pub nodata: Option<&'a [u8]>, + pub outdb_uri: Option<&'a str>, + pub outdb_format: Option<&'a str>, +} + +/// Arguments to [`RasterBuilder::with_view`]. Mirrors +/// [`StartBandWithViewArgs`] minus the two fields `with_view` derives from +/// `input` (`source_shape` from `input.raw_source_shape()`, `data_type` from +/// `input.data_type()`) — accepting those from the caller would let them +/// contradict `input`. `view` here is a *delta* composed against +/// `input.view()`, not the absolute view stored on the band. +pub struct WithViewArgs<'a> { + pub name: Option<&'a str>, + pub dim_names: &'a [&'a str], + pub input: &'a dyn BandRef, + pub view: &'a [ViewEntry], + pub nodata: Option<&'a [u8]>, + pub outdb_uri: Option<&'a str>, + pub outdb_format: Option<&'a str>, +} + pub struct RasterBuilder { // Top-level raster fields crs: StringViewBuilder, @@ -387,6 +418,187 @@ impl RasterBuilder { Ok(()) } + /// Internal: write a band with an explicit view over a raw source + /// shape. Public callers should use [`Self::with_view`] instead, + /// which derives `source_shape`, validates view composition, and + /// inherits the input band's source bytes — `with_view` calls this + /// helper after composing. + /// + /// Each `ViewEntry` describes one *visible* axis in `dim_names` order: + /// `(source_axis, start, step, steps)`. Validates that: + /// - `dim_names`, `source_shape`, and `view` have equal length. + /// - Across `view`, `source_axis` values form a permutation of + /// `0..ndim` (no axis duplicated, none missing). + /// - For each entry with `steps > 0`: `start` and (when `step != 0`) + /// `start + (steps - 1) * step` are in `[0, source_shape[source_axis])`. + /// - `steps >= 0`. + pub(crate) fn start_band_with_view( + &mut self, + args: StartBandWithViewArgs<'_>, + ) -> Result<(), ArrowError> { + let StartBandWithViewArgs { + name, + dim_names, + source_shape, + view, + data_type, + nodata, + outdb_uri, + outdb_format, + } = args; + let ndim = dim_names.len(); + if ndim == 0 { + return Err(ArrowError::InvalidArgumentError( + "start_band_with_view: 0-dimensional bands are not supported".into(), + )); + } + if source_shape.len() != ndim || view.len() != ndim { + return Err(ArrowError::InvalidArgumentError(format!( + "start_band_with_view: dim_names ({}), source_shape ({}), and view ({}) \ + must all have the same length", + ndim, + source_shape.len(), + view.len() + ))); + } + + let view_entries = ViewEntries::new(view.to_vec()); + view_entries.validate(source_shape)?; + + // Write fields. + match name { + Some(n) => self.band_name.append_value(n), + None => self.band_name.append_null(), + } + + for dn in dim_names { + self.band_dim_names_values.append_value(dn); + } + let next = *self.band_dim_names_offsets.last().unwrap() + ndim as i32; + self.band_dim_names_offsets.push(next); + + for &s in source_shape { + self.band_shape_values.append_value(s); + } + let next = *self.band_shape_offsets.last().unwrap() + ndim as i32; + self.band_shape_offsets.push(next); + + self.band_datatype.append_value(data_type as u32); + + match nodata { + Some(b) => self.band_nodata.append_value(b), + None => self.band_nodata.append_null(), + } + + for v in view { + self.band_view_source_axis_values + .append_value(v.source_axis); + self.band_view_start_values.append_value(v.start); + self.band_view_step_values.append_value(v.step); + self.band_view_steps_values.append_value(v.steps); + } + let next = *self.band_view_offsets.last().unwrap() + ndim as i32; + self.band_view_offsets.push(next); + self.band_view_validity.push(true); + + match outdb_uri { + Some(uri) => self.band_outdb_uri.append_value(uri), + None => self.band_outdb_uri.append_null(), + } + match outdb_format { + Some(format) => self.band_outdb_format.append_value(format), + None => self.band_outdb_format.append_null(), + } + + self.current_band_count += 1; + self.band_data_count_at_start = self.band_data.len(); + + // finish_raster compares visible shape against spatial_shape. + self.current_raster_bands.push(( + dim_names.iter().map(|s| s.to_string()).collect(), + view_entries.visible_shape(), + )); + + Ok(()) + } + + /// Build a band that is a new view into an existing band. + /// + /// The output band stores a view that is the composition of `input`'s + /// existing view with the supplied `view`. The supplied `view`'s + /// `source_axis` entries refer to `input`'s *visible* axes, not its + /// source axes — composition with `input.view()` translates them. + /// + /// `dim_names` names the output's *visible* axes (length == view.len()). + /// + /// Storage: + /// - **InDb input** → output is InDb. The input's source bytes are + /// copied verbatim into the output's `data` column (today's + /// simple-share strategy; buffer-sharing via Arrow `BinaryView` is a + /// future optimisation). + /// - **OutDb input** → output is OutDb. The data column stays empty; + /// the input's `outdb_uri` and `outdb_format` are inherited (unless + /// overridden via the explicit `outdb_uri` / `outdb_format` args). + /// The composed view lives alongside the same external pointer — + /// loading is deferred to whoever reads the visible bytes. + /// + /// Identity-input shortcut: when `input` carries identity view, the + /// composed view equals `view` verbatim. + pub fn with_view(&mut self, args: WithViewArgs) -> Result<(), ArrowError> { + let WithViewArgs { + name, + dim_names, + input, + view, + nodata, + outdb_uri, + outdb_format, + } = args; + let source_shape: Vec = input.raw_source_shape().to_vec(); + let composed = + ViewEntries::new(input.view().to_vec()).compose(&ViewEntries::new(view.to_vec()))?; + + // Inherit storage metadata from the input unless the caller has + // explicitly overridden it. For OutDb inputs this propagates the + // external pointer to the output; for InDb inputs the input's + // outdb_uri/outdb_format are typically None anyway. + let final_outdb_uri = outdb_uri.or_else(|| input.outdb_uri()); + let final_outdb_format = outdb_format.or_else(|| input.outdb_format()); + + // Reuse the internal start_band_with_view helper to perform + // validation + write the schema fields. + self.start_band_with_view(StartBandWithViewArgs { + name, + dim_names, + source_shape: &source_shape, + view: composed.as_slice(), + data_type: input.data_type(), + nodata, + outdb_uri: final_outdb_uri, + outdb_format: final_outdb_format, + })?; + + if input.is_indb() { + // InDb: nd_buffer().buffer is the source bytes — borrow them + // directly into `append_value` so the only copy is the one + // BinaryViewBuilder makes into its block. This is still + // a full source-bytes copy per `with_view` call, which + // undermines the "lazy slice" framing for large rasters. + // + // The principled fix is Arrow `BinaryView` buffer-sharing: + // the output's data row references the input's existing + // backing `Buffer` instead of copying. Tracked separately + // in the Raster Clean Up project. + let buf = input.nd_buffer()?; + self.band_data_writer().append_value(buf.buffer); + } else { + // OutDb: data column stays empty; the source bytes live at the + // inherited outdb_uri and are fetched lazily on read. + self.band_data_writer().append_value([]); + } + Ok(()) + } + /// Convenience: start a 2D band with `dim_names=["y","x"]` and `shape=[height, width]`. /// /// Must be called after `start_raster_2d` / `start_raster_2d` which sets @@ -1161,9 +1373,13 @@ mod tests { // Test creating raster with OutDb reference metadata let mut builder = RasterBuilder::new(10); + // 10x10 raster of UInt8 = 100 visible bytes, matching the data buffer + // written below. `RasterRef::band()` now verifies the data column is + // long enough to cover the visible region, so the dimensions and the + // byte count must agree. let metadata = RasterMetadata { - width: 1024, - height: 1024, + width: 10, + height: 10, upperleft_x: 0.0, upperleft_y: 0.0, scale_x: 1.0, @@ -1220,6 +1436,8 @@ mod tests { assert!(indb_metadata.outdb_url().is_none()); assert!(indb_metadata.outdb_band_id().is_none()); assert!(indb_band.is_indb()); + let indb_nd = indb_band.nd_buffer().unwrap(); + assert_eq!(indb_nd.as_contiguous().unwrap().len(), 100); // Test OutDbRef band let outdb_band = bands.band(2).unwrap(); @@ -1234,7 +1452,10 @@ mod tests { "s3://mybucket/satellite_image.tif" ); assert_eq!(outdb_metadata.outdb_band_id().unwrap(), 2); + // OutDbRef bands carry no in-band bytes; byte access via nd_buffer() + // is not supported (backend-specific resolvers are tracked separately). assert!(!outdb_band.is_indb()); + assert!(outdb_band.nd_buffer().is_err()); } #[test] @@ -1831,6 +2052,44 @@ mod tests { ); } + #[test] + fn test_start_band_with_view_identity_matches_start_band() { + // Identity view through start_band_with_view should produce the same + // visible shape and byte strides as the convenience start_band path. + let mut builder = RasterBuilder::new(1); + let transform = [0.0, 1.0, 0.0, 0.0, 0.0, -1.0]; + builder + .start_raster_nd(&transform, &["x", "y"], &[5, 4], None) + .unwrap(); + + let view = crate::view_entries![0:4, 0:5]; + builder + .start_band_with_view(StartBandWithViewArgs { + name: None, + dim_names: &["y", "x"], + source_shape: &[4, 5], + view: view.as_slice(), + data_type: BandDataType::UInt8, + nodata: None, + outdb_uri: None, + outdb_format: None, + }) + .unwrap(); + builder.band_data_writer().append_value(vec![0u8; 20]); + builder.finish_band().unwrap(); + builder.finish_raster().unwrap(); + + let array = builder.finish().unwrap(); + let rasters = RasterStructArray::new(&array); + let r = rasters.get(0).unwrap(); + let band = r.band(0).unwrap(); + assert_eq!(band.shape(), &[4, 5]); + assert_eq!(band.raw_source_shape(), &[4, 5]); + let buf = band.nd_buffer().unwrap(); + assert_eq!(buf.strides, &[5, 1]); + assert_eq!(buf.offset, 0); + } + #[test] fn test_start_band_rejects_zero_dim() { // 0-D bands carry no spatial extent and no caller has a use for @@ -1848,6 +2107,63 @@ mod tests { ); } + #[test] + fn test_start_band_with_view_rejects_zero_dim() { + // start_band_with_view must apply the same 0-D guard as start_band + // — accepting empty dim_names would otherwise bypass it via the + // explicit-view path. + let mut builder = RasterBuilder::new(1); + let transform = [0.0, 1.0, 0.0, 0.0, 0.0, -1.0]; + builder.start_raster_nd(&transform, &[], &[], None).unwrap(); + let err = builder + .start_band_with_view(StartBandWithViewArgs { + name: None, + dim_names: &[], + source_shape: &[], + view: &[], + data_type: BandDataType::UInt8, + nodata: None, + outdb_uri: None, + outdb_format: None, + }) + .unwrap_err(); + assert!( + err.to_string().contains("0-dimensional"), + "unexpected error: {err}" + ); + } + + #[test] + fn test_view_validation_rejects_step_overrun() { + let mut builder = RasterBuilder::new(1); + let transform = [0.0, 1.0, 0.0, 0.0, 0.0, -1.0]; + builder.start_raster_nd(&transform, &[], &[], None).unwrap(); + // start=1, step=2, steps=4 → addresses element 1+(4-1)*2 = 7 which is + // out of range for a source size of 7. + let view = [ViewEntry { + source_axis: 0, + start: 1, + step: 2, + steps: 4, + }]; + let err = builder + .start_band_with_view(StartBandWithViewArgs { + name: None, + dim_names: &["x"], + source_shape: &[7], + view: &view, + data_type: BandDataType::UInt8, + nodata: None, + outdb_uri: None, + outdb_format: None, + }) + .unwrap_err(); + assert!( + err.to_string().contains("out of range"), + "unexpected error: {err}" + ); + } + #[test] fn test_as_contiguous_identity_via_start_band_borrows() { // Canonical identity: the row's view list is null, and the read path @@ -1890,6 +2206,149 @@ mod tests { assert_eq!(buf.as_contiguous().unwrap(), pixels.as_slice()); } + #[test] + fn test_as_contiguous_explicit_identity_view_borrows() { + // Identity expressed *explicitly* through start_band_with_view must be + // indistinguishable to consumers from the null-row identity above — + // same visible shape, same byte strides, same zero-copy borrow. + let mut builder = RasterBuilder::new(1); + let transform = [0.0, 1.0, 0.0, 0.0, 0.0, -1.0]; + builder + .start_raster_nd(&transform, &["x", "y"], &[3, 2], None) + .unwrap(); + let view = crate::view_entries![0:2, 0:3]; + builder + .start_band_with_view(StartBandWithViewArgs { + name: None, + dim_names: &["y", "x"], + source_shape: &[2, 3], + view: view.as_slice(), + data_type: BandDataType::UInt8, + nodata: None, + outdb_uri: None, + outdb_format: None, + }) + .unwrap(); + let pixels: Vec = (0..6).collect(); + builder.band_data_writer().append_value(pixels.clone()); + builder.finish_band().unwrap(); + builder.finish_raster().unwrap(); + + let array = builder.finish().unwrap(); + let rasters = RasterStructArray::new(&array); + let r = rasters.get(0).unwrap(); + let band = r.band(0).unwrap(); + + assert_eq!(band.shape(), &[2, 3]); + let buf = band.nd_buffer().unwrap(); + assert_eq!(buf.strides, &[3, 1]); + assert_eq!(buf.offset, 0); + assert!(buf.is_contiguous()); + assert_eq!(buf.as_contiguous().unwrap(), pixels.as_slice()); + } + + #[test] + fn test_zero_step_broadcast_2d_is_strided_and_rejected() { + // 2D broadcast: source shape [1, 3], view broadcasts axis 0 four + // times so the visible region is 4×3. Each visible row must equal the + // source's only row. + let mut builder = RasterBuilder::new(1); + let transform = [0.0, 1.0, 0.0, 0.0, 0.0, -1.0]; + builder.start_raster_nd(&transform, &[], &[], None).unwrap(); + let view = [ + ViewEntry { + source_axis: 0, + start: 0, + step: 0, + steps: 4, + }, + ViewEntry { + source_axis: 1, + start: 0, + step: 1, + steps: 3, + }, + ]; + builder + .start_band_with_view(StartBandWithViewArgs { + name: None, + dim_names: &["row", "col"], + source_shape: &[1, 3], + view: &view, + data_type: BandDataType::UInt8, + nodata: None, + outdb_uri: None, + outdb_format: None, + }) + .unwrap(); + builder.band_data_writer().append_value(vec![10u8, 20, 30]); + builder.finish_band().unwrap(); + builder.finish_raster().unwrap(); + + let array = builder.finish().unwrap(); + let rasters = RasterStructArray::new(&array); + let r = rasters.get(0).unwrap(); + let band = r.band(0).unwrap(); + + let buf = band.nd_buffer().unwrap(); + assert_eq!(buf.shape, &[4, 3]); + // Broadcast row stride is 0; column stride is 1 byte per UInt8. + assert_eq!(buf.strides, &[0, 1]); + assert_eq!(buf.offset, 0); + + // A zero stride is not C-order packed, so the buffer is non-contiguous + // and as_contiguous rejects it (repacking lives behind + // RS_EnsureContiguous, https://github.com/apache/sedona-db/issues/899). + assert!(!buf.is_contiguous()); + assert!(buf.as_contiguous().is_err()); + } + + #[test] + fn test_negative_step_strided_reverse_is_rejected() { + // 1D source [0..8] with start=6, step=-2, steps=3 picks every other + // element walking backwards: {6, 4, 2}. + let mut builder = RasterBuilder::new(1); + let transform = [0.0, 1.0, 0.0, 0.0, 0.0, -1.0]; + builder.start_raster_nd(&transform, &[], &[], None).unwrap(); + let view = [ViewEntry { + source_axis: 0, + start: 6, + step: -2, + steps: 3, + }]; + builder + .start_band_with_view(StartBandWithViewArgs { + name: None, + dim_names: &["x"], + source_shape: &[8], + view: &view, + data_type: BandDataType::UInt8, + nodata: None, + outdb_uri: None, + outdb_format: None, + }) + .unwrap(); + builder + .band_data_writer() + .append_value(vec![0u8, 1, 2, 3, 4, 5, 6, 7]); + builder.finish_band().unwrap(); + builder.finish_raster().unwrap(); + + let array = builder.finish().unwrap(); + let rasters = RasterStructArray::new(&array); + let r = rasters.get(0).unwrap(); + let band = r.band(0).unwrap(); + + let buf = band.nd_buffer().unwrap(); + assert_eq!(buf.shape, &[3]); + assert_eq!(buf.strides, &[-2]); + assert_eq!(buf.offset, 6); + + // A negative stride is not C-order packed → non-contiguous, rejected. + assert!(!buf.is_contiguous()); + assert!(buf.as_contiguous().is_err()); + } + #[test] fn test_view_field_is_null_for_identity_band() { // Schema invariant: identity views are stored as null list rows so @@ -1970,6 +2429,187 @@ mod tests { ); } + #[test] + fn test_outer_axis_slice_float32_is_contiguous() { + // Multi-byte dtype outer-axis slice: a 2D view over Float32 that + // takes the leading rows from offset 0 is contiguous-but-not-identity, + // so as_contiguous borrows the source prefix zero-copy. Catches a + // regression where contiguity assumed dtype_size == 1. + let mut builder = RasterBuilder::new(1); + let transform = [0.0, 1.0, 0.0, 0.0, 0.0, -1.0]; + builder + .start_raster_nd(&transform, &["x", "y"], &[3, 2], None) + .unwrap(); + // Slice the outer axis: take rows 0 and 1 of a 3-row source. With + // start=0, step=1, steps=2 over an axis of size 3, the view is not + // identity, but its byte strides are still C-order packed from + // offset 0, so the buffer is contiguous and borrows zero-copy. + let view = crate::view_entries![0:2, 0:3]; + builder + .start_band_with_view(StartBandWithViewArgs { + name: None, + dim_names: &["y", "x"], + source_shape: &[3, 3], // 3x3 source + view: view.as_slice(), + data_type: BandDataType::Float32, + nodata: None, + outdb_uri: None, + outdb_format: None, + }) + .unwrap(); + let source: Vec = (0..9).map(|i| i as f32).collect(); + let source_bytes: Vec = source.iter().flat_map(|f| f.to_le_bytes()).collect(); + builder + .band_data_writer() + .append_value(source_bytes.clone()); + builder.finish_band().unwrap(); + builder.finish_raster().unwrap(); + let array = builder.finish().unwrap(); + let rasters = RasterStructArray::new(&array); + let r = rasters.get(0).unwrap(); + let band = r.band(0).unwrap(); + + // Visible shape is [2, 3]; the first 6 source floats (rows 0,1) are + // exactly the visible pixels — i.e. the first 24 source bytes. + let buf = band.nd_buffer().unwrap(); + assert!(buf.is_contiguous()); + assert_eq!(buf.as_contiguous().unwrap(), &source_bytes[0..24]); + } + + #[test] + fn test_outer_axis_slice_3d_is_contiguous() { + // 3D source [T=3, Y=2, X=3] of UInt8. View slices T to T=1..3 + // (start=1, step=1, steps=2), keeps Y and X identity. The visible + // region is a contiguous source sub-range (offset 6, C-order packed + // strides), so as_contiguous borrows it zero-copy. + let mut builder = RasterBuilder::new(1); + let transform = [0.0, 1.0, 0.0, 0.0, 0.0, -1.0]; + builder + .start_raster_nd(&transform, &["x", "y"], &[3, 2], None) + .unwrap(); + let view = crate::view_entries![1:3, 0:2, 0:3]; + builder + .start_band_with_view(StartBandWithViewArgs { + name: None, + dim_names: &["t", "y", "x"], + source_shape: &[3, 2, 3], + view: view.as_slice(), + data_type: BandDataType::UInt8, + nodata: None, + outdb_uri: None, + outdb_format: None, + }) + .unwrap(); + let source: Vec = (0..18).collect(); + builder.band_data_writer().append_value(source.clone()); + builder.finish_band().unwrap(); + builder.finish_raster().unwrap(); + let array = builder.finish().unwrap(); + let rasters = RasterStructArray::new(&array); + let r = rasters.get(0).unwrap(); + let band = r.band(0).unwrap(); + + // Visible region = source[6..18] (T=1 and T=2 planes). + assert_eq!(band.shape(), &[2, 2, 3]); + let buf = band.nd_buffer().unwrap(); + assert!(buf.is_contiguous()); + assert_eq!(buf.as_contiguous().unwrap(), &source[6..18]); + } + + #[test] + fn test_nd_buffer_permutation_and_slice_combined() { + // 2D source [Y=4, X=3]. View permutes (visible order [X, Y]) and + // slices Y from 1, step 2, steps 2. Expected: + // visible_shape = [3, 2] + // byte_strides = [step_X * stride_X_src, step_Y * stride_Y_src] + // = [1 * 1, 2 * 3] = [1, 6] + // byte_offset = start_X * stride_X_src + start_Y * stride_Y_src + // = 0 * 1 + 1 * 3 = 3 + let mut builder = RasterBuilder::new(1); + let transform = [0.0, 1.0, 0.0, 0.0, 0.0, -1.0]; + builder.start_raster_nd(&transform, &[], &[], None).unwrap(); + let view = [ + ViewEntry { + source_axis: 1, + start: 0, + step: 1, + steps: 3, + }, // X + ViewEntry { + source_axis: 0, + start: 1, + step: 2, + steps: 2, + }, // Y + ]; + builder + .start_band_with_view(StartBandWithViewArgs { + name: None, + dim_names: &["x", "y"], + source_shape: &[4, 3], + view: &view, + data_type: BandDataType::UInt8, + nodata: None, + outdb_uri: None, + outdb_format: None, + }) + .unwrap(); + builder + .band_data_writer() + .append_value((0u8..12).collect::>()); + builder.finish_band().unwrap(); + builder.finish_raster().unwrap(); + let array = builder.finish().unwrap(); + let rasters = RasterStructArray::new(&array); + let r = rasters.get(0).unwrap(); + let band = r.band(0).unwrap(); + let buf = band.nd_buffer().unwrap(); + assert_eq!(buf.shape, &[3, 2]); + assert_eq!(buf.strides, &[1, 6]); + assert_eq!(buf.offset, 3); + + // The permuted+strided layout (strides [1, 6]) is not C-order packed, + // so the buffer is non-contiguous and as_contiguous rejects it. + assert!(!buf.is_contiguous()); + assert!(buf.as_contiguous().is_err()); + } + + #[test] + fn test_nd_buffer_multidim_with_zero_axis() { + // visible_shape contains a zero axis somewhere in the middle. The + // buffer spans zero bytes, so it is trivially contiguous and + // as_contiguous borrows an empty slice; nd_buffer returns the + // zero-element shape. + let mut builder = RasterBuilder::new(1); + let transform = [0.0, 1.0, 0.0, 0.0, 0.0, -1.0]; + builder.start_raster_nd(&transform, &[], &[], None).unwrap(); + let view = crate::view_entries![0:3, 0:0, 0:5]; + builder + .start_band_with_view(StartBandWithViewArgs { + name: None, + dim_names: &["a", "b", "c"], + source_shape: &[3, 4, 5], + view: view.as_slice(), + data_type: BandDataType::UInt8, + nodata: None, + outdb_uri: None, + outdb_format: None, + }) + .unwrap(); + builder.band_data_writer().append_value(vec![0u8; 60]); + builder.finish_band().unwrap(); + builder.finish_raster().unwrap(); + let array = builder.finish().unwrap(); + let rasters = RasterStructArray::new(&array); + let r = rasters.get(0).unwrap(); + let band = r.band(0).unwrap(); + assert_eq!(band.shape(), &[3, 0, 5]); + let buf = band.nd_buffer().unwrap(); + assert_eq!(buf.shape, &[3, 0, 5]); + assert!(buf.is_contiguous()); + assert!(buf.as_contiguous().unwrap().is_empty()); + } + #[test] fn test_view_null_round_trips_through_arrow_ipc() { // Schema invariant: a band built via start_band_nd serialises with a @@ -1978,8 +2618,9 @@ mod tests { // instead, downstream readers (DuckDB, PyArrow, sedona-py) will // disagree about whether the view is identity. - let mut builder = RasterBuilder::new(1); + let mut builder = RasterBuilder::new(2); let transform = [0.0, 1.0, 0.0, 0.0, 0.0, -1.0]; + // Raster 0: identity-view band → null view row. builder .start_raster_nd(&transform, &["x", "y"], &[3, 2], None) .unwrap(); @@ -1997,6 +2638,33 @@ mod tests { builder.band_data_writer().append_value(vec![0u8; 6]); builder.finish_band().unwrap(); builder.finish_raster().unwrap(); + // Raster 1: explicit non-identity view → non-null view row. + builder + .start_raster_nd(&transform, &["x"], &[3], None) + .unwrap(); + let view = [ViewEntry { + source_axis: 0, + start: 1, + step: 2, + steps: 3, + }]; + builder + .start_band_with_view(StartBandWithViewArgs { + name: None, + dim_names: &["x"], + source_shape: &[8], + view: &view, + data_type: BandDataType::UInt8, + nodata: None, + outdb_uri: None, + outdb_format: None, + }) + .unwrap(); + builder + .band_data_writer() + .append_value(vec![0u8, 1, 2, 3, 4, 5, 6, 7]); + builder.finish_band().unwrap(); + builder.finish_raster().unwrap(); let array = builder.finish().unwrap(); let schema = Arc::new(Schema::new(vec![Arc::new(arrow_schema::Field::new( @@ -2023,6 +2691,8 @@ mod tests { .downcast_ref::() .unwrap(); + // Reach into the restored bands list and confirm the view list + // preserves null/non-null per row. let bands_list = restored_struct .column(sedona_schema::raster::raster_indices::BANDS) .as_any() @@ -2038,15 +2708,257 @@ mod tests { .as_any() .downcast_ref::() .unwrap(); - assert_eq!(view_list.len(), 1); + assert_eq!(view_list.len(), 2); assert!( view_list.is_null(0), "identity-view band must remain a null view row after IPC round-trip" ); + assert!( + !view_list.is_null(1), + "explicit-view band must remain non-null after IPC round-trip" + ); + // Sanity: read paths still produce the expected visible shapes. let rasters = RasterStructArray::new(restored_struct); let r0 = rasters.get(0).unwrap(); assert_eq!(r0.band(0).unwrap().shape(), &[2, 3]); + let r1 = rasters.get(1).unwrap(); + assert_eq!(r1.band(0).unwrap().shape(), &[3]); + } + + // ---- with_view: public "create a new view into an existing band" ---- + + /// Build a 1-D UInt8 raster with `source_shape=[8]` and bytes + /// `[0, 1, ..., 7]`. Identity-view; used as input to with_view tests. + fn build_1d_identity_raster() -> StructArray { + let mut b = RasterBuilder::new(1); + b.start_raster_nd(&[0.0, 1.0, 0.0, 0.0, 0.0, -1.0], &["x"], &[8], None) + .unwrap(); + b.start_band_nd(None, &["x"], &[8], BandDataType::UInt8, None, None, None) + .unwrap(); + b.band_data_writer() + .append_value((0u8..8).collect::>()); + b.finish_band().unwrap(); + b.finish_raster().unwrap(); + b.finish().unwrap() + } + + #[test] + fn with_view_over_identity_input_produces_expected_visible_bytes() { + // Input is identity over [0..8]. with_view layers a slice + // (start=1, step=2, steps=3) producing visible bytes [1, 3, 5]. + let input_array = build_1d_identity_raster(); + let input_rasters = RasterStructArray::new(&input_array); + let input_raster = input_rasters.get(0).unwrap(); + let input_band = input_raster.band(0).unwrap(); + + let mut b = RasterBuilder::new(1); + b.start_raster_nd(&[0.0, 1.0, 0.0, 0.0, 0.0, -1.0], &["x"], &[3], None) + .unwrap(); + let view = [ViewEntry { + source_axis: 0, + start: 1, + step: 2, + steps: 3, + }]; + b.with_view(WithViewArgs { + name: None, + dim_names: &["x"], + input: input_band.as_ref(), + view: &view, + nodata: None, + outdb_uri: None, + outdb_format: None, + }) + .unwrap(); + b.finish_band().unwrap(); + b.finish_raster().unwrap(); + let out_array = b.finish().unwrap(); + + let out_rasters = RasterStructArray::new(&out_array); + let out_raster = out_rasters.get(0).unwrap(); + let out_band = out_raster.band(0).unwrap(); + assert_eq!(out_band.shape(), &[3]); + // The composed view is a strided slice (start=1, step=2) over the + // original source: byte stride 2, offset 1. Non-contiguous, so + // as_contiguous rejects it; the visible bytes [1, 3, 5] would be + // produced by RS_EnsureContiguous + // (https://github.com/apache/sedona-db/issues/899). + let buf = out_band.nd_buffer().unwrap(); + assert_eq!(buf.strides, &[2]); + assert_eq!(buf.offset, 1); + assert!(!buf.is_contiguous()); + assert!(buf.as_contiguous().is_err()); + // The output's source_shape is inherited from the input. + assert_eq!(out_band.raw_source_shape(), &[8]); + } + + #[test] + fn with_view_chained_composes_into_single_view() { + // Round 1: with_view layers (start=1, step=2, steps=4) → visible + // bytes [1, 3, 5, 7] over source [0..8]. + // Round 2: with_view on that, layering (start=1, step=1, steps=2) → + // visible bytes [3, 5] (input visible indices 1 and 2). + // + // After Round 2 the output's view, when composed against the + // ORIGINAL source [0..8], must give bytes [3, 5] from indices + // 3 and 5. compose_view collapses the chain into one source-space + // view; the test verifies the bytes round-trip end-to-end. + let input_array = build_1d_identity_raster(); + let input_rasters = RasterStructArray::new(&input_array); + let input_raster = input_rasters.get(0).unwrap(); + let input_band = input_raster.band(0).unwrap(); + + // Round 1. + let mut b1 = RasterBuilder::new(1); + b1.start_raster_nd(&[0.0, 1.0, 0.0, 0.0, 0.0, -1.0], &["x"], &[4], None) + .unwrap(); + let v1 = [ViewEntry { + source_axis: 0, + start: 1, + step: 2, + steps: 4, + }]; + b1.with_view(WithViewArgs { + name: None, + dim_names: &["x"], + input: input_band.as_ref(), + view: &v1, + nodata: None, + outdb_uri: None, + outdb_format: None, + }) + .unwrap(); + b1.finish_band().unwrap(); + b1.finish_raster().unwrap(); + let mid_array = b1.finish().unwrap(); + + // Sanity: Round 1 alone produces [1, 3, 5, 7]. + let mid_rasters = RasterStructArray::new(&mid_array); + let mid_raster = mid_rasters.get(0).unwrap(); + let mid_band = mid_raster.band(0).unwrap(); + assert_eq!(mid_band.shape(), &[4]); + // Round 1 view: start=1, step=2 over source [0..8] → strides [2], + // offset 1 (visible bytes would be [1, 3, 5, 7]). Strided, so the + // buffer is non-contiguous. + let mid_buf = mid_band.nd_buffer().unwrap(); + assert_eq!(mid_buf.strides, &[2]); + assert_eq!(mid_buf.offset, 1); + assert!(!mid_buf.is_contiguous()); + + // Round 2: with_view applied on the view-bearing mid_band. + let mut b2 = RasterBuilder::new(1); + b2.start_raster_nd(&[0.0, 1.0, 0.0, 0.0, 0.0, -1.0], &["x"], &[2], None) + .unwrap(); + let v2 = crate::view_entries![1:3]; + b2.with_view(WithViewArgs { + name: None, + dim_names: &["x"], + input: mid_band.as_ref(), + view: v2.as_slice(), + nodata: None, + outdb_uri: None, + outdb_format: None, + }) + .unwrap(); + b2.finish_band().unwrap(); + b2.finish_raster().unwrap(); + let final_array = b2.finish().unwrap(); + + let final_rasters = RasterStructArray::new(&final_array); + let final_raster = final_rasters.get(0).unwrap(); + let final_band = final_raster.band(0).unwrap(); + assert_eq!(final_band.shape(), &[2]); + // compose_view collapses both rounds into a single source-space view: + // visible bytes [3, 5] map to source indices 3 and 5 → strides [2], + // offset 3. Strided, so the composed buffer is non-contiguous. + let final_buf = final_band.nd_buffer().unwrap(); + assert_eq!(final_buf.strides, &[2]); + assert_eq!(final_buf.offset, 3); + assert!(!final_buf.is_contiguous()); + // The composed view still references the original 8-byte source. + assert_eq!(final_band.raw_source_shape(), &[8]); + } + + #[test] + fn with_view_on_outdb_input_produces_outdb_output_with_composed_view() { + // Viewing an OutDb band doesn't need the source bytes — the output + // band is itself OutDb, pointing at the same external resource via + // an inherited outdb_uri, with the composed view describing the + // slice. Loading is deferred to whoever reads the visible bytes. + let mut b = RasterBuilder::new(1); + b.start_raster_nd(&[0.0, 1.0, 0.0, 0.0, 0.0, -1.0], &["x"], &[8], None) + .unwrap(); + b.start_band_nd( + None, + &["x"], + &[8], + BandDataType::UInt8, + None, + Some("s3://bucket/file.tif#band=1"), + Some("geotiff"), + ) + .unwrap(); + b.band_data_writer().append_value([0u8; 0]); // empty → OutDb + b.finish_band().unwrap(); + b.finish_raster().unwrap(); + let input_array = b.finish().unwrap(); + + let input_rasters = RasterStructArray::new(&input_array); + let input_raster = input_rasters.get(0).unwrap(); + let input_band = input_raster.band(0).unwrap(); + assert!(!input_band.is_indb(), "fixture must be OutDb"); + + // Slice the OutDb band's visible axis: start=1, step=2, steps=3. + let mut b2 = RasterBuilder::new(1); + b2.start_raster_nd(&[0.0, 1.0, 0.0, 0.0, 0.0, -1.0], &["x"], &[3], None) + .unwrap(); + let view = [ViewEntry { + source_axis: 0, + start: 1, + step: 2, + steps: 3, + }]; + b2.with_view(WithViewArgs { + name: None, + dim_names: &["x"], + input: input_band.as_ref(), + view: &view, + nodata: None, + outdb_uri: None, + outdb_format: None, + }) + .unwrap(); + b2.finish_band().unwrap(); + b2.finish_raster().unwrap(); + let out_array = b2.finish().unwrap(); + + let out_rasters = RasterStructArray::new(&out_array); + let out_raster = out_rasters.get(0).unwrap(); + let out_band = out_raster.band(0).unwrap(); + + // Output remains OutDb (data column stayed empty). + assert!( + !out_band.is_indb(), + "output of OutDb-input with_view must be OutDb" + ); + // Storage metadata inherited from the input. + assert_eq!(out_band.outdb_uri(), Some("s3://bucket/file.tif#band=1")); + assert_eq!(out_band.outdb_format(), Some("geotiff")); + // Composed view: input had identity view, so composed == supplied + // view entry verbatim. + assert_eq!( + out_band.view(), + &[ViewEntry { + source_axis: 0, + start: 1, + step: 2, + steps: 3, + }] + ); + assert_eq!(out_band.raw_source_shape(), &[8]); + // Visible shape derived from composed view. + assert_eq!(out_band.shape(), &[3]); } /// Navigate an output raster `StructArray` to its bands' `data` diff --git a/rust/sedona-raster/src/traits.rs b/rust/sedona-raster/src/traits.rs index 3ead932d5..90ebbd203 100644 --- a/rust/sedona-raster/src/traits.rs +++ b/rust/sedona-raster/src/traits.rs @@ -43,8 +43,14 @@ pub fn is_spatial_dim_pair(y_like: &str, x_like: &str) -> bool { /// `source_shape` (the natural extent of `buffer`) with its `view` /// (the per-axis `(source_axis, start, step, steps)` slice spec). Stride /// can be zero (broadcast) or negative (reverse iteration), and may not be -/// C-order. Consumers that need a flat row-major buffer should use -/// `BandRef::contiguous_data()` instead. +/// C-order. Consumers that need a flat row-major buffer should check +/// `NdBuffer::is_contiguous()` and borrow via `NdBuffer::as_contiguous()`, +/// which errors on strided layouts rather than allocating. +/// +/// `shape` and `offset` are `i64` (not `u64`) to match the surrounding +/// stride arithmetic (`strides: Vec` to allow negative steps); +/// shape elements are always non-negative — `validate_view` enforces +/// this — and `offset` is non-negative by construction. /// /// Only `buffer` is tied to the producer's lifetime `'a` (it can be tens of /// MBs of pixel data and must not be copied). `shape` and `strides` are @@ -56,7 +62,7 @@ pub struct NdBuffer<'a> { pub buffer: &'a [u8], pub shape: Vec, pub strides: Vec, - pub offset: u64, + pub offset: i64, pub data_type: BandDataType, } @@ -118,10 +124,10 @@ impl<'a> NdBuffer<'a> { } } -// `ViewEntry` (and the `ViewEntries` newtype with its validation / -// composition / visible-shape machinery) lives in `view_entries.rs`. -// Re-exported here so the `crate::traits::ViewEntry` import path keeps -// working. +// `ViewEntry` and `ViewEntries` (with their associated machinery — +// `validate`, `is_identity`, `visible_shape`, `compose`) live in +// `view_entries.rs`. Re-exported here so the existing `crate::traits` +// import path keeps working. pub use crate::view_entries::ViewEntry; /// Concrete raster metadata returned by `RasterRef::metadata()`. @@ -503,11 +509,15 @@ pub trait RasterRef { /// Trait for accessing a single band/variable within an N-D raster. /// /// This is the consumer interface. Implementations handle storage details -/// Two data access paths: -/// - `contiguous_data()` — flat row-major bytes for consumers that don't need -/// stride awareness (most RS_* functions, GDAL boundary, serialization). -/// - `nd_buffer()` — raw buffer + shape + strides + offset for stride-aware -/// consumers (numpy zero-copy views, Arrow FFI) that want to avoid copies. +/// and view composition transparently. +/// +/// `nd_buffer()` is the sole zero-copy byte accessor: it returns the raw +/// buffer plus shape + strides + offset describing the visible region. +/// Stride-aware consumers (numpy zero-copy views, Arrow FFI) read it +/// directly; consumers that need flat row-major bytes borrow via +/// `NdBuffer::as_contiguous()`, which errors on strided layouts rather than +/// allocating. The trait never materializes a strided view behind the +/// caller's back — repacking is an explicit plan-node concern. pub trait BandRef { // -- Dimension metadata -- @@ -585,7 +595,7 @@ pub trait BandRef { /// OutDb format — how to interpret the bytes at `outdb_uri` /// (e.g. `"geotiff"`, `"zarr"`). None means in-memory — the band's - /// `contiguous_data()` / `nd_buffer()` is authoritative. + /// `nd_buffer()` is authoritative. fn outdb_format(&self) -> Option<&str> { None } @@ -649,6 +659,18 @@ pub trait BandRef { /// `offset` are computed by composing the view with the source's /// natural C-order byte strides. Strides may be zero (broadcast) or /// negative (reverse iteration). + /// + /// The returned `shape`, `strides`, and `offset` are guaranteed + /// in-bounds for `buffer`: `RasterRef::band()` rejects malformed views, + /// overflowing stride composition, and source-shape/data-column length + /// mismatches at construction. Stride-aware consumers can walk the + /// returned layout without further bound checks against `buffer.len()`. + /// + /// This is the **sole** byte-access method on the trait — it is + /// zero-copy and never allocates. Contiguous-byte consumers call + /// [`NdBuffer::as_contiguous`] on the result (borrow-or-error); + /// materialization of a strided view is an explicit `RS_EnsureContiguous` + /// step, never a transparent allocation behind this interface. fn nd_buffer(&self) -> Result, ArrowError>; /// Nodata value interpreted as f64. @@ -988,40 +1010,116 @@ mod tests { assert!(!b.is_spatial_2d()); } - /// Build a bufferless `NdBuffer` for contiguity checks — `is_contiguous` - /// inspects only shape/strides/data_type, never the bytes. - fn ndbuf(shape: &[i64], strides: &[i64], offset: u64) -> NdBuffer<'static> { + // ---- NdBuffer::is_contiguous / as_contiguous ---- + // + // The contiguity predicate is a pure function of (shape, strides, + // data_type), so it is exercised here directly on NdBuffer literals + // rather than through the full RasterBuilder → reader path. The + // builder/reader tests in builder.rs and array.rs cover that the view + // composition produces these strides/offsets in the first place. + + fn ndbuf<'a>( + buffer: &'a [u8], + shape: &[i64], + strides: &[i64], + offset: i64, + data_type: BandDataType, + ) -> NdBuffer<'a> { NdBuffer { - buffer: &[], + buffer, shape: shape.to_vec(), strides: strides.to_vec(), offset, - data_type: BandDataType::UInt8, + data_type, } } #[test] - fn is_contiguous_packed_identity() { - // C-order packed strides for shape [2, 3], byte_size 1. - assert!(ndbuf(&[2, 3], &[3, 1], 0).is_contiguous()); + fn is_contiguous_identity_2d_uint8_borrows_full_buffer() { + let bytes: Vec = (0..6).collect(); + let b = ndbuf(&bytes, &[2, 3], &[3, 1], 0, BandDataType::UInt8); + assert!(b.is_contiguous()); + assert_eq!(b.as_contiguous().unwrap(), &bytes[..]); + } + + #[test] + fn is_contiguous_identity_multibyte_float32() { + // shape [2, 3] Float32 → C-order byte strides [12, 4]. + let bytes = vec![7u8; 24]; + let b = ndbuf(&bytes, &[2, 3], &[12, 4], 0, BandDataType::Float32); + assert!(b.is_contiguous()); + assert_eq!(b.as_contiguous().unwrap().len(), 24); + } + + #[test] + fn is_contiguous_is_offset_agnostic_for_outer_axis_slice() { + // Take rows 1..3 of a [3, 3] UInt8 source: offset 3, packed strides. + // Contiguous-but-not-identity → borrows the sub-range zero-copy. + let bytes: Vec = (0..9).collect(); + let b = ndbuf(&bytes, &[2, 3], &[3, 1], 3, BandDataType::UInt8); + assert!(b.is_contiguous()); + assert_eq!(b.as_contiguous().unwrap(), &bytes[3..9]); + } + + #[test] + fn is_contiguous_false_for_broadcast_zero_stride() { + let bytes = vec![0u8; 3]; + let b = ndbuf(&bytes, &[4, 3], &[0, 1], 0, BandDataType::UInt8); + assert!(!b.is_contiguous()); + assert!(b.as_contiguous().is_err()); + } + + #[test] + fn is_contiguous_false_for_negative_stride() { + let bytes: Vec = (0..8).collect(); + let b = ndbuf(&bytes, &[3], &[-2], 6, BandDataType::UInt8); + assert!(!b.is_contiguous()); + assert!(b.as_contiguous().is_err()); + } + + #[test] + fn is_contiguous_false_for_permuted_inner_stride() { + // shape [3, 2], strides [1, 6] — inner stride 6 != dtype_size. + let bytes = vec![0u8; 12]; + let b = ndbuf(&bytes, &[3, 2], &[1, 6], 0, BandDataType::UInt8); + assert!(!b.is_contiguous()); + assert!(b.as_contiguous().is_err()); + } + + #[test] + fn is_contiguous_false_for_inner_strided_multibyte() { + // UInt16 is 2 bytes; a step-2 view gives byte stride 4 != 2. + let bytes = vec![0u8; 12]; + let b = ndbuf(&bytes, &[3], &[4], 0, BandDataType::UInt16); + assert!(!b.is_contiguous()); + assert!(b.as_contiguous().is_err()); } #[test] - fn is_contiguous_packed_with_offset() { - // Offset is irrelevant to contiguity — a packed sub-window still - // counts (this is the relaxation the GDAL gate relies on). - assert!(ndbuf(&[2, 3], &[3, 1], 12).is_contiguous()); + fn is_contiguous_true_for_zero_extent_axis_borrows_empty() { + // A zero-extent axis addresses no bytes — trivially contiguous, + // regardless of the surrounding strides. + let bytes = vec![0u8; 8]; + let b = ndbuf(&bytes, &[3, 0, 5], &[0, 5, 1], 0, BandDataType::UInt8); + assert!(b.is_contiguous()); + assert!(b.as_contiguous().unwrap().is_empty()); } #[test] - fn is_contiguous_strided_is_false() { - // Inner stride 2 != byte_size 1 — gaps between elements. - assert!(!ndbuf(&[2, 3], &[6, 2], 0).is_contiguous()); + fn is_contiguous_false_for_shape_strides_length_mismatch() { + let bytes = vec![0u8; 6]; + let b = ndbuf(&bytes, &[2, 3], &[3], 0, BandDataType::UInt8); + assert!(!b.is_contiguous()); + assert!(b.as_contiguous().is_err()); } #[test] - fn is_contiguous_broadcast_is_false() { - // Zero stride (broadcast) is not packed. - assert!(!ndbuf(&[2, 3], &[0, 1], 0).is_contiguous()); + fn as_contiguous_errors_when_region_exceeds_buffer() { + // Layout is C-order packed, but offset pushes the region past the + // buffer end — the defensive bounds check must reject it. + let bytes = vec![0u8; 4]; + let b = ndbuf(&bytes, &[4], &[1], 2, BandDataType::UInt8); + assert!(b.is_contiguous()); + assert!(b.as_contiguous().is_err()); } }