Skip to content
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
13 changes: 13 additions & 0 deletions xrspatial/geotiff/__init__.py
Original file line number Diff line number Diff line change
Expand Up @@ -3225,6 +3225,19 @@ def read_vrt(source: str, *, dtype=None,
Otherwise ``attrs['crs']`` stays unset and ``attrs['crs_wkt']`` carries
the original WKT. The source GeoTransform is preserved as a
rasterio-style 6-tuple in ``attrs['transform']``.

Source-path containment (issue #1671): every ``<SourceFilename>`` in
the VRT must resolve (after canonicalising ``..`` segments and
symlinks) to a path under the VRT's own directory. Absolute paths
pointing elsewhere are rejected with ``ValueError`` by default.
Operators that legitimately need to mosaic files from outside the
VRT directory can opt in by setting the
``XRSPATIAL_VRT_ALLOWED_ROOTS`` environment variable to a
``os.pathsep``-separated list of trusted directory roots; sources
resolving under any listed root are then accepted. A
``relativeToVRT='1'`` source that escapes the VRT directory (e.g.
``../../etc/passwd`` or a symlink to a file outside the directory)
is rejected regardless of the allowlist.
"""
from ._reader import _coerce_path
from ._vrt import read_vrt as _read_vrt_internal
Expand Down
82 changes: 81 additions & 1 deletion xrspatial/geotiff/_vrt.py
Original file line number Diff line number Diff line change
Expand Up @@ -56,6 +56,44 @@ def _codec_decode_exceptions() -> tuple[type[BaseException], ...]:
_CODEC_DECODE_EXCEPTIONS = _codec_decode_exceptions()


# Environment variable name used to opt in to absolute source paths
# outside the VRT directory. ``os.pathsep``-separated list of
# directory roots (``:`` on POSIX, ``;`` on Windows). Empty entries
# are ignored. See ``parse_vrt`` for the containment policy.
_ALLOWED_ROOTS_ENV = 'XRSPATIAL_VRT_ALLOWED_ROOTS'


def _allowed_source_roots() -> list[str]:
"""Return the operator-supplied allowlist of trusted source roots.

Returns the canonical ``realpath`` of each entry so the containment
check can compare against the resolved source path directly. Empty
entries (from stray ``os.pathsep`` separators) are dropped.
"""
raw = os.environ.get(_ALLOWED_ROOTS_ENV, '')
roots = []
for entry in raw.split(os.pathsep):
entry = entry.strip()
if not entry:
continue
roots.append(os.path.realpath(entry))
return roots


def _path_is_under(path: str, root: str) -> bool:
"""Return True when ``path`` lives under ``root``.

Both inputs must already be canonical (``os.path.realpath`` applied).
Uses ``os.path.commonpath`` for robustness against prefix-collisions
(``/foo`` vs ``/foobar``) which a naive ``startswith`` check misses.
"""
try:
return os.path.commonpath([path, root]) == root
except ValueError:
# Different drives on Windows, or one of the paths is empty.
return False


def _xml_text(value) -> str:
"""Escape *value* for safe inclusion as XML element text.

Expand Down Expand Up @@ -170,12 +208,26 @@ def parse_vrt(xml_str: str, vrt_dir: str = '.') -> VRTDataset:
Returns
-------
VRTDataset

Raises
------
ValueError
When a ``SourceFilename`` resolves outside ``vrt_dir`` and
outside every entry in ``XRSPATIAL_VRT_ALLOWED_ROOTS``. See
:func:`read_vrt` for the full containment policy.
"""
# ``safe_fromstring`` refuses DOCTYPE declarations so a crafted VRT
# cannot trigger XML entity expansion (billion-laughs) attacks
# against the reader. See issue #1579.
root = safe_fromstring(xml_str)

# Pre-compute the trusted roots once per parse. ``vrt_dir`` is
# always trusted; the allowlist from ``XRSPATIAL_VRT_ALLOWED_ROOTS``
# is only consulted for absolute sources (``relativeToVRT='0'``).
# See issue #1671 for the path-traversal hardening.
vrt_root = os.path.realpath(vrt_dir)
allowed_roots = _allowed_source_roots()

width = int(root.get('rasterXSize', 0))
height = int(root.get('rasterYSize', 0))

Expand Down Expand Up @@ -225,9 +277,37 @@ def parse_vrt(xml_str: str, vrt_dir: str = '.') -> VRTDataset:
relative.get('relativeToVRT', '0') == '1')
if is_relative and not os.path.isabs(filename):
filename = os.path.join(vrt_dir, filename)
# Canonicalize to prevent path traversal (e.g. ../)
# Canonicalize so ``..`` segments and symlinks both resolve
# to their target before the containment check below.
filename = os.path.realpath(filename)

# Containment policy (issue #1671):
#
# * ``relativeToVRT='1'`` declares the source lives under the
# VRT directory. Honour that intent even when the allowlist
# would otherwise cover the resolved path -- a relative
# source that escapes ``vrt_dir`` is always an attack
# primitive ('..' segments, symlink swap, etc.).
# * Absolute sources are accepted when they resolve under
# ``vrt_dir`` (matches the writer's ``relative=False`` round
# trip) or under any explicit ``XRSPATIAL_VRT_ALLOWED_ROOTS``
# entry.
#
# In every other case raise ``ValueError`` so a crafted VRT
# cannot read arbitrary files the process has access to.
if is_relative:
trusted = [vrt_root]
else:
trusted = [vrt_root, *allowed_roots]
if not any(_path_is_under(filename, r) for r in trusted):
raise ValueError(
f"VRT SourceFilename {filename!r} resolves outside "
f"the VRT directory ({vrt_root!r}) and is not "
f"covered by any {_ALLOWED_ROOTS_ENV} entry "
f"({allowed_roots!r}). Refusing to read; see issue "
f"#1671."
)

src_band = int(_text(src_elem, 'SourceBand') or '1')

src_rect_elem = src_elem.find('SrcRect')
Expand Down
12 changes: 8 additions & 4 deletions xrspatial/geotiff/tests/test_features.py
Original file line number Diff line number Diff line change
Expand Up @@ -796,26 +796,30 @@ def test_read_vrt_function(self, tmp_path):
assert da.name == 'mosaic'
np.testing.assert_array_equal(da.values, arr)

def test_vrt_parser(self):
def test_vrt_parser(self, tmp_path):
"""VRT XML parser extracts all fields correctly."""
from xrspatial.geotiff._vrt import parse_vrt

# Use a path under tmp_path so the issue #1671 containment check
# accepts the source. The test exercises field-extraction, not
# the on-disk readability of the source file.
src_path = str(tmp_path / 'tile.tif')
xml = (
'<VRTDataset rasterXSize="100" rasterYSize="200">\n'
' <SRS>EPSG:32610</SRS>\n'
' <GeoTransform>500000, 30, 0, 4500000, 0, -30</GeoTransform>\n'
' <VRTRasterBand dataType="UInt16" band="1">\n'
' <NoDataValue>0</NoDataValue>\n'
' <SimpleSource>\n'
' <SourceFilename relativeToVRT="0">/data/tile.tif</SourceFilename>\n'
f' <SourceFilename relativeToVRT="0">{src_path}</SourceFilename>\n'
' <SourceBand>1</SourceBand>\n'
' <SrcRect xOff="10" yOff="20" xSize="80" ySize="160"/>\n'
' <DstRect xOff="0" yOff="0" xSize="80" ySize="160"/>\n'
' </SimpleSource>\n'
' </VRTRasterBand>\n'
'</VRTDataset>\n'
)
vrt = parse_vrt(xml)
vrt = parse_vrt(xml, str(tmp_path))
assert vrt.width == 100
assert vrt.height == 200
assert vrt.crs_wkt == 'EPSG:32610'
Expand All @@ -825,7 +829,7 @@ def test_vrt_parser(self):
assert vrt.bands[0].nodata == 0.0
assert len(vrt.bands[0].sources) == 1
src = vrt.bands[0].sources[0]
assert src.filename == os.path.realpath('/data/tile.tif')
assert src.filename == os.path.realpath(src_path)
assert src.src_rect.x_off == 10

def test_vrt_float64_fractional_nodata_masked(self, tmp_path):
Expand Down
34 changes: 16 additions & 18 deletions xrspatial/geotiff/tests/test_security.py
Original file line number Diff line number Diff line change
Expand Up @@ -300,11 +300,18 @@ def test_open_geotiff_vrt_max_pixels(self, tmp_path):

# ---------------------------------------------------------------------------
# Cat 5: VRT path traversal
#
# Tightened in issue #1671: ``parse_vrt`` no longer accepts source paths
# that resolve outside the VRT directory (or any explicit allowlist
# entry). The realpath call by itself only normalised ``..`` segments;
# it did not enforce containment, so a crafted VRT could still hand
# ``read_to_array`` an arbitrary path.
# ---------------------------------------------------------------------------

class TestVRTPathTraversal:
def test_relative_path_canonicalized(self, tmp_path):
"""Relative paths in VRT SourceFilename are canonicalized."""
def test_relative_path_traversal_rejected(self, tmp_path):
"""Relative paths in VRT SourceFilename that escape the VRT
directory are rejected, not silently canonicalised."""
from xrspatial.geotiff._vrt import parse_vrt

vrt_xml = '''<VRTDataset rasterXSize="4" rasterYSize="4">
Expand All @@ -321,15 +328,8 @@ def test_relative_path_canonicalized(self, tmp_path):
vrt_dir = str(tmp_path / "subdir")
os.makedirs(vrt_dir)

vrt = parse_vrt(vrt_xml, vrt_dir)
source_path = vrt.bands[0].sources[0].filename

# After canonicalization, the path should NOT contain ".."
assert ".." not in source_path
# It should be an absolute path
assert os.path.isabs(source_path)
# Verify it was resolved through realpath
assert source_path == os.path.realpath(source_path)
with pytest.raises(ValueError, match="outside the VRT directory"):
parse_vrt(vrt_xml, vrt_dir)

def test_normal_relative_path_still_works(self, tmp_path):
"""Normal relative paths without traversal still resolve correctly."""
Expand All @@ -353,8 +353,9 @@ def test_normal_relative_path_still_works(self, tmp_path):
expected = os.path.realpath(os.path.join(vrt_dir, "data", "tile.tif"))
assert source_path == expected

def test_absolute_path_also_canonicalized(self, tmp_path):
"""Absolute paths in VRT are also canonicalized."""
def test_absolute_path_outside_vrt_dir_rejected(self, tmp_path):
"""Absolute paths pointing outside the VRT directory are rejected
by default (issue #1671)."""
from xrspatial.geotiff._vrt import parse_vrt

vrt_xml = '''<VRTDataset rasterXSize="4" rasterYSize="4">
Expand All @@ -368,11 +369,8 @@ def test_absolute_path_also_canonicalized(self, tmp_path):
</VRTRasterBand>
</VRTDataset>'''

vrt = parse_vrt(vrt_xml, str(tmp_path))
source_path = vrt.bands[0].sources[0].filename

assert ".." not in source_path
assert source_path == os.path.realpath("/tmp/../tmp/test.tif")
with pytest.raises(ValueError, match="outside the VRT directory"):
parse_vrt(vrt_xml, str(tmp_path))


# ---------------------------------------------------------------------------
Expand Down
Loading
Loading