diff --git a/sdk/monitor/azure-monitor-opentelemetry-exporter/CHANGELOG.md b/sdk/monitor/azure-monitor-opentelemetry-exporter/CHANGELOG.md index 881c8931ee44..69aed30dce94 100644 --- a/sdk/monitor/azure-monitor-opentelemetry-exporter/CHANGELOG.md +++ b/sdk/monitor/azure-monitor-opentelemetry-exporter/CHANGELOG.md @@ -3,6 +3,8 @@ ## 1.0.0b52 (Unreleased) ### Features Added +- Add ownership checks for storage directories + ([#46725](https://github.com/Azure/azure-sdk-for-python/pull/46725)) - Add logger name to custom dimensions for Message, Exception and Event telemetry ([#46096](https://github.com/Azure/azure-sdk-for-python/pull/46096)) - Add support for populating SDK version from distro and Microsoft OpenTelemetry distro environment variables diff --git a/sdk/monitor/azure-monitor-opentelemetry-exporter/azure/monitor/opentelemetry/exporter/_storage.py b/sdk/monitor/azure-monitor-opentelemetry-exporter/azure/monitor/opentelemetry/exporter/_storage.py index 0f2ac4ab8bf0..83a7a656146c 100644 --- a/sdk/monitor/azure-monitor-opentelemetry-exporter/azure/monitor/opentelemetry/exporter/_storage.py +++ b/sdk/monitor/azure-monitor-opentelemetry-exporter/azure/monitor/opentelemetry/exporter/_storage.py @@ -66,7 +66,15 @@ def get(self) -> Optional[Tuple[Any, ...]]: def put(self, data: List[Any], lease_period: int = 0) -> Union[StorageExportResult, str]: try: fullpath = self.fullpath + ".tmp" - with open(fullpath, "w", encoding="utf-8") as file: + # Use O_CREAT | O_EXCL | O_WRONLY to atomically create the file # cspell:disable-line + # and fail if it already exists, preventing race conditions. + fd = os.open(fullpath, os.O_CREAT | os.O_EXCL | os.O_WRONLY, 0o600) # cspell:disable-line + try: + file = os.fdopen(fd, "w", encoding="utf-8") + except Exception: + os.close(fd) + raise + with file: for item in data: file.write(json.dumps(item)) # The official Python doc: Do not use os.linesep as a line @@ -255,7 +263,27 @@ def _check_and_set_folder_permissions(self) -> bool: return True # Unix else: - os.chmod(self._path, 0o700) + open_flags = os.O_RDONLY | os.O_DIRECTORY | os.O_NOFOLLOW # pylint: disable=no-member # cspell:disable-line + dir_fd = os.open(self._path, open_flags) + try: + dir_stat = os.fstat(dir_fd) + owner_uid = dir_stat.st_uid + current_uid = os.getuid() # pylint: disable=no-member + if owner_uid not in (current_uid, 0): + logger.error( + "Storage directory %s is owned by uid %d, not the current user (%d) or admin (uid 0). " + "Refusing to use this directory.", + self._path, + owner_uid, + current_uid, + ) + set_local_storage_setup_state_exception( + f"Directory owned by uid {owner_uid}, expected {current_uid} or 0" + ) + return False + os.fchmod(dir_fd, 0o700) # pylint: disable=no-member # cspell:disable-line + finally: + os.close(dir_fd) return True except OSError as error: if getattr(error, "errno", None) == errno.EROFS: # cspell:disable-line diff --git a/sdk/monitor/azure-monitor-opentelemetry-exporter/tests/test_storage.py b/sdk/monitor/azure-monitor-opentelemetry-exporter/tests/test_storage.py index 141bc672353f..1d8c885a8e1e 100644 --- a/sdk/monitor/azure-monitor-opentelemetry-exporter/tests/test_storage.py +++ b/sdk/monitor/azure-monitor-opentelemetry-exporter/tests/test_storage.py @@ -1,6 +1,7 @@ # Copyright (c) Microsoft Corporation. All rights reserved. # Licensed under the MIT License. +import errno import os import shutil import unittest @@ -21,6 +22,14 @@ TEST_USER = "multiuser-test" STORAGE_MODULE = "azure.monitor.opentelemetry.exporter._storage" +# Ensure Unix-only constants exist for cross-platform testing. +# These are used by the fd-based permission checks in the Unix code path; +# since os.open is mocked in those tests, the actual values don't matter. +if not hasattr(os, "O_DIRECTORY"): + os.O_DIRECTORY = 0o200000 +if not hasattr(os, "O_NOFOLLOW"): + os.O_NOFOLLOW = 0o400000 + def throw(exc_type, *args, **kwargs): def func(*_args, **_kwargs): @@ -83,7 +92,7 @@ def test_put_file_write_error_returns_string(self): blob = LocalFileBlob(os.path.join(TEST_FOLDER, "write_error_blob")) test_input = [1, 2, 3] - with mock.patch("builtins.open", side_effect=PermissionError("Cannot write to file")): + with mock.patch(f"{STORAGE_MODULE}.os.open", side_effect=PermissionError("Cannot write to file")): result = blob.put(test_input) self.assertIsInstance(result, str) self.assertIn("Cannot write to file", result) @@ -619,29 +628,36 @@ def test_check_and_set_folder_permissions_unix_chmod_exception_sets_exception_st # Clear any existing exception state (set to empty string, not None) set_local_storage_setup_state_exception("") - # Mock Unix environment and chmod failure + mock_stat_result = mock.MagicMock() + mock_stat_result.st_uid = 1000 + + # Mock Unix environment and fchmod failure # cspell:disable-line with mock.patch("os.name", "posix"): # Unix with mock.patch("os.makedirs"): # Allow directory creation - with mock.patch("os.chmod", side_effect=OSError(test_error_message)): - stor = LocalFileStorage(os.path.join(TEST_FOLDER, "chmod_failure_test")) - - # Storage should be disabled due to chmod failure - self.assertFalse(stor._enabled) - - # Exception state should be set with the error message - exception_state = get_local_storage_setup_state_exception() - self.assertEqual(exception_state, test_error_message) - - # When storage is disabled, put() behavior depends on readonly state - result = stor.put(test_input) - if get_local_storage_setup_state_readonly(): - # Readonly takes priority over exception state - self.assertEqual(result, StorageExportResult.CLIENT_READONLY) - else: - # If readonly not set, should return the exception message - self.assertEqual(result, test_error_message) - - stor.close() + with mock.patch("os.open", return_value=99): + with mock.patch("os.fstat", return_value=mock_stat_result): + with mock.patch("os.getuid", create=True, return_value=1000): + with mock.patch("os.fchmod", create=True, side_effect=OSError(test_error_message)): # cspell:disable-line + with mock.patch("os.close"): + stor = LocalFileStorage(os.path.join(TEST_FOLDER, "chmod_failure_test")) + + # Storage should be disabled due to fchmod failure # cspell:disable-line + self.assertFalse(stor._enabled) + + # Exception state should be set with the error message + exception_state = get_local_storage_setup_state_exception() + self.assertEqual(exception_state, test_error_message) + + # When storage is disabled, put() behavior depends on readonly state + result = stor.put(test_input) + if get_local_storage_setup_state_readonly(): + # Readonly takes priority over exception state + self.assertEqual(result, StorageExportResult.CLIENT_READONLY) + else: + # If readonly not set, should return the exception message + self.assertEqual(result, test_error_message) + + stor.close() # Clean up set_local_storage_setup_state_exception("") @@ -940,35 +956,42 @@ def test_check_and_set_folder_permissions_unix_multiuser_scenario(self): storage_abs_path = _get_storage_directory(DUMMY_INSTRUMENTATION_KEY) with mock.patch(f"{STORAGE_MODULE}.os.name", "posix"): - chmod_calls = [] + fchmod_calls = [] # cspell:disable-line makedirs_calls = [] - def mock_chmod(path, mode): - chmod_calls.append((path, oct(mode))) + def mock_fchmod(fd, mode): # cspell:disable-line + fchmod_calls.append((fd, oct(mode))) # cspell:disable-line def mock_makedirs(path, mode=0o777, exist_ok=False): makedirs_calls.append((path, oct(mode), exist_ok)) - with mock.patch(f"{STORAGE_MODULE}.os.makedirs", side_effect=mock_makedirs): - with mock.patch(f"{STORAGE_MODULE}.os.chmod", side_effect=mock_chmod): - with mock.patch(f"{STORAGE_MODULE}.os.path.abspath", side_effect=lambda path: path): - stor = LocalFileStorage(storage_abs_path) - - self.assertTrue(stor._enabled) + mock_stat_result = mock.MagicMock() + mock_stat_result.st_uid = 1000 - self.assertEqual( - makedirs_calls, - [(storage_abs_path, "0o777", True)], - f"Unexpected makedirs calls: {makedirs_calls}", - ) - - self.assertEqual( - {(storage_abs_path, "0o700")}, - set(chmod_calls), - f"Unexpected chmod calls: {chmod_calls}", - ) - - stor.close() + with mock.patch(f"{STORAGE_MODULE}.os.makedirs", side_effect=mock_makedirs): + with mock.patch(f"{STORAGE_MODULE}.os.open", return_value=99): + with mock.patch(f"{STORAGE_MODULE}.os.fstat", return_value=mock_stat_result): + with mock.patch(f"{STORAGE_MODULE}.os.getuid", create=True, return_value=1000): + with mock.patch(f"{STORAGE_MODULE}.os.fchmod", create=True, side_effect=mock_fchmod): # cspell:disable-line + with mock.patch(f"{STORAGE_MODULE}.os.close"): + with mock.patch(f"{STORAGE_MODULE}.os.path.abspath", side_effect=lambda path: path): + stor = LocalFileStorage(storage_abs_path) + + self.assertTrue(stor._enabled) + + self.assertEqual( + makedirs_calls, + [(storage_abs_path, "0o777", True)], + f"Unexpected makedirs calls: {makedirs_calls}", + ) + + self.assertEqual( + [(99, "0o700")], + fchmod_calls, # cspell:disable-line + f"Unexpected fchmod calls: {fchmod_calls}", # cspell:disable-line + ) + + stor.close() # Clean up set_local_storage_setup_state_exception("") @@ -1019,22 +1042,220 @@ def test_check_and_set_folder_permissions_unix_multiuser_storage_permission_fail with mock.patch(f"{STORAGE_MODULE}.os.name", "posix"): - def mock_chmod(path, mode): - if mode == 0o700: - raise PermissionError(test_error_message) - raise OSError(f"Unexpected chmod call: {path}, {oct(mode)}") + def mock_fchmod(fd, mode): + raise PermissionError(test_error_message) + + mock_stat_result = mock.MagicMock() + mock_stat_result.st_uid = 1000 with mock.patch(f"{STORAGE_MODULE}.os.makedirs"): - with mock.patch(f"{STORAGE_MODULE}.os.chmod", side_effect=mock_chmod): + with mock.patch(f"{STORAGE_MODULE}.os.open", return_value=99): + with mock.patch(f"{STORAGE_MODULE}.os.fstat", return_value=mock_stat_result): + with mock.patch(f"{STORAGE_MODULE}.os.getuid", create=True, return_value=1000): + with mock.patch(f"{STORAGE_MODULE}.os.fchmod", create=True, side_effect=mock_fchmod): + with mock.patch(f"{STORAGE_MODULE}.os.close"): + with mock.patch(f"{STORAGE_MODULE}.os.path.abspath", side_effect=lambda path: path): + stor = LocalFileStorage(storage_abs_path) + + self.assertFalse(stor._enabled) + + exception_state = get_local_storage_setup_state_exception() + self.assertEqual(exception_state, test_error_message) + + stor.close() + + # Clean up + set_local_storage_setup_state_exception("") + + def test_check_and_set_folder_permissions_unix_ownership_by_current_user(self): + """Directory owned by the current user should pass ownership check.""" + from azure.monitor.opentelemetry.exporter.statsbeat.customer._state import ( + set_local_storage_setup_state_exception, + ) + + set_local_storage_setup_state_exception("") + storage_abs_path = _get_storage_directory(DUMMY_INSTRUMENTATION_KEY) + + mock_stat_result = mock.MagicMock() + mock_stat_result.st_uid = 4242 + + with mock.patch(f"{STORAGE_MODULE}.os.name", "posix"): + with mock.patch(f"{STORAGE_MODULE}.os.makedirs"): + with mock.patch(f"{STORAGE_MODULE}.os.open", return_value=99): + with mock.patch(f"{STORAGE_MODULE}.os.fstat", return_value=mock_stat_result): + with mock.patch(f"{STORAGE_MODULE}.os.getuid", create=True, return_value=4242): + with mock.patch(f"{STORAGE_MODULE}.os.fchmod", create=True) as mock_fchmod: + with mock.patch(f"{STORAGE_MODULE}.os.close"): + with mock.patch(f"{STORAGE_MODULE}.os.path.abspath", side_effect=lambda path: path): + stor = LocalFileStorage(storage_abs_path) + self.assertTrue(stor._enabled) + mock_fchmod.assert_called_with(99, 0o700) + stor.close() + + set_local_storage_setup_state_exception("") + + def test_check_and_set_folder_permissions_unix_ownership_by_root(self): + """Directory owned by root (uid 0) should pass ownership check.""" + from azure.monitor.opentelemetry.exporter.statsbeat.customer._state import ( + set_local_storage_setup_state_exception, + ) + + set_local_storage_setup_state_exception("") + storage_abs_path = _get_storage_directory(DUMMY_INSTRUMENTATION_KEY) + + mock_stat_result = mock.MagicMock() + mock_stat_result.st_uid = 0 # root + + with mock.patch(f"{STORAGE_MODULE}.os.name", "posix"): + with mock.patch(f"{STORAGE_MODULE}.os.makedirs"): + with mock.patch(f"{STORAGE_MODULE}.os.open", return_value=99): + with mock.patch(f"{STORAGE_MODULE}.os.fstat", return_value=mock_stat_result): + with mock.patch(f"{STORAGE_MODULE}.os.getuid", create=True, return_value=4242): + with mock.patch(f"{STORAGE_MODULE}.os.fchmod", create=True) as mock_fchmod: + with mock.patch(f"{STORAGE_MODULE}.os.close"): + with mock.patch(f"{STORAGE_MODULE}.os.path.abspath", side_effect=lambda path: path): + stor = LocalFileStorage(storage_abs_path) + self.assertTrue(stor._enabled) + mock_fchmod.assert_called_with(99, 0o700) + stor.close() + + set_local_storage_setup_state_exception("") + + def test_check_and_set_folder_permissions_unix_ownership_by_different_user(self): + """Directory owned by a different non-root user should fail ownership check.""" + from azure.monitor.opentelemetry.exporter.statsbeat.customer._state import ( + get_local_storage_setup_state_exception, + set_local_storage_setup_state_exception, + ) + + set_local_storage_setup_state_exception("") + storage_abs_path = _get_storage_directory(DUMMY_INSTRUMENTATION_KEY) + + mock_stat_result = mock.MagicMock() + mock_stat_result.st_uid = 9999 # attacker + + with mock.patch(f"{STORAGE_MODULE}.os.name", "posix"): + with mock.patch(f"{STORAGE_MODULE}.os.makedirs"): + with mock.patch(f"{STORAGE_MODULE}.os.open", return_value=99): + with mock.patch(f"{STORAGE_MODULE}.os.fstat", return_value=mock_stat_result): + with mock.patch(f"{STORAGE_MODULE}.os.getuid", create=True, return_value=4242): + with mock.patch(f"{STORAGE_MODULE}.os.fchmod", create=True) as mock_fchmod: + with mock.patch(f"{STORAGE_MODULE}.os.close"): + with mock.patch(f"{STORAGE_MODULE}.os.path.abspath", side_effect=lambda path: path): + stor = LocalFileStorage(storage_abs_path) + self.assertFalse(stor._enabled) + mock_fchmod.assert_not_called() + exception_state = get_local_storage_setup_state_exception() + self.assertIn("owned by uid 9999", exception_state) + stor.close() + + set_local_storage_setup_state_exception("") + + def test_attack_scenario_precreated_directory_by_attacker(self): + """ + Simulates the attack described in the vulnerability report: + An unprivileged attacker pre-creates the storage directory in /tmp with + their own uid. When the legitimate process starts, it should detect the + ownership mismatch and refuse to use the directory, preventing data exfiltration. + """ + from azure.monitor.opentelemetry.exporter.statsbeat.customer._state import ( + get_local_storage_setup_state_exception, + set_local_storage_setup_state_exception, + ) + + set_local_storage_setup_state_exception("") + storage_abs_path = _get_storage_directory(DUMMY_INSTRUMENTATION_KEY) + + # Attacker pre-created the directory — it is owned by attacker uid 5000 + mock_stat_result = mock.MagicMock() + mock_stat_result.st_uid = 5000 # attacker's uid + + with mock.patch(f"{STORAGE_MODULE}.os.name", "posix"): + with mock.patch(f"{STORAGE_MODULE}.os.makedirs"): # dir already exists + with mock.patch(f"{STORAGE_MODULE}.os.open", return_value=99): + with mock.patch(f"{STORAGE_MODULE}.os.fstat", return_value=mock_stat_result): + with mock.patch(f"{STORAGE_MODULE}.os.getuid", create=True, return_value=1000): # legitimate user + with mock.patch(f"{STORAGE_MODULE}.os.fchmod", create=True) as mock_fchmod: + with mock.patch(f"{STORAGE_MODULE}.os.close"): + with mock.patch(f"{STORAGE_MODULE}.os.path.abspath", side_effect=lambda path: path): + stor = LocalFileStorage(storage_abs_path) + + # Storage MUST be disabled — attacker owns the dir + self.assertFalse(stor._enabled) + # fchmod must NOT be called — we refuse to use the directory + mock_fchmod.assert_not_called() + # No data can be written + result = stor.put([{"telemetry": "sensitive_data"}]) + self.assertNotEqual(result, StorageExportResult.LOCAL_FILE_BLOB_SUCCESS) + # Exception state captures the ownership mismatch + exception_state = get_local_storage_setup_state_exception() + self.assertIn("owned by uid 5000", exception_state) + self.assertIn("expected 1000", exception_state) + + stor.close() + + set_local_storage_setup_state_exception("") + + def test_attack_scenario_symlink_to_attacker_controlled_path(self): + """ + Simulates a symlink attack: attacker creates a symlink at the expected + storage path. With O_NOFOLLOW, os.open() refuses to follow the symlink + and raises OSError, preventing the SDK from using the attacker's target. + """ + from azure.monitor.opentelemetry.exporter.statsbeat.customer._state import ( + get_local_storage_setup_state_exception, + set_local_storage_setup_state_exception, + ) + + set_local_storage_setup_state_exception("") + storage_abs_path = _get_storage_directory(DUMMY_INSTRUMENTATION_KEY) + + # os.open with O_NOFOLLOW raises OSError (ELOOP) when path is a symlink # cspell:disable-line + symlink_error = OSError(errno.ELOOP, "Too many levels of symbolic links") # cspell:disable-line + + with mock.patch(f"{STORAGE_MODULE}.os.name", "posix"): + with mock.patch(f"{STORAGE_MODULE}.os.makedirs"): + with mock.patch(f"{STORAGE_MODULE}.os.open", side_effect=symlink_error): with mock.patch(f"{STORAGE_MODULE}.os.path.abspath", side_effect=lambda path: path): stor = LocalFileStorage(storage_abs_path) + # Storage MUST be disabled — symlink detected by O_NOFOLLOW self.assertFalse(stor._enabled) - exception_state = get_local_storage_setup_state_exception() - self.assertEqual(exception_state, test_error_message) + self.assertIn("Too many levels of symbolic links", exception_state) stor.close() - # Clean up set_local_storage_setup_state_exception("") + + def test_attack_scenario_file_race_condition_oexcl(self): # cspell:disable-line + """ + Simulates O_EXCL protection: if an attacker manages to pre-create a + .tmp file at the exact path our blob will use, os.open with O_EXCL + must fail with FileExistsError, preventing data from being written + to an attacker-controlled file. + """ + storage_abs_path = _get_storage_directory(DUMMY_INSTRUMENTATION_KEY) + + mock_stat_result = mock.MagicMock() + mock_stat_result.st_uid = 1000 + + with mock.patch(f"{STORAGE_MODULE}.os.name", "posix"): + with mock.patch(f"{STORAGE_MODULE}.os.makedirs"): + with mock.patch(f"{STORAGE_MODULE}.os.open", return_value=99): + with mock.patch(f"{STORAGE_MODULE}.os.fstat", return_value=mock_stat_result): + with mock.patch(f"{STORAGE_MODULE}.os.getuid", create=True, return_value=1000): + with mock.patch(f"{STORAGE_MODULE}.os.fchmod", create=True): + with mock.patch(f"{STORAGE_MODULE}.os.close"): + with mock.patch(f"{STORAGE_MODULE}.os.path.abspath", side_effect=lambda path: path): + stor = LocalFileStorage(storage_abs_path) + self.assertTrue(stor._enabled) + + # Now simulate the attack: attacker pre-created the .tmp file + with mock.patch(f"{STORAGE_MODULE}.os.open", side_effect=FileExistsError("File exists")): + result = stor.put([{"secret": "credential_data"}]) + # put() must fail — data must NOT be written + self.assertIsInstance(result, str) + self.assertIn("File exists", result) + + stor.close()