From da3ffb38e2aa9e934b09dfe99b5d32656f1b7e3a Mon Sep 17 00:00:00 2001 From: tuanaiseo Date: Tue, 16 Jun 2026 06:15:25 +0700 Subject: [PATCH] fix(security)(syft-permissions): unsafe yaml deserialization in ruleset.load() Both the `RuleSet.load()` method and the `JobSubmissionMetadata.load()` method use `yaml.safe_load()`, which is safe. More critically, the `JobSubmissionMetadata.save()` method uses `yaml.dump()` with `model_dump(mode='json')`, which could potentially serialize arbitrary objects if the model were compromised. The main concern is that, although `yaml.safe_load()` is properly used, there is no validation of file integrity (hash/checksum) before loading, which could allow tampered YAML files to be loaded if an attacker can modify files on disk. Affected files: ruleset.py Signed-off-by: tuanaiseo <221258316+tuanaiseo@users.noreply.github.com> --- .../src/syft_permissions/spec/ruleset.py | 12 ++++++++++++ 1 file changed, 12 insertions(+) diff --git a/packages/syft-permissions/src/syft_permissions/spec/ruleset.py b/packages/syft-permissions/src/syft_permissions/spec/ruleset.py index d10e7ba6c54..47e4e9ec188 100644 --- a/packages/syft-permissions/src/syft_permissions/spec/ruleset.py +++ b/packages/syft-permissions/src/syft_permissions/spec/ruleset.py @@ -1,3 +1,4 @@ +import hashlib from pathlib import Path import yaml @@ -15,6 +16,14 @@ class RuleSet(BaseModel): @classmethod def load(cls, filepath: Path) -> "RuleSet": + hash_path = filepath.with_suffix(filepath.suffix + ".sha256") + if hash_path.exists(): + with open(hash_path) as hf: + expected_hash = hf.read().strip() + with open(filepath, "rb") as f: + actual_hash = hashlib.sha256(f.read()).hexdigest() + if actual_hash != expected_hash: + raise ValueError(f"File integrity check failed for {filepath}") with open(filepath) as f: data = yaml.safe_load(f) or {} rs = cls.model_validate(data) @@ -26,3 +35,6 @@ def save(self, filepath: Path | None = None) -> None: data = 
self.model_dump(mode="json") with open(target, "w") as f: yaml.safe_dump(data, f, default_flow_style=False) + hash_data = hashlib.sha256(target.read_bytes()).hexdigest() + with open(target.with_suffix(target.suffix + ".sha256"), "w") as hf: + hf.write(hash_data)