From a959afea3e951c49cbac774770476e9939a49faa Mon Sep 17 00:00:00 2001 From: Ruben Romero Montes Date: Wed, 25 Mar 2026 22:59:10 +0100 Subject: [PATCH 1/2] feat(python): use pyproject.toml metadata for root SBOM component Read project name, version, and license from pyproject.toml instead of using hardcoded defaults. Supports both PEP 621 ([project]) and Poetry ([tool.poetry]) formats with graceful fallback to existing defaults. - Add getRootComponentName/Version overrides in PythonPyprojectProvider - Add readLicenseFromManifest override for TOML license extraction - Cache parsed TOML to avoid redundant parsing - Add virtual methods in PythonProvider base class for subclass override Implements TC-3894 Assisted-by: Claude Code --- .../trustifyda/providers/PythonProvider.java | 14 +++- .../providers/PythonPyprojectProvider.java | 77 ++++++++++++++++++- .../Python_Pyproject_Provider_Test.java | 69 +++++++++++++++++ .../pyproject.toml | 4 + .../pyproject.toml | 7 ++ .../pyproject.toml | 8 ++ 6 files changed, 171 insertions(+), 8 deletions(-) create mode 100644 src/test/resources/tst_manifests/pip/pip_pyproject_toml_no_metadata/pyproject.toml create mode 100644 src/test/resources/tst_manifests/pip/pip_pyproject_toml_pep621_license/pyproject.toml create mode 100644 src/test/resources/tst_manifests/pip/pip_pyproject_toml_poetry_license/pyproject.toml diff --git a/src/main/java/io/github/guacsec/trustifyda/providers/PythonProvider.java b/src/main/java/io/github/guacsec/trustifyda/providers/PythonProvider.java index cfd6d47e..5647d6ad 100644 --- a/src/main/java/io/github/guacsec/trustifyda/providers/PythonProvider.java +++ b/src/main/java/io/github/guacsec/trustifyda/providers/PythonProvider.java @@ -69,6 +69,14 @@ public String readLicenseFromManifest() { return LicenseUtils.readLicenseFile(manifest); } + protected String getRootComponentName() { + return DEFAULT_PIP_ROOT_COMPONENT_NAME; + } + + protected String getRootComponentVersion() { + return 
DEFAULT_PIP_ROOT_COMPONENT_VERSION; + } + /** * Returns the path to a requirements-format file that the {@link PythonControllerBase} can * consume. For requirements.txt this is the manifest itself; for pyproject.toml a temporary file @@ -92,8 +100,7 @@ public Content provideStack() throws IOException { printDependenciesTree(dependencies); Sbom sbom = SbomFactory.newInstance(Sbom.BelongingCondition.PURL, "sensitive"); sbom.addRoot( - toPurl(DEFAULT_PIP_ROOT_COMPONENT_NAME, DEFAULT_PIP_ROOT_COMPONENT_VERSION), - readLicenseFromManifest()); + toPurl(getRootComponentName(), getRootComponentVersion()), readLicenseFromManifest()); for (Map component : dependencies) { addAllDependencies(sbom.getRoot(), component, sbom); } @@ -120,8 +127,7 @@ public Content provideComponent() throws IOException { printDependenciesTree(dependencies); Sbom sbom = SbomFactory.newInstance(); sbom.addRoot( - toPurl(DEFAULT_PIP_ROOT_COMPONENT_NAME, DEFAULT_PIP_ROOT_COMPONENT_VERSION), - readLicenseFromManifest()); + toPurl(getRootComponentName(), getRootComponentVersion()), readLicenseFromManifest()); dependencies.forEach( (component) -> sbom.addDependency( diff --git a/src/main/java/io/github/guacsec/trustifyda/providers/PythonPyprojectProvider.java b/src/main/java/io/github/guacsec/trustifyda/providers/PythonPyprojectProvider.java index 057e4b68..51cbc742 100644 --- a/src/main/java/io/github/guacsec/trustifyda/providers/PythonPyprojectProvider.java +++ b/src/main/java/io/github/guacsec/trustifyda/providers/PythonPyprojectProvider.java @@ -17,6 +17,7 @@ package io.github.guacsec.trustifyda.providers; import com.github.packageurl.PackageURL; +import io.github.guacsec.trustifyda.license.LicenseUtils; import io.github.guacsec.trustifyda.utils.PythonControllerBase; import java.io.IOException; import java.nio.file.Files; @@ -34,6 +35,7 @@ public final class PythonPyprojectProvider extends PythonProvider { private Set collectedIgnoredDeps; + private TomlParseResult cachedToml; public 
PythonPyprojectProvider(Path manifest) { super(manifest); @@ -54,6 +56,76 @@ protected void cleanupRequirementsPath(Path requirementsPath) throws IOException Files.deleteIfExists(requirementsPath.getParent()); } + private TomlParseResult getToml() throws IOException { + if (cachedToml == null) { + cachedToml = Toml.parse(manifest); + if (cachedToml.hasErrors()) { + throw new IOException( + "Invalid pyproject.toml format: " + cachedToml.errors().get(0).getMessage()); + } + } + return cachedToml; + } + + @Override + protected String getRootComponentName() { + try { + TomlParseResult toml = getToml(); + String name = toml.getString("project.name"); + if (name != null && !name.isBlank()) { + return name; + } + String poetryName = toml.getString("tool.poetry.name"); + if (poetryName != null && !poetryName.isBlank()) { + return poetryName; + } + } catch (IOException e) { + // fall through to default + } + return super.getRootComponentName(); + } + + @Override + protected String getRootComponentVersion() { + try { + TomlParseResult toml = getToml(); + String version = toml.getString("project.version"); + if (version != null && !version.isBlank()) { + return version; + } + String poetryVersion = toml.getString("tool.poetry.version"); + if (poetryVersion != null && !poetryVersion.isBlank()) { + return poetryVersion; + } + } catch (IOException e) { + // fall through to default + } + return super.getRootComponentVersion(); + } + + @Override + public String readLicenseFromManifest() { + try { + TomlParseResult toml = getToml(); + String license = toml.getString("project.license"); + if (license != null && !license.isBlank()) { + return license; + } + // PEP 621 table form (pre-PEP 639): license text is in project.license.text + String licenseText = toml.getString("project.license.text"); + if (licenseText != null && !licenseText.isBlank()) { + return licenseText; + } + String poetryLicense = toml.getString("tool.poetry.license"); + if (poetryLicense != null && !poetryLicense.isBlank()) { + return
poetryLicense; + } + } catch (IOException e) { + // fall through to LICENSE file + } + return LicenseUtils.readLicenseFile(manifest); + } + @Override protected Set getIgnoredDependencies(String manifestContent) { if (collectedIgnoredDeps == null) { @@ -69,10 +141,7 @@ protected Set getIgnoredDependencies(String manifestContent) { } List parseDependencyStrings() throws IOException { - TomlParseResult toml = Toml.parse(manifest); - if (toml.hasErrors()) { - throw new IOException("Invalid pyproject.toml format: " + toml.errors().get(0).getMessage()); - } + TomlParseResult toml = getToml(); List rawLines = Files.readAllLines(manifest); collectedIgnoredDeps = new HashSet<>(); diff --git a/src/test/java/io/github/guacsec/trustifyda/providers/Python_Pyproject_Provider_Test.java b/src/test/java/io/github/guacsec/trustifyda/providers/Python_Pyproject_Provider_Test.java index 5e71562d..54f9f1ef 100644 --- a/src/test/java/io/github/guacsec/trustifyda/providers/Python_Pyproject_Provider_Test.java +++ b/src/test/java/io/github/guacsec/trustifyda/providers/Python_Pyproject_Provider_Test.java @@ -163,6 +163,75 @@ void test_ignored_deps_collected_during_parsing() throws IOException { assertThat(ignoredNames).doesNotContain("anyio", "requests"); } + @Test + void test_getRootComponentName_reads_pep621_name() { + Path pyprojectPath = + Path.of("src/test/resources/tst_manifests/pip/pip_pyproject_toml_no_ignore/pyproject.toml"); + var provider = new PythonPyprojectProvider(pyprojectPath); + assertThat(provider.getRootComponentName()).isEqualTo("test-project"); + } + + @Test + void test_getRootComponentName_reads_poetry_name() { + Path pyprojectPath = + Path.of("src/test/resources/tst_manifests/pip/pip_pyproject_toml_poetry/pyproject.toml"); + var provider = new PythonPyprojectProvider(pyprojectPath); + assertThat(provider.getRootComponentName()).isEqualTo("test-project"); + } + + @Test + void test_getRootComponentName_falls_back_to_default() { + Path pyprojectPath = + Path.of( + 
"src/test/resources/tst_manifests/pip/pip_pyproject_toml_no_metadata/pyproject.toml"); + var provider = new PythonPyprojectProvider(pyprojectPath); + assertThat(provider.getRootComponentName()).isEqualTo("default-pip-root"); + } + + @Test + void test_getRootComponentVersion_reads_pep621_version() { + Path pyprojectPath = + Path.of( + "src/test/resources/tst_manifests/pip/pip_pyproject_toml_pep621_license/pyproject.toml"); + var provider = new PythonPyprojectProvider(pyprojectPath); + assertThat(provider.getRootComponentVersion()).isEqualTo("2.0.0"); + } + + @Test + void test_getRootComponentVersion_reads_poetry_version() { + Path pyprojectPath = + Path.of("src/test/resources/tst_manifests/pip/pip_pyproject_toml_poetry/pyproject.toml"); + var provider = new PythonPyprojectProvider(pyprojectPath); + assertThat(provider.getRootComponentVersion()).isEqualTo("0.1.0"); + } + + @Test + void test_getRootComponentVersion_falls_back_to_default() { + Path pyprojectPath = + Path.of( + "src/test/resources/tst_manifests/pip/pip_pyproject_toml_no_metadata/pyproject.toml"); + var provider = new PythonPyprojectProvider(pyprojectPath); + assertThat(provider.getRootComponentVersion()).isEqualTo("0.0.0"); + } + + @Test + void test_readLicenseFromManifest_reads_pep621_license() { + Path pyprojectPath = + Path.of( + "src/test/resources/tst_manifests/pip/pip_pyproject_toml_pep621_license/pyproject.toml"); + var provider = new PythonPyprojectProvider(pyprojectPath); + assertThat(provider.readLicenseFromManifest()).isEqualTo("MIT"); + } + + @Test + void test_readLicenseFromManifest_reads_poetry_license() { + Path pyprojectPath = + Path.of( + "src/test/resources/tst_manifests/pip/pip_pyproject_toml_poetry_license/pyproject.toml"); + var provider = new PythonPyprojectProvider(pyprojectPath); + assertThat(provider.readLicenseFromManifest()).isEqualTo("Apache-2.0"); + } + @Test void test_provideComponent_generates_correct_media_type() throws IOException { Path pyprojectPath = diff --git 
a/src/test/resources/tst_manifests/pip/pip_pyproject_toml_no_metadata/pyproject.toml b/src/test/resources/tst_manifests/pip/pip_pyproject_toml_no_metadata/pyproject.toml new file mode 100644 index 00000000..0ab5b1fb --- /dev/null +++ b/src/test/resources/tst_manifests/pip/pip_pyproject_toml_no_metadata/pyproject.toml @@ -0,0 +1,4 @@ +[project] +dependencies = [ + "anyio==3.6.2", +] diff --git a/src/test/resources/tst_manifests/pip/pip_pyproject_toml_pep621_license/pyproject.toml b/src/test/resources/tst_manifests/pip/pip_pyproject_toml_pep621_license/pyproject.toml new file mode 100644 index 00000000..8cd1f966 --- /dev/null +++ b/src/test/resources/tst_manifests/pip/pip_pyproject_toml_pep621_license/pyproject.toml @@ -0,0 +1,7 @@ +[project] +name = "licensed-project" +version = "2.0.0" +license = "MIT" +dependencies = [ + "anyio==3.6.2", +] diff --git a/src/test/resources/tst_manifests/pip/pip_pyproject_toml_poetry_license/pyproject.toml b/src/test/resources/tst_manifests/pip/pip_pyproject_toml_poetry_license/pyproject.toml new file mode 100644 index 00000000..40c52810 --- /dev/null +++ b/src/test/resources/tst_manifests/pip/pip_pyproject_toml_poetry_license/pyproject.toml @@ -0,0 +1,8 @@ +[tool.poetry] +name = "poetry-licensed" +version = "1.5.0" +license = "Apache-2.0" + +[tool.poetry.dependencies] +python = "^3.9" +anyio = "^3.6.2" From 2b0fde8393d7155e0a7c0b27f25aa368697229de Mon Sep 17 00:00:00 2001 From: Ruben Romero Montes Date: Wed, 25 Mar 2026 23:36:55 +0100 Subject: [PATCH 2/2] fix(python): fix cached errored TOML and add debug logging - Parse TOML into local variable before caching to avoid retaining errored parse results across subsequent calls - Add FINE-level debug logging when TOML parsing fails and metadata extraction falls back to defaults Implements TC-3894 Assisted-by: Claude Code --- .../providers/PythonPyprojectProvider.java | 18 ++++++++++++------ 1 file changed, 12 insertions(+), 6 deletions(-) diff --git 
a/src/main/java/io/github/guacsec/trustifyda/providers/PythonPyprojectProvider.java b/src/main/java/io/github/guacsec/trustifyda/providers/PythonPyprojectProvider.java index 51cbc742..c53d91ac 100644 --- a/src/main/java/io/github/guacsec/trustifyda/providers/PythonPyprojectProvider.java +++ b/src/main/java/io/github/guacsec/trustifyda/providers/PythonPyprojectProvider.java @@ -18,6 +18,7 @@ import com.github.packageurl.PackageURL; import io.github.guacsec.trustifyda.license.LicenseUtils; +import io.github.guacsec.trustifyda.logging.LoggersFactory; import io.github.guacsec.trustifyda.utils.PythonControllerBase; import java.io.IOException; import java.nio.file.Files; @@ -26,6 +27,7 @@ import java.util.HashSet; import java.util.List; import java.util.Set; +import java.util.logging.Logger; import java.util.stream.Collectors; import org.tomlj.Toml; import org.tomlj.TomlArray; @@ -34,6 +36,9 @@ public final class PythonPyprojectProvider extends PythonProvider { + private static final Logger log = + LoggersFactory.getLogger(PythonPyprojectProvider.class.getName()); + private Set collectedIgnoredDeps; private TomlParseResult cachedToml; @@ -58,11 +63,12 @@ protected void cleanupRequirementsPath(Path requirementsPath) throws IOException private TomlParseResult getToml() throws IOException { if (cachedToml == null) { - cachedToml = Toml.parse(manifest); - if (cachedToml.hasErrors()) { + TomlParseResult parsed = Toml.parse(manifest); + if (parsed.hasErrors()) { throw new IOException( - "Invalid pyproject.toml format: " + cachedToml.errors().get(0).getMessage()); + "Invalid pyproject.toml format: " + parsed.errors().get(0).getMessage()); } + cachedToml = parsed; } return cachedToml; } @@ -80,7 +86,7 @@ protected String getRootComponentName() { return poetryName; } } catch (IOException e) { - // fall through to default + log.fine("Failed to parse pyproject.toml for root component name: " + e.getMessage()); } return super.getRootComponentName(); } @@ -98,7 +104,7 @@ protected 
String getRootComponentVersion() { return poetryVersion; } } catch (IOException e) { - // fall through to default + log.fine("Failed to parse pyproject.toml for root component version: " + e.getMessage()); } return super.getRootComponentVersion(); } @@ -121,7 +127,7 @@ public String readLicenseFromManifest() { return poetryLicense; } } catch (IOException e) { - // fall through to LICENSE file + log.fine("Failed to parse pyproject.toml for license: " + e.getMessage()); } return LicenseUtils.readLicenseFile(manifest); }