@@ -311,27 +311,77 @@ def convert_spdx_expression(license_expression_spdx):
311311 return get_license_detections_and_expression (license_expression_spdx )[1 ]
312312
313313
def build_spdx_purl(spdx_package):
    """
    Return a ``(package_url_dict, inferred)`` tuple for the SPDX package.

    ``package_url_dict`` is the ``PackageURL.to_dict(encode=True)`` mapping,
    or an empty dict when no PURL can be resolved.
    ``inferred`` is True when a "generic" PURL was synthesized, and False
    when a declared PURL was used as-is or when nothing could be resolved.

    Resolution order:
    1. The first declared "purl" external reference whose type is not
       "unknown" is used unchanged.
    2. Otherwise, the first declared PURL of type "unknown" is upgraded to
       type "generic", keeping its namespace, name, version, qualifiers and
       subpath.
    3. Otherwise, a generic PURL is built from the package name and version.
    """
    unknown_purl = None

    for ref in spdx_package.external_refs:
        if ref.type != "purl" or not ref.locator:
            continue

        try:
            declared = PackageURL.from_string(ref.locator)
        except ValueError:
            # A malformed locator should not abort the whole conversion:
            # ignore it and keep looking, the generic fallback still applies.
            continue

        if declared.type and declared.type != "unknown":
            return declared.to_dict(encode=True), False

        # Remember the first "unknown" PURL but keep scanning, since a later
        # reference may declare a meaningful type that should take precedence.
        if unknown_purl is None and declared.name:
            unknown_purl = declared

    if unknown_purl is not None:
        generic = PackageURL(
            type="generic",
            namespace=unknown_purl.namespace,
            name=unknown_purl.name,
            version=unknown_purl.version,
            qualifiers=unknown_purl.qualifiers,
            subpath=unknown_purl.subpath,
        )
        return generic.to_dict(encode=True), True

    # No usable declared PURL: derive one from the package name and version.
    name = (spdx_package.name or "").strip()
    version = (spdx_package.version or "").strip()

    if name:
        generic = PackageURL(
            type="generic",
            name=name,
            version=version or None,
        )
        return generic.to_dict(encode=True), True

    return {}, False
355+
356+
314357def spdx_package_to_package_data (spdx_package ):
315358 """Convert the provided spdx_package into package_data."""
316- package_url_dict = {}
317- # Store the original "SPDXID" as package_uid for dependencies resolution.
318359 package_uid = spdx_package .spdx_id
319360
320- for ref in spdx_package .external_refs :
321- if ref .type == "purl" :
322- purl = ref .locator
323- package_url_dict = PackageURL .from_string (purl ).to_dict (encode = True )
361+ # Resolve declared or fallback PURL
362+ package_url_dict , inferred = build_spdx_purl (spdx_package )
324363
364+ # Collect checksums
325365 checksum_data = {
326366 checksum .algorithm .lower (): checksum .value
327367 for checksum in spdx_package .checksums
328368 }
329369
370+ # License handling
330371 declared_license_expression_spdx = spdx_package .license_concluded
331372 declared_expression = ""
332373 if declared_license_expression_spdx :
333374 declared_expression = convert_spdx_expression (declared_license_expression_spdx )
334375
376+ # Structured identity metadata
377+ identity = {
378+ "source" : "inferred" if inferred else "declared" ,
379+ "origin" : {
380+ "download_location" : spdx_package .download_location ,
381+ "homepage" : spdx_package .homepage ,
382+ },
383+ }
384+
335385 package_data = {
336386 "package_uid" : package_uid ,
337387 "name" : spdx_package .name ,
@@ -345,6 +395,9 @@ def spdx_package_to_package_data(spdx_package):
345395 "filename" : spdx_package .filename ,
346396 "description" : spdx_package .description ,
347397 "release_date" : spdx_package .release_date ,
398+ "extra_data" : {
399+ "identity" : identity ,
400+ },
348401 ** package_url_dict ,
349402 ** checksum_data ,
350403 }
0 commit comments