diff --git a/spdxexp/extracts.go b/spdxexp/extracts.go
index 55afff3..923e6e9 100644
--- a/spdxexp/extracts.go
+++ b/spdxexp/extracts.go
@@ -1,5 +1,11 @@
 package spdxexp
 
+import (
+	"maps"
+	"slices"
+)
+
 // ExtractLicenses extracts licenses from the given expression without duplicates.
 // Returns an array of licenses or error if error occurs during processing.
+// The returned licenses are sorted so that the result is deterministic.
 func ExtractLicenses(expression string) ([]string, error) {
@@ -8,14 +14,28 @@ func ExtractLicenses(expression string) ([]string, error) {
 		return nil, err
 	}
 
-	expanded := node.expand(true)
-	licenses := make([]string, 0)
-	allLicenses := flatten(expanded)
-	for _, licenseNode := range allLicenses {
-		licenses = append(licenses, *licenseNode.reconstructedLicenseString())
+	seen := map[string]struct{}{}
+	collectExtractedLicenses(node, seen)
+	// maps.Keys yields keys in unspecified order; sort them for a stable result.
+	return slices.Sorted(maps.Keys(seen)), nil
+}
+
+// collectExtractedLicenses walks the expression tree rooted at n and records the
+// reconstructed license string of every non-expression node as a key of seen.
+// A nil node, or a node without a reconstructed license string, adds nothing.
+func collectExtractedLicenses(n *node, seen map[string]struct{}) {
+	if n == nil {
+		return
 	}
 
-	licenses = removeDuplicateStrings(licenses)
+	if n.isExpression() {
+		collectExtractedLicenses(n.left(), seen)
+		collectExtractedLicenses(n.right(), seen)
+		return
+	}
 
-	return licenses, nil
+	if reconstructed := n.reconstructedLicenseString(); reconstructed != nil {
+		// seen is a set: storing a license that is already present is a no-op.
+		seen[*reconstructed] = struct{}{}
+	}
 }
diff --git a/spdxexp/extracts_test.go b/spdxexp/extracts_test.go
index f56676b..f7f7d7a 100644
--- a/spdxexp/extracts_test.go
+++ b/spdxexp/extracts_test.go
@@ -1,11 +1,49 @@
 package spdxexp
 
 import (
+	"context"
+	"os"
+	"os/exec"
 	"testing"
+	"time"
 
 	"github.com/stretchr/testify/assert"
 )
 
+const kernelHeadersLicense = `(GPL-2.0-only WITH Linux-syscall-note OR BSD-2-Clause) AND (GPL-2.0-only WITH Linux-syscall-note OR BSD-3-Clause) AND (GPL-2.0-only WITH Linux-syscall-note OR CDDL-1.0) AND (GPL-2.0-only WITH Linux-syscall-note OR Linux-OpenIB) AND (GPL-2.0-only WITH Linux-syscall-note OR MIT) AND (GPL-2.0-or-later WITH Linux-syscall-note OR BSD-3-Clause) AND (GPL-2.0-or-later WITH Linux-syscall-note OR MIT) AND Apache-2.0 AND BSD-2-Clause AND BSD-3-Clause AND BSD-3-Clause-Clear AND GFDL-1.1-no-invariants-or-later AND GPL-1.0-or-later AND (GPL-1.0-or-later OR BSD-3-Clause) AND GPL-1.0-or-later WITH Linux-syscall-note AND GPL-2.0-only AND (GPL-2.0-only OR Apache-2.0) AND (GPL-2.0-only OR BSD-2-Clause) AND (GPL-2.0-only OR BSD-3-Clause) AND (GPL-2.0-only OR CDDL-1.0) AND (GPL-2.0-only OR GFDL-1.1-no-invariants-or-later) AND (GPL-2.0-only OR GFDL-1.2-no-invariants-only) AND GPL-2.0-only WITH Linux-syscall-note AND GPL-2.0-or-later AND (GPL-2.0-or-later OR BSD-2-Clause) AND (GPL-2.0-or-later OR BSD-3-Clause) AND (GPL-2.0-or-later OR CC-BY-4.0) AND GPL-2.0-or-later WITH GCC-exception-2.0 AND GPL-2.0-or-later WITH Linux-syscall-note AND ISC AND LGPL-2.0-or-later AND (LGPL-2.0-or-later OR BSD-2-Clause) AND LGPL-2.0-or-later WITH Linux-syscall-note AND LGPL-2.1-only AND (LGPL-2.1-only OR BSD-2-Clause) AND LGPL-2.1-only WITH Linux-syscall-note AND LGPL-2.1-or-later AND LGPL-2.1-or-later WITH Linux-syscall-note AND (Linux-OpenIB OR GPL-2.0-only) AND (Linux-OpenIB OR GPL-2.0-only OR BSD-2-Clause) AND Linux-man-pages-copyleft AND MIT AND (MIT OR GPL-2.0-only) AND (MIT OR GPL-2.0-or-later) AND (MIT OR LGPL-2.1-only) AND (MPL-1.1 OR GPL-2.0-only) AND (X11 OR GPL-2.0-only) AND (X11 OR GPL-2.0-or-later) AND Zlib AND (copyleft-next-0.3.1 OR GPL-2.0-or-later)`
+
+var expectedKernelHeadersLicenses = []string{
+	"GPL-2.0-only WITH Linux-syscall-note",
+	"BSD-2-Clause",
+	"BSD-3-Clause",
+	"CDDL-1.0",
+	"Linux-OpenIB",
+	"MIT",
+	"GPL-2.0-or-later WITH Linux-syscall-note",
+	"Apache-2.0",
+	"BSD-3-Clause-Clear",
+	"GFDL-1.1-no-invariants-or-later",
+	"GPL-1.0-or-later",
+	"GPL-1.0-or-later WITH Linux-syscall-note",
+	"GPL-2.0-only",
+	"GFDL-1.2-no-invariants-only",
+	"GPL-2.0-or-later",
+	"CC-BY-4.0",
+	"GPL-2.0-or-later WITH GCC-exception-2.0",
+	"ISC",
+	"LGPL-2.0-or-later",
+	"LGPL-2.0-or-later WITH Linux-syscall-note",
+	"LGPL-2.1-only",
+	"LGPL-2.1-only WITH Linux-syscall-note",
+	"LGPL-2.1-or-later",
+	"LGPL-2.1-or-later WITH Linux-syscall-note",
+	"Linux-man-pages-copyleft",
+	"MPL-1.1",
+	"X11",
+	"Zlib",
+	"copyleft-next-0.3.1",
+}
+
 func TestExtractLicenses(t *testing.T) {
 	tests := []struct {
 		name string
@@ -35,3 +73,32 @@ func TestExtractLicenses(t *testing.T) {
 		})
 	}
 }
+
+func TestExtractLicensesLicenseRefAndDedup(t *testing.T) {
+	licenses, err := ExtractLicenses("(LicenseRef-custom OR LicenseRef-custom) AND (DocumentRef-spdx-tool-1.2:LicenseRef-custom OR MIT)")
+	assert.NoError(t, err)
+	assert.ElementsMatch(t, []string{"LicenseRef-custom", "DocumentRef-spdx-tool-1.2:LicenseRef-custom", "MIT"}, licenses)
+}
+
+func TestExtractLicensesLongExpressionDoesNotHang(t *testing.T) {
+	if os.Getenv("GO_SPDX_EXTRACT_LICENSES_LONG_CHILD") == "1" {
+		licenses, err := ExtractLicenses(kernelHeadersLicense)
+		assert.NoError(t, err)
+		assert.ElementsMatch(t, expectedKernelHeadersLicenses, licenses)
+		return
+	}
+
+	ctx, cancel := context.WithTimeout(context.Background(), 5*time.Second)
+	defer cancel()
+
+	// #nosec G204 G702 -- safe in tests: re-executes current test binary with fixed arg
+	cmd := exec.CommandContext(ctx, os.Args[0], "-test.run", "^TestExtractLicensesLongExpressionDoesNotHang$")
+	cmd.Env = append(os.Environ(), "GO_SPDX_EXTRACT_LICENSES_LONG_CHILD=1")
+	output, err := cmd.CombinedOutput()
+	if ctx.Err() == context.DeadlineExceeded {
+		t.Fatalf("ExtractLicenses timed out on long expression: %s", output)
+	}
+	if err != nil {
+		t.Fatalf("child process failed: %v\n%s", err, output)
+	}
+}
diff --git a/spdxexp/helpers.go b/spdxexp/helpers.go
deleted file mode 100644
index 8ad73df..0000000
--- a/spdxexp/helpers.go
+++ /dev/null
@@ -1,24 +0,0 @@
-package spdxexp
-
-// flatten will take an array of nested array and return
-// all nested elements in an array. e.g. [[1,2,[3]],4] -> [1,2,3,4]
-func flatten[T any](lists [][]T) []T {
-	var res []T
-	for _, list := range lists {
-		res = append(res, list...)
-	}
-	return res
-}
-
-// removeDuplicateStrings will remove all duplicates from a slice
-func removeDuplicateStrings(sliceList []string) []string {
-	allKeys := make(map[string]bool)
-	list := []string{}
-	for _, item := range sliceList {
-		if _, value := allKeys[item]; !value {
-			allKeys[item] = true
-			list = append(list, item)
-		}
-	}
-	return list
-}