-
Notifications
You must be signed in to change notification settings - Fork 165
Add warning when an invalid value is specified for enum field #3050
New issue
Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.
By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.
Already on GitHub? Sign in to your account
Changes from 15 commits
b7e0bed
4c327c5
f612f5a
9199d25
de8477e
849787f
427ae3b
2a0e91a
43d08e6
4883c88
3798248
4185e7e
fae01ce
8b09d0b
a788da5
34ea7ff
e66cf33
13a9bda
2b6c55e
7d60764
0cdd22a
16a7af6
293f248
9051a32
8227fba
c841f6e
ea7b3e2
File filter
Filter by extension
Conversations
Jump to
Diff view
Diff view
There are no files selected for viewing
| Original file line number | Diff line number | Diff line change |
|---|---|---|
|
|
@@ -6,5 +6,4 @@ artifacts: | |
| type: whl | ||
| build: uv build --wheel | ||
| second_wheel: | ||
| type: jar | ||
| build: true | ||
| Original file line number | Diff line number | Diff line change |
|---|---|---|
|
|
@@ -12,7 +12,6 @@ | |
| }, | ||
| "second_wheel": { | ||
| "build": "true", | ||
| "path": "[TEST_TMP_DIR]", | ||
| "type": "jar" | ||
| "path": "[TEST_TMP_DIR]" | ||
| } | ||
| } | ||
| Original file line number | Diff line number | Diff line change |
|---|---|---|
| @@ -0,0 +1,56 @@ | ||
| variables: | ||
| my_variable: | ||
| type: "complex" | ||
| default: | ||
| key: "value" | ||
|
|
||
| my_variable_invalid: | ||
| type: "INVALID_TYPE" | ||
| default: "value" | ||
|
|
||
| my_variable_type_missing: | ||
| default: "value" | ||
|
|
||
| my_variable_type_empty: | ||
| type: "" | ||
| default: "value" | ||
|
|
||
| resources: | ||
| jobs: | ||
| my_job_valid: | ||
| tasks: | ||
| - task_key: "task1" | ||
| # Valid enum value | ||
| run_if: "ALL_SUCCESS" | ||
| notebook_task: | ||
| # Valid enum value | ||
| source: "GIT" | ||
| notebook_path: "/path/to/notebook" | ||
| new_cluster: | ||
| # Valid enum values | ||
| runtime_engine: "PHOTON" | ||
| data_security_mode: "SINGLE_USER" | ||
| aws_attributes: | ||
| availability: "ON_DEMAND" | ||
| ebs_volume_type: "GENERAL_PURPOSE_SSD" | ||
| node_type_id: "i3.xlarge" | ||
| num_workers: 1 | ||
|
|
||
| my_job_invalid: | ||
| tasks: | ||
| - task_key: "task2" | ||
| # Invalid enum value - should trigger warning | ||
| run_if: "INVALID_CONDITION" | ||
| notebook_task: | ||
| # Invalid enum value - should trigger warning | ||
| source: "INVALID_SOURCE" | ||
| notebook_path: "/path/to/notebook" | ||
| new_cluster: | ||
| # Invalid enum values - should trigger warnings | ||
| runtime_engine: "INVALID_ENGINE" | ||
| data_security_mode: "INVALID_MODE" | ||
| aws_attributes: | ||
| availability: "INVALID_AVAILABILITY" | ||
| ebs_volume_type: "INVALID_VOLUME_TYPE" | ||
| node_type_id: "i3.xlarge" | ||
| num_workers: 1 |
| Original file line number | Diff line number | Diff line change |
|---|---|---|
| @@ -0,0 +1,5 @@ | ||
| Local = true | ||
| Cloud = false | ||
|
|
||
| [EnvMatrix] | ||
| DATABRICKS_CLI_DEPLOYMENT = ["terraform", "direct-exp"] |
| Original file line number | Diff line number | Diff line change |
|---|---|---|
| @@ -0,0 +1,41 @@ | ||
|
|
||
| >>> [CLI] bundle validate | ||
| Warning: invalid value "" for enum field. Valid values are [complex] | ||
| at variables.my_variable_type_empty.type | ||
| in databricks.yml:45:11 | ||
|
|
||
| Warning: invalid value "INVALID_AVAILABILITY" for enum field. Valid values are [ON_DEMAND SPOT SPOT_WITH_FALLBACK] | ||
| at resources.jobs.my_job_invalid.tasks[0].new_cluster.aws_attributes.availability | ||
| in databricks.yml:9:29 | ||
|
|
||
| Warning: invalid value "INVALID_CONDITION" for enum field. Valid values are [ALL_DONE ALL_FAILED ALL_SUCCESS AT_LEAST_ONE_FAILED AT_LEAST_ONE_SUCCESS NONE_FAILED] | ||
| at resources.jobs.my_job_invalid.tasks[0].run_if | ||
| in databricks.yml:18:19 | ||
|
|
||
| Warning: invalid value "INVALID_ENGINE" for enum field. Valid values are [NULL PHOTON STANDARD] | ||
| at resources.jobs.my_job_invalid.tasks[0].new_cluster.runtime_engine | ||
| in databricks.yml:14:29 | ||
|
|
||
| Warning: invalid value "INVALID_MODE" for enum field. Valid values are [DATA_SECURITY_MODE_AUTO DATA_SECURITY_MODE_DEDICATED DATA_SECURITY_MODE_STANDARD LEGACY_PASSTHROUGH LEGACY_SINGLE_USER LEGACY_SINGLE_USER_STANDARD LEGACY_TABLE_ACL NONE SINGLE_USER USER_ISOLATION] | ||
| at resources.jobs.my_job_invalid.tasks[0].new_cluster.data_security_mode | ||
| in databricks.yml:11:33 | ||
|
|
||
| Warning: invalid value "INVALID_SOURCE" for enum field. Valid values are [GIT WORKSPACE] | ||
| at resources.jobs.my_job_invalid.tasks[0].notebook_task.source | ||
| in databricks.yml:17:21 | ||
|
|
||
| Warning: invalid value "INVALID_TYPE" for enum field. Valid values are [complex] | ||
| at variables.my_variable_invalid.type | ||
| in databricks.yml:42:11 | ||
|
|
||
| Warning: invalid value "INVALID_VOLUME_TYPE" for enum field. Valid values are [GENERAL_PURPOSE_SSD THROUGHPUT_OPTIMIZED_HDD] | ||
| at resources.jobs.my_job_invalid.tasks[0].new_cluster.aws_attributes.ebs_volume_type | ||
| in databricks.yml:10:32 | ||
|
|
||
| Name: test-bundle | ||
| Target: default | ||
| Workspace: | ||
| User: [USERNAME] | ||
| Path: /Workspace/Users/[USERNAME]/.bundle/test-bundle/default | ||
|
|
||
| Found 8 warnings |
| Original file line number | Diff line number | Diff line change |
|---|---|---|
| @@ -0,0 +1 @@ | ||
| trace $CLI bundle validate |
| Original file line number | Diff line number | Diff line change |
|---|---|---|
| @@ -0,0 +1,104 @@ | ||
| package validate | ||
|
|
||
| import ( | ||
| "context" | ||
| "fmt" | ||
| "slices" | ||
| "sort" | ||
|
|
||
| "github.com/databricks/cli/bundle" | ||
| "github.com/databricks/cli/bundle/internal/validation/generated" | ||
| "github.com/databricks/cli/libs/diag" | ||
| "github.com/databricks/cli/libs/dyn" | ||
| ) | ||
|
|
||
| type enum struct{} | ||
|
|
||
| func Enum() bundle.Mutator { | ||
| return &enum{} | ||
| } | ||
|
|
||
| func (f *enum) Name() string { | ||
| return "validate:enum" | ||
| } | ||
|
|
||
| func (f *enum) Apply(ctx context.Context, b *bundle.Bundle) diag.Diagnostics { | ||
| diags := diag.Diagnostics{} | ||
|
|
||
| // Generate prefix tree for all enum fields. | ||
|
Contributor
There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more. Curious how much time does it take. Can we add debug message with stats? "Prefix tree with 125 nodes generated in 125ms"
Contributor
There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more. Same for the actual validation below.
Contributor
Author
There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more. Updated the PR to include benchmarks for this. The times are pretty comparable to the required validation, with a really big bundle taking around 800ms. The prefix tree itself takes 50 microseconds to generate. We also now have actual hard data for per mutator times. From what I recall, the required validation rarely takes long enough to even register in the minimum 1ms threshold. We can monitor actual validation times that customers see to determine if this is something we need to optimize. |
||
| trie := &dyn.TrieNode{} | ||
| for k := range generated.EnumFields { | ||
| pattern, err := dyn.NewPatternFromString(k) | ||
| if err != nil { | ||
| return diag.FromErr(fmt.Errorf("invalid pattern %q for enum field validation: %w", k, err)) | ||
| } | ||
|
|
||
| err = trie.Insert(pattern) | ||
| if err != nil { | ||
| return diag.FromErr(fmt.Errorf("failed to insert pattern %q into trie: %w", k, err)) | ||
| } | ||
| } | ||
|
|
||
| err := dyn.WalkReadOnly(b.Config.Value(), func(p dyn.Path, v dyn.Value) error { | ||
| // If the path is not found in the prefix tree, we do not need to validate any enum | ||
| // fields in it. | ||
| pattern, ok := trie.SearchPath(p) | ||
| if !ok { | ||
| return nil | ||
| } | ||
|
|
||
| // Skip validation if the value is not set | ||
| if v.Kind() == dyn.KindInvalid || v.Kind() == dyn.KindNil { | ||
|
shreyas-goenka marked this conversation as resolved.
Outdated
|
||
| return nil | ||
| } | ||
|
|
||
| // Get the string value for comparison | ||
| strValue, ok := v.AsString() | ||
| if !ok { | ||
| return nil | ||
| } | ||
|
|
||
| cloneP := slices.Clone(p) | ||
|
shreyas-goenka marked this conversation as resolved.
Outdated
|
||
|
|
||
| // Get valid values for this pattern | ||
| validValues := generated.EnumFields[pattern.String()] | ||
|
|
||
| // Check if the value is in the list of valid enum values | ||
| validValue := false | ||
| for _, valid := range validValues { | ||
|
Contributor
Author
There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more. I know this is not efficient, and should ideally be a set or a map. But given that these lists only have 1-10 elements, the CPU time here will not make a difference in practice at all. |
||
| if strValue == valid { | ||
|
shreyas-goenka marked this conversation as resolved.
Outdated
|
||
| validValue = true | ||
| break | ||
| } | ||
| } | ||
|
|
||
| if !validValue { | ||
| diags = diags.Append(diag.Diagnostic{ | ||
| Severity: diag.Warning, | ||
| Summary: fmt.Sprintf("invalid value %q for enum field. Valid values are %v", strValue, validValues), | ||
| Locations: v.Locations(), | ||
| Paths: []dyn.Path{cloneP}, | ||
| }) | ||
| } | ||
|
|
||
| return nil | ||
| }) | ||
| if err != nil { | ||
| return diag.FromErr(err) | ||
| } | ||
|
|
||
| // Sort diagnostics to make them deterministic | ||
| sort.Slice(diags, func(i, j int) bool { | ||
| // First sort by summary | ||
| if diags[i].Summary != diags[j].Summary { | ||
| return diags[i].Summary < diags[j].Summary | ||
| } | ||
|
|
||
| // Then sort by locations as a tie breaker if summaries are the same. | ||
|
shreyas-goenka marked this conversation as resolved.
|
||
| iLocs := fmt.Sprintf("%v", diags[i].Locations) | ||
| jLocs := fmt.Sprintf("%v", diags[j].Locations) | ||
| return iLocs < jLocs | ||
|
shreyas-goenka marked this conversation as resolved.
|
||
| }) | ||
|
|
||
| return diags | ||
| } | ||
There was a problem hiding this comment.
Choose a reason for hiding this comment
The reason will be displayed to describe this comment to others. Learn more.
Why was this removed? AFAIK this specifically tests non-whl artifacts type.
There was a problem hiding this comment.
Choose a reason for hiding this comment
The reason will be displayed to describe this comment to others. Learn more.
"jar" is not a supported value for
type. Onlywhlis. If we do not remove it, an unnecessary warning would be serialized in the test output.There was a problem hiding this comment.
Choose a reason for hiding this comment
The reason will be displayed to describe this comment to others. Learn more.
we have docs that says otherwise:
There was a problem hiding this comment.
Choose a reason for hiding this comment
The reason will be displayed to describe this comment to others. Learn more.
Fair enough. "jar" is non functional, but I've added it to the list of valid values for type and revert this change.
There was a problem hiding this comment.
Choose a reason for hiding this comment
The reason will be displayed to describe this comment to others. Learn more.
Adding it separately in this PR: https://github.com/databricks/cli/pull/3367/files