Skip to content

Commit 2fb5559

Browse files
kanterovanton-107
authored and committed
[Python] Fix unicode support (#2873)
## Changes Fix Unicode support in the Python mutator. By default, `json.dump` escapes non-ASCII characters. We pass `ensure_ascii=False` to preserve Unicode characters. ## Why Fixes #2859 ## Tests Acceptance and unit tests
1 parent c126167 commit 2fb5559

8 files changed

Lines changed: 108 additions & 7 deletions

File tree

NEXT_CHANGELOG.md

Lines changed: 1 addition & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -15,5 +15,6 @@
1515
* Preserve folder structure for app source code in bundle generate ([#2848](https://github.com/databricks/cli/pull/2848))
1616
* Fixed normalising requirements file path in dependencies section ([#2861](https://github.com/databricks/cli/pull/2861))
1717
* Fix default-python template not to add environments when serverless=yes and include\_python=no ([#2866](https://github.com/databricks/cli/pull/2866))
18+
* Fixed handling of Unicode characters in Python support ([#2873](https://github.com/databricks/cli/pull/2873))
1819

1920
### API Changes
Lines changed: 19 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,19 @@
1+
bundle:
2+
name: my_project
3+
4+
sync: { paths: [] } # don't need to copy files
5+
6+
experimental:
7+
python:
8+
resources:
9+
- "resources:load_resources"
10+
11+
resources:
12+
jobs:
13+
job_1:
14+
name: "🔥🔥🔥"
15+
16+
variables:
17+
my_variable:
18+
default: "my_variable"
19+
description: "🔥🔥🔥"
Lines changed: 45 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,45 @@
1+
2+
>>> uv run --with-requirements requirements-latest.txt --no-cache -q [CLI] bundle validate --output json
3+
Warning: This is a warning message with unicode characters: 🔥🔥🔥
4+
5+
{
6+
"variables": {
7+
"my_variable": {
8+
"default": "my_variable",
9+
"description": "🔥🔥🔥",
10+
"value": "my_variable"
11+
}
12+
},
13+
"resources": {
14+
"jobs": {
15+
"job_1": {
16+
"deployment": {
17+
"kind": "BUNDLE",
18+
"metadata_file_path": "/Workspace/Users/[USERNAME]/.bundle/my_project/default/state/metadata.json"
19+
},
20+
"edit_mode": "UI_LOCKED",
21+
"format": "MULTI_TASK",
22+
"max_concurrent_runs": 1,
23+
"name": "🔥🔥🔥",
24+
"permissions": [],
25+
"queue": {
26+
"enabled": true
27+
}
28+
},
29+
"job_2": {
30+
"deployment": {
31+
"kind": "BUNDLE",
32+
"metadata_file_path": "/Workspace/Users/[USERNAME]/.bundle/my_project/default/state/metadata.json"
33+
},
34+
"edit_mode": "UI_LOCKED",
35+
"format": "MULTI_TASK",
36+
"max_concurrent_runs": 1,
37+
"name": "🔥🔥🔥",
38+
"permissions": [],
39+
"queue": {
40+
"enabled": true
41+
}
42+
}
43+
}
44+
}
45+
}
Lines changed: 13 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,13 @@
1+
from databricks.bundles.core import Resources, Diagnostics
2+
3+
4+
def load_resources() -> Resources:
5+
resources = Resources()
6+
7+
resources.add_job("job_2", {"name": "🔥🔥🔥"})
8+
9+
resources.add_diagnostics(
10+
Diagnostics.create_warning("This is a warning message with unicode characters: 🔥🔥🔥"),
11+
)
12+
13+
return resources
Lines changed: 6 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,6 @@
1+
echo "$DATABRICKS_BUNDLES_WHEEL" > "requirements-latest.txt"
2+
3+
trace uv run $UV_ARGS -q $CLI bundle validate --output json | \
4+
jq "pick(.variables, .resources)"
5+
6+
rm -fr .databricks __pycache__
Lines changed: 8 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,8 @@
1+
Local = true
2+
Cloud = false # tests don't interact with APIs
3+
4+
[EnvMatrix]
5+
UV_ARGS = [
6+
# only fixed in the latest version
7+
"--with-requirements requirements-latest.txt --no-cache",
8+
]

experimental/python/databricks/bundles/build.py

Lines changed: 7 additions & 7 deletions
Original file line numberDiff line numberDiff line change
@@ -169,7 +169,7 @@ def _apply_mutators_for_type(
169169
def python_mutator(
170170
args: _Args,
171171
) -> tuple[dict, dict[tuple[str, ...], Location], Diagnostics]:
172-
input = json.load(open(args.input))
172+
input = json.load(open(args.input, encoding="utf-8"))
173173
experimental = input.get("experimental", {})
174174

175175
if experimental.get("pydabs", {}) != {}:
@@ -446,14 +446,14 @@ def main(argv: list[str]) -> int:
446446
logging.basicConfig(level=logging.DEBUG, stream=sys.stderr)
447447
new_bundle, locations, diagnostics = python_mutator(args)
448448

449-
with open(args.diagnostics, "w") as f:
449+
with open(args.diagnostics, "w", encoding="utf-8") as f:
450450
_write_diagnostics(f, diagnostics)
451451

452452
if locations_path := args.locations:
453-
with open(locations_path, "w") as f:
453+
with open(locations_path, "w", encoding="utf-8") as f:
454454
_write_locations(f, locations)
455455

456-
with open(args.output, "w") as f:
456+
with open(args.output, "w", encoding="utf-8") as f:
457457
_write_output(f, new_bundle)
458458

459459
return 1 if diagnostics.has_error() else 0
@@ -466,12 +466,12 @@ def _write_diagnostics(f: TextIO, diagnostics: Diagnostics) -> None:
466466
if obj.get("path"):
467467
obj["path"] = ".".join(obj["path"])
468468

469-
json.dump(obj, f)
469+
json.dump(obj, f, ensure_ascii=False)
470470
f.write("\n")
471471

472472

473473
def _write_output(f: TextIO, bundle: dict) -> None:
474-
json.dump(bundle, f)
474+
json.dump(bundle, f, ensure_ascii=False)
475475

476476

477477
def _relativize_locations(
@@ -503,7 +503,7 @@ def _write_locations(f: TextIO, locations: dict[tuple[str, ...], Location]) -> N
503503
for path, location in locations.items():
504504
obj = {"path": ".".join(path), **location.as_dict()}
505505

506-
json.dump(obj, f)
506+
json.dump(obj, f, ensure_ascii=False)
507507
f.write("\n")
508508

509509

experimental/python/databricks_tests/test_build.py

Lines changed: 9 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -16,6 +16,7 @@
1616
_relativize_location,
1717
_write_diagnostics,
1818
_write_locations,
19+
_write_output,
1920
)
2021
from databricks.bundles.core import (
2122
Bundle,
@@ -100,6 +101,14 @@ def test_write_location():
100101
)
101102

102103

104+
def test_write_output_unicode():
105+
out = StringIO()
106+
107+
_write_output(out, {"unicode": "🔥🔥🔥"})
108+
109+
assert out.getvalue() == '{"unicode": "🔥🔥🔥"}'
110+
111+
103112
def test_relativize_location():
104113
file = Path("bar.py").absolute().as_posix()
105114
location = Location(file=file, line=42, column=1)

0 commit comments

Comments
 (0)