Skip to content
Merged
Show file tree
Hide file tree
Changes from 2 commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
Binary file modified .coverage
Binary file not shown.
114 changes: 111 additions & 3 deletions json2xml/dicttoxml.py
Original file line number Diff line number Diff line change
Expand Up @@ -5,7 +5,7 @@
import numbers
from collections.abc import Callable, Sequence
from random import SystemRandom
from typing import Any, Union
from typing import Any, Union, cast

from defusedxml.minidom import parseString

Expand Down Expand Up @@ -188,6 +188,79 @@ def default_item_func(parent: str) -> str:
return "item"


# XPath 3.1 json-to-xml conversion
# Spec: https://www.w3.org/TR/xpath-functions-31/#json-to-xml-mapping
XPATH_FUNCTIONS_NS = "http://www.w3.org/2005/xpath-functions"


def get_xpath31_tag_name(val: Any) -> str:
    """
    Determine XPath 3.1 tag name by Python type.

    See: https://www.w3.org/TR/xpath-functions-31/#func-json-to-xml

    Args:
        val: The value to get the tag name for.

    Returns:
        str: The XPath 3.1 tag name (map, array, string, number, boolean, null).
            Any value that matches none of the recognised types falls back
            to "string".
    """
    if val is None:
        return "null"
    # bool is a subclass of int, so it has to be tested before the numeric
    # check or True/False would be reported as "number".
    if isinstance(val, bool):
        return "boolean"
    if isinstance(val, dict):
        return "map"
    # int and float are registered numbers.Number types, so one check covers
    # them together with Decimal, Fraction, complex, etc.
    if isinstance(val, numbers.Number):
        return "number"
    # str, bytes and bytearray are themselves Sequences; they must be matched
    # before the generic Sequence test so they are not treated as arrays.
    if isinstance(val, (str, bytes, bytearray)):
        return "string"
    if isinstance(val, Sequence):
        return "array"
    return "string"


def convert_to_xpath31(obj: Any, parent_key: str | None = None) -> str:
    """
    Convert a Python object to XPath 3.1 json-to-xml format.

    See: https://www.w3.org/TR/xpath-functions-31/#json-to-xml-mapping

    Args:
        obj: The object to convert.
        parent_key: The key from the parent dict (used for key attribute).
            Keys that are not strings are stringified before being escaped.

    Returns:
        str: XML string in XPath 3.1 format.
    """
    if parent_key is None:
        key_attr = ""
    else:
        # Review note (PR discussion): dict keys reach this point straight
        # from obj.items() and are not guaranteed to be strings (ints, enums,
        # tuples, ...). Normalise with str() first so the key is always
        # escaped as text before it is placed inside the attribute value.
        key_attr = f' key="{escape_xml(str(parent_key))}"'

    tag_name = get_xpath31_tag_name(obj)

    if tag_name == "null":
        return f"<null{key_attr}/>"

    if tag_name == "boolean":
        # JSON booleans are lowercase: True -> "true", False -> "false".
        return f"<boolean{key_attr}>{str(obj).lower()}</boolean>"

    if tag_name == "number":
        return f"<number{key_attr}>{obj}</number>"

    if tag_name == "map":
        children = "".join(convert_to_xpath31(v, k) for k, v in obj.items())
        return f"<map{key_attr}>{children}</map>"

    if tag_name == "array":
        # Array members carry no key attribute.
        children = "".join(convert_to_xpath31(item) for item in obj)
        return f"<array{key_attr}>{children}</array>"

    # Everything else is emitted as <string>: real strings, bytes-like values
    # and any unrecognised type.
    if isinstance(obj, (bytes, bytearray)):
        # str(b"abc") would emit the Python repr "b'abc'"; decode instead so
        # the element holds the actual text. Undecodable bytes are replaced
        # rather than raising.
        text = bytes(obj).decode("utf-8", errors="replace")
    else:
        text = str(obj)
    return f"<string{key_attr}>{escape_xml(text)}</string>"


def convert(
obj: ELEMENT,
ids: Any,
Expand Down Expand Up @@ -233,7 +306,7 @@ def convert(
return convert_none(key=item_name, attr_type=attr_type, cdata=cdata)

if isinstance(obj, dict):
return convert_dict(obj, ids, parent, attr_type, item_func, cdata, item_wrap, list_headers=list_headers)
return convert_dict(cast("dict[str, Any]", obj), ids, parent, attr_type, item_func, cdata, item_wrap, list_headers=list_headers)

if isinstance(obj, Sequence):
return convert_list(obj, ids, parent, attr_type, item_func, cdata, item_wrap, list_headers=list_headers)
Expand Down Expand Up @@ -563,7 +636,8 @@ def dicttoxml(
item_func: Callable[[str], str] = default_item_func,
cdata: bool = False,
xml_namespaces: dict[str, Any] = {},
list_headers: bool = False
list_headers: bool = False,
xpath_format: bool = False,
) -> bytes:
"""
Converts a python object into XML.
Expand Down Expand Up @@ -652,6 +726,28 @@ def dicttoxml(
<Bike><frame_color>red</frame_color></Bike>
<Bike><frame_color>green</frame_color></Bike>

:param bool xpath_format:
Default is False
When True, produces XPath 3.1 json-to-xml compliant output as specified
by W3C (https://www.w3.org/TR/xpath-functions-31/#func-json-to-xml).
Uses type-based element names (map, array, string, number, boolean, null)
with key attributes and the http://www.w3.org/2005/xpath-functions namespace.

Example:

.. code-block:: python

{"name": "John", "age": 30}

results in

.. code-block:: xml

<map xmlns="http://www.w3.org/2005/xpath-functions">
<string key="name">John</string>
<number key="age">30</number>
</map>

Dictionary keys with the special char '@' have special meaning:
@attrs: This allows custom xml attributes:

Expand Down Expand Up @@ -681,6 +777,18 @@ def dicttoxml(
<list a="b" c="d"><item>4</item><item>5</item><item>6</item></list>

"""
if xpath_format:
xml_content = convert_to_xpath31(obj)
output = [
'<?xml version="1.0" encoding="UTF-8" ?>',
xml_content.replace("<map", f'<map xmlns="{XPATH_FUNCTIONS_NS}"', 1)
if xml_content.startswith("<map")
else xml_content.replace("<array", f'<array xmlns="{XPATH_FUNCTIONS_NS}"', 1)
if xml_content.startswith("<array")
else f'<map xmlns="{XPATH_FUNCTIONS_NS}">{xml_content}</map>',
]
return "".join(output).encode("utf-8")

output = []
namespace_str = ""
for prefix in xml_namespaces:
Expand Down
5 changes: 4 additions & 1 deletion json2xml/json2xml.py
Original file line number Diff line number Diff line change
Expand Up @@ -14,19 +14,21 @@ class Json2xml:
"""
def __init__(
self,
data: dict[str, Any] | None = None,
data: dict[str, Any] | list[Any] | None = None,
wrapper: str = "all",
root: bool = True,
pretty: bool = True,
attr_type: bool = True,
item_wrap: bool = True,
xpath_format: bool = False,
):
self.data = data
self.pretty = pretty
self.wrapper = wrapper
self.attr_type = attr_type
self.root = root
self.item_wrap = item_wrap
self.xpath_format = xpath_format

def to_xml(self) -> Any | None:
"""
Expand All @@ -39,6 +41,7 @@ def to_xml(self) -> Any | None:
custom_root=self.wrapper,
attr_type=self.attr_type,
item_wrap=self.item_wrap,
xpath_format=self.xpath_format,
)
if self.pretty:
try:
Expand Down
86 changes: 86 additions & 0 deletions tests/test_json2xml.py
Original file line number Diff line number Diff line change
Expand Up @@ -228,3 +228,89 @@ def test_encoding_without_pretty_print(self) -> None:
xmldata = json2xml.Json2xml(data, pretty=False).to_xml()
if xmldata:
assert b'encoding="UTF-8"' in xmldata

def test_xpath_format_basic(self) -> None:
"""Test XPath 3.1 json-to-xml format with basic types."""
data = {"name": "John", "age": 30, "active": True}
xmldata = json2xml.Json2xml(data, xpath_format=True, pretty=False).to_xml()
if xmldata:
Comment on lines +235 to +236
Copy link
Copy Markdown
Contributor

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

suggestion (code-quality): Use named expression to simplify assignment and conditional (use-named-expression)

Suggested change
xmldata = json2xml.Json2xml(data, xpath_format=True, pretty=False).to_xml()
if xmldata:
if xmldata := json2xml.Json2xml(
data, xpath_format=True, pretty=False
).to_xml():

assert b'xmlns="http://www.w3.org/2005/xpath-functions"' in xmldata
assert b'<string key="name">John</string>' in xmldata
assert b'<number key="age">30</number>' in xmldata
assert b'<boolean key="active">true</boolean>' in xmldata

def test_xpath_format_nested_dict(self) -> None:
"""Test XPath 3.1 format with nested dictionaries."""
data = {"person": {"name": "Alice", "age": 25}}
xmldata = json2xml.Json2xml(data, xpath_format=True, pretty=False).to_xml()
if xmldata:
Comment on lines +245 to +246
Copy link
Copy Markdown
Contributor

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

suggestion (code-quality): Use named expression to simplify assignment and conditional (use-named-expression)

Suggested change
xmldata = json2xml.Json2xml(data, xpath_format=True, pretty=False).to_xml()
if xmldata:
if xmldata := json2xml.Json2xml(
data, xpath_format=True, pretty=False
).to_xml():

assert b'<map key="person">' in xmldata
assert b'<string key="name">Alice</string>' in xmldata
assert b'<number key="age">25</number>' in xmldata

def test_xpath_format_array(self) -> None:
"""Test XPath 3.1 format with arrays."""
data = {"numbers": [1, 2, 3]}
xmldata = json2xml.Json2xml(data, xpath_format=True, pretty=False).to_xml()
if xmldata:
Comment on lines +254 to +255
Copy link
Copy Markdown
Contributor

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

suggestion (code-quality): Use named expression to simplify assignment and conditional (use-named-expression)

Suggested change
xmldata = json2xml.Json2xml(data, xpath_format=True, pretty=False).to_xml()
if xmldata:
if xmldata := json2xml.Json2xml(
data, xpath_format=True, pretty=False
).to_xml():

assert b'<array key="numbers">' in xmldata
assert b'<number>1</number>' in xmldata
assert b'<number>2</number>' in xmldata
assert b'<number>3</number>' in xmldata

def test_xpath_format_null(self) -> None:
"""Test XPath 3.1 format with null values."""
data = {"value": None}
xmldata = json2xml.Json2xml(data, xpath_format=True, pretty=False).to_xml()
if xmldata:
Comment on lines +264 to +265
Copy link
Copy Markdown
Contributor

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

suggestion (code-quality): Use named expression to simplify assignment and conditional (use-named-expression)

Suggested change
xmldata = json2xml.Json2xml(data, xpath_format=True, pretty=False).to_xml()
if xmldata:
if xmldata := json2xml.Json2xml(
data, xpath_format=True, pretty=False
).to_xml():

assert b'<null key="value"/>' in xmldata

def test_xpath_format_mixed_array(self) -> None:
"""Test XPath 3.1 format with mixed type arrays."""
data = {"items": ["text", 42, True, None]}
xmldata = json2xml.Json2xml(data, xpath_format=True, pretty=False).to_xml()
if xmldata:
Comment on lines +271 to +272
Copy link
Copy Markdown
Contributor

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

suggestion (code-quality): Use named expression to simplify assignment and conditional (use-named-expression)

Suggested change
xmldata = json2xml.Json2xml(data, xpath_format=True, pretty=False).to_xml()
if xmldata:
if xmldata := json2xml.Json2xml(
data, xpath_format=True, pretty=False
).to_xml():

assert b'<array key="items">' in xmldata
assert b'<string>text</string>' in xmldata
assert b'<number>42</number>' in xmldata
assert b'<boolean>true</boolean>' in xmldata
assert b'<null/>' in xmldata

def test_xpath_format_complex_nested(self) -> None:
"""Test XPath 3.1 format with complex nested structures."""
data = {
"content": [
{"id": 70805774, "value": "1001", "position": [1004.0, 288.0]},
]
}
xmldata = json2xml.Json2xml(data, xpath_format=True, pretty=False).to_xml()
if xmldata:
Comment on lines +286 to +287
Copy link
Copy Markdown
Contributor

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

suggestion (code-quality): Use named expression to simplify assignment and conditional (use-named-expression)

Suggested change
xmldata = json2xml.Json2xml(data, xpath_format=True, pretty=False).to_xml()
if xmldata:
if xmldata := json2xml.Json2xml(
data, xpath_format=True, pretty=False
).to_xml():

assert b'<array key="content">' in xmldata
assert b'<number key="id">70805774</number>' in xmldata
assert b'<string key="value">1001</string>' in xmldata
assert b'<array key="position">' in xmldata
assert b'<number>1004.0</number>' in xmldata
Comment on lines +279 to +292
Copy link
Copy Markdown
Contributor

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

suggestion (testing): It may be helpful to assert on the root element shape to prove there is no extra wrapper in XPath mode

Since this test currently validates only inner fragments, consider also asserting on the top-level element to confirm the XPath root shape. For example, check that the root is <map> with the expected namespace and that no legacy wrapper like <all> is present. That will verify xpath_format=True affects the root correctly, not just nested nodes.

Suggested change
def test_xpath_format_complex_nested(self) -> None:
"""Test XPath 3.1 format with complex nested structures."""
data = {
"content": [
{"id": 70805774, "value": "1001", "position": [1004.0, 288.0]},
]
}
xmldata = json2xml.Json2xml(data, xpath_format=True, pretty=False).to_xml()
if xmldata:
assert b'<array key="content">' in xmldata
assert b'<number key="id">70805774</number>' in xmldata
assert b'<string key="value">1001</string>' in xmldata
assert b'<array key="position">' in xmldata
assert b'<number>1004.0</number>' in xmldata
def test_xpath_format_complex_nested(self) -> None:
"""Test XPath 3.1 format with complex nested structures."""
data = {
"content": [
{"id": 70805774, "value": "1001", "position": [1004.0, 288.0]},
]
}
xmldata = json2xml.Json2xml(data, xpath_format=True, pretty=False).to_xml()
if xmldata:
# Assert on the root element shape for XPath mode: a <map> with the XPath namespace
# and no legacy wrapper such as <all>.
assert b'<map xmlns="http://www.w3.org/2005/xpath-functions"' in xmldata
assert b'<all>' not in xmldata
# Existing inner-fragment assertions
assert b'<array key="content">' in xmldata
assert b'<number key="id">70805774</number>' in xmldata
assert b'<string key="value">1001</string>' in xmldata
assert b'<array key="position">' in xmldata
assert b'<number>1004.0</number>' in xmldata


def test_xpath_format_escaping(self) -> None:
"""Test XPath 3.1 format properly escapes special characters."""
data = {"text": "<script>alert('xss')</script>"}
xmldata = json2xml.Json2xml(data, xpath_format=True, pretty=False).to_xml()
if xmldata:
Comment on lines +297 to +298
Copy link
Copy Markdown
Contributor

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

suggestion (code-quality): Use named expression to simplify assignment and conditional (use-named-expression)

Suggested change
xmldata = json2xml.Json2xml(data, xpath_format=True, pretty=False).to_xml()
if xmldata:
if xmldata := json2xml.Json2xml(
data, xpath_format=True, pretty=False
).to_xml():

assert b"&lt;script&gt;" in xmldata
assert b"&apos;xss&apos;" in xmldata
Comment on lines +294 to +300
Copy link
Copy Markdown
Contributor

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

suggestion (testing): Consider also asserting that unsafe characters are not present unescaped alongside the escaped ones

For example, you could extend this test with negative assertions such as assert b"<script>" not in xmldata and assert b"'xss'" not in xmldata to verify the raw, unescaped content never appears in the output.

Suggested change
def test_xpath_format_escaping(self) -> None:
"""Test XPath 3.1 format properly escapes special characters."""
data = {"text": "<script>alert('xss')</script>"}
xmldata = json2xml.Json2xml(data, xpath_format=True, pretty=False).to_xml()
if xmldata:
assert b"&lt;script&gt;" in xmldata
assert b"&apos;xss&apos;" in xmldata
def test_xpath_format_escaping(self) -> None:
"""Test XPath 3.1 format properly escapes special characters."""
data = {"text": "<script>alert('xss')</script>"}
xmldata = json2xml.Json2xml(data, xpath_format=True, pretty=False).to_xml()
if xmldata:
# Escaped content should be present
assert b"&lt;script&gt;" in xmldata
assert b"&apos;xss&apos;" in xmldata
# Raw, unescaped content must not appear
assert b"<script>" not in xmldata
assert b"'xss'" not in xmldata


def test_xpath_format_with_pretty_print(self) -> None:
"""Test XPath 3.1 format works with pretty printing."""
data = {"name": "Test"}
xmldata = json2xml.Json2xml(data, xpath_format=True, pretty=True).to_xml()
if xmldata:
Comment on lines +305 to +306
Copy link
Copy Markdown
Contributor

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

suggestion (code-quality): Use named expression to simplify assignment and conditional (use-named-expression)

Suggested change
xmldata = json2xml.Json2xml(data, xpath_format=True, pretty=True).to_xml()
if xmldata:
if xmldata := json2xml.Json2xml(
data, xpath_format=True, pretty=True
).to_xml():

assert 'xmlns="http://www.w3.org/2005/xpath-functions"' in xmldata
assert '<string key="name">Test</string>' in xmldata
Comment on lines +302 to +308
Copy link
Copy Markdown
Contributor

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

issue (bug_risk): xmldata is likely bytes here, so these assertions should consistently work with bytes or decode to str

Earlier in this file xmldata is treated as bytes (e.g. b'xmlns=...' in xmldata), but here the assertions use str. If Json2xml.to_xml() returns bytes, these will fail on Python 3 due to str/bytes mismatch. Please either make these assertions use byte literals (e.g. b'xmlns="..."') or decode xmldata first (e.g. xmldata_str = xmldata.decode("utf-8")) and assert on that, so the pretty-print behavior is actually tested rather than tripping on a type error.


def test_xpath_format_root_array(self) -> None:
"""Test XPath 3.1 format with root-level array."""
data = [1, 2, 3]
xmldata = json2xml.Json2xml(data, xpath_format=True, pretty=False).to_xml()
if xmldata:
Comment on lines +313 to +314
Copy link
Copy Markdown
Contributor

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

suggestion (code-quality): Use named expression to simplify assignment and conditional (use-named-expression)

Suggested change
xmldata = json2xml.Json2xml(data, xpath_format=True, pretty=False).to_xml()
if xmldata:
if xmldata := json2xml.Json2xml(
data, xpath_format=True, pretty=False
).to_xml():

assert b'<array xmlns="http://www.w3.org/2005/xpath-functions">' in xmldata
assert b'<number>1</number>' in xmldata
Loading
Loading