From 4892c7ca312f26463a062b485a06d7d270fe3b80 Mon Sep 17 00:00:00 2001 From: Miel Vander Sande Date: Thu, 28 May 2026 15:05:17 +0200 Subject: [PATCH 1/6] Add tests for compound-literal --- tests/runtests.py | 3 --- tests/test_jsonld.py | 53 ++++++++++++++++++++++++++++++++++++++++++++ 2 files changed, 53 insertions(+), 3 deletions(-) diff --git a/tests/runtests.py b/tests/runtests.py index 77ac1cc8..2e4709a9 100644 --- a/tests/runtests.py +++ b/tests/runtests.py @@ -995,9 +995,6 @@ def write(self, filename): 'skip': { 'specVersion': ['json-ld-1.0'], 'idRegex': [ - # direction (compound-literal) - '.*fromRdf-manifest#tdi11$', - '.*fromRdf-manifest#tdi12$', # uncategorized '.*fromRdf-manifest#t0027$', ], diff --git a/tests/test_jsonld.py b/tests/test_jsonld.py index 2f35a063..d3101e95 100644 --- a/tests/test_jsonld.py +++ b/tests/test_jsonld.py @@ -925,6 +925,59 @@ def test_conflicting_property_names_in_nested_node(self): nquads = jsonld.to_rdf(input, options={'format': 'application/n-quads'}) assert nquads == expected +class TestFromRDF: + def test_compound_literal_direction_without_language(self): + """ + Compound literals with rdf:direction should become JSON-LD value + objects when rdfDirection is compound-literal. + """ + input = """ + _:cl1 . + _:cl1 "no language" . + _:cl1 "rtl" . + """ + + expected = [ + { + '@id': 'http://example.com/a', + 'http://example.org/label': [ + {'@value': 'no language', '@direction': 'rtl'} + ], + } + ] + + result = jsonld.from_rdf(input, {'rdfDirection': 'compound-literal'}) + + assert result == expected + + def test_compound_literal_direction_with_language(self): + """ + Compound literals with rdf:language should preserve the language + when rdfDirection is compound-literal. + """ + input = """ + _:cl1 . + _:cl1 "en-US" . + _:cl1 "en-us" . + _:cl1 "rtl" . 
+ """ + + expected = [ + { + '@id': 'http://example.com/a', + 'http://example.org/label': [ + { + '@value': 'en-US', + '@language': 'en-us', + '@direction': 'rtl', + } + ], + } + ] + + result = jsonld.from_rdf(input, {'rdfDirection': 'compound-literal'}) + + assert result == expected class TestCompact: # Issue 59 - PR: https://github.com/digitalbazaar/pyld/pull/60 From e9fa411b51ae46c55b8eaae81df451d4ee9ae047 Mon Sep 17 00:00:00 2001 From: Miel Vander Sande Date: Thu, 28 May 2026 15:17:13 +0200 Subject: [PATCH 2/6] Implement compound-literal to value conversion --- lib/pyld/jsonld.py | 116 ++++++++++++++++++++++++++++++++++++++++++++- 1 file changed, 114 insertions(+), 2 deletions(-) diff --git a/lib/pyld/jsonld.py b/lib/pyld/jsonld.py index d3721ccf..c4b50aa7 100644 --- a/lib/pyld/jsonld.py +++ b/lib/pyld/jsonld.py @@ -79,6 +79,9 @@ RDF_TYPE = RDF + 'type' RDF_LANGSTRING = RDF + 'langString' RDF_JSON_LITERAL = RDF + 'JSON' +RDF_VALUE = RDF + 'value' +RDF_LANGUAGE = RDF + 'language' +RDF_DIRECTION = RDF + 'direction' # BCP47 REGEX_BCP47 = r'^[a-zA-Z]{1,8}(-[a-zA-Z0-9]{1,8})*$' @@ -315,6 +318,8 @@ def from_rdf(input_, options=None): [useRdfType] True to use rdf:type, False to use @type (default: False). [useNativeTypes] True to convert XSD types into native types (boolean, integer, double), False not to (default: True). + [rdfDirection] Either 'i18n-datatype' or 'compound-literal' + is supported. (default: None) :return: the JSON-LD output. """ @@ -1000,7 +1005,8 @@ def from_rdf(self, dataset, options): (default: False). [useNativeTypes] True to convert XSD types into native types (boolean, integer, double), False not to (default: False). - [rdfDirection] Only 'i18n-datatype' is supported. (default: None) + [rdfDirection] Either 'i18n-datatype' or 'compound-literal' + is supported. (default: None) :return: the JSON-LD output. 
""" @@ -2976,8 +2982,14 @@ def _from_rdf(self, dataset, options): 'value': value, } - # convert linked lists to @list arrays for _name, graph_object in graph_map.items(): + # Convert compound-literal blank nodes before list conversion, so + # list items can also contain directional value objects. + if options['rdfDirection'] == 'compound-literal': + self._rdf_direction_to_compound_literal(graph_object) + + # convert linked lists to @list arrays + # no @lists to be converted, continue if RDF_NIL not in graph_object: continue @@ -3056,6 +3068,106 @@ def _from_rdf(self, dataset, options): return result + def _rdf_direction_to_compound_literal(self, graph_object): + """ + Replace RDF compound-literal blank nodes with JSON-LD value objects. + + The RDF encoding uses a blank node with rdf:value, rdf:direction, + and optionally rdf:language. The JSON-LD form stores those fields + directly on the referencing value object, so this updates the graph + object in place just like the RDF list conversion below does. + """ + # map with blank node id to the JSON-LD value object + # that should replace references to that blank node. + compound_literals = {} + # maps for not-yet-seen blank node id to the list + # positions where a reference to it was found. + pending_references = {} + + # Iterate over the graph. If a reference points to a compound literal + # already seen, replace it immediately. Otherwise, store it in + # pending_references and patch it if that target is identified later. + for id_, node in graph_object.items(): + value = self._compound_literal_to_value(id_, node) + if value is not None: + compound_literals[id_] = value + # Patch any earlier references to this blank node now that we + # know it is a compound literal. + for values, index in pending_references.pop(id_, []): + values[index] = value + + # Scan every array-valued property for node references. A + # compound literal can be the object of any predicate, including + # rdf:first in a list. 
+ for key, values in node.items(): + if key == '@id' or not _is_array(values): + continue + for index, item in enumerate(values): + if not _is_subject_reference(item): + continue + ref_id = item['@id'] + replacement = compound_literals.get(ref_id) + if replacement is not None: + values[index] = replacement + elif ref_id.startswith('_:'): + # Only blank node references can become compound + # literals; IRI references can be ignored here. + pending_references.setdefault(ref_id, []).append((values, index)) + + # The encoding blank nodes are no longer graph subjects once all + # references have been rewritten. + for id_ in compound_literals: + del graph_object[id_] + + def _compound_literal_to_value(self, id_, node): + """ + Return a JSON-LD value object if node has the compound-literal shape. + + A valid compound literal is a blank node with only @id, rdf:value, + rdf:direction, and optionally rdf:language. Each RDF property must + have exactly one JSON-LD value-object entry. + """ + allowed_keys = {RDF_VALUE, RDF_LANGUAGE, RDF_DIRECTION, '@id'} + + # Anything with extra properties is an ordinary node, not the special + # compound-literal encoding. + if ( + not id_.startswith('_:') + or set(node.keys()) - allowed_keys + or not self._is_single_rdf_value(node, RDF_VALUE) + or not self._is_single_rdf_value(node, RDF_DIRECTION) + ): + return None + + # Start from rdf:value so datatype/native literal handling already + # done by _rdf_to_object is preserved. 
+ value = node[RDF_VALUE][0].copy() + direction = node[RDF_DIRECTION][0].get('@value') + if direction not in ['ltr', 'rtl']: + return None + + if RDF_LANGUAGE in node: + if not self._is_single_rdf_value(node, RDF_LANGUAGE): + return None + language = node[RDF_LANGUAGE][0].get('@value') + if not _is_string(language): + return None + value['@language'] = language + + value['@direction'] = direction + return value + + def _is_single_rdf_value(self, node, key): + """ + Return True when a node property has exactly one JSON-LD value object. + """ + return ( + key in node + and _is_array(node[key]) + and len(node[key]) == 1 + and _is_value(node[key][0]) + ) + def _process_context( self, active_ctx, From fdf6241f8426ffa0bf1d03117d66f4e894341de0 Mon Sep 17 00:00:00 2001 From: Miel Vander Sande Date: Thu, 28 May 2026 22:13:53 +0200 Subject: [PATCH 3/6] Amend changelog --- CHANGELOG.md | 5 ++++- 1 file changed, 4 insertions(+), 1 deletion(-) diff --git a/CHANGELOG.md b/CHANGELOG.md index 6eb916af..5d43de43 100644 --- a/CHANGELOG.md +++ b/CHANGELOG.md @@ -38,7 +38,10 @@ which replaces the role of the static `MAX_CONTEXT_URLS`. The constructor now accepts a `max_context_urls` parameter that sets the value of `max_context_urls` which defaults to `MAX_CONTEXT_URLS`. - +- `jsonld.from_rdf()` now supports compound literals when serializing RDF to JSON-LD. + It therefore also accepts the value `'compound-literal'` for the `'rdfDirection'` option. + Fixes [fromRdf#tdi11](https://w3c.github.io/json-ld-api/tests/fromRdf-manifest.html#tdi11) + and [fromRdf#tdi12](https://w3c.github.io/json-ld-api/tests/fromRdf-manifest.html#tdi12). 
## 3.0.0 - 2026-04-02 From 2f203794d9e4044e1acec9e71179aa4a4fbc48ef Mon Sep 17 00:00:00 2001 From: Anatoly Scherbakov Date: Sat, 6 Jun 2026 21:47:35 +0400 Subject: [PATCH 4/6] Add compound literal from RDF regression tests --- tests/test_jsonld.py | 66 ++++++++++++++++++++++++++++++++++++++++++++ 1 file changed, 66 insertions(+) diff --git a/tests/test_jsonld.py b/tests/test_jsonld.py index d3101e95..fd54c6af 100644 --- a/tests/test_jsonld.py +++ b/tests/test_jsonld.py @@ -979,6 +979,72 @@ def test_compound_literal_direction_with_language(self): assert result == expected + def test_shared_compound_literal_blank_node_remains_node(self): + """ + Compound literal blank nodes must only be decoded when referenced once. + """ + input = """ + _:cl1 . + _:cl1 . + _:cl1 "shared" . + _:cl1 "rtl" . + """ + + expected = [ + { + '@id': '_:cl1', + 'http://www.w3.org/1999/02/22-rdf-syntax-ns#direction': [ + {'@value': 'rtl'} + ], + 'http://www.w3.org/1999/02/22-rdf-syntax-ns#value': [ + {'@value': 'shared'} + ], + }, + { + '@id': 'http://example.com/a', + 'http://example.org/label': [{'@id': '_:cl1'}], + }, + { + '@id': 'http://example.com/b', + 'http://example.org/label': [{'@id': '_:cl1'}], + }, + ] + + result = jsonld.from_rdf(input, {'rdfDirection': 'compound-literal'}) + + assert result == expected + + def test_compound_literal_invalid_direction_fails(self): + """ + Invalid rdf:direction values in compound literals must fail. + """ + input = """ + _:cl1 . + _:cl1 "bad" . + _:cl1 "up" . + """ + + with pytest.raises(jsonld.JsonLdError) as exc: + jsonld.from_rdf(input, {'rdfDirection': 'compound-literal'}) + + assert exc.value.code == 'invalid base direction' + + def test_compound_literal_invalid_language_fails(self): + """ + Invalid rdf:language values in compound literals must fail. + """ + input = """ + _:cl1 . + _:cl1 "bad lang" . + _:cl1 "bad_lang" . + _:cl1 "rtl" . 
+ """ + + with pytest.raises(jsonld.JsonLdError) as exc: + jsonld.from_rdf(input, {'rdfDirection': 'compound-literal'}) + + assert exc.value.code == 'invalid language-tagged string' + class TestCompact: # Issue 59 - PR: https://github.com/digitalbazaar/pyld/pull/60 def test_compaction_with_and_without_explicit_datatypes(self): From 95344a297a213b135c4be95a9e02b97655987676 Mon Sep 17 00:00:00 2001 From: Miel Vander Sande Date: Mon, 8 Jun 2026 10:08:47 +0200 Subject: [PATCH 5/6] Adjust docstring --- tests/test_jsonld.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/tests/test_jsonld.py b/tests/test_jsonld.py index fd54c6af..e878a535 100644 --- a/tests/test_jsonld.py +++ b/tests/test_jsonld.py @@ -981,7 +981,7 @@ def test_compound_literal_direction_with_language(self): def test_shared_compound_literal_blank_node_remains_node(self): """ - Compound literal blank nodes must only be decoded when referenced once. + Compound literal blank nodes must only be decoded when referenced exactly once. """ input = """ _:cl1 . From 1619c8e6e80d009d75bce62ccad4ebd8c6c7fa2e Mon Sep 17 00:00:00 2001 From: Miel Vander Sande Date: Mon, 8 Jun 2026 11:47:01 +0200 Subject: [PATCH 6/6] Add extra checks for irregular compound literals. Rewrite only unshared compound literals --- lib/pyld/jsonld.py | 92 ++++++++++++++++++++++++++++---------------- tests/test_jsonld.py | 16 ++++++++ 2 files changed, 74 insertions(+), 34 deletions(-) diff --git a/lib/pyld/jsonld.py b/lib/pyld/jsonld.py index c4b50aa7..332b9d90 100644 --- a/lib/pyld/jsonld.py +++ b/lib/pyld/jsonld.py @@ -3077,28 +3077,21 @@ def _rdf_direction_to_compound_literal(self, graph_object): directly on the referencing value object, so this updates the graph object in place just like the RDF list conversion below does. """ - # map with blank node id to the JSON-LD value object - # that should replace references to that blank node. + # Decode candidate blank nodes up front. 
Ordinary nodes return None; + # malformed compound literals raise before any graph mutation happens. compound_literals = {} - # maps for not-yet-seen blank node id to the list - # positions where a reference to it was found. - pending_references = {} - - # Iterate over the graph. If a reference points to a compound literal - # already seen, replace it immediately. Otherwise, store it in - # pending_references and patch it if that target is identified later. for id_, node in graph_object.items(): value = self._compound_literal_to_value(id_, node) if value is not None: compound_literals[id_] = value - # Patch any earlier references to this blank node now that we - # know it is a compound literal. - for values, index in pending_references.pop(id_, []): - values[index] = value - - # Scan every array-valued property for node references. A - # compound literal can be the object of any predicate, including - # rdf:first in a list. + + if not compound_literals: + return + + # Track where each compound literal is referenced. Only the first two + # locations matter: one reference can be inlined, two means shared. + references = {} + for node in graph_object.values(): for key, values in node.items(): if key == '@id' or not _is_array(values): continue @@ -3106,17 +3099,22 @@ def _rdf_direction_to_compound_literal(self, graph_object): if not _is_subject_reference(item): continue ref_id = item['@id'] - replacement = compound_literals.get(ref_id) - if replacement is not None: - values[index] = replacement - elif ref_id.startswith('_:'): - # Only blank node references can become compound - # literals; IRI references can be ignored here. - pending_references.setdefault(ref_id, []).append((values, index)) - - # The encoding blank nodes are no longer graph subjects once all - # references have been rewritten. 
- for id_ in compound_literals: + if ref_id not in compound_literals: + continue + locations = references.setdefault(ref_id, []) + if len(locations) < 2: + locations.append((values, index)) + + # Rewrite only unshared compound literals. Shared blank nodes carry + # graph identity, so their references must remain as @id references. + for id_, value in compound_literals.items(): + locations = references.get(id_, []) + if len(locations) != 1: + continue + values, index = locations[0] + values[index] = value + # The encoding blank node is no longer a graph subject once its + # only reference has been rewritten. del graph_object[id_] def _compound_literal_to_value(self, id_, node): @@ -3134,24 +3132,50 @@ def _compound_literal_to_value(self, id_, node): if ( not id_.startswith('_:') or set(node.keys()) - allowed_keys - or not self._is_single_rdf_value(node, RDF_VALUE) - or not self._is_single_rdf_value(node, RDF_DIRECTION) + or RDF_VALUE not in node + or RDF_DIRECTION not in node ): return None + if not self._is_single_rdf_value(node, RDF_VALUE): + raise JsonLdError( + 'Invalid JSON-LD syntax; rdf:value must be a single value.', + 'jsonld.SyntaxError', + code='invalid value object', + ) + + if not self._is_single_rdf_value(node, RDF_DIRECTION): + raise JsonLdError( + 'Invalid JSON-LD syntax; rdf:direction must be a single value.', + 'jsonld.InvalidBaseDirection', + code='invalid base direction', + ) + # Start from rdf:value so datatype/native literal handling already # done by _rdf_to_object is preserved. 
value = node[RDF_VALUE][0].copy() direction = node[RDF_DIRECTION][0].get('@value') if direction not in ['ltr', 'rtl']: - return None + raise JsonLdError( + 'Invalid JSON-LD syntax; @direction must be "ltr" or "rtl".', + 'jsonld.InvalidBaseDirection', + code='invalid base direction', + ) if RDF_LANGUAGE in node: if not self._is_single_rdf_value(node, RDF_LANGUAGE): - return None + raise JsonLdError( + 'Invalid JSON-LD syntax; rdf:language must be a single value.', + 'jsonld.InvalidLanguageTaggedString', + code='invalid language-tagged string', + ) language = node[RDF_LANGUAGE][0].get('@value') - if not _is_string(language): - return None + if not _is_string(language) or not re.match(REGEX_BCP47, language): + raise JsonLdError( + 'Invalid JSON-LD syntax; rdf:language must be a valid BCP47 language.', + 'jsonld.InvalidLanguageTaggedString', + code='invalid language-tagged string', + ) value['@language'] = language value['@direction'] = direction diff --git a/tests/test_jsonld.py b/tests/test_jsonld.py index e878a535..47c1f999 100644 --- a/tests/test_jsonld.py +++ b/tests/test_jsonld.py @@ -1029,6 +1029,22 @@ def test_compound_literal_invalid_direction_fails(self): assert exc.value.code == 'invalid base direction' + def test_compound_literal_invalid_value_fails(self): + """ + Invalid rdf:value entries in compound literals must fail. + """ + input = """ + _:cl1 . + _:cl1 "one" . + _:cl1 "two" . + _:cl1 "rtl" . + """ + + with pytest.raises(jsonld.JsonLdError) as exc: + jsonld.from_rdf(input, {'rdfDirection': 'compound-literal'}) + + assert exc.value.code == 'invalid value object' + def test_compound_literal_invalid_language_fails(self): """ Invalid rdf:language values in compound literals must fail.