diff --git a/CHANGELOG.md b/CHANGELOG.md index 37d1e47d..a8205cca 100644 --- a/CHANGELOG.md +++ b/CHANGELOG.md @@ -58,7 +58,13 @@ which replaces the role of the static `MAX_CONTEXT_URLS`. The constructor now accepts a `max_context_urls` parameter that sets the value of `max_context_urls` which defaults to `MAX_CONTEXT_URLS`. - +- `pyld.fromRdf()` and `pyld.toRdf()` now support compound literals when + serializing/deserializing RDF to/from JSON-LD. Therefore, both methods accept + the value `'compound-literal'` for the `'rdfDirection'` option. Fixes + [fromRdf#tdi11](https://w3c.github.io/json-ld-api/tests/fromRdf-manifest.html#tdi11), + [fromRdf#tdi12](https://w3c.github.io/json-ld-api/tests/fromRdf-manifest.html#tdi12), + [toRdf#tdi11](https://w3c.github.io/json-ld-api/tests/toRdf-manifest.html#tdi11), and + [toRdf#tdi12](https://w3c.github.io/json-ld-api/tests/toRdf-manifest.html#tdi12). ## 3.0.0 - 2026-04-02 diff --git a/lib/pyld/jsonld.py b/lib/pyld/jsonld.py index d3721ccf..a60fcd36 100644 --- a/lib/pyld/jsonld.py +++ b/lib/pyld/jsonld.py @@ -79,6 +79,9 @@ RDF_TYPE = RDF + 'type' RDF_LANGSTRING = RDF + 'langString' RDF_JSON_LITERAL = RDF + 'JSON' +RDF_VALUE = RDF + 'value' +RDF_LANGUAGE = RDF + 'language' +RDF_DIRECTION = RDF + 'direction' # BCP47 REGEX_BCP47 = r'^[a-zA-Z]{1,8}(-[a-zA-Z0-9]{1,8})*$' @@ -315,6 +318,8 @@ def from_rdf(input_, options=None): [useRdfType] True to use rdf:type, False to use @type (default: False). [useNativeTypes] True to convert XSD types into native types (boolean, integer, double), False not to (default: True). + [rdfDirection] Either 'i18n-datatype' or 'compound-literal' + is supported. (default: None) :return: the JSON-LD output. """ @@ -1000,7 +1005,8 @@ def from_rdf(self, dataset, options): (default: False). [useNativeTypes] True to convert XSD types into native types (boolean, integer, double), False not to (default: False). - [rdfDirection] Only 'i18n-datatype' is supported. 
(default: None) + [rdfDirection] Either 'i18n-datatype' or 'compound-literal' + is supported. (default: None) :return: the JSON-LD output. """ @@ -1051,7 +1057,7 @@ def to_rdf(self, input_, options): to produce only standard RDF (default: false). [documentLoader(url, options)] the document loader (default: _default_document_loader). - [rdfDirection] Only 'i18n-datatype' supported + [rdfDirection] Either 'i18n-datatype' or 'compound-literal' (default: None). :return: the resulting RDF dataset (or a serialization of it). @@ -2976,8 +2982,14 @@ def _from_rdf(self, dataset, options): 'value': value, } - # convert linked lists to @list arrays for _name, graph_object in graph_map.items(): + # Convert compound-literal blank nodes before list conversion, so + # list items can also contain directional value objects. + if options['rdfDirection'] == 'compound-literal': + self._rdf_direction_to_compound_literal(graph_object) + + # convert linked lists to @list arrays + # no @lists to be converted, continue if RDF_NIL not in graph_object: continue @@ -3056,6 +3068,130 @@ def _from_rdf(self, dataset, options): return result + def _rdf_direction_to_compound_literal(self, graph_object): + """ + Replace RDF compound-literal blank nodes with JSON-LD value objects. + + The RDF encoding uses a blank node with rdf:value, rdf:direction, + and optionally rdf:language. The JSON-LD form stores those fields + directly on the referencing value object, so this updates the graph + object in place just like the RDF list conversion below does. + """ + # Decode candidate blank nodes up front. Ordinary nodes return None; + # malformed compound literals raise before any graph mutation happens. + compound_literals = {} + for id_, node in graph_object.items(): + value = self._compound_literal_to_value(id_, node) + if value is not None: + compound_literals[id_] = value + + if not compound_literals: + return + + # Track where each compound literal is referenced. 
Only the first two + # locations matter: one reference can be inlined, two means shared. + references = {} + for node in graph_object.values(): + for key, values in node.items(): + if key == '@id' or not _is_array(values): + continue + for index, item in enumerate(values): + if not _is_subject_reference(item): + continue + ref_id = item['@id'] + if ref_id not in compound_literals: + continue + locations = references.setdefault(ref_id, []) + if len(locations) < 2: + locations.append((values, index)) + + # Rewrite only unshared compound literals. Shared blank nodes carry + # graph identity, so their references must remain as @id references. + for id_, value in compound_literals.items(): + locations = references.get(id_, []) + if len(locations) != 1: + continue + values, index = locations[0] + values[index] = value + # The encoding blank node is no longer a graph subject once its + # only reference has been rewritten. + del graph_object[id_] + + def _compound_literal_to_value(self, id_, node): + """ + Return a JSON-LD value object if node has the compound-literal shape. + + A valid compound literal is a blank node with only @id, rdf:value, + rdf:direction, and optionally rdf:language. Each RDF property must + have exactly one JSON-LD value-object entry. + """ + allowed_keys = {RDF_VALUE, RDF_LANGUAGE, RDF_DIRECTION, '@id'} + + # Anything with extra properties is an ordinary node, not the special + # compound-literal encoding. 
+ if ( + not id_.startswith('_:') + or set(node.keys()) - allowed_keys + or RDF_VALUE not in node + or RDF_DIRECTION not in node + ): + return None + + if not self._is_single_rdf_value(node, RDF_VALUE): + raise JsonLdError( + 'Invalid JSON-LD syntax; rdf:value must be a single value.', + 'jsonld.SyntaxError', + code='invalid value object', + ) + + if not self._is_single_rdf_value(node, RDF_DIRECTION): + raise JsonLdError( + 'Invalid JSON-LD syntax; rdf:direction must be a single value.', + 'jsonld.InvalidBaseDirection', + code='invalid base direction', + ) + + # Start from rdf:value so datatype/native literal handling already + # done by _rdf_to_object is preserved. + value = node[RDF_VALUE][0].copy() + direction = node[RDF_DIRECTION][0].get('@value') + if direction not in ['ltr', 'rtl']: + raise JsonLdError( + 'Invalid JSON-LD syntax; @direction must be "ltr" or "rtl".', + 'jsonld.InvalidBaseDirection', + code='invalid base direction', + ) + + if RDF_LANGUAGE in node: + if not self._is_single_rdf_value(node, RDF_LANGUAGE): + raise JsonLdError( + 'Invalid JSON-LD syntax; rdf:language must be a single value.', + 'jsonld.InvalidLanguageTaggedString', + code='invalid language-tagged string', + ) + language = node[RDF_LANGUAGE][0].get('@value') + if not _is_string(language) or not re.match(REGEX_BCP47, language): + raise JsonLdError( + 'Invalid JSON-LD syntax; rdf:language must be a valid BCP47 language.', + 'jsonld.InvalidLanguageTaggedString', + code='invalid language-tagged string', + ) + value['@language'] = language + + value['@direction'] = direction + return value + + def _is_single_rdf_value(self, node, key): + """ + Return True when a node property has exactly one JSON-LD value object. 
+ """ + return ( + key in node + and _is_array(node[key]) + and len(node[key]) == 1 + and _is_value(node[key][0]) + ) + def _process_context( self, active_ctx, @@ -3781,8 +3917,7 @@ def _object_to_rdf(self, item, issuer, triples, rdf_direction): :param item: the JSON-LD value or node object. :param issuer: the IdentifierIssuer for issuing blank node identifiers. :param triples: the array of triples to append list entries to. - :param rdf_direction: for creating datatyped literals. - :param rdf_direction: for creating datatyped literals. + :param rdf_direction: for creating directional literals. :return: the RDF literal or RDF resource. """ @@ -3824,6 +3959,44 @@ def _object_to_rdf(self, item, issuer, triples, rdf_direction): elif _is_integer(value): object['value'] = str(value) object['datatype'] = datatype or XSD_INTEGER + elif rdf_direction == 'compound-literal' and '@direction' in item: + object['type'] = 'blank node' + object['value'] = issuer.get_id() + subject = {'type': 'blank node', 'value': object['value']} + triples.append( + { + 'subject': subject, + 'predicate': {'type': 'IRI', 'value': RDF_VALUE}, + 'object': { + 'type': 'literal', + 'value': value, + 'datatype': XSD_STRING, + }, + } + ) + triples.append( + { + 'subject': subject, + 'predicate': {'type': 'IRI', 'value': RDF_DIRECTION}, + 'object': { + 'type': 'literal', + 'value': item['@direction'], + 'datatype': XSD_STRING, + }, + } + ) + if '@language' in item: + triples.append( + { + 'subject': subject, + 'predicate': {'type': 'IRI', 'value': RDF_LANGUAGE}, + 'object': { + 'type': 'literal', + 'value': item['@language'], + 'datatype': XSD_STRING, + }, + } + ) elif rdf_direction == 'i18n-datatype' and '@direction' in item: datatype = 'https://www.w3.org/ns/i18n#{}_{}'.format( item.get('@language', ''), item['@direction'] diff --git a/tests/runtests.py b/tests/runtests.py index 1071045f..0ba5c4a1 100644 --- a/tests/runtests.py +++ b/tests/runtests.py @@ -993,9 +993,6 @@ def write(self, filename): 
'skip': { 'specVersion': ['json-ld-1.0'], 'idRegex': [ - # direction (compound-literal) - '.*fromRdf-manifest#tdi11$', - '.*fromRdf-manifest#tdi12$', # uncategorized '.*fromRdf-manifest#t0027$', ], @@ -1040,11 +1037,7 @@ def write(self, filename): # skip tests where behavior changed for a 1.1 processor # see JSON-LD 1.0 Errata 'specVersion': ['json-ld-1.0'], - 'idRegex': [ - # node object direction - '.*toRdf-manifest#tdi11$', - '.*toRdf-manifest#tdi12$', - ], + 'idRegex': [], }, 'fn': 'to_rdf', 'params': [ diff --git a/tests/test_jsonld.py b/tests/test_jsonld.py index 2f35a063..f4fc22db 100644 --- a/tests/test_jsonld.py +++ b/tests/test_jsonld.py @@ -872,6 +872,62 @@ def test_double_and_float_values(self): result = jsonld.to_rdf(input) assert result == expected + def test_compound_literal_direction_without_language(self): + """ + Values with @direction should become compound literals during to_rdf + when rdfDirection is compound-literal. + """ + input = { + 'http://example.org/label': { + '@value': 'no language', + '@direction': 'rtl', + } + } + + expected = """_:b0 <http://example.org/label> _:b1 . +_:b1 <http://www.w3.org/1999/02/22-rdf-syntax-ns#direction> "rtl" . +_:b1 <http://www.w3.org/1999/02/22-rdf-syntax-ns#value> "no language" . +""" + + nquads = jsonld.to_rdf( + input, + options={ + 'format': 'application/n-quads', + 'rdfDirection': 'compound-literal', + }, + ) + + assert nquads == expected + + def test_compound_literal_direction_with_language(self): + """ + Values with @language should preserve it in compound literals during + to_rdf when rdfDirection is compound-literal. + """ + input = { + 'http://example.org/label': { + '@value': 'en-US', + '@language': 'en-US', + '@direction': 'rtl', + } + } + + expected = """_:b0 <http://example.org/label> _:b1 . +_:b1 <http://www.w3.org/1999/02/22-rdf-syntax-ns#direction> "rtl" . +_:b1 <http://www.w3.org/1999/02/22-rdf-syntax-ns#language> "en-us" . +_:b1 <http://www.w3.org/1999/02/22-rdf-syntax-ns#value> "en-US" . 
+""" + + nquads = jsonld.to_rdf( + input, + options={ + 'format': 'application/n-quads', + 'rdfDirection': 'compound-literal', + }, + ) + + assert nquads == expected + # Issue 204 def test_conflicting_property_names(self): """ @@ -925,6 +981,141 @@ def test_conflicting_property_names_in_nested_node(self): nquads = jsonld.to_rdf(input, options={'format': 'application/n-quads'}) assert nquads == expected +class TestFromRDF: + def test_compound_literal_direction_without_language(self): + """ + Compound literals with rdf:direction should become JSON-LD value + objects when rdfDirection is compound-literal. + """ + input = """ + <http://example.com/a> <http://example.org/label> _:cl1 . + _:cl1 <http://www.w3.org/1999/02/22-rdf-syntax-ns#value> "no language" . + _:cl1 <http://www.w3.org/1999/02/22-rdf-syntax-ns#direction> "rtl" . + """ + + expected = [ + { + '@id': 'http://example.com/a', + 'http://example.org/label': [ + {'@value': 'no language', '@direction': 'rtl'} + ], + } + ] + + result = jsonld.from_rdf(input, {'rdfDirection': 'compound-literal'}) + + assert result == expected + + def test_compound_literal_direction_with_language(self): + """ + Compound literals with rdf:language should preserve the language + when rdfDirection is compound-literal. + """ + input = """ + <http://example.com/a> <http://example.org/label> _:cl1 . + _:cl1 <http://www.w3.org/1999/02/22-rdf-syntax-ns#value> "en-US" . + _:cl1 <http://www.w3.org/1999/02/22-rdf-syntax-ns#language> "en-us" . + _:cl1 <http://www.w3.org/1999/02/22-rdf-syntax-ns#direction> "rtl" . + """ + + expected = [ + { + '@id': 'http://example.com/a', + 'http://example.org/label': [ + { + '@value': 'en-US', + '@language': 'en-us', + '@direction': 'rtl', + } + ], + } + ] + + result = jsonld.from_rdf(input, {'rdfDirection': 'compound-literal'}) + + assert result == expected + + def test_shared_compound_literal_blank_node_remains_node(self): + """ + Compound-literal blank nodes referenced more than once must remain + ordinary nodes instead of being inlined as value objects. + """ + input = """ + <http://example.com/a> <http://example.org/label> _:cl1 . + <http://example.com/b> <http://example.org/label> _:cl1 . + _:cl1 <http://www.w3.org/1999/02/22-rdf-syntax-ns#value> "shared" . + _:cl1 <http://www.w3.org/1999/02/22-rdf-syntax-ns#direction> "rtl" . 
+ """ + + expected = [ + { + '@id': '_:cl1', + 'http://www.w3.org/1999/02/22-rdf-syntax-ns#direction': [ + {'@value': 'rtl'} + ], + 'http://www.w3.org/1999/02/22-rdf-syntax-ns#value': [ + {'@value': 'shared'} + ], + }, + { + '@id': 'http://example.com/a', + 'http://example.org/label': [{'@id': '_:cl1'}], + }, + { + '@id': 'http://example.com/b', + 'http://example.org/label': [{'@id': '_:cl1'}], + }, + ] + + result = jsonld.from_rdf(input, {'rdfDirection': 'compound-literal'}) + + assert result == expected + + def test_compound_literal_invalid_direction_fails(self): + """ + Invalid rdf:direction values in compound literals must fail. + """ + input = """ + <http://example.com/a> <http://example.org/label> _:cl1 . + _:cl1 <http://www.w3.org/1999/02/22-rdf-syntax-ns#value> "bad" . + _:cl1 <http://www.w3.org/1999/02/22-rdf-syntax-ns#direction> "up" . + """ + + with pytest.raises(jsonld.JsonLdError) as exc: + jsonld.from_rdf(input, {'rdfDirection': 'compound-literal'}) + + assert exc.value.code == 'invalid base direction' + + def test_compound_literal_invalid_value_fails(self): + """ + Invalid rdf:value entries in compound literals must fail. + """ + input = """ + <http://example.com/a> <http://example.org/label> _:cl1 . + _:cl1 <http://www.w3.org/1999/02/22-rdf-syntax-ns#value> "one" . + _:cl1 <http://www.w3.org/1999/02/22-rdf-syntax-ns#value> "two" . + _:cl1 <http://www.w3.org/1999/02/22-rdf-syntax-ns#direction> "rtl" . + """ + + with pytest.raises(jsonld.JsonLdError) as exc: + jsonld.from_rdf(input, {'rdfDirection': 'compound-literal'}) + + assert exc.value.code == 'invalid value object' + + def test_compound_literal_invalid_language_fails(self): + """ + Invalid rdf:language values in compound literals must fail. + """ + input = """ + <http://example.com/a> <http://example.org/label> _:cl1 . + _:cl1 <http://www.w3.org/1999/02/22-rdf-syntax-ns#value> "bad lang" . + _:cl1 <http://www.w3.org/1999/02/22-rdf-syntax-ns#language> "bad_lang" . + _:cl1 <http://www.w3.org/1999/02/22-rdf-syntax-ns#direction> "rtl" . + """ + + with pytest.raises(jsonld.JsonLdError) as exc: + jsonld.from_rdf(input, {'rdfDirection': 'compound-literal'}) + + assert exc.value.code == 'invalid language-tagged string' class TestCompact: # Issue 59 - PR: https://github.com/digitalbazaar/pyld/pull/60