From 53f3c164bd69fb02a1d6bd706a14b9808d5c702f Mon Sep 17 00:00:00 2001 From: Scott Black Date: Fri, 11 Nov 2022 08:46:13 -0700 Subject: [PATCH 1/5] add the base json-ld serializer --- hsmodels/serializers.py | 350 ++++++++++++++++++++++++++++++++++++++++ 1 file changed, 350 insertions(+) create mode 100644 hsmodels/serializers.py diff --git a/hsmodels/serializers.py b/hsmodels/serializers.py new file mode 100644 index 0000000..0be2de1 --- /dev/null +++ b/hsmodels/serializers.py @@ -0,0 +1,350 @@ +import warnings +from typing import IO, Optional + +from rdflib.graph import Graph +from rdflib.namespace import RDF, XSD +from rdflib.serializer import Serializer +from rdflib.term import BNode, Literal, URIRef + +from rdflib.plugins.shared.jsonld.context import UNDEF, Context +from rdflib.plugins.shared.jsonld.keys import CONTEXT, GRAPH, ID, LANG, LIST, SET, VOCAB +from rdflib.plugins.shared.jsonld.util import json + +__all__ = ["JsonLDSerializer", "from_rdf"] + + +PLAIN_LITERAL_TYPES = {XSD.boolean, XSD.integer, XSD.double, XSD.string} + + +class JsonLDSerializer(Serializer): + def __init__(self, store: Graph): + super(JsonLDSerializer, self).__init__(store) + + + def serialize( + self, + stream: IO[bytes], + base: Optional[str] = None, + encoding: Optional[str] = None, + **kwargs, + ): + # TODO: docstring w. args and return value + encoding = encoding or "utf-8" + if encoding not in ("utf-8", "utf-16"): + warnings.warn( + "JSON should be encoded as unicode. " f"Given encoding was: {encoding}" + ) + + context_data = kwargs.get("context") + use_native_types = (kwargs.get("use_native_types", False),) + use_rdf_type = kwargs.get("use_rdf_type", False) + auto_compact = kwargs.get("auto_compact", False) + + indent = kwargs.get("indent", 2) + separators = kwargs.get("separators", (",", ": ")) + sort_keys = kwargs.get("sort_keys", True) + ensure_ascii = kwargs.get("ensure_ascii", False) + + obj = from_rdf( + self.store, + context_data, + base, + use_native_types, + use_rdf_type, + auto_compact=auto_compact, + ) + + data = json.dumps( + obj, + indent=indent, + separators=separators, + sort_keys=sort_keys, + ensure_ascii=ensure_ascii, + ) + + stream.write(data.encode(encoding, "replace")) + +def from_rdf( + graph, + context_data=None, + base=None, + use_native_types=False, + use_rdf_type=False, + auto_compact=False, + startnode=None, + index=False, +): + # TODO: docstring w. args and return value + # TODO: support for index and startnode + + if not context_data and auto_compact: + context_data = dict( + (pfx, str(ns)) + for (pfx, ns) in graph.namespaces() + if pfx and str(ns) != "http://www.w3.org/XML/1998/namespace" + ) + + if isinstance(context_data, Context): + context = context_data + context_data = context.to_dict() + else: + context = Context(context_data, base=base) + + converter = Converter(context, use_native_types, use_rdf_type) + result = converter.convert(graph) + + if converter.context.active: + if isinstance(result, list): + result = {context.get_key(GRAPH): result} + result[CONTEXT] = context_data + + return result + +class Converter(object): + def __init__(self, context, use_native_types, use_rdf_type): + self.context = context + self.use_native_types = context.active or use_native_types + self.use_rdf_type = use_rdf_type + + def convert(self, graph): + # TODO: bug in rdflib dataset parsing (nquads et al): + # plain triples end up in separate unnamed graphs (rdflib issue #436) + if graph.context_aware: + default_graph = Graph() + graphs = [default_graph] + for g in graph.contexts(): + if isinstance(g.identifier, URIRef): + graphs.append(g) + else: + default_graph += g + else: + graphs = [graph] + + context = self.context + + objs = [] + for g in graphs: + obj = {} + graphname = None + + if isinstance(g.identifier, URIRef): + graphname = context.shrink_iri(g.identifier) + obj[context.id_key] = graphname + + nodes = self.from_graph(g) + + if not graphname and len(nodes) == 1: + obj.update(nodes[0]) + else: + if not nodes: + continue + obj[context.graph_key] = nodes + + if objs and objs[0].get(context.get_key(ID)) == graphname: + objs[0].update(obj) + else: + objs.append(obj) + + if len(graphs) == 1 and len(objs) == 1 and not self.context.active: + default = objs[0] + items = default.get(context.graph_key) + if len(default) == 1 and items: + objs = items + elif len(objs) == 1 and self.context.active: + objs = objs[0] + + return objs + + def from_graph(self, graph): + nodemap = {} + + for s in set(graph.subjects()): + ## only iri:s and unreferenced (rest will be promoted to top if needed) + if isinstance(s, URIRef) or ( + isinstance(s, BNode) and not any(graph.subjects(None, s)) + ): + self.process_subject(graph, s, nodemap) + + return list(nodemap.values()) + + def process_subject(self, graph, s, nodemap): + if isinstance(s, URIRef): + node_id = self.context.shrink_iri(s) + elif isinstance(s, BNode): + node_id = s.n3() + else: + node_id = None + + # used_as_object = any(graph.subjects(None, s)) + if node_id in nodemap: + return None + + node = {} + node[self.context.id_key] = node_id + nodemap[node_id] = node + + for p, o in graph.predicate_objects(s): + self.add_to_node(graph, s, p, o, node, nodemap) + + return node + + def add_to_node(self, graph, s, p, o, s_node, nodemap): + context = self.context + + if isinstance(o, Literal): + datatype = str(o.datatype) if o.datatype else None + language = o.language + term = context.find_term(str(p), datatype, language=language) + else: + containers = [LIST, None] if graph.value(o, RDF.first) else [None] + for container in containers: + for coercion in (ID, VOCAB, UNDEF): + term = context.find_term(str(p), coercion, container) + if term: + break + if term: + break + + node = None + use_set = not context.active + + if term: + p_key = term.name + + if term.type: + node = self.type_coerce(o, term.type) + elif term.language and o.language == term.language: + node = str(o) + elif context.language and (term.language is None and o.language is None): + node = str(o) + + if LIST in term.container: + node = [ + self.type_coerce(v, term.type) + or self.to_raw_value(graph, s, v, nodemap) + for v in self.to_collection(graph, o) + ] + elif LANG in term.container and language: + value = s_node.setdefault(p_key, {}) + values = value.get(language) + node = str(o) + if values or SET in term.container: + if not isinstance(values, list): + value[language] = values = [values] + values.append(node) + else: + value[language] = node + return + elif SET in term.container: + use_set = True + + else: + p_key = context.to_symbol(p) + # TODO: for coercing curies - quite clumsy; unify to_symbol and find_term? + key_term = context.terms.get(p_key) + if key_term and (key_term.type or key_term.container): + p_key = p + if not term and p == RDF.type and not self.use_rdf_type: + if isinstance(o, URIRef): + node = context.to_symbol(o) + p_key = context.type_key + + if node is None: + node = self.to_raw_value(graph, s, o, nodemap) + + value = s_node.get(p_key) + if value: + if not isinstance(value, list): + value = [value] + value.append(node) + elif use_set: + value = [node] + else: + value = node + s_node[p_key] = value + + def type_coerce(self, o, coerce_type): + if coerce_type == ID: + if isinstance(o, URIRef): + return self.context.shrink_iri(o) + elif isinstance(o, BNode): + return o.n3() + else: + return o + elif coerce_type == VOCAB and isinstance(o, URIRef): + return self.context.to_symbol(o) + elif isinstance(o, Literal) and str(o.datatype) == coerce_type: + return o + else: + return None + + def to_raw_value(self, graph, s, o, nodemap): + context = self.context + coll = self.to_collection(graph, o) + if coll is not None: + coll = [ + self.to_raw_value(graph, s, lo, nodemap) + for lo in self.to_collection(graph, o) + ] + return {context.list_key: coll} + elif isinstance(o, BNode): + embed = ( + False # TODO: self.context.active or using startnode and only one ref + ) + onode = self.process_subject(graph, o, nodemap) + if onode: + if embed and not any(s2 for s2 in graph.subjects(None, o) if s2 != s): + return onode + else: + nodemap[onode[context.id_key]] = onode + return {context.id_key: o.n3()} + elif isinstance(o, URIRef): + # TODO: embed if o != startnode (else reverse) + return {context.id_key: context.shrink_iri(o)} + elif isinstance(o, Literal): + # TODO: if compact + native = self.use_native_types and o.datatype in PLAIN_LITERAL_TYPES + if native: + v = o.toPython() + else: + v = str(o) + if o.datatype: + if native: + if self.context.active: + return v + else: + return {context.value_key: v} + return { + context.type_key: context.to_symbol(o.datatype), + context.value_key: v, + } + elif o.language and o.language != context.language: + return {context.lang_key: o.language, context.value_key: v} + elif not context.active or context.language and not o.language: + return {context.value_key: v} + else: + return v + + def to_collection(self, graph, l_): + if l_ != RDF.nil and not graph.value(l_, RDF.first): + return None + list_nodes = [] + chain = set([l_]) + while l_: + if l_ == RDF.nil: + return list_nodes + if isinstance(l_, URIRef): + return None + first, rest = None, None + for p, o in graph.predicate_objects(l_): + if not first and p == RDF.first: + first = o + elif not rest and p == RDF.rest: + rest = o + elif p != RDF.type or o != RDF.List: + return None + list_nodes.append(first) + l_ = rest + if l_ in chain: + return None + chain.add(l_) \ No newline at end of file From d5fdfbd1b3f1cfa5bb3f6df5afea8bbe9b2c8fe4 Mon Sep 17 00:00:00 2001 From: Scott Black Date: Fri, 11 Nov 2022 10:44:47 -0700 Subject: [PATCH 2/5] inject node consolidation into the serializer --- hsmodels/serializers.py | 346 +++++++--------------------------------- 1 file changed, 56 insertions(+), 290 deletions(-) diff --git a/hsmodels/serializers.py b/hsmodels/serializers.py index 0be2de1..96a2495 100644 --- a/hsmodels/serializers.py +++ b/hsmodels/serializers.py @@ -2,21 +2,19 @@ from typing import IO, Optional from rdflib.graph import Graph -from rdflib.namespace import RDF, XSD +from rdflib.namespace import XSD from rdflib.serializer import Serializer -from rdflib.term import BNode, Literal, URIRef - -from rdflib.plugins.shared.jsonld.context import UNDEF, Context -from rdflib.plugins.shared.jsonld.keys import CONTEXT, GRAPH, ID, LANG, LIST, SET, VOCAB +from rdflib.plugin import register from rdflib.plugins.shared.jsonld.util import json +from rdflib.plugins.serializers.jsonld import JsonLDSerializer, from_rdf -__all__ = ["JsonLDSerializer", "from_rdf"] +__all__ = ["PrettyJsonLDSerializer", "from_rdf"] PLAIN_LITERAL_TYPES = {XSD.boolean, XSD.integer, XSD.double, XSD.string} -class JsonLDSerializer(Serializer): +class PrettyJsonLDSerializer(Serializer): def __init__(self, store: Graph): super(JsonLDSerializer, self).__init__(store) @@ -54,6 +52,9 @@ def serialize( auto_compact=auto_compact, ) + '''Here is where the compaction takes place!''' + distribute_nodes(obj) + data = json.dumps( obj, indent=indent, @@ -64,287 +65,52 @@ def serialize( stream.write(data.encode(encoding, "replace")) -def from_rdf( - graph, - context_data=None, - base=None, - use_native_types=False, - use_rdf_type=False, - auto_compact=False, - startnode=None, - index=False, -): - # TODO: docstring w. args and return value - # TODO: support for index and startnode - - if not context_data and auto_compact: - context_data = dict( - (pfx, str(ns)) - for (pfx, ns) in graph.namespaces() - if pfx and str(ns) != "http://www.w3.org/XML/1998/namespace" - ) - - if isinstance(context_data, Context): - context = context_data - context_data = context.to_dict() - else: - context = Context(context_data, base=base) - - converter = Converter(context, use_native_types, use_rdf_type) - result = converter.convert(graph) - - if converter.context.active: - if isinstance(result, list): - result = {context.get_key(GRAPH): result} - result[CONTEXT] = context_data - - return result - -class Converter(object): - def __init__(self, context, use_native_types, use_rdf_type): - self.context = context - self.use_native_types = context.active or use_native_types - self.use_rdf_type = use_rdf_type - - def convert(self, graph): - # TODO: bug in rdflib dataset parsing (nquads et al): - # plain triples end up in separate unnamed graphs (rdflib issue #436) - if graph.context_aware: - default_graph = Graph() - graphs = [default_graph] - for g in graph.contexts(): - if isinstance(g.identifier, URIRef): - graphs.append(g) - else: - default_graph += g - else: - graphs = [graph] - - context = self.context - - objs = [] - for g in graphs: - obj = {} - graphname = None - - if isinstance(g.identifier, URIRef): - graphname = context.shrink_iri(g.identifier) - obj[context.id_key] = graphname - - nodes = self.from_graph(g) - - if not graphname and len(nodes) == 1: - obj.update(nodes[0]) - else: - if not nodes: - continue - obj[context.graph_key] = nodes - - if objs and objs[0].get(context.get_key(ID)) == graphname: - objs[0].update(obj) - else: - objs.append(obj) - - if len(graphs) == 1 and len(objs) == 1 and not self.context.active: - default = objs[0] - items = default.get(context.graph_key) - if len(default) == 1 and items: - objs = items - elif len(objs) == 1 and self.context.active: - objs = objs[0] - - return objs - - def from_graph(self, graph): - nodemap = {} - - for s in set(graph.subjects()): - ## only iri:s and unreferenced (rest will be promoted to top if needed) - if isinstance(s, URIRef) or ( - isinstance(s, BNode) and not any(graph.subjects(None, s)) - ): - self.process_subject(graph, s, nodemap) - - return list(nodemap.values()) - - def process_subject(self, graph, s, nodemap): - if isinstance(s, URIRef): - node_id = self.context.shrink_iri(s) - elif isinstance(s, BNode): - node_id = s.n3() - else: - node_id = None - - # used_as_object = any(graph.subjects(None, s)) - if node_id in nodemap: - return None - - node = {} - node[self.context.id_key] = node_id - nodemap[node_id] = node - - for p, o in graph.predicate_objects(s): - self.add_to_node(graph, s, p, o, node, nodemap) - - return node - - def add_to_node(self, graph, s, p, o, s_node, nodemap): - context = self.context - - if isinstance(o, Literal): - datatype = str(o.datatype) if o.datatype else None - language = o.language - term = context.find_term(str(p), datatype, language=language) - else: - containers = [LIST, None] if graph.value(o, RDF.first) else [None] - for container in containers: - for coercion in (ID, VOCAB, UNDEF): - term = context.find_term(str(p), coercion, container) - if term: - break - if term: - break - - node = None - use_set = not context.active - - if term: - p_key = term.name - - if term.type: - node = self.type_coerce(o, term.type) - elif term.language and o.language == term.language: - node = str(o) - elif context.language and (term.language is None and o.language is None): - node = str(o) - - if LIST in term.container: - node = [ - self.type_coerce(v, term.type) - or self.to_raw_value(graph, s, v, nodemap) - for v in self.to_collection(graph, o) - ] - elif LANG in term.container and language: - value = s_node.setdefault(p_key, {}) - values = value.get(language) - node = str(o) - if values or SET in term.container: - if not isinstance(values, list): - value[language] = values = [values] - values.append(node) - else: - value[language] = node - return - elif SET in term.container: - use_set = True - - else: - p_key = context.to_symbol(p) - # TODO: for coercing curies - quite clumsy; unify to_symbol and find_term? - key_term = context.terms.get(p_key) - if key_term and (key_term.type or key_term.container): - p_key = p - if not term and p == RDF.type and not self.use_rdf_type: - if isinstance(o, URIRef): - node = context.to_symbol(o) - p_key = context.type_key - - if node is None: - node = self.to_raw_value(graph, s, o, nodemap) - - value = s_node.get(p_key) - if value: - if not isinstance(value, list): - value = [value] - value.append(node) - elif use_set: - value = [node] - else: - value = node - s_node[p_key] = value - - def type_coerce(self, o, coerce_type): - if coerce_type == ID: - if isinstance(o, URIRef): - return self.context.shrink_iri(o) - elif isinstance(o, BNode): - return o.n3() - else: - return o - elif coerce_type == VOCAB and isinstance(o, URIRef): - return self.context.to_symbol(o) - elif isinstance(o, Literal) and str(o.datatype) == coerce_type: - return o - else: - return None - - def to_raw_value(self, graph, s, o, nodemap): - context = self.context - coll = self.to_collection(graph, o) - if coll is not None: - coll = [ - self.to_raw_value(graph, s, lo, nodemap) - for lo in self.to_collection(graph, o) - ] - return {context.list_key: coll} - elif isinstance(o, BNode): - embed = ( - False # TODO: self.context.active or using startnode and only one ref - ) - onode = self.process_subject(graph, o, nodemap) - if onode: - if embed and not any(s2 for s2 in graph.subjects(None, o) if s2 != s): - return onode - else: - nodemap[onode[context.id_key]] = onode - return {context.id_key: o.n3()} - elif isinstance(o, URIRef): - # TODO: embed if o != startnode (else reverse) - return {context.id_key: context.shrink_iri(o)} - elif isinstance(o, Literal): - # TODO: if compact - native = self.use_native_types and o.datatype in PLAIN_LITERAL_TYPES - if native: - v = o.toPython() - else: - v = str(o) - if o.datatype: - if native: - if self.context.active: - return v - else: - return {context.value_key: v} - return { - context.type_key: context.to_symbol(o.datatype), - context.value_key: v, - } - elif o.language and o.language != context.language: - return {context.lang_key: o.language, context.value_key: v} - elif not context.active or context.language and not o.language: - return {context.value_key: v} - else: - return v - def to_collection(self, graph, l_): - if l_ != RDF.nil and not graph.value(l_, RDF.first): - return None - list_nodes = [] - chain = set([l_]) - while l_: - if l_ == RDF.nil: - return list_nodes - if isinstance(l_, URIRef): - return None - first, rest = None, None - for p, o in graph.predicate_objects(l_): - if not first and p == RDF.first: - first = o - elif not rest and p == RDF.rest: - rest = o - elif p != RDF.type or o != RDF.List: - return None - list_nodes.append(first) - l_ = rest - if l_ in chain: - return None - chain.add(l_) \ No newline at end of file +def distribute_nodes(jld): + # group nodes to be distributed into roots + # nodes are identified by a dictionary with {'@id': "_:N..."} + nodes_by_id = {d.pop('@id'): d for d in jld['@graph'] if d['@id'].startswith("_:N")} + roots = [d for d in jld['@graph'] if '@id' in d and not d['@id'].startswith("_:N")] + + # code for walking dictionaries and lists to replace node identifiers with the nodes + def is_node_id(d) -> bool: + if isinstance(d, dict): + if "@id" in d and d["@id"].startswith("_:N"): + return True + return False + + def get_node(d: dict): + return nodes_by_id[d["@id"]] + + def parse_list(l: list): + nodes = [] + for item in l: + if is_node_id(item): + nodes.append((item, get_node(item))) + if isinstance(item, list): + parse_list(item) + if isinstance(item, dict): + parse_dict(item) + for node in nodes: + l.remove(node[0]) + l.append(node[1]) + + def parse_dict(d: dict): + nodes = [] + for key, value in d.items(): + if is_node_id(value): + nodes.append((key, get_node(value))) + if isinstance(value, list): + parse_list(value) + if isinstance(value, dict): + parse_dict(value) + for node in nodes: + d[node[0]] = node[1] + # run the node replacements for each root + for d in roots: + parse_dict(d) + + +register( + 'json-ld-pretty', Serializer, + 'hsmodels.serializers', 'PrettyJsonLDSerializer') \ No newline at end of file From f5d62db2cd4546399964cb26f7a942ec4b25809c Mon Sep 17 00:00:00 2001 From: Scott Black Date: Fri, 11 Nov 2022 11:05:50 -0700 Subject: [PATCH 3/5] get jsonld working with node insertion --- hsmodels/__init__.py | 7 +++++++ hsmodels/schemas/__init__.py | 2 +- hsmodels/serializers.py | 14 ++++---------- requirements.txt | 2 +- setup.py | 2 +- 5 files changed, 14 insertions(+), 13 deletions(-) diff --git a/hsmodels/__init__.py b/hsmodels/__init__.py index e69de29..c385230 100644 --- a/hsmodels/__init__.py +++ b/hsmodels/__init__.py @@ -0,0 +1,7 @@ +from rdflib.serializer import Serializer +from rdflib.plugin import register + + +register( + 'json-ld-pretty', Serializer, + 'hsmodels.serializers', 'PrettyJsonLDSerializer') \ No newline at end of file diff --git a/hsmodels/schemas/__init__.py b/hsmodels/schemas/__init__.py index f1b7c05..1ab5093 100644 --- a/hsmodels/schemas/__init__.py +++ b/hsmodels/schemas/__init__.py @@ -89,7 +89,7 @@ def rdf_graph(schema): def rdf_string(schema, rdf_format='pretty-xml'): - return rdf_graph(schema).serialize(format=rdf_format).decode() + return rdf_graph(schema).serialize(format=rdf_format) def _rdf_fields(schema): diff --git a/hsmodels/serializers.py b/hsmodels/serializers.py index 96a2495..426f519 100644 --- a/hsmodels/serializers.py +++ b/hsmodels/serializers.py @@ -3,8 +3,6 @@ from rdflib.graph import Graph from rdflib.namespace import XSD -from rdflib.serializer import Serializer -from rdflib.plugin import register from rdflib.plugins.shared.jsonld.util import json from rdflib.plugins.serializers.jsonld import JsonLDSerializer, from_rdf @@ -14,10 +12,9 @@ PLAIN_LITERAL_TYPES = {XSD.boolean, XSD.integer, XSD.double, XSD.string} -class PrettyJsonLDSerializer(Serializer): +class PrettyJsonLDSerializer(JsonLDSerializer): def __init__(self, store: Graph): - super(JsonLDSerializer, self).__init__(store) - + super(PrettyJsonLDSerializer, self).__init__(store) def serialize( self, @@ -69,8 +66,8 @@ def serialize( def distribute_nodes(jld): # group nodes to be distributed into roots # nodes are identified by a dictionary with {'@id': "_:N..."} - nodes_by_id = {d.pop('@id'): d for d in jld['@graph'] if d['@id'].startswith("_:N")} - roots = [d for d in jld['@graph'] if '@id' in d and not d['@id'].startswith("_:N")] + nodes_by_id = {d.pop('@id'): d for d in jld if d['@id'].startswith("_:N")} + roots = [d for d in jld if '@id' in d and not d['@id'].startswith("_:N")] # code for walking dictionaries and lists to replace node identifiers with the nodes def is_node_id(d) -> bool: @@ -111,6 +108,3 @@ def parse_dict(d: dict): parse_dict(d) -register( - 'json-ld-pretty', Serializer, - 'hsmodels.serializers', 'PrettyJsonLDSerializer') \ No newline at end of file diff --git a/requirements.txt b/requirements.txt index ec1d916..b88db18 100644 --- a/requirements.txt +++ b/requirements.txt @@ -1,4 +1,4 @@ -rdflib<6.0.0 +rdflib>=6.0.0 pydantic>=1.8.1,<2.0 email-validator jsonschema2md diff --git a/setup.py b/setup.py index 92807da..f72f7eb 100644 --- a/setup.py +++ b/setup.py @@ -8,7 +8,7 @@ packages=find_packages(include=['hsmodels', 'hsmodels.*', 'hsmodels.schemas.*', 'hsmodels.schemas.rdf.*'], exclude=("tests",)), install_requires=[ - 'rdflib<6.0.0', + 'rdflib>=6.0.0', 'pydantic>=1.8.1,<2.0', 'email-validator' ], From ce601a9a0d776e134e86a933227d87c9a0938586 Mon Sep 17 00:00:00 2001 From: Scott Black Date: Fri, 11 Nov 2022 11:10:28 -0700 Subject: [PATCH 4/5] use jsonld auto compact to key contexts --- hsmodels/schemas/__init__.py | 2 +- hsmodels/serializers.py | 4 ++-- 2 files changed, 3 insertions(+), 3 deletions(-) diff --git a/hsmodels/schemas/__init__.py b/hsmodels/schemas/__init__.py index 1ab5093..6bddde4 100644 --- a/hsmodels/schemas/__init__.py +++ b/hsmodels/schemas/__init__.py @@ -89,7 +89,7 @@ def rdf_graph(schema): def rdf_string(schema, rdf_format='pretty-xml'): - return rdf_graph(schema).serialize(format=rdf_format) + return rdf_graph(schema).serialize(format=rdf_format, auto_compact=True) def _rdf_fields(schema): diff --git a/hsmodels/serializers.py b/hsmodels/serializers.py index 426f519..e4f7de0 100644 --- a/hsmodels/serializers.py +++ b/hsmodels/serializers.py @@ -66,8 +66,8 @@ def serialize( def distribute_nodes(jld): # group nodes to be distributed into roots # nodes are identified by a dictionary with {'@id': "_:N..."} - nodes_by_id = {d.pop('@id'): d for d in jld if d['@id'].startswith("_:N")} - roots = [d for d in jld if '@id' in d and not d['@id'].startswith("_:N")] + nodes_by_id = {d.pop('@id'): d for d in jld['@graph'] if d['@id'].startswith("_:N")} + roots = [d for d in jld['@graph'] if '@id' in d and not d['@id'].startswith("_:N")] # code for walking dictionaries and lists to replace node identifiers with the nodes def is_node_id(d) -> bool: From 4e59956b3a388f5af1d488349d81024928844b59 Mon Sep 17 00:00:00 2001 From: Scott Black Date: Fri, 11 Nov 2022 11:19:34 -0700 Subject: [PATCH 5/5] set contexts, we won't keep this commit --- hsmodels/schemas/__init__.py | 24 +++++++++++++++++++++--- 1 file changed, 21 insertions(+), 3 deletions(-) diff --git a/hsmodels/schemas/__init__.py b/hsmodels/schemas/__init__.py index 6bddde4..2438fa8 100644 --- a/hsmodels/schemas/__init__.py +++ b/hsmodels/schemas/__init__.py @@ -4,7 +4,8 @@ from pydantic import AnyUrl, BaseModel from rdflib import Graph, Literal, URIRef -from hsmodels.namespaces import DC, HSTERMS, ORE, RDF, RDFS1 +from hsmodels.namespaces import DC, HSTERMS, ORE, RDF, RDFS1, HSRESOURCE, DCTERMS, SCHEMA, XML, RDFS, CITOTERMS, XSD, \ + SH, FOAF, DASH, HSUSER from hsmodels.schemas.aggregations import ( FileSetMetadata, GeographicFeatureMetadata, @@ -82,10 +83,27 @@ def parse_file(schema, file, file_format='xml', subject=None): def rdf_graph(schema): + g = Graph() + g.bind('hsresource', HSRESOURCE) + g.bind('dcterms', DCTERMS) + g.bind('rdfs1', RDFS1) + g.bind('schema', SCHEMA) + g.bind('hsterms', HSTERMS) + g.bind('xml', XML) + g.bind('rdfs', RDFS) + g.bind('dc', DC) + g.bind('citoterms', CITOTERMS) + g.bind('xsd', XSD) + g.bind('sh', SH) + g.bind('rdf', RDF) + g.bind('foaf', FOAF) + g.bind('dash', DASH) + g.bind('ORE', ORE) + g.bind('hsuser', HSUSER) for rdf_schema, user_schema in user_schemas.items(): if isinstance(schema, user_schema): - return _rdf_graph(rdf_schema(**schema.dict(to_rdf=True)), Graph()) - return _rdf_graph(schema, Graph()) + return _rdf_graph(rdf_schema(**schema.dict(to_rdf=True)), g) + return _rdf_graph(schema, g) def rdf_string(schema, rdf_format='pretty-xml'):