From 53f3c164bd69fb02a1d6bd706a14b9808d5c702f Mon Sep 17 00:00:00 2001
From: Scott Black <sblack@cuahsi.org>
Date: Fri, 11 Nov 2022 08:46:13 -0700
Subject: [PATCH 1/5] add the base json-ld serializer

---
 hsmodels/serializers.py | 350 ++++++++++++++++++++++++++++++++++++++++
 1 file changed, 350 insertions(+)
 create mode 100644 hsmodels/serializers.py

diff --git a/hsmodels/serializers.py b/hsmodels/serializers.py
new file mode 100644
index 0000000..0be2de1
--- /dev/null
+++ b/hsmodels/serializers.py
@@ -0,0 +1,350 @@
+import warnings
+from typing import IO, Optional
+
+from rdflib.graph import Graph
+from rdflib.namespace import RDF, XSD
+from rdflib.serializer import Serializer
+from rdflib.term import BNode, Literal, URIRef
+
+from rdflib.plugins.shared.jsonld.context import UNDEF, Context
+from rdflib.plugins.shared.jsonld.keys import CONTEXT, GRAPH, ID, LANG, LIST, SET, VOCAB
+from rdflib.plugins.shared.jsonld.util import json
+
+__all__ = ["JsonLDSerializer", "from_rdf"]
+
+
+PLAIN_LITERAL_TYPES = {XSD.boolean, XSD.integer, XSD.double, XSD.string}
+
+
+class JsonLDSerializer(Serializer):
+    def __init__(self, store: Graph):
+        super(JsonLDSerializer, self).__init__(store)
+
+
+    def serialize(
+        self,
+        stream: IO[bytes],
+        base: Optional[str] = None,
+        encoding: Optional[str] = None,
+        **kwargs,
+    ):
+        """Serialize the store to ``stream`` as JSON-LD; options are read from kwargs."""
+        encoding = encoding or "utf-8"
+        if encoding not in ("utf-8", "utf-16"):
+            warnings.warn(
+                "JSON should be encoded as unicode. " f"Given encoding was: {encoding}"
+            )
+        # conversion options for from_rdf (use_native_types is a plain bool, not a 1-tuple)
+        context_data = kwargs.get("context")
+        use_native_types = kwargs.get("use_native_types", False)
+        use_rdf_type = kwargs.get("use_rdf_type", False)
+        auto_compact = kwargs.get("auto_compact", False)
+        # layout options for json.dumps
+        indent = kwargs.get("indent", 2)
+        separators = kwargs.get("separators", (",", ": "))
+        sort_keys = kwargs.get("sort_keys", True)
+        ensure_ascii = kwargs.get("ensure_ascii", False)
+
+        obj = from_rdf(
+            self.store,
+            context_data,
+            base,
+            use_native_types,
+            use_rdf_type,
+            auto_compact=auto_compact,
+        )
+
+        data = json.dumps(
+            obj,
+            indent=indent,
+            separators=separators,
+            sort_keys=sort_keys,
+            ensure_ascii=ensure_ascii,
+        )
+
+        stream.write(data.encode(encoding, "replace"))
+
+def from_rdf(
+    graph,
+    context_data=None,
+    base=None,
+    use_native_types=False,
+    use_rdf_type=False,
+    auto_compact=False,
+    startnode=None,
+    index=False,
+):
+    # TODO: docstring w. args and return value
+    # TODO: support for index and startnode
+
+    if not context_data and auto_compact:
+        context_data = dict(
+            (pfx, str(ns))
+            for (pfx, ns) in graph.namespaces()
+            if pfx and str(ns) != "http://www.w3.org/XML/1998/namespace"
+        )
+
+    if isinstance(context_data, Context):
+        context = context_data
+        context_data = context.to_dict()
+    else:
+        context = Context(context_data, base=base)
+
+    converter = Converter(context, use_native_types, use_rdf_type)
+    result = converter.convert(graph)
+
+    if converter.context.active:
+        if isinstance(result, list):
+            result = {context.get_key(GRAPH): result}
+        result[CONTEXT] = context_data
+
+    return result
+
+class Converter(object):
+    def __init__(self, context, use_native_types, use_rdf_type):
+        self.context = context
+        self.use_native_types = context.active or use_native_types
+        self.use_rdf_type = use_rdf_type
+
+    def convert(self, graph):
+        # TODO: bug in rdflib dataset parsing (nquads et al):
+        # plain triples end up in separate unnamed graphs (rdflib issue #436)
+        if graph.context_aware:
+            default_graph = Graph()
+            graphs = [default_graph]
+            for g in graph.contexts():
+                if isinstance(g.identifier, URIRef):
+                    graphs.append(g)
+                else:
+                    default_graph += g
+        else:
+            graphs = [graph]
+
+        context = self.context
+
+        objs = []
+        for g in graphs:
+            obj = {}
+            graphname = None
+
+            if isinstance(g.identifier, URIRef):
+                graphname = context.shrink_iri(g.identifier)
+                obj[context.id_key] = graphname
+
+            nodes = self.from_graph(g)
+
+            if not graphname and len(nodes) == 1:
+                obj.update(nodes[0])
+            else:
+                if not nodes:
+                    continue
+                obj[context.graph_key] = nodes
+
+            if objs and objs[0].get(context.get_key(ID)) == graphname:
+                objs[0].update(obj)
+            else:
+                objs.append(obj)
+
+        if len(graphs) == 1 and len(objs) == 1 and not self.context.active:
+            default = objs[0]
+            items = default.get(context.graph_key)
+            if len(default) == 1 and items:
+                objs = items
+        elif len(objs) == 1 and self.context.active:
+            objs = objs[0]
+
+        return objs
+
+    def from_graph(self, graph):
+        nodemap = {}
+
+        for s in set(graph.subjects()):
+            ## only iri:s and unreferenced (rest will be promoted to top if needed)
+            if isinstance(s, URIRef) or (
+                isinstance(s, BNode) and not any(graph.subjects(None, s))
+            ):
+                self.process_subject(graph, s, nodemap)
+
+        return list(nodemap.values())
+
+    def process_subject(self, graph, s, nodemap):
+        if isinstance(s, URIRef):
+            node_id = self.context.shrink_iri(s)
+        elif isinstance(s, BNode):
+            node_id = s.n3()
+        else:
+            node_id = None
+
+        # used_as_object = any(graph.subjects(None, s))
+        if node_id in nodemap:
+            return None
+
+        node = {}
+        node[self.context.id_key] = node_id
+        nodemap[node_id] = node
+
+        for p, o in graph.predicate_objects(s):
+            self.add_to_node(graph, s, p, o, node, nodemap)
+
+        return node
+
+    def add_to_node(self, graph, s, p, o, s_node, nodemap):
+        context = self.context
+
+        if isinstance(o, Literal):
+            datatype = str(o.datatype) if o.datatype else None
+            language = o.language
+            term = context.find_term(str(p), datatype, language=language)
+        else:
+            containers = [LIST, None] if graph.value(o, RDF.first) else [None]
+            for container in containers:
+                for coercion in (ID, VOCAB, UNDEF):
+                    term = context.find_term(str(p), coercion, container)
+                    if term:
+                        break
+                if term:
+                    break
+
+        node = None
+        use_set = not context.active
+
+        if term:
+            p_key = term.name
+
+            if term.type:
+                node = self.type_coerce(o, term.type)
+            elif term.language and o.language == term.language:
+                node = str(o)
+            elif context.language and (term.language is None and o.language is None):
+                node = str(o)
+
+            if LIST in term.container:
+                node = [
+                    self.type_coerce(v, term.type)
+                    or self.to_raw_value(graph, s, v, nodemap)
+                    for v in self.to_collection(graph, o)
+                ]
+            elif LANG in term.container and language:
+                value = s_node.setdefault(p_key, {})
+                values = value.get(language)
+                node = str(o)
+                if values or SET in term.container:
+                    if not isinstance(values, list):
+                        value[language] = values = [values]
+                    values.append(node)
+                else:
+                    value[language] = node
+                return
+            elif SET in term.container:
+                use_set = True
+
+        else:
+            p_key = context.to_symbol(p)
+            # TODO: for coercing curies - quite clumsy; unify to_symbol and find_term?
+            key_term = context.terms.get(p_key)
+            if key_term and (key_term.type or key_term.container):
+                p_key = p
+            if not term and p == RDF.type and not self.use_rdf_type:
+                if isinstance(o, URIRef):
+                    node = context.to_symbol(o)
+                p_key = context.type_key
+
+        if node is None:
+            node = self.to_raw_value(graph, s, o, nodemap)
+
+        value = s_node.get(p_key)
+        if value:
+            if not isinstance(value, list):
+                value = [value]
+            value.append(node)
+        elif use_set:
+            value = [node]
+        else:
+            value = node
+        s_node[p_key] = value
+
+    def type_coerce(self, o, coerce_type):
+        if coerce_type == ID:
+            if isinstance(o, URIRef):
+                return self.context.shrink_iri(o)
+            elif isinstance(o, BNode):
+                return o.n3()
+            else:
+                return o
+        elif coerce_type == VOCAB and isinstance(o, URIRef):
+            return self.context.to_symbol(o)
+        elif isinstance(o, Literal) and str(o.datatype) == coerce_type:
+            return o
+        else:
+            return None
+
+    def to_raw_value(self, graph, s, o, nodemap):
+        context = self.context
+        coll = self.to_collection(graph, o)
+        if coll is not None:
+            coll = [
+                self.to_raw_value(graph, s, lo, nodemap)
+                for lo in self.to_collection(graph, o)
+            ]
+            return {context.list_key: coll}
+        elif isinstance(o, BNode):
+            embed = (
+                False  # TODO: self.context.active or using startnode and only one ref
+            )
+            onode = self.process_subject(graph, o, nodemap)
+            if onode:
+                if embed and not any(s2 for s2 in graph.subjects(None, o) if s2 != s):
+                    return onode
+                else:
+                    nodemap[onode[context.id_key]] = onode
+            return {context.id_key: o.n3()}
+        elif isinstance(o, URIRef):
+            # TODO: embed if o != startnode (else reverse)
+            return {context.id_key: context.shrink_iri(o)}
+        elif isinstance(o, Literal):
+            # TODO: if compact
+            native = self.use_native_types and o.datatype in PLAIN_LITERAL_TYPES
+            if native:
+                v = o.toPython()
+            else:
+                v = str(o)
+            if o.datatype:
+                if native:
+                    if self.context.active:
+                        return v
+                    else:
+                        return {context.value_key: v}
+                return {
+                    context.type_key: context.to_symbol(o.datatype),
+                    context.value_key: v,
+                }
+            elif o.language and o.language != context.language:
+                return {context.lang_key: o.language, context.value_key: v}
+            elif not context.active or context.language and not o.language:
+                return {context.value_key: v}
+            else:
+                return v
+
+    def to_collection(self, graph, l_):
+        if l_ != RDF.nil and not graph.value(l_, RDF.first):
+            return None
+        list_nodes = []
+        chain = set([l_])
+        while l_:
+            if l_ == RDF.nil:
+                return list_nodes
+            if isinstance(l_, URIRef):
+                return None
+            first, rest = None, None
+            for p, o in graph.predicate_objects(l_):
+                if not first and p == RDF.first:
+                    first = o
+                elif not rest and p == RDF.rest:
+                    rest = o
+                elif p != RDF.type or o != RDF.List:
+                    return None
+            list_nodes.append(first)
+            l_ = rest
+            if l_ in chain:
+                return None
+            chain.add(l_)
\ No newline at end of file

From d5fdfbd1b3f1cfa5bb3f6df5afea8bbe9b2c8fe4 Mon Sep 17 00:00:00 2001
From: Scott Black <sblack@cuahsi.org>
Date: Fri, 11 Nov 2022 10:44:47 -0700
Subject: [PATCH 2/5] inject node consolidation into the serializer

---
 hsmodels/serializers.py | 346 +++++++---------------------------------
 1 file changed, 56 insertions(+), 290 deletions(-)

diff --git a/hsmodels/serializers.py b/hsmodels/serializers.py
index 0be2de1..96a2495 100644
--- a/hsmodels/serializers.py
+++ b/hsmodels/serializers.py
@@ -2,21 +2,19 @@
 from typing import IO, Optional
 
 from rdflib.graph import Graph
-from rdflib.namespace import RDF, XSD
+from rdflib.namespace import XSD
 from rdflib.serializer import Serializer
-from rdflib.term import BNode, Literal, URIRef
-
-from rdflib.plugins.shared.jsonld.context import UNDEF, Context
-from rdflib.plugins.shared.jsonld.keys import CONTEXT, GRAPH, ID, LANG, LIST, SET, VOCAB
+from rdflib.plugin import register
 from rdflib.plugins.shared.jsonld.util import json
+from rdflib.plugins.serializers.jsonld import JsonLDSerializer, from_rdf
 
-__all__ = ["JsonLDSerializer", "from_rdf"]
+__all__ = ["PrettyJsonLDSerializer", "from_rdf"]
 
 
 PLAIN_LITERAL_TYPES = {XSD.boolean, XSD.integer, XSD.double, XSD.string}
 
 
-class JsonLDSerializer(Serializer):
+class PrettyJsonLDSerializer(Serializer):
     def __init__(self, store: Graph):
         super(JsonLDSerializer, self).__init__(store)
 
@@ -54,6 +52,9 @@ def serialize(
             auto_compact=auto_compact,
         )
 
+        # embed blank-node objects into the roots that reference them (edits obj in place)
+        distribute_nodes(obj)
+
         data = json.dumps(
             obj,
             indent=indent,
@@ -64,287 +65,52 @@ def serialize(
 
         stream.write(data.encode(encoding, "replace"))
 
-def from_rdf(
-    graph,
-    context_data=None,
-    base=None,
-    use_native_types=False,
-    use_rdf_type=False,
-    auto_compact=False,
-    startnode=None,
-    index=False,
-):
-    # TODO: docstring w. args and return value
-    # TODO: support for index and startnode
-
-    if not context_data and auto_compact:
-        context_data = dict(
-            (pfx, str(ns))
-            for (pfx, ns) in graph.namespaces()
-            if pfx and str(ns) != "http://www.w3.org/XML/1998/namespace"
-        )
-
-    if isinstance(context_data, Context):
-        context = context_data
-        context_data = context.to_dict()
-    else:
-        context = Context(context_data, base=base)
-
-    converter = Converter(context, use_native_types, use_rdf_type)
-    result = converter.convert(graph)
-
-    if converter.context.active:
-        if isinstance(result, list):
-            result = {context.get_key(GRAPH): result}
-        result[CONTEXT] = context_data
-
-    return result
-
-class Converter(object):
-    def __init__(self, context, use_native_types, use_rdf_type):
-        self.context = context
-        self.use_native_types = context.active or use_native_types
-        self.use_rdf_type = use_rdf_type
-
-    def convert(self, graph):
-        # TODO: bug in rdflib dataset parsing (nquads et al):
-        # plain triples end up in separate unnamed graphs (rdflib issue #436)
-        if graph.context_aware:
-            default_graph = Graph()
-            graphs = [default_graph]
-            for g in graph.contexts():
-                if isinstance(g.identifier, URIRef):
-                    graphs.append(g)
-                else:
-                    default_graph += g
-        else:
-            graphs = [graph]
-
-        context = self.context
-
-        objs = []
-        for g in graphs:
-            obj = {}
-            graphname = None
-
-            if isinstance(g.identifier, URIRef):
-                graphname = context.shrink_iri(g.identifier)
-                obj[context.id_key] = graphname
-
-            nodes = self.from_graph(g)
-
-            if not graphname and len(nodes) == 1:
-                obj.update(nodes[0])
-            else:
-                if not nodes:
-                    continue
-                obj[context.graph_key] = nodes
-
-            if objs and objs[0].get(context.get_key(ID)) == graphname:
-                objs[0].update(obj)
-            else:
-                objs.append(obj)
-
-        if len(graphs) == 1 and len(objs) == 1 and not self.context.active:
-            default = objs[0]
-            items = default.get(context.graph_key)
-            if len(default) == 1 and items:
-                objs = items
-        elif len(objs) == 1 and self.context.active:
-            objs = objs[0]
-
-        return objs
-
-    def from_graph(self, graph):
-        nodemap = {}
-
-        for s in set(graph.subjects()):
-            ## only iri:s and unreferenced (rest will be promoted to top if needed)
-            if isinstance(s, URIRef) or (
-                isinstance(s, BNode) and not any(graph.subjects(None, s))
-            ):
-                self.process_subject(graph, s, nodemap)
-
-        return list(nodemap.values())
-
-    def process_subject(self, graph, s, nodemap):
-        if isinstance(s, URIRef):
-            node_id = self.context.shrink_iri(s)
-        elif isinstance(s, BNode):
-            node_id = s.n3()
-        else:
-            node_id = None
-
-        # used_as_object = any(graph.subjects(None, s))
-        if node_id in nodemap:
-            return None
-
-        node = {}
-        node[self.context.id_key] = node_id
-        nodemap[node_id] = node
-
-        for p, o in graph.predicate_objects(s):
-            self.add_to_node(graph, s, p, o, node, nodemap)
-
-        return node
-
-    def add_to_node(self, graph, s, p, o, s_node, nodemap):
-        context = self.context
-
-        if isinstance(o, Literal):
-            datatype = str(o.datatype) if o.datatype else None
-            language = o.language
-            term = context.find_term(str(p), datatype, language=language)
-        else:
-            containers = [LIST, None] if graph.value(o, RDF.first) else [None]
-            for container in containers:
-                for coercion in (ID, VOCAB, UNDEF):
-                    term = context.find_term(str(p), coercion, container)
-                    if term:
-                        break
-                if term:
-                    break
-
-        node = None
-        use_set = not context.active
-
-        if term:
-            p_key = term.name
-
-            if term.type:
-                node = self.type_coerce(o, term.type)
-            elif term.language and o.language == term.language:
-                node = str(o)
-            elif context.language and (term.language is None and o.language is None):
-                node = str(o)
-
-            if LIST in term.container:
-                node = [
-                    self.type_coerce(v, term.type)
-                    or self.to_raw_value(graph, s, v, nodemap)
-                    for v in self.to_collection(graph, o)
-                ]
-            elif LANG in term.container and language:
-                value = s_node.setdefault(p_key, {})
-                values = value.get(language)
-                node = str(o)
-                if values or SET in term.container:
-                    if not isinstance(values, list):
-                        value[language] = values = [values]
-                    values.append(node)
-                else:
-                    value[language] = node
-                return
-            elif SET in term.container:
-                use_set = True
-
-        else:
-            p_key = context.to_symbol(p)
-            # TODO: for coercing curies - quite clumsy; unify to_symbol and find_term?
-            key_term = context.terms.get(p_key)
-            if key_term and (key_term.type or key_term.container):
-                p_key = p
-            if not term and p == RDF.type and not self.use_rdf_type:
-                if isinstance(o, URIRef):
-                    node = context.to_symbol(o)
-                p_key = context.type_key
-
-        if node is None:
-            node = self.to_raw_value(graph, s, o, nodemap)
-
-        value = s_node.get(p_key)
-        if value:
-            if not isinstance(value, list):
-                value = [value]
-            value.append(node)
-        elif use_set:
-            value = [node]
-        else:
-            value = node
-        s_node[p_key] = value
-
-    def type_coerce(self, o, coerce_type):
-        if coerce_type == ID:
-            if isinstance(o, URIRef):
-                return self.context.shrink_iri(o)
-            elif isinstance(o, BNode):
-                return o.n3()
-            else:
-                return o
-        elif coerce_type == VOCAB and isinstance(o, URIRef):
-            return self.context.to_symbol(o)
-        elif isinstance(o, Literal) and str(o.datatype) == coerce_type:
-            return o
-        else:
-            return None
-
-    def to_raw_value(self, graph, s, o, nodemap):
-        context = self.context
-        coll = self.to_collection(graph, o)
-        if coll is not None:
-            coll = [
-                self.to_raw_value(graph, s, lo, nodemap)
-                for lo in self.to_collection(graph, o)
-            ]
-            return {context.list_key: coll}
-        elif isinstance(o, BNode):
-            embed = (
-                False  # TODO: self.context.active or using startnode and only one ref
-            )
-            onode = self.process_subject(graph, o, nodemap)
-            if onode:
-                if embed and not any(s2 for s2 in graph.subjects(None, o) if s2 != s):
-                    return onode
-                else:
-                    nodemap[onode[context.id_key]] = onode
-            return {context.id_key: o.n3()}
-        elif isinstance(o, URIRef):
-            # TODO: embed if o != startnode (else reverse)
-            return {context.id_key: context.shrink_iri(o)}
-        elif isinstance(o, Literal):
-            # TODO: if compact
-            native = self.use_native_types and o.datatype in PLAIN_LITERAL_TYPES
-            if native:
-                v = o.toPython()
-            else:
-                v = str(o)
-            if o.datatype:
-                if native:
-                    if self.context.active:
-                        return v
-                    else:
-                        return {context.value_key: v}
-                return {
-                    context.type_key: context.to_symbol(o.datatype),
-                    context.value_key: v,
-                }
-            elif o.language and o.language != context.language:
-                return {context.lang_key: o.language, context.value_key: v}
-            elif not context.active or context.language and not o.language:
-                return {context.value_key: v}
-            else:
-                return v
 
-    def to_collection(self, graph, l_):
-        if l_ != RDF.nil and not graph.value(l_, RDF.first):
-            return None
-        list_nodes = []
-        chain = set([l_])
-        while l_:
-            if l_ == RDF.nil:
-                return list_nodes
-            if isinstance(l_, URIRef):
-                return None
-            first, rest = None, None
-            for p, o in graph.predicate_objects(l_):
-                if not first and p == RDF.first:
-                    first = o
-                elif not rest and p == RDF.rest:
-                    rest = o
-                elif p != RDF.type or o != RDF.List:
-                    return None
-            list_nodes.append(first)
-            l_ = rest
-            if l_ in chain:
-                return None
-            chain.add(l_)
\ No newline at end of file
+def distribute_nodes(jld):
+    # group nodes to be distributed into roots
+    # nodes are identified by a dictionary with {'@id': "_:N..."}
+    nodes_by_id = {d.pop('@id'): d for d in jld['@graph'] if d['@id'].startswith("_:N")}
+    roots = [d for d in jld['@graph'] if '@id' in d and not d['@id'].startswith("_:N")]
+
+    # code for walking dictionaries and lists to replace node identifiers with the nodes
+    def is_node_id(d) -> bool:
+        ref = d.get("@id") if isinstance(d, dict) else None
+        return isinstance(ref, str) and ref in nodes_by_id
+
+    state = {}  # node id -> False while being expanded, True once fully expanded
+
+    def get_node(d: dict):
+        # expand each node once; a reference back into a node still being expanded
+        # (a blank-node cycle) is left as a reference so the result stays finite
+        node_id = d["@id"]
+        if node_id not in state:
+            state[node_id] = False
+            parse_dict(nodes_by_id[node_id])
+            state[node_id] = True
+        return nodes_by_id[node_id] if state[node_id] else d
+
+    def parse_list(l: list):
+        for i, item in enumerate(l):
+            if is_node_id(item):
+                l[i] = get_node(item)  # in place, so list order is preserved
+            elif isinstance(item, list):
+                parse_list(item)
+            elif isinstance(item, dict):
+                parse_dict(item)
+
+    def parse_dict(d: dict):
+        for key, value in list(d.items()):
+            if is_node_id(value):
+                d[key] = get_node(value)
+            elif isinstance(value, list):
+                parse_list(value)
+            elif isinstance(value, dict):
+                parse_dict(value)
+    # run the node replacements for each root
+    for d in roots:
+        parse_dict(d)
+
+
+register(
+    'json-ld-pretty', Serializer,
+    'hsmodels.serializers', 'PrettyJsonLDSerializer')
\ No newline at end of file

From f5d62db2cd4546399964cb26f7a942ec4b25809c Mon Sep 17 00:00:00 2001
From: Scott Black <sblack@cuahsi.org>
Date: Fri, 11 Nov 2022 11:05:50 -0700
Subject: [PATCH 3/5] get jsonld working with node insertion

---
 hsmodels/__init__.py         |  7 +++++++
 hsmodels/schemas/__init__.py |  2 +-
 hsmodels/serializers.py      | 14 ++++----------
 requirements.txt             |  2 +-
 setup.py                     |  2 +-
 5 files changed, 14 insertions(+), 13 deletions(-)

diff --git a/hsmodels/__init__.py b/hsmodels/__init__.py
index e69de29..c385230 100644
--- a/hsmodels/__init__.py
+++ b/hsmodels/__init__.py
@@ -0,0 +1,7 @@
+from rdflib.serializer import Serializer
+from rdflib.plugin import register
+
+# Register hsmodels.serializers.PrettyJsonLDSerializer with rdflib as format 'json-ld-pretty'.
+register(
+    'json-ld-pretty', Serializer,
+    'hsmodels.serializers', 'PrettyJsonLDSerializer')
\ No newline at end of file
diff --git a/hsmodels/schemas/__init__.py b/hsmodels/schemas/__init__.py
index f1b7c05..1ab5093 100644
--- a/hsmodels/schemas/__init__.py
+++ b/hsmodels/schemas/__init__.py
@@ -89,7 +89,7 @@ def rdf_graph(schema):
 
 
 def rdf_string(schema, rdf_format='pretty-xml'):
-    return rdf_graph(schema).serialize(format=rdf_format).decode()
+    return rdf_graph(schema).serialize(format=rdf_format)
 
 
 def _rdf_fields(schema):
diff --git a/hsmodels/serializers.py b/hsmodels/serializers.py
index 96a2495..426f519 100644
--- a/hsmodels/serializers.py
+++ b/hsmodels/serializers.py
@@ -3,8 +3,6 @@
 
 from rdflib.graph import Graph
 from rdflib.namespace import XSD
-from rdflib.serializer import Serializer
-from rdflib.plugin import register
 from rdflib.plugins.shared.jsonld.util import json
 from rdflib.plugins.serializers.jsonld import JsonLDSerializer, from_rdf
 
@@ -14,10 +12,9 @@
 PLAIN_LITERAL_TYPES = {XSD.boolean, XSD.integer, XSD.double, XSD.string}
 
 
-class PrettyJsonLDSerializer(Serializer):
+class PrettyJsonLDSerializer(JsonLDSerializer):
     def __init__(self, store: Graph):
-        super(JsonLDSerializer, self).__init__(store)
-
+        super(PrettyJsonLDSerializer, self).__init__(store)
 
     def serialize(
         self,
@@ -69,8 +66,8 @@ def serialize(
 def distribute_nodes(jld):
     # group nodes to be distributed into roots
     # nodes are identified by a dictionary with {'@id': "_:N..."}
-    nodes_by_id = {d.pop('@id'): d for d in jld['@graph'] if d['@id'].startswith("_:N")}
-    roots = [d for d in jld['@graph'] if '@id' in d and not d['@id'].startswith("_:N")]
+    nodes_by_id = {d.pop('@id'): d for d in jld if d['@id'].startswith("_:N")}
+    roots = [d for d in jld if '@id' in d and not d['@id'].startswith("_:N")]
 
     # code for walking dictionaries and lists to replace node identifiers with the nodes
     def is_node_id(d) -> bool:
@@ -111,6 +108,3 @@ def parse_dict(d: dict):
         parse_dict(d)
 
 
-register(
-    'json-ld-pretty', Serializer,
-    'hsmodels.serializers', 'PrettyJsonLDSerializer')
\ No newline at end of file
diff --git a/requirements.txt b/requirements.txt
index ec1d916..b88db18 100644
--- a/requirements.txt
+++ b/requirements.txt
@@ -1,4 +1,4 @@
-rdflib<6.0.0
+rdflib>=6.0.0
 pydantic>=1.8.1,<2.0
 email-validator
 jsonschema2md
diff --git a/setup.py b/setup.py
index 92807da..f72f7eb 100644
--- a/setup.py
+++ b/setup.py
@@ -8,7 +8,7 @@
     packages=find_packages(include=['hsmodels', 'hsmodels.*', 'hsmodels.schemas.*', 'hsmodels.schemas.rdf.*'],
                            exclude=("tests",)),
     install_requires=[
-        'rdflib<6.0.0',
+        'rdflib>=6.0.0',
         'pydantic>=1.8.1,<2.0',
         'email-validator'
     ],

From ce601a9a0d776e134e86a933227d87c9a0938586 Mon Sep 17 00:00:00 2001
From: Scott Black <sblack@cuahsi.org>
Date: Fri, 11 Nov 2022 11:10:28 -0700
Subject: [PATCH 4/5] use jsonld auto compact to key contexts

---
 hsmodels/schemas/__init__.py | 2 +-
 hsmodels/serializers.py      | 6 ++++--
 2 files changed, 5 insertions(+), 3 deletions(-)

diff --git a/hsmodels/schemas/__init__.py b/hsmodels/schemas/__init__.py
index 1ab5093..6bddde4 100644
--- a/hsmodels/schemas/__init__.py
+++ b/hsmodels/schemas/__init__.py
@@ -89,7 +89,7 @@ def rdf_graph(schema):
 
 
 def rdf_string(schema, rdf_format='pretty-xml'):
-    return rdf_graph(schema).serialize(format=rdf_format)
+    return rdf_graph(schema).serialize(format=rdf_format, auto_compact=True)
 
 
 def _rdf_fields(schema):
diff --git a/hsmodels/serializers.py b/hsmodels/serializers.py
index 426f519..e4f7de0 100644
--- a/hsmodels/serializers.py
+++ b/hsmodels/serializers.py
@@ -66,8 +66,10 @@ def serialize(
 def distribute_nodes(jld):
     # group nodes to be distributed into roots
     # nodes are identified by a dictionary with {'@id': "_:N..."}
-    nodes_by_id = {d.pop('@id'): d for d in jld if d['@id'].startswith("_:N")}
-    roots = [d for d in jld if '@id' in d and not d['@id'].startswith("_:N")]
+    # from_rdf may hand back a bare list, or a dict that lacks '@graph' (single node)
+    graph = jld if isinstance(jld, list) else jld.get('@graph', [])
+    nodes_by_id = {d.pop('@id'): d for d in graph if d.get('@id', '').startswith("_:N")}
+    roots = [d for d in graph if '@id' in d and not d['@id'].startswith("_:N")]
 
     # code for walking dictionaries and lists to replace node identifiers with the nodes
     def is_node_id(d) -> bool:

From 4e59956b3a388f5af1d488349d81024928844b59 Mon Sep 17 00:00:00 2001
From: Scott Black <sblack@cuahsi.org>
Date: Fri, 11 Nov 2022 11:19:34 -0700
Subject: [PATCH 5/5] set contexts, we won't keep this commit

---
 hsmodels/schemas/__init__.py | 24 +++++++++++++++++++++---
 1 file changed, 21 insertions(+), 3 deletions(-)

diff --git a/hsmodels/schemas/__init__.py b/hsmodels/schemas/__init__.py
index 6bddde4..2438fa8 100644
--- a/hsmodels/schemas/__init__.py
+++ b/hsmodels/schemas/__init__.py
@@ -4,7 +4,8 @@
 from pydantic import AnyUrl, BaseModel
 from rdflib import Graph, Literal, URIRef
 
-from hsmodels.namespaces import DC, HSTERMS, ORE, RDF, RDFS1
+from hsmodels.namespaces import DC, HSTERMS, ORE, RDF, RDFS1, HSRESOURCE, DCTERMS, SCHEMA, XML, RDFS, CITOTERMS, XSD, \
+    SH, FOAF, DASH, HSUSER
 from hsmodels.schemas.aggregations import (
     FileSetMetadata,
     GeographicFeatureMetadata,
@@ -82,10 +83,27 @@ def parse_file(schema, file, file_format='xml', subject=None):
 
 
 def rdf_graph(schema):
+    g = Graph()
+    g.bind('hsresource', HSRESOURCE)
+    g.bind('dcterms', DCTERMS)
+    g.bind('rdfs1', RDFS1)
+    g.bind('schema', SCHEMA)
+    g.bind('hsterms', HSTERMS)
+    g.bind('xml', XML)
+    g.bind('rdfs', RDFS)
+    g.bind('dc', DC)
+    g.bind('citoterms', CITOTERMS)
+    g.bind('xsd', XSD)
+    g.bind('sh', SH)
+    g.bind('rdf', RDF)
+    g.bind('foaf', FOAF)
+    g.bind('dash', DASH)
+    g.bind('ORE', ORE)
+    g.bind('hsuser', HSUSER)
     for rdf_schema, user_schema in user_schemas.items():
         if isinstance(schema, user_schema):
-            return _rdf_graph(rdf_schema(**schema.dict(to_rdf=True)), Graph())
-    return _rdf_graph(schema, Graph())
+            return _rdf_graph(rdf_schema(**schema.dict(to_rdf=True)), g)
+    return _rdf_graph(schema, g)
 
 
 def rdf_string(schema, rdf_format='pretty-xml'):