-
Notifications
You must be signed in to change notification settings - Fork 70
Expand file tree
/
Copy pathbase.py
More file actions
63 lines (48 loc) · 2.09 KB
/
base.py
File metadata and controls
63 lines (48 loc) · 2.09 KB
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
import abc
import uuid
from share.util.graph import MutableGraph
class BaseTransformer(metaclass=abc.ABCMeta):
    """Abstract base for transformers that turn a raw datum into a MutableGraph.

    Subclasses implement ``do_transform``; callers go through ``transform``,
    which normalizes the input, merges keyword arguments from the source
    config, and attaches the source identifier to the resulting graph.
    """

    def __init__(self, source_config=None):
        """Store the source config.

        Args:
            source_config: Optional config object. When given, its
                ``transformer_kwargs`` attribute supplies default keyword
                arguments for ``do_transform``.
        """
        self.config = source_config

    @abc.abstractmethod
    def do_transform(self, datum, **kwargs):
        """Transform the decoded datum.

        Args:
            datum: The raw data as a ``str``.
            **kwargs: Merged config and caller keyword arguments.

        Returns a ``(jsonld, root_ref)`` pair. ``jsonld`` is a dict whose
        ``'@graph'`` entry is a list of node dicts; ``root_ref`` is a dict with
        the ``'@id'`` and ``'@type'`` of the root node. A falsy ``jsonld``
        makes ``transform`` return None.
        """
        raise NotImplementedError('Transformers must implement do_transform')

    def transform(self, datum, **kwargs):
        """Transform a RawDatum

        Args:
            datum: RawDatum to transform (a bare ``str`` or ``bytes`` payload
                is accepted too, in which case no source identifier is added)
            **kwargs: Forwarded to do_transform. Overrides values in the source config's transformer_kwargs

        Returns a MutableGraph, or None when do_transform produced no jsonld
        """
        source_id = None
        if not isinstance(datum, (str, bytes)):
            # Not a bare payload: read the identifier and the payload off the object.
            source_id = datum.suid.identifier
            datum = datum.datum
        if isinstance(datum, bytes):
            datum = datum.decode()

        jsonld, root_ref = self.do_transform(datum, **self._get_kwargs(**kwargs))

        if not jsonld:
            return None

        # jsonld is known to be truthy here, so only the other two need checking.
        if source_id and root_ref:
            self.add_source_identifier(source_id, jsonld, root_ref)

        # TODO return a MutableGraph from do_transform, maybe build it directly in Parser?
        return MutableGraph.from_jsonld(jsonld)

    def add_source_identifier(self, source_id, jsonld, root_ref):
        """Attach a ``workidentifier`` node for ``source_id`` to the root node.

        Mutates ``jsonld`` in place. Does nothing if the graph already holds a
        ``workidentifier`` node with the same URI.

        Args:
            source_id: Identifier to convert into an IRI (via ``IRILink`` with
                ``urn_fallback=True``).
            jsonld: Dict with an ``'@graph'`` list of node dicts.
            root_ref: Dict with the ``'@id'`` and ``'@type'`` of the root node.

        Raises:
            StopIteration: If no node in the graph matches ``root_ref``.
        """
        # Imported at call time rather than at module level, as in the original.
        from share.transform.chain.links import IRILink

        uri = IRILink(urn_fallback=True).execute(str(source_id))['IRI']
        if any(n['@type'].lower() == 'workidentifier' and n['uri'] == uri for n in jsonld['@graph']):
            return

        identifier_ref = {
            '@id': '_:' + uuid.uuid4().hex,
            '@type': 'workidentifier'
        }
        identifier = {
            'uri': uri,
            'creative_work': root_ref,
            **identifier_ref
        }
        root_node = next(n for n in jsonld['@graph'] if n['@id'] == root_ref['@id'] and n['@type'] == root_ref['@type'])
        # The root node only references the identifier; the full node is added to the graph.
        root_node.setdefault('identifiers', []).append(identifier_ref)
        jsonld['@graph'].append(identifier)

    def _get_kwargs(self, **kwargs):
        """Merge the config's ``transformer_kwargs`` with ``kwargs``.

        Caller-supplied ``kwargs`` win on key collisions. A transformer built
        without a source config (the constructor default) contributes no
        configured kwargs instead of failing on ``None``.
        """
        configured = self.config.transformer_kwargs if self.config is not None else None
        return {**(configured or {}), **kwargs}