NeotomaDB
diff --git a/‎.gitignore‎
Lines changed: 4 additions & 0 deletions b/‎.gitignore‎
Lines changed: 4 additions & 0 deletions
diff --git a/‎example.py‎
Lines changed: 67 additions & 0 deletions b/‎example.py‎
Lines changed: 67 additions & 0 deletions
diff --git a/‎neotomadoi.yaml‎
Lines changed: 27 additions & 0 deletions b/‎neotomadoi.yaml‎
Lines changed: 27 additions & 0 deletions
diff --git a/‎src/neotomadoi/__init__.py‎
Lines changed: 15 additions & 0 deletions b/‎src/neotomadoi/__init__.py‎
Lines changed: 15 additions & 0 deletions
diff --git a/‎src/neotomadoi/neo_connect.py‎
Lines changed: 21 additions & 0 deletions b/‎src/neotomadoi/neo_connect.py‎
Lines changed: 21 additions & 0 deletions
diff --git a/‎src/neotomadoi/neo_contributors.py‎
Lines changed: 88 additions & 0 deletions b/‎src/neotomadoi/neo_contributors.py‎
Lines changed: 88 additions & 0 deletions
diff --git a/‎src/neotomadoi/neo_creators.py‎
Lines changed: 45 additions & 0 deletions b/‎src/neotomadoi/neo_creators.py‎
Lines changed: 45 additions & 0 deletions
diff --git a/‎src/neotomadoi/neo_dates.py‎
Lines changed: 55 additions & 0 deletions b/‎src/neotomadoi/neo_dates.py‎
Lines changed: 55 additions & 0 deletions
diff --git a/‎src/neotomadoi/neo_description.py‎
Lines changed: 35 additions & 0 deletions b/‎src/neotomadoi/neo_description.py‎
Lines changed: 35 additions & 0 deletions
diff --git a/‎src/neotomadoi/neo_identifier.py‎
Lines changed: 29 additions & 0 deletions b/‎src/neotomadoi/neo_identifier.py‎
Lines changed: 29 additions & 0 deletions
@@ -2,3 +2,7 @@
 .env
 
 .coverage
+
+src/neotomadoi/__pycache__/
+
+*.log
@@ -0,0 +1,67 @@
+import neotomadoi
+from dotenv import load_dotenv
+import os
+import json
+import psycopg2
+import psycopg2.extras
+
+load_dotenv()
+
+DCITE = json.loads(os.getenv("DCITE"))
+
+datacite_meta = neotomadoi.credentials(DCITE)
+
+con = neotomadoi.neo_connect(test=False)
+
+# All datasets that are between two months and two days old.
+# Datasets cannot be geochronologic datasets.
+query = """SELECT DISTINCT ds.datasetid
+           FROM ndb.datasets AS ds
+           LEFT JOIN ndb.datasetdoi AS dsdoi ON dsdoi.datasetid = ds.datasetid
+           WHERE NOT ds.datasettypeid = 1;"""
+
+with con.cursor(cursor_factory=psycopg2.extras.DictCursor) as cur:
+    cur.execute(query)
+    datasetids = cur.fetchall()
+    datasetids = [i[0] for i in datasetids]
+
+for i in datasetids:
+    print(f"Working on {i}")
+    new_doi = neotomadoi.neotomaDOI(datasetid=i, defaults="neotomadoi.yaml")
+    new_doi.set_user(datacite_meta)
+    new_doi.prod_mode()
+    try:
+        try:
+            new_doi.update()
+        except ValueError as e:
+            if "critical" in str(e):
+                new_doi.freeze_data(con)
+                new_doi.update()
+        new_doi.validate()
+        new_doi.get_activity()
+        old_activity = len(new_doi.activity)
+        new_doi.mint_doi(publish=True)
+        if old_activity == 0:
+            with open("minting_dois.log", "a", encoding="UTF-8") as f:
+                new_doi.get_meta()
+                json.dump(
+                    {"datasetid": i, "doi": new_doi.identifiers, "meta": new_doi.meta},
+                    f,
+                )
+                a = f.write("\n")
+            print(f'  Minted new DOI: {new_doi.identifiers.get('identifier')}')
+        elif old_activity > 0:
+            with open("updating_dois.log", "a", encoding="UTF-8") as f:
+                new_doi.get_meta()
+                json.dump(
+                    {"datasetid": i, "doi": new_doi.identifiers, "meta": new_doi.meta},
+                    f,
+                )
+                a = f.write("\n")
+            print(f'  Updated DOI: {new_doi.identifiers.get('identifier')}')
+    except Exception as e:
+        print("Whoops.")
+        print(e)
+        with open("failing_dois.log", "a", encoding="UTF-8") as f:
+            json.dump({"datasetid": i, "error": str(e)}, f)
+            a = f.write("\n")
@@ -0,0 +1,27 @@
+language: EN
+rightsList:
+  - rights: CC-BY4
+    rightsUri: http://creativecommons.org/licenses/by/4.0/deed.en_US
+    schemeUri: https://spdx.org/licenses/
+    rightsIdentifierScheme: SPDX
+  - rights: TK Open to Collaboration (TK CB)
+    rightsUri: https://localcontexts.org/notice/open-to-collaborate/
+    schemeUri: https://localcontexts.org
+    rightsIdentifierScheme: Local Contexts
+schemaVersion: "http://datacite.org/schema/kernel-4"
+types:
+  resourceType: Dataset/Paleoecological Sample Data
+  resourceTypeGeneral: Dataset
+publisher:
+  name: Neotoma Paleoecology Database
+  publisherIdentifier: 10.17616/R3PD38
+  publisherIdentifierScheme: DOI
+  lang: EN
+subjects:
+  - subject: Paleoecology
+    subjectScheme: Library of Congress Subject Headings
+    schemeUri: https://id.loc.gov/authorities/subjects
+    valueUri: http://id.loc.gov/authorities/subjects/sh85097060
+    lang: en-us
+formats:
+  - application/json
@@ -0,0 +1,15 @@
+from .neo_connect import neo_connect as neo_connect
+from .neo_creators import neo_creators as neo_creators
+from .neotomaDOI import neotomaDOI as neotomaDOI
+from .neotomaDOI import testMode as testMode
+from .neotomaDOI import credentials as credentials
+from .neotomaDOI import activity as activity
+from .neo_contributors import neo_contributors as neo_contributors
+from .neo_subjects import neo_subjects as neo_subjects
+from .neo_title import neo_title as neo_title
+from .neo_location import neo_location as neo_location
+from .neo_relatedIdentifiers import neo_relatedIdentifiers as neo_relatedIdentifiers
+from .neo_identifier import neo_identifier as neo_identifier
+from .neo_dates import neo_dates as neo_dates
+from .neo_size import neo_size as neo_size
+from .neo_description import neo_description as neo_description
@@ -0,0 +1,21 @@
+import psycopg2
+from dotenv import dotenv_values
+from json import loads
+
+
+def neo_connect(test: bool = True) -> psycopg2.connect:
+    """_Connect to the Neotoma Database_
+
+    Args:
+        test (bool): _Are we connecting to the test or production database?_
+
+    Returns:
+        psycopg2.connect: _A valid connection the the Neotoma Database server_
+    """
+    secrets = dotenv_values()
+    if test:
+        CONN_STRING = loads(secrets["DBAUTH_TEST"])
+    else:
+        CONN_STRING = loads(secrets["DBAUTH"])
+    con = psycopg2.connect(**CONN_STRING, connect_timeout=5)
+    return con
@@ -0,0 +1,88 @@
+import psycopg2
+import psycopg2.extras
+
+
+def neo_contributors(con: psycopg2.connect, self) -> list:
+    """_Obtain a list of the dataset contributors by activity for a dataset._
+
+    Args:
+        con (psycopg2.connect): _A valid connection the the Neotoma Database server_
+
+    Returns:
+        list: _A list of Neotoma contributors, including external identifiers when available._
+    """
+    query = """
+        WITH chronfolk AS (
+        SELECT DISTINCT  contactid,
+                'Researcher'::text AS contributorType
+        FROM     ndb.datasets AS d
+        JOIN ndb.chronologies AS chron ON d.collectionunitid = chron.collectionunitid
+        WHERE d.datasetid = %(datasetid)s
+        ),
+        collfolk AS (
+        SELECT DISTINCT  contactid, 'DataCollector'::text AS contributortype
+        FROM     ndb.datasets AS d
+        JOIN   ndb.collectors AS coll ON d.collectionunitid = coll.collectionunitid
+        WHERE d.datasetid = %(datasetid)s
+        ),
+        dpi AS (
+        SELECT DISTINCT  contactid,
+                'ProjectLeader'::text AS contributortype
+        FROM ndb.datasetpis WHERE datasetpis.datasetid = %(datasetid)s
+        ),
+        curator AS (
+        /* In the DB stuff this should be a 'DataSteward' */
+        SELECT DISTINCT  contactid, 'DataCurator'::text AS contributortype
+        FROM ndb.datasetsubmissions
+        WHERE datasetsubmissions.datasetid = %(datasetid)s
+        ),
+        coauth AS (
+        SELECT DISTINCT contactid,
+                'Researcher'::text AS contributortype
+        FROM ndb.datasetpublications AS d
+        JOIN ndb.publicationauthors AS paut ON d.publicationid = paut.publicationid
+        WHERE d.datasetid = %(datasetid)s
+        ),
+        analyst AS (
+            SELECT DISTINCT sana.contactid,
+        /* In the DB stuff this should be a 'DataAnalyst' */
+                    'DataCollector'::text AS contributortype
+        FROM        ndb.samples AS samp
+        JOIN ndb.sampleanalysts AS sana ON samp.sampleid = sana.sampleid
+        WHERE samp.datasetid = %(datasetid)s
+        )
+        SELECT DISTINCT cts.contactname AS name,
+                        -- cts.address AS affiliation,
+                        lister.contributortype as "contributorType",
+                        jsonb_agg(DISTINCT 
+                                jsonb_build_object('nameIdentifier', exct.identifier,
+                                                   'nameIdentifierScheme', exdb.extdatabasename, 
+                                                   'schemeUri', exdb.url)) AS "nameIdentifiers"
+        FROM (SELECT * FROM analyst
+        UNION ALL
+        (SELECT * FROM coauth)
+        UNION ALL
+        (SELECT * FROM curator)
+        UNION ALL
+        (SELECT * FROM dpi)
+        UNION ALL
+        (SELECT * FROM collfolk)
+        UNION ALL
+        (SELECT * FROM chronfolk)) AS lister
+        JOIN ndb.contacts AS cts ON cts.contactid = lister.contactid
+        LEFT JOIN ndb.externalcontacts AS exct ON exct.contactid = cts.contactid
+        LEFT JOIN ndb.externaldatabases AS exdb ON exdb.extdatabaseid = exct.extdatabaseid
+        GROUP BY cts.contactid, lister.contributortype;
+    """
+    with con.cursor(cursor_factory=psycopg2.extras.DictCursor) as cur:
+        cur.execute(query, {"datasetid": self.datasetid})
+        response = cur.fetchall()
+        contributors = []
+        for i in response:
+            creator = dict(i)
+            if not all(
+                [i.get("nameIdentifier") for i in creator.get("nameIdentifiers")]
+            ):
+                _ = creator.pop("nameIdentifiers", None)
+            contributors.append(creator)
+    return contributors
@@ -0,0 +1,45 @@
+import psycopg2
+import psycopg2.extras
+
+
+def neo_creators(con: psycopg2.connect, self) -> list:
+    """_Obtain a list of Neotoma dataset PIs for a dataset._
+
+    Args:
+        con (psycopg2.connect): _A valid psycopg connection to the Neotoma database._
+
+    Returns:
+        list: _A list of dataset PIs, including any external identifiers._
+    """
+
+    query = """
+        SELECT DISTINCT cts.contactname AS name,
+                        -- cts.address AS affiliation,
+                        jsonb_agg(DISTINCT 
+                                jsonb_build_object('nameIdentifier', exct.identifier,
+                                                   'nameIdentifierScheme', exdb.extdatabasename, 
+                                                   'schemeUri', exdb.url)) AS "nameIdentifiers"
+        FROM ndb.datasetpis AS dspi
+        INNER JOIN ndb.contacts AS cts ON cts.contactid = dspi.contactid
+        LEFT JOIN ndb.externalcontacts AS exct ON exct.contactid = cts.contactid
+        LEFT JOIN ndb.externaldatabases AS exdb ON exdb.extdatabaseid = exct.extdatabaseid
+        WHERE dspi.datasetid = %(datasetid)s
+        GROUP BY cts.contactid;
+    """
+
+    with con.cursor(cursor_factory=psycopg2.extras.DictCursor) as cur:
+        cur.execute(query, {"datasetid": self.datasetid})
+        response = cur.fetchall()
+        creators = []
+        if len(response) == 0:
+            creators = [{"name": "None listed"}]
+        for i in response:
+            creator = dict(i)
+            if creator.get("name") is None:
+                creator["name"] = "None listed"
+            if not all(
+                [i.get("nameIdentifier") for i in creator.get("nameIdentifiers")]
+            ):
+                _ = creator.pop("nameIdentifiers", None)
+            creators.append(creator)
+    return creators
@@ -0,0 +1,55 @@
+import psycopg2
+import psycopg2.extras
+
+
+def neo_dates(con: psycopg2.connect, self) -> object:
+    """_Return critical dates associated with the dataset record._
+
+    Args:
+        con (psycopg2.connect): _A valid connection to the Neotoma database._
+
+    Returns:
+        object: _A object listing each date type (Submitted, Updated, etc.) and the relevant date._
+    """    
+    query = """
+        WITH creation AS (
+            SELECT MIN(ds.submissiondate)::date as date, 'Submitted'::text
+            FROM ndb.datasetsubmissions AS ds
+            WHERE ds.datasetid = %(datasetid)s
+        ),
+        resub AS (
+            SELECT ds.submissiondate as date, 'Updated'::text
+            FROM ndb.datasetsubmissions AS ds
+            WHERE ds.datasetid = %(datasetid)s
+            ORDER BY ds.submissiondate
+            OFFSET 1
+        ),
+        issued AS (
+            SELECT dsdoi.recdatecreated as date, 'Issued'::text
+            FROM ndb.datasetdoi AS dsdoi
+            WHERE dsdoi.datasetid = %(datasetid)s
+            ORDER BY dsdoi.recdatecreated
+            LIMIT 1
+        )
+        SELECT DISTINCT *
+        FROM (
+            (SELECT * FROM creation)
+        UNION ALL
+        (SELECT * FROM resub)
+        UNION ALL
+        (SELECT * FROM issued)) AS dates
+        WHERE date is not NULL;
+    """
+
+    with con.cursor(cursor_factory=psycopg2.extras.DictCursor) as cur:
+        cur.execute(query, {"datasetid": self.datasetid})
+        response = cur.fetchall()
+        dates = []
+        for i in response:
+            dates.append(dict(i))
+        date_out = []
+        for i in dates:
+            date_out.append(
+                {"dateType": i.get("text"), "date": i.get("date").strftime("%Y-%m-%d")}
+            )
+    return date_out
@@ -0,0 +1,35 @@
+import psycopg2
+import psycopg2.extras
+
+
+def neo_description(con: psycopg2.connect, self) -> object:
+    """_Return a formatted description string for the dataset to be used in the DOI metadata._
+
+    Args:
+        con (psycopg2.connect): _A valid connection to the Neotoma database._
+
+    Returns:
+        object: _An object with the description and description type._
+    """    
+    query = """
+        SELECT st.sitename || ' ' || dst.datasettype || ' dataset' AS title
+        FROM
+        ndb.datasets AS ds
+        INNER JOIN ndb.datasettypes AS dst ON dst.datasettypeid = ds.datasettypeid
+        INNER JOIN ndb.collectionunits AS cu ON cu.collectionunitid = ds.collectionunitid
+        INNER JOIN ndb.sites AS st ON st.siteid = cu.siteid
+        WHERE ds.datasetid = %(datasetid)s; 
+    """
+
+    with con.cursor(cursor_factory=psycopg2.extras.DictCursor) as cur:
+        cur.execute(query, {"datasetid": self.datasetid})
+        response = cur.fetchone()
+        string = (
+            f"Raw data for the {response[0]} submitted to the Neotoma Paleoecology Database. Data is available through the landing page in JSON format. "
+            "The landing page referenced by the DOI also contains links to publications and a map-based viewer for the dataset. "
+            "The Neotoma Paleoecology Database maintains a homepage at https://www.neotomadb.org."
+        )
+        description = [
+            {"descriptionType": "Abstract", "description": string, "lang": "EN"}
+        ]
+    return description
@@ -0,0 +1,29 @@
+import psycopg2
+import psycopg2.extras
+
+
+def neo_identifier(con: psycopg2.connect, self) -> object:
+    """_Return the dataset identifier (DOI)_
+
+    Args:
+        con (psycopg2.connect): _A valid connection the the Neotoma database._
+
+    Returns:
+        object: _An object with the dataset DOI._
+    """    
+    query = """
+        SELECT doi as identifier,
+        'DOI' as "identifierType"
+        FROM doi.doimeta
+        WHERE datasetid = %(datasetid)s
+        LIMIT 1;
+    """
+
+    with con.cursor(cursor_factory=psycopg2.extras.DictCursor) as cur:
+        cur.execute(query, {"datasetid": self.datasetid})
+        response = cur.fetchone()
+        if response:
+            doi = dict(response)
+        else:
+            doi = {}
+    return doi