Skip to content

Commit 16133b0

Browse files
committed
Handle language codes in different formats: convert three-letter (alpha-3) language codes to two-letter (alpha-2) codes via pycountry
1 parent 2682ca9 commit 16133b0

2 files changed

Lines changed: 10 additions & 11 deletions

File tree

pygeometa/schemas/gbif_eml/__init__.py

Lines changed: 9 additions & 11 deletions
Original file line numberDiff line numberDiff line change
@@ -1,6 +1,7 @@
11
import re
22
from pathlib import Path
33

4+
import pycountry
45
from bs4 import BeautifulSoup
56
from pygeometa.schemas.base import BaseOutputSchema
67

@@ -91,7 +92,10 @@ def import_(self, metadata):
9192
mcf["metadata"]["identifier"] = text_or_null(identifier)
9293

9394
if language := dataset.find("language"):
94-
mcf["metadata"]["language"] = text_or_null(language)
95+
lang = text_or_null(language)
96+
mcf["metadata"]["language"] = (
97+
pycountry.languages.get(alpha_3=lang).alpha_2 if lang else lang
98+
)
9599

96100
idf = mcf["identification"]
97101

@@ -118,8 +122,7 @@ def import_(self, metadata):
118122
# )
119123

120124
idf["maintenancefrequency"] = (
121-
text_or_null(dataset.find("maintenanceUpdateFrequency")) or
122-
"unknown"
125+
text_or_null(dataset.find("maintenanceUpdateFrequency")) or "unknown"
123126
)
124127

125128
idf["dates"] = {"publication": text_or_null(dataset.find("pubDate"))}
@@ -137,8 +140,7 @@ def import_(self, metadata):
137140
]
138141

139142
spatial["crs"] = "4326"
140-
spatial["description"] = \
141-
text_or_null(dataset.find("geographicDescription"))
143+
spatial["description"] = text_or_null(dataset.find("geographicDescription"))
142144

143145
# temporal = idf["extents"]["temporal"]
144146
# temporal["begin"]
@@ -152,17 +154,13 @@ def import_(self, metadata):
152154
for r, obj in to_contact_role(dataset, "contact", "pointOfContact"):
153155
ct[r] = obj
154156

155-
for r, obj in to_contact_role(dataset,
156-
"metadataProvider",
157-
"distributor"):
157+
for r, obj in to_contact_role(dataset, "metadataProvider", "distributor"):
158158
ct[r] = obj
159159

160160
for r, obj in to_contact_role(dataset, "creator"):
161161
ct[r] = obj
162162

163-
for r, obj in to_contact_role(dataset,
164-
"personnel",
165-
"projectPersonnel"):
163+
for r, obj in to_contact_role(dataset, "personnel", "projectPersonnel"):
166164
ct[r] = obj
167165

168166
for idx, keyword_set in enumerate(dataset.find_all("keywordSet")):

requirements.txt

Lines changed: 1 addition & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -5,3 +5,4 @@ lxml
55
OWSLib
66
pyyaml
77
beautifulsoup4
8+
pycountry

0 commit comments

Comments
 (0)