When the data.dwc:day or data.dwc:month is missing, but a data.dwc:year is provided, the datecollected column is assigned the current month and day.
This error comes from this:
|
def dateGrabber(t, d): |
|
r = {} |
|
df = { |
|
"records": [ |
|
["datemodified", "idigbio:dateModified"], |
|
["datecollected", "dwc:eventDate"], |
|
], |
|
"mediarecords": [ |
|
["modified", "dcterms:modified"], |
|
["datemodified", "idigbio:dateModified"], |
|
], |
|
"publishers": [ |
|
["datemodified", "idigbio:dateModified"], |
|
], |
|
"recordsets": [ |
|
["datemodified", "idigbio:dateModified"], |
|
] |
|
} |
|
for f in df[t]: |
|
fv = getfield(f[1], d) |
|
if fv is not None: |
|
# dates are more sensitivie to lower case then upper. |
|
fv = fv.upper() |
|
try: |
|
x = dateutil.parser.parse(fv) |
|
if x.tzinfo is None: |
|
x = x.replace(tzinfo=pytz.utc) |
|
try: |
|
x < datetime.datetime.now(pytz.utc) |
|
except: |
|
x = x.replace(tzinfo=pytz.utc) |
|
r[f[0]] = x |
|
except: |
|
pass |
|
if f[0] not in r: |
|
r[f[0]] = None |
|
|
|
if "datecollected" in r and r["datecollected"] is None: |
|
year = getfield("dwc:year", d) |
|
month = getfield("dwc:month", d) |
|
day = getfield("dwc:day", d) |
|
sd_of_year = getfield("dwc:startDayOfYear", d) |
|
if year is not None: |
|
try: |
|
if month is not None: |
|
if day is not None: |
|
r["datecollected"] = dateutil.parser.parse( |
|
"{0}-{1}-{2}".format(year, month, day)).date() |
|
elif sd_of_year is not None: |
|
r["datecollected"] = (datetime.datetime( |
|
year, 1, 1) + datetime.timedelta(locale.atoi(sd_of_year) - 1)).date() |
|
else: |
|
r["datecollected"] = dateutil.parser.parse( |
|
"{0}-{1}".format(year, month)).date() |
|
else: |
|
r["datecollected"] = dateutil.parser.parse(year).date() |
|
except: |
|
pass |
|
|
|
if "datecollected" in r and r["datecollected"] is not None: |
|
r["startdayofyear"] = r["datecollected"].timetuple().tm_yday |
|
|
|
return r |
Here is the line causing this issue:
|
r["datecollected"] = dateutil.parser.parse(year).date() |
This is really easy to recreate in python as well:
import dateutil.parser
import datetime
year = "2010"
dateutil.parser.parse(year).date()
Out[1]: datetime.date(2010, 4, 1)
When the data.dwc:day or data.dwc:month is missing, but a data.dwc:year is provided, the datecollected column is assigned the current month and day.
This error comes from this:
idb-backend/idb/helpers/conversions.py
Lines 544 to 606 in 3c9551c
Here is the line causing this issue:
idb-backend/idb/helpers/conversions.py
Line 599 in 3c9551c
This is really easy to recreate in python as well:
Out[1]: datetime.date(2010, 4, 1)