Skip to content

datecollected assigned as current month and day #229

@mgaynor1

Description

@mgaynor1

When the data.dwc:day or data.dwc:month is missing, but a data.dwc:year is provided, the datecollected column is assigned the current month and day.

This error comes from the `dateGrabber` function:

def dateGrabber(t, d):
    """Collect the date fields for an item of type ``t`` from the data ``d``.

    NOTE(review): this snippet was pasted without indentation; the nesting
    shown here is reconstructed from the syntax and should be checked against
    the real module.

    Parameters:
        t: key into the type table ``df`` below ("records", "mediarecords",
            "publishers" or "recordsets"); any other key raises KeyError.
        d: source data; it is only ever handed on to ``getfield``.

    Returns:
        A dict with one entry per output name listed for ``t`` (the parsed
        value, or None when the source field is absent or cannot be parsed),
        plus "startdayofyear" whenever "datecollected" ends up non-None.
        "datecollected" is the parser's datetime when it comes from
        dwc:eventDate and a plain date when it comes from the fallback.
    """
    r = {}
    # Per item type: [output key in r, source field name passed to getfield].
    df = {
        "records": [
            ["datemodified", "idigbio:dateModified"],
            ["datecollected", "dwc:eventDate"],
        ],
        "mediarecords": [
            ["modified", "dcterms:modified"],
            ["datemodified", "idigbio:dateModified"],
        ],
        "publishers": [
            ["datemodified", "idigbio:dateModified"],
        ],
        "recordsets": [
            ["datemodified", "idigbio:dateModified"],
        ]
    }
    for f in df[t]:
        fv = getfield(f[1], d)
        if fv is not None:
            # dates are more sensitive to lower case than upper.
            fv = fv.upper()
            try:
                x = dateutil.parser.parse(fv)
                # Naive results are stamped as UTC.
                if x.tzinfo is None:
                    x = x.replace(tzinfo=pytz.utc)
                try:
                    # The comparison result is discarded; the statement only
                    # probes whether x can be compared with an aware "now".
                    x < datetime.datetime.now(pytz.utc)
                except:
                    # Comparison failed: force the value to UTC.
                    x = x.replace(tzinfo=pytz.utc)
                r[f[0]] = x
            except:
                # Any parse failure is swallowed; the key is set to None below.
                pass
        # Every listed output key is present in r, even without a value.
        if f[0] not in r:
            r[f[0]] = None
    # Fallback for a missing or unparseable dwc:eventDate: rebuild the date
    # from the separate year / month / day / startDayOfYear fields.
    if "datecollected" in r and r["datecollected"] is None:
        year = getfield("dwc:year", d)
        month = getfield("dwc:month", d)
        day = getfield("dwc:day", d)
        sd_of_year = getfield("dwc:startDayOfYear", d)
        if year is not None:
            try:
                if month is not None:
                    if day is not None:
                        # Full year-month-day available.
                        r["datecollected"] = dateutil.parser.parse(
                            "{0}-{1}-{2}".format(year, month, day)).date()
                    elif sd_of_year is not None:
                        # NOTE(review): only reached when month is set and day
                        # is not. `year` goes to datetime.datetime() without
                        # conversion; if getfield returns strings (as the
                        # parse(year) call below suggests) this raises
                        # TypeError, which the bare except swallows - confirm.
                        r["datecollected"] = (datetime.datetime(
                            year, 1, 1) + datetime.timedelta(locale.atoi(sd_of_year) - 1)).date()
                    else:
                        # NOTE(review): the string carries no day, so dateutil
                        # takes the day from its `default` (today when none is
                        # given) - the stored day is the current day of month.
                        r["datecollected"] = dateutil.parser.parse(
                            "{0}-{1}".format(year, month)).date()
                else:
                    # NOTE(review): year-only input; dateutil fills month and
                    # day from its `default` (today when none is given), so
                    # the stored date carries the current month and day.
                    # A dwc:day or dwc:startDayOfYear value is ignored here.
                    r["datecollected"] = dateutil.parser.parse(year).date()
            except:
                # Best effort: on any error datecollected stays None.
                pass
    # Day-of-year is derived from whatever datecollected was resolved to.
    if "datecollected" in r and r["datecollected"] is not None:
        r["startdayofyear"] = r["datecollected"].timetuple().tm_yday
    return r

Here is the line causing this issue:

r["datecollected"] = dateutil.parser.parse(year).date()

This is really easy to recreate in Python as well:

import dateutil.parser     
import datetime    
 
year = "2010"   
dateutil.parser.parse(year).date()

Out[1]: datetime.date(2010, 4, 1)

Metadata

Metadata

Assignees

No one assigned

    Labels

    No labels
    No labels

    Type

    No type
    No fields configured for issues without a type.

    Projects

    No projects

    Milestone

    No milestone

    Relationships

    None yet

    Development

    No branches or pull requests

    Issue actions