I need to parse RFC 3339 strings like \"2008-09-03T20:56:35.450686Z\"
into Python\'s datetime
type.
I have found strptime in the Python sta
Because ISO 8601 allows many variations of optional colons and dashes being present, basically CCYY-MM-DDThh:mm:ss[Z|(+|-)hh:mm]
. If you want to use strptime, you need to strip out those variations first.
The goal is to generate a utc datetime object.
2016-06-29T19:36:29.3453Z
:
datetime.datetime.strptime(timestamp.translate(None, ':-'), "%Y%m%dT%H%M%S.%fZ")
2016-06-29T19:36:29.3453-0400
or 2008-09-03T20:56:35.450686+05:00
use the following. These will convert all variations into something without variable delimiters like 20080903T205635.450686+0500
making it more consistent/easier to parse.
import re
# this regex removes all colons and all
# dashes EXCEPT for the dash indicating + or - utc offset for the timezone
conformed_timestamp = re.sub(r"[:]|([-](?!((\d{2}[:]\d{2})|(\d{4}))$))", '', timestamp)
datetime.datetime.strptime(conformed_timestamp, "%Y%m%dT%H%M%S.%f%z" )
%z
strptime directive (you see something like ValueError: 'z' is a bad directive in format '%Y%m%dT%H%M%S.%f%z'
) then you need to manually offset the time from Z
(UTC). Note %z
may not work on your system in python versions < 3 as it depended on the c library support which varies across system/python build type (i.e. Jython, Cython, etc.).
import re
import datetime
# this regex removes all colons and all
# dashes EXCEPT for the dash indicating + or - utc offset for the timezone
conformed_timestamp = re.sub(r"[:]|([-](?!((\d{2}[:]\d{2})|(\d{4}))$))", '', timestamp)
# split on the offset to remove it. use a capture group to keep the delimiter
split_timestamp = re.split(r"[+|-]",conformed_timestamp)
main_timestamp = split_timestamp[0]
if len(split_timestamp) == 3:
sign = split_timestamp[1]
offset = split_timestamp[2]
else:
sign = None
offset = None
# generate the datetime object without the offset at UTC time
output_datetime = datetime.datetime.strptime(main_timestamp +"Z", "%Y%m%dT%H%M%S.%fZ" )
if offset:
# create timedelta based on offset
offset_delta = datetime.timedelta(hours=int(sign+offset[:-2]), minutes=int(sign+offset[-2:]))
# offset datetime with timedelta
output_datetime = output_datetime + offset_delta
Initially I tried with:
from operator import neg, pos
from time import strptime, mktime
from datetime import datetime, tzinfo, timedelta
class MyUTCOffsetTimezone(tzinfo):
@staticmethod
def with_offset(offset_no_signal, signal): # type: (str, str) -> MyUTCOffsetTimezone
return MyUTCOffsetTimezone((pos if signal == '+' else neg)(
(datetime.strptime(offset_no_signal, '%H:%M') - datetime(1900, 1, 1))
.total_seconds()))
def __init__(self, offset, name=None):
self.offset = timedelta(seconds=offset)
self.name = name or self.__class__.__name__
def utcoffset(self, dt):
return self.offset
def tzname(self, dt):
return self.name
def dst(self, dt):
return timedelta(0)
def to_datetime_tz(dt): # type: (str) -> datetime
fmt = '%Y-%m-%dT%H:%M:%S.%f'
if dt[-6] in frozenset(('+', '-')):
dt, sign, offset = strptime(dt[:-6], fmt), dt[-6], dt[-5:]
return datetime.fromtimestamp(mktime(dt),
tz=MyUTCOffsetTimezone.with_offset(offset, sign))
elif dt[-1] == 'Z':
return datetime.strptime(dt, fmt + 'Z')
return datetime.strptime(dt, fmt)
But that didn't work on negative timezones. This however I got working fine, in Python 3.7.3:
from datetime import datetime
def to_datetime_tz(dt): # type: (str) -> datetime
fmt = '%Y-%m-%dT%H:%M:%S.%f'
if dt[-6] in frozenset(('+', '-')):
return datetime.strptime(dt, fmt + '%z')
elif dt[-1] == 'Z':
return datetime.strptime(dt, fmt + 'Z')
return datetime.strptime(dt, fmt)
Some tests, note that the out only differs by precision of microseconds. Got to 6 digits of precision on my machine, but YMMV:
for dt_in, dt_out in (
('2019-03-11T08:00:00.000Z', '2019-03-11T08:00:00'),
('2019-03-11T08:00:00.000+11:00', '2019-03-11T08:00:00+11:00'),
('2019-03-11T08:00:00.000-11:00', '2019-03-11T08:00:00-11:00')
):
isoformat = to_datetime_tz(dt_in).isoformat()
assert isoformat == dt_out, '{} != {}'.format(isoformat, dt_out)
import re,datetime s="2008-09-03T20:56:35.450686Z" d=datetime.datetime(*map(int, re.split('[^\d]', s)[:-1]))