I have strings that show a date in the following format:
x minutes/hours/days/months/years ago
I need to parse that to a datetime using Python.
Custom function to convert strings such as "x hours ago" or "x hour, y mins ago" to a datetime in Python.
Function takes single parameter of type string which is parsed using RegExp. RegExp can be customized to match function input.
For usage see examples below.
import re
from datetime import datetime, timedelta
def convert_datetime(datetime_ago):
    """Convert a relative "... ago" string into an absolute datetime.

    Recognised units, in this fixed order and each optional, are weeks,
    days, hours, minutes and seconds, e.g.
    ``'1 week, 6 days, 11 hours, 20 mins, 13 secs ago'``.  Minutes and
    seconds may be abbreviated (``min``/``mins``, ``sec``/``secs``) or
    spelled out (``minute``/``minutes``, ``second``/``seconds``).

    :param datetime_ago: text containing the relative time expression.
    :return: ``datetime.today()`` minus the parsed offset, or ``None`` when
        the text contains no recognised quantity (for example
        ``'3 months ago'``, whose unit is not supported).
    """
    matches = re.search(
        r"(?:(?P<weeks>\d+) weeks?,? )?"
        r"(?:(?P<days>\d+) days?,? )?"
        r"(?:(?P<hours>\d+) hours?,? )?"
        r"(?:(?P<minutes>\d+) min(?:ute)?s?,? )?"
        r"(?:(?P<seconds>\d+) sec(?:ond)?s? )?ago",
        datetime_ago,
    )
    if not matches:
        return None

    # Group names double as timedelta keyword arguments.
    date_pieces = {
        unit: int(value)
        for unit, value in matches.groupdict().items()
        if value is not None
    }
    # Every group is optional, so a bare "ago" also matches; without at
    # least one quantity there is nothing to subtract, so report failure
    # instead of silently returning the current time.
    if not date_pieces:
        return None

    return datetime.today() - timedelta(**date_pieces)
Example usage:
# Sample inputs, from the most detailed expression down to a single unit.
dates = [
    '1 week, 6 days, 11 hours, 20 mins, 13 secs ago',
    '1 week, 10 hours ago',
    '1 week, 1 day ago',
    '6 days, 11 hours, 20 mins ago',
    '1 hour ago',
    '11 hours, 20 mins ago',
    '20 mins 10 secs ago',
    '10 secs ago',
    '1 sec ago',
]

# Print the absolute datetime computed for each sample, one per line.
for date in dates:
    resolved = convert_datetime(date)
    print(resolved)
Output:
2019-05-10 06:26:40.937027
2019-05-16 07:46:53.937027
2019-05-15 17:46:53.937027
2019-05-17 06:26:53.937027
2019-05-23 16:46:53.937027
2019-05-23 06:26:53.937027
2019-05-23 17:26:43.937027
2019-05-23 17:46:43.937027
2019-05-23 17:46:52.937027
This can be done easily with timedeltas:
import datetime
def string_to_delta(string_delta):
    """Turn a three-word string such as ``"20 hours ago"`` into a timedelta.

    The string must consist of exactly three whitespace-separated words:
    an amount, a unit and a trailing word (e.g. ``ago``) that is ignored.
    The unit is used as a ``datetime.timedelta`` keyword, so it is
    lower-cased and pluralised first; this makes ``"1 hour ago"`` and
    ``"3 Days ago"`` work as well as ``"20 hours ago"``.

    :param string_delta: text of the form ``"<amount> <unit> <word>"``.
    :return: the corresponding ``datetime.timedelta``.
    :raises ValueError: if the text does not have exactly three words or
        the amount is not a number.
    :raises TypeError: if the unit is not a timedelta keyword (for example
        months or years).
    """
    value, unit, _ = string_delta.split()
    # timedelta only accepts plural, lower-case keyword names.
    unit = unit.lower()
    if not unit.endswith('s'):
        unit += 's'
    return datetime.timedelta(**{unit: float(value)})
Producing:
>>> string_to_delta("20 hours ago")
datetime.timedelta(0, 72000)
This will require some extra work to deal with months and years, because adding a month to a date is an ambiguous operation; it should be a simple addition once you know what you want it to mean.
To get an actual time, simply take the delta away from datetime.datetime.now().
Make sure to install the dependencies (python-dateutil) using pip3.
from datetime import date
from dateutil.relativedelta import relativedelta
import re
# Reference date the offsets are subtracted from; swap in a fixed value
# such as date(2020, 4, 29) for reproducible results.
baseDate = date.today()

# One pattern per unit: a 1-3 digit count followed by "<unit>(s) ago".
hoursPattern = re.compile(r'(\d\d?\d?) hours? ago')
daysPattern = re.compile(r'(\d\d?\d?) days? ago')
weeksPattern = re.compile(r'(\d\d?\d?) weeks? ago')
monthsPattern = re.compile(r'(\d\d?\d?) months? ago')
yearsPattern = re.compile(r'(\d\d?\d?) years? ago')

# NOTE(review): `ago` (the text being parsed) is not defined in this
# snippet; it is presumably supplied by surrounding code - confirm.
# Each count falls back to 0 when its unit does not appear in the text.
daysMatch = daysPattern.search(ago)
days = int(daysMatch.group(1)) if daysMatch else 0

hoursMatch = hoursPattern.search(ago)
hours = int(hoursMatch.group(1)) if hoursMatch else 0

weeksMatch = weeksPattern.search(ago)
weeks = int(weeksMatch.group(1)) if weeksMatch else 0

monthsMatch = monthsPattern.search(ago)
months = int(monthsMatch.group(1)) if monthsMatch else 0

yearsMatch = yearsPattern.search(ago)
years = int(yearsMatch.group(1)) if yearsMatch else 0

# Subtract all collected offsets from the reference date in one step.
yourDate = baseDate - relativedelta(
    years=years,
    months=months,
    weeks=weeks,
    days=days,
    hours=hours,
)
A completely exaggerated solution, but I needed something more flexible:
def string_to_delta(relative):
    """Parse a relative time phrase into ``(timedelta, past)``.

    Accepts phrases such as ``"2 days ago"``, ``"in 60 seconds"``,
    ``"2 DAY and 4 Secs"`` or ``"in 1 year, 1 Month, 2 days and 4 MICRO"``.
    The phrase is lower-cased and split on whitespace; a trailing ``ago``
    marks the offset as lying in the past, a leading ``in`` is dropped,
    and ``and`` tokens are skipped.  The remaining tokens must come in
    ``<number> <unit>`` pairs, where the unit is recognised by prefix.

    Months and years are approximated as 30 and 365 days, so 12 months
    do not add up to 1 year.

    :param relative: the phrase to parse.
    :return: a tuple ``(datetime.timedelta, past)``; ``past`` is ``True``
        only when the phrase ends with ``ago``.
    :raises ValueError: if the phrase cannot be parsed, including when a
        number has no unit or the unit is not recognised.
    """
    # (unit prefix, timedelta keyword, multiplier).  The prefixes are
    # mutually exclusive, so the first hit is the only possible hit.
    unit_mapping = [('mic', 'microseconds', 1),
                    ('millis', 'microseconds', 1000),
                    ('sec', 'seconds', 1),
                    ('min', 'seconds', 60),
                    ('hour', 'seconds', 3600),
                    ('day', 'days', 1),
                    ('week', 'days', 7),
                    ('mon', 'days', 30),
                    ('year', 'days', 365)]
    try:
        # split() without an argument tolerates repeated whitespace.
        tokens = relative.lower().split()
        past = False
        if tokens[-1] == 'ago':
            past = True
            tokens = tokens[:-1]
        elif tokens[0] == 'in':
            tokens = tokens[1:]

        units = dict(days=0, seconds=0, microseconds=0)
        # Tokens are consumed pairwise: a number, then its unit.
        remaining = iter(tokens)
        for value in remaining:
            if value == 'and':  # just skip this token
                continue
            value = float(value)
            unit = next(remaining, None)
            if unit is None:
                raise ValueError("missing unit after %s" % value)
            for match, time_unit, time_constant in unit_mapping:
                if unit.startswith(match):
                    units[time_unit] += value * time_constant
                    break
            else:
                # An unrecognised unit used to be dropped silently, which
                # produced a wrong (too small) delta.
                raise ValueError("unknown unit %r" % unit)
        return datetime.timedelta(**units), past
    except Exception as e:
        raise ValueError("Don't know how to parse %s: %s" % (relative, e))
This can parse things like:
2 days ago
in 60 seconds
2 DAY and 4 Secs
in 1 year, 1 Month, 2 days and 4 MICRO
2 Weeks 4 secs ago
7 millis ago
A huge but: It simplifies month and year to 30 and 365 days respectively. Not always what you want, though it's enough for some cases.
Easiest way is to use dateparser:
import dateparser
# Relative expression to resolve against the current date.
date_ago = "4 months ago"
# Parse the expression itself (the original passed the not-yet-defined
# name `date`) and format the resulting datetime as YYYY-MM-DD.
date = dateparser.parse(date_ago).strftime("%Y-%m-%d")
date
Output:
'2020-01-08'
Since your arguments look like "2 days ago", "3 months ago" or "2 years ago", the function below can help you get the exact date for each of them. You first need to import the following date utilities:
import datetime
from dateutil.relativedelta import relativedelta
Then implement the function below
def get_past_date(str_days_ago):
    """Return the ISO date described by a phrase such as ``'3 days ago'``.

    Supported inputs are the single words ``today`` and ``yesterday`` and
    phrases of the form ``"<integer> <unit> ..."`` where the unit is an
    hour, day, week, month or year spelling (see the tuples below).  Only
    the first two words are inspected; matching is case-insensitive.

    :param str_days_ago: the phrase to interpret.
    :return: the resulting date as a ``YYYY-MM-DD`` string, or the string
        ``"Wrong Argument format"`` when the phrase is empty, has an
        unknown unit, or does not start with an integer amount.
    """
    wrong_format = "Wrong Argument format"
    today = datetime.date.today()
    tokens = str_days_ago.split()

    if len(tokens) == 1:
        keyword = tokens[0].lower()
        if keyword == 'today':
            return today.isoformat()
        if keyword == 'yesterday':
            return (today - datetime.timedelta(days=1)).isoformat()

    # Everything else needs at least "<amount> <unit>"; report bad input
    # through the documented error string instead of raising IndexError.
    if len(tokens) < 2:
        return wrong_format

    unit = tokens[1].lower()
    try:
        amount = int(tokens[0])
    except ValueError:
        return wrong_format

    # Fixed-length units only need the standard library.
    if unit in ('hour', 'hours', 'hr', 'hrs', 'h'):
        moment = datetime.datetime.now() - datetime.timedelta(hours=amount)
        return moment.date().isoformat()
    if unit in ('day', 'days', 'd'):
        return (today - datetime.timedelta(days=amount)).isoformat()
    if unit in ('wk', 'wks', 'week', 'weeks', 'w'):
        return (today - datetime.timedelta(weeks=amount)).isoformat()
    # Months and years vary in length, so calendar-aware arithmetic is
    # delegated to relativedelta.
    if unit in ('mon', 'mons', 'month', 'months', 'm'):
        return (today - relativedelta(months=amount)).isoformat()
    if unit in ('yrs', 'yr', 'years', 'year', 'y'):
        return (today - relativedelta(years=amount)).isoformat()
    return wrong_format
You can then call the function like this:
# Example calls; print() with one argument works on Python 2 and 3 alike.
print(get_past_date('5 hours ago'))
print(get_past_date('yesterday'))
print(get_past_date('3 days ago'))
print(get_past_date('4 months ago'))
print(get_past_date('2 years ago'))
print(get_past_date('today'))