diff --git a/test/test_utils.py b/test/test_utils.py index 44a4f6ff7..14607f6b8 100644 --- a/test/test_utils.py +++ b/test/test_utils.py @@ -371,6 +371,12 @@ class TestUtil(unittest.TestCase): self.assertEqual(unified_timestamp('Sep 11, 2013 | 5:49 AM'), 1378878540) self.assertEqual(unified_timestamp('December 15, 2017 at 7:49 am'), 1513324140) self.assertEqual(unified_timestamp('2018-03-14T08:32:43.1493874+00:00'), 1521016363) + self.assertEqual(unified_timestamp('11:31 17-Jun-2021'), 1623929460) + self.assertEqual(unified_timestamp('11:31 17-Jun-2021-0000'), 1623929460) + from youtube_dl.utils import DATE_FORMATS_DAY_FIRST + DATE_FORMATS_DAY_FIRST.append('%H:%M %d-%m-%Y') + self.assertEqual(unified_timestamp('17:30 27-02-2016'), 1456594200) + self.assertEqual(unified_timestamp('17:30 27-02-2016-0000'), 1456594200) def test_determine_ext(self): self.assertEqual(determine_ext('http://example.com/foo/bar.mp4/?download'), 'mp4') diff --git a/youtube_dl/utils.py b/youtube_dl/utils.py index 5dde9768d..90eb9f93c 100644 --- a/youtube_dl/utils.py +++ b/youtube_dl/utils.py @@ -1,4 +1,3 @@ -#!/usr/bin/env python # coding: utf-8 from __future__ import unicode_literals @@ -1717,8 +1716,6 @@ ACCENT_CHARS = dict(zip('ÂÃÄÀÁÅÆÇÈÉÊËÌÍÎÏÐÑÒÓÔÕÖŐØŒÙ 'aaaaaa', ['ae'], 'ceeeeiiiionooooooo', ['oe'], 'uuuuuy', ['th'], 'y'))) DATE_FORMATS = ( - '%d %B %Y', - '%d %b %Y', '%B %d %Y', '%B %dst %Y', '%B %dnd %Y', @@ -1763,6 +1760,11 @@ DATE_FORMATS_DAY_FIRST.extend([ '%d/%m/%Y', '%d/%m/%y', '%d/%m/%Y %H:%M:%S', + '%d %B %Y', + '%d %b %Y', + '%d-%b-%Y', + '%H:%M %d-%b-%Y', + '%H:%M:%S %d-%b-%Y', ]) DATE_FORMATS_MONTH_FIRST = list(DATE_FORMATS) @@ -1772,6 +1774,11 @@ DATE_FORMATS_MONTH_FIRST.extend([ '%m/%d/%Y', '%m/%d/%y', '%m/%d/%Y %H:%M:%S', + '%B %d %Y', + '%b %d %Y', + '%b-%d-%Y', + '%H:%M %b-%d-%Y', + '%H:%M:%S %b-%d-%Y', ]) PACKED_CODES_RE = r"}\('(.+)',(\d+),(\d+),'([^']+)'\.split\('\|'\)" @@ -2939,7 +2946,16 @@ class YoutubeDLRedirectHandler(compat_urllib_request.HTTPRedirectHandler): def extract_timezone(date_str): m = re.search( - r'^.{8,}?(?PZ$| ?(?P\+|-)(?P[0-9]{2}):?(?P[0-9]{2})$)', + r'''(?x) + ^.{8,}? # >=8 char non-TZ prefix, if present + (?PZ| # just the UTC Z, or + (?:(?<=.\b\d{4}|\b\d{2}:\d\d)| # preceded by 4 digits or hh:mm or + (?= 4 alpha or 2 digits + [ ]? # optional space + (?P\+|-) # +/- + (?P[0-9]{2}):?(?P[0-9]{2}) # hh[:]mm + $) + ''', date_str) if not m: timezone = datetime.timedelta()