Disambiguate 4-digit year and time-zone suffix
Restore the check omitted from extract_timezone(); adjust DATE_FORMATS_DAY_FIRST and DATE_FORMATS_MONTH_FIRST; add tests.
This commit is contained in:
parent
1e222005ba
commit
f798b40cf3
@ -371,6 +371,12 @@ class TestUtil(unittest.TestCase):
|
||||
self.assertEqual(unified_timestamp('Sep 11, 2013 | 5:49 AM'), 1378878540)
|
||||
self.assertEqual(unified_timestamp('December 15, 2017 at 7:49 am'), 1513324140)
|
||||
self.assertEqual(unified_timestamp('2018-03-14T08:32:43.1493874+00:00'), 1521016363)
|
||||
self.assertEqual(unified_timestamp('11:31 17-Jun-2021'), 1623929460)
|
||||
self.assertEqual(unified_timestamp('11:31 17-Jun-2021-0000'), 1623929460)
|
||||
from youtube_dl.utils import DATE_FORMATS_DAY_FIRST
|
||||
DATE_FORMATS_DAY_FIRST.append('%H:%M %d-%m-%Y')
|
||||
self.assertEqual(unified_timestamp('17:30 27-02-2016'), 1456594200)
|
||||
self.assertEqual(unified_timestamp('17:30 27-02-2016-0000'), 1456594200)
|
||||
|
||||
def test_determine_ext(self):
|
||||
self.assertEqual(determine_ext('http://example.com/foo/bar.mp4/?download'), 'mp4')
|
||||
|
@ -1,4 +1,3 @@
|
||||
#!/usr/bin/env python
|
||||
# coding: utf-8
|
||||
|
||||
from __future__ import unicode_literals
|
||||
@ -1717,8 +1716,6 @@ ACCENT_CHARS = dict(zip('ÂÃÄÀÁÅÆÇÈÉÊËÌÍÎÏÐÑÒÓÔÕÖŐØŒÙ
|
||||
'aaaaaa', ['ae'], 'ceeeeiiiionooooooo', ['oe'], 'uuuuuy', ['th'], 'y')))
|
||||
|
||||
DATE_FORMATS = (
|
||||
'%d %B %Y',
|
||||
'%d %b %Y',
|
||||
'%B %d %Y',
|
||||
'%B %dst %Y',
|
||||
'%B %dnd %Y',
|
||||
@ -1763,6 +1760,11 @@ DATE_FORMATS_DAY_FIRST.extend([
|
||||
'%d/%m/%Y',
|
||||
'%d/%m/%y',
|
||||
'%d/%m/%Y %H:%M:%S',
|
||||
'%d %B %Y',
|
||||
'%d %b %Y',
|
||||
'%d-%b-%Y',
|
||||
'%H:%M %d-%b-%Y',
|
||||
'%H:%M:%S %d-%b-%Y',
|
||||
])
|
||||
|
||||
DATE_FORMATS_MONTH_FIRST = list(DATE_FORMATS)
|
||||
@ -1772,6 +1774,11 @@ DATE_FORMATS_MONTH_FIRST.extend([
|
||||
'%m/%d/%Y',
|
||||
'%m/%d/%y',
|
||||
'%m/%d/%Y %H:%M:%S',
|
||||
'%B %d %Y',
|
||||
'%b %d %Y',
|
||||
'%b-%d-%Y',
|
||||
'%H:%M %b-%d-%Y',
|
||||
'%H:%M:%S %b-%d-%Y',
|
||||
])
|
||||
|
||||
PACKED_CODES_RE = r"}\('(.+)',(\d+),(\d+),'([^']+)'\.split\('\|'\)"
|
||||
@ -2939,7 +2946,16 @@ class YoutubeDLRedirectHandler(compat_urllib_request.HTTPRedirectHandler):
|
||||
|
||||
def extract_timezone(date_str):
|
||||
m = re.search(
|
||||
r'^.{8,}?(?P<tz>Z$| ?(?P<sign>\+|-)(?P<hours>[0-9]{2}):?(?P<minutes>[0-9]{2})$)',
|
||||
r'''(?x)
|
||||
^.{8,}? # >=8 char non-TZ prefix, if present
|
||||
(?P<tz>Z| # just the UTC Z, or
|
||||
(?:(?<=.\b\d{4}|\b\d{2}:\d\d)| # preceded by 4 digits or hh:mm or
|
||||
(?<!.\b[a-zA-Z]{3}|[a-zA-Z]{4}|..\b\d\d)) # not preceded by 3 alpha word or >= 4 alpha or 2 digits
|
||||
[ ]? # optional space
|
||||
(?P<sign>\+|-) # +/-
|
||||
(?P<hours>[0-9]{2}):?(?P<minutes>[0-9]{2}) # hh[:]mm
|
||||
$)
|
||||
''',
|
||||
date_str)
|
||||
if not m:
|
||||
timezone = datetime.timedelta()
|
||||
|
Loading…
x
Reference in New Issue
Block a user