Fix urlhandle_detect_ext() non-ASCII error in Py2, with test

This commit is contained in:
df 2021-08-29 05:34:20 +01:00
parent 197215782b
commit 1e222005ba
2 changed files with 31 additions and 2 deletions

View File

@ -105,6 +105,7 @@ from youtube_dl.utils import (
cli_valueless_option,
cli_bool_option,
parse_codecs,
urlhandle_detect_ext,
)
from youtube_dl.compat import (
compat_chr,
@ -1475,6 +1476,30 @@ Line 1
self.assertEqual(clean_podcast_url('https://www.podtrac.com/pts/redirect.mp3/chtbl.com/track/5899E/traffic.megaphone.fm/HSW7835899191.mp3'), 'https://traffic.megaphone.fm/HSW7835899191.mp3')
self.assertEqual(clean_podcast_url('https://play.podtrac.com/npr-344098539/edge1.pod.npr.org/anon.npr-podcasts/podcast/npr/waitwait/2020/10/20201003_waitwait_wwdtmpodcast201003-015621a5-f035-4eca-a9a1-7c118d90bc3c.mp3'), 'https://edge1.pod.npr.org/anon.npr-podcasts/podcast/npr/waitwait/2020/10/20201003_waitwait_wwdtmpodcast201003-015621a5-f035-4eca-a9a1-7c118d90bc3c.mp3')
def test_urlhandle_detect_ext(self):
    """Check the extension urlhandle_detect_ext() reports for stub responses.

    Each case feeds a header mapping (byte-string values) through a minimal
    stand-in object exposing only ``headers`` and compares the result with
    the expected extension.
    """

    class FakeResponse(object):
        # Stand-in for a URL handle: only the ``headers`` mapping is provided.
        _headers = {}

        def __init__(self, headers):
            self._headers = headers

        @property
        def headers(self):
            return self._headers

    cases = (
        # Content-Disposition holds a non-ASCII (ISO 8859-1) byte and its
        # filename extension disagrees with Content-Type
        (
            {
                'Content-Disposition': b'attachment; filename="Epis\xf3dio contains non-ASCI ISO 8859-1 character.mp3"',
                'Content-Type': b'audio/aac',
            },
            'mp3',
        ),
        # only Content-Type is present, no Content-Disposition
        (
            {
                'Content-Type': b'audio/mp3',
            },
            'mp3',
        ),
    )
    for header_map, expected_ext in cases:
        response = FakeResponse(header_map)
        self.assertEqual(urlhandle_detect_ext(response), expected_ext)
# Run the test suite via unittest when this file is executed as a script.
if __name__ == '__main__':
unittest.main()

View File

@ -4288,7 +4288,10 @@ def parse_codecs(codecs_str):
def urlhandle_detect_ext(url_handle):
getheader = url_handle.headers.get
cd = getheader('Content-Disposition')
def encode_compat_str_or_none(x, encoding='iso-8859-1', errors='ignore'):
return encode_compat_str(x, encoding=encoding, errors=errors) if x else None
cd = encode_compat_str_or_none(getheader('Content-Disposition'))
if cd:
m = re.match(r'attachment;\s*filename="(?P<filename>[^"]+)"', cd)
if m:
@ -4296,7 +4299,8 @@ def urlhandle_detect_ext(url_handle):
if e:
return e
return mimetype2ext(getheader('Content-Type'))
ct = encode_compat_str_or_none(getheader('Content-Type'))
return mimetype2ext(ct)
def encode_data_uri(data, mime_type):