From 4af599143aa72f3dc5f5607b702f22a731d1cf11 Mon Sep 17 00:00:00 2001 From: df Date: Sun, 19 Sep 2021 05:06:21 +0100 Subject: [PATCH] Shorten proposed file name on create if too long --- test/test_compat.py | 4 +++ test/test_utils.py | 26 +++++++++++++++++ youtube_dl/compat.py | 24 ++++++++++++++++ youtube_dl/utils.py | 67 ++++++++++++++++++++++++++++++++++++++------ 4 files changed, 113 insertions(+), 8 deletions(-) diff --git a/test/test_compat.py b/test/test_compat.py index 86ff389fd..574495e9f 100644 --- a/test/test_compat.py +++ b/test/test_compat.py @@ -19,6 +19,7 @@ from youtube_dl.compat import ( compat_shlex_split, compat_str, compat_struct_unpack, + compat_textwrap_shorten, compat_urllib_parse_unquote, compat_urllib_parse_unquote_plus, compat_urllib_parse_urlencode, @@ -121,6 +122,9 @@ class TestCompat(unittest.TestCase): def test_struct_unpack(self): self.assertEqual(compat_struct_unpack('!B', b'\x00'), (0,)) + def test_compat_textwrap_shorten(self): + self.assertEqual(compat_textwrap_shorten('Hello world!', width=11), 'Hello [...]') + if __name__ == '__main__': unittest.main() diff --git a/test/test_utils.py b/test/test_utils.py index 259c4763e..f9bb45776 100644 --- a/test/test_utils.py +++ b/test/test_utils.py @@ -63,6 +63,7 @@ from youtube_dl.utils import ( pkcs1pad, read_batch_urls, sanitize_filename, + sanitize_open, sanitize_path, sanitize_url, expand_path, @@ -118,6 +119,16 @@ from youtube_dl.compat import ( class TestUtil(unittest.TestCase): + @classmethod + def setUpClass(cls): + cls.tearDown() + + @classmethod + def tearDown(cls): + for tf in os.listdir('.'): + if os.path.splitext(tf)[1] == '.test': + os.remove(tf) + def test_timeconvert(self): self.assertTrue(timeconvert('') is None) self.assertTrue(timeconvert('bougrg') is None) @@ -231,6 +242,21 @@ class TestUtil(unittest.TestCase): self.assertEqual(sanitize_path('./abc'), 'abc') self.assertEqual(sanitize_path('./../abc'), '..\\abc') + def test_sanitize_open(self): + long_name = " I'm a 
# Compat version of textwrap.shorten(), which is not in the Py2 textwrap module.
# Extractors can use this to summarise a long metadata field, eg
# to make a title from a description
def _compat_textwrap_shorten_fallback(
        text, width, fix_sentence_endings=False, break_long_words=True,
        break_on_hyphens=True, placeholder=' [...]'):
    """Shorten text to at most width characters, like textwrap.shorten().

    Runs of whitespace are collapsed to single spaces first.  If the result
    fits in width it is returned as is; otherwise trailing words are dropped
    until the kept text plus placeholder fits.

    Arguments:
    text -- the text to shorten
    width -- maximum length of the returned string
    fix_sentence_endings, break_long_words, break_on_hyphens --
        passed through to textwrap.wrap()
    placeholder -- text appended in place of whatever was dropped

    Returns the shortened text; '' for empty or whitespace-only input; the
    placeholder without its leading whitespace if not even one word fits.
    Raises ValueError if the placeholder alone cannot fit in width.

    NOTE: words are dropped at spaces only, so where textwrap.shorten()
    would cut just after a hyphen this may keep slightly less text.
    """
    import textwrap

    if len(placeholder.lstrip()) > width:
        raise ValueError('placeholder too large for max width')

    # textwrap.shorten() normalises whitespace before measuring anything
    text = ' '.join(text.strip().split())
    lines = textwrap.wrap(
        text, width,
        fix_sentence_endings=fix_sentence_endings,
        break_long_words=break_long_words,
        break_on_hyphens=break_on_hyphens)
    if not lines:
        # empty input: wrap() returns an empty list, so there is no lines[0]
        return ''

    head = lines[0]
    if len(lines) == 1 and len(head) <= width:
        return head

    # Drop whole trailing words until there is room for the placeholder
    while head and len(head) + len(placeholder) > width:
        head = head.rpartition(' ')[0].rstrip()
    if not head:
        return placeholder.lstrip()
    return head + placeholder


try:
    from textwrap import shorten as compat_textwrap_shorten
except ImportError:  # Python 2
    compat_textwrap_shorten = _compat_textwrap_shorten_fallback
def reduce_filename(path, reduction=0.5, min_length=20, ellipsis='[...]'):
    """Try to reduce the filename by a specified reduction factor

    Only the filename component is shortened: the directory part and the
    extension are kept.  An ellipsis left at the end of the name by an
    earlier reduction is removed before measuring, so repeated calls keep
    shrinking the real text instead of stacking placeholders.

    Arguments:
    path -- the path name to reduce
    reduction -- factor by which to reduce its filename component
    min_length -- names (without extension and ellipsis) shorter than this
                  are not reduced any further
    ellipsis -- placeholder for removed text

    Returns path name with reduced filename, or None if the name is already
    shorter than min_length or could not be made any shorter
    """

    dirname, basename = os.path.split(path)
    stem, ext = os.path.splitext(basename)
    stem = remove_end(stem, ellipsis)
    stem_len = len(stem)
    if stem_len < min_length:
        # give up
        return None

    width = int(1 + reduction * stem_len)
    shortened = compat_textwrap_shorten(stem, width, placeholder=ellipsis)
    if shortened == ellipsis.lstrip():
        # No whole word fitted (eg, a name without any spaces), so only the
        # placeholder came back: cut the text at a fixed length instead of
        # discarding all of it
        keep = max(0, width - len(ellipsis))
        shortened = stem[:keep].rstrip() + ellipsis
    if len(shortened) >= stem_len:
        # no progress (eg, reduction >= 1): report failure so that callers
        # retrying in a loop terminate
        return None
    return os.path.join(dirname, shortened + ext)


def sanitize_open(filename, open_mode):
    """Try to open the given filename, and slightly tweak it if this fails.

    A filename of '-' selects standard output (its binary buffer where there
    is one; on win32 stdout is first switched to binary mode).

    If opening fails with anything but EACCES, and the file was being created
    (open_mode contains 'w' and no '+'), alternative names are tried:
    on EINVAL, the name as processed by sanitize_path(); then, while the
    error is ENAMETOOLONG or EINVAL, successively shorter names produced by
    reduce_filename().

    It returns the tuple (stream, definitive_file_name).

    Raises the IOError/OSError from the first attempt if no alternative
    name could be opened.
    """

    def openfile(filename, open_mode):
        stream = open(encodeFilename(filename), open_mode)
        return (stream, filename)

    try:
        if filename == '-':
            if sys.platform == 'win32':
                import msvcrt
                msvcrt.setmode(sys.stdout.fileno(), os.O_BINARY)
            return (sys.stdout.buffer if hasattr(sys.stdout, 'buffer') else sys.stdout, filename)
        return openfile(filename, open_mode)
    except (IOError, OSError) as err:
        if err.errno in (errno.EACCES,):
            raise
        if 'w' not in open_mode or '+' in open_mode:
            # only mung filename when creating the file
            raise

        # Kept so that the caller sees the failure for the name it asked for
        org_err = err

        # In case of error, try to remove win32 forbidden chars
        if err.errno in (errno.EINVAL, ):
            alt_filename = sanitize_path(filename)
            if alt_filename != filename:
                try:
                    return openfile(alt_filename, open_mode)
                except (IOError, OSError) as new_err:
                    err = new_err
        else:
            alt_filename = filename

        # Windows: an over-long file name can be detected by the CreateFile()
        # API, and then get EINVAL, or by the filesystem, and then perhaps
        # ENAMETOOLONG
        # POSIX: ENAMETOOLONG in general
        while err.errno in (errno.ENAMETOOLONG, errno.EINVAL, ):
            alt_filename = reduce_filename(alt_filename)
            if not alt_filename:
                break
            try:
                return openfile(alt_filename, open_mode)
            except (IOError, OSError) as new_err:
                err = new_err

        # Reduction didn't help; give up and report what initially went wrong
        # This exception should be caught in the caller
        raise org_err