From 4af599143aa72f3dc5f5607b702f22a731d1cf11 Mon Sep 17 00:00:00 2001
From: df <fieldhouse@gmx.net>
Date: Sun, 19 Sep 2021 05:06:21 +0100
Subject: [PATCH] Shorten proposed file name on create if too long

---
 test/test_compat.py  |  4 +++
 test/test_utils.py   | 26 +++++++++++++++++
 youtube_dl/compat.py | 24 ++++++++++++++++
 youtube_dl/utils.py  | 67 ++++++++++++++++++++++++++++++++++++++------
 4 files changed, 113 insertions(+), 8 deletions(-)

diff --git a/test/test_compat.py b/test/test_compat.py
index 86ff389fd..574495e9f 100644
--- a/test/test_compat.py
+++ b/test/test_compat.py
@@ -19,6 +19,7 @@ from youtube_dl.compat import (
     compat_shlex_split,
     compat_str,
     compat_struct_unpack,
+    compat_textwrap_shorten,
     compat_urllib_parse_unquote,
     compat_urllib_parse_unquote_plus,
     compat_urllib_parse_urlencode,
@@ -121,6 +122,9 @@ class TestCompat(unittest.TestCase):
     def test_struct_unpack(self):
         self.assertEqual(compat_struct_unpack('!B', b'\x00'), (0,))
 
+    def test_compat_textwrap_shorten(self):  # 13-char input, width 11: expect first word + placeholder
+        self.assertEqual(compat_textwrap_shorten('Hello  world!', width=11), 'Hello [...]')
+
 
 if __name__ == '__main__':
     unittest.main()
diff --git a/test/test_utils.py b/test/test_utils.py
index 259c4763e..f9bb45776 100644
--- a/test/test_utils.py
+++ b/test/test_utils.py
@@ -63,6 +63,7 @@ from youtube_dl.utils import (
     pkcs1pad,
     read_batch_urls,
     sanitize_filename,
+    sanitize_open,
     sanitize_path,
     sanitize_url,
     expand_path,
@@ -118,6 +119,16 @@ from youtube_dl.compat import (
 
 
 class TestUtil(unittest.TestCase):
+    @classmethod
+    def setUpClass(cls):
+        cls.tearDown()  # remove files with a .test extension from the cwd before any test runs
+
+    @classmethod
+    def tearDown(cls):
+        # delete every file in the current directory whose extension is .test
+        for leftover in (f for f in os.listdir('.') if os.path.splitext(f)[1] == '.test'):
+            os.remove(leftover)
+
     def test_timeconvert(self):
         self.assertTrue(timeconvert('') is None)
         self.assertTrue(timeconvert('bougrg') is None)
@@ -231,6 +242,21 @@ class TestUtil(unittest.TestCase):
         self.assertEqual(sanitize_path('./abc'), 'abc')
         self.assertEqual(sanitize_path('./../abc'), '..\\abc')
 
+    def test_sanitize_open(self):
+        # a name of some 2800 characters, made up of short words
+        long_name = " I'm a lumberjack ".join(
+            'I sleep all night and I work all day %d' % n for n in range(50))
+        expected = "I sleep all night and I work all day 0 I'm a lumberjack I sleep all night and I work all day 1 I'm a lumberjack I sleep all night and I work all day 2 I'm a lumberjack[...].test"
+        if sys.platform == 'win32':
+            prefix = '.\\'
+        else:
+            # only in this case is the leading directory expected in the result
+            prefix = './'
+            expected = prefix + expected
+        stream, opened_name = sanitize_open(prefix + long_name + '.test', open_mode='w')
+        stream.close()
+        self.assertEqual(opened_name, expected)
+
     def test_sanitize_url(self):
         self.assertEqual(sanitize_url('//foo.bar'), 'http://foo.bar')
         self.assertEqual(sanitize_url('httpss://foo.bar'), 'https://foo.bar')
diff --git a/youtube_dl/compat.py b/youtube_dl/compat.py
index 9e45c454b..6511f0b29 100644
--- a/youtube_dl/compat.py
+++ b/youtube_dl/compat.py
@@ -2997,6 +2997,29 @@ else:
     def compat_ctypes_WINFUNCTYPE(*args, **kwargs):
         return ctypes.WINFUNCTYPE(*args, **kwargs)
 
+# Compat version of textwrap.shorten(), which Py2 textwrap lacks
+# Extractors can use this to précis a long metadata field, e.g.
+# to make a title from a description
+try:
+    from textwrap import shorten as compat_textwrap_shorten
+except ImportError:  # Python 2
+    def compat_textwrap_shorten(
+            text, width, fix_sentence_endings=False, break_long_words=True,
+            break_on_hyphens=True, placeholder=' [...]'):
+        """Collapse whitespace in text and truncate it to fit in width,
+        appending placeholder if anything was dropped (cf Py3 textwrap)"""
+        import textwrap
+        wrap_args = {
+            'fix_sentence_endings': fix_sentence_endings,
+            'break_long_words': break_long_words,
+            'break_on_hyphens': break_on_hyphens}
+        # like Py3 shorten(), normalise whitespace before measuring
+        text = ' '.join(text.split())
+        lines = textwrap.wrap(text, width, **wrap_args)
+        if len(lines) < 2:  # fits as is; wrap() gives [] for blank text
+            return ''.join(lines)
+        return textwrap.wrap(text, width - len(placeholder), **wrap_args)[0] + placeholder
+
 
 __all__ = [
     'compat_HTMLParseError',
@@ -3040,6 +3063,7 @@ __all__ = [
     'compat_struct_pack',
     'compat_struct_unpack',
     'compat_subprocess_get_DEVNULL',
+    'compat_textwrap_shorten',
     'compat_tokenize_tokenize',
     'compat_urllib_error',
     'compat_urllib_parse',
diff --git a/youtube_dl/utils.py b/youtube_dl/utils.py
index e722eed58..ad5f29146 100644
--- a/youtube_dl/utils.py
+++ b/youtube_dl/utils.py
@@ -57,6 +57,7 @@ from .compat import (
     compat_str,
     compat_struct_pack,
     compat_struct_unpack,
+    compat_textwrap_shorten,
     compat_urllib_error,
     compat_urllib_parse,
     compat_urllib_parse_urlencode,
@@ -2036,6 +2037,28 @@ def clean_html(html):
     return html.strip()
 
 
+def reduce_filename(path, reduction=0.5, min_length=20, ellipsis='[...]'):
+    """Try to reduce the filename by a specified reduction factor
+
+    Arguments:
+    path -- the path name to reduce
+    reduction -- factor by which to reduce its filename component
+    min_length -- shortest filename (less extension) worth reducing
+    ellipsis -- placeholder for removed text
+
+    Returns path name with reduced filename, or None if it's too short
+    """
+    stem, ext = os.path.splitext(os.path.basename(path))
+    stem = remove_end(stem, ellipsis)
+    if len(stem) < min_length:
+        return None
+    width = int(1 + reduction * len(stem))
+    short = compat_textwrap_shorten(stem, width, placeholder=ellipsis)
+    if short == ellipsis.lstrip():  # no whole word fitted: cut mid-word
+        short = stem[:max(1, width - len(ellipsis))] + ellipsis
+    return os.path.join(os.path.dirname(path), short + ext)
+
+
 def sanitize_open(filename, open_mode):
     """Try to open the given filename, and slightly tweak it if this fails.
 
@@ -2046,26 +2069,54 @@ def sanitize_open(filename, open_mode):
 
     It returns the tuple (stream, definitive_file_name).
     """
+    def openfile(filename, open_mode):
+        stream = open(encodeFilename(filename), open_mode)
+        return (stream, filename)
+
     try:
         if filename == '-':
             if sys.platform == 'win32':
                 import msvcrt
                 msvcrt.setmode(sys.stdout.fileno(), os.O_BINARY)
             return (sys.stdout.buffer if hasattr(sys.stdout, 'buffer') else sys.stdout, filename)
-        stream = open(encodeFilename(filename), open_mode)
-        return (stream, filename)
+        return openfile(filename, open_mode)
     except (IOError, OSError) as err:
         if err.errno in (errno.EACCES,):
             raise
 
-        # In case of error, try to remove win32 forbidden chars
-        alt_filename = sanitize_path(filename)
-        if alt_filename == filename:
+        if 'w' not in open_mode or '+' in open_mode:
+            # only mung filename when creating the file
             raise
+
+        org_err = err
+
+        # In case of error, try to remove win32 forbidden chars
+        if err.errno in (errno.EINVAL, ):
+            alt_filename = sanitize_path(filename)
+            if alt_filename != filename:
+                try:
+                    return openfile(alt_filename, open_mode)
+                except (IOError, OSError) as new_err:
+                    err = new_err
         else:
-            # An exception here should be caught in the caller
-            stream = open(encodeFilename(alt_filename), open_mode)
-            return (stream, alt_filename)
+            alt_filename = filename
+
+        # Windows: an over-long file name may be rejected by the CreateFile()
+        # API, giving EINVAL, or by the filesystem, perhaps giving
+        # ENAMETOOLONG
+        # POSIX: generally ENAMETOOLONG
+        while err.errno in (errno.ENAMETOOLONG, errno.EINVAL, ):
+            alt_filename = reduce_filename(alt_filename)
+            if not alt_filename:
+                break
+            try:
+                return openfile(alt_filename, open_mode)
+            except (IOError, OSError) as new_err:
+                err = new_err
+
+        # Reduction didn't help; give up and report what initially went wrong
+        # This exception should be caught in the caller
+        raise org_err
 
 
 def timeconvert(timestr):