Shorten proposed file name on create if too long

This commit is contained in:
df 2021-09-19 05:06:21 +01:00
parent a803582717
commit 4af599143a
4 changed files with 113 additions and 8 deletions

View File

@ -19,6 +19,7 @@ from youtube_dl.compat import (
compat_shlex_split,
compat_str,
compat_struct_unpack,
compat_textwrap_shorten,
compat_urllib_parse_unquote,
compat_urllib_parse_unquote_plus,
compat_urllib_parse_urlencode,
@ -121,6 +122,9 @@ class TestCompat(unittest.TestCase):
def test_struct_unpack(self):
    """Unpacking one zero byte with format '!B' must give the 1-tuple (0,)."""
    unpacked = compat_struct_unpack('!B', b'\x00')
    self.assertEqual(unpacked, (0,))
def test_compat_textwrap_shorten(self):
    """A 12-character text squeezed into width 11 must come back as 'Hello [...]'."""
    shortened = compat_textwrap_shorten('Hello world!', width=11)
    self.assertEqual(shortened, 'Hello [...]')
if __name__ == '__main__':
unittest.main()

View File

@ -63,6 +63,7 @@ from youtube_dl.utils import (
pkcs1pad,
read_batch_urls,
sanitize_filename,
sanitize_open,
sanitize_path,
sanitize_url,
expand_path,
@ -118,6 +119,16 @@ from youtube_dl.compat import (
class TestUtil(unittest.TestCase):
@classmethod
def setUpClass(cls):
    """Run the class's own clean-up once before any test of the class."""
    # tearDown is declared as a classmethod in this class, so it can be
    # invoked on the class itself, without a test instance
    cls.tearDown()
@classmethod
def tearDown(cls):
    """Remove every entry of the current directory whose extension is '.test'."""
    for entry in os.listdir('.'):
        _, extension = os.path.splitext(entry)
        if extension != '.test':
            continue
        os.remove(entry)
def test_timeconvert(self):
self.assertTrue(timeconvert('') is None)
self.assertTrue(timeconvert('bougrg') is None)
@ -231,6 +242,21 @@ class TestUtil(unittest.TestCase):
self.assertEqual(sanitize_path('./abc'), 'abc')
self.assertEqual(sanitize_path('./../abc'), '..\\abc')
def test_sanitize_open(self):
    """Creating a file with a very long name must yield the expected shortened name.

    The proposed name is 50 numbered phrases joined by a separator; the
    name reported back by sanitize_open() is compared with a fixed,
    platform-specific expectation ending in '[...].test'.
    """
    phrase = 'I sleep all night and I work all day %d'
    long_name = " I'm a lumberjack ".join(phrase % n for n in range(50))
    if sys.platform == 'win32':
        prefix = '.\\'
        expected = "I sleep all night and I work all day 0 I'm a lumberjack I sleep all night and I work all day 1 I'm a lumberjack I sleep all night and I work all day 2 I'm a lumberjack[...].test"
    else:
        prefix = './'
        expected = "./I sleep all night and I work all day 0 I'm a lumberjack I sleep all night and I work all day 1 I'm a lumberjack I sleep all night and I work all day 2 I'm a lumberjack[...].test"
    stream, final_name = sanitize_open(prefix + long_name + '.test', open_mode='w')
    # only the returned name matters here; release the handle straight away
    stream.close()
    self.assertEqual(final_name, expected)
def test_sanitize_url(self):
self.assertEqual(sanitize_url('//foo.bar'), 'http://foo.bar')
self.assertEqual(sanitize_url('httpss://foo.bar'), 'https://foo.bar')

View File

@ -2997,6 +2997,29 @@ else:
def compat_ctypes_WINFUNCTYPE(*args, **kwargs):
return ctypes.WINFUNCTYPE(*args, **kwargs)
# Compat version of textwrap.shorten(), not in Py2 textwrap
# Extractors can use this to précis a long metadata field, e.g.
# to make a title from a description
try:
    from textwrap import shorten as compat_textwrap_shorten
except ImportError:  # Python 2
    def compat_textwrap_shorten(
            text, width, fix_sentence_endings=False, break_long_words=True,
            break_on_hyphens=True, placeholder=' [...]'):
        """Collapse and truncate text to fit in the given width

        Arguments:
        text -- the text to shorten
        width -- maximum length of the result, placeholder included
        fix_sentence_endings, break_long_words, break_on_hyphens --
            passed unchanged to textwrap.wrap()
        placeholder -- appended to the result when text had to be cut

        Returns text with whitespace collapsed if that fits in width
        (the empty string for empty or all-whitespace text); otherwise
        the first line of text wrapped at width less the length of
        placeholder, followed by placeholder.

        textwrap.wrap() rejects a non-positive width with ValueError, so
        width must exceed len(placeholder) whenever text has to be cut.
        """
        import textwrap

        wrap_options = {
            'fix_sentence_endings': fix_sentence_endings,
            'break_long_words': break_long_words,
            'break_on_hyphens': break_on_hyphens,
        }
        # Like textwrap.shorten(), first collapse all whitespace to
        # single spaces
        text = ' '.join(text.strip().split())
        lines = textwrap.wrap(text, width, **wrap_options)
        if not lines:
            # empty or all-whitespace text wraps to no lines at all;
            # indexing the empty list would raise IndexError
            return ''
        if len(lines) == 1:
            # already fits: nothing to cut
            return lines[0]
        # Too long: re-wrap leaving room for the placeholder and keep
        # only the first line
        return textwrap.wrap(
            text, width - len(placeholder), **wrap_options)[0] + placeholder
__all__ = [
'compat_HTMLParseError',
@ -3040,6 +3063,7 @@ __all__ = [
'compat_struct_pack',
'compat_struct_unpack',
'compat_subprocess_get_DEVNULL',
'compat_textwrap_shorten',
'compat_tokenize_tokenize',
'compat_urllib_error',
'compat_urllib_parse',

View File

@ -57,6 +57,7 @@ from .compat import (
compat_str,
compat_struct_pack,
compat_struct_unpack,
compat_textwrap_shorten,
compat_urllib_error,
compat_urllib_parse,
compat_urllib_parse_urlencode,
@ -2036,6 +2037,28 @@ def clean_html(html):
return html.strip()
def reduce_filename(path, reduction=0.5, min_length=20, ellipsis='[...]'):
    """Try to reduce the filename by a specified reduction factor

    Arguments:
    path -- the path name to reduce
    reduction -- factor by which to reduce its filename component
    min_length -- give up if the filename, without its extension and
                  any trailing ellipsis, is shorter than this
    ellipsis -- placeholder for removed text

    Returns path name with reduced filename, or None if the filename
    is too short or cannot be made shorter with the given settings
    """
    dirname, basename = os.path.split(path)
    stem, ext = os.path.splitext(basename)
    # drop the placeholder left by a previous reduction so that
    # placeholders do not pile up on repeated calls
    stem = remove_end(stem, ellipsis)
    flen = len(stem)
    if flen < min_length:
        # give up
        return None
    width = int(1 + reduction * flen)
    if not len(ellipsis) < width < flen:
        # either nothing would be removed (and a caller retrying until
        # the name fits would never finish), or there is no room for
        # any text beside the placeholder: give up
        return None
    shortened = compat_textwrap_shorten(stem, width, placeholder=ellipsis)
    if shortened == ellipsis.lstrip():
        # textwrap.shorten() only cuts at whitespace: when no whole word
        # fits it returns the bare placeholder. Keep a hard-truncated
        # prefix of the name instead of discarding all of it.
        shortened = stem[:width - len(ellipsis)] + ellipsis
    return os.path.join(dirname, shortened + ext)
def sanitize_open(filename, open_mode):
"""Try to open the given filename, and slightly tweak it if this fails.
@ -2046,26 +2069,54 @@ def sanitize_open(filename, open_mode):
It returns the tuple (stream, definitive_file_name).
"""
def openfile(filename, open_mode):
stream = open(encodeFilename(filename), open_mode)
return (stream, filename)
try:
if filename == '-':
if sys.platform == 'win32':
import msvcrt
msvcrt.setmode(sys.stdout.fileno(), os.O_BINARY)
return (sys.stdout.buffer if hasattr(sys.stdout, 'buffer') else sys.stdout, filename)
stream = open(encodeFilename(filename), open_mode)
return (stream, filename)
return openfile(filename, open_mode)
except (IOError, OSError) as err:
if err.errno in (errno.EACCES,):
raise
# In case of error, try to remove win32 forbidden chars
alt_filename = sanitize_path(filename)
if alt_filename == filename:
if 'w' not in open_mode or '+' in open_mode:
# only mung filename when creating the file
raise
org_err = err
# In case of error, try to remove win32 forbidden chars
if err.errno in (errno.EINVAL, ):
alt_filename = sanitize_path(filename)
if alt_filename != filename:
try:
return openfile(alt_filename, open_mode)
except (IOError, OSError) as new_err:
err = new_err
else:
# An exception here should be caught in the caller
stream = open(encodeFilename(alt_filename), open_mode)
return (stream, alt_filename)
alt_filename = filename
# Windows: an over-long file name can be detected by the CreateFile()
# API, and then get EINVAL, or by the filesystem, and then perhaps
# ENAMETOOLONG
# POSIX: ENAMETOOLONG in general
while err.errno in (errno.ENAMETOOLONG, errno.EINVAL, ):
alt_filename = reduce_filename(alt_filename)
if not alt_filename:
break
try:
return openfile(alt_filename, open_mode)
except (IOError, OSError) as new_err:
err = new_err
# Reduction didn't help; give up and report what initially went wrong
# This exception should be caught in the caller
raise org_err
def timeconvert(timestr):