mirror of
https://git.savannah.gnu.org/git/coreutils.git
synced 2025-09-10 07:59:52 +02:00
csplit: do not rely on undefined behavior in printf formats
* doc/coreutils.texi (csplit invocation): Say that %d and %i are aliases for %u. * src/csplit.c (FLAG_THOUSANDS, FLAG_ALTERNATIVE): New constants. (get_format_flags): Now take char const * and int * and return size_t. It now stores info about the flags instead of merely scanning them. Also, it handles '0' correctly. Drop support for the undocumented '+' and ' ' flags since the value is unsigned. Add support for the (undocumented) "'" flag. All uses changed. (get_format_width, get_format_prec): Remove. (check_format_conv_type): Renamed from get_format_conv_type, with a different signature. It now converts the format to one that is compatible with unsigned int, and checks flags. All uses changed. (max_out): Have snprintf compute the number of bytes needed rather than attempting to do it ourselves (which doesn't work portably with outlandish formats such as %4294967296d). (check_format_conv_type, main): Check for overflow in size calculations. Don't assume size_t fits in unsigned int. * tests/misc/csplit: Check for proper handling of flags, with %0#6.3x. Coreutils 8.6 mishandles this somewhat-weird example.
This commit is contained in:
committed by
Jim Meyering
parent
0339eb4598
commit
6568b173db
@@ -3082,7 +3082,8 @@ specified, the suffix string must include exactly one
|
||||
@code{printf(3)}-style conversion specification, possibly including
|
||||
format specification flags, a field width, a precision specifications,
|
||||
or all of these kinds of modifiers. The format letter must convert a
|
||||
binary integer argument to readable form; thus, only @samp{d}, @samp{i},
|
||||
binary unsigned integer argument to readable form. The format letters
|
||||
@samp{d} and @samp{i} are aliases for @samp{u}, and the
|
||||
@samp{u}, @samp{o}, @samp{x}, and @samp{X} conversions are allowed. The
|
||||
entire @var{suffix} is given (with the current output file number) to
|
||||
@code{sprintf(3)} to form the file name suffixes for each of the
|
||||
|
||||
143
src/csplit.c
143
src/csplit.c
@@ -1181,81 +1181,64 @@ parse_patterns (int argc, int start, char **argv)
|
||||
}
|
||||
}
|
||||
|
||||
static unsigned int
|
||||
get_format_flags (char **format_ptr)
|
||||
{
|
||||
unsigned int count = 0;
|
||||
|
||||
for (; **format_ptr; (*format_ptr)++)
|
||||
|
||||
/* Names for the printf format flags ' and #. These can be ORed together. */
|
||||
enum { FLAG_THOUSANDS = 1, FLAG_ALTERNATIVE = 2 };
|
||||
|
||||
/* Scan the printf format flags in FORMAT, storing info about the
|
||||
flags into *FLAGS_PTR. Return the number of flags found. */
|
||||
static size_t
|
||||
get_format_flags (char const *format, int *flags_ptr)
|
||||
{
|
||||
int flags = 0;
|
||||
|
||||
for (size_t count = 0; ; count++)
|
||||
{
|
||||
switch (**format_ptr)
|
||||
switch (format[count])
|
||||
{
|
||||
case '-':
|
||||
case '0':
|
||||
break;
|
||||
|
||||
case '+':
|
||||
case ' ':
|
||||
count |= 1;
|
||||
case '\'':
|
||||
flags |= FLAG_THOUSANDS;
|
||||
break;
|
||||
|
||||
case '#':
|
||||
count |= 2; /* Allow for 0x prefix preceding an `x' conversion. */
|
||||
flags |= FLAG_ALTERNATIVE;
|
||||
break;
|
||||
|
||||
default:
|
||||
*flags_ptr = flags;
|
||||
return count;
|
||||
}
|
||||
}
|
||||
return count;
|
||||
}
|
||||
|
||||
static size_t
|
||||
get_format_width (char **format_ptr)
|
||||
{
|
||||
unsigned long int val = 0;
|
||||
|
||||
if (ISDIGIT (**format_ptr)
|
||||
&& (xstrtoul (*format_ptr, format_ptr, 10, &val, NULL) != LONGINT_OK
|
||||
|| SIZE_MAX < val))
|
||||
error (EXIT_FAILURE, 0, _("invalid format width"));
|
||||
|
||||
/* Allow for enough octal digits to represent the value of UINT_MAX,
|
||||
even if the field width is less than that. */
|
||||
return MAX (val, (sizeof (unsigned int) * CHAR_BIT + 2) / 3);
|
||||
}
|
||||
|
||||
static size_t
|
||||
get_format_prec (char **format_ptr)
|
||||
{
|
||||
if (**format_ptr != '.')
|
||||
return 0;
|
||||
(*format_ptr)++;
|
||||
|
||||
if (! ISDIGIT (**format_ptr))
|
||||
return 0;
|
||||
else
|
||||
{
|
||||
unsigned long int val;
|
||||
if (xstrtoul (*format_ptr, format_ptr, 10, &val, NULL) != LONGINT_OK
|
||||
|| SIZE_MAX < val)
|
||||
error (EXIT_FAILURE, 0, _("invalid format precision"));
|
||||
return val;
|
||||
}
|
||||
}
|
||||
|
||||
/* Check that the printf format conversion specifier *FORMAT is valid
|
||||
and compatible with FLAGS. Change it to 'u' if it is 'd' or 'i',
|
||||
since the format will be used with an unsigned value. */
|
||||
static void
|
||||
get_format_conv_type (char **format_ptr)
|
||||
check_format_conv_type (char *format, int flags)
|
||||
{
|
||||
unsigned char ch = *(*format_ptr)++;
|
||||
unsigned char ch = *format;
|
||||
int compatible_flags = FLAG_THOUSANDS;
|
||||
|
||||
switch (ch)
|
||||
{
|
||||
case 'd':
|
||||
case 'i':
|
||||
case 'o':
|
||||
*format = 'u';
|
||||
break;
|
||||
|
||||
case 'u':
|
||||
break;
|
||||
|
||||
case 'o':
|
||||
case 'x':
|
||||
case 'X':
|
||||
compatible_flags = FLAG_ALTERNATIVE;
|
||||
break;
|
||||
|
||||
case 0:
|
||||
@@ -1270,45 +1253,46 @@ get_format_conv_type (char **format_ptr)
|
||||
error (EXIT_FAILURE, 0,
|
||||
_("invalid conversion specifier in suffix: \\%.3o"), ch);
|
||||
}
|
||||
|
||||
if (flags & ~ compatible_flags)
|
||||
error (EXIT_FAILURE, 0,
|
||||
_("invalid flags in conversion specification: %%%c%c"),
|
||||
(flags & ~ compatible_flags & FLAG_ALTERNATIVE ? '#' : '\''), ch);
|
||||
}
|
||||
|
||||
/* Return the maximum number of bytes that can be generated by
|
||||
applying FORMAT to an unsigned int value. If the format is
|
||||
invalid, diagnose the problem and exit. */
|
||||
static size_t
|
||||
max_out (char *format)
|
||||
{
|
||||
size_t out_count = 0;
|
||||
bool percent = false;
|
||||
|
||||
while (*format)
|
||||
{
|
||||
if (*format++ != '%')
|
||||
out_count++;
|
||||
else if (*format == '%')
|
||||
{
|
||||
format++;
|
||||
out_count++;
|
||||
}
|
||||
else
|
||||
{
|
||||
if (percent)
|
||||
error (EXIT_FAILURE, 0,
|
||||
_("too many %% conversion specifications in suffix"));
|
||||
percent = true;
|
||||
out_count += get_format_flags (&format);
|
||||
{
|
||||
size_t width = get_format_width (&format);
|
||||
size_t prec = get_format_prec (&format);
|
||||
|
||||
out_count += MAX (width, prec);
|
||||
}
|
||||
get_format_conv_type (&format);
|
||||
}
|
||||
}
|
||||
for (char *f = format; *f; f++)
|
||||
if (*f == '%' && *++f != '%')
|
||||
{
|
||||
if (percent)
|
||||
error (EXIT_FAILURE, 0,
|
||||
_("too many %% conversion specifications in suffix"));
|
||||
percent = true;
|
||||
unsigned int flags;
|
||||
f += get_format_flags (f, &flags);
|
||||
while (ISDIGIT (*f))
|
||||
f++;
|
||||
if (*f == '.')
|
||||
while (ISDIGIT (*++f))
|
||||
continue;
|
||||
check_format_conv_type (f, flags);
|
||||
}
|
||||
|
||||
if (! percent)
|
||||
error (EXIT_FAILURE, 0,
|
||||
_("missing %% conversion specification in suffix"));
|
||||
|
||||
return out_count;
|
||||
int maxlen = snprintf (NULL, 0, format, UINT_MAX);
|
||||
if (! (0 <= maxlen && maxlen <= SIZE_MAX))
|
||||
xalloc_die ();
|
||||
return maxlen;
|
||||
}
|
||||
|
||||
int
|
||||
@@ -1349,7 +1333,7 @@ main (int argc, char **argv)
|
||||
|
||||
case 'n':
|
||||
if (xstrtoul (optarg, NULL, 10, &val, "") != LONGINT_OK
|
||||
|| val > INT_MAX)
|
||||
|| MIN (INT_MAX, SIZE_MAX) < val)
|
||||
error (EXIT_FAILURE, 0, _("%s: invalid number"), optarg);
|
||||
digits = val;
|
||||
break;
|
||||
@@ -1380,11 +1364,14 @@ main (int argc, char **argv)
|
||||
usage (EXIT_FAILURE);
|
||||
}
|
||||
|
||||
unsigned int max_digit_string_len
|
||||
size_t prefix_len = strlen (prefix);
|
||||
size_t max_digit_string_len
|
||||
= (suffix
|
||||
? max_out (suffix)
|
||||
: MAX (INT_STRLEN_BOUND (unsigned int), digits));
|
||||
filename_space = xmalloc (strlen (prefix) + max_digit_string_len + 1);
|
||||
if (SIZE_MAX - 1 - prefix_len < max_digit_string_len)
|
||||
xalloc_die ();
|
||||
filename_space = xmalloc (prefix_len + max_digit_string_len + 1);
|
||||
|
||||
set_input_file (argv[optind++]);
|
||||
|
||||
|
||||
@@ -67,6 +67,21 @@ EOF
|
||||
compare err experr || fail=1
|
||||
rm -f in out exp err experr
|
||||
|
||||
# `echo | csplit -b '%0#6.3x' - 1' incorrectly warned about the format
|
||||
# up through coreutils 8.6.
|
||||
echo > in
|
||||
csplit -b '%0#6.3x' in 1 > out 2> err || fail=1
|
||||
cat <<EOF > exp
|
||||
0
|
||||
1
|
||||
EOF
|
||||
compare out exp || fail=1
|
||||
touch experr
|
||||
compare err experr || fail=1
|
||||
compare 'xx 000' experr || fail=1
|
||||
compare 'xx 0x001' in || fail=1
|
||||
rm -f in out exp err experr xx*
|
||||
|
||||
# make sure `csplit FILE 0' fails.
|
||||
echo > in
|
||||
csplit in 0 > out 2> err && fail=1
|
||||
|
||||
Reference in New Issue
Block a user