csplit: do not rely on undefined behavior in printf formats

* doc/coreutils.texi (csplit invocation): Say that %d and %i are
aliases for %u.
* src/csplit.c (FLAG_THOUSANDS, FLAG_ALTERNATIVE): New constants.
(get_format_flags): Now take char const * and int * and return
size_t.  It now stores info about the flags instead of merely
scanning them.  Also, it handles '0' correctly.  Drop support for
the undocumented '+' and ' ' flags since the value is unsigned.
Add support for the (undocumented) "'" flag.  All uses changed.
(get_format_width, get_format_prec): Remove.
(check_format_conv_type): Renamed from get_format_conv_type, with
a different signature.  It now converts the format to one that is
compatible with unsigned int, and checks flags.  All uses changed.
(max_out): Have snprintf compute the number of bytes needed rather
than attempting to do it ourselves (which doesn't work portably
with outlandish formats such as %4294967296d).
(check_format_conv_type, main): Check for overflow in size
calculations.  Don't assume size_t fits in unsigned int.
* tests/misc/csplit: Check for proper handling of flags, with
%0#6.3x.  Coreutils 8.6 mishandles this somewhat-weird example.
This commit is contained in:
Paul Eggert
2010-11-10 20:34:52 -08:00
committed by Jim Meyering
parent 0339eb4598
commit 6568b173db
3 changed files with 82 additions and 79 deletions

View File

@@ -3082,7 +3082,8 @@ specified, the suffix string must include exactly one
@code{printf(3)}-style conversion specification, possibly including
format specification flags, a field width, a precision specifications,
or all of these kinds of modifiers. The format letter must convert a
binary integer argument to readable form; thus, only @samp{d}, @samp{i},
binary unsigned integer argument to readable form. The format letters
@samp{d} and @samp{i} are aliases for @samp{u}, and the
@samp{u}, @samp{o}, @samp{x}, and @samp{X} conversions are allowed. The
entire @var{suffix} is given (with the current output file number) to
@code{sprintf(3)} to form the file name suffixes for each of the

View File

@@ -1181,81 +1181,64 @@ parse_patterns (int argc, int start, char **argv)
}
}
static unsigned int
get_format_flags (char **format_ptr)
{
unsigned int count = 0;
for (; **format_ptr; (*format_ptr)++)
/* Names for the printf format flags ' and #. These can be ORed together. */
enum { FLAG_THOUSANDS = 1, FLAG_ALTERNATIVE = 2 };
/* Scan the printf format flags in FORMAT, storing info about the
flags into *FLAGS_PTR. Return the number of flags found. */
static size_t
get_format_flags (char const *format, int *flags_ptr)
{
int flags = 0;
for (size_t count = 0; ; count++)
{
switch (**format_ptr)
switch (format[count])
{
case '-':
case '0':
break;
case '+':
case ' ':
count |= 1;
case '\'':
flags |= FLAG_THOUSANDS;
break;
case '#':
count |= 2; /* Allow for 0x prefix preceding an `x' conversion. */
flags |= FLAG_ALTERNATIVE;
break;
default:
*flags_ptr = flags;
return count;
}
}
return count;
}
static size_t
get_format_width (char **format_ptr)
{
unsigned long int val = 0;
if (ISDIGIT (**format_ptr)
&& (xstrtoul (*format_ptr, format_ptr, 10, &val, NULL) != LONGINT_OK
|| SIZE_MAX < val))
error (EXIT_FAILURE, 0, _("invalid format width"));
/* Allow for enough octal digits to represent the value of UINT_MAX,
even if the field width is less than that. */
return MAX (val, (sizeof (unsigned int) * CHAR_BIT + 2) / 3);
}
static size_t
get_format_prec (char **format_ptr)
{
if (**format_ptr != '.')
return 0;
(*format_ptr)++;
if (! ISDIGIT (**format_ptr))
return 0;
else
{
unsigned long int val;
if (xstrtoul (*format_ptr, format_ptr, 10, &val, NULL) != LONGINT_OK
|| SIZE_MAX < val)
error (EXIT_FAILURE, 0, _("invalid format precision"));
return val;
}
}
/* Check that the printf format conversion specifier *FORMAT is valid
and compatible with FLAGS. Change it to 'u' if it is 'd' or 'i',
since the format will be used with an unsigned value. */
static void
get_format_conv_type (char **format_ptr)
check_format_conv_type (char *format, int flags)
{
unsigned char ch = *(*format_ptr)++;
unsigned char ch = *format;
int compatible_flags = FLAG_THOUSANDS;
switch (ch)
{
case 'd':
case 'i':
case 'o':
*format = 'u';
break;
case 'u':
break;
case 'o':
case 'x':
case 'X':
compatible_flags = FLAG_ALTERNATIVE;
break;
case 0:
@@ -1270,45 +1253,46 @@ get_format_conv_type (char **format_ptr)
error (EXIT_FAILURE, 0,
_("invalid conversion specifier in suffix: \\%.3o"), ch);
}
if (flags & ~ compatible_flags)
error (EXIT_FAILURE, 0,
_("invalid flags in conversion specification: %%%c%c"),
(flags & ~ compatible_flags & FLAG_ALTERNATIVE ? '#' : '\''), ch);
}
/* Return the maximum number of bytes that can be generated by
applying FORMAT to an unsigned int value. If the format is
invalid, diagnose the problem and exit. */
static size_t
max_out (char *format)
{
size_t out_count = 0;
bool percent = false;
while (*format)
{
if (*format++ != '%')
out_count++;
else if (*format == '%')
{
format++;
out_count++;
}
else
{
if (percent)
error (EXIT_FAILURE, 0,
_("too many %% conversion specifications in suffix"));
percent = true;
out_count += get_format_flags (&format);
{
size_t width = get_format_width (&format);
size_t prec = get_format_prec (&format);
out_count += MAX (width, prec);
}
get_format_conv_type (&format);
}
}
for (char *f = format; *f; f++)
if (*f == '%' && *++f != '%')
{
if (percent)
error (EXIT_FAILURE, 0,
_("too many %% conversion specifications in suffix"));
percent = true;
unsigned int flags;
f += get_format_flags (f, &flags);
while (ISDIGIT (*f))
f++;
if (*f == '.')
while (ISDIGIT (*++f))
continue;
check_format_conv_type (f, flags);
}
if (! percent)
error (EXIT_FAILURE, 0,
_("missing %% conversion specification in suffix"));
return out_count;
int maxlen = snprintf (NULL, 0, format, UINT_MAX);
if (! (0 <= maxlen && maxlen <= SIZE_MAX))
xalloc_die ();
return maxlen;
}
int
@@ -1349,7 +1333,7 @@ main (int argc, char **argv)
case 'n':
if (xstrtoul (optarg, NULL, 10, &val, "") != LONGINT_OK
|| val > INT_MAX)
|| MIN (INT_MAX, SIZE_MAX) < val)
error (EXIT_FAILURE, 0, _("%s: invalid number"), optarg);
digits = val;
break;
@@ -1380,11 +1364,14 @@ main (int argc, char **argv)
usage (EXIT_FAILURE);
}
unsigned int max_digit_string_len
size_t prefix_len = strlen (prefix);
size_t max_digit_string_len
= (suffix
? max_out (suffix)
: MAX (INT_STRLEN_BOUND (unsigned int), digits));
filename_space = xmalloc (strlen (prefix) + max_digit_string_len + 1);
if (SIZE_MAX - 1 - prefix_len < max_digit_string_len)
xalloc_die ();
filename_space = xmalloc (prefix_len + max_digit_string_len + 1);
set_input_file (argv[optind++]);

View File

@@ -67,6 +67,21 @@ EOF
compare err experr || fail=1
rm -f in out exp err experr
# `echo | csplit -b '%0#6.3x' - 1' incorrectly warned about the format
# up through coreutils 8.6.
echo > in
csplit -b '%0#6.3x' in 1 > out 2> err || fail=1
cat <<EOF > exp
0
1
EOF
compare out exp || fail=1
touch experr
compare err experr || fail=1
compare 'xx 000' experr || fail=1
compare 'xx 0x001' in || fail=1
rm -f in out exp err experr xx*
# make sure `csplit FILE 0' fails.
echo > in
csplit in 0 > out 2> err && fail=1