mirror of
https://git.savannah.gnu.org/git/coreutils.git
synced 2025-09-10 07:59:52 +02:00
fold: fix handling of invalid multi-byte characters
* src/fold.c (fold_file): Continue the loop when we have buffered bytes but nothing left to read from the file.
(adjust_column): Don't assume that the character is printable.
* tests/fold/fold-characters.sh: Add a new test case.
(bad_unicode): New function.
This commit is contained in:
17
src/fold.c
17
src/fold.c
@@ -115,10 +115,16 @@ adjust_column (size_t column, mcel_t g)
|
||||
column = 0;
|
||||
else if (g.ch == '\t')
|
||||
column += TAB_WIDTH - column % TAB_WIDTH;
|
||||
else /* if (c32isprint (g.ch)) */
|
||||
else
|
||||
{
|
||||
last_character_width = (counting_mode == COUNT_CHARACTERS
|
||||
? 1 : c32width (g.ch));
|
||||
if (counting_mode == COUNT_CHARACTERS)
|
||||
last_character_width = 1;
|
||||
else
|
||||
{
|
||||
int width = c32width (g.ch);
|
||||
/* Default to a width of 1 if there is an invalid character. */
|
||||
last_character_width = width < 0 ? 1 : width;
|
||||
}
|
||||
column += last_character_width;
|
||||
}
|
||||
}
|
||||
@@ -160,7 +166,8 @@ fold_file (char const *filename, size_t width)
|
||||
fadvise (istream, FADVISE_SEQUENTIAL);
|
||||
|
||||
while (0 < (length_in = fread (line_in + offset_in, 1,
|
||||
sizeof line_in - offset_in, istream)))
|
||||
sizeof line_in - offset_in, istream))
|
||||
|| 0 < offset_in)
|
||||
{
|
||||
char *p = line_in;
|
||||
char *lim = p + length_in + offset_in;
|
||||
@@ -172,7 +179,7 @@ fold_file (char const *filename, size_t width)
|
||||
{
|
||||
/* Replace the character with the byte if it cannot be a
|
||||
truncated multibyte sequence. */
|
||||
if (!(lim - p <= MCEL_LEN_MAX))
|
||||
if (!(lim - p <= MCEL_LEN_MAX) || length_in == 0)
|
||||
g.ch = p[0];
|
||||
else
|
||||
{
|
||||
|
||||
@@ -80,6 +80,17 @@ env printf '\naaaa\n' >> exp3 || framework_failure_
|
||||
fold --characters input3 | tail -n 4 > out3 || fail=1
|
||||
compare exp3 out3 || fail=1
|
||||
|
||||
# Sequence derived from <https://datatracker.ietf.org/doc/rfc9839>.
|
||||
bad_unicode ()
|
||||
{
|
||||
# invalid UTF8|unpaired surrogate|NUL|C1 control|noncharacter
|
||||
env printf '\xC3|\xED\xBA\xAD|\u0000|\u0089|\xED\xA6\xBF\xED\xBF\xBF\n'
|
||||
}
|
||||
bad_unicode > /dev/null || framework_failure_
|
||||
test $({ bad_unicode | fold; bad_unicode; } | uniq | wc -l) = 1 || fail=1
|
||||
# Check bad character at EOF
|
||||
test $(env printf '\xC3' | fold | wc -c) = 1 || fail=1
|
||||
|
||||
# Ensure bounded memory operation
|
||||
vm=$(get_min_ulimit_v_ fold /dev/null) && {
|
||||
yes | tr -d '\n' | (ulimit -v $(($vm+8000)) && fold 2>err) | head || fail=1
|
||||
|
||||
Reference in New Issue
Block a user