mirror of
https://git.savannah.gnu.org/git/coreutils.git
synced 2025-09-10 07:59:52 +02:00
fold: don't truncate multibyte characters at the end of the buffer
* src/fold.c (fold_file): Replace invalid characters with the original byte read. Copy multibyte sequences that may not yet be fully read to the start of the buffer before reading more bytes.
* tests/fold/fold-characters.sh: Add a test case.
This commit is contained in:
24
src/fold.c
24
src/fold.c
@@ -139,6 +139,7 @@ fold_file (char const *filename, size_t width)
|
||||
idx_t offset_out = 0; /* Index in 'line_out' for next char. */
|
||||
static char line_out[IO_BUFSIZE];
|
||||
static char line_in[IO_BUFSIZE];
|
||||
static size_t offset_in = 0;
|
||||
static size_t length_in = 0;
|
||||
int saved_errno;
|
||||
|
||||
@@ -158,14 +159,30 @@ fold_file (char const *filename, size_t width)
|
||||
|
||||
fadvise (istream, FADVISE_SEQUENTIAL);
|
||||
|
||||
while (0 < (length_in = fread (line_in, 1, sizeof line_in, istream)))
|
||||
while (0 < (length_in = fread (line_in + offset_in, 1,
|
||||
sizeof line_in - offset_in, istream)))
|
||||
{
|
||||
char *p = line_in;
|
||||
char *lim = p + length_in;
|
||||
char *lim = p + length_in + offset_in;
|
||||
mcel_t g;
|
||||
for (; p < lim; p += g.len)
|
||||
{
|
||||
g = mcel_scan (p, lim);
|
||||
if (g.err)
|
||||
{
|
||||
/* Replace the character with the byte if it cannot be a
|
||||
truncated multibyte sequence. */
|
||||
if (!(lim - p <= MCEL_LEN_MAX))
|
||||
g.ch = p[0];
|
||||
else
|
||||
{
|
||||
/* It may be a truncated multibyte sequence. Move it to the
|
||||
front of the input buffer. */
|
||||
memmove (line_in, p, lim - p);
|
||||
offset_in = lim - p;
|
||||
goto next_line;
|
||||
}
|
||||
}
|
||||
if (g.ch == '\n')
|
||||
{
|
||||
memcpy (line_out + offset_out, p, g.len);
|
||||
@@ -241,6 +258,9 @@ fold_file (char const *filename, size_t width)
|
||||
}
|
||||
if (feof (istream))
|
||||
break;
|
||||
/* We read a full buffer of complete characters. */
|
||||
offset_in = 0;
|
||||
next_line:
|
||||
}
|
||||
|
||||
saved_errno = errno;
|
||||
|
||||
@@ -58,6 +58,25 @@ compare column-exp2 column-out2 || fail=1
|
||||
fold --characters -w 10 input2 > character-out2 || fail=1
|
||||
compare character-exp2 character-out2 || fail=1
|
||||
|
||||
# Test a Unicode character on the edge of the input buffer.
|
||||
# Keep the count of 'a' characters below (262143) in sync with IO_BUFSIZE - 1.
|
||||
yes a | head -n 262143 | tr -d '\n' > input3 || framework_failure_
|
||||
env printf '\uB250' >> input3 || framework_failure_
|
||||
yes a | head -n 100 | tr -d '\n' >> input3 || framework_failure_
|
||||
env printf '\n' >> input3 || framework_failure_
|
||||
|
||||
yes a | head -n 80 | tr -d '\n' > exp3 || framework_failure_
|
||||
env printf '\n' >> exp3 || framework_failure_
|
||||
yes a | head -n 63 | tr -d '\n' >> exp3 || framework_failure_
|
||||
env printf '\uB250' >> exp3 || framework_failure_
|
||||
yes a | head -n 16 | tr -d '\n' >> exp3 || framework_failure_
|
||||
env printf '\n' >> exp3 || framework_failure_
|
||||
yes a | head -n 80 | tr -d '\n' >> exp3 || framework_failure_
|
||||
env printf '\naaaa\n' >> exp3 || framework_failure_
|
||||
|
||||
fold --characters input3 | tail -n 4 > out3 || fail=1
|
||||
compare exp3 out3 || fail=1
|
||||
|
||||
# Ensure bounded memory operation
|
||||
vm=$(get_min_ulimit_v_ fold /dev/null) && {
|
||||
yes | tr -d '\n' | (ulimit -v $(($vm+8000)) && fold 2>err) | head || fail=1
|
||||
|
||||
Reference in New Issue
Block a user