Avoid tr case-conversion failure in some locales.

* src/tr.c (skip_construct): New function.
(main): When processing a pair of case-converting classes, don't
iterate through the elements of each [:upper:] or [:lower:] class.
Reported by Gerald Pfeifer in
<http://thread.gmane.org/gmane.comp.gnu.coreutils.bugs/12218>.
* tests/tr/Test.pm [tolower-F]: New test for the above fix.
[upcase-xtra, dncase-xtra]: New tests, for a related code path.
* NEWS: Mention the tr bug fix.
This commit is contained in:
Jim Meyering
2008-01-05 23:55:01 +01:00
parent df8c2bcfe0
commit 6efd10462d
5 changed files with 54 additions and 1 deletions

View File

@@ -1,3 +1,15 @@
2008-01-05 Jim Meyering <meyering@redhat.com>
Avoid tr case-conversion failure in some locales.
* src/tr.c (skip_construct): New function.
(main): When processing a pair of case-converting classes, don't
iterate through the elements of each [:upper:] or [:lower:] class.
Reported by Gerald Pfeifer in
<http://thread.gmane.org/gmane.comp.gnu.coreutils.bugs/12218>.
* tests/tr/Test.pm [tolower-F]: New test for the above fix.
[upcase-xtra, dncase-xtra]: New tests, for a related code path.
* NEWS: Mention the tr bug fix.
2008-01-02 Jim Meyering <jim@meyering.net>
* .gitignore: Ignore lzma-compressed files, too.

6
NEWS
View File

@@ -2,6 +2,12 @@ GNU coreutils NEWS -*- outline -*-
* Noteworthy changes in release 6.? (????-??-??) [?]
** Bug fixes
tr's case conversion would fail in a locale with differing numbers
of lower case and upper case characters. E.g., this would fail:
env LC_CTYPE=en_US.iso88591 tr '[:upper:]' '[:lower:]'
[bug introduced in coreutils-6.9.90]
* Noteworthy changes in release 6.9.91 (2007-12-15) [beta]

1
THANKS
View File

@@ -179,6 +179,7 @@ Geoff Collyer geoff at collyer.net
Geoff Kuenning geoff@cs.hmc.edu
Geoff Odhner geoff@franklin.com
Geoff Whale geoffw@cse.unsw.EDU.AU
Gerald Pfeifer gerald@pfeifer.com
Gerhard Poul gpoul@gnu.org
Germano Leichsenring germano@jedi.cs.kobe-u.ac.jp
Göran Uddeborg goeran@uddeborg.pp.se

View File

@@ -1,5 +1,5 @@
/* tr -- a filter to translate characters
Copyright (C) 91, 1995-2007 Free Software Foundation, Inc.
Copyright (C) 91, 1995-2008 Free Software Foundation, Inc.
This program is free software: you can redistribute it and/or modify
it under the terms of the GNU General Public License as published by
@@ -1019,6 +1019,15 @@ build_spec_list (const struct E_string *es, struct Spec_list *result)
return true;
}
/* Step the Spec_list S beyond the construct it is currently on:
   S->tail is moved to its successor and S->state is reset to
   NEW_ELEMENT.  The caller must guarantee that S->tail is non-NULL.  */
static void
skip_construct (struct Spec_list *s)
{
  /* The two members are independent, so the state may be reset before
     the tail pointer is advanced.  */
  s->state = NEW_ELEMENT;
  s->tail = s->tail->next;
}
/* Given a Spec_list S (with its saved state implicit in the values
of its members `tail' and `state'), return the next single character
in the expansion of S's constructs. If the last character of S was
@@ -1809,6 +1818,7 @@ main (int argc, char **argv)
{
int c1, c2;
int i;
bool case_convert = false;
enum Upper_Lower_class class_s1;
enum Upper_Lower_class class_s2;
@@ -1818,6 +1828,16 @@ main (int argc, char **argv)
s2->state = BEGIN_STATE;
for (;;)
{
/* When the previous pair identified case-converting classes,
advance S1 and S2 so that each points to the following
construct. */
if (case_convert)
{
skip_construct (s1);
skip_construct (s2);
case_convert = false;
}
c1 = get_next (s1, &class_s1);
c2 = get_next (s2, &class_s2);
@@ -1831,12 +1851,14 @@ main (int argc, char **argv)
if (class_s1 == UL_LOWER && class_s2 == UL_UPPER)
{
case_convert = true;
for (i = 0; i < N_CHARS; i++)
if (islower (i))
xlate[i] = toupper (i);
}
else if (class_s1 == UL_UPPER && class_s2 == UL_LOWER)
{
case_convert = true;
for (i = 0; i < N_CHARS; i++)
if (isupper (i))
xlate[i] = tolower (i);

View File

@@ -139,8 +139,20 @@ my @tv = (
# Up to coreutils-6.9, tr rejected an unmatched [:lower:] or [:upper:] in SET1.
['s1-lower', q|'[:lower:]' '[.*]'|, '#$%123abcABC', '#$%123...ABC', 0],
['s1-upper', q|'[:upper:]' '[.*]'|, '#$%123abcABC', '#$%123abc...', 0],
# Up to coreutils-6.9.91, this would fail with the diagnostic:
# tr: misaligned [:upper:] and/or [:lower:] construct
# with LC_CTYPE=en_US.iso88591.
['tolower-F',q|'[:upper:]' '[:lower:]'|, 'A', 'a', 0],
# When doing a case-converting translation with something after the
# [:upper:] and [:lower:] elements, ensure that tr honors the following byte.
['upcase-xtra',q|'[:lower:].' '[:upper:]x'|, 'abc.', 'ABCx', 0],
['dncase-xtra',q|'[:upper:].' '[:lower:]x'|, 'ABC.', 'abcx', 0],
);
$Test::env{'tolower-F'} = ['LC_CTYPE=en_US.iso88591'];
sub test_vector
{
my $t;