mirror of
https://git.savannah.gnu.org/git/coreutils.git
synced 2025-09-10 07:59:52 +02:00
du: tune, and fix some -L bugs with dangling or cyclic symlinks
* src/du.c (process_file): Avoid recalculation of hashes and of file-exclusion for directories. Do not descend into the same directory more than once, unless -l is given; this is faster. Calculate stat buffer lazily, since it need not be computed at all for excluded files. Count space if FTS_ERR, since stat buffer is always valid then. No need for 'print' local variable. (main): Use FTS_NOSTAT. Use FTS_TIGHT_CYCLE_CHECK only when not hashing everything, since process_file finds cycles on its own when hashing everything. * tests/du/deref: Add test cases for -L bugs.
This commit is contained in:
committed by
Paul Eggert
parent
9a3c600f11
commit
77428214f1
146
src/du.c
146
src/du.c
@@ -392,7 +392,7 @@ print_size (const struct duinfo *pdui, const char *string)
|
||||
static bool
|
||||
process_file (FTS *fts, FTSENT *ent)
|
||||
{
|
||||
bool ok;
|
||||
bool ok = true;
|
||||
struct duinfo dui;
|
||||
struct duinfo dui_to_print;
|
||||
size_t level;
|
||||
@@ -407,78 +407,85 @@ process_file (FTS *fts, FTSENT *ent)
|
||||
The sum of the sizes of all entries in the hierarchy at or below the
|
||||
directory at the specified level. */
|
||||
static struct dulevel *dulvl;
|
||||
bool print = true;
|
||||
|
||||
const char *file = ent->fts_path;
|
||||
const struct stat *sb = ent->fts_statp;
|
||||
bool skip;
|
||||
int info = ent->fts_info;
|
||||
|
||||
/* If necessary, set FTS_SKIP before returning. */
|
||||
skip = excluded_file_name (exclude, file);
|
||||
if (skip)
|
||||
fts_set (fts, ent, FTS_SKIP);
|
||||
|
||||
switch (ent->fts_info)
|
||||
if (info == FTS_DNR)
|
||||
{
|
||||
case FTS_NS:
|
||||
error (0, ent->fts_errno, _("cannot access %s"), quote (file));
|
||||
return false;
|
||||
|
||||
case FTS_ERR:
|
||||
/* if (S_ISDIR (ent->fts_statp->st_mode) && FIXME */
|
||||
error (0, ent->fts_errno, "%s", quote (file));
|
||||
return false;
|
||||
|
||||
case FTS_DNR:
|
||||
/* Don't return just yet, since although the directory is not readable,
|
||||
we were able to stat it, so we do have a size. */
|
||||
/* An error occurred, but the size is known, so count it. */
|
||||
error (0, ent->fts_errno, _("cannot read directory %s"), quote (file));
|
||||
ok = false;
|
||||
break;
|
||||
|
||||
case FTS_DC: /* directory that causes cycles */
|
||||
if (cycle_warning_required (fts, ent))
|
||||
}
|
||||
else if (info != FTS_DP)
|
||||
{
|
||||
bool excluded = excluded_file_name (exclude, file);
|
||||
if (! excluded)
|
||||
{
|
||||
emit_cycle_warning (file);
|
||||
return false;
|
||||
/* Make the stat buffer *SB valid, or fail noisily. */
|
||||
|
||||
if (info == FTS_NSOK)
|
||||
{
|
||||
fts_set (fts, ent, FTS_AGAIN);
|
||||
FTSENT const *e = fts_read (fts);
|
||||
assert (e == ent);
|
||||
info = ent->fts_info;
|
||||
}
|
||||
|
||||
if (info == FTS_NS || info == FTS_SLNONE)
|
||||
{
|
||||
error (0, ent->fts_errno, _("cannot access %s"), quote (file));
|
||||
return false;
|
||||
}
|
||||
}
|
||||
ok = true;
|
||||
break;
|
||||
|
||||
default:
|
||||
ok = true;
|
||||
break;
|
||||
if (excluded
|
||||
|| (! opt_count_all
|
||||
&& (hash_all || (! S_ISDIR (sb->st_mode) && 1 < sb->st_nlink))
|
||||
&& ! hash_ins (sb->st_ino, sb->st_dev)))
|
||||
{
|
||||
/* If ignoring a directory in preorder, skip its children.
|
||||
Ignore the next fts_read output too, as it's a postorder
|
||||
visit to the same directory. */
|
||||
if (info == FTS_D)
|
||||
{
|
||||
fts_set (fts, ent, FTS_SKIP);
|
||||
FTSENT const *e = fts_read (fts);
|
||||
assert (e == ent);
|
||||
}
|
||||
|
||||
return true;
|
||||
}
|
||||
|
||||
switch (info)
|
||||
{
|
||||
case FTS_D:
|
||||
return true;
|
||||
|
||||
case FTS_ERR:
|
||||
/* An error occurred, but the size is known, so count it. */
|
||||
error (0, ent->fts_errno, "%s", quote (file));
|
||||
ok = false;
|
||||
break;
|
||||
|
||||
case FTS_DC:
|
||||
if (cycle_warning_required (fts, ent))
|
||||
{
|
||||
emit_cycle_warning (file);
|
||||
return false;
|
||||
}
|
||||
return true;
|
||||
}
|
||||
}
|
||||
|
||||
/* If this is the first (pre-order) encounter with a directory,
|
||||
or if it's the second encounter for a skipped directory, then
|
||||
return right away. */
|
||||
if (ent->fts_info == FTS_D || skip)
|
||||
return ok;
|
||||
|
||||
/* If the file is being excluded or if it has already been counted
|
||||
via a hard link, then don't let it contribute to the sums. */
|
||||
if (skip
|
||||
|| (!opt_count_all
|
||||
&& (hash_all || (! S_ISDIR (sb->st_mode) && 1 < sb->st_nlink))
|
||||
&& ! hash_ins (sb->st_ino, sb->st_dev)))
|
||||
{
|
||||
/* Note that we must not simply return here.
|
||||
We still have to update prev_level and maybe propagate
|
||||
some sums up the hierarchy. */
|
||||
duinfo_init (&dui);
|
||||
print = false;
|
||||
}
|
||||
else
|
||||
{
|
||||
duinfo_set (&dui,
|
||||
(apparent_size
|
||||
? sb->st_size
|
||||
: (uintmax_t) ST_NBLOCKS (*sb) * ST_NBLOCKSIZE),
|
||||
(time_type == time_mtime ? get_stat_mtime (sb)
|
||||
: time_type == time_atime ? get_stat_atime (sb)
|
||||
: get_stat_ctime (sb)));
|
||||
}
|
||||
duinfo_set (&dui,
|
||||
(apparent_size
|
||||
? sb->st_size
|
||||
: (uintmax_t) ST_NBLOCKS (*sb) * ST_NBLOCKSIZE),
|
||||
(time_type == time_mtime ? get_stat_mtime (sb)
|
||||
: time_type == time_atime ? get_stat_atime (sb)
|
||||
: get_stat_ctime (sb)));
|
||||
|
||||
level = ent->fts_level;
|
||||
dui_to_print = dui;
|
||||
@@ -535,20 +542,14 @@ process_file (FTS *fts, FTSENT *ent)
|
||||
|
||||
/* Let the size of a directory entry contribute to the total for the
|
||||
containing directory, unless --separate-dirs (-S) is specified. */
|
||||
if ( ! (opt_separate_dirs && IS_DIR_TYPE (ent->fts_info)))
|
||||
if (! (opt_separate_dirs && IS_DIR_TYPE (info)))
|
||||
duinfo_add (&dulvl[level].ent, &dui);
|
||||
|
||||
/* Even if this directory is unreadable or we can't chdir into it,
|
||||
do let its size contribute to the total. */
|
||||
duinfo_add (&tot_dui, &dui);
|
||||
|
||||
/* If we're not counting an entry, e.g., because it's a hard link
|
||||
to a file we've already counted (and --count-links), then don't
|
||||
print a line for it. */
|
||||
if (!print)
|
||||
return ok;
|
||||
|
||||
if ((IS_DIR_TYPE (ent->fts_info) && level <= max_depth)
|
||||
if ((IS_DIR_TYPE (info) && level <= max_depth)
|
||||
|| ((opt_all && level <= max_depth) || level == 0))
|
||||
print_size (&dui_to_print, file);
|
||||
|
||||
@@ -608,7 +609,7 @@ main (int argc, char **argv)
|
||||
char *files_from = NULL;
|
||||
|
||||
/* Bit flags that control how fts works. */
|
||||
int bit_flags = FTS_TIGHT_CYCLE_CHECK | FTS_DEFER_STAT;
|
||||
int bit_flags = FTS_NOSTAT;
|
||||
|
||||
/* Select one of the three FTS_ options that control if/when
|
||||
to follow a symlink. */
|
||||
@@ -902,6 +903,11 @@ main (int argc, char **argv)
|
||||
if (!di_set)
|
||||
xalloc_die ();
|
||||
|
||||
/* If not hashing everything, process_file won't find cycles on its
|
||||
own, so ask fts_read to check for them accurately. */
|
||||
if (opt_count_all || ! hash_all)
|
||||
bit_flags |= FTS_TIGHT_CYCLE_CHECK;
|
||||
|
||||
bit_flags |= symlink_deref_bits;
|
||||
static char *temp_argv[] = { NULL, NULL };
|
||||
|
||||
|
||||
@@ -1,6 +1,8 @@
|
||||
#!/bin/sh
|
||||
# prior to coreutils-4.5.3, du -D didn't work in some cases
|
||||
# Based on an example from Andreas Schwab and/or Michal Svec.
|
||||
# Also, up to coreutils-8.5, du -L sometimes incorrectly
|
||||
# counted the space of the followed symlinks.
|
||||
|
||||
# Copyright (C) 2002, 2006-2010 Free Software Foundation, Inc.
|
||||
|
||||
@@ -27,10 +29,24 @@ fi
|
||||
mkdir -p a/sub || framework_failure
|
||||
ln -s a/sub slink || framework_failure
|
||||
touch b || framework_failure
|
||||
ln -s .. a/sub/dotdot || framework_failure
|
||||
ln -s nowhere dangle || framework_failure
|
||||
|
||||
|
||||
# This used to fail with the following diagnostic:
|
||||
# du: `b': No such file or directory
|
||||
du -sD slink b > /dev/null 2>&1 || fail=1
|
||||
|
||||
# This used to fail to report the dangling symlink.
|
||||
du -L dangle > /dev/null 2>&1 && fail=1
|
||||
|
||||
# du -L used to mess up, either by counting the symlink's disk space itself
|
||||
# (-L should follow symlinks, not count their space)
|
||||
# or (briefly in July 2010) by omitting the entry for "a".
|
||||
du_L_output=`du -L a` || fail=1
|
||||
du_lL_output=`du -lL a` || fail=1
|
||||
du_x_output=`du --exclude=dotdot a` || fail=1
|
||||
test "X$du_L_output" = "X$du_x_output" || fail=1
|
||||
test "X$du_lL_output" = "X$du_x_output" || fail=1
|
||||
|
||||
Exit $fail
|
||||
|
||||
Reference in New Issue
Block a user