/*-
* Copyright (c) 2007 B. Luevelsmeyer
* All rights reserved.
*
* Redistribution and use in source and binary forms, with or without
* modification, are permitted provided that the following conditions
* are met:
* 1. Redistributions of source code must retain the above copyright
* notice, this list of conditions and the following disclaimer.
* 2. Redistributions in binary form must reproduce the above copyright
* notice, this list of conditions and the following disclaimer in the
* documentation and/or other materials provided with the distribution.
*
* THIS SOFTWARE IS PROVIDED BY THE AUTHOR AND CONTRIBUTORS ``AS IS'' AND
* ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
* IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
* ARE DISCLAIMED. IN NO EVENT SHALL THE AUTHOR OR CONTRIBUTORS BE LIABLE
* FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
* DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
* OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
* HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
* LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
* OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
* SUCH DAMAGE.
*
* $Id: dupfind.c,v 1.19 2007/05/03 14:34:47 bernd Exp $
*/
/* Find duplicate files; duplicates in this context are files
* that have the same contents without being hardlinked or
* softlinked to each other.
*
* All files are compared by their length, and files of equal
* length are compared by a checksum. (These checksums are stored
* in a database, so the checksum is computed only once per
* file.) If the checksums match, then a direct comparison of the
* files' contents is done.
*
 * This source uses the 'fastcrc' library by G. Adam
 * Stanislav to compute the checksums.
 *
 * I've used CRC checksums rather than MD5 checksums because they
 * are faster to compute, and because they are 32 bits wide, which
 * hopefully will make life easier for the database. Anyway, CRCs
 * are good enough; I've only seen 2 or 3 matching CRCs for
 * non-matching files while comparing several million files.
*/
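/* Output format (one tab-separated line per finding, see the
 * printf()s in gather() and addfile()):
 *   || >known file<	>duplicate<	>size<	duplicate contents (-d)
 *   == >known file<	>hardlink<	>size<	hardlink pair (-h)
 *   -> >target<	>symlink<	>size<	symbolic link (-s)
 *   o> >target<	>symlink<	>size<	symlink to a non-existent target (-s)
 *
 * Typical invocation: dupfind -d -c /some/path
 */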
#include <unistd.h>
#include <stdio.h>
#include <ftw.h>
#include <stdlib.h>
#include <string.h>
#include <assert.h>
#include <db.h>
#include <fcntl.h>
#include <curses.h>
#include <term.h>
#include <errno.h>
#include <fastcrc.h>
#include <signal.h>
#include <sys/time.h>
#include <time.h>	/* time(), used to seed rand() */
#ifdef NDEBUG
#error NDEBUG should not be defined for this source
#endif
/* Command line options */
static int symbolics, /* output symlinks */
hards, /* output hardlinks */
duplicates, /* output file copies */
debug, /* debug output for developers */
progress, /* progress output (current directory) */
counters, /* output statistic counters */
zero; /* don't output files of size 0 as duplicates */
/* Statistics counters */
static unsigned directories_visited, unreadable_directories, found_files,
nonreg_found, unstat_files, filenames_stored,
filesizes_stored, possible_hardlinks, found_hardlinks,
duplicates_considered, matching_sizes, calculated_crcs,
cached_crcs, matching_crcs_found, duplicates_found,
symlinks_found,unreadable_files,zero_files;
static unsigned long long bytes_in_dups,usec_read,bytes_read;
/* Clear line to eol if progress output is shown, preserving
* errno.
*/
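/* putc() wrapper with the int (*)(int) signature that tputs() expects. */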
static int p(int c) { return putc(c,stderr); }
static void prog_clean(void)
{
if(progress)
{
int e = errno;
tputs(clr_eol,1,p);
errno = e;
}
}
/* Two buffers for calculating CRCs and comparing files. */
/* I tried mmap()ing the files instead of reading them into
* buffers, but it only gives me 60% of the read() speed, and
* there's a 2 GB limit on mmap().
*/
#define BUFFLEN (20*1024)
static char buf1[BUFFLEN], buf2[BUFFLEN];
/* Compute a file's CRC checksum, using the fastcrc lib. Returns
* the CRC, or a random number if the file cannot be read.
*
 * If a CRC cannot be computed, we simulate one with rand() so
 * that unreadable files don't all appear to have the same
 * constant CRC, which would force useless comparisons. This
 * "wrong" CRC does no harm, as duplicates are ultimately only
 * reported if the files actually are readable and equal. For the
 * same reason, the quality of the random number generator
 * doesn't matter too much as long as the numbers don't repeat
 * too often.
*/
static unsigned get_crc(const char *path, off_t len)
{
int fd = open(path,O_RDONLY);
++calculated_crcs;
if(fd<0)
{
++unreadable_files;
prog_clean();
fprintf(stderr,"cannot open >%s<: ",path);
perror(0);
return rand();
}
else
{
unsigned crc = ecrcinit();
off_t complete = 0;
struct timeval prev,aft;
gettimeofday(&prev,0);
for(;;)
{
const ssize_t bytes=read(fd,buf1,sizeof buf1);
if(bytes<0)
{
++unreadable_files;
prog_clean();
fprintf(stderr,"error reading from >%s<: ",path);
perror(0);
close(fd);
return rand();
}
else if(bytes==0)
break;
else
{
crc = ecrcpartial(crc,buf1,bytes);
complete += bytes;
}
}
gettimeofday(&aft,0);
usec_read += ((long long)aft.tv_sec*1000000+aft.tv_usec)
- ((long long)prev.tv_sec*1000000+prev.tv_usec);
bytes_read += complete;
crc = ecrcfinish(crc);
close(fd);
if(complete!=len)
{
prog_clean();
fprintf(stderr,"file size mismatch for >%s<\n",path);
}
if(debug)
prog_clean(),printf("::%s: %#x\n",path,crc);
return crc;
}
}
/* Compare two files. Returns 0 if the files are equal, non-zero otherwise. */
static int files_compare(const char *a, const char *b, off_t len)
{
struct timeval prev,aft;
int fd_a = open(a,O_RDONLY);
int fd_b;
off_t complete = 0;
if(fd_a<0)
{
prog_clean();
fprintf(stderr,"cannot open >%s<: ",a);
perror(0);
return 1;
}
fd_b = open(b,O_RDONLY);
if(fd_b<0)
{
prog_clean();
fprintf(stderr,"cannot open >%s<: ",b);
perror(0);
close(fd_a);
return 1;
}
gettimeofday(&prev,0);
for(;;)
{
const ssize_t bytes_a = read(fd_a,buf1,sizeof buf1);
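/* If a hit EOF (bytes_a==0), still read one byte from b so we
 * can tell whether b has ended as well; if reading a failed,
 * skip reading b entirely.
 */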
const ssize_t bytes_b = bytes_a>=0 ? read(fd_b,buf2,bytes_a?bytes_a:1) : 0;
if(bytes_a<0 || bytes_b<0)
{
/* read error */
prog_clean();
fprintf(stderr,"cannot read >%s<: ",bytes_a<0?a:b);
perror(0);
close(fd_a);
close(fd_b);
return 1;
}
else if(bytes_a==0 && bytes_b==0)
{
/* both ended */
close(fd_a);
close(fd_b);
gettimeofday(&aft,0);
usec_read += ((long long)aft.tv_sec*1000000+aft.tv_usec)
- ((long long)prev.tv_sec*1000000+prev.tv_usec);
bytes_read += 2*complete;
if(complete!=len)
{
prog_clean();
fprintf(stderr,"files of wrong length: >%s< and >%s<\n",a,b);
return 1;
}
else
return 0;
}
else if(bytes_a==0 || bytes_b==0)
{
/* one file ended, the other not */
prog_clean();
fprintf(stderr,"filesize mismatch: >%s< and >%s<\n",a,b);
close(fd_a);
close(fd_b);
return 1;
}
else
{
/* read from both files */
complete += bytes_a;
/* now look if we got the same amount... */
if(bytes_a!=bytes_b)
{
prog_clean();
fprintf(stderr,"short read() from >%s<\n",bytes_a<bytes_b?a:b);
close(fd_a);
close(fd_b);
return 1;
}
if(memcmp(buf1,buf2,bytes_a))
{
/* so they differ. */
close(fd_a);
close(fd_b);
gettimeofday(&aft,0);
usec_read += ((long long)aft.tv_sec*1000000+aft.tv_usec)
- ((long long)prev.tv_sec*1000000+prev.tv_usec);
bytes_read += 2*complete;
return 1;
}
}
}
}
/* Databases */
/* Generally, a database failure will not be tolerated for any
 * database access, i.e. there are a lot of assert()s in this
 * source. The reason: the program cannot continue without the
 * databases, so it *must* be aborted if they fail; and in such a
 * case, the way of aborting doesn't matter.
 */
/* This database is used to find filenames for known device/inode
* pairs in case a matching CRC is found, or in case of
* hardlinks.
*/
static DB *name_db;
/* The key structure */
struct dev_ino_key
{
dev_t dev;
ino_t inode;
};
/* The data is simply the filename, 0-terminated */
/* Compare device/inode pairs; compare first on device, and if
* they match on inode.
*/
static int device_inode_compare(const DBT *a_, const DBT *b_)
{
const struct dev_ino_key *a = a_->data;
const struct dev_ino_key *b = b_->data;
assert(a_->size == b_->size && a_->size==sizeof *a);
if(a->dev==b->dev)
return a->inode<b->inode ? -1 : a->inode>b->inode;
else
return a->dev<b->dev ? -1 : 1;
}
/* Size-database, indexed by sizes and containing CRC and
* device/inode. This database is used to check whether there are
* matching CRCs. Of course, this database may contain many
* instances of the same key, because files of the same length
* aren't unusual.
*/
static DB *size_db;
/* The key is simply an off_t, containing the file's length. */
/* The data is a struct: */
struct size_data
{
struct dev_ino_key dik; /* device and inode of the file */
off_t size; /* length of the file */
unsigned crc; /* CRC for the file, if known */
unsigned crc_known; /* indicates whether the CRC is known */
};
/* Compare two file sizes */
static int size_compare(const DBT *a_, const DBT *b_)
{
off_t a,b;
assert(a_->size==b_->size && a_->size==sizeof a);
a=*(off_t*)(a_->data);
b=*(off_t*)(b_->data);
return a<b ? -1 : a>b;
}
/* A new filename has been read by nftw(). Check whether it's
 * some sort of copy.
 */
static void gather(dev_t device, ino_t inode, nlink_t nlinks, off_t size, const char *path)
{
/* description of this file, for entry into the databases */
struct size_data size_data;
size_data.dik.dev = device;
size_data.dik.inode = inode;
size_data.size = size;
size_data.crc_known = 0;
++found_files;
if(!size)
++zero_files;
if(debug)
prog_clean(),printf(">>%s\n",path);
/* Check whether the device/inode pair is already known (i.e.
 * this is a hardlink to a known file); skip this if the file
 * doesn't have hardlinks, because then there is nothing to
 * look for.
 *
 * Even if we don't output hardlinks, this check is still
 * necessary: a hardlink to a known file has the same contents
 * as that file, but we don't want to report it as a duplicate,
 * so we must recognize it as a hardlink first.
 */
if(nlinks>1)
{
DBT name_dbt;
DBT dev_ino_dbt;
dev_ino_dbt.data = &size_data.dik;
dev_ino_dbt.size = sizeof size_data.dik;
++possible_hardlinks;
switch(name_db->get(name_db,&dev_ino_dbt,&name_dbt,0))
{
case 0:
++found_hardlinks;
if(debug)
prog_clean(),puts("(hardlink found)");
if(hards)
prog_clean(),printf("== >%s<\t>%s<\t>%lld<\n",(char*)name_dbt.data,path,size);
/* No need to consider this file any further;
 * all further checks can be done against the
 * hardlink that's already in the database.
 */
return;
case 1: /* not found */
if(debug)
prog_clean(),puts("(no hardlink)");
break;
case -1: /* db error */
perror("name_db->get() failed");
assert(0);
return;
}
}
/* This is not a known hard link; might be a duplicate. */
/* For all files of this size in the size db, check if the
* CRC matches, and if so, compare the files directly; omit
* if duplicates are not required. Don't consider files of
* size 0 if the user wants them to be suppressed.
*/
if(duplicates && (size || !zero))
{
int res;
DBT s_key,dev_ino_crc;
off_t req_size = size; /* Don't use the parameter, it might
* be overwritten by the
* search result.
*/
s_key.data = &req_size;
s_key.size = sizeof req_size;
++duplicates_considered;
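/* R_CURSOR positions the cursor at the first record whose key
 * is greater than or equal to req_size; R_NEXT then steps
 * through the records in key order, so this loop visits every
 * stored file of the same size and stops at the first larger
 * one.
 */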
for(res=size_db->seq(size_db,&s_key,&dev_ino_crc,R_CURSOR);
!res;
res=size_db->seq(size_db,&s_key,&dev_ino_crc,R_NEXT))
{
off_t *sk = s_key.data;
assert(s_key.size == sizeof *sk);
if(*sk!=size)
/* We are done, no more size-matches, so the file
* is unique.
*/
break;
else
/* The file has the same size - this is a
* potential duplicate.
*/
{
/* So at this point in time and space, we need
* the CRCs to match.
*/
/* The filename of the old file in the databases */
DBT name_dbt = { 0,0 };
/* Device, inode and CRC of the old file - we've
* got that already.
*/
struct size_data sd;
assert(dev_ino_crc.size == sizeof sd);
sd = *(struct size_data *)dev_ino_crc.data;
++matching_sizes;
/* get CRC of current file */
if(!size_data.crc_known)
{
size_data.crc = get_crc(path,size);
size_data.crc_known = 1;
}
/* If CRC of old file unknown, get it (and update
* database).
*/
if(!sd.crc_known)
{
/* First, get the filename from the name
* database
*/
{
DBT dev_ino_dbt;
dev_ino_dbt.size = sizeof sd.dik;
dev_ino_dbt.data = &sd.dik;
assert(name_db->get(name_db,&dev_ino_dbt,&name_dbt,0)==0);
}
/* Then calculate the old file's CRC */
sd.crc = get_crc(name_dbt.data,size);
sd.crc_known = 1;
/* And stuff it into the size database so it
* won't be calculated again
*/
{
DBT key,data;
key.data = &size;
key.size = sizeof size;
data.data = &sd;
data.size = sizeof sd;
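/* R_CURSOR makes put() replace the record the preceding
 * seq() call left the cursor on, i.e. exactly the entry
 * of the old file whose CRC we just computed.
 */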
assert(size_db->put(size_db,&key,&data,R_CURSOR)==0);
}
}
else
++cached_crcs;
/* At this point, we have both CRCs. */
if(size_data.crc==sd.crc)
{
/* The files have the same checksum and hence
* are very likely to have the same contents.
* A direct comparison won't hurt the
* performance too much.
*/
/* Get name of old file from database, if not
* already done for CRC calculation.
*/
if(!name_dbt.data)
{
DBT dev_ino_dbt;
dev_ino_dbt.size = sizeof sd.dik;
dev_ino_dbt.data = &sd.dik;
assert(name_db->get(name_db,&dev_ino_dbt,&name_dbt,0)==0);
}
++matching_crcs_found;
if(debug)
prog_clean(),printf("? comparing >%s< >%s<\n",path,(char*)name_dbt.data);
if(!files_compare(path,name_dbt.data,size))
{
/* Now the files really have the same
* contents.
*/
++duplicates_found;
bytes_in_dups += size;
prog_clean();
printf("|| >%s<\t>%s<\t>%lld<\n",(char*)name_dbt.data,path,size);
/* If the current file has link count 1, it
 * cannot be relevant in the future: it has no
 * hardlinks, and future duplicates can be
 * compared against the entry that's already
 * in the database.
 */
if(nlinks==1)
return;
else
/* No need to compare to other files
* of the same size in the database;
* but since the file has more than 1
* link, it has to go into the
* database, in order to find
* hardlinks.
*/
break;
}
else
/* Strange stuff: The files have the same
* CRC and the same length, but different
* contents. Now this *may* happen, but
* it's quite unlikely.
*/
if(debug)
prog_clean(),printf("! >%s< >%s< CRC mis-match\n",path, (char*)name_dbt.data);
} /* same CRC */
} /* same size */
} /* while files of at least this size */
} /* if duplicates */
/* At this point, the file is unique or it is the first of
* several hardlinked files.
*/
/* Put it into name database; omit files with only 1 link if
* not searching for duplicates. Also omit files of size 0 if
* the user wants them suppressed, unless they have more than
* 1 link.
*/
if(nlinks>1 || (duplicates && (size || !zero)))
{
DBT name_dbt,dev_ino_dbt;
if(debug)
prog_clean(),printf("take %s\n",path);
++filenames_stored;
name_dbt.data = (void*)path;
name_dbt.size = strlen(path)+1;
dev_ino_dbt.data = &size_data.dik;
dev_ino_dbt.size = sizeof size_data.dik;
switch(name_db->put(name_db,&dev_ino_dbt,&name_dbt,R_NOOVERWRITE))
{
case -1: /* database error */
perror("name_db->put() failed");
assert(0);
return;
case 1: /* Same device/inode pair a second time,
* without hardlink; happens if you supply it
* twice on the command line. We will ignore
* it silently.
*/
if(debug)
prog_clean(),fprintf(stderr,"found >%s< twice\n",path);
return;
case 0: /* ok */
break;
}
}
/* Size db; only necessary if searching for duplicates. Omit
* files of size 0 if the user doesn't want to see them.
*/
if(duplicates && (size || !zero))
{
DBT size_dbt, dev_ino_dbt;
++filesizes_stored;
size_dbt.data = &size;
size_dbt.size = sizeof size;
dev_ino_dbt.data = &size_data;
dev_ino_dbt.size = sizeof size_data;
switch(size_db->put(size_db,&size_dbt,&dev_ino_dbt,0))
{
case -1: /* database error */
perror("size_db->put() failed");
assert(0);
return;
case 1: /* key already in the database; should be
 * impossible because duplicate keys are
 * allowed (R_DUP)
 */
assert(0);
return;
case 0: /* ok */
break;
}
}
}
/* nftw() callback */
static int addfile(const char *path, const struct stat *stat, int flag, struct FTW *ftw_info)
{
/* unused */
ftw_info = ftw_info;
switch(flag)
{
case FTW_F: /* A regular file. */
/* ignore devices etc. */
if(S_ISREG(stat->st_mode))
gather(stat->st_dev,stat->st_ino,stat->st_nlink,stat->st_size,path);
else
++nonreg_found;
return 0;
case FTW_NS: /* A file for which no stat(2)
* information was available. The
* contents of the stat structure are
* undefined.
*/
++unstat_files;
prog_clean();
fprintf(stderr,"cannot stat() >%s<\n",path);
return 0;
case FTW_DNR: /* A directory which cannot be read. The
* directory will not be descended into.
*/
++unreadable_directories;
prog_clean();
fprintf(stderr,"cannot read directory >%s<: ",path);
perror(0);
return 0;
case FTW_D: /* A directory being visited in
* pre-order.
*/
case FTW_DP: /* A directory being visited in post-order
* (nftw() only).
*/
++directories_visited;
if(progress)
{
fprintf(stderr,"%.*s",(int)(columns-1),path);
prog_clean();
putc('\r',stderr);
fflush(stderr);
}
return 0;
case FTW_SL: /* A symbolic link. */
case FTW_SLN: /* A symbolic link with a non-existent
* target (nftw() only).
*/
++symlinks_found;
if(symbolics)
{
char buf[2000];
int len = readlink(path,buf, sizeof buf);
if(len==-1)
{
prog_clean();
fprintf(stderr,"cannot read symlink >%s<: ",path);
perror(0);
}
else
prog_clean(),printf("%s >%.*s<\t>%s<\t>%lld<\n",flag==FTW_SL?"->":"o>",len,buf,path,stat->st_size);
}
return 0;
/* ftw() errors */
default:
return 1;
}
}
/* Statistics are output at program end, if desired, and on
 * SIGINFO.
 */
static void stat_dump(FILE *f)
{
prog_clean();
fprintf(f,
"%8u directories visited\n"
"%8u unreadable directories skipped\n"
"%8u regular files found\n"
"%8u files of size 0\n"
"%8u symbolic links found\n"
"%8u devices and sockets skipped\n"
"%8u files without stat information skipped\n"
"%8u unreadable files\n"
"%8u files having more than one link\n"
"%8u hardlink pairs confirmed\n"
"%8u duplicates considered\n"
"%8u cases of matching filesizes\n"
"%8u calculated CRCs\n"
"%8u times found CRCs in database\n"
"%8u pairs of matching CRCs found\n"
"%8u duplicates found\n"
"%8llu bytes in duplicates\n"
"%8u filenames in database\n"
"%8u sizes and CRCs in database\n"
"%8g bytes/sec read\n",
directories_visited,
unreadable_directories,
found_files,
zero_files,
symlinks_found,
nonreg_found,
unstat_files,
unreadable_files,
possible_hardlinks,
found_hardlinks,
duplicates_considered,
matching_sizes,
calculated_crcs,
cached_crcs,
matching_crcs_found,
duplicates_found,
bytes_in_dups,
filenames_stored,
filesizes_stored,
(double)bytes_read/(usec_read?usec_read:1)*1000000
);
}
static void siginfo(int signo)
{
const int e = errno;
signo=signo;
stat_dump(stderr);
errno = e;
}
int main(int argc, char **argv)
{
int c;
/* look where to place the databases */
char *tmp = getenv("TMPDIR");
if(!tmp) tmp = "/tmp";
setvbuf(stdout,0,_IOLBF,0);
while((c=getopt(argc,argv,"cshdv0p"))!=-1)
switch(c)
{
case 'c': counters=1; break;
case 's': symbolics=1; break;
case 'h': hards=1; break;
case 'd': duplicates=1; break;
case 'v': debug=1; break;
case '0': zero=1; break;
case 'p':
{
/* With a null errret, setupterm() would print its own message
 * and exit on failure; pass an errret so we can report the
 * error ourselves (setupterm() doesn't set errno, so perror()
 * would be misleading here).
 */
int err;
if(setupterm(0, STDERR_FILENO, &err)==OK)
progress=1;
else
{
fprintf(stderr,"cannot show progress indicator (setupterm() error %d)\n",err);
return 1;
}
}
break;
case '?':
default:
fputs("usage: dupfind [-cshdv0p] [path ...]\n",stderr);
return 2;
}
argc-=optind;
argv+=optind;
{ /* btree access method for name database */
char namedb_name[80];
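/* Positional BTREEINFO initializer: the sixth member is the
 * compare function (flags, cachesize, maxkeypage, minkeypage,
 * psize, compare, prefix, lorder in <db.h>).
 */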
BTREEINFO bi = { 0,0,0,0,0,device_inode_compare,0,0 };
int fd;
int namlen = snprintf(namedb_name,sizeof namedb_name,"%s/dupfind.names.XXXXXX",tmp);
assert(namlen>0 && namlen<(int)sizeof namedb_name);
assert((fd=mkstemp(namedb_name))>=0);
/* can't use the fd for the database... */
close(fd);
if(!(name_db = dbopen(namedb_name,O_EXLOCK|O_RDWR,0,DB_BTREE,&bi)))
{
perror("dbopen() failed");
return 3;
}
/* don't leave the file hanging around after program end */
if(!debug)
remove(namedb_name);
}
{ /* btree access method for size database */
char sizedb_name[80];
BTREEINFO bi = { R_DUP,0,0,0,0,size_compare,0,0 };
int fd;
int namlen=snprintf(sizedb_name,sizeof sizedb_name,"%s/dupfind.sizes.XXXXXX",tmp);
assert(namlen>0 && namlen<(int)sizeof sizedb_name);
assert((fd=mkstemp(sizedb_name))>=0);
close(fd);
if(!(size_db = dbopen(sizedb_name,O_EXLOCK|O_RDWR,0,DB_BTREE,&bi)))
{
perror("dbopen() failed");
return 4;
}
if(!debug)
remove(sizedb_name);
}
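/* SIGINFO (typically sent by typing ^T on a BSD terminal) dumps
 * the statistics while the scan is still running.
 */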
signal(SIGINFO, siginfo);
srand(time(0));
for(;*argv;argv++)
if(nftw(*argv,addfile,10,FTW_PHYS)==-1)
{
perror("nftw() failed");
return 5;
}
name_db->close(name_db);
size_db->close(size_db);
prog_clean();
if(counters)
stat_dump(stdout);
return 0;
}