Logo Search packages:      
Sourcecode: dcc version File versions  Download package

db.c

/* Distributed Checksum Clearinghouse server database functions
 *
 * Copyright (c) 2005 by Rhyolite Software
 *
 * Permission to use, copy, modify, and distribute this software for any
 * purpose with or without fee is hereby granted, provided that the above
 * copyright notice and this permission notice appear in all copies.
 *
 * THE SOFTWARE IS PROVIDED "AS IS" AND RHYOLITE SOFTWARE DISCLAIMS ALL
 * WARRANTIES WITH REGARD TO THIS SOFTWARE INCLUDING ALL IMPLIED WARRANTIES
 * OF MERCHANTABILITY AND FITNESS. IN NO EVENT SHALL RHYOLITE SOFTWARE
 * BE LIABLE FOR ANY SPECIAL, DIRECT, INDIRECT, OR CONSEQUENTIAL DAMAGES
 * OR ANY DAMAGES WHATSOEVER RESULTING FROM LOSS OF USE, DATA OR PROFITS,
 * WHETHER IN AN ACTION OF CONTRACT, NEGLIGENCE OR OTHER TORTIOUS ACTION,
 * ARISING OUT OF OR IN CONNECTION WITH THE USE OR PERFORMANCE OF THIS
 * SOFTWARE.
 *
 * Rhyolite Software DCC 1.2.74-1.95 $Revision$
 */

#include "srvr_defs.h"
#include <sys/resource.h>
#ifdef HAVE_HW_PHYSMEM
#include <sys/sysctl.h>
#endif

/* PSTATIC marks the helper functions declared below with it as file-local.
 * Defining it as nothing instead exports them, which the original author
 * notes is for profiling. */
#define PSTATIC static
/* #define PSTATIC */               /* for profiling */

/* contents of the magic entry of the hash table; written by
 * clear_hash_entry() and compared by check_old_hash() */
static const u_char hash_magic[sizeof(HASH_ENTRY)] = DB_HASH_MAGIC;

DB_STATS db_stats;

/* the set of DB_STATE slots; rel_db_states() walks them from .rcd up to
 * .hash_ctl */
DB_STATES db_sts;

/* file descriptors and names of the main database file and of the hash
 * table file; the descriptors are -1 while the files are closed */
int db_fd = -1;
DCC_PATH db_nm;
int db_hash_fd = -1;
DCC_PATH db_hash_nm;
/* db_lock() stores the time of locking here; db_unlock() and db_close()
 * zero tv_sec, which db_close() describes as clearing DB_IS_LOCKED() */
struct timeval db_locked;
u_int db_flags;                     /* same as magic.s.flags */

struct timeval db_time;

/* Common UNIX buffer cache flushing mechanisms are too quick for
 * this database, causing far too much disk traffic.  However, it
 * is necessary to push the database toward the disk so that when dccd
 * shuts down, the system need not be confronted with GBytes to write.
 * So choose a delay that tries to push the database to the disk in less
 * than an hour but no more than 1 MByte/sec for a 4 GByte database.
 * 128 buffers covering 4 GBytes implies 30 MByte/buffer.  If 20% of
 * each buffer is dirty, then we don't want to flush more than about
 * 1 buffer every 5 seconds. */
/* interval handed to DB_IS_TIME() together with msync_time by
 * db_sync_some(); presumably in seconds -- TODO confirm */
#define MSYNC_TIME 10
static time_t msync_time;

int db_debug;

/* non-zero selects the greylist defaults in def_nokeep_cks() */
u_char grey_on;
/* cleared by db_unlock(), which calls it the "write() hack" */
static u_char db_no_mmap;
/* set by db_make_dirty() and clear_hash(); cleared by db_make_clean() */
static u_char db_dirty;
/* when set, db_make_clean() calls fsync() on the database file */
static u_char db_extended;
/* db_make_dirty() complains if this is set; db_lock() tolerates a grown
 * file only when it is set */
static u_char db_rdonly;
DB_PG_NUM db_end_pg_num = -1;       /* have rebuilt hash to this page */
u_char db_failed;             /* something bad happened */
static u_char db_invalidate;        /* do not write to the files */


int db_buf_total;             /* total # of db buffers */
DB_PTR db_max_rss;                  /* maximum db resident set size */
/* use DB_PTR instead of off_t because off_t is often only 32-bits */

/* The buffer control blocks, a table of buffer pointers that DB_BUF_HASH()
 * indexes by page number, and the two ends of the list that db_buf_init()
 * links through the older and newer pointers. */
#define DB_HASH_TOTAL DB_BUF_MAX
static DB_BUF *db_buf_hash[DB_HASH_TOTAL];
static DB_BUF db_bufs[DB_HASH_TOTAL];     /* control mmap()'ed blocks */
static DB_BUF *buf_oldest, *buf_newest;

/* printable name of the file behind a buffer: the database file for
 * DB_BUF_TYPE_DB buffers and the hash file for all others */
#define B2PATH(b) ((b)->type == DB_BUF_TYPE_DB              \
               ? DCC_NM2PATH(db_nm)                   \
               : DCC_NM2PATH(db_hash_nm))

/* address of the db_buf_hash[] slot for a page number */
#define DB_BUF_HASH(pg_num) (&db_buf_hash[(pg_num) % DIM(db_buf_hash)])

static const DB_VERSION_BUF version_buf = DB_VERSION_STR;

u_int db_page_size;                 /* size of 1 mmap()'ed buffer */

/* size of the hash table file in bytes */
static off_t hash_fsize;
DB_HADDR db_hash_len;               /* # of hash table entries */
DB_HADDR db_hash_used;              /* # of hash table entries in use */
u_int db_hash_page_len;             /* # of HASH_ENTRY's per buffer */
DB_HADDR db_max_hash_entries = 0;   /* after db_buf_init()*/
static off_t db_fsize;              /* size of database file */
DB_PTR db_csize;              /* size of database contents in bytes */
static DB_PTR db_csize_stored_hash; /* DB size stored in hash file */
static DB_PTR db_csize_stored_db;   /*  "  "     "    in database file */
DB_SN db_sn, db_sn_stored;          /* creation or expiration serial # */
DB_EX_TS db_ex_ts;                  /* cleaned to these dates */
DB_EX_SECS db_ex_secs;              /*    with these durations */
static DB_EX_SECS db_ex_secs_stored;
DB_NOKEEP_CKS db_nokeep_cks;        /* ignore some checksums */
static DB_NOKEEP_CKS db_nokeep_cks_stored;
DB_FLOD_THOLDS db_flod_tholds;
static DB_FLOD_THOLDS db_flod_tholds_stored;
u_int db_page_max;                  /* only padding after this in DB buf */
char db_window_size[32];            /* size of mmap() window */

/* Fuzziness level of each kind of checksum, one entry per checksum type
 * in the order named by the per-entry comments.
 * NOTE(review): the two tables label the entry after DCC_CK_FUZ2
 * differently (DCC_CK_FUZ3 here, DCC_CK_GREY_MSG in the greylist table);
 * presumably both names share one index -- confirm against the
 * checksum type definitions. */
static const u_char dcc_ck_fuzziness[DCC_DIM_CKS] = {
      0,                      /* DCC_CK_INVALID */
      DCC_CK_FUZ_LVL_NO,            /* DCC_CK_IP */
      DCC_CK_FUZ_LVL_NO,            /* DCC_CK_ENV_FROM */
      DCC_CK_FUZ_LVL_NO,            /* DCC_CK_FROM */
      DCC_CK_FUZ_LVL_NO,            /* DCC_CK_SUB */
      DCC_CK_FUZ_LVL_NO,            /* DCC_CK_MESSAGE_ID */
      DCC_CK_FUZ_LVL_NO,            /* DCC_CK_RECEIVED */
      DCC_CK_FUZ_LVL_NO,            /* DCC_CK_BODY */
      DCC_CK_FUZ_LVL1,        /* DCC_CK_FUZ1 */
      DCC_CK_FUZ_LVL2,        /* DCC_CK_FUZ2 */
      DCC_CK_FUZ_LVL2,        /* DCC_CK_FUZ3 */
      DCC_CK_FUZ_LVL2,        /* DCC_CK_GREY_TRIPLE */
      DCC_CK_FUZ_LVL2,        /* DCC_CK_SRVR_ID */
      DCC_CK_FUZ_LVL2               /* DCC_CK_ENV_TO */
};
/* the same table with the levels used for a greylist database */
static const u_char grey_ck_fuzziness[DCC_DIM_CKS] = {
      0,                      /* DCC_CK_INVALID */
      DCC_CK_FUZ_LVL2,        /* DCC_CK_IP */
      DCC_CK_FUZ_LVL_NO,            /* DCC_CK_ENV_FROM */
      DCC_CK_FUZ_LVL_NO,            /* DCC_CK_FROM */
      DCC_CK_FUZ_LVL_NO,            /* DCC_CK_SUB */
      DCC_CK_FUZ_LVL_NO,            /* DCC_CK_MESSAGE_ID */
      DCC_CK_FUZ_LVL_NO,            /* DCC_CK_RECEIVED */
      DCC_CK_FUZ_LVL_NO,            /* DCC_CK_BODY */
      DCC_CK_FUZ_LVL_NO,            /* DCC_CK_FUZ1 */
      DCC_CK_FUZ_LVL_NO,            /* DCC_CK_FUZ2 */
      DCC_CK_FUZ_LVL_NO,            /* DCC_CK_GREY_MSG */
      DCC_CK_FUZ_LVL1,        /* DCC_CK_GREY_TRIPLE */
      DCC_CK_FUZ_LVL1,        /* DCC_CK_SRVR_ID */
      DCC_CK_FUZ_LVL1               /* DCC_CK_ENV_TO */
};
/* the table in use; it starts out pointing at the ordinary DCC table */
const u_char *db_ck_fuzziness = dcc_ck_fuzziness;


/* Forward declarations of helpers that are used before they are defined.
 * They are file-local unless PSTATIC is redefined as nothing. */
PSTATIC u_char buf_msync(DCC_EMSG, DB_BUF *);
PSTATIC u_char buf_munmap(DCC_EMSG, DB_BUF *);
PSTATIC u_char buf_mmap(DCC_EMSG, DB_BUF *, DB_PG_NUM);
PSTATIC DB_BUF *find_buf(DCC_EMSG, DB_BUF_TYPE, DB_PG_NUM);
PSTATIC u_char map_hash(DCC_EMSG, DB_HADDR, DB_STATE *);
PSTATIC u_char map_db(DCC_EMSG, DB_PTR, u_int, DB_STATE *);
PSTATIC void rel_db_states(u_char);
PSTATIC u_char db_flush_len(DCC_EMSG);
PSTATIC u_char db_make_clean(DCC_EMSG, u_char);


/* Greatest common divisor of two unsigned integers by Euclid's algorithm.
 *    A zero argument yields the other argument (and gcd(0,0) is 0)
 *    instead of dividing by zero. */
static u_int
gcd(u_int n, u_int m)
{
      u_int r;

      /* keep the smaller value in n */
      if (n > m) {
            r = m; m = n; n = r;
      }
      /* n is now the smaller value; m % 0 would be undefined */
      if (n == 0)
            return m;

      for (;;) {
            r = m % n;
            if (r == 0)
                  return n;
            m = n;
            n = r;
      }
}



/* Build the default set of checksum types that are not kept.
 *    The invalid and flood-path types are always in the set.  The other
 *    types are chosen by the greylist or the ordinary default test
 *    depending on grey_on. */
DB_NOKEEP_CKS
def_nokeep_cks(void)
{
      DB_NOKEEP_CKS cks = 0;
      DCC_CK_TYPES ck_type;

      DB_SET_NOKEEP(cks, DCC_CK_INVALID);
      DB_SET_NOKEEP(cks, DCC_CK_FLOD_PATH);

      ck_type = 0;
      while (ck_type < DCC_NUM_CKS) {
            if (grey_on
                ? (DB_GREY_NOKEEP(ck_type))
                : (DB_DEF_NOKEEP(ck_type))) {
                  DB_SET_NOKEEP(cks, ck_type);
            }
            ++ck_type;
      }

      return cks;
}



/* Grow a file from old_size to new_size bytes by appending zeros.
 *    At least in BSD/OS, mmap() cannot extend a file.
 *
 *    emsg receives the text of the first error
 *    fd and nm are the descriptor and the name of the file
 *    new_size must be larger than old_size and no larger than DB_PTR_MAX */
u_char                              /* 0=failed, 1=extended */
db_extend(DCC_EMSG emsg, int fd, const char *nm,
        DB_PTR new_size, DB_PTR old_size)
{
      static u_char zeros[4096];
      DB_PTR remaining;
      int len, i;

      if (new_size > DB_PTR_MAX) {
            dcc_pemsg(EX_SOFTWARE, emsg,
                    "invalid new size "L_HPAT" for %s",
                    new_size, DCC_NM2PATH(nm));
            return 0;
      }
      if (new_size <= old_size) {
            dcc_pemsg(EX_SOFTWARE, emsg,
                    "new_size "L_HPAT" <= old_size "
                    L_HPAT" in db_extend(%s)",
                    new_size, old_size, DCC_NM2PATH(nm));
            return 0;
      }

      /* Use write() because FreeBSD documentation cautions against mmap() on
       * files with holes. */
      if (old_size != (DB_PTR)lseek(fd, old_size, SEEK_SET)) {
            dcc_pemsg(EX_IOERR, emsg, "lseek(%s,"L_HPAT"): %s",
                    DCC_NM2PATH(nm), old_size, ERROR_STR());
            return 0;
      }

      while (old_size < new_size) {
            /* Size the chunk in DB_PTR arithmetic.  Squeezing a gap of
             * 2 GBytes or more into an int could yield zero or a negative
             * number and so claim success without writing anything. */
            remaining = new_size - old_size;
            if (remaining > (DB_PTR)sizeof(zeros))
                  len = sizeof(zeros);
            else
                  len = remaining;

            i = write(fd, zeros, len);
            if (i != len) {
                  /* requested length first and then the result,
                   * as in write_hash_buf() */
                  dcc_pemsg(EX_IOERR, emsg, "write(%s,%d)=%d: %s",
                          DCC_NM2PATH(nm), len, i, ERROR_STR());
                  return 0;
            }
            old_size += len;
      }
      return 1;
}



/* Unmap every buffer that is neither free nor locked, working from the
 * oldest buffer to the newest.
 *    Only the first failure is reported through emsg. */
u_char                              /* 0=problem 1=finished */
db_unload(DCC_EMSG emsg)
{
      u_char ok = 1;
      DB_BUF *buf;

      buf = buf_oldest;
      while (buf != 0) {
            if (buf->type != DB_BUF_TYPE_FREE
                && buf->lock_cnt == 0
                && !buf_munmap(emsg, buf)) {
                  ok = 0;
                  emsg = 0;
            }
            buf = buf->newer;
      }

      return ok;
}



/* msync() the dirty, unlocked buffers that either are flagged for it
 * or are due according to the msync timer.
 *    This does not seem to have any effects on many systems */
u_char                              /* 0=an msync() failed, 1=ok */
db_sync_some(DCC_EMSG emsg)
{
      u_char ok = 1;
      DB_BUF *buf;

      for (buf = buf_oldest; buf != 0; buf = buf->newer) {
            /* ignore buffers that are unused, in use, or clean */
            if (buf->type == DB_BUF_TYPE_FREE)
                  continue;
            if (buf->lock_cnt != 0)
                  continue;
            if (!(buf->flags & DB_BUF_FG_DIRTY))
                  continue;

            /* wait unless the buffer is flagged or the time has come */
            if (!(buf->flags & DB_BUF_FG_MSYNC)
                && !(DB_IS_TIME(msync_time, MSYNC_TIME)))
                  continue;

            if (buf_msync(emsg, buf))
                  continue;

            /* remember the failure but report only the first one */
            ok = 0;
            emsg = 0;
      }
      return ok;
}



/* Detach a state from its buffer and drop the lock it held on the buffer.
 *    When the last lock goes away while the database is not locked,
 *    the buffer is unmapped immediately. */
static void
db_rel_state(DB_STATE *st)
{
      DB_BUF *buf = st->b;

      if (buf == 0)
            return;                 /* nothing is attached */

      st->b = 0;
      st->d.p = 0;
      st->s.rptr = DB_PTR_BAD;

      buf->lock_cnt -= 1;
      if (buf->lock_cnt == 0) {
            if (!DB_IS_LOCKED())
                  buf_munmap(0, buf);
            return;
      }
      if (buf->lock_cnt < 0)
            dcc_logbad(EX_SOFTWARE,
                     "negative database buffer lock");
}



/* Release the states in db_sts from .rcd up to but not including
 * .hash_ctl, and then .hash_ctl itself unless not_hash_magic is set. */
PSTATIC void
rel_db_states(u_char not_hash_magic)
{
      DB_STATE *state = &db_sts.rcd;

      while (state < &db_sts.hash_ctl) {
            db_rel_state(state);
            ++state;
      }

      /* the buffer with the dirty flag is released only if allowed */
      if (not_hash_magic)
            return;
      db_rel_state(state);
}



/* Shut down the database: clean it, release and unmap all buffers,
 * close both files, and reset the module state flags.
 *    Every step is attempted even after a failure, but only the first
 *    failure is reported through emsg. */
u_char                              /* 0=something failed, 1=ok */
db_close(DCC_EMSG emsg,
       int mode)              /* -1=invalidate cache, 0=flush, 1=ok */
{
      u_char ok = 1;

      /* a negative mode sets the do-not-write flag and then acts as 0 */
      if (mode < 0) {
            db_invalidate = 1;
            mode = 0;
      }

      /* flush the data and then release and flush the dirty flags */
      if (!db_make_clean(emsg, mode)) {
            ok = 0;
            emsg = 0;
      }
      rel_db_states(0);
      if (!db_unload(emsg)) {
            ok = 0;
            emsg = 0;
      }

      /* close the hash table first because the server is often
       * waiting for the lock on the main file held by dbclean */
      if (db_hash_fd >= 0) {
            if (close(db_hash_fd) < 0) {
                  dcc_pemsg(EX_IOERR, emsg, "close(%s): %s",
                          DCC_NM2PATH(db_hash_nm), ERROR_STR());
                  ok = 0;
                  emsg = 0;   /* print next error message directly */
            }
            db_hash_fd = -1;
      }

      if (db_fd >= 0) {
            if (close(db_fd) < 0) {
                  dcc_pemsg(EX_IOERR, emsg, "close(%s): %s",
                          DCC_NM2PATH(db_nm), ERROR_STR());
                  ok = 0;
                  emsg = 0;   /* print next error message directly */
            }
            db_fd = -1;
      }

      /* forget everything about the closed database */
      db_locked.tv_sec = 0;         /* clear DB_IS_LOCKED() */
      db_extended = 0;
      db_dirty = 0;
      db_failed = 0;
      db_invalidate = 0;
      return ok;
}



/* Lock the database file and note the time of locking.
 *    This locking does only multiple-readers/single-writer.
 *    After the lock is obtained, the size of the file is compared with
 *    the remembered size.  Only growth of a read-only database is
 *    accepted; any other change is an error. */
int                           /* -1=failed, 0=was not locked, 1=was */
db_lock(DCC_EMSG emsg)
{
      struct stat st;

      if (DB_IS_LOCKED())
            return 1;

      if (!dcc_exlock_fd(emsg, db_fd, DCC_LOCK_ALL_FILE, "", db_nm))
            return -1;

      if (fstat(db_fd, &st) < 0) {
            dcc_pemsg(EX_IOERR, emsg, "stat(%s): %s",
                    DCC_NM2PATH(db_nm), ERROR_STR());
            return -1;
      }

      if (st.st_size != db_fsize) {
            if (!db_rdonly || st.st_size < db_fsize) {
                  dcc_pemsg(EX_IOERR, emsg,
                          "%s changed from "OFF_HPAT" to "OFF_HPAT,
                          DCC_NM2PATH(db_nm), db_fsize, st.st_size);
                  return -1;
            }
            /* a read-only user just notices that the file has grown */
            db_fsize = st.st_size;
      }

      gettimeofday(&db_locked, 0);
      return 0;
}



/* Push the changes toward the disk and then mark the hash file clean.
 *    Nothing is done unless the database is dirty.
 *    ok=0 unloads the buffers without touching the dirty flag in the
 *    hash file.
 *    Every step is attempted even after a failure, and a failure of any
 *    step makes the result 0.  Only the first failure is reported through
 *    emsg.  This version never returns any value other than 0 or 1. */
PSTATIC u_char                      /* 0=failed, 1=clean */
db_make_clean(DCC_EMSG emsg, u_char ok)
{
      u_char result;

      if (!db_dirty)
            return 1;

      result = 1;

      /* send any changes to the disk,
       * but keep the database-dirty flags in RAM */
      if (!db_failed
          && ok
          && db_hash_fd >= 0
          && !map_hash(emsg, DB_HADDR_SIZES, &db_sts.hash_ctl)) {
            emsg = 0;
            result = 0;
      }

      rel_db_states(1);
      /* do not let a successful unload hide a failure above */
      if (!db_unload(emsg)) {
            emsg = 0;
            result = 0;
      }

      if (db_extended) {
            /* Send the meta-data to disk so that other processes
             * such as dbclean can find the new length of the file
             * on Solaris. */
            if (0 > fsync(db_fd)) {
                  dcc_pemsg(EX_IOERR, emsg, "fsync(%s): %s",
                          DCC_NM2PATH(db_nm), ERROR_STR());
                  emsg = 0;
                  result = 0;
            }
            db_extended = 0;
      }

      /* Clean the dirty flag in the hash table.
       * With luck, this will reach the disk after everything else.
       * Skip it if the control entry is not mapped, as after a failure
       * of map_hash() above, because there is then nothing to write to. */
      if (!db_failed
          && ok
          && db_hash_fd >= 0
          && db_sts.hash_ctl.b != 0
          && (DB_HADDR_EX(db_sts.hash_ctl.d.h
                      ->HASH_STORE_USED) != db_hash_used)) {
            DB_HADDR_CP(db_sts.hash_ctl.d.h->HASH_STORE_USED, db_hash_used);
            db_sts.hash_ctl.b->flags |= (DB_BUF_FG_MSYNC | DB_BUF_FG_DIRTY);
            if (!buf_msync(emsg, db_sts.hash_ctl.b))
                  result = 0;
      }

      db_dirty = 0;
      return result;
}



/* Mark the hash file and so the database dirty by storing 0 as the
 * count of used entries in the hash file and pushing that to the disk.
 *    Nothing is done if the database is already marked dirty.
 *    The database must be locked. */
static u_char                       /* 0=failed, 1=dirty */
db_make_dirty(DCC_EMSG emsg)
{
      DB_STATE *ctl = &db_sts.hash_ctl;

      if (db_dirty)
            return 1;

      if (!DB_IS_LOCKED()) {
            dcc_pemsg(EX_SOFTWARE, emsg, "dirtying unlocked database");
            return 0;
      }

      if (db_rdonly)
            dcc_logbad(EX_SOFTWARE, "dirtying read-only database");

      if (!map_hash(emsg, DB_HADDR_SIZES, ctl))
            return 0;

      DB_HADDR_CP(ctl->d.h->HASH_STORE_USED, 0);
      ctl->b->flags |= DB_BUF_FG_MSYNC;
      if (!buf_msync(emsg, ctl->b))
            return 0;

      db_dirty = 1;
      return 1;
}



/* (start to) unlock the database
 *    The database is cleaned before the file lock is released.
 *    The lock is released and forgotten even if the cleaning fails. */
u_char                              /* 0=failed, 1=at least started */
db_unlock(DCC_EMSG emsg)
{
      int status;

      if (!DB_IS_LOCKED())
            return 1;

      /* when we unlock frequently, we cannot use the write() hack */
      db_no_mmap = 0;

      status = db_make_clean(emsg, 1);
      if (status == 0) {
            /* print next error message directly */
            emsg = 0;
      }

      if (!dcc_unlock_fd(emsg, db_fd, DCC_LOCK_ALL_FILE, "", db_nm))
            status = 0;

      db_locked.tv_sec = 0;
      return status;
}



/* Choose db_max_rss, the limit on the memory used for database buffers,
 * from the amount of physical memory reported by the system.
 *    The amount of memory is obtained from physmem_total(), sysconf(), or
 *    sysctl(HW_PHYSMEM), whichever the build configuration provides.
 *    If none is available or the call fails, physmem stays 0 and the
 *    default of DEF_DB_MBYTE MBytes is used.
 *    The result is kept between DEF_DB_MBYTE and DCC_MAX_DB_MBYTE MBytes. */
static void
get_db_max_rss(void)
{
/* Sanitize the build-time settings: DCC_MAX_DB_MBYTE must lie between
 * MIN_DB_MBYTE and MAX_DB_MBYTE, and DCC_DB_MBYTE is used as the default
 * only if it lies between MIN_DB_MBYTE and DCC_MAX_DB_MBYTE. */
#define MIN_DB_MBYTE 32
#define MAX_DB_MBYTE 3072           /* 3 GByte on 32 bit machines is fair */
#if DCC_MAX_DB_MBYTE < MIN_DB_MBYTE || DCC_MAX_DB_MBYTE > MAX_DB_MBYTE
#undef DCC_MAX_DB_MBYTE
#define DCC_MAX_DB_MBYTE MAX_DB_MBYTE
#endif
#if DCC_DB_MBYTE >= MIN_DB_MBYTE && DCC_DB_MBYTE <= DCC_MAX_DB_MBYTE
#define DEF_DB_MBYTE DCC_DB_MBYTE
#else
#define DEF_DB_MBYTE 64
#endif
      /* bytes of physical memory, or 0 if unknown */
      DB_PTR physmem = 0;

#ifdef HAVE_PHYSMEM_TOTAL
      /* maybe someday physmem_total() will be widely available */
      physmem = physmem_total();
#else
#ifdef HAVE__SC_PHYS_PAGES
      long pages, pagesize;

      /* multiply in DB_PTR so that the product is not limited to a long */
      if ((pages = sysconf(_SC_PHYS_PAGES)) == -1) {
            dcc_error_msg("sysconf(_SC_PHYS_PAGES): %s",
                        ERROR_STR());
      } else if ((pagesize = sysconf(_SC_PAGESIZE)) == -1) {
            dcc_error_msg("sysconf(_SC_PAGESIZE): %s",
                        ERROR_STR());
      } else {
            physmem = (DB_PTR)pages * (DB_PTR)pagesize;
      }
#else
#ifdef HAVE_HW_PHYSMEM
      /* NOTE(review): hw_physmem is an int, which looks too small for
       * systems with 2 GBytes or more of memory -- confirm */
      int mib[2] = {CTL_HW, HW_PHYSMEM};
      int hw_physmem;
      size_t hw_physmem_len = sizeof(hw_physmem);

      if (0 <= sysctl(mib, 2, &hw_physmem, &hw_physmem_len, 0, 0)) {
            physmem = hw_physmem;
      } else {
            dcc_error_msg("sysctl(HW_PHYSMEM): %s", ERROR_STR());
      }
#endif
#endif /* HAVE__SC_PHYS_PAGES */
#endif /* HAVE_PHYSMEM_TOTAL */

      /* Try to use physical memory less 512 MByte, or half of it if
       * there is no more than 1 GByte.
       * If we got a reasonable memory size from the kernel, use it;
       * otherwise use a default. */
      if (physmem > 1024*1024*1024)
            db_max_rss = physmem - 512*1024*1024;
      else
            db_max_rss = physmem/2;
      /* clamp the result; an unknown size of 0 ends up as the default */
      if (db_max_rss/(1024*1024) < DEF_DB_MBYTE) {
            db_max_rss = DEF_DB_MBYTE;
            db_max_rss *= 1024*1024;
      } else if (db_max_rss/(1024*1024) > DCC_MAX_DB_MBYTE) {
            db_max_rss = DCC_MAX_DB_MBYTE;
            db_max_rss *= 1024*1024;
      }
#undef MIN_DB_MBYTE
#undef MAX_DB_MBYTE
#undef DEF_DB_MBYTE
}



/* Pick a buffer size that will hold an integral number of DB hash
 * table entries and is a multiple of system's page size.
 * The entire hash table should reside in memory
 * if the system has enough memory.
 *    Also sets db_buf_total, the number of buffers to use.
 *    With a required old_page_size, that size is returned if it is
 *    usable, and 0 if it is not.
 *    Otherwise the result is the target size, if one is given and it is
 *    smaller than the largest size that fits the memory limit, or else
 *    that largest size.  It is never less than the smallest usable size. */
int                           /* 0=old size is unusable, else page size */
db_get_page_size(u_int old_page_size,     /* 0 or required page size */
             u_int tgt_page_size)   /* 0 or target page size */
{
      u_int min_page_size, max_page_size;

      /* Ask the operating system only once so we don't get differing
       * answers and so compute a varying page size.
       * Some systems can't keep their stories straight. */
      if (db_max_rss == 0)
            get_db_max_rss();

      /* Compute the least common multiple of the system page size
       * and the DB hash table entry size.  This will give us the
       * smallest page size that we can use. */
      min_page_size = getpagesize();
      min_page_size *= (sizeof(HASH_ENTRY)
                    / gcd(sizeof(HASH_ENTRY), min_page_size));

      /* The DB buffer or page size must also be a multiple of
       * the end-of-page padding used in the main database file. */
      if (sizeof(DB_RCD) % DB_RCD_PAD != 0)
            dcc_logbad(EX_SOFTWARE,
                     "DB padding size %d"
                     " is not a divisor of DB entry size %d",
                     DB_RCD_PAD, ISZ(DB_RCD));
      min_page_size *= (DB_RCD_PAD / gcd(DB_RCD_PAD, min_page_size));

      /* Use the old buffer size if possible so we are not confused
       * by padding at the ends of the old pages.
       * Fail if it is impossible.  This should cause dbclean to
       * rebuild the database. */
      if (old_page_size != 0) {
            if ((old_page_size % min_page_size) != 0)
                  return 0;
            /* adjust the number of buffers to fit our window size,
             * but keep it between DB_BUF_MIN and DB_BUF_MAX */
            db_buf_total = (db_max_rss + old_page_size-1) / old_page_size;
            if (db_buf_total > DB_BUF_MAX)
                  db_buf_total = DB_BUF_MAX;
            /* too few buffers are raised to the minimum instead of
             * jumping to the maximum and far past the memory limit */
            if (db_buf_total < (int)DB_BUF_MIN)
                  db_buf_total = DB_BUF_MIN;
            return old_page_size;
      }

      /* the largest usable page size that lets all of the buffers
       * fit within the memory limit */
      db_buf_total = DB_BUF_MAX;
      max_page_size = db_max_rss / db_buf_total;
      max_page_size -= max_page_size % min_page_size;

      /* If we have a target page size, try to use it.
       * A target page size is big enough to hold 25% of the entire
       * greylist database */
      if (tgt_page_size != 0 && tgt_page_size < max_page_size) {
            tgt_page_size -= tgt_page_size % min_page_size;
            if (tgt_page_size < min_page_size)
                  tgt_page_size = min_page_size;
            return tgt_page_size;
      } else if (max_page_size > min_page_size) {
            return max_page_size;
      } else {
            return min_page_size;
      }
}



/* (re)create the buffer pool
 * The buffers are small blocks that point to the real mmap()'ed memory.
 *    This chooses the page size, derives the sizes that depend on it,
 *    forgets all existing buffers, and links the first db_buf_total
 *    buffers into a list from buf_oldest to buf_newest.
 */
u_char                              /* 0=unusable page size, 1=ready */
db_buf_init(u_int old_page_size)    /* 0 or required page size */
{
      DB_BUF *b, *bprev;
      int i;


      db_page_size = db_get_page_size(old_page_size, 0);
      if (!db_page_size)
            return 0;

      db_page_max = db_page_size - DB_RCD_PAD;
      db_hash_page_len = db_page_size/sizeof(HASH_ENTRY);

      /* limit the hash table to a whole number of pages */
      db_max_hash_entries = (MAX_HASH_ENTRIES
                         - MAX_HASH_ENTRIES % db_hash_page_len);

      /* Release memory still attached to any buffer before forgetting
       * them.  Visit all DB_BUF_MAX buffers including the last one.
       * NOTE(review): free() assumes the memory came from malloc();
       * confirm that mmap()'ed buffers are always unmapped by now. */
      for (b = db_bufs, i = DB_BUF_MAX; i != 0; --i, ++b) {
            if (b->buf.v)
                  free(b->buf.v);
      }
      memset(db_bufs, 0, sizeof(db_bufs));

      /* link the buffers that will be used from oldest to newest;
       * the zeroed array already ends the list at both ends */
      bprev = 0;
      for (b = db_bufs, i = db_buf_total; i > 0; --i, ++b) {
            b->older = bprev;
            if (bprev != 0)
                  bprev->newer = b;
            bprev = b;
      }
      buf_oldest = db_bufs;
      buf_newest = bprev;

      memset(db_buf_hash, 0, sizeof(db_buf_hash));

      return 1;
}



/* Initialize one entry of a new, empty hash table.
 *    The magic entry gets the magic string and the sizes entry gets the
 *    table length and the initial count of used entries.  Every other
 *    entry is zeroed and linked into a doubly linked ring that runs from
 *    the free list head through all entries from DB_HADDR_MIN to the end
 *    of the table and back to the head. */
static void
clear_hash_entry(HASH_ENTRY *hash, DB_HADDR rcd_num)
{
      DB_HADDR prev, next;

      if (rcd_num == DB_HADDR_MAGIC) {
            memcpy(hash, &hash_magic, sizeof(hash_magic));
            return;
      }

      memset(hash, 0, sizeof(*hash));

      if (rcd_num == DB_HADDR_SIZES) {
            DB_HADDR_CP(hash->HASH_STORE_LEN, db_hash_len);
            DB_HADDR_CP(hash->HASH_STORE_USED, DB_HADDR_MIN);
            return;
      }

      /* backward link: the first entry points back to the list head,
       * and the list head points back to the last entry */
      if (rcd_num == DB_HADDR_MIN)
            prev = DB_HADDR_FREE;
      else if (rcd_num == DB_HADDR_FREE)
            prev = db_hash_len - 1;
      else
            prev = rcd_num - 1;

      /* forward link: the list head points to the first entry,
       * and the last entry points to the list head */
      if (rcd_num == DB_HADDR_FREE) {
            next = DB_HADDR_MIN;
      } else {
            next = rcd_num + 1;
            if (next >= db_hash_len)
                  next = DB_HADDR_FREE;
      }

      DB_HADDR_CP(hash->bak, prev);
      DB_HADDR_CP(hash->fwd, next);
}



#ifdef MAP_ANON
/* Clear new hash file by linking all of its entries into
 * the free list using the DB buffer/page mechanism
 *    Each page of the table is obtained with find_buf().  A buffer that
 *    has no memory yet is given an anonymous, private mapping and is
 *    flagged DB_BUF_FG_NO_MMAP and DB_BUF_FG_ANON.
 *    The database is marked dirty in RAM without touching the hash file.
 *    Returns 0 after a failure and 1 when the table has been cleared. */
static u_char
clear_hash(DCC_EMSG emsg)
{
      DB_HADDR haddr;
      DB_BUF *b;
      HASH_ENTRY *hash, *hash_lim;
      void *p;
      int i;

      db_dirty = 1;
      haddr = DB_HADDR_NULL;
      /* start with an exhausted page to force getting the first buffer */
      hash = hash_lim = 0;
      for (haddr = 0; haddr < db_hash_len; ++haddr, ++hash) {
            if (hash >= hash_lim) {
                  /* the previous page is full, and so get the next one */
                  b = find_buf(emsg, DB_BUF_TYPE_HASH,
                             haddr / db_hash_page_len);
                  if (!b)
                        return 0;
                  hash = b->buf.h;
                  if (!hash) {
                        p = mmap(0, db_page_size,
                               PROT_READ | PROT_WRITE,
                               MAP_ANON | MAP_PRIVATE, -1, 0);
                        if (p == MAP_FAILED) {
                              dcc_pemsg(EX_IOERR, emsg,
                                      "mmap(anon, %d): %s",
                                      db_page_size, ERROR_STR());
                              return 0;
                        }
                        b->buf.v = p;
                        hash = p;
                        b->flags |= (DB_BUF_FG_NO_MMAP
                                   | DB_BUF_FG_ANON);
                  }
                  /* this page ends after a full page of entries or
                   * at the end of the table, whichever comes first */
                  hash_lim = hash;
                  if (db_hash_len - haddr > db_hash_page_len)
                        hash_lim += db_hash_page_len;
                  else
                        hash_lim += db_hash_len - haddr;
            }
            clear_hash_entry(hash, haddr);
      }

      /* The hash table might not be an even number of pages,
       * but the file must be.  We know the excess is less than
       * one hash table entry. */
      i = hash_fsize - db_hash_len*sizeof(*hash);
      if (i > 0)
            memset(hash, 0, i);

      return 1;
}

#else /* !defined(MAP_ANON) */

/* Clear new hash file by linking all of its entries into
 * the free list and using write(). */

/* Write the bytes from buf up to but not including ptr to the hash file
 * at its current position.  Writing nothing is a success. */
static u_char                       /* 0=failed, 1=written */
write_hash_buf(DCC_EMSG emsg, void *buf, void *ptr)
{
      int want = (char *)ptr - (char *)buf;
      int got;

      if (want == 0)
            return 1;

      got = write(db_hash_fd, buf, want);
      if (got == want)
            return 1;

      dcc_pemsg(EX_IOERR, emsg, "write(%s,%d)=%d: %s",
              DCC_NM2PATH(db_hash_nm), want, got, ERROR_STR());
      return 0;
}



/* Fill the new hash file with cleared entries that are linked into the
 * free list, using write() on systems without anonymous mmap().
 *    The entries are built in a local array that is written to the file
 *    whenever it fills up.
 *    Returns 0 after a failure and 1 when the table has been written. */
static u_char
clear_hash(DCC_EMSG emsg)
{
      DB_HADDR haddr;
      HASH_ENTRY *hash, hash_buf[512];
      int i;

      hash = hash_buf;
      for (haddr = 0; haddr < db_hash_len; ++haddr, ++hash) {
            /* flush the array and start over when it is full */
            if (hash >= LAST(hash_buf)) {
                  if (!write_hash_buf(emsg, hash_buf, hash))
                        return 0;
                  hash = hash_buf;
            }

            clear_hash_entry(hash, haddr);
      }
      if (!write_hash_buf(emsg, hash_buf, hash))
            return 0;

      /* The hash table might not be an even number of pages,
       * but the file must be.  We know the excess is less than
       * one hash table entry. */
      i = hash_fsize - db_hash_len*sizeof(*hash);
      if (i > 0) {
            memset(hash_buf, 0, sizeof(hash_buf));
            /* i counts bytes and not entries, and so the end of the
             * padding must be computed with byte arithmetic */
            if (!write_hash_buf(emsg, hash_buf, (char *)hash_buf + i))
                  return 0;
      }

      return 1;
}
#endif


/* Replace the contents of the open hash file with an empty hash table.
 *    new_hash_len is the requested number of entries.  It is limited to
 *    db_max_hash_entries and then increased so that the table fills
 *    a whole number of pages.
 *    Returns 0 after a failure and 1 when the new table is ready. */
static u_char
make_new_hash(DCC_EMSG emsg, DB_HADDR new_hash_len)
{
      struct stat sb;
      u_int n;

      if (getuid() == 0) {
            /* if we are running as root,
             * give the hash file the owner of the database file */
            if (0 > fstat(db_fd, &sb)) {
                  dcc_pemsg(EX_IOERR, emsg, "fstat(%s): %s",
                          DCC_NM2PATH(db_nm), ERROR_STR());
                  return 0;
            }
            if (0 > fchown(db_hash_fd, sb.st_uid, sb.st_gid)) {
                  dcc_pemsg(EX_IOERR, emsg, "fchown(%s,%d,%d): %s",
                          DCC_NM2PATH(db_hash_nm),
                          (int)sb.st_uid, (int)sb.st_gid,
                          ERROR_STR());
                  return 0;
            }
      }

      if (new_hash_len > db_max_hash_entries)
            new_hash_len = db_max_hash_entries;

      /* increase the requested hash table size to a multiple
       * of the page size */
      hash_fsize = new_hash_len * sizeof(HASH_ENTRY);
      n = hash_fsize % db_page_size;
      if (n != 0) {
            hash_fsize += db_page_size - n;
            new_hash_len = hash_fsize/sizeof(HASH_ENTRY);
      }

      if (new_hash_len < MIN_HASH_ENTRIES) {
            dcc_pemsg(EX_DATAERR, emsg,
                    "database size %d is too small", new_hash_len);
            return 0;
      }
      if (new_hash_len > MAX_HASH_ENTRIES) {
            dcc_pemsg(EX_DATAERR, emsg,
                    "database size %d is too large", new_hash_len);
            return 0;
      }

      /* create the empty hash table file */
      rel_db_states(0);
      if (!db_unload(emsg))
            return 0;
      if (0 > ftruncate(db_hash_fd, 0)) {
            /* report the length that was really requested */
            dcc_pemsg(EX_IOERR, emsg, "ftruncate(%s,0): %s",
                    DCC_NM2PATH(db_hash_nm), ERROR_STR());
            return 0;
      }

      db_hash_len = new_hash_len;
      db_hash_used = DB_HADDR_MIN;
      return clear_hash(emsg);
}



/* Validate the existing hash table file on db_hash_fd.
 *    Checks the file size, the magic entry, the free list head and tail,
 *    and the stored entry counts and database size.
 *    On success, hash_fsize, db_hash_len, db_hash_used,
 *    and db_csize_stored_hash describe the file.
 *    Returns 1 if the file looks usable or 0 with a message in emsg. */
static u_char
check_old_hash(DCC_EMSG emsg)
{
      DB_HADDR fwd, bak, stored_hash_len;
      struct stat sb;

      /* check the size of the existing hash file */
      if (0 > fstat(db_hash_fd, &sb)) {
            dcc_pemsg(EX_IOERR, emsg, "stat(%s): %s",
                    DCC_NM2PATH(db_hash_nm), ERROR_STR());
            return 0;
      }
      hash_fsize = sb.st_size;
      if ((hash_fsize % sizeof(HASH_ENTRY)) != 0) {
            dcc_pemsg(EX_DATAERR, emsg, "%s has size "OFF_DPAT","
                    " not a multiple of %d",
                    DCC_NM2PATH(db_hash_nm), hash_fsize,
                    ISZ(HASH_ENTRY));
            return 0;
      }

      db_hash_len = hash_fsize/sizeof(HASH_ENTRY);
      if (db_hash_len < MIN_HASH_ENTRIES) {
            dcc_pemsg(EX_DATAERR, emsg,
                    "%s has too few records, "OFF_DPAT" bytes",
                    DCC_NM2PATH(db_hash_nm), hash_fsize);
            return 0;
      }

      /* check the magic number */
      if (!map_hash(emsg, DB_HADDR_MAGIC, &db_sts.hash_ctl))
            return 0;
      if (memcmp(db_sts.hash_ctl.d.h, &hash_magic, sizeof(HASH_ENTRY))) {
            dcc_pemsg(EX_DATAERR, emsg,
                    "%s has the wrong magic \"%.*s\"",
                    DCC_NM2PATH(db_hash_nm),
                    ISZ(HASH_ENTRY), db_sts.hash_ctl.d.c);
            return 0;
      }

      /* Fetch the stored sizes before looking at the free list,
       * because the free list checks compare db_hash_len with
       * db_hash_used and so must see the value from this file instead
       * of whatever was left in db_hash_used. */
      if (!map_hash(emsg, DB_HADDR_SIZES, &db_sts.hash)) {
            return 0;
      }
      stored_hash_len = DB_HADDR_EX(db_sts.hash.d.h->HASH_STORE_LEN);
      db_hash_used = DB_HADDR_EX(db_sts.hash.d.h->HASH_STORE_USED);

      /* An invalid free list link is tolerated only if it is
       * DB_HADDR_FREE and no entry is unused. */
      if (!map_hash(emsg, DB_HADDR_FREE, &db_sts.hash_ctl))
            return 0;
      fwd = DB_HADDR_EX(db_sts.hash_ctl.d.h->fwd);
      if (DB_HADDR_INVALID(fwd)
          && (fwd != DB_HADDR_FREE
            || db_hash_len > db_hash_used)) {
            dcc_pemsg(EX_DATAERR, emsg,
                    "%s has a broken free list head of %#x",
                    DCC_NM2PATH(db_hash_nm), fwd);
            return 0;
      }
      bak = DB_HADDR_EX(db_sts.hash_ctl.d.h->bak);
      if (DB_HADDR_INVALID(bak)
          && (bak != DB_HADDR_FREE
            || db_hash_len > db_hash_used)) {
            dcc_pemsg(EX_DATAERR, emsg,
                    "%s has a broken free list tail of %#x",
                    DCC_NM2PATH(db_hash_nm), bak);
            return 0;
      }

      /* the file must be as large as it claims */
      if (db_hash_len != stored_hash_len) {
            dcc_pemsg(EX_DATAERR, emsg,
                    "%s has %d entries but claims %d",
                    DCC_NM2PATH(db_hash_nm),
                    db_hash_len, stored_hash_len);
            return 0;
      }

      /* check the number of hash table entries used in the file */
      if (db_hash_used < DB_HADDR_MIN) {
            if (db_hash_used == 0)
                  dcc_pemsg(EX_DATAERR, emsg,
                          "%s was not closed cleanly",
                          DCC_NM2PATH(db_hash_nm));
            else
                  dcc_pemsg(EX_DATAERR, emsg,
                          "%s contains an impossible %d entries",
                          DCC_NM2PATH(db_hash_nm),
                          HASH_LEN_EXT(db_hash_used));
            return 0;
      }
      if (db_hash_used >= db_hash_len) {
            if (db_hash_used > db_hash_len)
                  dcc_pemsg(EX_DATAERR, emsg,
                          "%s contains only %d entries but %d used",
                          DCC_NM2PATH(db_hash_nm),
                          HASH_LEN_EXT(db_hash_len),
                          HASH_LEN_EXT(db_hash_used));
            else
                  dcc_pemsg(EX_DATAERR, emsg,
                          "%s is filled with %d entries",
                          DCC_NM2PATH(db_hash_nm),
                          HASH_LEN_EXT(db_hash_len));
            return 0;
      }

      /* the hash file must agree with the database file about
       * the size of the database */
      db_csize_stored_hash = DB_HPTR_EX(db_sts.hash.d.h->HASH_STORE_DB_CSIZE);
      if (db_csize_stored_hash != db_csize
          && hash_fsize != 0) {
            dcc_pemsg(EX_DATAERR, emsg,
                    "%s contains "L_DPAT" bytes"
                    " instead of the "L_DPAT" that %s claims",
                    DCC_NM2PATH(db_nm), db_csize,
                    db_csize_stored_hash, DCC_NM2PATH(db_hash_nm));
            return 0;
      }

      return 1;
}



/* open the files and generally get ready to work
 *    Whatever was open is first released with db_close(), and db_failed
 *    stays set until every step below has succeeded.
 *    The database file is opened and locked, its header is checked
 *    and copied into the db_* globals, the buffer page size is matched
 *    to the file, and then the hash file is either created
 *    (new_hash_len != 0) or checked (new_hash_len == 0). */
u_char                              /* 0=failed, 1=ok */
db_open(DCC_EMSG emsg,
      const char *new_db_nm,
      DB_HADDR new_hash_len,        /* 0 or # of entries */
      u_char mode)                  /* DB_OPEN_* */
{
      u_int cur_page_size;
      int hash_flags, db_open_flags;
      struct stat db_sb;
      DB_PTR window;

      db_close(0, 1);
      db_failed = 1;

      memset(&db_stats, 0, sizeof(db_stats));

      /* use the default name only if no name is given now
       * and none was remembered from before */
      if (!new_db_nm && db_nm[0] == '\0')
            new_db_nm = grey_on ? DB_GREY_NAME : DB_DCC_NAME;
      if (new_db_nm) {
            /* the name with DB_HASH_SUFFIX appended must also
             * fit in a DCC_PATH */
            if (strlen(new_db_nm) >= (sizeof(DCC_PATH)
                                    - sizeof(DB_HASH_SUFFIX))) {
                  dcc_pemsg(EX_DATAERR, emsg,
                          "invalid DB nm \"%s\"", new_db_nm);
                  return 0;
            }
            strcpy(db_nm, new_db_nm);
            strcpy(db_hash_nm, new_db_nm);
            strcat(db_hash_nm, DB_HASH_SUFFIX);
      }

      db_rdonly = (mode & DB_OPEN_RDONLY) != 0;
      db_no_mmap = !db_rdonly && (mode & DB_OPEN_NO_MMAP) != 0;

      /* DB_OPEN_LOCK_EXT is accepted only for read-only use of
       * an existing hash table */
      if (mode & DB_OPEN_LOCK_EXT) {
            if (new_hash_len) {
                  dcc_pemsg(EX_SOFTWARE, emsg,
                          "extending db_open(%s) without locking",
                          DCC_NM2PATH(db_nm));
                  return 0;
            }
            if (!db_rdonly) {
                  dcc_pemsg(EX_SOFTWARE, emsg,
                          "db_open(%s) read/write without locking",
                          DCC_NM2PATH(db_nm));
                  return 0;
            }
            db_open_flags = O_RDONLY;
            hash_flags = O_RDONLY;
      } else {
            db_open_flags = O_RDWR;
            if (new_hash_len) {
                  if (db_rdonly) {
                        dcc_pemsg(EX_SOFTWARE, emsg,
                                "db_open(%s) creating read-only",
                                DCC_NM2PATH(db_nm));
                        return 0;
                  }
                  hash_flags = O_RDWR | O_CREAT;
            } else {
                  /* must open the file read/write to lock it */
                  hash_flags = O_RDWR;
            }
      }

      db_fd = dcc_lock_open(emsg, db_nm, db_open_flags,
                        (mode & DB_OPEN_LOCK_NOWAIT)
                        ? DCC_LOCK_OPEN_NOWAIT
                        : (mode & DB_OPEN_LOCK_EXT)
                        ? DCC_LOCK_OPEN_EXT
                        : DCC_LOCK_OPEN_WAIT,
                        DCC_LOCK_ALL_FILE, 0);
      if (db_fd == -1) {
            db_close(0, -1);
            return 0;
      }
      /* remember when the database was opened and locked */
      gettimeofday(&db_locked, 0);
      if (0 > fstat(db_fd, &db_sb)) {
            dcc_pemsg(EX_IOERR, emsg, "stat(%s): %s",
                    DCC_NM2PATH(db_nm), ERROR_STR());
            db_close(0, -1);
            return 0;
      }
      /* db_csize is a guess until the header has been read */
      db_csize = db_fsize = db_sb.st_size;
      if (db_fsize < ISZ(DB_MAGIC)) {
            dcc_pemsg(EX_IOERR, emsg,
                    "%s with %d bytes is too small to be a DCC database",
                    DCC_NM2PATH(db_nm), (int)db_fsize);
            db_close(0, -1);
            return 0;
      }

      /* check the header of the database file */
      db_buf_init(0);
      if (!map_db(emsg, 0, sizeof(DB_MAGIC), &db_sts.rcd_magic)) {
            db_close(0, -1);
            return 0;
      }
      if (memcmp(db_sts.rcd_magic.d.magic->s.version, version_buf,
               sizeof(version_buf))) {
            dcc_pemsg(EX_DATAERR, emsg,
                    "%s contains the wrong magic string \"%.*s\"",
                    DCC_NM2PATH(db_nm),
                    ISZ(db_sts.rcd_magic.d.magic->s.version),
                    db_sts.rcd_magic.d.magic->s.version);
            db_close(0, -1);
            return 0;
      }
      /* the greylist flag in the file must agree with grey_on */
      db_flags = db_sts.rcd_magic.d.magic->s.flags;
      if (!(db_flags & DB_MAGIC_ST_GREY) != !grey_on) {
            dcc_pemsg(EX_DATAERR, emsg,
                    "%s is%s a greylist database but must%s be",
                    DCC_NM2PATH(db_nm),
                    (db_flags & DB_MAGIC_ST_GREY) ? "" : " not",
                    grey_on ? "" : " not");
            db_close(0, -1);
            return 0;
      }
      /* Copy the parameters in the header to the working values and
       * to the *_stored copies that db_flush_magic() compares to
       * decide whether the header must be rewritten. */
      memcpy(db_sn, db_sts.rcd_magic.d.magic->s.sn, sizeof(db_sn));
      memcpy(db_sn_stored, db_sn, sizeof(db_sn_stored));
      memcpy(db_ex_ts, db_sts.rcd_magic.d.magic->s.ex_ts,
             sizeof(db_ex_ts));
      memcpy(&db_ex_secs, &db_sts.rcd_magic.d.magic->s.ex_secs,
             sizeof(db_ex_secs));
      memcpy(&db_ex_secs_stored, &db_sts.rcd_magic.d.magic->s.ex_secs,
             sizeof(db_ex_secs_stored));
      db_nokeep_cks_stored = db_sts.rcd_magic.d.magic->s.nokeep_cks;
      db_nokeep_cks = db_nokeep_cks_stored;
      /* these two types are always marked in the working copy, which
       * can make it differ from the stored copy */
      DB_SET_NOKEEP(db_nokeep_cks, DCC_CK_INVALID);
      DB_SET_NOKEEP(db_nokeep_cks, DCC_CK_FLOD_PATH);
      memcpy(db_flod_tholds_stored, db_sts.rcd_magic.d.magic->s.flod_tholds,
             sizeof(db_flod_tholds_stored));
      memcpy(db_flod_tholds, db_flod_tholds_stored,
             sizeof(db_flod_tholds));
      db_ck_fuzziness = grey_on ? grey_ck_fuzziness : dcc_ck_fuzziness;

      db_csize = db_csize_stored_db = db_sts.rcd_magic.d.magic->s.db_csize;
      if (db_csize < sizeof(DB_MAGIC)) {
            dcc_pemsg(EX_DATAERR, emsg,
                    "%s says it contains "L_DPAT" bytes"
                    " or fewer than the minimum of %d",
                    DCC_NM2PATH(db_nm), db_csize, ISZ(DB_MAGIC));
            /* that is a fatal error if we are not rebuilding */
            /* NOTE(review): the test below fails only when a new hash
             * table was requested, which reads like the opposite of
             * the comment above -- confirm which is intended */
            if (new_hash_len != 0) {
                  db_close(0, -1);
                  return 0;
            }
      }
      if (db_csize > (DB_PTR)db_fsize) {
            dcc_pemsg(EX_DATAERR, emsg,
                    "%s says it contains "L_DPAT" bytes"
                    " or more than the actual size of "OFF_DPAT,
                    DCC_NM2PATH(db_nm), db_csize, db_fsize);
            /* that is a fatal error if we are not rebuilding */
            /* NOTE(review): same question as for the preceding check;
             * this size is checked again below for every caller */
            if (new_hash_len != 0) {
                  db_close(0, -1);
                  return 0;
            }
      }

      cur_page_size = db_sts.rcd_magic.d.magic->s.page_size;
      db_rel_state(&db_sts.rcd_magic);

      /* The buffer or page size we use must be the page size used to
       * write the files.  Try to change our size to match the file */
      if (cur_page_size != db_page_size) {
            /* db_invalidate makes buf_msync() discard dirty buffers
             * instead of writing them while the buffers are unloaded */
            db_invalidate = 1;
            rel_db_states(0);
            if (!db_unload(emsg)) {
                  db_close(0, -1);
                  return 0;
            }
            db_invalidate = 0;
            if (!db_buf_init(cur_page_size)) {
                  dcc_error_msg("%s has page size %d"
                              " incompatible with %d in %s",
                              DCC_NM2PATH(db_nm),
                              cur_page_size, db_get_page_size(0, 0),
                              db_hash_nm);
                  db_close(0, -1);
                  return 0;
            }
      }

      /* open the hash table file */
      db_csize_stored_hash = 0;
      db_hash_len = 0;
      db_hash_fd = open(db_hash_nm, hash_flags, 0666);
      if (db_hash_fd < 0) {
            dcc_pemsg(EX_IOERR, emsg, "open(%s): %s",
                    DCC_NM2PATH(db_hash_nm), ERROR_STR());
            db_close(0, -1);
            return 0;
      }
      if (0 > fcntl(db_hash_fd, F_SETFD, FD_CLOEXEC)) {
            dcc_pemsg(EX_IOERR, emsg, "fcntl(%s, FD_CLOEXEC): %s",
                    DCC_NM2PATH(db_hash_nm), ERROR_STR());
            db_close(0, -1);
            return 0;
      }

      /* make an empty hash table or validate the existing one */
      if (new_hash_len != 0) {
            if (!make_new_hash(emsg, new_hash_len)) {
                  db_close(0, -1);
                  return 0;
            }
      } else {
            if (!check_old_hash(emsg)) {
                  db_close(0, -1);
                  return 0;
            }
      }

      /* the database file must be a whole number of pages */
      db_end_pg_num = db_fsize / db_page_size;
      if (db_fsize % db_page_size != 0) {
            dcc_pemsg(EX_DATAERR, emsg,
                    "%s has size "OFF_HPAT","
                    " not a multiple of its page size of %#x",
                    DCC_NM2PATH(db_nm), db_fsize, db_page_size);
            db_close(0, -1);
            return 0;
      }
      /* Fill the last page of the database with zeros in case
       * the length was wrong.
       * That is possible only if the length is wrong by less than a page. */
      if ((DB_PTR)db_fsize > db_csize + db_page_size
          || db_csize > (DB_PTR)db_fsize) {
            dcc_pemsg(EX_DATAERR, emsg,
                    "%s has size "OFF_HPAT" but claims "L_HPAT,
                    DCC_NM2PATH(db_nm), db_fsize, db_csize);
            db_close(0, -1);
            return 0;
      }
      if (!db_rdonly && (DB_PTR)db_fsize > db_csize) {
            if (!map_db(emsg, db_csize, db_fsize - db_csize, &db_sts.rcd)) {
                  db_close(0, -1);
                  return 0;
            }
            memset(db_sts.rcd.d.r, 0, db_fsize - db_csize);
            db_sts.rcd.b->flags |= DB_BUF_FG_MSYNC;
      }

      /* write new sizes and other parameters.
       * This should be a NOP if the file is read only,
       * but try it just in case. */
      if (!db_flush_magic(emsg)) {
            db_close(0, -1);
            return 0;
      }

      /* describe the total size of the buffers in db_window_size */
      window = db_page_size*db_buf_total;
      if (window >= (1024*1024)) {
            snprintf(db_window_size, sizeof(db_window_size),
                   "%d MByte window",
                   (int)(window / (1024*1024)));
      } else {
            snprintf(db_window_size, sizeof(db_window_size),
                   "%d KByte window",
                   (int)(window / 1024));
      }
      rel_db_states(0);
      db_failed = 0;
      return 1;
}



/* get a free buffer for a chunk of either the hash table or database files
 *    The oldest unlocked buffer is taken from its current hash chain,
 *    unmapped if it is in use, and put at the head of the chain *bh.
 *    Returns the buffer or 0 with a message in emsg.
 *    The caller must set the type and page number of the buffer. */
PSTATIC DB_BUF *
get_free_buf(DCC_EMSG emsg, DB_BUF **bh)
{
      DB_BUF *b;

      /* Look for an unlocked buffer.
       * We know there is one because we have more buffers than
       * can be locked simultaneously. */
      b = buf_oldest;
      for (;;) {
            if (!b) {
                  dcc_pemsg(EX_SOFTWARE, emsg,
                          "broken DB buffer MRU chain");
                  return 0;
            }
            if (!b->lock_cnt)
                  break;
            b = b->newer;
      }

      /* Found an unlocked buffer.
       * Unlink it from its hash chain. */
      if (b->fwd)
            b->fwd->bak = b->bak;
      if (b->bak)
            b->bak->fwd = b->fwd;
      else if (b->hash)
            *b->hash = b->fwd;
      /* Forget the old chain so that if unmapping fails below,
       * the buffer is not unlinked a second time from a chain
       * that it is no longer on when it is next reused. */
      b->fwd = 0;
      b->bak = 0;
      b->hash = 0;
      if (b->type != DB_BUF_TYPE_FREE) {
            if (!buf_munmap(emsg, b))
                  return 0;
      }

      /* put it on the new hash chain */
      b->bak = 0;
      b->hash = bh;
      b->fwd = *bh;
      *bh = b;
      if (b->fwd)
            b->fwd->bak = b;

      return b;
}



/* Find the buffer for a page of the database or hash table file,
 *    taking a free buffer if the page has none.
 *    The buffer becomes the newest on the MRU list and is marked dirty
 *    unless the database is read-only.
 *    A buffer that was just taken from the free pool is not yet mapped.
 *    Returns 0 with a message in emsg if no buffer can be had. */
PSTATIC DB_BUF *
find_buf(DCC_EMSG emsg, DB_BUF_TYPE type, DB_PG_NUM pg_num)
{
      DB_BUF **chain, *buf, *nxt, *prv;

      /* search the hash chain for the page */
      chain = DB_BUF_HASH(pg_num);
      for (buf = *chain; buf != 0; buf = buf->fwd) {
            if (buf->type == type
                && buf->pg_num == pg_num)
                  break;
      }

      if (!buf) {
            /* the page is not on the chain, so claim a free buffer */
            buf = get_free_buf(emsg, chain);
            if (!buf)
                  return 0;
            buf->type = type;
            buf->pg_num = pg_num;
      }

      /* move the buffer to the newest end of the MRU list */
      if (buf != buf_newest) {
            nxt = buf->newer;
            prv = buf->older;

            nxt->older = prv;
            if (prv)
                  prv->newer = nxt;
            else
                  buf_oldest = nxt;

            buf->newer = 0;
            buf->older = buf_newest;
            buf_newest->newer = buf;
            buf_newest = buf;
      }

      if (!db_rdonly)
            buf->flags |= DB_BUF_FG_DIRTY;
      return buf;
}



/* Point a state at the mapped, locked buffer for a page.
 *    If the state already holds that page, nothing changes.
 *    Otherwise the buffer held by the state is released and the wanted
 *    buffer is found, mapped if necessary, and locked.
 *    st->d.p is cleared for the caller to set.
 *    Returns the buffer or 0 with a message in emsg. */
PSTATIC DB_BUF *
find_st_buf(DCC_EMSG emsg, DB_BUF_TYPE type, DB_STATE *st, DB_PG_NUM pg_num)
{
      DB_BUF *old, *buf;

      old = st->b;
      if (old) {
            /* keep the buffer if it is the target */
            if (old->type == type
                && old->pg_num == pg_num)
                  return old;

            /* otherwise give up our lock on it */
            st->b = 0;
            st->d.p = 0;
            --old->lock_cnt;
            if (old->lock_cnt < 0) {
                  dcc_logbad(EX_SOFTWARE,
                           "negative database buffer lock");
            } else if (old->lock_cnt == 0) {
                  /* no one is using it, so unmap it
                   * if the database is not locked */
                  if (!DB_IS_LOCKED()
                      && !buf_munmap(emsg, old))
                        return 0;
            }
      }

      buf = find_buf(emsg, type, pg_num);
      if (!buf)
            return 0;

      if (!buf->buf.v) {
            /* the buffer is new and so must be mapped */
            if (!buf_mmap(emsg, buf, pg_num)) {
                  buf->type = DB_BUF_TYPE_FREE;
                  buf->pg_num = -1;
                  return 0;
            }
            if (type != DB_BUF_TYPE_DB)
                  ++db_stats.hash_mmaps;
            else
                  ++db_stats.db_mmaps;
      }

      ++buf->lock_cnt;
      st->b = buf;
      st->d.p = 0;
      return buf;
}



/* Push a dirty buffer toward the disk.
 *    Clean buffers are ignored, and dirty buffers are simply marked
 *    clean while db_invalidate is set.
 *    DB_BUF_FG_NO_MMAP buffers are written with write().
 *    Other buffers are given to msync() if they are marked
 *    DB_BUF_FG_MSYNC or enough time has passed.
 *    Returns 1 on success or 0 with a message in emsg and db_failed set. */
PSTATIC u_char
buf_msync(DCC_EMSG emsg, DB_BUF *b)
{
#ifdef MADV_FREE
      static u_char madvise_ok = 1;
#endif
      off_t offset;
      char *p;
      int fd, i;

      if (!(b->flags & DB_BUF_FG_DIRTY))
            return 1;


      if (db_invalidate) {
            b->flags &= ~(DB_BUF_FG_MSYNC | DB_BUF_FG_DIRTY);
            return 1;
      }

      if (b->flags & DB_BUF_FG_NO_MMAP) {
            if (b->flags & DB_BUF_FG_ANON) {
                  p = b->buf.v;
            } else {
                  /* at least FreeBSD fails writing to the file that
                   * underlies a mmap() region from that region */
                  static char *wbuf;
                  static u_int wbuf_len;

                  if (wbuf_len != db_page_size) {
                        /* Do not record the new length until the
                         * new bounce buffer exists so that a failed
                         * allocation is retried instead of leaving
                         * a null buffer that looks valid. */
                        free(wbuf);
                        wbuf_len = 0;
                        wbuf = malloc(db_page_size);
                        if (!wbuf) {
                              dcc_pemsg(EX_IOERR, emsg,
                                      "malloc(%d): %s",
                                      db_page_size, ERROR_STR());
                              db_failed = 1;
                              return 0;
                        }
                        wbuf_len = db_page_size;
                  }
                  p = wbuf;
                  memcpy(p, b->buf.v, db_page_size);
            }

            /* write the whole page at its place in its file */
            fd = (b->type == DB_BUF_TYPE_DB) ? db_fd : db_hash_fd;
            offset = (off_t)b->pg_num * (off_t)db_page_size;
            if (offset != lseek(fd, offset, SEEK_SET)) {
                  dcc_pemsg(EX_IOERR, emsg, "lseek(%s,"OFF_HPAT"): %s",
                          B2PATH(b), offset, ERROR_STR());
                  db_failed = 1;
                  return 0;
            }
            i = write(fd, p, db_page_size);
            if (i != (int)db_page_size) {
                  dcc_pemsg(EX_IOERR, emsg, "write(%s)=%d: %s",
                          B2PATH(b), i, ERROR_STR());
                  db_failed = 1;
                  return 0;
            }
#ifdef MADV_FREE
            /* stop trying madvise() after it fails once */
            if (madvise_ok
                && 0 > madvise(b->buf.v, db_page_size, MADV_FREE)) {
                  if (db_debug)
                        dcc_trace_msg("madvise(MADV_FREE): %s",
                                    ERROR_STR());
                  madvise_ok = 0;
            }
#endif

#ifndef HAVE_OLD_MSYNC
      } else {
            if ((b->flags & DB_BUF_FG_MSYNC)
                || DB_IS_TIME(msync_time, MSYNC_TIME)) {
                  msync_time = db_time.tv_sec + MSYNC_TIME;
                  if (0 > MSYNC(b->buf.v, db_page_size, MS_ASYNC)) {
                        dcc_pemsg(EX_IOERR, emsg,
                                "msync(db buffer %#lx,%#x): %s",
                                (long)b->buf.v, db_page_size,
                                ERROR_STR());
                        db_failed = 1;
                        return 0;
                  }
            }
#endif
      }

      b->flags &= ~(DB_BUF_FG_MSYNC | DB_BUF_FG_DIRTY);
      return 1;
}



/* Flush and unmap an unlocked buffer and mark it free.
 *    The buffer is marked free even if flushing or unmapping fails.
 *    Returns 1 on success or 0 with db_failed set.  emsg gets the message
 *    for the first failure. */
PSTATIC u_char
buf_munmap(DCC_EMSG emsg, DB_BUF *b)
{
      u_char ok;

      if (b->lock_cnt != 0)
            dcc_logbad(EX_SOFTWARE, "unmapping locked DB buffer");

      ok = buf_msync(emsg, b);
      if (!ok) {
            /* do not let a later message replace the one
             * from buf_msync() */
            db_failed = 1;
            emsg = 0;
      }

      if (munmap(b->buf.v, db_page_size) < 0) {
            dcc_pemsg(EX_IOERR, emsg, "munmap(%s,%d): %s",
                    B2PATH(b), db_page_size, ERROR_STR());
            db_failed = 1;
            ok = 0;
      }

      /* the buffer is now free */
      b->buf.v = 0;
      b->flags = 0;
      b->type = DB_BUF_TYPE_FREE;
      b->pg_num = -1;

      return ok;
}



/* Map one page of the database or hash table file into a buffer.
 *    b->type chooses the file and pg_num the page.
 *    On success b->buf.v points to the page and 1 is returned.
 *    On failure 0 is returned with a message in emsg. */
PSTATIC u_char
buf_mmap(DCC_EMSG emsg, DB_BUF *b, DB_PG_NUM pg_num)
{
#ifndef HAVE_OLD_MSYNC
      /* each kind of madvise() is abandoned after it fails once */
#ifdef MADV_RANDOM
      static u_char madv_random_ok = 1;
#endif
#ifdef MADV_WILLNEED
      static u_char madv_willneed_ok = 1;
#endif
#endif
      int flags;
      off_t offset;
      void *p;


      if (db_no_mmap
          && (b->type == DB_BUF_TYPE_HASH
            || pg_num >= db_end_pg_num)) {
            /* If there is enough RAM to avoid thrashing
             * or if this is a hash table page that we will probably
             *    be changing
             * or if this is the current data page whose hash table
             *    entries are being rebuilt,
             * then read and write entire buffers instead of letting
             * the Solaris virtual memory system do it.
             * Solaris will bog the system down doing nothing but
             * flushing dirty pages mmap() */
            /* buf_msync() writes buffers with this flag with write() */
            b->flags |= DB_BUF_FG_NO_MMAP;
            flags = MAP_PRIVATE;
      } else {
#ifdef MAP_NOSYNC
            flags = (MAP_SHARED | MAP_NOSYNC);
#else
            flags = MAP_SHARED;
#endif
      }
      offset = (off_t)pg_num * (off_t)db_page_size;
      p = mmap(0, db_page_size,
             db_rdonly ? PROT_READ : (PROT_READ | PROT_WRITE),
             flags,
             (b->type == DB_BUF_TYPE_DB) ? db_fd : db_hash_fd,
             offset);

      if (p == MAP_FAILED) {
            /* NOTE(review): DB_BUF_FG_NO_MMAP may have been set above
             * and is not cleared on this path -- confirm that callers
             * do not depend on the flags of a buffer that failed */
            dcc_pemsg(EX_IOERR, emsg, "mmap(%s,%#x,"OFF_HPAT"): %s",
                    B2PATH(b), db_page_size, offset, ERROR_STR());
            return 0;
      }

#ifndef HAVE_OLD_MSYNC
      /* advise the kernel depending on whether both files together
       * are smaller than db_max_rss */
      if ((b->flags & DB_BUF_FG_NO_MMAP)
          || (DB_PTR)(db_fsize+hash_fsize) < db_max_rss)  {
#ifdef MADV_WILLNEED
            /* Tell the kernel to keep entire buffers in RAM if
             * we have plenty */
            if (madv_willneed_ok
                && 0 > madvise(p, db_page_size, MADV_WILLNEED)) {
                  if (db_debug)
                        dcc_trace_msg("madvise(MADV_WILLNEED): %s",
                                    ERROR_STR());
                  madv_willneed_ok = 0;
            }
#endif

      } else if ((DB_PTR)db_fsize >= db_max_rss) {
#ifdef MADV_RANDOM
            /* Tell the kernel to not read entire buffers if we are short
             * of RAM.  Let it read-ahead and try to fill buffers if we
             * hope to keep the whole database in RAM. */
            if (madv_random_ok
                && 0 > madvise(p, db_page_size, MADV_RANDOM)) {
                  if (db_debug)
                        dcc_trace_msg("madvise(MADV_RANDOM): %s",
                                    ERROR_STR());
                  madv_random_ok = 0;
            }
#endif
      }
#endif /* !HAVE_OLD_MSYNC */

      b->buf.v = p;
      return 1;
}



/* Map a hash table entry.
 *    The page containing the entry is mapped and locked through st,
 *    and st->d.h is pointed at the entry.
 *    Returns 1 on success or 0 with a message in emsg. */
PSTATIC u_char
map_hash(DCC_EMSG emsg,
       DB_HADDR haddr,        /* this entry */
       DB_STATE *st)                /* point this to the entry */
{
      DB_PG_NUM page;
      DB_PG_OFF slot;
      DB_BUF *buf;

      /* refuse addresses beyond the end of the table */
      if (haddr >= db_hash_len) {
            dcc_pemsg(EX_DATAERR, emsg, "invalid hash address %#x",
                    haddr);
            return 0;
      }

      /* split the address into a page and an entry within the page */
      page = haddr / db_hash_page_len;
      slot = haddr % db_hash_page_len;

      buf = find_st_buf(emsg, DB_BUF_TYPE_HASH, st, page);
      if (!buf)
            return 0;

      st->d.h = &buf->buf.h[slot];
      st->s.haddr = haddr;
      return 1;
}



/* Remove a hash table entry from the free list.
 *    The entry must be free and have plausible links.
 *    Its neighbors are mapped one at a time through tmp_st,
 *    checked to point back at the entry, and joined to each other.
 *    The links of the entry are then cleared and db_hash_used counts it.
 *    Returns 1 on success or 0 with a message in emsg. */
PSTATIC u_char
unlink_free_hash(DCC_EMSG emsg,
             DB_STATE *hash_st,     /* remove this from the free list */
             DB_STATE *tmp_st)
{
      DB_HADDR nxt, prv;

      if (!db_make_dirty(emsg))
            return 0;

      nxt = DB_HADDR_EX(hash_st->d.h->fwd);
      prv = DB_HADDR_EX(hash_st->d.h->bak);

      /* a link may be invalid only if it is the head of the free list */
      if (!HE_IS_FREE(hash_st->d.h)
          || (nxt != DB_HADDR_FREE && DB_HADDR_INVALID(nxt))
          || (prv != DB_HADDR_FREE && DB_HADDR_INVALID(prv))) {
            dcc_pemsg(EX_DATAERR, emsg,
                    "bad hash free list entry at %#x", hash_st->s.haddr);
            return 0;
      }

      /* make the following entry point back past this entry */
      if (!map_hash(emsg, nxt, tmp_st))
            return 0;
      if (hash_st->s.haddr != DB_HADDR_EX(tmp_st->d.h->bak)) {
            dcc_pemsg(EX_DATAERR, emsg,
                    "free %#x --> bad-free %#x", hash_st->s.haddr, nxt);
            return 0;
      }
      DB_HADDR_CP(tmp_st->d.h->bak, prv);

      /* make the preceding entry point forward past this entry */
      if (!map_hash(emsg, prv, tmp_st))
            return 0;
      if (hash_st->s.haddr != DB_HADDR_EX(tmp_st->d.h->fwd)) {
            dcc_pemsg(EX_DATAERR, emsg,
                    "bad free %#x <-- free %#x", prv, hash_st->s.haddr);
            return 0;
      }
      DB_HADDR_CP(tmp_st->d.h->fwd, nxt);

      /* the entry is now in use and on no list */
      DB_HADDR_CP(hash_st->d.h->fwd, DB_HADDR_NULL);
      DB_HADDR_CP(hash_st->d.h->bak, DB_HADDR_NULL);
      ++db_hash_used;
      return 1;
}



/* Get a free hash table entry and leave db_sts.free pointing to it.
 *    Up to 50 entries on the page of the hint are examined first.
 *    If none of them is free, the first entry on the free list is taken.
 *    The chosen entry is unlinked from the free list. */
PSTATIC u_char                      /* 0=failed, 1=got it */
get_free_hash(DCC_EMSG emsg,
            DB_HADDR result)        /* try near here */
{
      DB_HADDR pg_end;
      int tries;

      if (db_hash_used >= db_hash_len) {
            dcc_pemsg(EX_SOFTWARE, emsg, "no free hash table entry;"
                    " %d of %d used", db_hash_used, db_hash_len);
            return 0;
      }

      /* Search near the hint.
       * Leaving the page is expensive enough to justify
       * plenty of effort here. */
      if (result != DB_HADDR_NULL) {
            pg_end = (result - (result % db_hash_page_len)
                    + db_hash_page_len-1);
            tries = 50;
            while (tries-- > 0) {
                  if (!map_hash(emsg, result, &db_sts.free))
                        return 0;
                  if (HE_IS_FREE(db_sts.free.d.h))
                        return unlink_free_hash(emsg, &db_sts.free,
                                          &db_sts.tmp);
                  /* wrap around instead of leaving the page */
                  ++result;
                  if (result >= pg_end)
                        result -= db_hash_page_len-1-DB_HADDR_MIN;
            }
      }

      /* fall back to the head of the free list */
      if (!map_hash(emsg, DB_HADDR_FREE, &db_sts.free))
            return 0;
      result = DB_HADDR_EX(db_sts.free.d.h->fwd);
      if (DB_HADDR_INVALID(result)) {
            dcc_pemsg(EX_DATAERR, emsg,
                    "broken hash free list head of %#x", result);
            return 0;
      }
      if (!map_hash(emsg, result, &db_sts.free))
            return 0;

      return unlink_free_hash(emsg, &db_sts.free, &db_sts.tmp);
}



/* Map part of the database file.
 *    We assume that no database entry spans buffers,
 *    and that there are enough buffers to accommodate all possible
 *    concurrent requests.
 *    Returns 1 with st->d.r pointing at the data, or 0 with a message
 *    in emsg.  db_failed is set if the address or length is bad. */
PSTATIC u_char
map_db(DCC_EMSG emsg,
       DB_PTR rptr,                 /* address of the record */
       u_int tgt_len,               /* its length */
       DB_STATE *st)                /* point this to the record */
{
      DB_PG_NUM page;
      DB_PG_OFF start;
      DB_BUF *buf;

      /* the target must be inside the file */
      if ((DB_PTR)db_fsize < rptr+tgt_len) {
            dcc_pemsg(EX_DATAERR, emsg,
                    "invalid database address "L_HPAT" or length %d"
                    " past db_fsize "OFF_HPAT" in %s",
                    rptr, tgt_len, db_fsize, DCC_NM2PATH(db_nm));
            db_failed = 1;
            return 0;
      }

      page = rptr / db_page_size;
      start = rptr % db_page_size;

      /* the target must also fit in a single buffer */
      if (tgt_len+start > db_page_size) {
            dcc_pemsg(EX_DATAERR, emsg,
                    "invalid database address "L_HPAT
                    " or length %#x in %s",
                    rptr, tgt_len, DCC_NM2PATH(db_nm));
            db_failed = 1;
            return 0;
      }

      buf = find_st_buf(emsg, DB_BUF_TYPE_DB, st, page);
      if (!buf)
            return 0;

      st->d.r = (DB_RCD *)&buf->buf.c[start];
      st->s.rptr = rptr;
      return 1;
}



/* Map a database record and compute its length.
 *    The fixed part of the record is mapped first so that its
 *    number of checksums can be read.  The resulting length is rejected
 *    if the record would run past the end of its buffer. */
u_char                              /* 0=failed, 1=got it */
db_map_rcd(DCC_EMSG emsg,
         DB_STATE *rcd_st,          /* point this to the record */
         DB_PTR rptr,               /* that is here */
         u_int *rcd_lenp)           /* put its length here */
{
      u_int len;

      if (DB_PTR_IS_BAD(rptr)) {
            dcc_pemsg(EX_DATAERR, emsg,
                    "getting bogus record at "L_HPAT", in %s",
                    rptr, DCC_NM2PATH(db_nm));
            return 0;
      }

      if (!map_db(emsg, rptr, DB_RCD_PAD, rcd_st))
            return 0;

      /* the fixed part plus one slot for each checksum */
      len = (DB_RCD_PAD
             + (sizeof(rcd_st->d.r->cks[0]) * DB_NUM_CKS(rcd_st->d.r)));

      /* the whole record must be within the buffer */
      if (rcd_st->d.c + len > rcd_st->b->buf.c + db_page_size) {
            dcc_pemsg(EX_DATAERR, emsg,
                    "invalid checksum count %d at "L_HPAT" in %s",
                    DB_NUM_CKS(rcd_st->d.r), rptr, DCC_NM2PATH(db_nm));
            return 0;
      }

      if (rcd_lenp != 0)
            *rcd_lenp = len;
      return 1;
}



/* Write the current size of the database into both files.
 *    Each open file is touched only if the size it is known to contain
 *    differs from db_csize.
 *    Returns 0 if either file could not be mapped and 1 otherwise. */
PSTATIC u_char
db_flush_len(DCC_EMSG emsg)
{
      u_char ok;

      ok = 1;

      /* the copy in the hash table file */
      if (db_hash_fd != -1
          && db_csize_stored_hash != db_csize) {
            if (map_hash(emsg, DB_HADDR_SIZES, &db_sts.hash_ctl)) {
                  DB_HPTR_CP(db_sts.hash_ctl.d.h->HASH_STORE_DB_CSIZE,
                           db_csize);
                  db_csize_stored_hash = db_csize;
            } else {
                  ok = 0;
            }
      }

      /* the copy in the header of the database file */
      if (db_fd != -1
          && db_csize_stored_db != db_csize) {
            if (map_db(emsg, 0, sizeof(DB_MAGIC), &db_sts.rcd_magic)) {
                  db_sts.rcd_magic.d.magic->s.db_csize = db_csize;
                  db_csize_stored_db = db_csize;
                  db_sts.rcd_magic.b->flags |= DB_BUF_FG_MSYNC;
            } else {
                  ok = 0;
            }
      }

      return ok;
}



/* Write the database parameters into the magic number headers of the files.
 *    The sizes are flushed first.  The header of the database file is
 *    then rewritten only if one of the working parameters differs from
 *    the copy known to be in the file.
 *    Returns 1 on success or 0 with a message in emsg. */
u_char
db_flush_magic(DCC_EMSG emsg)
{
      if (!db_flush_len(emsg))
            return 0;

      /* nothing more can be done without an open database */
      if (db_fd == -1)
            return 1;

      /* nothing more is needed if the header is current */
      if (db_nokeep_cks == db_nokeep_cks_stored
          && !memcmp(&db_ex_secs, &db_ex_secs_stored,
                   sizeof(db_ex_secs))
          && !memcmp(db_flod_tholds, db_flod_tholds_stored,
                   sizeof(db_flod_tholds))
          && !memcmp(db_sn, db_sn_stored, sizeof(db_sn)))
            return 1;

      if (!map_db(emsg, 0, sizeof(DB_MAGIC), &db_sts.rcd_magic))
            return 0;

      db_sts.rcd_magic.d.magic->s.page_size = db_page_size;

      /* copy each parameter to the header and remember what was stored */
      memcpy(&db_sts.rcd_magic.d.magic->s.ex_secs, &db_ex_secs,
             sizeof(db_sts.rcd_magic.d.magic->s.ex_secs));
      memcpy(&db_ex_secs_stored, &db_ex_secs,
             sizeof(db_ex_secs_stored));

      db_sts.rcd_magic.d.magic->s.nokeep_cks = db_nokeep_cks;
      db_nokeep_cks_stored = db_nokeep_cks;

      memcpy(db_sts.rcd_magic.d.magic->s.flod_tholds, db_flod_tholds,
             sizeof(db_sts.rcd_magic.d.magic->s.flod_tholds));
      memcpy(db_flod_tholds_stored, db_flod_tholds,
             sizeof(db_flod_tholds_stored));

      memcpy(db_sts.rcd_magic.d.magic->s.sn, db_sn,
             sizeof(db_sts.rcd_magic.d.magic->s.sn));
      memcpy(db_sn_stored, db_sn, sizeof(db_sn_stored));

      /* ask for the header to be pushed to the disk */
      db_sts.rcd_magic.b->flags |= DB_BUF_FG_MSYNC;

      return 1;
}



/* Map a record and find the checksum of a given type within it.
 *    The caller is expected to know that the record contains that type,
 *    so not finding it is reported as a database error.
 *    Returns a pointer to the checksum inside the mapped record,
 *    or 0 if the record cannot be mapped, claims too many checksums,
 *    or does not contain the type. */
DB_RCD_CK *                   /* 0=it's not there */
db_map_rcd_ck(DCC_EMSG emsg,
            DB_STATE *rcd_st,       /* point this to the record */
            DB_PTR rptr,            /* that is here */
            DCC_CK_TYPES type)      /* find this type of checksum */
{
      DB_RCD_CK *ck;
      int remaining;

      if (!db_map_rcd(emsg, rcd_st, rptr, 0))
            return 0;

      remaining = DB_NUM_CKS(rcd_st->d.r);
      if (remaining > DCC_NUM_CKS) {
            dcc_pemsg(EX_DATAERR, emsg,
                    "impossible %d checksums in "L_HPAT" in %s",
                    remaining, rptr, DCC_NM2PATH(db_nm));
            return 0;
      }

      /* scan the checksums of the record in order */
      ck = rcd_st->d.r->cks;
      while (remaining != 0) {
            if (DB_CK_TYPE(ck) == type)
                  return ck;
            --remaining;
            ++ck;
      }

      dcc_pemsg(EX_DATAERR, emsg, "missing \"%s\" checksum in "L_HPAT" in %s",
              dcc_type2str_err(type, 0, 1),
              rptr, DCC_NM2PATH(db_nm));
      return 0;
}



/* Compute the home hash table slot of a checksum.
 *    The checksum type and the 16 bytes of the sum, taken as four
 *    big-endian 4-byte words, are added together and reduced with mhash()
 *    to the current hash table length.  Results below DB_HADDR_MIN are
 *    raised to DB_HADDR_MIN.
 *
 *    NOTE(review): if DCC_SUM holds unsigned chars, each byte is promoted
 *    to int before shifting, so a leading byte >= 0x80 shifts into the
 *    sign bit.  The arithmetic is deliberately left exactly as it was
 *    because slot addresses in existing hash files presumably depend on
 *    it -- confirm before changing. */
DB_HADDR
db_hash(DCC_CK_TYPES type, const DCC_SUM sum)
{
      u_long total;
      DB_HADDR slot;
      int w;

      total = type;
      for (w = 0; w < 16; w += 4)
            total += (sum[w]<<24)+(sum[w+1]<<16)+(sum[w+2]<<8)+sum[w+3];

      slot = mhash(total, db_hash_len);
      return (slot < DB_HADDR_MIN) ? DB_HADDR_MIN : slot;
}



/* Look for a checksum in the hash table.
 *    Returns
 *      DB_FOUND_LATER    the home slot is outside the window lo..hi
 *      DB_FOUND_EMPTY    the home slot is free
 *      DB_FOUND_INTRUDER the home slot holds an entry with a back link,
 *                        which is part of some other chain
 *      DB_FOUND_IT       found; hash_st is at the entry, rcd_st is at the
 *                        record, and *prcd_ck points to the checksum
 *      DB_FOUND_CHAIN    not found; hash_st is at the last entry of the
 *                        collision chain
 *      DB_FOUND_SYSERR   mapping failed or the database is broken
 *    An out-of-window address with the full window 0..MAX_HASH_ENTRIES
 *    is an error instead of DB_FOUND_LATER. */
DB_FOUND
db_lookup(DCC_EMSG emsg, DCC_CK_TYPES type, const DCC_SUM sum,
        DB_HADDR lo,                /* postpone if out of this window */
        DB_HADDR hi,
        DB_STATE *hash_st,          /* hash block for record or related */
        DB_STATE *rcd_st,           /* put the record or garbage here */
        DB_RCD_CK **prcd_ck)        /* point to cksum if found */
{
      DB_HADDR cur, next;
      DB_PTR rcd_ptr;
      DB_RCD_CK *ck;
      int hops;

      cur = db_hash(type, sum);
      if (cur < lo || cur > hi) {
            /* postpone unless the window is the whole table */
            if (lo != 0 || hi != MAX_HASH_ENTRIES)
                  return DB_FOUND_LATER;
            dcc_pemsg(EX_DATAERR, emsg,
                    "out of range hash address");
            return DB_FOUND_SYSERR;
      }

      if (prcd_ck)
            *prcd_ck = 0;

      if (!map_hash(emsg, cur, hash_st))
            return DB_FOUND_SYSERR;
      if (HE_IS_FREE(hash_st->d.h))
            return DB_FOUND_EMPTY;
      if (!DB_HADDR_C_NULL(hash_st->d.h->bak))
            return DB_FOUND_INTRUDER;

      /* The entry in the home slot starts a chain.  Walk the chain,
       * bounding the number of steps by the size of the table so that
       * a loop in a corrupt file cannot hang us. */
      hops = db_hash_len;
      while (hops >= 0) {
            if (HE_CMP(hash_st->d.h, type, sum)) {
                  /* The entry might be for the target.  Only the
                   * record itself can distinguish the target from
                   * a collision, so fetch its checksum of the
                   * right type and compare the complete sums. */
                  rcd_ptr = DB_HPTR_EX(hash_st->d.h->rcd);
                  ck = db_map_rcd_ck(emsg, rcd_st, rcd_ptr, type);
                  if (!ck)
                        return DB_FOUND_SYSERR;
                  if (!memcmp(sum, ck->sum, sizeof(DCC_SUM))) {
                        if (prcd_ck)
                              *prcd_ck = ck;
                        return DB_FOUND_IT;
                  }
            }

            /* Not the target.  Stop at the end of the chain. */
            next = DB_HADDR_EX(hash_st->d.h->fwd);
            if (next == DB_HADDR_NULL)
                  return DB_FOUND_CHAIN;

            if (DB_HADDR_INVALID(next)) {
                  dcc_pemsg(EX_DATAERR, emsg,
                          "broken hash chain fwd-link %#x at %#x in %s",
                           next, cur, DCC_NM2PATH(db_hash_nm));
                  return DB_FOUND_SYSERR;
            }

            if (!map_hash(emsg, next, hash_st))
                  return DB_FOUND_SYSERR;

            /* the next entry must point back to where we came from */
            if (DB_HADDR_EX(hash_st->d.h->bak) != cur) {
                  dcc_pemsg(EX_DATAERR, emsg,
                          "broken hash chain back-link"
                          " %#x<--%#x instead of %#x<--%#x in %s",
                          DB_HADDR_EX(hash_st->d.h->bak), next,
                          cur, next, DCC_NM2PATH(db_hash_nm));
                  return DB_FOUND_SYSERR;
            }

            cur = next;
            --hops;
      }

      dcc_pemsg(EX_DATAERR, emsg, "infinite hash chain at %#x in %s",
              cur, DCC_NM2PATH(db_hash_nm));
      return DB_FOUND_SYSERR;
}



/* Combine a previous total of targets with a new target count.
 *
 *    The arithmetic must be commutative (after handling deleted values),
 *    because inter-server flooding makes records appear in the database
 *    out of temporal order.
 *
 *    DCC_TGTS_TOO_MANY can be thought of as a count of plus infinity,
 *    DCC_TGTS_OK as minus infinity, and DCC_TGTS_OK2 as half of minus
 *    infinity.  Plus infinity added to DCC_TGTS_OK or DCC_TGTS_OK2
 *    yields DCC_TGTS_OK or DCC_TGTS_OK2.
 *
 *    Claims of not-spam from all clients are discarded as they arrive
 *    and before here.  They can only come from the local white list. */
DCC_TGTS
db_sum_ck(DCC_TGTS prev,            /* previous sum */
        DCC_TGTS new)               /* new value */
{
      DCC_TGTS total;

      /* white-listing beats everything */
      if (prev == DCC_TGTS_OK || new == DCC_TGTS_OK)
            return DCC_TGTS_OK;
      if (prev == DCC_TGTS_OK2 || new == DCC_TGTS_OK2)
            return DCC_TGTS_OK2;

      /* a delete request adds nothing */
      if (new == DCC_TGTS_DEL)
            return prev;

      /* saturate at "too many" */
      if (prev == DCC_TGTS_TOO_MANY || new == DCC_TGTS_TOO_MANY)
            return DCC_TGTS_TOO_MANY;
      total = prev+new;
      if (total >= DCC_TGTS_TOO_MANY)
            return DCC_TGTS_TOO_MANY;

      return total;
}



/* Delete the reports of a checksum that precede a delete request.
 *    Walk the chain of older reports starting with prev_ck in the record
 *    mapped by prev_st.  Reports older than the delete request are zeroed
 *    unless they are white-list entries.  Reports that survive are summed
 *    into *res with db_sum_ck(). */
static u_char                       /* 1=done, 0=broken database */
del_ck(DCC_EMSG emsg,
       DCC_TGTS *res,               /* residual targets after deletion */
       const DB_RCD *new,           /* delete reports older than this one */
       DCC_CK_TYPES type,           /* delete this type of checksum */
       DB_RCD_CK *prev_ck,          /* starting with this one */
       DB_STATE *prev_st)           /* use this scratch state block */
{
      DB_PTR older;

      *res = 0;
      for (;;) {
            if (!(DCC_TS_NEWER_TS(new->ts, prev_st->d.r->ts))
                || DB_RCD_ID(prev_st->d.r) == DCC_ID_WHITE) {
                  /* keep and count reports that are not older than
                   * the request, as well as white-list entries */
                  *res = db_sum_ck(*res, DB_TGTS_RCD(prev_st->d.r));
            } else {
                  /* zero reports older than the delete request */
                  DB_TGTS_RCD_SET(prev_st->d.r, 0);
                  DB_TGTS_CK_SET(prev_ck, 0);
            }

            /* move to the preceding report of this checksum */
            older = DB_PTR_EX(prev_ck->prev);
            if (older == DB_PTR_NULL)
                  break;
            prev_ck = db_map_rcd_ck(emsg, prev_st, older, type);
            if (!prev_ck)
                  return 0;
      }

      return 1;
}



/* Mark reports made obsolete by a spam report
 *    A new report of spam makes sufficiently old reports obsolete.
 *    Sufficiently recent existing reports make a new report obsolete,
 *    or at least not worth spending bandwidth to flood.
 *
 *    The chain of preceding reports is walked starting with prev_ck,
 *    which must be in the record currently mapped by prev_st.  Every
 *    step re-maps prev_st to the next older report, so the record and
 *    checksum examined by each iteration always belong together.
 *
 *    "Sufficiently old" means older than the timestamp of the new report
 *    less the smaller of db_ex_secs[type].all and DCC_NEW_SPAM_SECS
 *    (presumably what dcc_timeval2ts() computes from the negative
 *    offset -- confirm). */
PSTATIC u_char                      /* 1=done, 0=broken database */
db_obs_ck(DCC_EMSG emsg,
        const DB_RCD *new,
        DB_RCD_CK *new_ck,
        DCC_CK_TYPES type,          /* check this type of checksum */
        DB_RCD_CK *prev_ck,         /* starting with this one */
        DCC_TGTS prev_ck_tgts,
        DB_STATE *prev_st)          /* use this scratch state block */
{
      struct timeval tv;
      time_t secs;
      DCC_TS ts;
      int limit;
      DB_PTR prev;

      /* compute the timestamp that separates old from recent reports */
      secs = db_ex_secs[type].all;
      if (secs > DCC_NEW_SPAM_SECS)
            secs = DCC_NEW_SPAM_SECS;
      dcc_ts2timeval(&tv, new->ts);
      dcc_timeval2ts(ts, &tv, -secs);

      /* give up after this many already obsolete predecessors */
      limit = 100;
      for (;;) {
            /* preceding white listed entries make new entries obsolete */
            if (DB_RCD_ID(prev_st->d.r) == DCC_ID_WHITE) {
                  new_ck->type_fgs |= DB_CK_FG_OBS;
                  return 1;
            }

            if (DB_CK_OBS(prev_ck)) {
                  /* don't look forever for recent existing report */
                  if (--limit == 0)
                        return 1;

            } else if (prev_ck_tgts != DCC_TGTS_TOO_MANY) {
                  /* mark this predecessor obsolete if it
                   * was before the checksum became spam */
                  prev_ck->type_fgs |= DB_CK_FG_OBS;

            } else if (DCC_TS_OLDER_TS(prev_st->d.r->ts, &ts)) {
                  /* this older predecessor is now obsolete */
                  prev_ck->type_fgs |= DB_CK_FG_OBS;
                  /* we're finished, because all older preceding reports
                   * were marked obsolete when it was inserted  */
                  return 1;

            } else {
                  /* this predecessor is recent, so it makes
                   * our new record obsolete. */
                  new_ck->type_fgs |= DB_CK_FG_OBS;
                  return 1;
            }

            prev = DB_PTR_EX(prev_ck->prev);
            if (prev == DB_PTR_NULL)
                  return 1;   /* it is a new report of spam */

            /* Map the predecessor into the caller's scratch block, not
             * a fixed global one, because the tests above read the
             * record through prev_st. */
            prev_ck = db_map_rcd_ck(emsg, prev_st, prev, type);
            if (!prev_ck)
                  return 0;
            prev_ck_tgts = DB_TGTS_CK(prev_ck);
      }
}



/* Mark extra server-ID declarations obsolete.
 *    Walk the chain of preceding reports of the server-ID checksum,
 *    starting with prev_ck in the record mapped by prev_st, looking for
 *    a declaration with the same ID as the new record.  When one is
 *    found, whichever of the two declarations is older is marked
 *    obsolete and the search stops.  Nothing is marked if the chain
 *    ends without a match. */
static u_char                       /* 1=done, 0=broken database */
srvr_id_ck(DCC_EMSG emsg,
         const DB_RCD *new,
         DB_RCD_CK *new_ck,
         DB_RCD_CK *prev_ck,        /* starting with this one */
         DB_STATE *prev_st)         /* use this scratch state block */
{
      DB_PTR prev;

      for (;;) {
            if (DB_RCD_ID(prev_st->d.r) == DB_RCD_ID(new)) {
                  /* keep newest server-ID declaration
                   * Compare the timestamps of the two records, not
                   * the address of the old record with the timestamp
                   * of the new one. */
                  if (DCC_TS_NEWER_TS(prev_st->d.r->ts, new->ts))
                        new_ck->type_fgs |= DB_CK_FG_OBS;
                  else
                        prev_ck->type_fgs |= DB_CK_FG_OBS;
                  return 1;
            }

            /* move to the preceding declaration */
            prev = DB_PTR_EX(prev_ck->prev);
            if (prev == DB_PTR_NULL)
                  return 1;

            prev_ck = db_map_rcd_ck(emsg, prev_st, prev, DCC_CK_SRVR_ID);
            if (!prev_ck)
                  return 0;
      }
}



/* Install pointers in the hash table for a record and fix the accumulated
 *    counts in the record pointed to by db_sts.rcd
 *
 *    Each checksum in the record is looked up with db_lookup() using the
 *    window lo..hi.  Checksums whose hash address is outside the window
 *    (DB_FOUND_LATER) are skipped and are neither linked nor totaled by
 *    this call.  For the others, the hash table is changed to point to
 *    this record, the record is linked to any previous record for the
 *    same checksum, and the total in the checksum is corrected.
 *
 *    The database sizes are flushed with db_flush_len() before
 *    returning success. */
u_char                              /* 0=failed, 1=done */
db_link_rcd(DCC_EMSG emsg, DB_HADDR lo, DB_HADDR hi)
{
      DCC_TGTS res;
      DB_RCD *rcd;
      DB_RCD_CK *prev_ck;
      DB_RCD_CK *rcd_ck;
      DCC_CK_TYPES rcd_type;
      DCC_TGTS rcd_tgts, prev_ck_tgts;
      int ck_num;
      DB_HADDR haddr;

      if (!db_make_dirty(emsg))
            return 0;

      rcd = db_sts.rcd.d.r;
      rcd_tgts = DB_TGTS_RCD_RAW(rcd);
      rcd_ck = rcd->cks;
      ck_num = DB_NUM_CKS(rcd);
      /* refuse records that claim more checksums than they can hold */
      if (ck_num > DIM(rcd->cks)) {
            dcc_pemsg(EX_SOFTWARE, emsg,
                    "bogus checksum count %#x at "L_HPAT" in %s",
                    rcd->fgs_num_cks, db_sts.rcd.s.rptr,
                    DCC_NM2PATH(db_nm));
            return 0;
      }
      for (; ck_num > 0; --ck_num, ++rcd_ck) {
            /* start the total with the count of the record itself,
             * with a delete request counting as nothing */
            res = rcd_tgts;
            if (res == DCC_TGTS_DEL)
                  res = 0;
            /* avoid dirtying a mapped page if not necessary */
            if (rcd_ck->prev != DB_PTR_CP(DB_PTR_NULL))
                  rcd_ck->prev = DB_PTR_CP(DB_PTR_NULL);

            /* do not link or total some checksums unless they
             * are whitelist entries */
            rcd_type = DB_CK_TYPE(rcd_ck);
            if (DB_TEST_NOKEEP(db_nokeep_cks, rcd_type)
                && DB_RCD_ID(rcd) != DCC_ID_WHITE) {
                  DB_TGTS_CK_SET(rcd_ck, 1);
                  continue;
            }

            if (!DCC_CK_OK_DB(rcd_type)) {
                  dcc_pemsg(EX_SOFTWARE, emsg,
                          "invalid checksum type %s at "L_HPAT" in %s",
                          dcc_type2str_err(rcd_type, 0, 1),
                          db_sts.rcd.s.rptr, DCC_NM2PATH(db_nm));
                  return 0;
            }

            /* On return db_sts.hash is at the hash table entry of
             * interest.  If the checksum is found, db_sts.rcd2 is at
             * the newest previous record containing it and prev_ck
             * points to the checksum in that record. */
            switch (db_lookup(emsg, rcd_type, rcd_ck->sum, lo, hi,
                          &db_sts.hash, &db_sts.rcd2, &prev_ck)) {
            case DB_FOUND_SYSERR:
                  return 0;

            case DB_FOUND_LATER:
                  /* outside the window; leave this checksum for
                   * another pass and do not touch its total */
                  continue;

            case DB_FOUND_IT:
                  /* We found the checksum
                   * Update the hash table to point to the new record */
                  DB_HPTR_CP(db_sts.hash.d.h->rcd, db_sts.rcd.s.rptr);
                  /* and chain the new record to its predecessor */
                  rcd_ck->prev = DB_PTR_CP(db_sts.rcd2.s.rptr);
                  if (rcd_tgts == DCC_TGTS_DEL) {
                        /* delete predecessors to a delete request
                         * and compute the remaining sum */
                        if (!del_ck(emsg, &res, rcd, rcd_type,
                                  prev_ck, &db_sts.rcd2))
                              return 0;
                        /* delete requests are obsolete if the
                         * checksum is white-listed */
                        if (res == DCC_TGTS_OK
                            || res == DCC_TGTS_OK2)
                              rcd_ck->type_fgs |= DB_CK_FG_OBS;
                  } else {
                        /* Simple checksum with a predecessor
                         * This does not do the substantial extra work
                         * to notice delete requests that arrived early.
                         * That problem is handled by the incoming
                         * flooding duplicate report detection
                         * mechanism. */
                        prev_ck_tgts = DB_TGTS_CK(prev_ck);
                        /* a summary record takes the previous total
                         * instead of adding its own count to it */
                        if (DB_RCD_SUMRY(rcd))
                              res = prev_ck_tgts;
                        else
                              res = db_sum_ck(res, prev_ck_tgts);

                        if (res == DCC_TGTS_OK || res == DCC_TGTS_OK2
                            || (DB_RCD_ID(db_sts.rcd2.d.r)
                              == DCC_ID_WHITE)) {
                              /* obsolete white-listed checksums */
                              rcd_ck->type_fgs |= DB_CK_FG_OBS;

                        } else if (res == DCC_TGTS_TOO_MANY
                                 && !DB_CK_OBS(rcd_ck)) {
                              /* suppress unneeded reports of spam */
                              if (!db_obs_ck(emsg, rcd, rcd_ck,
                                           rcd_type,
                                           prev_ck, prev_ck_tgts,
                                           &db_sts.rcd2))
                                  return 0;

                        } else if (rcd_type == DCC_CK_SRVR_ID) {
                              /* keep only the newest declaration
                               * of each server-ID */
                              if (!srvr_id_ck(emsg, rcd, rcd_ck,
                                          prev_ck, &db_sts.rcd2))
                                  return 0;
                        }
                  }
                  break;

            case DB_FOUND_EMPTY:
                  /* We found an empty hash table slot.
                   * Update the slot to point to our new record
                   * after removing it from the free list. */
                  if (!unlink_free_hash(emsg, &db_sts.hash, &db_sts.tmp))
                        return 0;
                  DB_HPTR_CP(db_sts.hash.d.h->rcd, db_sts.rcd.s.rptr);
                  HE_MERGE(db_sts.hash.d.h,rcd_type, rcd_ck->sum);
                  break;

            case DB_FOUND_CHAIN:
                  /* We found a hash collision, a chain of 1 or more
                   * records with the same hash value.
                   * Get a free slot, link it to the end of the chain,
                   * and point it to the record */
                  if (!get_free_hash(emsg, db_sts.hash.s.haddr))
                        return 0;
                  DB_HADDR_CP(db_sts.free.d.h->bak, db_sts.hash.s.haddr);
                  DB_HADDR_CP(db_sts.hash.d.h->fwd, db_sts.free.s.haddr);
                  DB_HPTR_CP(db_sts.free.d.h->rcd, db_sts.rcd.s.rptr);
                  HE_MERGE(db_sts.free.d.h,rcd_type, rcd_ck->sum);
                  break;

            case DB_FOUND_INTRUDER:
                  /* The home hash slot for our key contains an
                   * intruder.  Find a place to put it. */
                  /* pass the address of a neighbor of the intruder
                   * to get_free_hash(), presumably as a hint for
                   * where to look -- TODO confirm */
                  haddr = DB_HADDR_EX(db_sts.hash.d.h->fwd);
                  if (haddr == DB_HADDR_NULL)
                        haddr = DB_HADDR_EX(db_sts.hash.d.h->bak);
                  if (!get_free_hash(emsg, haddr))
                        return 0;
                  /* Move the intruder */
                  *db_sts.free.d.h = *db_sts.hash.d.h;
                  /* re-link the neighbors of the intruder */
                  haddr = DB_HADDR_EX(db_sts.free.d.h->bak);
                  /* NOTE(review): haddr is DB_HADDR_NULL whenever this
                   * message is produced, so the address it reports is
                   * always the null address and not the slot with the
                   * bad link */
                  if (haddr == DB_HADDR_NULL) {
                        dcc_pemsg(EX_DATAERR, emsg,
                                "bad hash chain reverse link at %#x"
                                " in %s",
                                haddr, DCC_NM2PATH(db_hash_nm));
                        return 0;
                  }
                  /* the predecessor now points forward to the new
                   * location of the intruder */
                  if (!map_hash(emsg, haddr, &db_sts.tmp))
                        return 0;
                  DB_HADDR_CP(db_sts.tmp.d.h->fwd, db_sts.free.s.haddr);
                  /* and any successor points back to it */
                  haddr = DB_HADDR_EX(db_sts.hash.d.h->fwd);
                  if (haddr != DB_HADDR_NULL) {
                        if (!map_hash(emsg, haddr, &db_sts.tmp))
                              return 0;
                        DB_HADDR_CP(db_sts.tmp.d.h->bak,
                                  db_sts.free.s.haddr);
                  }
                  /* install the new entry in its home slot */
                  DB_HADDR_CP(db_sts.hash.d.h->fwd, DB_HADDR_NULL);
                  DB_HADDR_CP(db_sts.hash.d.h->bak, DB_HADDR_NULL);
                  DB_HPTR_CP(db_sts.hash.d.h->rcd, db_sts.rcd.s.rptr);
                  HE_MERGE(db_sts.hash.d.h,rcd_type, rcd_ck->sum);
                  break;
            }

            /* Fix the checksum in the report.  Try not to write
             * in the buffer unless necessary to speed up dbclean */
            if (DB_TGTS_CK(rcd_ck) != res)
                  DB_TGTS_CK_SET(rcd_ck, res);
      }

      return db_flush_len(emsg);
}



/* Add a record to the database and the hash table
 *    The record must be known to be valid.
 *
 *    The record is appended at the current end of the data (db_csize).
 *    If its last byte would fall in a later page, the record is instead
 *    placed at the start of that page, rounded up to a multiple of
 *    DB_RCD_PAD, and the file is extended to include the page.
 *    The record is then copied into place and linked into the hash table
 *    with db_link_rcd().
 *    Returns the position of the new record in the file or 0 on failure. */
DB_PTR                              /* 0=failed */
db_add_rcd(DCC_EMSG emsg, DB_RCD *new_rcd)
{
      u_int len;
      DB_PTR pos, end, page;

      if (!db_make_dirty(emsg))
            return 0;

      /* the record is only as long as its checksums require */
      len = (sizeof(*new_rcd)
             - sizeof(new_rcd->cks)
             + (DB_NUM_CKS(new_rcd) * sizeof(new_rcd->cks[0])));

      /* try the current end of the data */
      pos = db_csize;
      end = pos+len;
      page = end/db_page_size;

      if (page != db_csize/db_page_size) {
            u_int pad;
            DB_PTR fsize;

            /* The record would not end in the current page,
             * so skip ahead to the start of the page where it ends. */
            pad = page*db_page_size - db_csize;
            pad = ((pad + DB_RCD_PAD-1) / DB_RCD_PAD) * DB_RCD_PAD;
            pos = db_csize + pad;
            end = pos + len;

            /* make the file big enough to include that page */
            fsize = (page+1)*db_page_size;
            db_extended = 1;
            if (!db_extend(emsg, db_fd, db_nm, fsize, db_fsize))
                  return 0;
            db_fsize = fsize;
            db_end_pg_num = page;
      }

      if (!map_db(emsg, pos, len, &db_sts.rcd))
            return 0;

      /* Copy the record into place and ask for its buffer to be pushed
       * to the disk, so that the database is as good as possible even
       * if we crash.  Later changes to the hash links do not matter
       * because dbclean will rebuild them after a crash. */
      memcpy(db_sts.rcd.d.r, new_rcd, len);
      db_sts.rcd.b->flags |= DB_BUF_FG_MSYNC;
      db_csize = end;

      /* install pointers in the hash table
       * and update the total counts in the record */
      if (!db_link_rcd(emsg, 0, MAX_HASH_ENTRIES))
            return 0;

      ++db_stats.adds;
      return pos;
}

Generated by  Doxygen 1.6.0   Back to index