/* Logo Search packages:
 * Sourcecode: dcc version File versions
 *
 * db.c
 */

/* Distributed Checksum Clearinghouse
 *
 * server database functions
 *
 * Copyright (c) 2005 by Rhyolite Software, LLC
 *
 * This agreement is not applicable to any entity which sells anti-spam
 * solutions to others or provides an anti-spam solution as part of a
 * security solution sold to other entities, or to a private network
 * which employs the DCC or uses data provided by operation of the DCC
 * but does not provide corresponding data to other users.
 *
 * Permission to use, copy, modify, and distribute this software for any
 * purpose with or without fee is hereby granted, provided that the above
 * copyright notice and this permission notice appear in all copies.
 *
 * Parties not eligible to receive a license under this agreement can
 * obtain a commercial license to use DCC and permission to use
 * U.S. Patent 6,330,590 by contacting Commtouch at http://www.commtouch.com/
 * or by email to nospam@commtouch.com.
 *
 * A commercial license would be for Distributed Checksum and Reputation
 * Clearinghouse software.  That software includes additional features.  This
 * free license for Distributed ChecksumClearinghouse Software does not in any
 * way grant permision to use Distributed Checksum and Reputation Clearinghouse
 * software
 *
 * THE SOFTWARE IS PROVIDED "AS IS" AND RHYOLITE SOFTWARE, LLC DISCLAIMS ALL
 * WARRANTIES WITH REGARD TO THIS SOFTWARE INCLUDING ALL IMPLIED WARRANTIES
 * OF MERCHANTABILITY AND FITNESS. IN NO EVENT SHALL RHYOLITE SOFTWARE, LLC
 * BE LIABLE FOR ANY SPECIAL, DIRECT, INDIRECT, OR CONSEQUENTIAL DAMAGES
 * OR ANY DAMAGES WHATSOEVER RESULTING FROM LOSS OF USE, DATA OR PROFITS,
 * WHETHER IN AN ACTION OF CONTRACT, NEGLIGENCE OR OTHER TORTIOUS ACTION,
 * ARISING OUT OF OR IN CONNECTION WITH THE USE OR PERFORMANCE OF THIS
 * SOFTWARE.
 *
 * Rhyolite Software DCC 1.3.42-1.132 $Revision$
 */

#include "srvr_defs.h"
#include <sys/resource.h>
#ifdef HAVE_HW_PHYSMEM
#include <sys/sysctl.h>
#endif
#ifdef HAVE_PSTAT_GETSTATIC /* HP-UX */
#include <sys/pstat.h>
#endif

/* PSTATIC marks helpers that are private to this file.  It can be
 * redefined as empty so that the helpers become visible to a profiler. */
#define PSTATIC static
/* #define PSTATIC */               /* for profiling */

/* contents written into the hash table entry at DB_HADDR_MAGIC */
static const u_char hash_magic[sizeof(HASH_ENTRY)] = DB_HASH_MAGIC;

DB_STATS db_stats;

/* the set of DB_STATE cursors released together by rel_db_states() */
DB_STATES db_sts;

DCC_PATH db_path_buf;

int db_fd = -1;                     /* main database file or -1 if closed */
DCC_PATH db_nm;                     /* name of the main database file */
int db_hash_fd = -1;                /* hash table file or -1 if closed */
DCC_PATH db_hash_nm;                /* name of the hash table file */
/* Set by db_lock() to the time the lock was obtained.
 * db_unlock() and db_close() zero tv_sec to mark the database unlocked. */
struct timeval db_locked;

struct timeval db_time;

int db_debug;

u_char grey_on;
static u_char db_no_mmap;
static u_char db_dirty;             /* set by db_make_dirty() & clear_hash() */
static u_char db_extended;          /* db_make_clean() must fsync() db_fd */
static u_char db_rdonly;
DB_PG_NUM db_end_pg_num = (DB_PG_NUM)-1;    /* have rebuilt hash to this page */
int db_failed_line;                 /* bad happened at this line # */
const char *db_failed_file;         /*    in this file */
static u_char db_invalidate;        /* do not write to the files */
/* remember where in the source a failure was noticed */
#define DB_FAILED() (db_failed_line = __LINE__, db_failed_file = __FILE__)


int db_buf_total;             /* total # of db buffers */
DB_PTR db_max_rss;                  /* maximum db resident set size */
/* use DB_PTR instead of off_t because off_t is often only 32-bits */

#define DB_HASH_TOTAL DB_BUF_MAX
static DB_BUF *db_buf_hash[DB_HASH_TOTAL];
static DB_BUF db_bufs[DB_HASH_TOTAL];     /* control mmap()'ed blocks */
/* ends of the list of buffers linked by their older and newer pointers */
static DB_BUF *buf_oldest, *buf_newest;

/* name of the file that a buffer belongs to, for error messages */
#define B2PATH(b) ((b)->type == DB_BUF_TYPE_DB ? db_nm : db_hash_nm)

/* slot in db_buf_hash[] for a page number */
#define DB_BUF_HASH(pg_num) (&db_buf_hash[(pg_num) % DIM(db_buf_hash)])

static const DB_VERSION_BUF version_buf = DB_VERSION_STR;

DB_PARMS db_parms;
static DB_PARMS db_parms_stored;

u_int db_page_size;                 /* size of 1 mmap()'ed buffer */

static off_t hash_fsize;            /* size of the hash table file */
DB_HADDR db_hash_len;               /* # of hash table entries */
DB_HADDR db_hash_used;              /* # of hash table entries in use */
u_int db_hash_page_len;             /* # of HASH_ENTRY's per buffer */
DB_HADDR db_max_hash_entries = 0;   /* after db_buf_init()*/
static off_t db_fsize;              /* size of database file */
DB_PTR db_csize;              /* size of database contents in bytes */
static DB_PTR db_csize_stored_hash; /* DB size stored in hash file */
u_int db_page_max;                  /* only padding after this in DB buf */
char db_window_size[32];            /* size of mmap() window */

/* Fuzziness level of each checksum type, indexed by checksum type.
 * There is one table for each mode and db_ck_fuzziness points to the
 * one in use.  It starts out pointing to the DCC table.
 * NOTE(review): the grey table is presumably selected elsewhere when
 * grey_on is set; that code is not in this part of the file. */
static const u_char dcc_ck_fuzziness[DCC_DIM_CKS] = {
      0,                      /* DCC_CK_INVALID */
      DCC_CK_FUZ_LVL_NO,            /* DCC_CK_IP */
      DCC_CK_FUZ_LVL_NO,            /* DCC_CK_ENV_FROM */
      DCC_CK_FUZ_LVL_NO,            /* DCC_CK_FROM */
      DCC_CK_FUZ_LVL_NO,            /* DCC_CK_SUB */
      DCC_CK_FUZ_LVL_NO,            /* DCC_CK_MESSAGE_ID */
      DCC_CK_FUZ_LVL_NO,            /* DCC_CK_RECEIVED */
      DCC_CK_FUZ_LVL_NO,            /* DCC_CK_BODY */
      DCC_CK_FUZ_LVL1,        /* DCC_CK_FUZ1 */
      DCC_CK_FUZ_LVL2,        /* DCC_CK_FUZ2 */
      DCC_CK_FUZ_LVL_REP,           /* DCC_CK_REP_TOTAL */
      DCC_CK_FUZ_LVL_REP,           /* DCC_CK_REP_BULK */
      DCC_CK_FUZ_LVL2,        /* DCC_CK_SRVR_ID */
      DCC_CK_FUZ_LVL2               /* DCC_CK_ENV_TO */
};
/* The same slots as above, with the levels used for greylisting.
 * Slots 10 and 11 are labeled with the greylist names here
 * (presumably the same values as DCC_CK_REP_TOTAL and DCC_CK_REP_BULK). */
static const u_char grey_ck_fuzziness[DCC_DIM_CKS] = {
      0,                      /* DCC_CK_INVALID */
      DCC_CK_FUZ_LVL2,        /* DCC_CK_IP */
      DCC_CK_FUZ_LVL_NO,            /* DCC_CK_ENV_FROM */
      DCC_CK_FUZ_LVL_NO,            /* DCC_CK_FROM */
      DCC_CK_FUZ_LVL_NO,            /* DCC_CK_SUB */
      DCC_CK_FUZ_LVL_NO,            /* DCC_CK_MESSAGE_ID */
      DCC_CK_FUZ_LVL_NO,            /* DCC_CK_RECEIVED */
      DCC_CK_FUZ_LVL_NO,            /* DCC_CK_BODY */
      DCC_CK_FUZ_LVL_NO,            /* DCC_CK_FUZ1 */
      DCC_CK_FUZ_LVL_NO,            /* DCC_CK_FUZ2 */
      DCC_CK_FUZ_LVL_NO,            /* DCC_CK_GREY_MSG */
      DCC_CK_FUZ_LVL1,        /* DCC_CK_GREY_TRIPLE */
      DCC_CK_FUZ_LVL1,        /* DCC_CK_SRVR_ID */
      DCC_CK_FUZ_LVL1               /* DCC_CK_ENV_TO */
};
const u_char *db_ck_fuzziness = dcc_ck_fuzziness;


/* Forward declarations of file-private helpers that are used before
 * they are defined.  They are static unless PSTATIC is redefined. */
PSTATIC u_char buf_msync(DCC_EMSG, DB_BUF *);
PSTATIC u_char buf_munmap(DCC_EMSG, DB_BUF *);
PSTATIC u_char buf_mmap(DB_BUF *, DB_PG_NUM);
PSTATIC DB_BUF *find_buf(DCC_EMSG, DB_BUF_TYPE, DB_PG_NUM);
PSTATIC u_char map_hash(DCC_EMSG, DB_HADDR, DB_STATE *);
PSTATIC u_char map_db(DCC_EMSG, DB_PTR, u_int, DB_STATE *);
PSTATIC void rel_db_states(u_char);
PSTATIC u_char db_set_sizes(DCC_EMSG);
PSTATIC u_char db_make_clean(u_char);


/* Greatest common divisor of two unsigned integers by Euclid's algorithm.
 *    A zero argument yields the other argument, and so gcd(0,0) is 0,
 *    instead of dividing by zero. */
static u_int
gcd(u_int n, u_int m)
{
      u_int r;

      /* make n the smaller value so that it is the first divisor */
      if (n > m) {
            r = m; m = n; n = r;
      }

      /* gcd(0,m) is m, but the loop would start by computing m % 0 */
      if (n == 0)
            return m;

      for (;;) {
            r = m % n;
            if (r == 0)
                  return n;
            m = n;
            n = r;
      }
}



/* Build the default set of checksum types that are not kept.
 *    DCC_CK_INVALID and DCC_CK_FLOD_PATH are always in the set.
 *    Every other type is added if DB_GLOBAL_NOKEEP() says so for the
 *    current value of grey_on. */
DB_NOKEEP_CKS
def_nokeep_cks(void)
{
      DB_NOKEEP_CKS cks = 0;
      DCC_CK_TYPES ck_type;

      DB_SET_NOKEEP(cks, DCC_CK_INVALID);
      DB_SET_NOKEEP(cks, DCC_CK_FLOD_PATH);

      ck_type = 0;
      while (ck_type < DCC_NUM_CKS) {
            if (DB_GLOBAL_NOKEEP(grey_on, ck_type)) {
                  DB_SET_NOKEEP(cks, ck_type);
            }
            ++ck_type;
      }

      return cks;
}



/* Extend a file from old_size to new_size bytes by writing zeros.
 *    At least in BSD/OS, mmap() cannot extend a file.
 *
 *    emsg      buffer for an error message
 *    fd, nm    open file and its name for error messages
 *    new_size  desired length, which must be larger than old_size
 *    old_size  current length, where the zeros start
 *
 *    Returns 1 if the file was extended or 0 with a message in emsg
 *    if a size was invalid or a system call failed. */
u_char
db_extend(DCC_EMSG emsg, int fd, const char *nm,
        DB_PTR new_size, DB_PTR old_size)
{
      static u_char zeros[8*1024];
      DB_PTR remaining;
      int len, i;

      if (new_size > DB_PTR_MAX) {
            dcc_pemsg(EX_SOFTWARE, emsg, "invalid new size "L_HPAT" for %s",
                    new_size, nm);
            return 0;
      }
      if (new_size <= old_size) {
            dcc_pemsg(EX_SOFTWARE, emsg,
                    "new_size "L_HPAT" <= old_size "L_HPAT
                    " in db_extend(%s)",
                    new_size, old_size, nm);
            return 0;
      }

      /* Use write() because FreeBSD documentation cautions against mmap() on
       * files with holes. */
      if (old_size != (DB_PTR)lseek(fd, old_size, SEEK_SET)) {
            dcc_pemsg(EX_IOERR, emsg, "lseek(%s,"L_HPAT"): %s",
                    nm, old_size, ERROR_STR());
            return 0;
      }

      /* Compute what remains in the wide DB_PTR type and narrow it to
       * an int only after clamping it to the size of the zero buffer.
       * Assigning the difference directly to an int would truncate a
       * large extension, possibly to zero or a negative value, and
       * so report success without writing anything. */
      while (old_size < new_size) {
            remaining = new_size - old_size;
            if (remaining > (DB_PTR)sizeof(zeros))
                  len = sizeof(zeros);
            else
                  len = (int)remaining;
            old_size += len;
            i = write(fd, &zeros, len);
            if (i != len) {
                  dcc_pemsg(EX_IOERR, emsg, "extend write(%s,%d)=%d: %s",
                          nm, len, i, ERROR_STR());
                  return 0;
            }
      }
      return 1;
}



/* Unmap buffers that are neither free nor locked, oldest first.
 *    emsg receives the message for the first failure only.
 *    If some is nonzero, stop after the first buffer that is tried.
 *
 *    Returns 0 if an unmap failed.  Otherwise returns 1 after walking
 *    the whole list, or 2 after stopping early because of some. */
u_char
db_unload(DCC_EMSG emsg, u_char some)
{
      DB_BUF *buf;
      u_char ok = 1;

      for (buf = buf_oldest; buf != 0; buf = buf->newer) {
            if (buf->type != DB_BUF_TYPE_FREE && buf->lock_cnt == 0) {
                  if (!buf_munmap(emsg, buf)) {
                        /* keep only the first error message */
                        ok = 0;
                        emsg = 0;
                  }
                  if (some)
                        return ok ? 2 : 0;
            }
      }

      return ok;
}



/* msync() some of the oldest dirty buffers
 *    This does not seem to have any effect on many systems */

/* number of buffers that the next call of db_sync_some() may flush
 * in addition to those marked DB_BUF_FG_MSYNC */
static int msync_bufs;

/* Flush unlocked dirty buffers, oldest first.
 *    Buffers marked DB_BUF_FG_MSYNC are always flushed.  Other dirty
 *    buffers are flushed only while the budget computed by the previous
 *    call lasts, and each of them uses up one unit of it.
 *
 *    secs  seconds covered by the budget computed for the next call
 *
 *    Returns 1 if every msync() worked or 0 if one failed. */
u_char
db_sync_some(int secs)
{
      DB_BUF *b;
      u_char result;

      result = 1;
      for (b = buf_oldest; b != 0; b = b->newer) {
            if (b->type == DB_BUF_TYPE_FREE
                || b->lock_cnt != 0
                || !(b->flags & DB_BUF_FG_DIRTY))
                  continue;

            /* Spend the budget only on buffers that are not forced.
             * Without the decrement, any positive budget would flush
             * every dirty buffer on every call and defeat the
             * rate limit described below. */
            if (!(b->flags & DB_BUF_FG_MSYNC)) {
                  if (msync_bufs <= 0)
                        continue;
                  --msync_bufs;
            }
            if (!buf_msync(0, b))
                  result = 0;
      }

      /* Common UNIX buffer cache flushing mechanisms are too quick for
       * this database, causing far too much disk traffic.  That is why
       * the buffers are marked MAP_NOSYNC if possible.  However, it
       * is necessary to push the database toward the disk so that when dccd
       * shuts down, the system need not be confronted with GBytes to write.
       *
       * Choose a rate that tries to push the database to the disk at
       * about 1 MByte/second.
       * Assume 10% of each buffer is dirty and will be written to the
       * disk when flushed.
       * So flush (10 MBytes/db_page_size) buffers/sec. */
      if (db_page_size == 0) {
            /* the page size is not yet known; avoid dividing by zero */
            msync_bufs = 1;
      } else {
            /* NOTE(review): the product is computed as an int and
             * would overflow for secs larger than about 200 */
            msync_bufs = (10*1024*1024*secs)/db_page_size;
      }

      return result;
}



/* Detach a DB_STATE from its buffer and drop its lock on the buffer.
 *    Nothing happens if the state has no buffer.
 *    When the last lock on the buffer goes away while the database is
 *    not locked, the buffer is unmapped.
 *    A negative lock count is reported with dcc_logbad(). */
static void
db_rel_state(DB_STATE *st)
{
      DB_BUF *buf = st->b;

      if (buf == 0)
            return;

      st->b = 0;
      st->d.p = 0;
      st->s.rptr = DB_PTR_BAD;

      --buf->lock_cnt;
      if (buf->lock_cnt == 0) {
            if (!DB_IS_LOCKED())
                  buf_munmap(0, buf);
            return;
      }
      if (buf->lock_cnt < 0)
            dcc_logbad(EX_SOFTWARE,
                     "negative database buffer lock");
}



/* Release the DB_STATEs in db_sts from db_sts.rcd up to but not
 * including db_sts.hash_ctl.
 *    The state that the walk ends on, the one holding the buffer with
 *    the dirty flag, is also released unless not_hash_magic is set. */
PSTATIC void
rel_db_states(u_char not_hash_magic)
{
      DB_STATE *st = &db_sts.rcd;

      while (st < &db_sts.hash_ctl) {
            db_rel_state(st);
            ++st;
      }

      /* keep the buffer with the dirty flag if so asked */
      if (not_hash_magic)
            return;
      db_rel_state(st);
}



/* Shut down the database: clean it, release every DB_STATE, unmap
 * all buffers, and close both files.
 *    A negative mode sets db_invalidate and is then treated as 0.
 *    The mode is passed to db_make_clean() as its "ok" argument.
 *
 *    Returns 1 if every step worked or 0 if any step failed.
 *    The remaining steps are still performed after a failure. */
u_char
db_close(int mode)                  /* -1=invalidate cache, 0=flush, 1=ok */
{
      u_char ok = 1;

      if (mode < 0) {
            db_invalidate = 1;
            mode = 0;
      }

      /* push out the data, and then release and push out the
       * dirty flags */
      if (!db_make_clean(mode)) {
            ok = 0;
      }
      rel_db_states(0);
      if (!db_unload(0, 0)) {
            ok = 0;
      }

      /* The hash table goes first because the server is often
       * waiting for the lock on the main file held by dbclean */
      if (db_hash_fd >= 0) {
            if (close(db_hash_fd) < 0) {
                  dcc_pemsg(EX_IOERR, 0, "close(%s): %s",
                          db_hash_nm, ERROR_STR());
                  ok = 0;
            }
            db_hash_fd = -1;
      }
      if (db_fd >= 0) {
            if (close(db_fd) < 0) {
                  dcc_pemsg(EX_IOERR, 0, "close(%s): %s",
                          db_nm, ERROR_STR());
                  ok = 0;
            }
            db_fd = -1;
      }

      /* forget everything about the database that was open */
      db_invalidate = 0;
      db_failed_line = 0;
      db_dirty = 0;
      db_extended = 0;
      db_locked.tv_sec = 0;         /* clear DB_IS_LOCKED() */
      return ok;
}



/* Lock the main database file.
 *    This locking does only multiple-readers/single-writer.
 *
 *    After getting the lock, the size of the file is compared with the
 *    size we remember.  A file that has grown is accepted only if the
 *    database is open read-only.  Any other change is a failure.
 *
 *    Returns 1 if the database was already locked, 0 if it was not
 *    locked and now is, or -1 on failure.  The file lock is released
 *    again before returning -1, because DB_IS_LOCKED() remains false
 *    and so db_unlock() would never release it. */
int                           /* -1=failed, 0=was not locked, 1=was */
db_lock(void)
{
      struct stat sb;

      if (DB_IS_LOCKED())
            return 1;

      if (!dcc_exlock_fd(0, db_fd, DCC_LOCK_ALL_FILE, "", db_nm))
            return -1;
      if (0 > fstat(db_fd, &sb)) {
            /* report before unlocking so that errno is still good */
            dcc_error_msg("stat(%s): %s", db_nm, ERROR_STR());
            dcc_unlock_fd(0, db_fd, DCC_LOCK_ALL_FILE, "", db_nm);
            return -1;
      }
      if (db_fsize != sb.st_size) {
            if (db_fsize > sb.st_size || !db_rdonly) {
                  dcc_error_msg("%s changed from "OFF_HPAT" to "OFF_HPAT,
                              db_nm, db_fsize, sb.st_size);
                  dcc_unlock_fd(0, db_fd, DCC_LOCK_ALL_FILE, "", db_nm);
                  return -1;
            }
            db_fsize = sb.st_size;
      }

      /* a non-zero time of locking is what marks the database locked */
      gettimeofday(&db_locked, 0);
      return 0;
}



/* Push changes toward the disk and then clear the database-dirty mark
 * kept in the hash table control entry.
 *    ok    0=release and unmap buffers but do not touch the dirty mark
 *
 *    Nothing is done and 1 is returned if the database is not dirty.
 *    NOTE(review): the result comment mentions 2, but no path in this
 *    function returns 2. */
PSTATIC u_char                      /* 0=failed, 1=clean, 2=too much work */
db_make_clean(u_char ok)
{
      u_char result;

      if (!db_dirty)
            return 1;

      result = 1;

      /* send any changes to the disk,
       * but keep the database-dirty flags in RAM */
      if (!db_failed_line
          && ok
          && db_hash_fd >= 0
          && !map_hash(0, DB_HADDR_SIZES, &db_sts.hash_ctl)) {
            result = 0;
      }

      /* release every state except the one for the control entry
       * and then unmap whatever is no longer locked */
      rel_db_states(1);
      if (!db_unload(0, 0))
            result = 0;

      if (db_extended) {
            /* Send the meta-data to disk so that other processes
             * such as dbclean can find the new length of the file
             * on Solaris. */
            if (0 > fsync(db_fd)) {
                  dcc_error_msg("fsync(%s): %s", db_nm, ERROR_STR());
                  result = 0;
            }
            db_extended = 0;
      }

      /* Clean the dirty flag in the hash table.
       * With luck, this will reach the disk after everything else.
       * The dirty flag is the stored count of used entries, which
       * db_make_dirty() sets to 0 and which is restored here.
       * NOTE(review): the control entry is dereferenced here even if
       * map_hash() failed above; confirm it is always mapped then. */
      if (!db_failed_line
          && ok
          && db_hash_fd >= 0
          && (DB_HADDR_EX(db_sts.hash_ctl.d.h
                      ->HASH_STORE_USED) != db_hash_used)) {
            DB_HADDR_CP(db_sts.hash_ctl.d.h->HASH_STORE_USED, db_hash_used);
            db_sts.hash_ctl.b->flags |= (DB_BUF_FG_MSYNC | DB_BUF_FG_DIRTY);
            if (!buf_msync(0, db_sts.hash_ctl.b))
                  result = 0;
      }

      db_dirty = 0;
      return result;
}



/* Mark the hash file, and so the whole database, dirty.
 *    The mark is a stored count of 0 used entries in the hash table
 *    control entry, which is pushed out with buf_msync() at once.
 *    The database must be locked.
 *
 *    Returns 1 if the database is or was already marked dirty, or 0
 *    with a message in emsg on failure. */
static u_char
db_make_dirty(DCC_EMSG emsg)
{
      DB_STATE *ctl = &db_sts.hash_ctl;

      if (db_dirty) {
            return 1;
      }

      if (!DB_IS_LOCKED()) {
            dcc_pemsg(EX_SOFTWARE, emsg, "dirtying unlocked database");
            return 0;
      }

      if (db_rdonly) {
            dcc_logbad(EX_SOFTWARE, "dirtying read-only database");
      }

      if (!map_hash(emsg, DB_HADDR_SIZES, ctl)) {
            return 0;
      }

      /* zap the stored count and force the buffer to be flushed */
      DB_HADDR_CP(ctl->d.h->HASH_STORE_USED, 0);
      ctl->b->flags |= (DB_BUF_FG_MSYNC | DB_BUF_FG_DIRTY);
      if (!buf_msync(emsg, ctl->b)) {
            return 0;
      }

      db_dirty = 1;
      return 1;
}



/* (start to) unlock the database
 *    The database is cleaned with db_make_clean() before the file
 *    lock is released.  The database is marked unlocked even if one
 *    of those steps fails.
 *    Nothing is done and 1 is returned if the database is not locked. */
u_char                              /* 0=failed, 1=at least started */
db_unlock(void)
{
      int ok;

      if (!DB_IS_LOCKED()) {
            return 1;
      }

      /* the write() hack cannot be used when we unlock frequently */
      db_no_mmap = 0;

      ok = db_make_clean(1);
      if (!dcc_unlock_fd(0, db_fd, DCC_LOCK_ALL_FILE, "", db_nm)) {
            ok = 0;
      }

      db_locked.tv_sec = 0;
      return ok;
}



/* Bounds in MBytes on the database window sizes accepted from ./configure.
 * fix configure script if this changes */
#define MIN_MIN_DB_MBYTE 32
#define MAX_MAX32_DB_MBYTE (3*1024) /* 3 GByte on 32 bit machines is fair */
#define MAX_MAX64_DB_MBYTE (64*1024)      /* 64 GByte on 64 bit machines is fair */
/* space used by dccd for rate limiting blocks and so forth */
#define DCCD_PAD_MBYTE  10
#define DCCD_PAD  (DCCD_PAD_MBYTE*1024*1024)

/* Choose db_max_rss, the number of bytes of the database that we will
 * try to keep mapped.
 *    1. Ask the system for the size of physical memory using whichever
 *      mechanisms ./configure found.  Each is tried only if those
 *      before it did not produce a size.
 *    2. Aim for physical memory less 512 MByte, or half of physical
 *      memory when there is 1 GByte or less.
 *    3. Clamp that to the configured minimum and maximum.
 *    4. Adjust for the RLIMIT_AS and RLIMIT_RSS process limits,
 *      raising the soft limits when the hard limits allow. */
static void
get_db_max_rss(void)
{
      u_int max_db_mbyte, min_db_mbyte;
#if defined(RLIMIT_AS) || defined(RLIMIT_RSS)
      struct rlimit limit;
#endif
      DB_PTR physmem = 0;           /* 0=not yet known */

#ifdef HAVE_PHYSMEM_TOTAL
      /* maybe someday physmem_total() will be widely available */
      physmem = physmem_total();
      if (db_debug)
            dcc_trace_msg("physmem=%d MByte from physmem_total()",
                        (u_int)(physmem/(1024*1024)));
#endif
#ifdef HAVE__SC_PHYS_PAGES
      if (physmem == 0) {
            long pages, pagesize;

            if ((pages = sysconf(_SC_PHYS_PAGES)) == -1) {
                  dcc_error_msg("sysconf(_SC_PHYS_PAGES): %s",
                              ERROR_STR());
            } else if ((pagesize = sysconf(_SC_PAGESIZE)) == -1) {
                  dcc_error_msg("sysconf(_SC_PAGESIZE): %s",
                              ERROR_STR());
            } else {
                  /* widen before multiplying to avoid overflow */
                  physmem = (DB_PTR)pages * (DB_PTR)pagesize;
                  if (db_debug)
                        dcc_trace_msg("physmem=%d MByte"
                                    " from sysconf(_SC_PHYS_PAGES)"
                                    " and sysconf(_SC_PAGESIZE)",
                                    (u_int)(physmem/(1024*1024)));
            }
      }
#endif
#ifdef HAVE_HW_PHYSMEM
      if (physmem == 0) {
            int mib[2] = {CTL_HW, HW_PHYSMEM};
            unsigned long int hw_physmem;
            size_t hw_physmem_len;

            hw_physmem_len = sizeof(hw_physmem);
            if (0 > sysctl(mib, 2, &hw_physmem, &hw_physmem_len, 0,0)) {
                  dcc_error_msg("sysctl(HW_PHYSMEM): %s", ERROR_STR());
            } else {
                  physmem = hw_physmem;
                  if (db_debug)
                        dcc_trace_msg("physmem=%d MByte"
                                    " from sysctl(mib)",
                                    (u_int)(physmem/(1024*1024)));
            }
      }
#endif
#ifdef HAVE_PSTAT_GETSTATIC
      if (physmem == 0) {
            struct pst_static pss;

            /* NOTE(review): a page_size of 0 passes this sanity check
             * and gives physmem=0, which falls back to the minimum */
            if (0 > pstat_getstatic(&pss, sizeof pss, 1, 0)) {
                  dcc_error_msg("pstat_getstatic(): %s", ERROR_STR());
            } else if (pss.physical_memory <= 0
                     || pss.page_size < 0) {
                  dcc_error_msg("pstat_getstatic() says"
                              " physical_memory=%d page_size=%d",
                              pss.physical_memory, pss.page_size);
            } else {
                  physmem = ((DB_PTR)pss.physical_memory
                           * (DB_PTR)pss.page_size);
                  if (db_debug)
                        dcc_trace_msg("physmem=%d MByte"
                                    " from pstat_getstatic()",
                                    (u_int)(physmem/(1024*1024)));
            }
      }
#endif

      /* use default maximum if maximum is bogus or unset by ./configure */
      max_db_mbyte = DCC_MAX_DB_MBYTE;
      if (max_db_mbyte < MIN_MIN_DB_MBYTE
          || max_db_mbyte > MAX_MAX64_DB_MBYTE)
            max_db_mbyte = MAX_MAX64_DB_MBYTE;
      /* a narrow long is taken as the sign of a 32-bit system */
      if (sizeof(long) < 8 && max_db_mbyte > MAX_MAX32_DB_MBYTE)
            max_db_mbyte = MAX_MAX32_DB_MBYTE;

      /* use default minimum if minimum is bogus or unset by ./configure */
      min_db_mbyte = DCC_MIN_DB_MBYTE;
      if (min_db_mbyte < MIN_MIN_DB_MBYTE || min_db_mbyte > max_db_mbyte)
            min_db_mbyte = 64;

      /* Try to use physical memory less 512 MByte
       *    or half of physical memory if there is less than 1 GByte. */
      if (physmem > 1024*1024*1024)
            db_max_rss = physmem - 512*1024*1024;
      else
            db_max_rss = physmem/2;

      /* If we got a reasonable memory size from the kernel, use it.
       * Use a default if not.
       * An unknown physmem of 0 ends up as the minimum here. */
      if (db_max_rss/(1024*1024) < min_db_mbyte) {
            db_max_rss = min_db_mbyte;
            if (db_debug)
                  dcc_trace_msg("physmem=minimum %d MByte", min_db_mbyte);
            db_max_rss *= 1024*1024;
      } else if (db_max_rss/(1024*1024) > max_db_mbyte) {
            db_max_rss = max_db_mbyte;
            if (db_debug)
                  dcc_trace_msg("physmem=maximum %d MByte", max_db_mbyte);
            db_max_rss *= 1024*1024;
      }

#ifdef RLIMIT_AS
      /* try not to break process virtual memory limit,
       * but only if it is not ridiculously tiny */
      if (0 <= getrlimit(RLIMIT_AS, &limit)
          && (DB_PTR)limit.rlim_cur < db_max_rss
          && limit.rlim_max/(1024*1024) > min_db_mbyte + DCCD_PAD_MBYTE) {
            DB_PTR limit_rss;
            struct rlimit limit_new;

            /* Raise the soft limit toward the hard limit, but no
             * higher than our target plus the dccd allowance. */
            limit_new = limit;
            limit_new.rlim_cur = limit_new.rlim_max;
            if ((DB_PTR)limit_new.rlim_cur > db_max_rss + DCCD_PAD)
                  limit_new.rlim_cur = db_max_rss + DCCD_PAD;
            if (0 > setrlimit(RLIMIT_AS, &limit_new)) {
                  /* NOTE(review): the message shows db_max_rss and
                   * not the limit that was requested */
                  dcc_error_msg("setrlimit(RLIMIT_AS, "L_DPAT"): %s",
                              db_max_rss, ERROR_STR());
                  limit_rss = limit.rlim_cur;
            } else {
                  limit_rss = limit_new.rlim_cur;
            }
            if (db_debug)
                  dcc_trace_msg("RLIMIT_AS reduced max_rss"
                              " from "L_DPAT" to "L_DPAT,
                              db_max_rss, limit_rss);
            /* NOTE(review): the new value is the whole soft limit,
             * including any DCCD_PAD added above, and so it can be
             * larger than the old db_max_rss; confirm this is meant */
            db_max_rss = limit_rss;
      }
#endif /* RLIMIT_AS */
#ifdef RLIMIT_RSS
      /* try not to break process resident memory limit
       * but only if it is not ridiculously tiny */
      if (0 <= getrlimit(RLIMIT_RSS, &limit)
          && (DB_PTR)limit.rlim_cur < db_max_rss
          && limit.rlim_max/(1024*1024) > min_db_mbyte + DCCD_PAD_MBYTE) {
            DB_PTR limit_rss;
            struct rlimit limit_new;

            /* the same adjustment as for RLIMIT_AS */
            limit_new = limit;
            limit_new.rlim_cur = limit_new.rlim_max;
            if ((DB_PTR)limit_new.rlim_cur > db_max_rss + DCCD_PAD)
                  limit_new.rlim_cur = db_max_rss + DCCD_PAD;
            if (0 > setrlimit(RLIMIT_RSS, &limit_new)) {
                  dcc_error_msg("setrlimit(RLIMIT_RSS, "L_DPAT"): %s",
                              db_max_rss, ERROR_STR());
                  limit_rss = limit.rlim_cur;
            } else {
                  limit_rss = limit_new.rlim_cur;
            }
            if (db_debug)
                  dcc_trace_msg("RLIMIT_RSS reduced max_rss"
                              " from "L_DPAT" to "L_DPAT,
                              db_max_rss, limit_rss);
            db_max_rss = limit_rss;
      }
#endif /* RLIMIT_RSS */

      /* this bound is not available to the rest of the file */
#undef MIN_MIN_DB_MBYTE
}



/* Pick a buffer size that will hold an integral number of DB hash
 * table entries and is a multiple of the system's page size.
 * The entire hash table should reside in memory
 * if the system has enough memory.
 *
 *    Also sets db_buf_total, the number of buffers to use.
 *    Returns the page size, or 0 if old_page_size cannot be used. */
int
db_get_page_size(u_int old_page_size,     /* 0 or required page size */
             u_int tgt_page_size)   /* 0 or target page size */
{
      u_int smallest, largest;

      /* Ask the operating system only once so that we do not get
       * differing answers and so compute a varying page size.
       * Some systems can't keep their stories straight. */
      if (db_max_rss == 0) {
            get_db_max_rss();
      }

      /* The smallest usable page size is the least common multiple
       * of the system page size and the DB hash table entry size. */
      smallest = getpagesize();
      smallest *= (sizeof(HASH_ENTRY)
                    / gcd(sizeof(HASH_ENTRY), smallest));

      /* The DB buffer or page size must also be a multiple of
       * the end-of-page padding used in the main database file. */
      if (sizeof(DB_RCD) % DB_RCD_HDR_LEN != 0) {
            dcc_logbad(EX_SOFTWARE,
                     "DB padding size %d"
                     " is not a divisor of DB entry size %d",
                     DB_RCD_HDR_LEN, ISZ(DB_RCD));
      }
      smallest *= (DB_RCD_HDR_LEN / gcd(DB_RCD_HDR_LEN, smallest));

      /* Use the old buffer size if possible so that we are not
       * confused by padding at the ends of the old pages.
       * Fail if it is impossible.  This should cause dbclean to
       * rebuild the database. */
      if (old_page_size != 0) {
            if ((old_page_size % smallest) != 0) {
                  return 0;
            }
            /* fit the number of buffers to our window size */
            db_buf_total = db_max_rss / old_page_size;
            if (db_buf_total < (int)DB_BUF_MIN) {
                  return 0;
            }
            if (db_buf_total > DB_BUF_MAX) {
                  db_buf_total = DB_BUF_MAX;
            }
            return old_page_size;
      }

      /* otherwise use every buffer and split the window among them */
      db_buf_total = DB_BUF_MAX;
      largest = db_max_rss / db_buf_total;
      largest -= largest % smallest;

      /* If we have a target page size, try to use it.
       * A target page size is big enough to hold 25% of the entire
       * greylist database */
      if (tgt_page_size != 0 && tgt_page_size < largest) {
            tgt_page_size -= tgt_page_size % smallest;
            return tgt_page_size < smallest ? smallest : tgt_page_size;
      }

      return largest > smallest ? largest : smallest;
}



/* (re)create the buffer pool
 * The buffers are small blocks that point to the real mmap()'ed memory.
 *
 *    Chooses the page size with db_get_page_size() and derives the
 *    other sizes from it.  Then releases whatever the old buffers
 *    pointed to, clears all of the buffers, and links the first
 *    db_buf_total of them into the list from buf_oldest to buf_newest.
 *
 *    Returns 1, or 0 if no page size can be used. */
u_char
db_buf_init(u_int old_page_size)    /* 0 or required page size */
{
      DB_BUF *b, *bprev, *bnext;
      int i;


      db_page_size = db_get_page_size(old_page_size, 0);
      if (!db_page_size)
            return 0;

      db_page_max = db_page_size - DB_RCD_HDR_LEN;
      db_hash_page_len = db_page_size/sizeof(HASH_ENTRY);

      /* the largest hash table that is a whole number of pages */
      db_max_hash_entries = (MAX_HASH_ENTRIES
                         - MAX_HASH_ENTRIES % db_hash_page_len);

      /* Release the memory of every buffer in the pool, including
       * the last one, before the pointers are wiped out.
       * NOTE(review): this assumes any remaining buf.v came from
       * malloc() and is not an mmap()'ed region. */
      for (b = db_bufs, i = DIM(db_bufs); i != 0; --i, ++b) {
            if (b->buf.v)
                  free(b->buf.v);
      }
      memset(db_bufs, 0, sizeof(db_bufs));

      /* link all but the last of the buffers that will be used */
      b = db_bufs;
      buf_oldest = b;
      bprev = 0;
      for (i = db_buf_total; --i != 0; b = bnext) {
            bnext = b+1;
            b->older = bprev;
            b->newer = bnext;
            bprev = b;
      }
      /* the last buffer ends the list; it was cleared with the rest */
      b->older = bprev;
      buf_newest = b;

      memset(db_buf_hash, 0, sizeof(db_buf_hash));

      return 1;
}



/* Fill in one entry of a new, empty hash table.
 *    hash      the entry to fill in
 *    rcd_num   its address in the hash table
 *
 *    The entry at DB_HADDR_MAGIC receives the magic string and the
 *    entry at DB_HADDR_SIZES receives the table length and the initial
 *    count of used entries.  Every other entry is zeroed and linked to
 *    its neighbors, so that the entries from DB_HADDR_MIN to the end
 *    of the table form a ring through the entry at DB_HADDR_FREE. */
static void
clear_hash_entry(HASH_ENTRY *hash, DB_HADDR rcd_num)
{
      DB_HADDR prev, next;

      if (rcd_num == DB_HADDR_MAGIC) {
            memcpy(hash, &hash_magic, sizeof(hash_magic));
            return;
      }

      memset(hash, 0, sizeof(*hash));

      if (rcd_num == DB_HADDR_SIZES) {
            DB_HADDR_CP(hash->HASH_STORE_LEN, db_hash_len);
            DB_HADDR_CP(hash->HASH_STORE_USED, DB_HADDR_MIN);
            return;
      }

      /* the first entry points back to the head of the free list
       * and the head points back to the last entry */
      prev = ((rcd_num == DB_HADDR_MIN) ? DB_HADDR_FREE
            : (rcd_num == DB_HADDR_FREE) ? db_hash_len - 1
            : rcd_num - 1);
      DB_HADDR_CP(hash->bak, prev);

      /* the head points forward to the first entry
       * and the last entry points forward to the head */
      if (rcd_num == DB_HADDR_FREE) {
            next = DB_HADDR_MIN;
      } else {
            next = rcd_num + 1;
            if (next >= db_hash_len)
                  next = DB_HADDR_FREE;
      }
      DB_HADDR_CP(hash->fwd, next);
}



#ifdef MAP_ANON
/* Clear new hash file by linking all of its entries into
 * the free list using the DB buffer/page mechanism.
 *    Each page of the table is obtained with find_buf().  A buffer
 *    that has no memory yet is given an anonymous private mapping and
 *    marked DB_BUF_FG_NO_MMAP and DB_BUF_FG_ANON.
 *    Marks the database dirty.
 *    Returns 1, or 0 with a message in emsg on failure. */
static u_char
clear_hash(DCC_EMSG emsg)
{
      DB_HADDR haddr;
      DB_BUF *b;
      HASH_ENTRY *hash, *hash_lim;
      void *p;
      int i;

      db_dirty = 1;
      haddr = DB_HADDR_NULL;
      /* start with no current page so that the first pass fetches one */
      hash = hash_lim = 0;
      for (haddr = 0; haddr < db_hash_len; ++haddr, ++hash) {
            if (hash >= hash_lim) {
                  /* move to the page that contains this entry */
                  b = find_buf(emsg, DB_BUF_TYPE_HASH,
                             haddr / db_hash_page_len);
                  if (!b)
                        return 0;
                  hash = b->buf.h;
                  if (!hash) {
                        p = mmap(0, db_page_size,
                               PROT_READ | PROT_WRITE,
                               MAP_ANON | MAP_PRIVATE, -1, 0);
                        if (p == MAP_FAILED) {
                              dcc_pemsg(EX_IOERR, emsg,
                                      "mmap(anon, %d): %s",
                                      db_page_size, ERROR_STR());
                              return 0;
                        }
                        b->buf.v = p;
                        hash = p;
                        b->flags |= (DB_BUF_FG_NO_MMAP
                                   | DB_BUF_FG_ANON);
                  }
                  /* the limit is the end of this page or the end
                   * of the table, whichever comes first */
                  hash_lim = hash;
                  if (db_hash_len - haddr > db_hash_page_len)
                        hash_lim += db_hash_page_len;
                  else
                        hash_lim += db_hash_len - haddr;
            }
            clear_hash_entry(hash, haddr);
      }

      /* The hash table might not be an even number of pages,
       * but the file must be.  We know the excess is less than
       * one hash table entry.
       * After the loop, hash points just past the last entry. */
      i = hash_fsize - db_hash_len*sizeof(*hash);
      if (i > 0)
            memset(hash, 0, i);

      return 1;
}

#else /* !defined(MAP_ANON) */

/* Helper for clearing a new hash file with write():
 * append the bytes from buf up to but not including ptr to the
 * hash file.
 *    Returns 1 if there was nothing to write or everything was
 *    written, or 0 with a message in emsg otherwise. */
static u_char
write_hash_buf(DCC_EMSG emsg, void *buf, void *ptr)
{
      int nbytes, written;

      nbytes = (char *)ptr - (char *)buf;
      if (nbytes == 0)
            return 1;

      written = write(db_hash_fd, buf, nbytes);
      if (written == nbytes)
            return 1;

      dcc_pemsg(EX_IOERR, emsg, "write(%s,%d)=%d: %s",
              db_hash_nm, nbytes, written, ERROR_STR());
      return 0;
}



/* Clear new hash file by linking all of its entries into
 * the free list and using write().
 *    The entries are built in a buffer on the stack and written
 *    whenever the buffer fills.  The file is then padded with zeros
 *    to hash_fsize.
 *    Returns 1, or 0 with a message in emsg if a write failed. */
static u_char
clear_hash(DCC_EMSG emsg)
{
      DB_HADDR haddr;
      HASH_ENTRY *hash, hash_buf[512];
      int i;

      hash = hash_buf;
      for (haddr = 0; haddr < db_hash_len; ++haddr, ++hash) {
            /* write what has accumulated and start over */
            if (hash >= LAST(hash_buf)) {
                  if (!write_hash_buf(emsg, hash_buf, hash))
                        return 0;
                  hash = hash_buf;
            }

            clear_hash_entry(hash, haddr);
      }
      if (!write_hash_buf(emsg, hash_buf, hash))
            return 0;

      /* The hash table might not be an even number of pages,
       * but the file must be.  We know the excess is less than
       * one hash table entry.
       * The excess is a number of bytes and so the end of the
       * padding must be computed with byte arithmetic.  Indexing
       * hash_buf[] with it would write that many whole entries. */
      i = hash_fsize - db_hash_len*sizeof(*hash);
      if (i > 0) {
            memset(hash_buf, 0, sizeof(hash_buf));
            if (!write_hash_buf(emsg, hash_buf, (u_char *)hash_buf + i))
                  return 0;
      }

      return 1;
}
#endif


/* Create a new, empty hash table in the already open hash file.
 *    new_hash_len is the wanted number of entries.  It is limited to
 *    db_max_hash_entries, the file size is rounded up to a multiple of
 *    db_page_size, and the entry count is recomputed from that size
 *    before it is compared with MIN_HASH_ENTRIES and MAX_HASH_ENTRIES.
 *    Sets hash_fsize, db_hash_len, and db_hash_used.
 *    Returns 1 on success or 0 with a message in emsg. */
static u_char
make_new_hash(DCC_EMSG emsg, DB_HADDR new_hash_len)
{
      struct stat sb;
      u_int n;

      if (getuid() == 0) {
            /* if we are running as root, give the hash file
             * the owner and group of the database file */
            if (0 > fstat(db_fd, &sb)) {
                  dcc_pemsg(EX_IOERR, emsg, "fstat(%s): %s",
                          db_nm, ERROR_STR());
                  return 0;
            }
            if (0 > fchown(db_hash_fd, sb.st_uid, sb.st_gid)) {
                  dcc_pemsg(EX_IOERR, emsg, "fchown(%s,%d,%d): %s",
                          db_hash_nm, (int)sb.st_uid, (int)sb.st_gid,
                          ERROR_STR());
                  return 0;
            }
      }

      if (new_hash_len > db_max_hash_entries)
            new_hash_len = db_max_hash_entries;

      /* increase the requested hash table size to a multiple
       * of the page size */
      hash_fsize = new_hash_len * sizeof(HASH_ENTRY);
      n = hash_fsize % db_page_size;
      if (n != 0) {
            hash_fsize += db_page_size - n;
            new_hash_len = hash_fsize/sizeof(HASH_ENTRY);
      }

      if (new_hash_len < MIN_HASH_ENTRIES) {
            dcc_pemsg(EX_DATAERR, emsg,
                    "database size %d is too small", new_hash_len);
            return 0;
      }
      if (new_hash_len > MAX_HASH_ENTRIES) {
            dcc_pemsg(EX_DATAERR, emsg,
                    "database size %d is too large", new_hash_len);
            return 0;
      }

      /* create the empty hash table file */
      rel_db_states(0);
      if (!db_unload(emsg, 0))
            return 0;
      if (0 > ftruncate(db_hash_fd, 0)) {
            /* the file is truncated to nothing, so report that length
             * instead of the unrelated size of the database file */
            dcc_pemsg(EX_IOERR, emsg, "ftruncate(%s,0): %s",
                    db_hash_nm, ERROR_STR());
            return 0;
      }

      db_hash_len = new_hash_len;
      db_hash_used = DB_HADDR_MIN;
      return clear_hash(emsg);
}



/* Validate an existing hash table file and load its counters.
 *    Checks the file size, the magic entry, the head and tail of the
 *    free list, and the entry counts and database size stored in the file.
 *    Sets hash_fsize, db_hash_len, db_hash_used, and db_csize_stored_hash.
 *    Returns 1 if the file looks usable or 0 with a message in emsg. */
static u_char
check_old_hash(DCC_EMSG emsg)
{
      DB_HADDR fwd, bak, stored_hash_len;
      struct stat sb;

      /* check the size of the existing hash file */
      if (0 > fstat(db_hash_fd, &sb)) {
            dcc_pemsg(EX_IOERR, emsg, "stat(%s): %s",
                    db_hash_nm, ERROR_STR());
            return 0;
      }
      hash_fsize = sb.st_size;
      if ((hash_fsize % sizeof(HASH_ENTRY)) != 0) {
            dcc_pemsg(EX_DATAERR, emsg, "%s has size "OFF_DPAT","
                    " not a multiple of %d",
                    db_hash_nm, hash_fsize,
                    ISZ(HASH_ENTRY));
            return 0;
      }

      db_hash_len = hash_fsize/sizeof(HASH_ENTRY);
      if (db_hash_len < MIN_HASH_ENTRIES) {
            dcc_pemsg(EX_DATAERR, emsg,
                    "%s has too few records, "OFF_DPAT" bytes",
                    db_hash_nm, hash_fsize);
            return 0;
      }

      /* check the magic number */
      if (!map_hash(emsg, DB_HADDR_MAGIC, &db_sts.hash_ctl))
            return 0;
      if (memcmp(db_sts.hash_ctl.d.h, &hash_magic, sizeof(HASH_ENTRY))) {
            dcc_pemsg(EX_DATAERR, emsg,
                    "%s has the wrong magic \"%.*s\"",
                    db_hash_nm, ISZ(HASH_ENTRY), db_sts.hash_ctl.d.c);
            return 0;
      }

      /* Fetch the counts stored in the file before looking at the free
       * list, because the free list checks compare db_hash_used with
       * db_hash_len and must not see a value left from some earlier use.
       * The counts themselves are validated after the free list. */
      if (!map_hash(emsg, DB_HADDR_SIZES, &db_sts.hash)) {
            return 0;
      }
      stored_hash_len = DB_HADDR_EX(db_sts.hash.d.h->HASH_STORE_LEN);
      db_hash_used = DB_HADDR_EX(db_sts.hash.d.h->HASH_STORE_USED);

      /* An invalid head or tail of the free list is tolerated only if
       * it is DB_HADDR_FREE itself and no entry is unused. */
      if (!map_hash(emsg, DB_HADDR_FREE, &db_sts.hash_ctl))
            return 0;
      fwd = DB_HADDR_EX(db_sts.hash_ctl.d.h->fwd);
      if (DB_HADDR_INVALID(fwd)
          && (fwd != DB_HADDR_FREE
            || db_hash_len > db_hash_used)) {
            dcc_pemsg(EX_DATAERR, emsg,
                    "%s has a broken free list head of %#x",
                    db_hash_nm, fwd);
            return 0;
      }
      bak = DB_HADDR_EX(db_sts.hash_ctl.d.h->bak);
      if (DB_HADDR_INVALID(bak)
          && (bak != DB_HADDR_FREE
            || db_hash_len > db_hash_used)) {
            dcc_pemsg(EX_DATAERR, emsg,
                    "%s has a broken free list tail of %#x",
                    db_hash_nm, bak);
            return 0;
      }

      /* check the number of hash table entries claimed by the file */
      if (db_hash_len != stored_hash_len) {
            dcc_pemsg(EX_DATAERR, emsg,
                    "%s has %d entries but claims %d",
                    db_hash_nm, db_hash_len, stored_hash_len);
            return 0;
      }
      /* check the number of entries in use */
      if (db_hash_used < DB_HADDR_MIN) {
            if (db_hash_used == 0)
                  dcc_pemsg(EX_DATAERR, emsg,
                          "%s was not closed cleanly",
                          db_hash_nm);
            else
                  dcc_pemsg(EX_DATAERR, emsg,
                          "%s contains an impossible %d entries",
                          db_hash_nm, HASH_LEN_EXT(db_hash_used));
            return 0;
      }
      if (db_hash_used >= db_hash_len) {
            if (db_hash_used > db_hash_len)
                  dcc_pemsg(EX_DATAERR, emsg,
                          "%s contains only %d entries but %d used",
                          db_hash_nm,
                          HASH_LEN_EXT(db_hash_len),
                          HASH_LEN_EXT(db_hash_used));
            else
                  dcc_pemsg(EX_DATAERR, emsg,
                          "%s is filled with %d entries",
                          db_hash_nm,
                          HASH_LEN_EXT(db_hash_len));
            return 0;
      }

      /* the hash file and the database file must agree about
       * the size of the database */
      db_csize_stored_hash = DB_HPTR_EX(db_sts.hash.d.h->HASH_STORE_DB_CSIZE);
      if (db_csize_stored_hash != db_csize
          && hash_fsize != 0) {
            dcc_pemsg(EX_DATAERR, emsg,
                    "%s contains "L_DPAT" bytes"
                    " instead of the "L_DPAT" that %s claims",
                    db_nm, db_csize,
                    db_csize_stored_hash, db_hash_nm);
            return 0;
      }

      return 1;
}



/* open the files and generally get ready to work
 *
 *    new_db_nm    name of the database file, or 0 to keep the name
 *                 already in db_nm or, if db_nm is empty, to use
 *                 DB_GREY_NAME or DB_DCC_NAME depending on grey_on
 *    new_hash_len 0 to check the existing hash file with check_old_hash(),
 *                 or the number of entries for a new hash table built
 *                 by make_new_hash()
 *    mode         DB_OPEN_* bits.  Those examined here are DB_OPEN_RDONLY,
 *                 DB_OPEN_NO_MMAP, DB_OPEN_LOCK_EXT, and DB_OPEN_LOCK_NOWAIT.
 *
 *    db_failed_line is set to 1 on entry and cleared only on success.
 *    Every failure from the opening of the database file onward calls
 *    db_close(-1) before returning 0. */
u_char                              /* 0=failed, 1=ok */
db_open(DCC_EMSG emsg,
      const char *new_db_nm,
      DB_HADDR new_hash_len,        /* 0 or # of entries */
      u_char mode)                  /* DB_OPEN_* */
{
      u_int cur_page_size;
      int hash_flags, db_open_flags;
      struct stat db_sb;
      DB_PTR window;

      db_close(1);
      db_failed_line = 1;

      memset(&db_stats, 0, sizeof(db_stats));

      /* choose the file names; the name of the hash file is
       * derived from the name of the database file */
      if (!new_db_nm && db_nm[0] == '\0')
            new_db_nm = grey_on ? DB_GREY_NAME : DB_DCC_NAME;
      if (new_db_nm) {
            if (!fnm2path(db_nm, new_db_nm, 0)
                || !fnm2path(db_hash_nm, db_nm, DB_HASH_SUFFIX)) {
                  dcc_pemsg(EX_DATAERR, emsg,
                          "invalid DB nm \"%s\"", new_db_nm);
                  return 0;
            }
      }

      /* DB_OPEN_NO_MMAP is honored only when writing */
      db_rdonly = (mode & DB_OPEN_RDONLY) != 0;
      db_no_mmap = !db_rdonly && (mode & DB_OPEN_NO_MMAP) != 0;

      /* pick the open() flags for both files and reject combinations
       * of mode and new_hash_len that make no sense */
      if (mode & DB_OPEN_LOCK_EXT) {
            if (new_hash_len) {
                  dcc_pemsg(EX_SOFTWARE, emsg,
                          "extending db_open(%s) without locking",
                          db_nm);
                  return 0;
            }
            if (!db_rdonly) {
                  dcc_pemsg(EX_SOFTWARE, emsg,
                          "db_open(%s) read/write without locking",
                          db_nm);
                  return 0;
            }
            db_open_flags = O_RDONLY;
            hash_flags = O_RDONLY;
      } else {
            db_open_flags = O_RDWR;
            if (new_hash_len) {
                  if (db_rdonly) {
                        dcc_pemsg(EX_SOFTWARE, emsg,
                                "db_open(%s) creating read-only",
                                db_nm);
                        return 0;
                  }
                  hash_flags = O_RDWR | O_CREAT;
            } else {
                  /* must open the file read/write to lock it */
                  hash_flags = O_RDWR;
            }
      }

      /* open and lock the database file and note when that happened */
      db_fd = dcc_lock_open(emsg, db_nm, db_open_flags,
                        (mode & DB_OPEN_LOCK_NOWAIT)
                        ? DCC_LOCK_OPEN_NOWAIT
                        : (mode & DB_OPEN_LOCK_EXT)
                        ? DCC_LOCK_OPEN_EXT
                        : 0,
                        DCC_LOCK_ALL_FILE, 0);
      if (db_fd == -1) {
            db_close(-1);
            return 0;
      }
      gettimeofday(&db_locked, 0);
      if (0 > fstat(db_fd, &db_sb)) {
            dcc_pemsg(EX_IOERR, emsg, "stat(%s): %s", db_nm, ERROR_STR());
            db_close(-1);
            return 0;
      }
      /* until the header has been read, assume that all of the
       * file contains data */
      db_csize = db_fsize = db_sb.st_size;
      if (db_fsize < ISZ(DB_HDR)) {
            dcc_pemsg(EX_IOERR, emsg,
                    "%s with %d bytes is too small to be a DCC database",
                    db_nm, (int)db_fsize);
            db_close(-1);
            return 0;
      }

      /* check the header of the database file */
      db_buf_init(0);
      if (!map_db(emsg, 0, sizeof(DB_HDR), &db_sts.db_parms)) {
            db_close(-1);
            return 0;
      }

      /* keep one copy of the header as it is in the file
       * and one working copy */
      db_parms_stored = *db_sts.db_parms.d.parms;
      db_parms = *db_sts.db_parms.d.parms;

      DB_SET_NOKEEP(db_parms.nokeep_cks, DCC_CK_INVALID);
      DB_SET_NOKEEP(db_parms.nokeep_cks, DCC_CK_FLOD_PATH);

      if (memcmp(db_parms.version, version_buf, sizeof(version_buf))) {
            dcc_pemsg(EX_DATAERR, emsg,
                    "%s contains the wrong magic string \"%.*s\"",
                    db_nm, ISZ(db_parms.version), db_parms.version);
            db_close(-1);
            return 0;
      }
      /* the greylist flag in the file must agree with grey_on */
      if (!(db_parms.flags & DB_PARM_FG_GREY) != !grey_on) {
            dcc_pemsg(EX_DATAERR, emsg,
                    "%s is%s a greylist database but must%s be",
                    db_nm,
                    (db_parms.flags & DB_PARM_FG_GREY) ? "" : " not",
                    grey_on ? "" : " not");
            db_close(-1);
            return 0;
      }

      db_ck_fuzziness = grey_on ? grey_ck_fuzziness : dcc_ck_fuzziness;

      /* from here on db_csize is the size claimed by the header */
      db_csize = db_parms.db_csize;
      if (db_csize < sizeof(DB_HDR)) {
            dcc_pemsg(EX_DATAERR, emsg,
                    "%s says it contains "L_DPAT" bytes"
                    " or fewer than the minimum of %d",
                    db_nm, db_csize, DB_PTR_BASE);
            /* that is a fatal error if we are not rebuilding */
            /* NOTE(review): the test below gives up when new_hash_len
             * is not 0, which is when a new hash table was requested.
             * That looks like the opposite of the comment above;
             * confirm which is intended. */
            if (new_hash_len != 0) {
                  db_close(-1);
                  return 0;
            }
      }
      if (db_csize > (DB_PTR)db_fsize) {
            dcc_pemsg(EX_DATAERR, emsg,
                    "%s says it contains "L_DPAT" bytes"
                    " or more than the actual size of "OFF_DPAT,
                    db_nm, db_csize, db_fsize);
            /* that is a fatal error if we are not rebuilding */
            /* NOTE(review): same question about the sense of this test
             * as for the preceding check.  When new_hash_len is 0, this
             * case is still rejected by the size check further down. */
            if (new_hash_len != 0) {
                  db_close(-1);
                  return 0;
            }
      }

      cur_page_size = db_sts.db_parms.d.parms->page_size;
      db_rel_state(&db_sts.db_parms);

      /* The buffer or page size we use must be the page size used to
       * write the files.  Try to change our size to match the file */
      if (cur_page_size != db_page_size) {
            /* db_invalidate makes buf_msync() discard dirty buffers
             * instead of writing them while everything is unloaded */
            db_invalidate = 1;
            rel_db_states(0);
            if (!db_unload(emsg, 0)) {
                  db_close(-1);
                  return 0;
            }
            db_invalidate = 0;
            if (!db_buf_init(cur_page_size)) {
                  dcc_error_msg("%s has page size %d"
                              " incompatible with %d in %s",
                              db_nm,
                              cur_page_size, db_get_page_size(0, 0),
                              path2fnm(db_hash_nm));
                  db_close(-1);
                  return 0;
            }
      }

      /* open the hash table file and mark it close-on-exec */
      db_csize_stored_hash = 0;
      db_hash_len = 0;
      db_hash_fd = open(db_hash_nm, hash_flags, 0666);
      if (db_hash_fd < 0) {
            dcc_pemsg(EX_IOERR, emsg, "open(%s): %s",
                    db_hash_nm, ERROR_STR());
            db_close(-1);
            return 0;
      }
      if (0 > fcntl(db_hash_fd, F_SETFD, FD_CLOEXEC)) {
            dcc_pemsg(EX_IOERR, emsg, "fcntl(%s, FD_CLOEXEC): %s",
                    db_hash_nm, ERROR_STR());
            db_close(-1);
            return 0;
      }

      /* either build a new hash table or check the old one */
      if (new_hash_len != 0) {
            if (!make_new_hash(emsg, new_hash_len)) {
                  db_close(-1);
                  return 0;
            }
      } else {
            if (!check_old_hash(emsg)) {
                  db_close(-1);
                  return 0;
            }
      }

      /* the database file must be a whole number of pages */
      db_end_pg_num = db_fsize / db_page_size;
      if (db_fsize % db_page_size != 0) {
            dcc_pemsg(EX_DATAERR, emsg,
                    "%s has size "OFF_HPAT","
                    " not a multiple of its page size of %#x",
                    db_nm, db_fsize, db_page_size);
            db_close(-1);
            return 0;
      }
      /* Fill the last page of the database with zeros in case
       * the length was wrong.
       * That is possible without too much database corruption
       * only if the length is wrong by less than a page. */
      if ((DB_PTR)db_fsize > db_csize + db_page_size
          || db_csize > (DB_PTR)db_fsize) {
            dcc_pemsg(EX_DATAERR, emsg,
                    "%s has size "OFF_HPAT" but claims "L_HPAT,
                    db_nm, db_fsize, db_csize);
            db_close(-1);
            return 0;
      }
      if (!db_rdonly && (DB_PTR)db_fsize > db_csize) {
            if (!map_db(emsg, db_csize, db_fsize - db_csize, &db_sts.rcd)) {
                  db_close(-1);
                  return 0;
            }
            memset(db_sts.rcd.d.r, 0, db_fsize - db_csize);
            db_sts.rcd.b->flags |= (DB_BUF_FG_MSYNC | DB_BUF_FG_DIRTY);
      }

      /* write new sizes and other parameters.
       * This should be a NOP if the file is read only,
       * but try it just in case. */
      if (!db_flush_parms(emsg)) {
            db_close(-1);
            return 0;
      }

      /* describe the total size of the buffers for later reports */
      window = db_page_size*db_buf_total;
      if (window >= (1024*1024)) {
            snprintf(db_window_size, sizeof(db_window_size),
                   "%d MByte window",
                   (int)(window / (1024*1024)));
      } else {
            snprintf(db_window_size, sizeof(db_window_size),
                   "%d KByte window",
                   (int)(window / 1024));
      }
      rel_db_states(0);
      db_failed_line = 0;
      return 1;
}



/* get a free buffer for a chunk of either the hash table or database files
 *    The oldest unlocked buffer on the MRU list is taken, removed from
 *    its current hash chain, unmapped if it is in use, and put at the
 *    head of the hash chain bh.
 *    Returns the buffer or 0 with a message in emsg. */
PSTATIC DB_BUF *
get_free_buf(DCC_EMSG emsg, DB_BUF **bh)
{
      DB_BUF *b;

      /* Look for an unlocked buffer.
       * We know there is one because we have more buffers than
       * can be locked simultaneously. */
      b = buf_oldest;
      for (;;) {
            if (!b) {
                  dcc_pemsg(EX_SOFTWARE, emsg,
                          "broken DB buffer MRU chain");
                  DB_FAILED();
                  return 0;
            }
            if (!b->lock_cnt)
                  break;
            b = b->newer;
      }

      /* Found an unlocked buffer.
       * Unlink it from its hash chain. */
      if (b->fwd)
            b->fwd->bak = b->bak;
      if (b->bak)
            b->bak->fwd = b->fwd;
      else if (b->hash)
            *b->hash = b->fwd;
      /* Forget the old links at once.  If unmapping fails below, the
       * buffer is left on no chain instead of pointing into a chain
       * it no longer belongs to and being unlinked from it again
       * the next time it is chosen. */
      b->fwd = 0;
      b->bak = 0;
      b->hash = 0;
      if (b->type != DB_BUF_TYPE_FREE) {
            if (!buf_munmap(emsg, b))
                  return 0;
      }

      /* put it on the new hash chain */
      b->hash = bh;
      b->fwd = *bh;
      *bh = b;
      if (b->fwd)
            b->fwd->bak = b;

      return b;
}



/* Find or claim the buffer for a page of the database or hash file.
 *    The hash chain for pg_num is searched for a buffer of the right
 *    type and page number.  If there is none, get_free_buf() supplies
 *    one that is labeled with the type and page number but not filled.
 *    The buffer is made the newest on the MRU list and, unless the
 *    database is read-only, marked dirty.
 *    Returns the buffer or 0 if get_free_buf() failed. */
PSTATIC DB_BUF *
find_buf(DCC_EMSG emsg, DB_BUF_TYPE type, DB_PG_NUM pg_num)
{
      DB_BUF *b, **bh;

      /* look along the hash chain for the page */
      bh = DB_BUF_HASH(pg_num);
      for (b = *bh; b != 0; b = b->fwd) {
            if (b->pg_num == pg_num
                && b->type == type)
                  break;            /* found the buffer we need */
      }

      if (!b) {
            /* the page is not on the chain, so take a free buffer */
            b = get_free_buf(emsg, bh);
            if (!b)
                  return 0;
            b->type = type;
            b->pg_num = pg_num;
      }

      /* move the buffer to the newest end of the MRU list */
      if (b != buf_newest) {
            /* a buffer that is not newest always has a newer neighbor */
            b->newer->older = b->older;
            if (!b->older)
                  buf_oldest = b->newer;
            else
                  b->older->newer = b->newer;

            b->older = buf_newest;
            b->newer = 0;
            buf_newest->newer = b;
            buf_newest = b;
      }

      if (!db_rdonly)
            b->flags |= DB_BUF_FG_DIRTY;
      return b;
}



/* Attach a DB_STATE to the buffer holding a page, mapping the page
 * if necessary.
 *    A buffer previously held by st is returned at once if it is the
 *    wanted page.  Otherwise its lock is dropped, and it is unmapped
 *    if that was the last lock and DB_IS_LOCKED() is false.
 *    On success st->b is the locked buffer and st->d.p is 0 for the
 *    caller to set.
 *    Returns the buffer or 0 on failure. */
PSTATIC DB_BUF *
find_st_buf(DCC_EMSG emsg, DB_BUF_TYPE type, DB_STATE *st, DB_PG_NUM pg_num)
{
      DB_BUF *old, *b;

      old = st->b;
      if (old != 0) {
            /* nothing to do if the state already has the target */
            if (old->type == type
                && old->pg_num == pg_num)
                  return old;

            /* let go of the wrong buffer */
            st->d.p = 0;
            st->b = 0;
            --old->lock_cnt;
            if (old->lock_cnt < 0) {
                  dcc_logbad(EX_SOFTWARE,
                           "negative database buffer lock");
            } else if (old->lock_cnt == 0
                     && !DB_IS_LOCKED()
                     && !buf_munmap(emsg, old)) {
                  return 0;
            }
      }

      /* find the buffer for the page and lock it */
      b = find_buf(emsg, type, pg_num);
      if (!b)
            return 0;
      ++b->lock_cnt;

      if (b->buf.v == 0) {
            /* the buffer is new and must be filled */
            if (buf_mmap(b, pg_num)) {
                  if (type != DB_BUF_TYPE_DB)
                        ++db_stats.hash_mmaps;
                  else
                        ++db_stats.db_mmaps;
            } else {
                  /* give the buffer back */
                  b->pg_num = -1;
                  b->type = DB_BUF_TYPE_FREE;
                  if (--b->lock_cnt != 0)
                        dcc_logbad(EX_SOFTWARE,
                                 "stolen database buffer lock %d",
                                 b->lock_cnt);
                  return 0;
            }
      }

      st->d.p = 0;
      st->b = b;
      return b;
}



/* Push the contents of a dirty buffer toward its file.
 *    Nothing is done for a buffer without DB_BUF_FG_DIRTY.
 *    While db_invalidate is set, the dirty flags are cleared without
 *    writing anything.
 *    A DB_BUF_FG_NO_MMAP buffer is written with lseek() and write().
 *    Any other buffer is given to msync(MS_ASYNC), unless HAVE_OLD_MSYNC
 *    is defined, if it has DB_BUF_FG_MSYNC or while the msync_bufs
 *    counter, which is decremented here, has not gone negative.
 *    Returns 1 on success or 0 with a message in emsg after DB_FAILED(). */
PSTATIC u_char
buf_msync(DCC_EMSG emsg, DB_BUF *b)
{
#ifdef MADV_FREE
      static u_char madvise_ok = 1;
#endif
      off_t offset;
      char *p;
      int fd, i;

      if (!(b->flags & DB_BUF_FG_DIRTY))
            return 1;


      if (db_invalidate) {
            b->flags &= ~(DB_BUF_FG_MSYNC | DB_BUF_FG_DIRTY);
            return 1;
      }

      if (b->flags & DB_BUF_FG_NO_MMAP) {
            if (b->flags & DB_BUF_FG_ANON) {
                  p = b->buf.v;
            } else {
                  /* at least FreeBSD fails writing to the file that
                   * underlies a mmap() region from that region */
                  static char *wbuf;
                  static u_int wbuf_len;

                  if (wbuf_len != db_page_size) {
                        /* Do not record the new length until the
                         * allocation has worked, so that a failure
                         * is retried instead of leaving a null or
                         * too small buffer that looks valid. */
                        free(wbuf);
                        wbuf_len = 0;
                        wbuf = malloc(db_page_size);
                        if (!wbuf) {
                              dcc_pemsg(EX_IOERR, emsg,
                                      "buf_msync malloc(%d): %s",
                                      (int)db_page_size, ERROR_STR());
                              DB_FAILED();
                              return 0;
                        }
                        wbuf_len = db_page_size;
                  }
                  p = wbuf;
                  memcpy(p, b->buf.v, db_page_size);
            }

            fd = (b->type == DB_BUF_TYPE_DB) ? db_fd : db_hash_fd;
            offset = (off_t)b->pg_num * (off_t)db_page_size;
            if (offset != lseek(fd, offset, SEEK_SET)) {
                  dcc_pemsg(EX_IOERR, emsg, "lseek(%s,"OFF_HPAT"): %s",
                          B2PATH(b), offset, ERROR_STR());
                  DB_FAILED();
                  return 0;
            }
            i = write(fd, p, db_page_size);
            if (i != (int)db_page_size) {
                  dcc_pemsg(EX_IOERR, emsg,
                          "buf_msync write(%s,%d)=%d: %s",
                          B2PATH(b), (int)db_page_size, i, ERROR_STR());
                  DB_FAILED();
                  return 0;
            }
#ifdef MADV_FREE
            /* stop trying madvise(MADV_FREE) after its first failure */
            if (madvise_ok
                && 0 > madvise(b->buf.v, db_page_size, MADV_FREE)) {
                  if (db_debug)
                        dcc_trace_msg("madvise(MADV_FREE): %s",
                                    ERROR_STR());
                  madvise_ok = 0;
            }
#endif

#ifndef HAVE_OLD_MSYNC
      } else {
            if (--msync_bufs >= 0
                || (b->flags & DB_BUF_FG_MSYNC)) {
                  if (0 > MSYNC(b->buf.v, db_page_size, MS_ASYNC)) {
                        dcc_pemsg(EX_IOERR, emsg,
                                "msync(db buffer %#lx,%#x): %s",
                                (long)b->buf.v, db_page_size,
                                ERROR_STR());
                        DB_FAILED();
                        return 0;
                  }
            }
#endif
      }

      b->flags &= ~(DB_BUF_FG_MSYNC | DB_BUF_FG_DIRTY);
      return 1;
}



/* Flush a buffer with buf_msync(), unmap it, and mark it free.
 *    The buffer is marked free even if flushing or unmapping fails.
 *    If both fail, emsg keeps the message from buf_msync().
 *    Returns 1 if both steps worked and 0 otherwise. */
PSTATIC u_char
buf_munmap(DCC_EMSG emsg, DB_BUF *b)
{
      u_char ok;

      if (b->lock_cnt != 0)
            dcc_logbad(EX_SOFTWARE, "unmapping locked DB buffer");

      ok = buf_msync(emsg, b);
      /* do not overwrite the message about the first failure */
      if (!ok)
            emsg = 0;

      if (munmap(b->buf.v, db_page_size) < 0) {
            dcc_pemsg(EX_IOERR, emsg, "munmap(%s,%d): %s",
                    B2PATH(b), db_page_size, ERROR_STR());
            DB_FAILED();
            ok = 0;
      }

      /* the buffer no longer holds any page */
      b->buf.v = 0;
      b->flags = 0;
      b->type = DB_BUF_TYPE_FREE;
      b->pg_num = -1;

      return ok;
}



/* Map page pg_num of the database or hash file, as chosen by b->type,
 * into buffer b.
 *    When db_no_mmap is set, hash table pages and database pages at or
 *    past db_end_pg_num are mapped MAP_PRIVATE and flagged
 *    DB_BUF_FG_NO_MMAP so that buf_msync() writes them with write().
 *    Other pages are mapped MAP_SHARED.
 *    A failed mmap() is retried for as long as db_unload(0, 1) returns
 *    more than 1.
 *    Returns 1 with b->buf.v set, or 0 with b->buf.v null if the page
 *    could not be mapped. */
PSTATIC u_char
buf_mmap(DB_BUF *b, DB_PG_NUM pg_num)
{
#ifndef HAVE_OLD_MSYNC
#ifdef MADV_RANDOM
      static u_char madv_random_ok = 1;
#endif
#ifdef MADV_WILLNEED
      static u_char madv_willneed_ok = 1;
#endif
#endif
      int flags;
      off_t offset;
      void *p;
      int retry;
      u_char unloaded, mapped;


      if (db_no_mmap
          && (b->type == DB_BUF_TYPE_HASH
            || pg_num >= db_end_pg_num)) {
            /* If there is enough RAM to avoid thrashing
             * or if this is a hash table page that we will probably
             *    be changing
             * or if this is the current data page whose hash table
             *    entries are being rebuilt,
             * then read and write entire buffers instead of letting
             * the Solaris virtual memory system do it.
             * Solaris will bog the system down doing nothing but
             * flushing dirty pages mmap() */
            b->flags |= DB_BUF_FG_NO_MMAP;
            flags = MAP_PRIVATE;
      } else {
#ifdef MAP_NOSYNC
            flags = (MAP_SHARED | MAP_NOSYNC);
#else
            flags = MAP_SHARED;
#endif
      }
      offset = (off_t)pg_num * (off_t)db_page_size;

      mapped = 0;
      for (retry = 1, unloaded = 2; unloaded > 1; ++retry) {
            p = mmap(0, db_page_size,
                   db_rdonly ? PROT_READ : (PROT_READ | PROT_WRITE),
                   flags,
                   (b->type == DB_BUF_TYPE_DB) ? db_fd : db_hash_fd,
                   offset);

            if (p == MAP_FAILED) {
                  dcc_error_msg("try #%d mmap(%s,%#x,"OFF_HPAT"): %s",
                              retry, B2PATH(b), db_page_size, offset,
                              ERROR_STR());
/* #define MMAP_FAIL_DEBUG 3 */
#ifdef MMAP_FAIL_DEBUG
            } else if (((uint)random() % MMAP_FAIL_DEBUG) == 0) {
                  dcc_error_msg(" test fail #%d mmap(%s,%#x,"OFF_HPAT")",
                              retry,
                              B2PATH(b), db_page_size, offset);
                  if (0 > munmap(p, db_page_size))
                        dcc_error_msg( "test munmap(): %s",
                                    ERROR_STR());
#endif
            } else {
                  if (retry != 1)
                        dcc_error_msg("try #%d"
                                    " mmap(%s,%#x,"OFF_HPAT") ok",
                                    retry,
                                    B2PATH(b), db_page_size, offset);
                  mapped = 1;
                  break;
            }

            /* mmap() fails occasionally on some systems,
             * so try to release something and try again */
            unloaded = db_unload(0, 1);
      }

      if (!mapped) {
            /* Every try failed and nothing more could be unloaded.
             * Do not use MAP_FAILED or an unmapped address as if it
             * were a buffer.  Undo the flag that might have been set
             * above and tell the caller. */
            b->flags &= ~DB_BUF_FG_NO_MMAP;
            b->buf.v = 0;
            return 0;
      }


#ifndef HAVE_OLD_MSYNC
      if ((b->flags & DB_BUF_FG_NO_MMAP)
          || (DB_PTR)(db_fsize+hash_fsize) < db_max_rss)  {
#ifdef MADV_WILLNEED
            /* Tell the kernel to keep entire buffers in RAM if
             * we have plenty */
            if (madv_willneed_ok
                && 0 > madvise(p, db_page_size, MADV_WILLNEED)) {
                  if (db_debug)
                        dcc_trace_msg("madvise(MADV_WILLNEED): %s",
                                    ERROR_STR());
                  madv_willneed_ok = 0;
            }
#endif

      } else if ((DB_PTR)db_fsize >= db_max_rss) {
#ifdef MADV_RANDOM
            /* Tell the kernel to not read entire buffers if we are short
             * of RAM.  Let it read-ahead and try to fill buffers if we
             * hope to keep the whole database in RAM. */
            if (madv_random_ok
                && 0 > madvise(p, db_page_size, MADV_RANDOM)) {
                  if (db_debug)
                        dcc_trace_msg("madvise(MADV_RANDOM): %s",
                                    ERROR_STR());
                  madv_random_ok = 0;
            }
#endif
      }
#endif /* !HAVE_OLD_MSYNC */

      b->buf.v = p;
      return 1;
}



/* Point a DB_STATE at a hash table entry, mapping its page if necessary.
 *    Returns 1 with st->s.haddr and st->d.h set, or 0 if haddr is not
 *    below db_hash_len or the page could not be obtained. */
PSTATIC u_char
map_hash(DCC_EMSG emsg,
       DB_HADDR haddr,        /* this entry */
       DB_STATE *st)                /* point this to the entry */
{
      DB_PG_NUM page;
      DB_PG_OFF slot;
      DB_BUF *b;

      if (db_hash_len <= haddr) {
            dcc_pemsg(EX_DATAERR, emsg, "invalid hash address %#x",
                    haddr);
            return 0;
      }

      /* split the address into a page and an entry within the page */
      page = haddr / db_hash_page_len;
      slot = haddr % db_hash_page_len;

      b = find_st_buf(emsg, DB_BUF_TYPE_HASH, st, page);
      if (b == 0)
            return 0;

      st->d.h = &b->buf.h[slot];
      st->s.haddr = haddr;
      return 1;
}



/* Remove a hash table entry from the doubly linked free list.
 *    hash_st must already point to the entry.  tmp_st is scratch state
 *    used to reach first the following and then the preceding entry.
 *    Returns 1 after clearing the links of the entry and counting it
 *    in db_hash_used, or 0 with a message in emsg. */
PSTATIC u_char
unlink_free_hash(DCC_EMSG emsg,
             DB_STATE *hash_st,     /* remove this from the free list */
             DB_STATE *tmp_st)
{
      DB_HADDR next, prev;

      if (!db_make_dirty(emsg))
            return 0;

      /* the entry must be free and have plausible neighbors */
      next = DB_HADDR_EX(hash_st->d.h->fwd);
      prev = DB_HADDR_EX(hash_st->d.h->bak);
      if (!HE_IS_FREE(hash_st->d.h)
          || (next != DB_HADDR_FREE && DB_HADDR_INVALID(next))
          || (prev != DB_HADDR_FREE && DB_HADDR_INVALID(prev))) {
            dcc_pemsg(EX_DATAERR, emsg,
                    "bad hash free list entry at %#x", hash_st->s.haddr);
            return 0;
      }

      /* make the following entry point back past this one */
      if (!map_hash(emsg, next, tmp_st))
            return 0;
      if (hash_st->s.haddr != DB_HADDR_EX(tmp_st->d.h->bak)) {
            dcc_pemsg(EX_DATAERR, emsg,
                    "free %#x --> bad-free %#x", hash_st->s.haddr, next);
            return 0;
      }
      DB_HADDR_CP(tmp_st->d.h->bak, prev);

      /* make the preceding entry point forward past this one */
      if (!map_hash(emsg, prev, tmp_st))
            return 0;
      if (hash_st->s.haddr != DB_HADDR_EX(tmp_st->d.h->fwd)) {
            dcc_pemsg(EX_DATAERR, emsg,
                    "bad free %#x <-- free %#x", prev, hash_st->s.haddr);
            return 0;
      }
      DB_HADDR_CP(tmp_st->d.h->fwd, next);

      /* the entry is now in use and on no list */
      DB_HADDR_CP(hash_st->d.h->fwd, DB_HADDR_NULL);
      DB_HADDR_CP(hash_st->d.h->bak, DB_HADDR_NULL);
      ++db_hash_used;
      return 1;
}



/* get a free hash table entry and leave db_sts.free pointing to it
 *    Up to 50 entries on the page of the hint are examined before the
 *    head of the free list is used.  The entry that is found is removed
 *    from the free list by unlink_free_hash(). */
PSTATIC u_char                      /* 0=failed, 1=got it */
get_free_hash(DCC_EMSG emsg,
            DB_HADDR result)        /* try near here */
{
      DB_HADDR last_on_pg;
      int tries;

      if (db_hash_used >= db_hash_len) {
            dcc_pemsg(EX_SOFTWARE, emsg, "no free hash table entry;"
                    " %d of %d used", db_hash_used, db_hash_len);
            return 0;
      }

      /* Search close to the hint first.
       * Leaving the page costs so much that spending plenty of
       * time on this is worthwhile. */
      if (result != DB_HADDR_NULL) {
            /* address of the last entry on the page of the hint */
            last_on_pg = (result - (result % db_hash_page_len)
                        + db_hash_page_len-1);
            for (tries = 50; tries > 0; --tries) {
                  if (!map_hash(emsg, result, &db_sts.free))
                        return 0;
                  if (HE_IS_FREE(db_sts.free.d.h))
                        return unlink_free_hash(emsg, &db_sts.free,
                                          &db_sts.tmp);
                  /* step to the next entry, going back to DB_HADDR_MIN
                   * entries past the start of the page on reaching
                   * the last entry of the page */
                  ++result;
                  if (result >= last_on_pg)
                        result -= db_hash_page_len-1-DB_HADDR_MIN;
            }
      }

      /* nothing nearby, so take the head of the free list */
      if (!map_hash(emsg, DB_HADDR_FREE, &db_sts.free))
            return 0;
      result = DB_HADDR_EX(db_sts.free.d.h->fwd);
      if (DB_HADDR_INVALID(result)) {
            dcc_pemsg(EX_DATAERR, emsg,
                    "broken hash free list head of %#x", result);
            return 0;
      }
      if (!map_hash(emsg, result, &db_sts.free))
            return 0;
      return unlink_free_hash(emsg, &db_sts.free, &db_sts.tmp);
}



/* mmap() a database entry
 *    We assume that no database entry spans buffers,
 *    and that there are enough buffers to accommodate all possible
 *    concurrent requests.
 *    Returns 1 with st->s.rptr and st->d.r set, or 0 with a message in
 *    emsg if the target runs past db_fsize, crosses the end of a page,
 *    or cannot be mapped. */
PSTATIC u_char
map_db(DCC_EMSG emsg,
       DB_PTR rptr,                 /* address of the record */
       u_int tgt_len,               /* its length */
       DB_STATE *st)                /* point this to the record */
{
      DB_PG_NUM page;
      DB_PG_OFF offset;
      DB_BUF *b;

      /* the target must be inside the file */
      if ((DB_PTR)db_fsize < rptr+tgt_len) {
            dcc_pemsg(EX_DATAERR, emsg,
                    "invalid database address "L_HPAT" or length %d"
                    " past db_fsize "OFF_HPAT" in %s",
                    rptr, tgt_len, db_fsize, db_nm);
            DB_FAILED();
            return 0;
      }

      page = rptr / db_page_size;
      offset = rptr % db_page_size;

      /* the target must also fit within a single buffer */
      if (db_page_size < tgt_len+offset) {
            dcc_pemsg(EX_DATAERR, emsg,
                    "invalid database address "L_HPAT
                    " or length %#x in %s",
                    rptr, tgt_len, db_nm);
            DB_FAILED();
            return 0;
      }

      b = find_st_buf(emsg, DB_BUF_TYPE_DB, st, page);
      if (b == 0)
            return 0;

      st->d.r = (DB_RCD *)&b->buf.c[offset];
      st->s.rptr = rptr;
      return 1;
}



/* Map the record at rptr and check that all of it is within its page.
 *    The header of the record is mapped first so that its length can
 *    be computed with DB_RCD_LEN().  If rcd_lenp is not null, the length
 *    is stored through it on success. */
u_char                              /* 0=failed, 1=got it */
db_map_rcd(DCC_EMSG emsg,
         DB_STATE *rcd_st,          /* point this to the record */
         DB_PTR rptr,               /* that is here */
         u_int *rcd_lenp)           /* put its length here */
{
      u_int len;

      if (DB_PTR_IS_BAD(rptr)) {
            dcc_pemsg(EX_DATAERR, emsg,
                    "getting bogus record at "L_HPAT", in %s",
                    rptr, db_nm);
            return 0;
      }

      if (!map_db(emsg, rptr, DB_RCD_HDR_LEN, rcd_st))
            return 0;

      /* the end of the record must not be past the end of the buffer */
      len = DB_RCD_LEN(rcd_st->d.r);
      if (&rcd_st->b->buf.c[db_page_size] < &rcd_st->d.c[len]) {
            dcc_pemsg(EX_DATAERR, emsg,
                    "invalid checksum count %d at "L_HPAT" in %s",
                    DB_NUM_CKS(rcd_st->d.r), rptr, db_nm);
            return 0;
      }

      if (rcd_lenp != 0)
            *rcd_lenp = len;
      return 1;
}



/* write the new sizes of the files into the files
 *    db_csize is copied into the hash file and into the header of the
 *    database file wherever the stored copy differs and the file is open.
 *    Both files are tried even if the first fails.
 *    Returns 1 if nothing failed and 0 otherwise. */
PSTATIC u_char
db_set_sizes(DCC_EMSG emsg)
{
      u_char ok = 1;

      /* the copy kept in the hash table file */
      if (db_hash_fd != -1
          && db_csize_stored_hash != db_csize) {
            if (map_hash(emsg, DB_HADDR_SIZES, &db_sts.hash_ctl)) {
                  DB_HPTR_CP(db_sts.hash_ctl.d.h->HASH_STORE_DB_CSIZE,
                           db_csize);
                  db_csize_stored_hash = db_csize;
            } else {
                  ok = 0;
            }
      }

      /* the copy kept in the header of the database file */
      if (db_fd != -1
          && db_parms_stored.db_csize != db_csize) {
            if (map_db(emsg, 0, sizeof(DB_HDR), &db_sts.db_parms)) {
                  db_sts.db_parms.d.parms->db_csize = db_csize;
                  db_parms_stored.db_csize = db_csize;
                  db_parms.db_csize = db_csize;
                  db_sts.db_parms.b->flags |= (DB_BUF_FG_MSYNC
                                         | DB_BUF_FG_DIRTY);
            } else {
                  ok = 0;
            }
      }

      return ok;
}



/* Copy the in-memory database parameters into the header of the
 *    database file when they differ from what was last stored.
 *    The stored sizes are brought up to date first. */
u_char                              /* 0=failed, 1=done */
db_flush_parms(DCC_EMSG emsg)
{
      if (!db_set_sizes(emsg))
            return 0;

      /* nothing else to do without an open database file
       * or when the stored parameters are already current */
      if (db_fd == -1
          || memcmp(&db_parms, &db_parms_stored, sizeof(db_parms)) == 0)
            return 1;

      if (!map_db(emsg, 0, sizeof(DB_HDR), &db_sts.db_parms))
            return 0;

      /* refresh the size fields before the whole structure is copied */
      db_parms.db_csize = db_csize;
      db_parms.page_size = db_page_size;
      *db_sts.db_parms.d.parms = db_parms;
      db_parms_stored = db_parms;

      /* have the changed header page written out */
      db_sts.db_parms.b->flags |= (DB_BUF_FG_MSYNC | DB_BUF_FG_DIRTY);

      return 1;
}



/* Search the checksums of an already mapped record for one of the
 *    given type.  rptr is only used to label the error message. */
DB_RCD_CK *                   /* 0=not found, 1=broken database */
db_find_ck(DCC_EMSG emsg,
         DB_RCD *rcd,
         DB_PTR rptr,
         DCC_CK_TYPES type)         /* find this type of checksum */
{
      DB_RCD_CK *ck;
      int remaining;

      remaining = DB_NUM_CKS(rcd);
      if (remaining > DCC_NUM_CKS) {
            /* the count cannot be trusted, so do not walk the array */
            dcc_pemsg(EX_DATAERR, emsg,
                    "impossible %d checksums in "L_HPAT" in %s",
                    remaining, rptr, db_nm);
            return (DB_RCD_CK *)1;
      }

      ck = rcd->cks;
      while (remaining != 0) {
            if (DB_CK_TYPE(ck) == type)
                  return ck;
            ++ck;
            --remaining;
      }

      return 0;
}



/* Map the record at rptr and locate a checksum of a type that is
 *    expected to be present.  A missing checksum is reported as an error. */
DB_RCD_CK *                   /* 0=it's not there */
db_map_rcd_ck(DCC_EMSG emsg,
            DB_STATE *rcd_st,       /* point this to the record */
            DB_PTR rptr,            /* that is here */
            DCC_CK_TYPES type)      /* find this type of checksum */
{
      DB_RCD_CK *ck;

      if (!db_map_rcd(emsg, rcd_st, rptr, 0))
            return 0;

      ck = db_find_ck(emsg, rcd_st->d.r, rptr, type);
      if (ck == 0) {
            /* the record is sane but lacks the wanted checksum */
            dcc_pemsg(EX_DATAERR, emsg,
                    "missing \"%s\" checksum in "L_HPAT" in %s",
                    DB_TYPE2STR(type), rptr, db_nm);
            return 0;
      }
      if (ck == (DB_RCD_CK *)1) {
            /* db_find_ck() has already explained the broken record */
            return 0;
      }
      return ck;
}



/* Compute the hash table address for a checksum of the given type.
 *    The 16 bytes of the sum are folded, four at a time as big-endian
 *    words, into an accumulator seeded with the type.  Addresses below
 *    DB_HADDR_MIN are raised to DB_HADDR_MIN.
 *
 *    NOTE(review): each word is built in int arithmetic, so a leading
 *    byte >= 0x80 shifts into the sign bit before the value is added to
 *    the u_long accumulator.  Changing that could change hash values,
 *    so it is deliberately left as it is. */
DB_HADDR
db_hash(DCC_CK_TYPES type, const DCC_SUM sum)
{
      u_long accum;
      DB_HADDR haddr;
      int i;

      accum = type;
      for (i = 0; i < 16; i += 4)
            accum += (sum[i]<<24)+(sum[i+1]<<16)+(sum[i+2]<<8)+sum[i+3];

      haddr = mhash(accum, db_hash_len);
      return (haddr < DB_HADDR_MIN) ? DB_HADDR_MIN : haddr;
}



/* look for a checksum in the hash table
 *    return with not-found, the home slot, or the last entry on
 *    the collision chain
 *
 *    Results:
 *      DB_FOUND_IT       the checksum was found; *prcd_ck points to it,
 *                        rcd_st to its record and hash_st to its hash entry
 *      DB_FOUND_EMPTY    the home slot is free
 *      DB_FOUND_INTRUDER the home slot holds an entry with a back link
 *      DB_FOUND_CHAIN    the chain ended without a match; hash_st is left
 *                        on the last entry of the chain
 *      DB_FOUND_LATER    the hash address is outside the lo...hi window
 *      DB_FOUND_SYSERR   something is broken and emsg has been set
 *    If prcd_ck is not null, *prcd_ck is null on every result other
 *    than DB_FOUND_IT. */
DB_FOUND
db_lookup(DCC_EMSG emsg, DCC_CK_TYPES type, const DCC_SUM sum,
        DB_HADDR lo,                /* postpone if out of this window */
        DB_HADDR hi,
        DB_STATE *hash_st,          /* hash block for record or related */
        DB_STATE *rcd_st,           /* put the record or garbage here */
        DB_RCD_CK **prcd_ck)        /* point to cksum if found */
{
      DB_HADDR haddr, haddr1;
      DB_PTR db_ptr;
      DB_RCD_CK *found_ck;
      int failsafe;

      /* Clear the result pointer before any return so that callers
       * never see an indeterminate value, including on the
       * out-of-window paths below. */
      if (prcd_ck)
            *prcd_ck = 0;

      haddr = db_hash(type, sum);
      if (haddr < lo || haddr > hi) {
            /* with the widest possible window, a miss is an error
             * instead of something to postpone */
            if (lo == 0 && hi == MAX_HASH_ENTRIES) {
                  dcc_pemsg(EX_DATAERR, emsg,
                          "out of range hash address");
                  return DB_FOUND_SYSERR;
            }
            return DB_FOUND_LATER;
      }

      if (!map_hash(emsg, haddr, hash_st))
            return DB_FOUND_SYSERR;

      if (HE_IS_FREE(hash_st->d.h))
            return DB_FOUND_EMPTY;

      /* an entry with a back link is not the head of a chain */
      if (!DB_HADDR_C_NULL(hash_st->d.h->bak))
            return DB_FOUND_INTRUDER;

      /* We know that the current hash table entry is in its home slot.
       * It might be for the key or checksum we are looking for
       * or it might be for some other checksum with the same hash value.
       * The loop count is bounded by the size of the hash table to
       * catch chains that loop. */
      for (failsafe = db_hash_len; failsafe >=0; --failsafe) {
            if (HE_CMP(hash_st->d.h, type, sum)) {
                  /* This hash table entry could be for our target
                   * checksum.  Read the corresponding record so we
                   * decide whether we have a hash collision or we
                   * have found a record containing our target checksum.
                   *
                   * find right type of checksum in the record */
                  db_ptr = DB_HPTR_EX(hash_st->d.h->rcd);
                  found_ck = db_map_rcd_ck(emsg, rcd_st, db_ptr, type);
                  if (!found_ck)
                        return DB_FOUND_SYSERR;
                  if (!memcmp(sum, found_ck->sum,
                            sizeof(DCC_SUM))) {
                        if (prcd_ck)
                              *prcd_ck = found_ck;
                        return DB_FOUND_IT;
                  }
            }

            /* This DB record was a hash collision, or for a checksum
             * other than our target.
             * Fail if this is the end of the hash chain */
            haddr1 = DB_HADDR_EX(hash_st->d.h->fwd);
            if (haddr1 == DB_HADDR_NULL)
                  return DB_FOUND_CHAIN;

            if (DB_HADDR_INVALID(haddr1)) {
                  dcc_pemsg(EX_DATAERR, emsg,
                          "broken hash chain fwd-link %#x at %#x in %s",
                          haddr1, haddr, db_hash_nm);
                  return DB_FOUND_SYSERR;
            }

            if (!map_hash(emsg, haddr1, hash_st))
                  return DB_FOUND_SYSERR;

            /* the next entry must point back to where we came from */
            if (DB_HADDR_EX(hash_st->d.h->bak) != haddr) {
                  dcc_pemsg(EX_DATAERR, emsg,
                          "broken hash chain back-link"
                          " %#x<--%#x instead of %#x<--%#x in %s",
                          DB_HADDR_EX(hash_st->d.h->bak), haddr1,
                          haddr, haddr1, db_hash_nm);
                  return DB_FOUND_SYSERR;
            }
            haddr = haddr1;
      }
      dcc_pemsg(EX_DATAERR, emsg, "infinite hash chain at %#x in %s",
              haddr, db_hash_nm);
      return DB_FOUND_SYSERR;
}



/* Combine the running total of targets for a checksum with the targets
 *    of one more record. */
DCC_TGTS
db_sum_ck(DCC_TGTS prev,            /* previous sum */
        DCC_TGTS rcd_tgts,          /* from the record */
        DCC_CK_TYPES type UATTRIB)
{
      DCC_TGTS total;

      /* This arithmetic must be commutative (after handling deleted
       * values), because inter-server flooding causes records to appear in
       * the database out of temporal order.
       *
       * DCC_TGTS_TOO_MANY acts as a count of plus infinity.
       * DCC_TGTS_OK acts as minus infinity.
       * DCC_TGTS_OK2 acts as half of minus infinity.
       * Plus infinity added to DCC_TGTS_OK or DCC_TGTS_OK2 yields
       *    DCC_TGTS_OK or DCC_TGTS_OK2.
       *
       * Reputations never reach infinity.
       *
       * Claims of not-spam from all clients are discarded as they arrive
       * and before here. They can only come from the local white list
       */
      total = prev+rcd_tgts;
      if (total > DCC_TGTS_TOO_MANY) {
            /* at least one operand is special or the sum saturated */
            if (rcd_tgts == DCC_TGTS_OK || prev == DCC_TGTS_OK)
                  return DCC_TGTS_OK;
            if (rcd_tgts == DCC_TGTS_OK2 || prev == DCC_TGTS_OK2)
                  return DCC_TGTS_OK2;
            /* a delete request leaves the previous total alone */
            if (rcd_tgts == DCC_TGTS_DEL)
                  return prev;
            return DCC_TGTS_TOO_MANY;
      }

      /* an ordinary sum */
      return total;
}



/* Apply a delete request to the chain of reports of one checksum.
 *    Starting with prev_ck in the record already mapped by prev_st,
 *    follow the prev links.  Reports older than the delete request and
 *    not from the whitelist have their targets zeroed; the targets of
 *    the others are summed into *res. */
static u_char                       /* 1=done, 0=broken database */
del_ck(DCC_EMSG emsg,
       DCC_TGTS *res,               /* residual targets after deletion */
       const DB_RCD *new,           /* delete reports older than this one */
       DCC_CK_TYPES type,           /* delete this type of checksum */
       DB_RCD_CK *prev_ck,          /* starting with this one */
       DB_STATE *prev_st)           /* use this scratch state block */
{
      DB_PTR older;

      *res = 0;
      do {
            if (!DCC_TS_NEWER_TS(new->ts, prev_st->d.r->ts)
                || DB_RCD_ID(prev_st->d.r) == DCC_ID_WHITE) {
                  /* this report survives, so count it */
                  *res = db_sum_ck(*res, DB_TGTS_RCD(prev_st->d.r), type);

            } else {
                  /* this report predates the delete request */
                  DB_TGTS_RCD_SET(prev_st->d.r, 0);
                  DB_TGTS_CK_SET(prev_ck, 0);
            }

            /* step to the preceding report, if there is one */
            older = DB_PTR_EX(prev_ck->prev);
            if (older == DB_PTR_NULL)
                  return 1;
            prev_ck = db_map_rcd_ck(emsg, prev_st, older, type);
      } while (prev_ck != 0);

      return 0;
}



/* Mark reports made obsolete by a spam report
 *    A new report of spam makes sufficiently old reports obsolete.
 *    Sufficiently recent existing reports make a new report obsolete,
 *    or at least not worth spending bandwidth to flood.
 *
 *    The chain of preceding reports is followed through the prev links,
 *    starting with prev_ck in the record already mapped by prev_st.
 *    Each predecessor is mapped into prev_st, so on return prev_st
 *    refers to the last record examined. */
PSTATIC u_char                      /* 1=done, 0=broken database */
db_obs_ck(DCC_EMSG emsg,
        const DB_RCD *new,
        DB_RCD_CK *new_ck,
        DCC_CK_TYPES type,          /* check this type of checksum */
        DB_RCD_CK *prev_ck,         /* starting with this one */
        DCC_TGTS prev_ck_tgts,
        DB_STATE *prev_st)          /* use this scratch state block */
{
      struct timeval tv;
      time_t secs;
      DCC_TS ts;
      int limit;
      DB_PTR prev;

      /* Compute the timestamp that separates "old" from "recent"
       * predecessors: the timestamp of the new report offset by -secs,
       * where secs is the expiration for this type capped at
       * DCC_NEW_SPAM_SECS. */
      secs = db_parms.ex_secs[type].all;
      if (secs > DCC_NEW_SPAM_SECS)
            secs = DCC_NEW_SPAM_SECS;
      dcc_ts2timeval(&tv, new->ts);
      dcc_timeval2ts(ts, &tv, -secs);

      limit = 100;
      for (;;) {
            /* preceding white listed entries make new entries obsolete */
            if (DB_RCD_ID(prev_st->d.r) == DCC_ID_WHITE) {
                  new_ck->type_fgs |= DB_CK_FG_OBS;
                  return 1;
            }

            if (DB_CK_OBS(prev_ck)) {
                  /* don't look forever for recent existing report */
                  if (--limit == 0)
                        return 1;

            } else if (prev_ck_tgts != DCC_TGTS_TOO_MANY) {
                  /* mark this predecessor obsolete if it
                   * was before the checksum became spam */
                  prev_ck->type_fgs |= DB_CK_FG_OBS;

            } else if (DCC_TS_OLDER_TS(prev_st->d.r->ts, &ts)) {
                  /* this older predecessor is now obsolete */
                  prev_ck->type_fgs |= DB_CK_FG_OBS;
                  /* we're finished, because all older preceding reports
                   * were marked obsolete when it was inserted  */
                  return 1;

            } else {
                  /* this predecessor is recent, so it makes
                   * our new record obsolete. */
                  new_ck->type_fgs |= DB_CK_FG_OBS;
                  return 1;
            }

            prev = DB_PTR_EX(prev_ck->prev);
            if (prev == DB_PTR_NULL)
                  return 1;   /* it is a new report of spam */

            /* Map the predecessor into the caller's scratch state.
             * The tests above read the record through prev_st, so the
             * record and prev_ck must come from the same state block. */
            prev_ck = db_map_rcd_ck(emsg, prev_st, prev, type);
            if (!prev_ck)
                  return 0;
            prev_ck_tgts = DB_TGTS_CK(prev_ck);
      }
}



/* mark extra server-ID declarations obsolete
 *    Follow the chain of preceding declarations, starting with prev_ck
 *    in the record already mapped by prev_st, until one with the same
 *    ID as the new record is found.  Of that pair, the one with the
 *    older timestamp is marked obsolete.  Nothing is marked if the
 *    chain ends without a match. */
static u_char                       /* 1=done, 0=broken database */
srvr_id_ck(DCC_EMSG emsg,
         const DB_RCD *new,
         DB_RCD_CK *new_ck,
         DB_RCD_CK *prev_ck,        /* starting with this one */
         DB_STATE *prev_st)         /* use this scratch state block */
{
      DB_PTR prev;

      for (;;) {
            if (DB_RCD_ID(prev_st->d.r) == DB_RCD_ID(new)) {
                  /* keep newest server-ID declaration
                   * Compare the timestamps of the two records, not
                   * the start of the predecessor record itself. */
                  if (DCC_TS_NEWER_TS(prev_st->d.r->ts, new->ts))
                        new_ck->type_fgs |= DB_CK_FG_OBS;
                  else
                        prev_ck->type_fgs |= DB_CK_FG_OBS;
                  return 1;
            }

            prev = DB_PTR_EX(prev_ck->prev);
            if (prev == DB_PTR_NULL)
                  return 1;

            prev_ck = db_map_rcd_ck(emsg, prev_st, prev, DCC_CK_SRVR_ID);
            if (!prev_ck)
                  return 0;
      }
}



/* Install pointers in the hash table for a record and fix the accumulated
 *    counts in the record pointed to by db_sts.rcd
 *    Use db_sts.rcd, db_sts.hash, db_sts.rcd2, db_sts.free, db_sts.tmp
 *
 *    Each checksum in the record is looked up in the hash table.
 *    Checksums whose hash address is outside lo...hi are skipped.
 *    The others are linked in front of any existing reports of the same
 *    checksum and their totals are computed from the predecessor.
 *    The stored database sizes are updated on the way out. */
u_char                              /* 0=failed, 1=done */
db_link_rcd(DCC_EMSG emsg, DB_HADDR lo, DB_HADDR hi)
{
      DCC_TGTS res;
      DB_RCD *rcd;
      DB_RCD_CK *prev_ck;
      DB_RCD_CK *rcd_ck;
      DCC_CK_TYPES rcd_type;
      DCC_TGTS rcd_tgts, prev_ck_tgts;
      int ck_num;
      DB_HADDR haddr;

      if (!db_make_dirty(emsg))
            return 0;

      rcd = db_sts.rcd.d.r;
      rcd_tgts = DB_TGTS_RCD_RAW(rcd);
      rcd_ck = rcd->cks;
      ck_num = DB_NUM_CKS(rcd);
      if (ck_num > DIM(rcd->cks)) {
            dcc_pemsg(EX_SOFTWARE, emsg,
                    "bogus checksum count %#x at "L_HPAT" in %s",
                    rcd->fgs_num_cks, db_sts.rcd.s.rptr, db_nm);
            return 0;
      }
      for (; ck_num > 0; --ck_num, ++rcd_ck) {
            rcd_type = DB_CK_TYPE(rcd_ck);
            if (!DCC_CK_OK_DB(grey_on, rcd_type)) {
                  dcc_pemsg(EX_SOFTWARE, emsg,
                          "invalid checksum type %s at "L_HPAT" in %s",
                          DB_TYPE2STR(rcd_type),
                          db_sts.rcd.s.rptr, db_nm);
                  return 0;
            }

            /* start with no predecessor */
            rcd_ck->prev = DB_PTR_CP(DB_PTR_NULL);

            /* Do not link or total some checksums unless they are
             * whitelist entries.  If they are whitelist entries, they
             * will eventually get set to DCC_TGTS_OK or DCC_TGTS_OK2. */
            if (DB_TEST_NOKEEP(db_parms.nokeep_cks, rcd_type)
                && DB_RCD_ID(rcd) != DCC_ID_WHITE) {
                  DB_TGTS_CK_SET(rcd_ck, 1);
                  continue;
            }

            /* a delete request contributes nothing to the total */
            res = (rcd_tgts == DCC_TGTS_DEL) ? 0 : rcd_tgts;

            switch (db_lookup(emsg, rcd_type, rcd_ck->sum, lo, hi,
                          &db_sts.hash, &db_sts.rcd2, &prev_ck)) {
            case DB_FOUND_SYSERR:
                  return 0;

            case DB_FOUND_LATER:
                  /* outside the window; leave this checksum unlinked */
                  continue;

            case DB_FOUND_IT:
                  /* We found the checksum
                   * Update the hash table to point to the new record */
                  DB_HPTR_CP(db_sts.hash.d.h->rcd, db_sts.rcd.s.rptr);
                  rcd_ck->prev = DB_PTR_CP(db_sts.rcd2.s.rptr);

                  /* delete predecessors to a delete request
                   * and compute the remaining sum */
                  if (rcd_tgts == DCC_TGTS_DEL) {
                        if (!del_ck(emsg, &res, rcd, rcd_type,
                                  prev_ck, &db_sts.rcd2))
                              return 0;
                        /* delete requests are obsolete if the
                         * checksum is white-listed */
                        if (res == DCC_TGTS_OK
                            || res == DCC_TGTS_OK2)
                              rcd_ck->type_fgs |= DB_CK_FG_OBS;
                        break;
                  }

                  /* Simple checksum with a predecessor
                   * This does not do the substantial extra work
                   * to notice delete requests that arrived early.
                   * That problem is handled by the incoming
                   * flooding duplicate report detection mechanism. */
                  prev_ck_tgts = DB_TGTS_CK(prev_ck);
                  if (DB_RCD_SUMRY(rcd) || DB_CK_DUP(rcd_ck))
                        res = prev_ck_tgts;
                  else
                        res = db_sum_ck(prev_ck_tgts, res, rcd_type);
                  if ((res == DCC_TGTS_OK || res == DCC_TGTS_OK2
                       || (DB_RCD_ID(db_sts.rcd2.d.r) == DCC_ID_WHITE))
                      && DB_RCD_ID(rcd) != DCC_ID_WHITE){
                        /* obsolete white-listed checksums */
                        rcd_ck->type_fgs |= DB_CK_FG_OBS;
                        break;
                  }
                  if (res == DCC_TGTS_TOO_MANY) {
                        /* mark obsolete unneeded reports of spam */
                        if (!DB_CK_OBS(rcd_ck)
                            && !db_obs_ck(emsg, rcd, rcd_ck, rcd_type,
                                      prev_ck, prev_ck_tgts,
                                      &db_sts.rcd2))
                              return 0;
                  } else if (rcd_type == DCC_CK_SRVR_ID) {
                        /* mark obsolete server-ID assertions */
                        if (!DB_CK_OBS(rcd_ck)
                            && !srvr_id_ck(emsg, rcd, rcd_ck,
                                       prev_ck, &db_sts.rcd2))
                              return 0;
                  }
                  break;

            case DB_FOUND_EMPTY:
                  /* We found an empty hash table slot.
                   * Update the slot to point to our new record
                   * after removing it from the free list. */
                  if (!unlink_free_hash(emsg, &db_sts.hash, &db_sts.tmp))
                        return 0;
                  DB_HPTR_CP(db_sts.hash.d.h->rcd, db_sts.rcd.s.rptr);
                  HE_MERGE(db_sts.hash.d.h,rcd_type, rcd_ck->sum);
                  break;

            case DB_FOUND_CHAIN:
                  /* We found a hash collision, a chain of 1 or more
                   * records with the same hash value.
                   * Get a free slot, link it to the end of the chain,
                   * and point it to the record */
                  if (!get_free_hash(emsg, db_sts.hash.s.haddr))
                        return 0;
                  DB_HADDR_CP(db_sts.free.d.h->bak, db_sts.hash.s.haddr);
                  DB_HADDR_CP(db_sts.hash.d.h->fwd, db_sts.free.s.haddr);
                  DB_HPTR_CP(db_sts.free.d.h->rcd, db_sts.rcd.s.rptr);
                  HE_MERGE(db_sts.free.d.h,rcd_type, rcd_ck->sum);
                  break;

            case DB_FOUND_INTRUDER:
                  /* The home hash slot for our key contains an
                   * intruder.  Find a place to put it. */
                  haddr = DB_HADDR_EX(db_sts.hash.d.h->fwd);
                  if (haddr == DB_HADDR_NULL)
                        haddr = DB_HADDR_EX(db_sts.hash.d.h->bak);
                  if (!get_free_hash(emsg, haddr))
                        return 0;
                  /* Move the intruder */
                  *db_sts.free.d.h = *db_sts.hash.d.h;
                  /* re-link the neighbors of the intruder */
                  haddr = DB_HADDR_EX(db_sts.free.d.h->bak);
                  if (haddr == DB_HADDR_NULL) {
                        /* Report the slot that holds the bad link;
                         * the link itself is known to be null here. */
                        dcc_pemsg(EX_DATAERR, emsg,
                                "bad hash chain reverse link at %#x"
                                " in %s",
                                db_sts.hash.s.haddr, db_hash_nm);
                        return 0;
                  }
                  if (!map_hash(emsg, haddr, &db_sts.tmp))
                        return 0;
                  DB_HADDR_CP(db_sts.tmp.d.h->fwd, db_sts.free.s.haddr);
                  haddr = DB_HADDR_EX(db_sts.hash.d.h->fwd);
                  if (haddr != DB_HADDR_NULL) {
                        if (!map_hash(emsg, haddr, &db_sts.tmp))
                              return 0;
                        DB_HADDR_CP(db_sts.tmp.d.h->bak,
                                  db_sts.free.s.haddr);
                  }
                  /* install the new entry in its home slot */
                  DB_HADDR_CP(db_sts.hash.d.h->fwd, DB_HADDR_NULL);
                  DB_HADDR_CP(db_sts.hash.d.h->bak, DB_HADDR_NULL);
                  DB_HPTR_CP(db_sts.hash.d.h->rcd, db_sts.rcd.s.rptr);
                  HE_MERGE(db_sts.hash.d.h,rcd_type, rcd_ck->sum);
                  break;
            }

            /* Fix the checksum in the report */
            DB_TGTS_CK_SET(rcd_ck, res);
      }

      return db_set_sizes(emsg);
}



/* Append a record to the database and link it into the hash table.
 *    The caller vouches for the validity of the record.
 *    Uses db_sts.rcd, db_sts.hash, db_sts.rcd2, db_sts.free, db_sts.tmp
 *    db_sts.rcd is left pointing to the new copy in the database. */
DB_PTR                              /* 0=failed */
db_add_rcd(DCC_EMSG emsg, DB_RCD *new_rcd)
{
      u_int rcd_len, filler;
      DB_PTR end_pos, grown_fsize, pos, end_page;

      if (!db_make_dirty(emsg))
            return 0;

      /* only the checksums actually present are stored */
      rcd_len = (sizeof(*new_rcd)
               - sizeof(new_rcd->cks)
               + (DB_NUM_CKS(new_rcd) * sizeof(new_rcd->cks[0])));
      pos = db_csize;
      end_pos = pos+rcd_len;
      end_page = end_pos/db_page_size;

      /* When the end of the record would land on a different page than
       * the current end of data, skip ahead to the page boundary,
       * rounding the gap up to a multiple of the record header length,
       * and grow the file by a page. */
      if (end_page != db_csize/db_page_size) {
            filler = end_page*db_page_size - db_csize;
            filler = (((filler + DB_RCD_HDR_LEN-1) / DB_RCD_HDR_LEN)
                    * DB_RCD_HDR_LEN);
            pos = db_csize + filler;
            grown_fsize = (end_page+1)*db_page_size;
            db_extended = 1;
            if (!db_extend(emsg, db_fd, db_nm, grown_fsize, db_fsize))
                  return 0;
            db_end_pg_num = end_page;
            db_fsize = grown_fsize;
            db_csize = pos;
            end_pos = pos + rcd_len;
      }

      /* copy the record into place */
      if (!map_db(emsg, pos, rcd_len, &db_sts.rcd))
            return 0;
      memcpy(db_sts.rcd.d.r, new_rcd, rcd_len);
      /* Ask for the buffer to be pushed to the disk so that the database
       * stays as good as possible across a crash.  Later changes to the
       * hash links do not matter, because dbclean rebuilds them
       * after a crash. */
      db_sts.rcd.b->flags |= (DB_BUF_FG_MSYNC | DB_BUF_FG_DIRTY);
      db_csize = end_pos;

      /* link the record into the hash table over the whole window
       * and fix the totals in the record */
      if (!db_link_rcd(emsg, 0, MAX_HASH_ENTRIES))
            return 0;

      ++db_stats.adds;
      return pos;
}

/* Generated by  Doxygen 1.6.0   Back to index */