Logo Search packages:      
Sourcecode: dcc version File versions

clnt_send.c

/* Distributed Checksum Clearinghouse
 *
 * send a request from client to server
 *
 * Copyright (c) 2005 by Rhyolite Software, LLC
 *
 * This agreement is not applicable to any entity which sells anti-spam
 * solutions to others or provides an anti-spam solution as part of a
 * security solution sold to other entities, or to a private network
 * which employs the DCC or uses data provided by operation of the DCC
 * but does not provide corresponding data to other users.
 *
 * Permission to use, copy, modify, and distribute this software for any
 * purpose with or without fee is hereby granted, provided that the above
 * copyright notice and this permission notice appear in all copies.
 *
 * Parties not eligible to receive a license under this agreement can
 * obtain a commercial license to use DCC and permission to use
 * U.S. Patent 6,330,590 by contacting Commtouch at http://www.commtouch.com/
 * or by email to nospam@commtouch.com.
 *
 * A commercial license would be for Distributed Checksum and Reputation
 * Clearinghouse software.  That software includes additional features.  This
 * free license for Distributed ChecksumClearinghouse Software does not in any
 * way grant permision to use Distributed Checksum and Reputation Clearinghouse
 * software
 *
 * THE SOFTWARE IS PROVIDED "AS IS" AND RHYOLITE SOFTWARE, LLC DISCLAIMS ALL
 * WARRANTIES WITH REGARD TO THIS SOFTWARE INCLUDING ALL IMPLIED WARRANTIES
 * OF MERCHANTABILITY AND FITNESS. IN NO EVENT SHALL RHYOLITE SOFTWARE, LLC
 * BE LIABLE FOR ANY SPECIAL, DIRECT, INDIRECT, OR CONSEQUENTIAL DAMAGES
 * OR ANY DAMAGES WHATSOEVER RESULTING FROM LOSS OF USE, DATA OR PROFITS,
 * WHETHER IN AN ACTION OF CONTRACT, NEGLIGENCE OR OTHER TORTIOUS ACTION,
 * ARISING OUT OF OR IN CONNECTION WITH THE USE OR PERFORMANCE OF THIS
 * SOFTWARE.
 *
 * Rhyolite Software DCC 1.3.42-1.149 $Revision$
 */

#include "dcc_clnt.h"
#ifdef USE_POLL
#include <poll.h>
#endif
#ifdef HAVE_ARPA_NAMESER_H
#include <arpa/nameser.h>
#endif
#ifdef HAVE_RESOLV_H
#include <resolv.h>
#endif

DCC_CLNT_INFO *dcc_clnt_info;       /* memory mapped shared data */
u_char dcc_all_srvrs = 0;           /* try to contact all servers */

/* Uncomment the following line to compile in the clnt_losses counter.
 *    NOTE(review): the counter is not referenced in this part of the file;
 *    presumably it is used to simulate lost packets -- confirm at its users. */
/* #define CLNT_LOSSES */
#ifdef CLNT_LOSSES
static u_int clnt_losses;
#endif

/* modulus applied to our host-ID to compute the "residue" stored in a
 *    newly created map file */
#define DCC_SRVRS_MOD       59

/* nonzero makes trace_bad_packet() report every bad packet and dump its
 *    first words instead of ignoring packets from hosts we have not sent to */
u_char dcc_clnt_debug;
u_int dcc_min_delay  = DCC_MIN_RTT; /* override minimum RTT */
/* NOTE(review): not referenced in this part of the file; presumably a TTL
 *    to set on outgoing packets for debugging -- confirm at its users */
int dcc_debug_ttl;


/* Replace _v with the rounded weighted average of the old value _v
 *    (weight _a) and the new sample _n (weight _b).
 *    Every argument is parenthesized so that expressions such as `x+1` can be
 *    passed as weights or samples.  The arguments are evaluated more than
 *    once, so they must not have side effects. */
#define AGE_AVG(_v,_n,_a,_b) ((_v) = ((_v)*(_a) + (_n)*(_b)             \
                                      + ((_a)+(_b))/2) / ((_a)+(_b)))

/* 15 minutes expressed in seconds.
 *    NOTE(review): not referenced in this part of the file; presumably an
 *    interval related to RTT measurements -- confirm at its users. */
#define FAST_RTT_SECS   (15*60)


/* our host name as reported by gethostname(); filled in by get_clnt_hid() */
char dcc_clnt_hostname[MAXHOSTNAMELEN];
/* 0 until get_clnt_hid() has computed it */
static u_int32_t dcc_clnt_hid;            /* our DCC host-ID */


/* Each client knows about one or more servers, lest the current server
 * crash.  To ensure that counts of spam accumulate as quickly as possible,
 * all of the processes on a client try to use a single server.  The
 * closest (or fastest) server is preferred.  It is desirable for the
 * clients to convert the hostnames of the servers to IP addresses
 * frequently enough to track changes in address records, but not so
 * often that a lot of time is wasted on the DNS.
 *
 * All of that implies that independent processes on the client need to
 * cooperate in measuring the round trip time to the servers and maintaining
 * their IP addresses.  On UNIX systems, this is accomplished with mmap()
 * and a well known file.
 *
 * The DCC client uses 4 locks:
 * 1 mutex to ensure that only one thread in process sends bursts of NOPs
 *    to measure RTTs or resolves DNS names
 * 2 fcntl() lock on another byte in the file to ensure that only one
 *    process measures RTTs or resolves names.
 * 3 mutex protecting the shared information in the map file for threads
 *    within a process
 * 4 fcntl() lock on a byte in the file to protect the shared information
 *    among processes
 *
 * To avoid ABBA deadlocks, the locks are always sought in that order.
 * For most operations, only #3/#4 is needed.  Sometimes only #3.
 *
 * Some systems have broken fcntl() locking (e.g. NFS in Solaris).
 * They lock the entire file. */


/* the contexts must be locked to read or change these values */
/* descriptor of the open map file, or -1 when no file is open */
static int info_fd = -1;
#ifdef DCC_WIN32
/* Windows mapping handle handed to win32_unmap() */
HANDLE info_map = INVALID_HANDLE_VALUE;
#endif
/* path of the map file, used for locking and in error messages */
DCC_PATH dcc_info_nm;

/* Some systems have a broken fcntl().  Examples include Solaris over NFS.
 * Their bug is that you can lock entire files, but not ranges within a file. */
#ifdef BAD_FCNTL_LOCKS
/* only DCC_LOCK_ALL_FILE is used on these systems */
#undef INFO_LOCK_NUM
#undef RESOLVE_LOCK_NUM
#else
/* lock numbers given to dcc_exlock_fd() and dcc_unlock_fd() */
#define INFO_LOCK_NUM       0       /* protects the shared information */
#define RESOLVE_LOCK_NUM    1       /* serializes resolving and RTT probes */
#endif

/* info_locked is set when the file system lock (INFO_LOCK_NUM) on changing
 *    the mapped file is held.  The contexts must be locked while info_locked
 *    is set, as well as when it is checked or changed. */
static u_char info_locked;

/* resolve_locked is set when the file system lock on resolving host names
 *    in the mapped file is held (RESOLVE_LOCK_NUM).
 *    resolve_locked must not be checked or changed unless the contexts are
 *    locked.
 *    The clnt_resolve_mutex is used to ensure that only one thread at a
 *    time gets the file system lock. */
static u_char resolve_locked;



/* Compute the round trip time used to rank a server.
 *    The measured RTT is adjusted by the local bias configured for the
 *    server's name and, for a server with a valid ID that speaks an older
 *    protocol version, by a fixed penalty.
 *    DCC_RTT_BAD is returned when the measured RTT is DCC_MAX_RTT or more,
 *    or when the adjusted value reaches DCC_RTT_BAD. */
static int
effective_rtt(const DCC_SRVR_CLASS *class, const DCC_SRVR_ADDR *ap)
{
      int adjusted = ap->rtt;

      if (adjusted >= DCC_MAX_RTT)
            return DCC_RTT_BAD;

      /* local bias for this server name */
      adjusted += class->nms[ap->nm_inx].rtt_adj;

      /* penalize servers with strange versions */
      if (ap->srvr_id != DCC_ID_INVALID
          && ap->srvr_pkt_vers < DCC_PKT_VERSION)
            adjusted += DCC_RTT_VERS_ADJ;

      return (adjusted < DCC_RTT_BAD) ? adjusted : DCC_RTT_BAD;
}



/* Map a pointer to a server address entry to its class: entries at or beyond
 *    the start of the greylist address array are greylist entries.
 *    The argument is parenthesized so that an expression can be passed. */
#define AP2CLASS(ap) DCC_GREY2CLASS((ap) >= dcc_clnt_info->grey.addrs)


/* compare addresses while trying to ignore IPv4 vs. IPv6 details
 *    The ports must match.  An IPv6 address that dcc_ipv6toipv4() can
 *    convert is compared as the resulting IPv4 address, on either side.
 *    The result is strictly 0 or 1; the int from memcmp() is not returned
 *    directly because truncating it to u_char could turn a nonzero
 *    difference into 0. */
static u_char                       /* 0=the addresses are equal */
dcc_cmp_ap2su(const DCC_SRVR_ADDR *ap, const DCC_SOCKU *su)
{
      DCC_SRVR_ADDR ap4;
      struct in_addr su_addr4;

      if (ap->ip.port != *DCC_SU_PORT(su))
            return 1;

      /* look at an IPv4 view of the server address when there is one;
       * only the family and address of ap4 are used below */
      if (ap->ip.family == AF_INET6
          && dcc_ipv6toipv4(&ap4.ip.u.v4, &ap->ip.u.v6)) {
            ap4.ip.family = AF_INET;
            ap = &ap4;
      }

      if (su->sa.sa_family == AF_INET) {
            return (ap->ip.family != AF_INET
                  || ap->ip.u.v4.s_addr != su->ipv4.sin_addr.s_addr);
      }

      if (dcc_ipv6toipv4(&su_addr4, &su->ipv6.sin6_addr)) {
            return (ap->ip.family != AF_INET
                  || ap->ip.u.v4.s_addr != su_addr4.s_addr);
      }

      if (ap->ip.family != AF_INET6)
            return 1;

      /* both are real IPv6 and not ::1 */
      return 0 != memcmp(&ap->ip.u.v6, &su->ipv6.sin6_addr,
                     sizeof(ap->ip.u.v6));
}



/* Convert a server address to a string in buf.
 *    The IP address comes first.  When the port differs from the default
 *    port of the class, ",port" follows.  When the port is the default and
 *    port_str is not '\0', "," and the port_str character follow.
 *    The suffix is shortened or dropped instead of being written beyond
 *    buf_len bytes.
 *    buf is returned. */
const char *
dcc_ap2str_opt(char *buf, int buf_len,
             const DCC_SRVR_CLASS *class, u_char inx,
             char port_str)         /* '\0' or '-' */
{
      const DCC_SRVR_ADDR *ap;
      char *buf1;
      int i;

      ap = &class->addrs[inx];
      dcc_ip2str(buf, buf_len, &ap->ip);

      i = strlen(buf);
      buf1 = buf+i;
      /* space that remains, including the byte now holding the '\0' */
      buf_len -= i;
      if (ap->ip.port == DCC_CLASS2PORT(class)) {
            /* "," needs room for itself and the '\0',
             * and the flag character needs one more byte */
            if (port_str && buf_len >= 2) {
                  *buf1++ = ',';
                  if (buf_len >= 3)
                        *buf1++ = port_str;
                  *buf1 = '\0';
            }
      } else if (buf_len > 0) {
            /* a length of 0 or less must not reach snprintf(),
             * where it would become a huge unsigned size */
            snprintf(buf1, buf_len, ",%d", ntohs(ap->ip.port));
      }

      return buf;
}



/* Describe a server in buf for a message.
 *    When addrs_gen matches the current generation of the class, ap is
 *    trusted and the result is "hostname (address)", or only the address when
 *    the hostname is a prefix of the address string.  sup is used for the
 *    address when it is not null; otherwise the address is built from ap.
 *    When the generations differ, the result is the address from sup, or the
 *    fixed text "DCC server" when sup is null.
 *    buf is returned. */
static const char *
addr2str(char *buf, u_int buf_len, DCC_SRVR_CLASS *class,
       int addrs_gen, const DCC_SRVR_ADDR *ap, const DCC_SOCKU *sup)
{
      DCC_SOCKU local_su;
      char ip_str[DCC_SU2STR_SIZE];
      const char *name;

      /* the address table has changed, so do not look at ap */
      if (class->gen != addrs_gen) {
            if (sup)
                  dcc_su2str(buf, buf_len, sup);
            else
                  snprintf(buf, buf_len, "DCC server");
            return buf;
      }

      if (!sup) {
            dcc_mk_su(&local_su, ap->ip.family, &ap->ip.u, ap->ip.port);
            sup = &local_su;
      }
      dcc_su2str(ip_str, sizeof(ip_str), sup);

      name = class->nms[ap->nm_inx].hostname;
      if (strncmp(name, ip_str, strlen(name)) == 0)
            snprintf(buf, buf_len, "%s", ip_str);
      else
            snprintf(buf, buf_len, "%s (%s)", name, ip_str);
      return buf;
}



static void
trace_perf(const char *msg, const DCC_SRVR_ADDR *ap)
{
      DCC_SRVR_CLASS *class;
      char abuf[60];
      char rbuf[30];

      class = AP2CLASS(ap);
      if (class->nms[ap->nm_inx].rtt_adj == 0) {
            rbuf[0] = 0;
      } else if (ap->srvr_pkt_vers < DCC_PKT_VERSION
               && ap->srvr_id != DCC_ID_INVALID) {
            snprintf(rbuf, sizeof(rbuf), "%+d+%d",
                   class->nms[ap->nm_inx].rtt_adj/1000,
                   DCC_RTT_VERS_ADJ/1000);
      } else {
            snprintf(rbuf, sizeof(rbuf), "%+d",
                   class->nms[ap->nm_inx].rtt_adj/1000);
      }

      if (ap->rtt == DCC_RTT_BAD) {
            dcc_trace_msg("%s %s with unknown RTT",
                        msg, addr2str(abuf, sizeof(abuf), class,
                                  class->gen, ap, 0));
      } else if (ap->total_xmits == 0) {
            dcc_trace_msg("%s %s with %.2f%s ms RTT, %d ms queue wait",
                        msg, addr2str(abuf, sizeof(abuf), class,
                                  class->gen, ap, 0),
                        ap->rtt/1000.0, rbuf,
                        ap->srvr_wait/1000);
      } else {
            dcc_trace_msg("%s %s with %.0f%% of %d requests answered,"
                        " %.2f%s ms RTT, %d ms queue wait",
                        msg, addr2str(abuf, sizeof(abuf), class,
                                  class->gen, ap, 0),
                        (ap->total_resps*100.0)/ap->total_xmits,
                        ap->total_xmits,
                        ap->rtt/1000.0, rbuf,
                        ap->srvr_wait/1000);
      }
}



/* Complain about a strange datagram.
 *
 * A socket that is not always connected can receive datagrams from almost
 * anywhere.  For example, a DNS datagram could leak in if the local port
 * range is small and the local port was recently used for DNS queries.
 *
 * A connected socket can also receive datagrams that do not belong to the
 * connection, if it was recently disconnected and data was still waiting
 * in its queue.
 *
 * So unless debugging is on, say nothing unless the datagram came from an
 * address to which we have transmitted something.
 * With debugging on, always complain and also dump the first 8 words
 * of the packet. */
static void PATTRIB(5,6)
trace_bad_packet(const DCC_XLOG *xlog, const DCC_SOCKU *su, const char *sustr,
             int *resp,
             const char *p, ...)
{
      const DCC_XLOG_ENTRY *entry;
      va_list ap;
      u_char known;

      if (!dcc_clnt_debug) {
            known = 0;
            for (entry = xlog->base; entry < xlog->next; ++entry) {
                  /* skip log entries for hosts to which
                   * nothing has been transmitted */
                  if (entry->op_nums.t == DCC_OP_NUMS_NULL)
                        continue;

                  if (memcmp(su, &entry->su, sizeof(*su)) == 0) {
                        known = 1;
                        break;
                  }
            }
            /* forget the error message if not from a DCC server */
            if (!known)
                  return;
      }

      va_start(ap, p);
      dcc_verror_msg(p, ap);
      va_end(ap);

      if (!dcc_clnt_debug)
            return;
      dcc_error_msg("packet from %s:"
                  " %08x %08x %08x %08x"
                  " %08x %08x %08x %08x", sustr,
                  resp[0], resp[1], resp[2], resp[3],
                  resp[4], resp[5], resp[6], resp[7]);
}



/* Compute the delay before the next retransmission
 *      It always should be long enough for the DCC server to do some disk
 *    operations even if the server and network have usually been faster.
 *
 *    rtt is raised to DCC_MIN_RTT if smaller and doubled once for each
 *    previous transmission counted by xmit_num.  DCC_DCCD_DELAY is added,
 *    and the result is limited to DCC_MAX_RTT and then raised to
 *    dcc_min_delay if smaller.
 *    The doubling stops as soon as the limit is reached instead of shifting
 *    by xmit_num bits at once, because shifting a signed value too far is
 *    undefined. */
static int
dcc_retrans_time(int rtt, u_int xmit_num)
{
      u_int backoff;

      if (rtt < DCC_MIN_RTT)
            rtt = DCC_MIN_RTT;

      /* exponential backoff */
      backoff = rtt;
      while (xmit_num != 0 && backoff < DCC_MAX_RTT) {
            backoff <<= 1;
            --xmit_num;
      }

      backoff += DCC_DCCD_DELAY;    /* varying server & network load */
      if (backoff > DCC_MAX_RTT)
            backoff = DCC_MAX_RTT;
      /* let dbclean delay for many seconds */
      if (backoff < dcc_min_delay)
            backoff = dcc_min_delay;
      return backoff;
}



/* Note the time at which an operation starts.
 *    The current time of the context is set to the start time and the
 *    elapsed microseconds to 0. */
static void
get_start_time(DCC_CLNT_CTXT *ctxt)
{
      struct timeval *start = &ctxt->start;

      gettimeofday(start, 0);
      ctxt->now_us = 0;
      ctxt->now = *start;
}



/* Refresh the current time of a context and the microseconds elapsed since
 *    its start time.
 *    An elapsed time that is negative by 1000 microseconds or more, or that
 *    is FOREVER_USECS or more, is reported through emsg as a clock change. */
static u_char                       /* 1=ok, 0=time jumped */
get_now(DCC_EMSG emsg, DCC_CLNT_CTXT *ctxt)
{
      gettimeofday(&ctxt->now, 0);
      ctxt->now_us = tv_diff2us(&ctxt->now, &ctxt->start);

      if (ctxt->now_us < 0) {
            /* ignore tiny reverse time jumps on some systems
             * such as BSD/OS 4.1 by pretending no time has passed */
            if (ctxt->now_us > -1000) {
                  ctxt->now = ctxt->start;
                  ctxt->now_us = 0;
                  return 1;
            }
      } else if (ctxt->now_us < FOREVER_USECS) {
            return 1;
      }

      dcc_pemsg(EX_OSERR, emsg,
              "clock changed an impossible %.6f seconds",
              ctxt->now_us/(1000.0*1000.0));
      return 0;
}



/* Lock assertions.
 *    They expand to nothing unless DCC_DEBUG_CLNT_LOCK is defined. */
#ifndef DCC_DEBUG_CLNT_LOCK
#define have_info_lock()
#define have_resolve_lock()
#else
/* complain unless the contexts and the shared information are locked */
static void
have_info_lock(void)
{
      have_ctxts_lock();
      if (info_locked)
            return;
      dcc_logbad(EX_SOFTWARE, "don't have info locked");
}


/* complain unless the resolving mutex and file lock are held */
static void
have_resolve_lock(void)
{
      have_clnt_resolve_mutex();
      /* should have already checked the contexts lock */
      if (resolve_locked)
            return;
      dcc_logbad(EX_SOFTWARE, "don't have resolving locked");
}
#endif



/* Release the file lock on the shared information for other processes.
 *      The contexts must be locked.
 *      Nothing is done if the lock is not held.
 *      info_locked is cleared even when releasing the file lock fails.
 *      With BAD_FCNTL_LOCKS, the whole-file lock is kept while the resolving
 *      lock still needs it. */
u_char                              /* 0=failed 1=ok */
dcc_info_unlock(DCC_EMSG emsg)
{
      have_ctxts_lock();

      if (info_locked) {
            info_locked = 0;

#ifdef BAD_FCNTL_LOCKS
            if (!resolve_locked
                && !dcc_unlock_fd(emsg, info_fd, DCC_LOCK_ALL_FILE,
                              "all-info ", dcc_info_nm))
                  return 0;
#else /* BAD_FCNTL_LOCKS */
            if (!dcc_unlock_fd(emsg, info_fd, INFO_LOCK_NUM,
                           "info ", dcc_info_nm))
                  return 0;
#endif /* BAD_FCNTL_LOCKS */
      }

      return 1;
}



/* Get the file lock on the shared information so that it can be read and
 *      perhaps changed.
 *      The contexts must be locked.
 *      Nothing is done if the lock is already held.
 *      With BAD_FCNTL_LOCKS, no new file lock is sought while the resolving
 *      lock is set, since that lock covers the whole file. */
u_char                              /* 0=failed, 1=ok */
dcc_info_lock(DCC_EMSG emsg)
{
      have_ctxts_lock();

      if (!info_locked) {
#ifdef BAD_FCNTL_LOCKS
            if (!resolve_locked
                && !dcc_exlock_fd(emsg, info_fd, DCC_LOCK_ALL_FILE,
                              "all-info ", dcc_info_nm))
                  return 0;
#else /* BAD_FCNTL_LOCKS */
            if (!dcc_exlock_fd(emsg, info_fd, INFO_LOCK_NUM,
                           "info ", dcc_info_nm))
                  return 0;
#endif /* BAD_FCNTL_LOCKS */

            info_locked = 1;
      }

      return 1;
}



/* Stop working on server IP addresses or measuring RTTs.
 *      The contexts must be locked and the resolving lock must be held.
 *      resolve_locked is cleared and the resolving mutex is released even
 *      when releasing the file lock fails.
 *      With BAD_FCNTL_LOCKS, the whole-file lock is kept while the shared
 *      information is still locked. */
u_char                              /* 0=failed, 1=ok */
dcc_clnt_resolve_unlock(DCC_EMSG emsg)
{
      u_char ok = 1;

      have_ctxts_lock();
      have_resolve_lock();

      resolve_locked = 0;

#ifdef BAD_FCNTL_LOCKS
      if (!info_locked
          && !dcc_unlock_fd(emsg, info_fd, DCC_LOCK_ALL_FILE,
                        "all-resolve ", dcc_info_nm))
            ok = 0;
#else /* BAD_FCNTL_LOCKS */
      if (!dcc_unlock_fd(emsg, info_fd, RESOLVE_LOCK_NUM,
                     "resolve ", dcc_info_nm))
            ok = 0;
#endif /* BAD_FCNTL_LOCKS */

      /* the mutex is released last and on every path */
      dcc_clnt_resolve_mutex_unlock();

      return ok;
}



/* Lock resolving server hostnames
 *      The contexts must be locked, and the shared information may be locked.
 *      All 4 are locked on success.
 *      On failure, the contexts are locked, the resolving mutex and file
 *      lock are not held, and resolve_locked is clear.  The shared
 *      information has normally been unlocked, except when that first
 *      unlock is what failed. */
u_char                              /* 0=failed, 1=ok */
dcc_clnt_resolve_lock(DCC_EMSG emsg)
{
      have_ctxts_lock();

      /* unlock the shared data and the contexts while we wait for the
       * resolving mutex & resolving file lock */
      if (!dcc_info_unlock(emsg))
            return 0;
      dcc_ctxts_unlock();
      dcc_clnt_resolve_mutex_lock();
      if (resolve_locked)
            dcc_logbad(EX_SOFTWARE, "locking locked resolve lock");

#ifdef BAD_FCNTL_LOCKS
      dcc_ctxts_lock();
      resolve_locked = 1;
      if (!info_locked) {
            dcc_ctxts_unlock();
            if (!dcc_exlock_fd(emsg, info_fd, DCC_LOCK_ALL_FILE,
                           "all-resolve ", dcc_info_nm)) {
                  /* We did not get the file lock, so do not leave
                   * resolve_locked set.  It is changed only with the
                   * contexts locked and before the mutex is released,
                   * so that no other thread finds it still set. */
                  dcc_ctxts_lock();
                  resolve_locked = 0;
                  dcc_clnt_resolve_mutex_unlock();
                  return 0;
            }
            dcc_ctxts_lock();
      }
#else /* BAD_FCNTL_LOCKS */
      if (!dcc_exlock_fd(emsg, info_fd, RESOLVE_LOCK_NUM,
                     "resolve ", dcc_info_nm)) {
            dcc_clnt_resolve_mutex_unlock();
            dcc_ctxts_lock();
            return 0;
      }
      dcc_ctxts_lock();
      resolve_locked = 1;
#endif /* BAD_FCNTL_LOCKS */

      /* finally get the lock on the shared information back */
      if (!dcc_info_lock(emsg)) {
            dcc_clnt_resolve_unlock(0);
            return 0;
      }
      return 1;
}



/* Release the file lock on the shared information, un-map it, and close
 *      the map file.
 *      The contexts must be locked.
 *      Nothing is done if the information is not mapped.
 *      Every step is attempted even after an earlier step fails.
 *      A close() failure is reported in emsg only when emsg does not already
 *      hold a message. */
u_char                              /* 0=something wrong, 1=all over */
dcc_unmap_info(DCC_EMSG emsg)
{
      u_char ok;

      have_ctxts_lock();

      if (dcc_clnt_info == 0)
            return 1;

      ok = 1;
      if (!dcc_info_unlock(emsg))
            ok = 0;

#ifdef DCC_WIN32
      win32_unmap(&info_map, dcc_clnt_info, dcc_info_nm);
#else
      if (munmap((void *)dcc_clnt_info, sizeof(*dcc_clnt_info)) < 0) {
            dcc_pemsg(EX_OSERR, emsg, "munmap(%s): %s",
                    dcc_info_nm, ERROR_STR());
            ok = 0;
      }
#endif
      dcc_clnt_info = 0;

      if (close(info_fd) < 0) {
            if (emsg && *emsg == '\0')
                  dcc_pemsg(EX_IOERR, emsg, "close(%s): %s",
                          dcc_info_nm, ERROR_STR());
            ok = 0;
      }
      info_fd = -1;

      return ok;
}



/* discover our host ID if we do not already know it
 *    The ID is the value of gethostid(), where available, plus a sum over
 *    the characters of the host name, each weighted by its position.
 *    dcc_clnt_hostname is set as a side effect.
 *    The ID is computed in a local variable and stored only on success.
 *    Otherwise a failure of gethostname() would leave a nonzero partial ID
 *    behind and make later calls claim success without a host name. */
static u_char                       /* 0=failed, 1=ok */
get_clnt_hid(DCC_EMSG emsg)
{
      u_int32_t hid;
      int i;

      if (dcc_clnt_hid)
            return 1;

      /* The last byte of the static buffer is never written
       * and so always ends the name. */
      if (0 > gethostname(dcc_clnt_hostname,
                      sizeof(dcc_clnt_hostname)-1)) {
            dcc_pemsg(EX_NOHOST, emsg, "gethostname(): %s", ERROR_STR());
            return 0;
      }
      if (dcc_clnt_hostname[0] == '\0') {
            dcc_pemsg(EX_NOHOST, emsg, "null hostname from gethostname()");
            return 0;
      }

#ifdef HAVE_GETHOSTID
      hid = gethostid();
#else
      hid = 0;
#endif
      for (i = 0; i < ISZ(dcc_clnt_hostname); ++i) {
            if (!dcc_clnt_hostname[i])
                  break;
            hid += dcc_clnt_hostname[i]*i;
      }

      dcc_clnt_hid = hid;
      return 1;
}



/* write a new DCC map file
 *    If pfd is not null and *pfd is a valid descriptor, the new contents
 *    are written to that descriptor.  Otherwise a file named by map_nm0 is
 *    created; it must not already exist.
 *    dcc_nms and grey_nms are the names of the DCC and greylist servers.
 *    At most DCC_MAX_SRVR_NMS names of each are used.
 *    src, if not null, is copied into the file.
 *    On success, a newly created file is left open with its descriptor in
 *    *pfd when pfd is not null, and is closed otherwise.
 *    On failure, the descriptor is closed, *pfd is set to -1 when pfd is
 *    not null, and a file created here is removed. */
u_char                              /* 0=failed, 1=ok */
dcc_create_map(DCC_EMSG emsg, const DCC_PATH map_nm0, int *pfd,
             const DCC_SRVR_NM *dcc_nms, int dcc_nms_len,
             const DCC_SRVR_NM *grey_nms, int grey_nms_len,
             const DCC_IP *src,
             u_char info_flags)     /* DCC_INFO_FG_* */
{
      static int op_nums_r;
      DCC_CLNT_INFO info_clear;
      int fd;
      u_char created;
      DCC_PATH map_nm;
      const char *nm;               /* file name for error messages */
      int i;

      /* map_nm is set only when the file is created here, so messages
       * about a descriptor from the caller must use the caller's name */
      nm = map_nm0 != 0 ? map_nm0 : "";

      if (pfd && (fd = *pfd) >= 0) {
            created = 0;
      } else {
            if (!fnm2path(map_nm, map_nm0, 0)) {
                  /* report the name we were given,
                   * not the buffer that could not hold it */
                  dcc_pemsg(EX_IOERR, emsg, "long map name \"%s\"",
                          nm);
                  return 0;
            }
            nm = map_nm;
            fd = open(map_nm, O_RDWR|O_CREAT|O_EXCL, 0600);
            if (fd < 0) {
                  dcc_pemsg(EX_IOERR, emsg, "open(%s): %s",
                          nm, ERROR_STR());
                  return 0;
            }
            created = 1;
      }

      memset(&info_clear, 0, sizeof(info_clear));
      strcpy(info_clear.version, DCC_MAP_INFO_VERSION);

      /* ignore missing lists and nonsense lengths instead of copying
       * through a null pointer or copying a negative number of bytes */
      if (dcc_nms != 0 && dcc_nms_len > 0) {
            if (dcc_nms_len > DCC_MAX_SRVR_NMS)
                  dcc_nms_len = DCC_MAX_SRVR_NMS;
            memcpy(info_clear.dcc.nms, dcc_nms,
                   sizeof(info_clear.dcc.nms[0])*dcc_nms_len);
      }
      info_clear.dcc.act_inx = DCC_NO_SRVR;

      if (grey_nms != 0 && grey_nms_len > 0) {
            if (grey_nms_len > DCC_MAX_SRVR_NMS)
                  grey_nms_len = DCC_MAX_SRVR_NMS;
            memcpy(info_clear.grey.nms, grey_nms,
                   sizeof(info_clear.grey.nms[0])*grey_nms_len);
      }
      info_clear.grey.act_inx = DCC_NO_SRVR;

      if (src != 0)
            info_clear.src = *src;

      info_clear.flags = info_flags;
      if (!get_clnt_hid(emsg)) {
            close(fd);
            if (pfd)
                  *pfd = -1;
            if (created)
                  unlink(map_nm);
            return 0;
      }
      /* the residue is derived from our host ID and is never 0 */
      info_clear.residue = dcc_clnt_hid % DCC_SRVRS_MOD;
      if (info_clear.residue == 0)
            info_clear.residue = 1;

      /* ensure that we have a new report # even if we are repeatedly
       * recreating a temporary map file */
      if (dcc_clnt_info)
            op_nums_r += dcc_clnt_info->proto_hdr.op_nums.r;
      info_clear.proto_hdr.op_nums.r = ++op_nums_r;

      i = write(fd, &info_clear, sizeof(info_clear));
      if (i != ISZ(info_clear)) {
            if (i < 0)
                  dcc_pemsg(EX_SOFTWARE, emsg, "write(%s): %s",
                          nm, ERROR_STR());
            else
                  dcc_pemsg(EX_IOERR, emsg,
                          "write(%s)=%d instead of %d",
                          nm, i, ISZ(info_clear));
            close(fd);
            if (pfd)
                  *pfd = -1;
            if (created)
                  unlink(map_nm);
            return 0;
      }

      if (created) {
            if (pfd)
                  *pfd = fd;
            else
                  close(fd);
      }
      return 1;
}



#ifdef DCC_MAP_INFO_VERSION_9
/* Start converting an old map file.
 *    The file lock on the shared information is obtained, old_info_size
 *    bytes are read from the open map file into old_info, and the file is
 *    rewound.  If the first old_magic_size bytes differ from old_magic, the
 *    lock is released and 0 is returned.  Otherwise the name of the
 *    replacement file, the current name with "-new" appended, is put in
 *    new_info_nm and any old file of that name is removed.
 *    The lock is kept when 1 is returned and after most errors. */
static int                    /* -1=error, 0=wrong version, 1=done */
map_convert_start(DCC_EMSG emsg,
              void *old_info, int old_info_size,
              const char *old_magic, int old_magic_size,
              DCC_PATH new_info_nm)
{
      int i;

      /* only one process or thread can fix the file so wait for
       * exclusive access to the file */
      if (!dcc_info_lock(emsg))
            return -1;

      i = read(info_fd, old_info, old_info_size);
      if (i != old_info_size) {
            if (i < 0) {
                  dcc_pemsg(EX_IOERR, emsg, "read(%s): %s",
                          dcc_info_nm, ERROR_STR());
            } else {
                  dcc_pemsg(EX_IOERR, emsg, "read(%s)=%d instead of %d",
                          dcc_info_nm, i, old_info_size);
            }
            return -1;
      }

      /* Rewind the file.  The arguments of lseek() are the descriptor,
       * the offset, and then the origin. */
      if (-1 == lseek(info_fd, 0, SEEK_SET)) {
            dcc_pemsg(EX_IOERR, emsg, "lseek(%s): %s",
                    dcc_info_nm, ERROR_STR());
            return -1;
      }

      if (strncmp(old_info, old_magic, old_magic_size)) {
            if (!dcc_info_unlock(emsg))
                  return -1;
            return 0;
      }

      if (!fnm2path(new_info_nm, dcc_info_nm, "-new")) {
            dcc_pemsg(EX_IOERR, emsg, "long map name \"%s\"",
                    dcc_info_nm);
            return -1;
      }
      unlink(new_info_nm);
      return 1;
}



/* Finish converting an old map file by putting the new file, which is
 *    open on new_fd and named new_info_nm, in the place of the old file.
 *    new_fd is closed on every path.
 *    The new file is removed after most errors.
 *    old_sb describes the old file and is used only outside Windows. */
static int                    /* -1=error, 1=done */
map_convert_fin(DCC_EMSG emsg,
            DCC_PATH new_info_nm, int new_fd, struct stat *old_sb)
{
#ifdef DCC_WIN32
      DCC_PATH old_info_nm;

      /* there are at least two races here,
       * but Windows does not allow renaming or unlinking (e.g. by
       * rename()) open files */
      if (!fnm2path(old_info_nm, dcc_info_nm, "-old")) {
            dcc_pemsg(EX_IOERR, emsg, "long map name \"%s\"",
                    dcc_info_nm);
            return -1;
      }
      /* make room for moving the current file aside */
      unlink(old_info_nm);

      /* the old file must be unlocked and closed before it is renamed */
      if (!dcc_info_unlock(emsg)) {
            close(new_fd);
            unlink(new_info_nm);
            return -1;
      }
      if (0 > close(info_fd)) {
            dcc_pemsg(EX_IOERR, emsg, "close(%s): %s",
                    dcc_info_nm, ERROR_STR());
            close(new_fd);
            unlink(new_info_nm);
            return -1;
      }
      info_fd = -1;

      /* move the old file aside */
      if (0 > rename(dcc_info_nm, old_info_nm)) {
            dcc_pemsg(EX_IOERR, emsg, "rename(%s, %s): %s",
                    dcc_info_nm, old_info_nm, ERROR_STR());
            close(new_fd);
            unlink(new_info_nm);
            return -1;
      }

      /* the new file must also be closed before it is renamed */
      close(new_fd);
      if (0 > rename(new_info_nm, dcc_info_nm)) {
            dcc_pemsg(EX_IOERR, emsg, "rename(%s, %s): %s",
                    new_info_nm, dcc_info_nm, ERROR_STR());
            unlink(new_info_nm);
            return -1;
      }
      return 1;
#else /* !DCC_WIN32 */
      /* if we are running as root, give the new file the owner and group
       * of the old file so that replacing the file does not change its
       * ownership */
      if (getuid() == 0
          && 0 > fchown(new_fd, old_sb->st_uid, old_sb->st_gid)) {
            dcc_pemsg(EX_IOERR, emsg, "chown(%s,%d,%d): %s",
                    new_info_nm, (int)old_sb->st_uid, (int)old_sb->st_gid,
                    ERROR_STR());
            unlink(new_info_nm);
            close(new_fd);
            return -1;
      }

      /* replace the old file with the new one */
      if (0 > rename(new_info_nm, dcc_info_nm)) {
            dcc_pemsg(EX_IOERR, emsg, "rename(%s, %s): %s",
                    new_info_nm, dcc_info_nm, ERROR_STR());
            unlink(new_info_nm);
            close(new_fd);
            return -1;
      }

      close(new_fd);
      return 1;
#endif /* DCC_WIN32 */
}


#endif /* DCC_MAP_INFO_VERSION_9 */
#ifdef DCC_MAP_INFO_VERSION_5
/* Convert an old version 5 map file.
 *      The contexts must be locked on entry.
 *      The old file may be locked on exit.
 *      Only the server names, with their client-IDs, ports, passwords, and
 *      RTT adjustments, and the flags are carried over.  The old RTT
 *      adjustment is multiplied by 10*1000 for the new file. */
static int                    /* -1=error, 0=wrong version, 1=done */
map_convert_v5(DCC_EMSG emsg, struct stat *old_sb)
{
      DCC_PATH new_info_nm;
      DCC_SRVR_NM new_nms[DCC_MAX_SRVR_NMS];
      DCC_V5_CLNT_INFO old_info;
      int new_fd;
      int i;

      if ((int)old_sb->st_size < ISZ(DCC_V5_CLNT_INFO))
            return 0;

      i = map_convert_start(emsg, &old_info, sizeof(DCC_V5_CLNT_INFO),
                        DCC_MAP_INFO_VERSION_5, sizeof(old_info.version),
                        new_info_nm);
      if (i <= 0)
            return i;

      memset(&new_nms, 0, sizeof(new_nms));
      for (i = 0; i < DIM(new_nms); ++i) {
            new_nms[i].clnt_id = old_info.nms[i].clnt_id;
            new_nms[i].port = old_info.nms[i].port;
            /* The old name comes from a file and might not be
             * terminated, so bound both the read and the write. */
            snprintf(new_nms[i].hostname, sizeof(new_nms[i].hostname),
                   "%.*s", (int)sizeof(old_info.nms[i].hostname),
                   old_info.nms[i].hostname);
            memcpy(new_nms[i].passwd, old_info.nms[i].passwd,
                   sizeof(new_nms[i].passwd));
            new_nms[i].rtt_adj = old_info.nms[i].rtt_adj*10*1000;
      }
      new_fd = -1;
      if (!dcc_create_map(emsg, new_info_nm, &new_fd,
                      new_nms, DIM(new_nms), 0, 0,
                      0, old_info.flags))
            return -1;

      return map_convert_fin(emsg, new_info_nm, new_fd, old_sb);
}



#endif /* DCC_MAP_INFO_VERSION_5 */
#ifdef DCC_MAP_INFO_VERSION_6
/* Convert an old version 6 map file.
 *      The contexts must be locked on entry.
 *      The old file may be locked on exit.
 *      Only the server names, with their client-IDs, ports, passwords, and
 *      RTT adjustments, and the flags are carried over. */
static int                    /* -1=error, 0=wrong version, 1=done */
map_convert_v6(DCC_EMSG emsg, struct stat *old_sb)
{
      DCC_PATH new_info_nm;
      DCC_SRVR_NM new_nms[DCC_MAX_SRVR_NMS];
      DCC_V6_CLNT_INFO old_info;
      int new_fd;
      int i;

      if ((int)old_sb->st_size < ISZ(DCC_V6_CLNT_INFO))
            return 0;

      i = map_convert_start(emsg, &old_info, sizeof(DCC_V6_CLNT_INFO),
                        DCC_MAP_INFO_VERSION_6, sizeof(old_info.version),
                        new_info_nm);
      if (i <= 0)
            return i;

      memset(&new_nms, 0, sizeof(new_nms));
      for (i = 0; i < DIM(new_nms); ++i) {
            new_nms[i].clnt_id = old_info.nms[i].clnt_id;
            new_nms[i].port = old_info.nms[i].port;
            /* The old name comes from a file and might not be
             * terminated, so bound both the read and the write. */
            snprintf(new_nms[i].hostname, sizeof(new_nms[i].hostname),
                   "%.*s", (int)sizeof(old_info.nms[i].hostname),
                   old_info.nms[i].hostname);
            memcpy(new_nms[i].passwd, old_info.nms[i].passwd,
                   sizeof(new_nms[i].passwd));
            new_nms[i].rtt_adj = old_info.nms[i].rtt_adj;
      }
      new_fd = -1;
      if (!dcc_create_map(emsg, new_info_nm, &new_fd,
                      new_nms, DIM(new_nms), 0, 0,
                      0, old_info.flags))
            return -1;

      return map_convert_fin(emsg, new_info_nm, new_fd, old_sb);
}



#endif /* DCC_MAP_INFO_VERSION_6 */
#ifdef DCC_MAP_INFO_VERSION_7
/* Convert an old version 7 map file.
 *      The contexts must be locked on entry.
 *      The old file may be locked on exit.
 *      Version 7 files came in two layouts, with and without IPv6, that are
 *      told apart by the size of the file.  The names of the DCC and
 *      greylist servers and the flags are carried over. */
static int                    /* -1=error, 0=wrong version, 1=done */
map_convert_v7(DCC_EMSG emsg, struct stat *old_sb)
{
      DCC_PATH new_info_nm;
      union {
          DCC_V7_IPV6_CLNT_INFO   v6;
          DCC_V7_NOIPV6_CLNT_INFO nov6;
      } old;
      int new_fd = -1;
      int status;
      u_char with_ipv6;

      /* a file of neither size is not ours to convert */
      with_ipv6 = (old_sb->st_size == sizeof(old.v6));
      if (!with_ipv6 && old_sb->st_size != sizeof(old.nov6))
            return 0;

      if (with_ipv6) {
            status = map_convert_start(emsg, &old.v6, sizeof(old.v6),
                                 DCC_MAP_INFO_VERSION_7,
                                 sizeof(old.v6.version),
                                 new_info_nm);
            if (status <= 0)
                  return status;

            if (!dcc_create_map(emsg, new_info_nm, &new_fd,
                            old.v6.dcc.nms, DIM(old.v6.dcc.nms),
                            old.v6.grey.nms, DIM(old.v6.grey.nms),
                            0, old.v6.flags))
                  return -1;
      } else {
            status = map_convert_start(emsg, &old.nov6, sizeof(old.nov6),
                                 DCC_MAP_INFO_VERSION_7,
                                 sizeof(old.nov6.version),
                                 new_info_nm);
            if (status <= 0)
                  return status;

            if (!dcc_create_map(emsg, new_info_nm, &new_fd,
                            old.nov6.dcc.nms, DIM(old.nov6.dcc.nms),
                            old.nov6.grey.nms, DIM(old.nov6.grey.nms),
                            0, old.nov6.flags))
                  return -1;
      }

      return map_convert_fin(emsg, new_info_nm, new_fd, old_sb);
}



#endif /* DCC_MAP_INFO_VERSION_7 */
#ifdef DCC_MAP_INFO_VERSION_8
/* Convert an old version 8 map file.
 *      The contexts must be locked on entry.
 *      The old file may be locked on exit.
 *      A file that is not exactly the size of a version 8 file is left
 *      alone.  The names of the DCC and greylist servers and the flags are
 *      carried over. */
static int                    /* -1=error, 0=wrong version, 1=done */
map_convert_v8(DCC_EMSG emsg, struct stat *old_sb)
{
      DCC_PATH new_info_nm;
      DCC_V8_CLNT_INFO v8_info;
      int new_fd = -1;
      int status;

      if ((int)old_sb->st_size != ISZ(v8_info))
            return 0;

      status = map_convert_start(emsg, &v8_info, sizeof(v8_info),
                           DCC_MAP_INFO_VERSION_8,
                           sizeof(v8_info.version),
                           new_info_nm);
      if (status <= 0)
            return status;

      if (!dcc_create_map(emsg, new_info_nm, &new_fd,
                      v8_info.dcc.nms, DIM(v8_info.dcc.nms),
                      v8_info.grey.nms, DIM(v8_info.grey.nms),
                      0, v8_info.flags))
            return -1;

      return map_convert_fin(emsg, new_info_nm, new_fd, old_sb);
}



#endif /* DCC_MAP_INFO_VERSION_8 */
#ifdef DCC_MAP_INFO_VERSION_9
/* Try to convert a map file that has the version 9 layout.
 *      The contexts must be locked on entry.
 *      The old file may be locked on exit
 *
 *      old_sb is the stat() result for the existing file; only a file whose
 *      size equals that of DCC_V9_CLNT_INFO is treated as a candidate. */
static int                    /* -1=error, 0=wrong version, 1=done */
map_convert_v9(DCC_EMSG emsg, struct stat *old_sb)
{
      DCC_V9_CLNT_INFO v9_info;
      DCC_PATH tmp_nm;
      int tmp_fd = -1;
      int started;

      /* a file of any other size is not ours to convert */
      if ((int)old_sb->st_size != ISZ(v9_info))
            return 0;

      /* pass along "error" and "wrong version" from the first step */
      started = map_convert_start(emsg, &v9_info, sizeof(v9_info),
                                  DCC_MAP_INFO_VERSION_9,
                                  sizeof(v9_info.version),
                                  tmp_nm);
      if (started <= 0)
            return started;

      /* unlike the older layouts, this one also carries a source
       * address that is handed to the new map */
      if (!dcc_create_map(emsg, tmp_nm, &tmp_fd,
                          v9_info.dcc.nms, DIM(v9_info.dcc.nms),
                          v9_info.grey.nms, DIM(v9_info.grey.nms),
                          &v9_info.src, v9_info.flags))
            return -1;

      return map_convert_fin(emsg, tmp_nm, tmp_fd, old_sb);
}



#endif /* DCC_MAP_INFO_VERSION_9 */
/* Ensure that the shared information is available, but do not lock it.
 *      The contexts must be locked
 *      SUID privileges are often released
 *
 *      emsg receives a description of any failure.
 *      new_info_nm, if not null, names the map file to use.  The current
 *          mapping is dropped and rebuilt when it differs from dcc_info_nm.
 *      new_info_fd, if >= 0, is an already open descriptor for the map
 *          file.  This function takes ownership of it: it either becomes
 *          info_fd or is closed before a failure return.
 *
 *      A file of the wrong size is offered to the map_convert_v*()
 *      functions that are compiled in.  After a successful conversion
 *      the loop starts over to open and map the new file.
 *
 *      The version string and the client host ID are checked on every
 *      call, even when nothing needed to be mapped. */
u_char                              /* 0=problem, 1=nop, 2=mapped */
dcc_map_info(DCC_EMSG emsg, const char *new_info_nm, int new_info_fd)
{
      struct stat sb;
#ifndef DCC_WIN32
      void *p;
#endif
      u_char result;

      have_ctxts_lock();

      /* work only if needed, but always check for corruption */
      result = 1;
      while ((new_info_nm && strcmp(new_info_nm, dcc_info_nm))
             || new_info_fd >= 0
             || !dcc_clnt_info) {
            result = 2;
            if (!dcc_unmap_info(emsg)) {
                  if (new_info_fd >= 0)
                        close(new_info_fd);
                  return 0;
            }

            if (new_info_nm) {
                  if (!fnm2path(dcc_info_nm, new_info_nm, 0)) {
                        dcc_pemsg(EX_IOERR, emsg,
                                "long map name \"%s\"",
                                new_info_nm);
                        /* we own the caller's descriptor;
                         * do not leak it */
                        if (new_info_fd >= 0)
                              close(new_info_fd);
                        return 0;
                  }
                  /* don't change name if we convert the file */
                  new_info_nm = 0;
            }
            if (dcc_info_nm[0] == '\0') {
                  dcc_pemsg(EX_USAGE, emsg, "missing map file");
                  /* we own the caller's descriptor; do not leak it */
                  if (new_info_fd >= 0)
                        close(new_info_fd);
                  return 0;
            }

            if (new_info_fd >= 0) {
                  /* adopt the caller's descriptor exactly once */
                  info_fd = new_info_fd;
                  new_info_fd = -1;
            } else {
                  info_fd = open(dcc_info_nm, O_RDWR, 0600);
#ifndef DCC_WIN32
                  /* try again with privileges if the plain open failed */
                  if (info_fd < 0
                      && dcc_get_priv_home(dcc_info_nm)) {
                        info_fd = open(dcc_info_nm, O_RDWR, 0600);
                        dcc_rel_priv();
                  }
#endif
                  if (info_fd < 0) {
                        dcc_pemsg(EX_NOINPUT, emsg, "open(%s): %s",
                                dcc_info_nm, ERROR_STR());
                        return 0;
                  }
            }

            /* refuse to use the file if it is not private */
            if (!dcc_ck_private(emsg, &sb, dcc_info_nm, info_fd)) {
                  dcc_unmap_info(0);
                  return 0;
            }

            if ((int)sb.st_size != ISZ(*dcc_clnt_info)) {
#ifdef DCC_MAP_INFO_VERSION_9
                  int i;

                  /* Offer the file to each known old format in turn.
                   * Each converter answers -1=error, 0=not its format,
                   * or 1=converted. */
#ifdef DCC_MAP_INFO_VERSION_6
                  i = map_convert_v5(emsg, &sb);
                  if (i < 0) {
                        dcc_unmap_info(0);
                        return 0;
                  }
                  /* unlock old file and open & lock new file */
                  if (i > 0)
                        continue;

                  i = map_convert_v6(emsg, &sb);
                  if (i < 0) {
                        dcc_unmap_info(0);
                        return 0;
                  }
                  /* unlock old file and open & lock new file */
                  if (i > 0)
                        continue;
#endif /* DCC_MAP_INFO_VERSION_6 */
#ifdef DCC_MAP_INFO_VERSION_7
                  i = map_convert_v7(emsg, &sb);
                  if (i < 0) {
                        dcc_unmap_info(0);
                        return 0;
                  }
                  /* unlock old file and open & lock new file */
                  if (i > 0)
                        continue;
#endif /* DCC_MAP_INFO_VERSION_7 */
#ifdef DCC_MAP_INFO_VERSION_8
                  i = map_convert_v8(emsg, &sb);
                  if (i < 0) {
                        dcc_unmap_info(0);
                        return 0;
                  }
                  /* unlock old file and open & lock new file */
                  if (i > 0)
                        continue;
#endif /* DCC_MAP_INFO_VERSION_8 */
                  i = map_convert_v9(emsg, &sb);
                  if (i < 0) {
                        dcc_unmap_info(0);
                        return 0;
                  }
                  /* unlock old file and open & lock new file */
                  if (i > 0)
                        continue;
#endif /* DCC_MAP_INFO_VERSION_9 */
                  dcc_pemsg(EX_DATAERR, emsg,
                          "%s is not the size of a DCC map file",
                          dcc_info_nm);
                  close(info_fd);
                  info_fd = -1;
                  return 0;
            }

#ifdef DCC_WIN32
            dcc_clnt_info= win32_map(emsg, &info_map, dcc_info_nm,
                               info_fd, sizeof(*dcc_clnt_info));
            if (!dcc_clnt_info) {
                  close(info_fd);
                  info_fd = -1;
                  return 0;
            }
#else
            p = mmap(0, sizeof(*dcc_clnt_info),
                   PROT_READ|PROT_WRITE, MAP_SHARED, info_fd, 0);
            if (p == MAP_FAILED) {
                  dcc_pemsg(EX_IOERR, emsg, "mmap(%s): %s",
                          dcc_info_nm, ERROR_STR());
                  close(info_fd);
                  info_fd = -1;
                  return 0;
            }
            dcc_clnt_info = p;
#endif /* DCC_WIN32 */
      }

      /* a file of the right size must also carry the right version */
      if (strncmp(dcc_clnt_info->version, DCC_MAP_INFO_VERSION,
                sizeof(dcc_clnt_info->version))) {
            dcc_pemsg(EX_DATAERR, emsg, "unrecognized data in %s",
                    dcc_info_nm);
            dcc_unmap_info(0);
            return 0;
      }

      if (!get_clnt_hid(emsg)) {
            dcc_unmap_info(0);
            return 0;
      }

      /* stamp our host ID into the header only after a fresh mapping */
      if (result > 1)
            dcc_clnt_info->proto_hdr.op_nums.h = dcc_clnt_hid;

      return result;
}



/* Make the shared information available and then lock it.
 *      Arguments are passed unchanged to dcc_map_info().
 *      SUID privileges are often released */
u_char                              /* 0=something wrong, 1=mapped */
dcc_map_lock_info(DCC_EMSG emsg, const char *new_info_nm, int new_info_fd)
{
      /* do not try to lock what could not be mapped */
      if (dcc_map_info(emsg, new_info_nm, new_info_fd) == 0)
            return 0;

      return dcc_info_lock(emsg) ? 1 : 0;
}



/* All servers are broken, so make a note to not try for a while.
 *      Each call that is not itself inside a delay raises the back-off
 *      exponent by one, up to DCC_MAX_FAIL_EXP, and sets a new deadline.
 *      The contexts and the mapped information must be locked */
static void
fail_more(const DCC_CLNT_CTXT *ctxt, DCC_SRVR_CLASS *class)
{
      have_info_lock();

      /* a delay that has not yet run out must not be lengthened */
      if (ctxt->now.tv_sec <= class->fail_time)
            return;

      /* grow the exponent, but only up to its ceiling */
      ++class->fail_exp;
      if (class->fail_exp > DCC_MAX_FAIL_EXP)
            class->fail_exp = DCC_MAX_FAIL_EXP;

      class->fail_time = time(0) + (DCC_INIT_FAIL_SECS << class->fail_exp);
}



/* Decide whether the servers of a class may be asked again.
 *      While a failure delay is still running, an explanation is put
 *      in emsg and 0 is returned.  Otherwise any recorded failure is
 *      forgotten by clearing the back-off exponent.
 *      The mapped information must be locked */
static u_char                       /* 0=failing */
ck_fail_time(DCC_EMSG emsg, DCC_CLNT_CTXT *ctxt, DCC_SRVR_CLASS *class)
{
      int secs_left;

      have_info_lock();

      if (class->fail_exp != 0) {
            secs_left = class->fail_time - ctxt->now.tv_sec;
            /* only a remaining delay within (0, DCC_MAX_FAIL_SECS]
             * is honored; anything else ends the failure state */
            if (secs_left > 0 && secs_left <= DCC_MAX_FAIL_SECS) {
                  dcc_pemsg(EX_IOERR, emsg,
                            "continue not asking %s %d seconds after failure",
                            DCC_IS_GREY_STR(class), secs_left);
                  return 0;
            }
            class->fail_exp = 0;
      }

      return 1;
}



/* Mark a class of servers so that its host names get resolved again.
 *      Resolving host names must be locked */
static void
dcc_force_resolve(DCC_SRVR_CLASS *class)
{
      /* forget any failure back-off */
      class->fail_exp = 0;

      /* no server is active until the resolution has been done */
      class->act_inx = DCC_NO_SRVR;

      /* a zero time stamp forces the resolution eventually */
      class->resolve = 0;
}



/* Force both a new resolution of the host names of a class and a new
 * measurement of its RTTs.
 *      A non-zero new_avg_thold_rtt also resets the average threshold
 *      RTT to -DCC_RTT_BAD.
 *      Resolving host names must be locked */
void
dcc_force_measure_rtt(DCC_SRVR_CLASS *class, u_char new_avg_thold_rtt)
{
      dcc_force_resolve(class);

      class->measure = 0;
      if (new_avg_thold_rtt != 0)
            class->avg_thold_rtt = -DCC_RTT_BAD;
}



/* pick the best server
 *      Scan every slot of class->addrs, remember the address with the
 *      smallest effective RTT and the second smallest RTT, and make
 *      the best address the active one.
 *      When no address is usable, class->act_inx is set to DCC_NO_SRVR
 *      and, if emsg is still empty and at least one address is known,
 *      a complaint naming the first few hosts and addresses is made.
 *      The client information and the contexts must be exclusively locked.
 *      Assume there is at least one hostname. */
static u_char                       /* 0=have none, 1=found one */
pick_srvr(DCC_EMSG emsg, DCC_SRVR_CLASS *class)
{
      const DCC_SRVR_ADDR *cand, *best;
      u_int16_t prev_inx;
      int cand_rtt;
      int best_rtt;                 /* smallest RTT */
      int next_rtt;                 /* second smallest RTT */
      u_char any_addr;

      have_info_lock();

      prev_inx = class->act_inx;
      best = 0;
      best_rtt = DCC_RTT_BAD;
      next_rtt = DCC_RTT_BAD;
      any_addr = 0;
      for (cand = class->addrs; cand <= LAST(class->addrs); ++cand) {
            /* skip empty slots */
            if (cand->ip.family == AF_UNSPEC)
                  continue;
            any_addr = 1;

            cand_rtt = effective_rtt(class, cand);
            if (cand_rtt == DCC_RTT_BAD)
                  continue;

            if (cand_rtt < best_rtt) {
                  /* the old best might become the runner-up */
                  if (best_rtt < next_rtt)
                        next_rtt = best_rtt;
                  best_rtt = cand_rtt;
                  best = cand;
            } else if (cand_rtt < next_rtt) {
                  next_rtt = cand_rtt;
            }
      }

      if (!best) {
            /* we failed to find a working server
             * complain if we have any known servers */
            if (any_addr && emsg && *emsg == '\0') {
                  char a1[DCC_SU2STR_SIZE+1+5];
                  char a2[DCC_SU2STR_SIZE+1+5];
                  char a3[DCC_SU2STR_SIZE+1+5];

                  dcc_pemsg(EX_IOERR, emsg,
                            "no working %s server%s %s%s%s%s%s%s at %s%s%s%s%s%s",
                            DCC_IS_GREY_STR(class),
                            class->nms[1].hostname[0] ? "s" : "",
                            class->nms[0].hostname,
                            class->nms[1].hostname[0] ? " " : "",
                            class->nms[1].hostname,
                            class->nms[2].hostname[0] ? " " : "",
                            class->nms[2].hostname,
                            class->nms[3].hostname[0] ? " ..." : "",

                            dcc_ap2str_opt(a1, sizeof(a1), class, 0, '\0'),
                            class->num_addrs > 1 ? " " : "",
                            class->num_addrs > 1
                            ? dcc_ap2str_opt(a2, sizeof(a2), class, 1, '\0')
                            : "",
                            class->num_addrs > 2 ? " " : "",
                            class->num_addrs > 2
                            ? dcc_ap2str_opt(a3, sizeof(a3), class, 2, '\0')
                            : "",
                            class->num_addrs > 3 ? " ..." : "");
            }
            class->act_inx = DCC_NO_SRVR;
            return 0;
      }

      /* We found a usable server.
       * Compute the basic RTT to it including a variance */
      class->base_rtt = best_rtt + DCC_DCCD_DELAY;
      if (class->base_rtt > DCC_MAX_RTT)
            class->base_rtt = DCC_MAX_RTT;

      /* Decide how bad the server must get before we check for
       *    an alternative.
       * If there is no good second choice, there is no point in a
       *    threshold for switching to it */
      class->thold_rtt = next_rtt + DCC_DCCD_DELAY;
      if (class->thold_rtt >= DCC_MAX_RTT)
            class->thold_rtt = DCC_RTT_BAD;

      class->act_inx = (best - class->addrs);

      /* when debugging, note the replacement of a valid old choice */
      if (class->act_inx != prev_inx
          && dcc_clnt_debug
          && prev_inx < class->num_addrs) {
            trace_perf("replacing", &class->addrs[prev_inx]);
            trace_perf("pick", best);
      }
      return 1;
}



/* count IP addresses per host name and per second level domain name
 *      There is one entry per server name, indexed like the names of a
 *      class of servers.  The per-domain total is not kept in every entry
 *      but only in the entry selected by sld_addrs_inx. */
typedef struct name_addrs {
    const char *sld;                /* domain name */
    u_char     sld_addrs;           /* # of addresses for domain name */
    u_char     host_addrs;          /* # of addresses for a host name */
    /* index of the entry whose sld_addrs counts for this name's domain:
     * the first earlier name with the same sld, else this entry itself
     * (as assigned by dcc_clnt_resolve_host()) */
    u_char     sld_addrs_inx;
} NAME_ADDRS[DCC_MAX_SRVR_NMS];


/* delete an address from a growing list of addresses
 *      The counts for the address's host name and for that name's
 *      domain are reduced, the following addresses are moved down one
 *      slot, and the vacated last slot is cleared. */
static void
del_new_addr(DCC_SRVR_ADDR new_addrs[DCC_MAX_SRVR_ADDRS],
           int *num_new_addrs,
           NAME_ADDRS name_addrs,   /* addresses per server name */
           int tgt)                 /* delete this address */
{
      int host_inx, sld_inx;
      int tail;

      /* one address fewer for the host, for its domain, and in total */
      host_inx = new_addrs[tgt].nm_inx;
      sld_inx = name_addrs[host_inx].sld_addrs_inx;
      name_addrs[host_inx].host_addrs--;
      name_addrs[sld_inx].sld_addrs--;
      *num_new_addrs -= 1;

      /* close the gap left by the discarded address */
      tail = *num_new_addrs - tgt;
      if (tail > 0)
            memmove(new_addrs + tgt, new_addrs + tgt + 1,
                    tail * sizeof(new_addrs[0]));

      /* leave no stale copy in the slot that is now unused */
      memset(new_addrs + *num_new_addrs, 0, sizeof(new_addrs[0]));
}



/* Compute the residue class of an address used for ordering and leave
 * a copy of the address to compare in *sup2.
 *      When dcc_ipv6sutoipv4() succeeds, *sup2 is the IPv4 form it
 *      produced and the result is the IPv4 address modulo DCC_SRVRS_MOD,
 *      plus DCC_SRVRS_MOD.
 *      Otherwise *sup2 is a plain copy of *sup and the result is the sum
 *      of the four 32-bit words of the IPv6 address modulo DCC_SRVRS_MOD. */
static inline u_int
su_srvrs_mod(const DCC_SOCKU *sup,
           DCC_SOCKU *sup2)
{
      u_int residue;

      if (!dcc_ipv6sutoipv4(sup2, sup)) {
            residue = (sup->ipv6.sin6_addr.s6_addr32[0]
                       + sup->ipv6.sin6_addr.s6_addr32[1]
                       + sup->ipv6.sin6_addr.s6_addr32[2]
                       + sup->ipv6.sin6_addr.s6_addr32[3]) % DCC_SRVRS_MOD;
            *sup2 = *sup;
            return residue;
      }

      /* the offset keeps these results at or above DCC_SRVRS_MOD */
      residue = sup2->ipv4.sin_addr.s_addr % DCC_SRVRS_MOD;
      return residue + DCC_SRVRS_MOD;
}



/* partially order a pair of IP addresses
 *      The residue classes from su_srvrs_mod() decide first.  Within
 *      one class, the bytes of the address forms produced by
 *      su_srvrs_mod() are compared.
 *      The result is <0, 0, or >0 like memcmp(). */
static int
sucmp(const DCC_SOCKU *sup1, const DCC_SOCKU *sup2)
{
      DCC_SOCKU norm1, norm2;
      u_int class1, class2;
      int diff;

      class1 = su_srvrs_mod(sup1, &norm1);
      class2 = su_srvrs_mod(sup2, &norm2);

      diff = (int)class1 - (int)class2;
      return diff != 0 ? diff : memcmp(&norm1, &norm2, sizeof(DCC_SOCKU));
}



/* deal with a list of IP addresses or aliases for one DCC server hostname
 * the contexts and the mmap()'ed info must be locked
 *
 *      The freshly resolved addresses are read from dcc_hostaddrs up to
 *      dcc_hostaddrs_end and merged, in sucmp() order, into new_addrs.
 *
 *      new_addrs       growing list of addresses for the class
 *      num_new_addrs   number of entries of new_addrs in use; updated
 *      cur             current state of the class; what is already known
 *                      about an address found in it is copied
 *      nmp, nm_inx     the server name being resolved and its index
 *      name_addrs      counts of addresses per host name and per domain;
 *                      updated
 *      port            port number stored with every address of this name */
static void
get_addrs(DCC_SRVR_ADDR new_addrs[DCC_MAX_SRVR_ADDRS],
        int *num_new_addrs,
        const DCC_SRVR_CLASS *cur,
        DCC_SRVR_NM *nmp, int nm_inx,     /* server name being resolved */
        NAME_ADDRS name_addrs,      /* addresses per server name */
        u_int16_t port)
{
      DCC_SRVR_ADDR *ap;
      const DCC_SRVR_NM *nmp2;
      DCC_SOCKU *np, su, nxt, prev;
      int nm1_inx, sld1_inx, sld2_inx;
      int host_max, sld_max;
      int i, j, k;

      have_ctxts_lock();

      /* Keep as many IP addresses as we have room, but for as many
       * named servers as possible
       * Sort the addresses to keep our list stable when we re-check.
       * Otherwise, we would start from scratch when nothing changes
       * but the order of responses from a DNS server.
       * Sort by residue class to pick a random subset when there
       * are too many servers to fit in our list. */

      /* AF_UNSPEC in nxt or prev means "no address yet" */
      nxt.sa.sa_family = AF_UNSPEC;
      for (;;) {
            /* Pick the nxt address in the newly resolved list
             * to consider.  We want the smallest address larger
             * than the previous address we considered.
             * "Smallest" is defined using the local random ordering
             * of addresses. */
            prev = nxt;
            nxt.sa.sa_family = AF_UNSPEC;
            for (np = dcc_hostaddrs; np < dcc_hostaddrs_end; ++np) {
                  if (np->sa.sa_family == AF_UNSPEC)
                        continue;
                  /* compare with the port of this server name installed */
                  su = *np;
                  *DCC_SU_PORT(&su) = port;
                  if ((prev.sa.sa_family == AF_UNSPEC
                       || sucmp(&su, &prev) > 0)
                      && (nxt.sa.sa_family == AF_UNSPEC
                        || sucmp(&nxt, &su) > 0))
                        nxt = su;
            }
            /* quit if we've considered them all */
            if (nxt.sa.sa_family == AF_UNSPEC)
                  break;

            /* ignore duplicate A records even for other hostnames,
             * unless the port numbers differ */
            ap = &new_addrs[*num_new_addrs];
            while (--ap >= new_addrs) {
                  if (!dcc_cmp_ap2su(ap, &nxt)) {
                        /* they are the same, so keep the one with
                         * the non-anonymous ID
                         * or smallest RTT adjustment */
                        nmp2 = &cur->nms[ap->nm_inx];
                        i = (nmp->clnt_id == DCC_ID_ANON);
                        j = (nmp2->clnt_id == DCC_ID_ANON);
                        if (i != j) {
                              /* only the new name is anonymous,
                               * so the existing entry wins */
                              if (i)
                                  goto next_addr;
                        } else {
                              /* the existing entry also wins a tie */
                              if (nmp->rtt_adj >= nmp2->rtt_adj)
                                  goto next_addr;
                        }
                        /* delete the previous instance */
                        del_new_addr(new_addrs, num_new_addrs,
                                   name_addrs, ap - new_addrs);
                        break;
                  }
            }

            /* If we already have as many addresses as we will use,
             * then pick one to discard. Discard the last address of
             * the host in the second level domain with the most
             * addresses but without eliminating all addresses for any
             * host name.  Look for the domain with the most IP addresses
             * and that has at least one host with at least two
             * addresses. */
            if (*num_new_addrs == DCC_MAX_SRVR_ADDRS) {
                  host_max = -1;
                  sld_max = -1;
                  nm1_inx = -1;
                  sld1_inx = -1;
                  /* consider every name resolved so far, including
                   * the name being resolved now */
                  for (i = 0; i <= nm_inx; i++) {
                        /* ignore hosts with only 1 IP address */
                        j = name_addrs[i].host_addrs;
                        if (j <= 1)
                              continue;
                        sld2_inx = name_addrs[i].sld_addrs_inx;
                        k = name_addrs[sld2_inx].sld_addrs;
                        if (sld_max <= k) {
                              if (sld1_inx != sld2_inx) {
                                  /* a different domain with at least
                                   * as many addresses */
                                  sld_max = k;
                                  sld1_inx = sld2_inx;
                                  host_max = j;
                                  nm1_inx = i;
                              } else if (host_max <= j) {
                                  /* a host in the same domain with at
                                   * least as many addresses */
                                  host_max = j;
                                  nm1_inx = i;
                              }
                        }
                  }
                  /* no additional IP addresses for the target host if
                   * it has the most IP addresses */
                  if (nm1_inx == nm_inx)
                        return;

                  /* find the last address of the host with the most */
                  /* NOTE(review): if no host had two or more addresses,
                   * nm1_inx is still -1 and j presumably stays 0, so the
                   * first address in the list is the one deleted --
                   * confirm that this is intended */
                  for (i = 0, j = 0; i < *num_new_addrs; i++) {
                        if (new_addrs[i].nm_inx == nm1_inx)
                              j = i;
                  }
                  /* and delete it */
                  del_new_addr(new_addrs, num_new_addrs, name_addrs, j);
            }

            /* install the new address in the growing list */
            ap = &new_addrs[*num_new_addrs];
            ap->rtt = DCC_RTT_BAD;
            /* store the address in the family currently in use */
            if (nxt.sa.sa_family == AF_INET && DCC_INFO_IPV6())
                  dcc_ipv4sutoipv6(&nxt, &nxt);
            else if (nxt.sa.sa_family == AF_INET6 && !DCC_INFO_IPV6())
                  dcc_ipv6sutoipv4(&nxt, &nxt);
            dcc_su2ip(&ap->ip, &nxt);

            /* If this is a previously known address,
             * preserve what we already knew about it
             * Check the address family separately because dcc_cmp_ap2su()
             * does not and DCC_INFO_IPV6() might have changed. */
            for (i = 0; i < DIM(cur->addrs); ++i) {
                  if (cur->addrs[i].ip.family == nxt.sa.sa_family
                      && !dcc_cmp_ap2su(&cur->addrs[i], &nxt)) {
                        *ap = cur->addrs[i];
                        break;
                  }
            }
            /* the copied entry might have belonged to another name */
            ap->nm_inx = nm_inx;
            ++*num_new_addrs;

            ++name_addrs[nm_inx].host_addrs;
            ++name_addrs[name_addrs[nm_inx].sld_addrs_inx].sld_addrs;
next_addr:;
      }
}



/* resolve a server name into addresses
 *      The RTT adjustment of the name is first limited to
 *      +/-DCC_RTT_ADJ_MAX.  The domain of the name, taken as the text
 *      after the second to last '.', is recorded in name_addrs so that
 *      addresses can be counted per domain.  The addresses found are
 *      merged into new_addrs by get_addrs().
 *      nmp->defined says whether the name could be resolved.
 * the contexts and the mmap()'ed info must be locked */
static void
dcc_clnt_resolve_host(DCC_EMSG emsg,
                  DCC_SRVR_ADDR new_addrs[DCC_MAX_SRVR_ADDRS],
                  int *num_new_addrs,
                  const DCC_SRVR_CLASS *cur,
                  DCC_SRVR_NM *nmp, int nm_inx, /* name being resolved */
                  NAME_ADDRS name_addrs)  /* addresses per server name */
{
      const char *sld, *dot, *next_dot;
      int herr;
      u_char resolved;
      int sld_inx;

      nmp->defined = 0;
      /* an unused slot has no name to resolve */
      if (nmp->hostname[0] == '\0')
            return;

      /* keep the RTT adjustment within bounds */
      if (nmp->rtt_adj > DCC_RTT_ADJ_MAX)
            nmp->rtt_adj = DCC_RTT_ADJ_MAX;
      else if (nmp->rtt_adj < -DCC_RTT_ADJ_MAX)
            nmp->rtt_adj = -DCC_RTT_ADJ_MAX;

      /* Step through the dots so that sld ends up just after the
       * second to last one.  A name with fewer than two dots is its
       * own domain. */
      sld = nmp->hostname;
      dot = strchr(sld, '.');
      while (dot != 0) {
            next_dot = strchr(dot + 1, '.');
            if (next_dot == 0)
                  break;
            sld = dot + 1;
            dot = next_dot;
      }
      name_addrs[nm_inx].sld = sld;

      /* share the count of addresses with the first earlier name in
       * the same domain, or else use our own entry */
      sld_inx = 0;
      while (sld_inx < nm_inx
             && (name_addrs[sld_inx].sld == 0
                 || strcmp(sld, name_addrs[sld_inx].sld) != 0))
            ++sld_inx;
      name_addrs[nm_inx].sld_addrs_inx = sld_inx;

      dcc_host_lock();
      if (dcc_clnt_info->flags & DCC_INFO_FG_SOCKS)
            resolved = dcc_get_host_SOCKS(nmp->hostname,
                                          DCC_INFO_IPV6() ? 2 : 0, &herr);
      else
            resolved = dcc_get_host(nmp->hostname,
                                    DCC_INFO_IPV6() ? 2 : 0, &herr);

      if (resolved) {
            nmp->defined = 1;
            get_addrs(new_addrs, num_new_addrs, cur, nmp, nm_inx,
                      name_addrs, nmp->port);
      } else {
            dcc_pemsg(EX_NOHOST, emsg, "%s: %s",
                      nmp->hostname, DCC_HSTRERROR(herr));
      }
      dcc_host_unlock();
}



/* resolve server hostnames again
 *      Every name of the class is resolved into a fresh list of addresses.
 *      If the fresh list has the same addresses in the same order as the
 *      current list, nothing but the time of the next resolution changes.
 *      Otherwise the current list is replaced, the generation number of
 *      the list is advanced, and the choice of server and the RTT
 *      measurements are invalidated.
 *
 *      emsg is cleared first and then receives any complaint.
 *      ctxt supplies the current time for scheduling the next resolution.
 *
 *      all 4 locks must be held */
static u_char                       /* 0=no good addresses, 1=at least 1 */
dcc_clnt_resolve(DCC_EMSG emsg, DCC_CLNT_CTXT *ctxt, DCC_SRVR_CLASS *class)
{
      DCC_SRVR_ADDR new_addrs[DCC_MAX_SRVR_ADDRS];
      int num_new_addrs;
      int nm_inx;
      NAME_ADDRS name_addrs;
      DCC_SRVR_ADDR *new_ap, *cur_ap;
      int i;

      have_info_lock();
      have_resolve_lock();

      if (dcc_clnt_debug > 1)
            dcc_trace_msg("resolve %s server host names",
                        DCC_IS_GREY_STR(class));

      memset(new_addrs, 0, sizeof(new_addrs));
      num_new_addrs = 0;
      memset(&name_addrs, 0, sizeof(name_addrs));

      /* choose a non-zero residue the first time */
      if (dcc_clnt_info->residue == 0) {
            dcc_clnt_info->residue = dcc_clnt_hid % DCC_SRVRS_MOD;
            if (dcc_clnt_info->residue == 0)
                  dcc_clnt_info->residue = 1;
      }

      if (emsg)
            *emsg = '\0';
      for (nm_inx = 0; nm_inx < DIM(class->nms); ++nm_inx)
            dcc_clnt_resolve_host(emsg, new_addrs, &num_new_addrs,
                              class, &class->nms[nm_inx], nm_inx,
                              name_addrs);

      class->resolve = ctxt->now.tv_sec+DCC_RE_RESOLVE;

      /* see if anything changed */
      i = num_new_addrs;
      if (i != 0 && i == class->num_addrs) {
            new_ap = new_addrs;
            cur_ap = class->addrs;
            for (;;) {
                  if (!i)
                        return 1;   /* nothing changed */
                  if (new_ap->nm_inx != cur_ap->nm_inx
                      || memcmp(&new_ap->ip, &cur_ap->ip,
                              sizeof(new_ap->ip)))
                        break;
                  ++new_ap;
                  ++cur_ap;
                  --i;
            }
      }

      /* Something changed.
       * Start a new generation of the list no matter how it changed,
       * including when only the number of addresses differs, so that
       * indices into the old list can be recognized as stale. */
      ++class->gen;

      /* Get the new values and arrange to recompute RTTs */
      class->act_inx = DCC_NO_SRVR;
      class->avg_thold_rtt = -DCC_RTT_BAD;
      class->measure = 0;
      memcpy(&class->addrs, &new_addrs, sizeof(class->addrs));
      class->num_addrs = num_new_addrs;

      if (!class->num_addrs) {
            /* failed to resolve any server host names */
            class->resolve = 0;
            if (emsg && *emsg == '\0')
                  dcc_pemsg(EX_USAGE, emsg,
                          "no valid %s server hostnames",
                          DCC_IS_GREY_STR(class));
            return 0;
      }
      return 1;
}



/* Close the socket of a context, if it has one, and forget the address
 * to which it was connected.
 *      A failure of closesocket() is only traced, and only when
 *      debugging. */
void
dcc_clnt_soc_close(DCC_CLNT_CTXT *ctxt)
{
      if (ctxt->soc != INVALID_SOCKET) {
            if (closesocket(ctxt->soc) == SOCKET_ERROR
                && dcc_clnt_debug)
                  dcc_trace_msg("closesocket(ctxt): %s", ERROR_STR());

            ctxt->conn_su.sa.sa_family = AF_UNSPEC;
            ctxt->soc = INVALID_SOCKET;
      }
}



/* disconnect (or close) and (re)open the client
 *    Any existing socket is closed.  A new UDP socket is bound to the
 *    configured source address when there is one worth trying, else to
 *    the wildcard address, with IPv6 tried before IPv4 when allowed.
 *    On success ctxt->port holds the local port actually bound and
 *    DCC_CTXT_USING_IPV4 tells the address family of the socket.
 *
 *    Failures of system calls are reported before the shared information
 *    is unlocked so that unlocking cannot disturb the error code that
 *    is reported.
 *
 *    The contexts and shared information must be locked on entry.
 *    The contexts remain locked on failure.  The shared information
 *    is locked only on success. */
u_char                              /* 0=failed to open the socket */
dcc_clnt_soc_reopen(DCC_EMSG emsg, DCC_CLNT_CTXT *ctxt)
{
      DCC_SOCKU su;
      DCC_SOCKLEN_T soc_len;
      int retries;

      have_info_lock();

      if (ctxt->soc != INVALID_SOCKET)
            dcc_clnt_soc_close(ctxt);

      /* try to bind to the local interface address if
       * we have one and it has changed or it has been
       * some time since we last tried and failed.
       */
      retries = -1;
      if (dcc_clnt_info->src.family == AF_UNSPEC) {
            /* no source address is configured */
            ctxt->bind_ip = dcc_clnt_info->src;
            ctxt->flags &= ~DCC_CTXT_BAD_SRC;
            ctxt->bind_time = 0;
      } else if (memcmp(&dcc_clnt_info->src, &ctxt->bind_ip,
                    sizeof(dcc_clnt_info->src))
               || !(ctxt->flags & DCC_CTXT_BAD_SRC)
               || DCC_IS_TIME(ctxt->start.tv_sec, ctxt->bind_time,
                          DCC_CTXT_REBIND_SECS)) {
            ctxt->bind_ip = dcc_clnt_info->src;
            dcc_ip2su(&su, &ctxt->bind_ip);
            *DCC_SU_PORT(&su) = ctxt->port;
            ctxt->soc = socket(DCC_INFO_IPV6() ? AF_INET6 : AF_INET,
                           SOCK_DGRAM, 0);
            if (!dcc_udp_bind(emsg, &ctxt->soc, &su, &retries)
                || ctxt->soc == INVALID_SOCKET) {
                  dcc_info_unlock(0);
                  /* remember the failure and when to try again */
                  ctxt->flags |= DCC_CTXT_BAD_SRC;
                  ctxt->bind_time = (ctxt->start.tv_sec
                                 + DCC_CTXT_REBIND_SECS);
                  return 0;
            }
            ctxt->flags &= ~DCC_CTXT_BAD_SRC;
            ctxt->bind_time = 0;
      }

      /* Try to bind a socket with IPv6 first if allowed.
       * If that doesn't work, try IPv4 */
      if (ctxt->soc == INVALID_SOCKET
          && DCC_INFO_IPV6()
          && !dcc_udp_bind(emsg, &ctxt->soc,
                       dcc_mk_su(&su, AF_INET6, 0, ctxt->port),
                       &retries)) {
            dcc_info_unlock(0);
            return 0;
      }
      if (ctxt->soc == INVALID_SOCKET) {
            /* stop using IPv6 before falling back to IPv4 */
            dcc_clnt_info->flags &= ~DCC_INFO_FG_IPV6;
            if (!dcc_udp_bind(emsg, &ctxt->soc,
                          dcc_mk_su(&su, AF_INET, 0, ctxt->port),
                          &retries)) {
                  dcc_info_unlock(0);
                  return 0;
            }
      }
#if !defined(USE_POLL) && !defined(DCC_WIN32)
      /* the descriptor must fit in an fd_set */
      if (ctxt->soc >= FD_SETSIZE) {
            dcc_pemsg(EX_IOERR, emsg, "socket FD %d > FD_SETSIZE %d",
                    ctxt->soc, FD_SETSIZE);
            dcc_info_unlock(0);
            dcc_clnt_soc_close(ctxt);
            return 0;
      }
#endif

#if defined(IPPROTO_IP) && defined(IP_TTL)
      if (dcc_debug_ttl != 0
          && 0 > setsockopt(ctxt->soc, IPPROTO_IP, IP_TTL,
                        (void *)&dcc_debug_ttl, sizeof(dcc_debug_ttl))) {
            /* format the error before anything can change it */
            dcc_pemsg(EX_IOERR, emsg, "setsockopt(TTL=%d):%s",
                    dcc_debug_ttl, ERROR_STR());
            dcc_info_unlock(0);
            dcc_clnt_soc_close(ctxt);
            return 0;
      }
#endif

      /* discover the port and address family that we really got */
      soc_len = sizeof(su);
      if (0 > getsockname(ctxt->soc, &su.sa, &soc_len)) {
            /* format the error before anything can change it */
            dcc_pemsg(EX_IOERR, emsg, "getsockname(): %s", ERROR_STR());
            dcc_info_unlock(0);
            dcc_clnt_soc_close(ctxt);
            return 0;
      }
      ctxt->port = *DCC_SU_PORT(&su);
      if (su.sa.sa_family == AF_INET)
            ctxt->flags |= DCC_CTXT_USING_IPV4;
      else
            ctxt->flags &= ~DCC_CTXT_USING_IPV4;
      return 1;
}



/* Receive one datagram on the socket of a context.
 *      *sup is cleared and then receives the source address.
 *      Rrecvfrom() is used instead of recvfrom() when the shared
 *      information has DCC_INFO_FG_SOCKS set.
 *      The result is that of the receive function used. */
static int
do_recv(DCC_CLNT_CTXT *ctxt, DCC_HDR *resp, int resp_len, DCC_SOCKU *sup)
{
      DCC_SOCKLEN_T from_len = sizeof(*sup);

      memset(sup, 0, sizeof(*sup));

      if (!(dcc_clnt_info->flags & DCC_INFO_FG_SOCKS))
            return recvfrom(ctxt->soc, WIN32_SOC_CAST resp, resp_len, 0,
                            &sup->sa, &from_len);

      return Rrecvfrom(ctxt->soc, WIN32_SOC_CAST resp, resp_len, 0,
                       &sup->sa, &from_len);
}



/* clear the socket buffer
 *      Receive and discard whatever is waiting on the socket.
 *      Stop quietly at the first error that DCC_BLOCK_ERROR() accepts,
 *      stop with a trace message at any error that neither it nor
 *      DCC_CONNECT_ERRORS() accepts, and give up with a trace message
 *      after 20 attempts. */
static void
dcc_clnt_soc_flush(DCC_CLNT_CTXT *ctxt)
{
      DCC_HDR junk;
      DCC_SOCKU from;
      char from_str[DCC_SU2STR_SIZE];
      int nbytes, tries;

      for (tries = 0; tries < 20; ++tries) {
            nbytes = do_recv(ctxt, &junk, sizeof(junk), &from);
            if (nbytes >= 0) {
                  /* something stray was waiting; say so only when
                   * debugging or after many attempts */
                  if (dcc_clnt_debug > 1 || tries > 10)
                        dcc_trace_msg("flush %d stray bytes", nbytes);

            } else if (DCC_BLOCK_ERROR()) {
                  return;

            } else if (DCC_CONNECT_ERRORS()) {
                  if (dcc_clnt_debug > 1 || tries > 10)
                        dcc_trace_msg("ignore flushed error: %s",
                                      ERROR_STR());

            } else {
                  /* name the source only if one was filled in */
                  dcc_trace_msg("recvfrom(%s): %s",
                                from.sa.sa_family
                                ? dcc_su2str(from_str, sizeof(from_str),
                                             &from)
                                : "",
                                ERROR_STR());
                  return;
            }
      }

      dcc_trace_msg("too many flushed packets or errors");
}



/* connect() to the server
 *    A null su asks for a disconnect, which is done by reopening the
 *    socket if it is connected.  Nothing is done when the socket is
 *    already connected to *su.  After a successful connect(), the
 *    socket buffer is flushed.
 *
 *    The contexts and shared information must be locked on entry
 *        because we might call dcc_clnt_soc_reopen().
 *    The contexts remain locked on failure.  The shared information
 *    is locked only on success. */
u_char
dcc_clnt_connect(DCC_EMSG emsg, DCC_CLNT_CTXT *ctxt,
             const DCC_SOCKU *su)   /* 0=disconnect */
{
      char sustr[DCC_SU2STR_SIZE];
      u_char had_peer;

      have_info_lock();

      /* disconnect if asked
       *    In theory you can use connect() with a "null address."
       *    In practice on some systems there is more than one or even
       *    no  notion of an effective "null" address. */
      if (su == 0) {
            if (ctxt->conn_su.sa.sa_family != AF_UNSPEC)
                  return dcc_clnt_soc_reopen(emsg, ctxt);
            return 1;
      }

      /* already properly connected */
      if (memcmp(&ctxt->conn_su, su, sizeof(ctxt->conn_su)) == 0)
            return 1;

#ifdef linux
      /* at least some versions of Linux do not allow
       * consecutive valid calls to connect(),
       * but work if the socket is first disconnected */
      if (ctxt->conn_su.sa.sa_family != AF_UNSPEC
          && !dcc_clnt_soc_reopen(emsg, ctxt))
            return 0;
#endif /* linux */

      /* note whether the socket still has a peer from before */
      had_peer = (ctxt->conn_su.sa.sa_family != AF_UNSPEC);

      if (connect(ctxt->soc, &su->sa, DCC_SU_LEN(su)) == SOCKET_ERROR) {
            dcc_pemsg(EX_IOERR, emsg, "connect(%s): %s",
                      dcc_su2str(sustr, sizeof(sustr), su),
                      ERROR_STR());
            dcc_info_unlock(0);
            dcc_clnt_soc_close(ctxt);
            return 0;
      }
      ctxt->conn_su = *su;

      /* clear ICMP Unreachable errors from previous connections;
       * the value fetched is deliberately thrown away */
      if (had_peer) {
            int pending;
            DCC_SOCKLEN_T pending_len = sizeof(pending);

            getsockopt(ctxt->soc, SOL_SOCKET, SO_ERROR,
                       &pending, &pending_len);
      }

      dcc_clnt_soc_flush(ctxt);

      return 1;
}



/* Transmit one DCC request to one server address and record it.
 *    The contexts and the shared information must be locked.
 *    Nothing is unlocked here.
 *
 *    emsg        receives the text of a failure
 *    ctxt        client context that owns the socket
 *    xlog        transmission log; its next entry describes this request
 *    class       server class containing the target address
 *    ap          target address within class->addrs
 *    msg         request; its length, sender, transaction number and
 *                signature are filled in here
 *    msg_len     size of the request including the signature
 *    connect_ok  non-zero if a connected socket may be used
 *
 *    Returns 1 if the whole request was sent and 0 otherwise. */
static u_char
clnt_xmit(DCC_EMSG emsg, DCC_CLNT_CTXT *ctxt, DCC_XLOG *xlog,
        DCC_SRVR_CLASS *class, DCC_SRVR_ADDR *ap,
        DCC_HDR *msg, int msg_len, u_char connect_ok)
{
      DCC_XLOG_ENTRY *ent, *prev;
      char namebuf[80];
      int sent;

      msg->len = htons(msg_len);

      /* the log must have room for this transmission */
      ent = xlog->next;
      if (ent > xlog->last)
            abort();

      /* each transmission carries a new transaction number */
      ++msg->op_nums.t;

      /* describe the transmission in the log entry */
      ent->op_nums = msg->op_nums;
      ent->addr_inx = ap - class->addrs;
      dcc_mk_su(&ent->su, ap->ip.family, &ap->ip.u, ap->ip.port);
      ent->addrs_gen = class->gen;
      ent->sent_us = ctxt->now_us;
      ent->op = msg->op;

      /* pick the client-ID; no password means anonymous */
      ent->id = class->nms[ap->nm_inx].clnt_id;
      if (ent->id == 0)
            dcc_logbad(EX_SOFTWARE, "clnt_xmit: zero client-ID");
      if (class->nms[ap->nm_inx].passwd[0] == '\0')
            ent->id = DCC_ID_ANON;
      msg->sender = htonl(ent->id);

      if (ent->id == DCC_ID_ANON) {
            /* anonymous requests carry an all-zero signature */
            ent->passwd[0] = '\0';
            memset((char *)msg + (msg_len-sizeof(DCC_SIGNATURE)), 0,
                   sizeof(DCC_SIGNATURE));
      } else {
            /* keep the password so the answer can be checked later */
            strncpy(ent->passwd, class->nms[ap->nm_inx].passwd,
                  sizeof(ent->passwd));
            dcc_sign(ent->passwd, sizeof(ent->passwd), msg, msg_len);
      }

      /* A connected socket delivers ICMP Unreachable errors, but it can
       * only be used while every unhandled request in the log went to
       * this same address. */
      if (connect_ok) {
            for (prev = xlog->base; prev < ent; ++prev) {
                  if (prev->op_nums.t != DCC_OP_NUMS_NULL
                      && ent->addr_inx != prev->addr_inx) {
                        connect_ok = 0;
                        break;
                  }
            }
      }
      if (!dcc_clnt_connect(emsg, ctxt,
                        connect_ok ? &ent->su : 0))
            return 0;

      if (ctxt->conn_su.sa.sa_family == AF_UNSPEC) {
            /* unconnected socket, so name the destination */
            if (dcc_clnt_info->flags & DCC_INFO_FG_SOCKS)
                  sent = Rsendto(ctxt->soc, WIN32_SOC_CAST msg, msg_len,
                               0, &ent->su.sa, DCC_SU_LEN(&ent->su));
            else
                  sent = sendto(ctxt->soc, WIN32_SOC_CAST msg, msg_len,
                              0, &ent->su.sa, DCC_SU_LEN(&ent->su));
      } else {
            if (dcc_clnt_info->flags & DCC_INFO_FG_SOCKS)
                  sent = Rsend(ctxt->soc, WIN32_SOC_CAST msg, msg_len, 0);
            else
                  sent = send(ctxt->soc, WIN32_SOC_CAST msg, msg_len, 0);
      }

      /* count the attempt whether or not it worked */
      ++xlog->cur[ap - class->addrs].xmits;

      if (sent != msg_len) {
            if (sent < 0) {
                  dcc_pemsg(EX_IOERR, emsg, "%s(%s): %s",
                          connect_ok ? "send" : "sendto",
                          addr2str(namebuf, sizeof(namebuf), class,
                                 class->gen, ap, 0),
                          ERROR_STR());
            } else {
                  dcc_pemsg(EX_IOERR, emsg, "%s(%s)=%d instead of %d",
                          connect_ok ? "send" : "sendto",
                          addr2str(namebuf, sizeof(namebuf), class,
                                 class->gen, ap, 0),
                          sent, msg_len);
            }
            return 0;
      }

      if (dcc_clnt_debug > 3)
            dcc_trace_msg("sent %s to %s",
                        dcc_hdr_op2str(0, 0, msg),
                        addr2str(namebuf, sizeof(namebuf), class,
                               class->gen, ap, 0));

      /* commit the log entry unless it is the final slot of the log */
      if (ent < xlog->last) {
            ++xlog->next;
            ++xlog->outstanding;
      }
      return 1;
}



/* Fold one round-trip measurement into the average RTT of a server address.
 *    ctxt    supplies the current time in ctxt->now
 *    class   server class that owns the address
 *    xloge   log entry of the measured request; it names the address
 *            and the generation of the address list when it was sent
 *    us      measured round trip in microseconds; it is clamped to
 *            0...DCC_MAX_RTT
 *
 *    Nothing is changed unless the shared information is locked and the
 *    address list still has the generation recorded in the log entry. */
static void
update_rtt(DCC_CLNT_CTXT *ctxt, DCC_SRVR_CLASS *class, DCC_XLOG_ENTRY *xloge,
         int us)
{
      DCC_SRVR_ADDR *ap;

      /* compute a new RTT only if the map data structure is locked
       * and we're talking about the same hosts */
      if (!info_locked
          || xloge->addrs_gen != class->gen)
            return;

      ap = &class->addrs[xloge->addr_inx];

      if (us < 0)
            us = 0;

      if (us > DCC_MAX_RTT)
            us = DCC_MAX_RTT;

      if (ap->rtt == DCC_RTT_BAD) {
            /* just set the RTT if this is a newly working server
             * and forget its old response history.
             * Zeroing rtt_updated makes the next measurement count
             * as the first in a long time. */
            ap->rtt = us;
            ap->total_xmits = 0;
            ap->total_resps = 0;
            ap->resp_mem = 0;
            ap->rtt_updated = 0;

      } else if (ctxt->now.tv_sec >= ap->rtt_updated + FAST_RTT_SECS) {
            /* adjust the RTT quickly if this is the first
             * measurement in a long time.
             * The test used to be inverted, so that the quick
             * adjustment was applied only to measurements that
             * closely followed another one. */
            AGE_AVG(ap->rtt, us, 2, 1);
            ap->rtt_updated = ctxt->now.tv_sec;

      } else {
            /* adjust slowly when there is a recent measurement */
            AGE_AVG(ap->rtt, us, 9, 1);
            ap->rtt_updated = ctxt->now.tv_sec;
      }
}



/* Update response rates and penalize the RTT of servers that failed to
 * respond.
 *    The data must be locked.
 *
 *    ctxt        context whose transmission log, ctxt->xlog, is examined
 *    class       server class whose addresses are updated
 *    measuring   non-zero at the end of an RTT measurement cycle.
 *                Then addresses that answered are counted in
 *                xlog.working_addrs, and an address with no answer in
 *                this cycle and none in its recent response history
 *                is marked DCC_RTT_BAD.
 *
 *    Every log entry is marked handled, and the per-address transmission
 *    and response counters in xlog.cur[] are consumed. */
static void
resp_rates(DCC_CLNT_CTXT *ctxt, DCC_SRVR_CLASS *class,
         u_char measuring)
{
      DCC_SRVR_ADDR *ap;
      DCC_XLOG_ENTRY *xloge;
      const DCC_XLOG_ENTRY *xloge2;
      int us, us2;
      u_char seen;
      int i;

      for (xloge = ctxt->xlog.base; xloge < ctxt->xlog.next; ++xloge) {
            /* ignore responses we've already handled */
            if (xloge->op_nums.t == DCC_OP_NUMS_NULL)
                  continue;

            ap = &class->addrs[xloge->addr_inx];

            /* Update the RTT of this server as if we would have received
             * a response if we had waited a little longer, unless we
             * would be assuming a faster RTT than its current average.
             *
             * Use the longest of the time spent waiting for this request
             * and the delays of any requests that were answered by the
             * server. */
            us = ctxt->now_us - xloge->sent_us;
            seen = 0;
            for (xloge2=ctxt->xlog.base; xloge2<ctxt->xlog.next; ++xloge2) {
                  if (xloge2->addr_inx != xloge->addr_inx
                      || xloge2 == xloge)
                        continue;
                  /* another entry for this address is still unhandled */
                  if (xloge2->op_nums.t != DCC_OP_NUMS_NULL) {
                        seen = 1;
                        continue;
                  }
                  us2 = ctxt->now_us - xloge2->sent_us;
                  if (us < us2)
                        us = us2;
            }
            /* Update the RTT only if we waited at least as long as the
             * current RTT, so that a silent server never appears faster
             * than its average.
             * The time waited is `us`.  This test used to compare
             * ctxt->now_us, which is never smaller than `us` and so let
             * shorter waits through.
             * NOTE(review): the original comment said "or we received at
             * least one response", but the code requires `seen` as well;
             * that is left unchanged -- confirm which is intended. */
            if (us >= ap->rtt
                && seen)
                  update_rtt(ctxt, class, xloge, us + DCC_DCCD_DELAY);
            xloge->op_nums.t = DCC_OP_NUMS_NULL;
      }

      /* maintain the response rate */
      for (i = 0, ap = class->addrs; i < DIM(ctxt->xlog.cur); ++i, ++ap) {
            /* skip addresses that are bad or were not tried */
            if (ap->rtt == DCC_RTT_BAD
                || ctxt->xlog.cur[i].xmits == 0)
                  continue;
            if (measuring) {
                  if (ctxt->xlog.cur[i].resps != 0) {
                        ++ctxt->xlog.working_addrs;
                  } else if (!(ap->resp_mem & ((1<<DCC_MAX_XMITS)-1))) {
                        /* this server is bad if there were no answers
                         * at all for this measurement cycle */
                        ap->rtt = DCC_RTT_BAD;
                        continue;
                  }
            }
            ap->total_xmits += ctxt->xlog.cur[i].xmits;
            if (ap->total_xmits > DCC_TOTAL_XMITS_MAX)
                  ap->total_xmits = DCC_TOTAL_XMITS_MAX;
            /* Shift one bit into the response history for each
             * transmission, 1 for answered and 0 for not, and keep
             * total_resps equal to the number of 1 bits remembered. */
            do {
                  /* forget the oldest remembered transmission */
                  ap->total_resps -= (ap->resp_mem
                                  >> (DCC_TOTAL_XMITS_MAX-1));
                  ap->resp_mem <<= 1;
                  if (ctxt->xlog.cur[i].resps != 0) {
                        ap->resp_mem |= 1;
                        ++ap->total_resps;
                        --ctxt->xlog.cur[i].resps;
                  }
            } while (--ctxt->xlog.cur[i].xmits != 0);
      }
}



/* receive a single DCC response
 *      The contexts must be locked.
 *      The mapped or common info ought to be locked, but reception
 *      works if it is not.
 *
 *      Packets are read with do_recv() until one can be matched to an
 *      entry in the transmission log or an error stops the reading.
 *      Malformed, unmatched, stray, and badly signed packets are
 *      reported and skipped.
 *
 *      emsg        receives the text of a fatal error
 *      ctxt        client context that owns the socket
 *      class       server class whose addresses the log entries index
 *      resp        buffer for the received packet
 *      resp_len    size of that buffer
 *      msg         the request; the h, p, and r transaction numbers of
 *                  a response must equal those of the request
 *      xlog        log of transmissions that is searched for a match
 *      xlogep      set to the matching log entry for results 1 and 2,
 *                  and to 0 otherwise
 */
static int      /* -1=fatal error, 0=no data, 1=unreachable, 2=ok */
clnt_recv(DCC_EMSG emsg, DCC_CLNT_CTXT *ctxt, DCC_SRVR_CLASS *class,
        DCC_HDR *resp, int resp_len,
        const DCC_HDR *msg, DCC_XLOG *xlog, DCC_XLOG_ENTRY **xlogep)
{
      DCC_SOCKU su;
      DCC_XLOG_ENTRY *xloge, *xloge1;
      DCC_SRVR_ADDR *ap;
      char abuf[80];
      char str[DCC_SU2STR_SIZE];
      int pkt_len;

      *xlogep = 0;
      for (;;) {
            pkt_len = do_recv(ctxt, resp, resp_len, &su);
            if (pkt_len < 0) {
                  /* Stop looking when there are no more packets */
                  if (DCC_BLOCK_ERROR())
                        return 0;

                  /* ignore ICMP Unreachables unless we have a
                   * single outstanding request */
                  if (ctxt->conn_su.sa.sa_family != AF_UNSPEC
                      && DCC_CONNECT_ERRORS()) {
                        /* Find the last unhandled log entry.
                         * Every earlier unhandled entry is marked
                         * handled as the scan passes it. */
                        for (xloge1 = xlog->base, xloge = 0;
                             xloge1 < xlog->next;
                             ++xloge1) {
                              if (xloge1->op_nums.t==DCC_OP_NUMS_NULL)
                                  continue;
                              if (xloge)
                                  xloge->op_nums.t = DCC_OP_NUMS_NULL;
                              xloge = xloge1;
                        }
                        /* the error belongs to no request of ours */
                        if (!xloge) {
                              if (dcc_clnt_debug)
                                  dcc_trace_msg("ignore unmatched:"
                                          " %s", ERROR_STR());
                              continue;
                        }
                        if (dcc_clnt_debug)
                              dcc_trace_msg("note recvfrom(%s): %s",
                                          dcc_su2str(str,
                                            sizeof(str),
                                            &ctxt->conn_su),
                                          ERROR_STR());
                        /* The request is finished and nothing remains
                         * outstanding.  The address is marked bad, and
                         * the error is counted as its response. */
                        xloge->op_nums.t = DCC_OP_NUMS_NULL;
                        xlog->outstanding = 0;
                        class->addrs[xloge->addr_inx].rtt = DCC_RTT_BAD;
                        ++xlog->cur[xloge->addr_inx].resps;
                        *xlogep = xloge;
                        return 1;
                  }

                  /* any other error is fatal */
                  dcc_pemsg(EX_IOERR, emsg, "recvfrom(%s): %s",
                          su.sa.sa_family
                          ? dcc_su2str(str, sizeof(str), &su) : "",
                          ERROR_STR());
                  return -1;
            }

            /* discard packets with impossible lengths */
            if (pkt_len > resp_len) {
                  dcc_su2str(str, sizeof(str), &su);
                  trace_bad_packet(xlog, &su, str, (int *)resp,
                               "recv(%s)=%d > %d", str, pkt_len,
                               resp_len);
                  continue;
            }
            /* a response must hold at least a header and a signature */
            if (pkt_len < ISZ(DCC_HDR)+ISZ(DCC_SIGNATURE)) {
                  dcc_su2str(str, sizeof(str), &su);
                  trace_bad_packet(xlog, &su, str, (int *)resp,
                               "recv(%s)=%d < %d", str, pkt_len,
                               ISZ(DCC_HDR)+ISZ(DCC_SIGNATURE));
                  continue;
            }
            /* the length in the header must agree with what arrived */
            if (pkt_len != ntohs(resp->len)) {
                  dcc_su2str(str, sizeof(str), &su);
                  trace_bad_packet(xlog, &su, str, (int *)resp,
                               "recv(%s)=%d but hdr len=%d from %s",
                               str, pkt_len,
                               ntohs(resp->len),
                               dcc_su2str(abuf, sizeof(abuf), &su));
                  continue;
            }

            /* discard protocol versions outside the supported range */
            if (resp->pkt_vers < DCC_PKT_VERSION_MIN
                || resp->pkt_vers > DCC_PKT_VERSION_MAX) {
                  dcc_su2str(str, sizeof(str), &su);
                  trace_bad_packet(xlog, &su, str, (int *)resp,
                               "unrecognized version #%d from %s",
                               resp->pkt_vers, str);
                  continue;
            }

            /* We cannot use the server's apparent IP address because it
             * might be multi-homed and respond with an address other than
             * the address to which we sent.  So use our records of
             * which OP_NUMS was sent to which server address. */
            if (resp->op_nums.r != msg->op_nums.r
                || resp->op_nums.p != msg->op_nums.p
                || resp->op_nums.h != msg->op_nums.h) {
                  if (dcc_clnt_debug)
                        dcc_trace_msg("unmatched response from %s"
                                    " h=%#x/%#x p=%#x/%#x r=%#x/%#x"
                                    " t=%#x",
                                    dcc_su2str(str, sizeof(str), &su),
                                    resp->op_nums.h, msg->op_nums.h,
                                    resp->op_nums. p, msg->op_nums.p,
                                    resp->op_nums.r, msg->op_nums.r,
                                    resp->op_nums.t);
                  continue;
            }

            /* the transmission number picks the log entry, and so
             * the server address, that is being answered */
            for (xloge = xlog->base; xloge < xlog->next; ++xloge) {
                  if (resp->op_nums.t == xloge->op_nums.t)
                        break;
            }
            if (xloge >= xlog->next) {
                  if (dcc_clnt_debug)
                        dcc_trace_msg("stray response from %s"
                                    " h=%#x p=%#x r=%#x t=%#x/%#x",
                                    dcc_su2str(str, sizeof(str), &su),
                                    resp->op_nums.h, resp->op_nums.p,
                                    resp->op_nums.r,
                                    resp->op_nums.t, msg->op_nums.t);
                  continue;
            }

            ap = &class->addrs[xloge->addr_inx];

#ifdef CLNT_LOSSES
            /* testing aid: pretend that every fifth answer is lost */
            if ((++clnt_losses % 5) == 0) {
                  dcc_trace_msg("dropped answer from %s",
                              addr2str(abuf, sizeof(abuf), class,
                                     xloge->addrs_gen, ap, &su));
                  continue;
            }
#endif
            /* Accept a signature made with the password used for the
             * request, or failing that, one made with the transaction
             * numbers of the request. */
            if (xloge->passwd[0] != '\0'
                && dcc_ck_signature(xloge->passwd, sizeof(xloge->passwd),
                              resp, pkt_len)) {
                  /* good signature */
                  if (dcc_clnt_debug > 3)
                        dcc_trace_msg("received response from %s"
                                    " h=%#x p=%#x r=%#x t=%#x/%#x",
                                    dcc_su2str(str, sizeof(str), &su),
                                    resp->op_nums.h, resp->op_nums.p,
                                    resp->op_nums.r,
                                    resp->op_nums.t, msg->op_nums.t);

            } else if (dcc_ck_signature((char *)&xloge->op_nums,
                                  sizeof(xloge->op_nums),
                                  resp, pkt_len)) {
                  /* server did not sign with our password,
                   * but with our transaction numbers */
                  if (xloge->passwd[0] != '\0')
                        dcc_error_msg("%s rejected our password"
                                    " for ID %d",
                                    addr2str(abuf, sizeof(abuf),
                                           class, xloge->addrs_gen,
                                           ap, &su),
                                    xloge->id);
            } else {
                  /* neither signature fits, so do not trust the packet */
                  dcc_error_msg("badly signed %s response from %s",
                              dcc_hdr_op2str(0, 0, msg),
                              addr2str(abuf, sizeof(abuf), class,
                                     xloge->addrs_gen, ap, &su));
                  continue;
            }

            /* don't find the record of this transmission again */
            xloge->op_nums.t = DCC_OP_NUMS_NULL;
            if (xlog->outstanding != 0)
                  --xlog->outstanding;
            ++xlog->cur[xloge->addr_inx].resps;
            *xlogep = xloge;

            /* Notice if multi-homing is involved
             * That is true if the address from which the server answered
             * differs from the address to which we sent */
            if (!(ap->flags & DCC_SRVR_ADDR_MHOME)
                && dcc_cmp_ap2su(ap, &su)) {
                  if (dcc_clnt_debug)
                        dcc_trace_msg("%s multi-homed at %s",
                                    addr2str(abuf, sizeof(abuf),
                                           class, xloge->addrs_gen,
                                           ap, 0),
                                    dcc_su2str(str,sizeof(str), &su));
                  ap->flags |= DCC_SRVR_ADDR_MHOME;
            }

            return 2;
      }
}



/* Wait until a socket can be read or written, or until time runs out.
 *    With a timeout, the wait is restarted after an interruption that
 *    DCC_SELECT_NERROR() accepts.  Without a timeout (usec < 0), such an
 *    interruption ends the wait with a result of 0. */
int                           /* -1=error, 0=timeout, 1=ready */
dcc_select_poll(DCC_EMSG emsg,
            SOCKET fd,
            u_char rd,        /* 1=read 0=write */
            int usec)         /* <0=forever until signal */
{
#ifdef USE_POLL
      struct pollfd pfd;
      int timeout_ms;
      int ready;

      /* poll() counts milliseconds, so round the microseconds up */
      timeout_ms = (usec < 0) ? -1 : (usec+999)/1000;

      do {
            pfd.fd = fd;
            /* Some versions of Linux define POLLRDNORM and friends in
             * asm/poll.h with values for POLLIN, POLLPRI, etc. that
             * clash with sys/poll.h.
             * Asking about normal, band, and priority data may be
             * more than is needed, but the poll() documentation on
             * some systems says that POLLIN alone tells nothing
             * about the other kinds of data. */
#ifdef POLLRDNORM
            pfd.events = rd
                        ? (POLLIN | POLLRDNORM | POLLRDBAND | POLLPRI)
                        : (POLLOUT| POLLWRNORM | POLLWRBAND | POLLPRI);
#else
            pfd.events = rd ? POLLIN : POLLOUT;
#endif
            pfd.revents = 0;

            ready = poll(&pfd, 1, timeout_ms);
            if (ready >= 0)
                  return ready;

            /* only tolerated errors are worth another try */
            if (!DCC_SELECT_NERROR()) {
                  dcc_pemsg(EX_OSERR, emsg, "poll(): %s", ERROR_STR());
                  return -1;
            }
      } while (usec >= 0);

      /* waiting forever was ended by a signal */
      return 0;
#else
      struct timeval tmo, *tmop;
      fd_set fdset;
      int ready;

      /* a null timeout makes select() wait indefinitely */
      tmop = 0;
      if (usec >= 0) {
            us2tv(&tmo, usec);
            tmop = &tmo;
      }

      FD_ZERO(&fdset);
      do {
            FD_SET(fd, &fdset);
            ready = rd
                  ? select(fd+1, &fdset, 0, 0, tmop)
                  : select(fd+1, 0, &fdset, 0, tmop);
            if (ready >= 0)
                  return ready;

            /* only tolerated errors are worth another try */
            if (!DCC_SELECT_NERROR()) {
                  dcc_pemsg(EX_OSERR, emsg, "select(): %s", ERROR_STR());
                  return -1;
            }
      } while (usec >= 0);

      /* waiting forever was ended by a signal */
      return 0;
#endif
}



/* Make initial estimates of the RTT to all known servers
 *      The RTT's help the client pick a server that will respond quickly and
 *      reliably and to know when to retransmit a request that is lost due
 *      to network congestion or bit rot.
 * All 4 locks must be held on entry.  Require the resolving lock so that
 *    only one process or thread sends bursts of NOPs.
 * All 4 locks are held on success
 * Only the info lock is released on failure.
 *
 *      emsg    receives the text of a failure
 *      ctxt    client context; its transmission log is reset and used here
 *      class   server class whose addresses are probed
 *
 * NOPs are sent to every address that has not yet answered, at most
 * DCC_MAX_XMITS times, with the info and contexts locks released while
 * waiting for answers.  Afterwards resp_rates() and pick_srvr() digest
 * the results. */
static u_char                       /* 0=failed, 1=at least 1 good server */
measure_rtt(DCC_EMSG emsg, DCC_CLNT_CTXT *ctxt, DCC_SRVR_CLASS *class)
{
      DCC_SRVR_ADDR *ap;
      DCC_NOP nop;
      union {
          DCC_HDR     hdr;
          DCC_OK      ok;
          DCC_ERROR   error;
      } resp;
      int delay_us, next_xmit;
      int nfds, xmit_num;
      int tgt_addrs;
      DCC_XLOG_ENTRY *xloge;
      char opbuf[DCC_OPBUF], abuf[80];
      u_char vers;
      u_char connect_ok;
      int tgts;
      int i;

      have_info_lock();
      have_resolve_lock();

      /* Send NOP's to all addresses and wait for responses to
       * measure each server's health and RTT.
       * Treat all addresses as if they are of independent hosts */

      /* build the NOP from the shared prototype header,
       * with a new .r transaction number */
      memcpy(&nop.hdr, &dcc_clnt_info->proto_hdr, sizeof(nop.hdr));
      /* servers ignore the version on NOPs except to guess what we
       * will accept */
      nop.hdr.pkt_vers = DCC_PKT_VERSION;
      nop.hdr.op_nums.p = getpid();
      nop.hdr.op = DCC_OP_NOP;
      nop.hdr.op_nums.r = ++dcc_clnt_info->proto_hdr.op_nums.r;

      if (!get_now(emsg, ctxt)) {
            dcc_info_unlock(0);
            return 0;
      }
      if (emsg)
            *emsg = '\0';

      tgt_addrs = class->num_addrs;
      if (tgt_addrs <= 0) {
            dcc_pemsg(EX_NOHOST, emsg, "no server addresses");
            dcc_info_unlock(0);
            return 0;
      }
      /* stop waiting for responses when we have enough working servers */
      if (!dcc_all_srvrs && tgt_addrs > 4)
            tgt_addrs = 4;

      /* start with an empty transmission log */
      memset(&ctxt->xlog, 0, sizeof(ctxt->xlog));
      ctxt->xlog.base = ctxt->xlog.next = ctxt->xlog_entries;
      ctxt->xlog.last = LAST(ctxt->xlog_entries);
      delay_us = 0;
      next_xmit = 0;
      xmit_num = 0;
      /* wait for the responses to the NOPs and retransmit as needed */
      for (;;) {
            /* wait quietly until time to retransmit */
            if (delay_us <= 0) {
                  if (xmit_num >= DCC_MAX_XMITS)
                        break;
                  /* NOTE(review): in the part of the file visible here,
                   * working_addrs is incremented only by resp_rates(),
                   * which runs after this loop -- confirm that this
                   * test can succeed. */
                  if (ctxt->xlog.working_addrs >= tgt_addrs) {
                        /* do not retransmit if we have heard from
                         *    enough servers
                         * quit if we have waited at least one RTT */
                        if (xmit_num > 0)
                              break;
                        delay_us = 0;
                        next_xmit = ctxt->now_us;

                  } else {
                        /* get delay & time of next transmission */
                        delay_us = dcc_retrans_time(DCC_MIN_RTT,
                                          xmit_num++);
                        next_xmit = delay_us + ctxt->now_us;

                        /* count the addresses that still need a NOP,
                         * and forbid connect() if any of them is
                         * known to be multi-homed */
                        connect_ok = 1;
                        tgts = 0;
                        for (i = 0, ap = class->addrs;
                             ap <= LAST(class->addrs);
                             ++i, ++ap) {
                              if (ap->ip.family == 0
                                  || ctxt->xlog.cur[i].resps != 0)
                                  continue;
                              if (ap->flags & DCC_SRVR_ADDR_MHOME)
                                  connect_ok = 0;
                              ++tgts;
                        }
                        /* Use a connected socket early to get
                         *    ICMP error messages from single server.
                         * no connection later to detect multi-homing
                         *    that makes a server appear deaf */
                        if (tgts > 1
                            || xmit_num > DCC_MAX_XMITS/2)
                              connect_ok = 0;
                        /* send a NOP to each address that is in use
                         * and has not answered */
                        for (i = 0, ap = class->addrs;
                             tgts > 0 && ap <= LAST(class->addrs);
                             ++i, ++ap) {
                              if (ap->ip.family == 0
                                  || ctxt->xlog.cur[i].resps != 0)
                                  continue;
                              /* trace and clear an old message so
                               * that it is not mistaken for a new
                               * failure */
                              if (emsg && *emsg != '\0') {
                                  if (dcc_clnt_debug)
                                    dcc_trace_msg("%s", emsg);
                                  *emsg = '\0';
                              }
                              --tgts;
                              if (!clnt_xmit(emsg, ctxt, &ctxt->xlog,
                                           class, ap,
                                           &nop.hdr, sizeof(nop),
                                           connect_ok)) {
                                  dcc_info_unlock(0);
                                  return 0;
                              }
                        }
                  }

                  /* stop if nothing to wait for */
                  if (!ctxt->xlog.outstanding)
                        break;
            }

            /* Release the info and contexts locks while waiting.
             * Every failure return below happens with the contexts
             * locked again and the info not locked. */
            if (!dcc_info_unlock(emsg))
                  return 0;
            dcc_ctxts_unlock();
            nfds = dcc_select_poll(emsg, ctxt->soc, 1, delay_us);
            if (nfds < 0) {
                  dcc_ctxts_lock();
                  return 0;
            }
            i = get_now(emsg, ctxt);
            dcc_ctxts_lock();
            /* give up if someone radically changed the local clock */
            if (!i)
                  return 0;
            if (!dcc_info_lock(emsg))
                  return 0;

            /* collect every answer that has arrived */
            if (nfds > 0) {
                  for (;;) {
                        i = clnt_recv(emsg, ctxt, class,
                                    &resp.hdr, sizeof(resp),
                                    &nop.hdr, &ctxt->xlog, &xloge);
                        /* no more data, or a fatal error */
                        if (i <= 0)
                              break;

                        if (i == 1) /* otherwise ignore Unreachable */
                              continue;

                        /* record the results of a probe, and notice
                         * if the server is the best so far */
                        ap = &class->addrs[xloge->addr_inx];

                        /* anything but OK means the server is unusable */
                        if (resp.hdr.op != DCC_OP_OK) {
                              if (dcc_clnt_debug)
                                  dcc_trace_msg("RTT NOP answered"
                                          " with %s by %s",
                                          dcc_hdr_op2str(opbuf,
                                              sizeof(opbuf),
                                              &resp.hdr),
                                          addr2str(abuf,
                                              sizeof(abuf),
                                              class,
                                              xloge->addrs_gen,
                                              ap, 0));
                              ap->rtt = DCC_RTT_BAD;
                              continue;
                        }

                        /* NOTE(review): the resp.ok fields below are
                         * read without checking that the answer is as
                         * long as a DCC_OK; clnt_recv() only insists
                         * on a header and a signature -- confirm. */
                        /* clamp the server's protocol version to the
                         * range that we support */
                        vers = resp.ok.max_pkt_vers;
                        if (vers >= DCC_PKT_VERSION_MAX)
                              vers = DCC_PKT_VERSION_MAX;
                        else if (vers < DCC_PKT_VERSION_MIN)
                              vers = DCC_PKT_VERSION_MIN;
                        ap->srvr_pkt_vers = vers;
                        ap->srvr_id = ntohl(resp.hdr.sender);
                        memcpy(ap->brand, resp.ok.brand,
                               sizeof(ap->brand));
                        /* the server reports milliseconds;
                         * srvr_wait is kept in microseconds */
                        ap->srvr_wait = ntohs(resp.ok.qdelay_ms)*1000;

                        /* the measured delay plus the delay reported
                         * by the server */
                        update_rtt(ctxt, class, xloge,
                                 ctxt->now_us - xloge->sent_us
                                 + ap->srvr_wait);
                  }
            }

            /* do not wait any longer if nothing is outstanding or
             * enough servers have answered after more than one
             * transmission */
            if (ctxt->xlog.outstanding == 0
                || (ctxt->xlog.working_addrs >= tgt_addrs
                  && xmit_num > 1))
                  next_xmit = ctxt->now_us;
            delay_us = next_xmit - ctxt->now_us;
      }

      /* digest the counts of transmissions and responses */
      resp_rates(ctxt, class, 1);

      if (!pick_srvr(emsg, class)) {
            fail_more(ctxt, class);
            dcc_info_unlock(0);
            return 0;
      }

      /* maintain long term average that is used to switch back to
       * a good server that temporarily goes bad */
      if (class->thold_rtt == DCC_RTT_BAD) {
            /* There is no point in trying to change servers
             * Maybe we have only 1 */
            class->avg_thold_rtt = DCC_RTT_BAD;
      } else if (class->avg_thold_rtt == -DCC_RTT_BAD) {
            /* We are being forced to consider changing servers.
             * The threshold for changing will be based on the RTT
             * for the new server */
            class->avg_thold_rtt = class->base_rtt;
      } else {
            AGE_AVG(class->avg_thold_rtt, class->base_rtt, 9, 1);
      }

      /* do not measure again for a while */
      class->measure = ctxt->now.tv_sec+FAST_RTT_SECS;

      /* Several systems do not update the mtimes of files modified with
       * mmap().  Some like BSD/OS delay changing the mtime until the file
       * accessed with read().  Others including filesystems on some
       * versions of Linux apparently never change the mtime. */
      dcc_set_mtime(emsg, dcc_info_nm, info_fd, 0);

      /* a message left at this point is only traced, since we succeeded */
      if (emsg && *emsg != '\0') {
            if (dcc_clnt_debug)
                  dcc_trace_msg("%s", emsg);
            *emsg = '\0';
      }

      return 1;
}



/* Get and write-lock common info
 *      The contexts must be locked.
 *      The contexts remain locked on failure.  The shared information
 *        is locked only on success.
 *
 *      emsg        buffer for an error message
 *      ctxt        client context whose socket and clock are used
 *      clnt_flags  DCC_CLNT_FG_* bits examined here:
 *                    DCC_CLNT_FG_RETRY       do not restart the start time
 *                    DCC_CLNT_FG_GREY        selects the server class
 *                    DCC_CLNT_FG_NO_FAIL     skip the ck_fail_time() checks
 *                    DCC_CLNT_FG_NO_SRVR_OK  do not switch servers because
 *                                            of a slow RTT and tolerate a
 *                                            failed RTT measurement
 *
 *      Besides locking, this routine re-opens the socket if needed,
 *      picks a server, re-resolves server names when they are stale,
 *      and measures RTTs when no server is currently chosen. */
u_char                              /* 0=failed 1=ok */
dcc_clnt_rdy(DCC_EMSG emsg,
           DCC_CLNT_CTXT *ctxt,
           u_char clnt_flags)       /* DCC_CLNT_FG_* */
{
      DCC_SRVR_CLASS *class;
      u_char locked_resolve;        /* 1=we hold the resolving lock */

      if (!dcc_info_lock(emsg))
            return 0;

      if (!(clnt_flags & DCC_CLNT_FG_RETRY))
            get_start_time(ctxt);

      /* just fail if things were broken and it's too soon to try again */
      class = DCC_GREY2CLASS(clnt_flags & DCC_CLNT_FG_GREY);
      if (!(clnt_flags & DCC_CLNT_FG_NO_FAIL)
          && !ck_fail_time(emsg, ctxt, class)) {
            dcc_info_unlock(emsg);
            return 0;
      }

      /* re-open the socket if it is closed,
       * or we have switched between IPv4 and IPv6,
       * or if the local address has changed
       * or if the local address was broken and we have not checked
       *    recently */
      if (ctxt->soc == INVALID_SOCKET
          || ((ctxt->flags & DCC_CTXT_USING_IPV4)!=0) != (DCC_INFO_IPV6()==0)
          || (memcmp(&dcc_clnt_info->src, &ctxt->bind_ip,
                   sizeof(dcc_clnt_info->src))
            && (!(ctxt->flags & DCC_CTXT_BAD_SRC)
                || DCC_IS_TIME(ctxt->start.tv_sec, ctxt->bind_time,
                           DCC_CTXT_REBIND_SECS)))) {
            if (!dcc_clnt_soc_reopen(emsg, ctxt)) {
                  dcc_info_unlock(emsg);
                  return 0;
            }
      }

      /* try to pick a new server if the current server
       * has become slow or unreliable and the caller cares */
      if (!DCC_HAVE_SRVR(class)
          || (!(clnt_flags & DCC_CLNT_FG_NO_SRVR_OK)
            && (effective_rtt(class, &class->addrs[class->act_inx])
                > class->thold_rtt)))
            pick_srvr(emsg, class);

      /* Check for new IP addresses occasionally
       * If we cannot awaken a separate thread, do it ourself */
      if (DCC_HAVE_SRVR(class)
          && DCC_IS_TIME(ctxt->now.tv_sec, class->resolve, DCC_RE_RESOLVE)
          && !dcc_clnt_wake_resolve())
            class->act_inx = DCC_NO_SRVR;

      locked_resolve = 0;
      if (!DCC_HAVE_SRVR(class)) {
            if (!dcc_clnt_resolve_lock(emsg)) {
                  /* keep the promise that the shared information
                   * is locked only on success */
                  dcc_info_unlock(0);
                  return 0;
            }
            locked_resolve = 1;
            /* If after waiting for the resolving lock,
             *    do the work if it is still needed
             * Just fail if things became badly broken while we waited
             *    and it's too soon to try again  */
            if (!get_now(emsg, ctxt)
                || (!(clnt_flags & DCC_CLNT_FG_NO_FAIL)
                  && !ck_fail_time(emsg, ctxt, class))) {
                  dcc_clnt_resolve_unlock(0);
                  dcc_info_unlock(0);
                  return 0;
            }

            /* Things might have been fixed while we waited for the lock.
             * If not, check for new A RRs and then measure RTTs */
            if (!DCC_HAVE_SRVR(class)
                && DCC_IS_TIME(ctxt->now.tv_sec, class->resolve,
                           DCC_RE_RESOLVE)) {
                  class->act_inx = DCC_NO_SRVR;
                  if (!dcc_clnt_resolve(emsg, ctxt, class)) {
                        dcc_clnt_resolve_unlock(0);
                        dcc_info_unlock(0);
                        return 0;
                  }
            }
      }

      /* We might have switched to the current server when our
       * best server became slow.
       * If it has been a while, see if our best server is back. */
      if (DCC_HAVE_SRVR(class)
          && !(clnt_flags & DCC_CLNT_FG_NO_SRVR_OK)
          && DCC_IS_TIME(ctxt->now.tv_sec, class->measure, FAST_RTT_SECS)
          && (effective_rtt(class, &class->addrs[class->act_inx])
            > class->avg_thold_rtt)) {
            class->act_inx = DCC_NO_SRVR;
      }

      /* measure the RTTs to all of the servers and pick one */
      if (!DCC_HAVE_SRVR(class)) {
            if (!locked_resolve) {
                  if (!dcc_clnt_resolve_lock(emsg)) {
                        /* do not fail with the shared information
                         * still locked */
                        dcc_info_unlock(0);
                        return 0;
                  }
                  locked_resolve = 1;
            }
            /* measure if another thread did not do it already */
            if (!DCC_HAVE_SRVR(class)) {
                  if (!measure_rtt(emsg, ctxt, class)) {
                        /* NOTE(review): the shared information is
                         * locked again here instead of unlocked, so
                         * measure_rtt() presumably releases that lock
                         * when it fails -- confirm.
                         * A caller that can live without a server
                         * continues if the lock can be recovered. */
                        if (!(clnt_flags & DCC_CLNT_FG_NO_SRVR_OK)
                            || !dcc_info_lock(emsg)) {
                              dcc_clnt_resolve_unlock(0);
                              return 0;
                        }
                  }
            }
      }

      if (locked_resolve && !dcc_clnt_resolve_unlock(emsg)) {
            dcc_info_unlock(0);
            return 0;
      }

      dcc_clnt_soc_flush(ctxt);
      return 1;
}



/* send an operation to the server and get a response
 *      The operation and response buffers must be distinct, because the
 *        response buffer is changed before the last use of the operation
 *        buffer
 *
 *      The contexts are locked here and are unlocked again before every
 *        return.  The shared information is locked by dcc_clnt_rdy(),
 *        released while waiting for an answer, and released again
 *        with dcc_info_unlock() before returning.
 *
 *      emsg        null or buffer for an error message; cleared on entry
 *      clnt_flags  DCC_CLNT_FG_* bits, also passed to dcc_clnt_rdy()
 *      act_inxp    null or pointer to the index of the server to use;
 *                    ignored if the index is not less than the number
 *                    of known addresses
 *      srvr_idp    null or where to put the ID of the server used.
 *                    After a failure it is DCC_ID_INVALID unless a
 *                    different server was picked and there is still
 *                    time to use it.  In that case it is the ID of the
 *                    server that did not answer so that the caller knows
 *                    it can try again immediately.
 *      resp_su     null or where to put the IP address of the server used
 *      msg         request; its header is overwritten from the shared
 *                    protocol header before it is sent
 *      resp        where to put the last answer received
 *      resp_max_len  size of resp; limited here to the size of the
 *                    local receive buffer */
u_char                              /* 0=failed 1=ok */
dcc_clnt_op(DCC_EMSG emsg,
          DCC_CLNT_CTXT *ctxt,
          u_char clnt_flags,        /* DCC_CLNT_FG_* */
          const SRVR_INX *act_inxp, /* null or ptr to server index */
          DCC_SRVR_ID *srvr_idp,    /* ID of server used */
          DCC_SOCKU *resp_su,       /* IP address of server used */
          DCC_HDR *msg, int msg_len, DCC_OPS op,
          DCC_HDR *resp, int resp_max_len)
{
      DCC_SRVR_CLASS *class;
      DCC_SRVR_ADDR *cur_addr;
      int addrs_gen;
      /* receive buffer that is also used as scratch space for the
       * converted old report and for printing the server address */
      union {
          DCC_HDR     hdr;
          DCC_ANSWER    qa;
          DCC_ADMN_RESP aa;
#ifdef DCC_PKT_VERSION4
          DCC_REPORT    old_report;
#endif
          char    c[80];
      } buf;
      DCC_XLOG_ENTRY *xloge;
      int act_inx, xmit_num;
      int next_xmit, us, remaining, nfds;
      u_char unreachable, gotit;
      int i;

      if (emsg)
            *emsg = '\0';
      dcc_ctxts_lock();
      if (!dcc_clnt_info
          && !dcc_map_info(emsg, 0, -1)) {
            dcc_ctxts_unlock();
            if (srvr_idp)
                  *srvr_idp = DCC_ID_INVALID;
            return 0;
      }
      /* get & lock common info */
      if (!dcc_clnt_rdy(emsg, ctxt, clnt_flags)) {
            dcc_ctxts_unlock();
            if (srvr_idp)
                  *srvr_idp = DCC_ID_INVALID;
            return 0;
      }
      class = DCC_GREY2CLASS(clnt_flags & DCC_CLNT_FG_GREY);

      /* answers are received into buf before they are copied to resp */
      if (resp_max_len > ISZ(buf))
            resp_max_len = ISZ(buf);

      /* use server that the caller wants,
       * if the caller specified the valid index of a server */
      if (!act_inxp
          || (act_inx = *act_inxp) >= class->num_addrs)
            act_inx = class->act_inx;

      cur_addr = &class->addrs[act_inx];
      if (srvr_idp)
            *srvr_idp = cur_addr->srvr_id;
      if (resp_su)
            dcc_ip2su(resp_su, &cur_addr->ip);
      /* remember the generation of the address table to notice
       * if it changes while we are waiting without the locks */
      addrs_gen = class->gen;

      ++dcc_clnt_info->proto_hdr.op_nums.r;
      memcpy(msg, &dcc_clnt_info->proto_hdr, sizeof(*msg));
      if (cur_addr->srvr_pkt_vers > DCC_PKT_VERSION_MAX
          || cur_addr->srvr_pkt_vers < DCC_PKT_VERSION_MIN) {
            dcc_pemsg(EX_DATAERR, emsg, "impossible pkt_vers %d for %s",
                    cur_addr->srvr_pkt_vers,
                    addr2str(buf.c, sizeof(buf.c), class,
                           addrs_gen, cur_addr, 0));
            dcc_info_unlock(0);
            dcc_ctxts_unlock();
            if (srvr_idp)
                  *srvr_idp = DCC_ID_INVALID;
            return 0;
      }
#ifdef DCC_PKT_VERSION4

      /* convert new report to old
       * NOTE(review): the converted request lives in the same union
       * that receives answers below, so a retransmission after
       * something has been received into buf would send whatever is
       * then in buf -- confirm that this cannot happen. */
      if (cur_addr->srvr_pkt_vers == DCC_PKT_VERSION4
          && op == DCC_OP_REPORT
          && (ntohl(((DCC_REPORT *)msg)->tgts) & DCC_TGTS_SPAM)) {
            memcpy(&buf.old_report, msg, msg_len);
            buf.old_report.tgts = htonl(DCC_TGTS_TOO_MANY);
            msg = &buf.old_report.hdr;
      }

#endif
      msg->pkt_vers = cur_addr->srvr_pkt_vers;
      msg->op_nums.p = getpid();
      msg->op = op;
      gotit = 0;
      unreachable = 0;

      /* The measured RTTs to servers helps the client pick a server
       * that will respond quickly and reliably and to know when to
       * retransmit a request that is lost due to network congestion or
       * bit rot.
       *
       * It is desirable for a client to concentrate its reports to
       * a single server.  That makes detecting spam by this and other
       * clients quicker.
       *
       * A client should retransmit when its initial transmission is lost
       * due to bit rot or congestion.  In case the loss is due to
       * congestion, it should retransmit only a limited number of
       * times and with increasing delays between retransmissions.
       *
       * It is more important that some requests from clients reach
       * a DCC server than others.  Most DCC checksum reports are not about
       * spam, and so it is best to not spend too much network bandwidth
       * retransmitting checksum reports or to delay the processing of the
       * messages. Administrative commands must be tried harder.
       * Therefore, let the caller of this routine decide whether to retry.
       * This routine merely increases the measured RTT after failures. */

      memset(&ctxt->xlog, 0, sizeof(ctxt->xlog));
      ctxt->xlog.base = ctxt->xlog.next = ctxt->xlog_entries;
      ctxt->xlog.last = LAST(ctxt->xlog_entries);
      xmit_num = 0;
      next_xmit = ctxt->now_us;

      /* Transmit, wait for a response, and retransmit if needed.
       * The initial transmission is done as if it were a retransmission.
       * Both the contexts and the shared information are locked at the
       * top of every pass and at every break out of the loop. */
      for (;;) {
            us = next_xmit - ctxt->now_us;
            if (us <= 0) {
                  if (xmit_num >= DCC_MAX_XMITS)
                        break;

                  /* stop if we don't have enough time to wait */
                  us = dcc_retrans_time(cur_addr->rtt, xmit_num);
                  remaining =  DCC_MAX_DELAY - ctxt->now_us;
                  if (us > remaining)
                        break;

                  /* wait as long as possible on the last try */
                  if (++xmit_num == DCC_MAX_XMITS
                      && us < DCC_MAX_RTT) {
                        if (remaining > DCC_MAX_RTT)
                              us = DCC_MAX_RTT;
                        else
                              us = remaining;
                  }
                  next_xmit = us + ctxt->now_us;

                  /* because of the flooding algorithm among DCC servers,
                   * it is important that only a single server receive
                   * reports of the checksums for a mail message.
                   * That implies that retransmissions of reports must
                   * go to the original server, even if some other local
                   * client has re-resolved hostnames or switched
                   * to a better server.
                   * And that means we should not retransmit
                   * if the server address table is changed. */
                  if (addrs_gen != class->gen)
                        break;

                  /* use a connected socket early to get port
                   * unreachable ICMP error messages, but do not
                   * connect later to detect multi-homing */
                  if (!clnt_xmit(emsg, ctxt, &ctxt->xlog,
                               class, cur_addr, msg, msg_len,
                               !(cur_addr->flags & DCC_SRVR_ADDR_MHOME)
                               && xmit_num < DCC_MAX_XMITS/2
                               && ctxt->now_us <= DCC_MAX_DELAY/2))
                        break;
            }

            /* release the mapped info while we wait for an answer */
            if (!dcc_info_unlock(emsg)) {
                  dcc_ctxts_unlock();
                  if (srvr_idp)
                        *srvr_idp = DCC_ID_INVALID;
                  return 0;
            }
            dcc_ctxts_unlock();
            nfds = dcc_select_poll(emsg, ctxt->soc, 1, us);
            if (nfds < 0) {
                  /* note the error and stop waiting.
                   * The code after the loop penalizes the server and
                   * may pick another.  Like every other exit from
                   * this loop, it must run with both the contexts and
                   * the shared information locked. */
                  dcc_ctxts_lock();
                  class = DCC_GREY2CLASS(clnt_flags & DCC_CLNT_FG_GREY);
                  if (!dcc_info_lock(emsg)) {
                        dcc_ctxts_unlock();
                        if (srvr_idp)
                              *srvr_idp = DCC_ID_INVALID;
                        return 0;
                  }
                  break;
            }
            if (!get_now(emsg, ctxt)) {
                  /* simply give up if time jumped.
                   * Nothing is locked here.  Do not leave a valid
                   * server-ID that would tell the caller to try
                   * again immediately. */
                  if (srvr_idp)
                        *srvr_idp = DCC_ID_INVALID;
                  return 0;
            }

            /* recover the lock so that we can record the result of the
             * newly arrived answer in the shared and mapped file */
            dcc_ctxts_lock();
            class = DCC_GREY2CLASS(clnt_flags & DCC_CLNT_FG_GREY);
            if (!dcc_info_lock(emsg)) {
                  dcc_ctxts_unlock();
                  if (srvr_idp)
                        *srvr_idp = DCC_ID_INVALID;
                  return 0;
            }

            if (nfds > 0) {
                  /* collect everything that has arrived:
                   * clnt_recv() result <0 ends the whole operation,
                   * 0 means nothing more to read now,
                   * 1 is treated as an ICMP Unreachable report,
                   * and anything larger as an answer in buf */
                  for (;;) {
                        i = clnt_recv(emsg, ctxt, class, &buf.hdr,
                                    min(ISZ(buf), resp_max_len),
                                    msg, &ctxt->xlog, &xloge);
                        if (i <= 0)
                              break;
                        if (i == 1) {
                              /* stop delaying after the first
                               * ICMP Unreachable message,
                               * but collect everything that has
                               * already arrived */
                              unreachable = 1;
                              continue;
                        }

                        /* add the server's advertised wait to the
                         * measurement except for reports and queries */
                        update_rtt(ctxt, class, xloge,
                                 ctxt->now_us - xloge->sent_us
                                 + ((xloge->op != DCC_OP_REPORT
                                     && xloge->op != DCC_OP_QUERY)
                                    ? cur_addr->srvr_wait : 0));

                        /* save the last answer we get
                         * NOTE(review): the length comes from the
                         * packet header; presumably clnt_recv() has
                         * already checked it against the size limit
                         * given above -- confirm */
                        memcpy(resp, &buf, ntohs(buf.hdr.len));
                        gotit = 1;
                  }
                  if (i < 0 || unreachable || gotit)
                        break;
            }
      }

      /* penalize server for lost packets */
      resp_rates(ctxt, class, 0);

      /* fail if the server did not answer at all */
      if (!gotit) {
#if 0
            system("./abort_dccd");
#endif
            /* log the earlier message before it is replaced */
            if (dcc_clnt_debug
                && emsg && *emsg != '\0')
                  dcc_trace_msg("%s", emsg);
            dcc_pemsg(EX_TEMPFAIL, emsg, "no %s answer from %s after %d ms",
                    DCC_IS_GREY_STR(class),
                    addr2str(buf.c, sizeof(buf.c), class,
                           addrs_gen, cur_addr, 0),
                    ctxt->now_us/1000);
            /* Since we got no answer at all, look for a different server.
             * If we can't find any server or a different server
             * or if we have already spent too much time,
             * then don't try again for a while to not delay the MTA.
             * If we find another server, then return the valid server-ID
             * of the non-responsive server to let the caller know that it
             * can try again immediately. */
            if (act_inxp && act_inx == *act_inxp) {
                  /* but only if not using a caller-specified server */
                  if (srvr_idp)
                        *srvr_idp = DCC_ID_INVALID;
            } else if (!pick_srvr(0, class) || act_inx == class->act_inx) {
                  if (srvr_idp) {
                        if (dcc_clnt_debug)
                              dcc_trace_msg("no better alternate");
                        *srvr_idp = DCC_ID_INVALID;
                  }
                  fail_more(ctxt, class);
            } else if (i=dcc_retrans_time(class->addrs[class->act_inx].rtt,
                                    0),
                     ctxt->now_us + i >= DCC_MAX_DELAY) {
                  /* i is the first retransmission delay of the newly
                   * picked server */
                  if (srvr_idp) {
                        if (dcc_clnt_debug)
                              dcc_trace_msg("alternate too slow with"
                                          " retrans %d ms after "
                                          "%d ms",
                                          i/1000,
                                          ctxt->now_us/1000);
                        *srvr_idp = DCC_ID_INVALID;
                  }
                  fail_more(ctxt, class);
            }
            dcc_info_unlock(0);
            dcc_ctxts_unlock();
            return 0;
      }

      if (!dcc_info_unlock(emsg)) {
            dcc_ctxts_unlock();
            if (srvr_idp)
                  *srvr_idp = DCC_ID_INVALID;
            return 0;
      }
      dcc_ctxts_unlock();

      /* when debugging, log and then discard a message left by a
       * problem that did not keep us from getting an answer */
      if (dcc_clnt_debug
          && emsg && *emsg != '\0') {
            dcc_trace_msg("%s", emsg);
            *emsg = '\0';
      }
      return 1;
}

Generated by  Doxygen 1.6.0   Back to index