Logo Search packages:      
Sourcecode: dcc version File versions  Download package

ckfuz1.c

/* Distributed Checksum Clearinghouse
 *
 * compute fuzzy body checksum #1
 *
 * Copyright (c) 2005 by Rhyolite Software
 *
 * Permission to use, copy, modify, and distribute this software for any
 * purpose with or without fee is hereby granted, provided that the above
 * copyright notice and this permission notice appear in all copies.
 *
 * THE SOFTWARE IS PROVIDED "AS IS" AND RHYOLITE SOFTWARE DISCLAIMS ALL
 * WARRANTIES WITH REGARD TO THIS SOFTWARE INCLUDING ALL IMPLIED WARRANTIES
 * OF MERCHANTABILITY AND FITNESS. IN NO EVENT SHALL RHYOLITE SOFTWARE
 * BE LIABLE FOR ANY SPECIAL, DIRECT, INDIRECT, OR CONSEQUENTIAL DAMAGES
 * OR ANY DAMAGES WHATSOEVER RESULTING FROM LOSS OF USE, DATA OR PROFITS,
 * WHETHER IN AN ACTION OF CONTRACT, NEGLIGENCE OR OTHER TORTIOUS ACTION,
 * ARISING OUT OF OR IN CONNECTION WITH THE USE OR PERFORMANCE OF THIS
 * SOFTWARE.
 *
 * Rhyolite Software DCC 1.2.74-1.37 $Revision$
 */

#include "ck.h"

#define FZ1  cks->fuz1

#define MAX_FUZ1_LEN    (4*1024)


/* Reset the fuzzy checksum #1 state kept in *cks before a message body
 * is fed to dcc_ck_fuz1(). */
void
dcc_ck_fuz1_init(DCC_GOT_CKS *cks)
{
      /* fresh MD5 context, idle URL scanner, and an empty collection
       * buffer whose write position and line start are both at the front */
      MD5Init(&FZ1.md5);
      FZ1.url.st = DCC_URL_ST_IDLE;
      FZ1.cp = FZ1.buf;
      FZ1.eol = FZ1.buf;
      FZ1.total = 0;                /* nothing summed so far */

      /* Type the slot as FUZ1, which is what dcc_ck_fuz1() and
       * dcc_ck_fuz1_fin() test before doing any work, and clear rpt,
       * which dcc_ck_fuz1_fin() sets once a checksum exists. */
      cks->sums[DCC_CK_FUZ1].rpt = 0;
      cks->sums[DCC_CK_FUZ1].tgts = DCC_TGTS_INVALID;
      cks->sums[DCC_CK_FUZ1].type = DCC_CK_FUZ1;
}



/* Decide whether a collected line is a salutation or date line that
 * should be left out of the checksum.
 *
 *    cp    start of the line; it need not be NUL terminated
 *    llen  number of characters in the line
 *
 * Returns 1 to discard the line or 0 to keep it.
 *
 * A line is discarded when it starts with one of the words below and is
 * longer than that word.  sizeof(w) counts the terminating NUL of the
 * literal, so "llen >= sizeof(w)" demands at least one character after
 * the word and also keeps strncmp() from looking past the end of the line.
 */
static inline u_char                /* 0=keep the line, 1=discard it */
dear_sucker(const char *cp, u_int llen)
{
#define CK_WORD(w) (llen >= sizeof(w) && !strncmp(cp, w, sizeof(w)-1))

      if (CK_WORD("dear"))
            return 1;
      if (CK_WORD("hello"))
            return 1;
      if (CK_WORD("greeting"))
            return 1;
      if (CK_WORD("date"))
            return 1;

      return 0;
      /* the helper macro is private to this function */
#undef CK_WORD
}



/* Feed the first len bytes of the collection buffer to the MD5 context.
 *
 * At most MAX_FUZ1_LEN bytes are summed per message; a request that would
 * go past that limit is shortened so the total stops exactly at the limit.
 *
 * Returns 1 while there is still room under the limit (or when len is 0),
 * and 0 once MAX_FUZ1_LEN bytes have been summed.
 */
static inline u_char
add_sum(DCC_GOT_CKS *cks, int len)
{
      int slack;

      if (len == 0)
            return 1;

      /* Room left after adding len bytes.  A negative value is how far
       * the request overshoots, so trim that much off the end, since the
       * tail of very long spam is likely to make the checksum differ. */
      slack = MAX_FUZ1_LEN - (FZ1.total + len);
      if (slack < 0)
            len += slack;

      MD5Update(&FZ1.md5, FZ1.buf, len);
      FZ1.total += len;
      return FZ1.total < MAX_FUZ1_LEN;
}



/* Feed the next piece of a message body to fuzzy checksum #1.
 *
 *    cks     checksum state.  Nothing is done unless its FUZ1 slot is
 *            still typed DCC_CK_FUZ1 and fewer than MAX_FUZ1_LEN bytes
 *            have been summed.
 *    bp      next bytes of the body
 *    bp_len  number of bytes at bp
 *
 * Every input byte goes through dcc_ck_url().  The low bits of its result
 * (DCC_CK_URL_MASK) say what to do with the byte and the bits above
 * DCC_CK_URL_SHIFT are the character to use.  Characters 'a' through 'z'
 * are collected in FZ1.buf, '\n' ends a line, and everything else is
 * dropped.  FZ1.eol is the start of the current line in the buffer, or 0
 * when the buffer was flushed in the middle of a line.  The buffer is
 * handed to add_sum() whenever it gets too full and, when the input runs
 * out, if it ends exactly at a line boundary.
 *
 * While a URL host name is recognized, its characters are also copied to
 * cks->dnsbl->dom, provided there is a DNSBL work area without a hit.
 */
void
dcc_ck_fuz1(DCC_GOT_CKS *cks, const char *bp, u_int bp_len)
{
      char *cp;
      DNSBL_WORK *dnsbl;
      int i, len, c;

      if (cks->sums[DCC_CK_FUZ1].type != DCC_CK_FUZ1)
            return;

      /* enough has already been summed */
      if (FZ1.total >= MAX_FUZ1_LEN)
            return;

      /* resume where the previous call stopped */
      cp = FZ1.cp;

      for (;;) {
            if (bp_len == 0) {
                  /* Sum the buffer if it ends with a line.  Note that
                   * every message always ends with an artificial "\n". */
                  if (FZ1.eol == cp) {
                        add_sum(cks, cp - FZ1.buf);
                        FZ1.eol = cp = FZ1.buf;
                  }
                  /* remember the write position for the next call */
                  FZ1.cp = cp;
                  return;
            }
            --bp_len;
            c = *bp++;

            /* split the result into an action code and a character */
            i = dcc_ck_url(&FZ1.url, c, &cp);
            c = i>>DCC_CK_URL_SHIFT;
            switch ((DCC_CK_URL)(i & DCC_CK_URL_MASK)) {
            case DCC_CK_URL_CHAR:
                  break;
            case DCC_CK_URL_CK_LEN:
                  /* Make room if we are too close to the end of
                   * the buffer for a maximum size URL */
                  if (cp >= &FZ1.buf[sizeof(FZ1.buf)-DCC_URL_MAX]) {
                        if (!FZ1.eol
                            || FZ1.eol < cp-DCC_FUZ1_MAX_LINE) {
                              /* There is no line start in the
                               * buffer or the current line is
                               * already too long to be discarded,
                               * so sum everything collected. */
                              if (!add_sum(cks, cp - FZ1.buf))
                                  return;
                              FZ1.eol = 0;
                              cp = FZ1.buf;
                        } else {
                              /* Sum only the complete lines and
                               * slide the unfinished line to the
                               * front of the buffer. */
                              len = FZ1.eol - FZ1.buf;
                              if (!add_sum(cks, len))
                                  return;
                              memmove(FZ1.buf, FZ1.eol, cp - FZ1.eol);
                              FZ1.eol = FZ1.buf;
                              cp -= len;
                        }
                  }
                  /* forget any collected host name */
                  if ((dnsbl = cks->dnsbl) != 0
                      && dnsbl->hit == DNSBL_HIT_NONE)
                        dnsbl->dom_len = 0;
                  continue;
            case DCC_CK_URL_HOST:
            case DCC_CK_URL_DOT:
                  /* save the host name character, leaving one
                   * byte of dnsbl->dom unused */
                  if ((dnsbl = cks->dnsbl) != 0
                      && dnsbl->hit == DNSBL_HIT_NONE
                      && dnsbl->dom_len<ISZ(dnsbl->dom)-1)
                        dnsbl->dom[dnsbl->dom_len++] = c;
                  break;
            case DCC_CK_URL_HOST_END:
                  /* NOTE(review): cks->dnsbl is passed without the
                   * null check made in the other cases; presumably
                   * dcc_dnsbl_url() copes with that -- confirm. */
                  dcc_dnsbl_url(cks->dnsbl);
                  break;
            case DCC_CK_URL_HOST_RESET:
                  /* Use the same "no hit yet" guard as the other cases.
                   * This used to read "!dnsbl->hit == DNSBL_HIT_NONE",
                   * which compares the negated hit value with
                   * DNSBL_HIT_NONE instead of the hit value itself. */
                  if ((dnsbl = cks->dnsbl) != 0
                      && dnsbl->hit == DNSBL_HIT_NONE)
                        dnsbl->dom_len = 0;
                  break;
            case DCC_CK_URL_SKIP:
                  continue;
            }

            /* collect only ASCII letters */
            if (c >= 'a' && c <= 'z') {
                  /* Collect more of a new line */
                  *cp = c;
                  if (++cp < &FZ1.buf[sizeof(FZ1.buf)])
                        continue;

                  /* We are at the end of the buffer,
                   * so add it to the checksum */
                  if (!add_sum(cks, cp - FZ1.buf))
                        return;
                  cp = FZ1.buf;
                  /* the start of this line is no longer in the buffer */
                  FZ1.eol = 0;
                  continue;
            }

            if (c == '\n') {
                  /* Ignore short lines starting with some strings
                   * by backing up to the start of the line */
                  if (FZ1.eol
                      && (len = cp - FZ1.eol) > 0
                      && len <= DCC_FUZ1_MAX_LINE
                      && dear_sucker(FZ1.eol, len)) {
                        cp = FZ1.eol;
                        continue;
                  }

                  /* Add the line to the checksum if we do not
                   * have room in the buffer for another line */
                  if (cp >= &FZ1.buf[sizeof(FZ1.buf) - (DCC_FUZ1_MAX_LINE
                                          + DCC_HTTPS_LEN)]) {
                        if (!add_sum(cks, cp - FZ1.buf))
                              return;
                        cp = FZ1.buf;
                  }
                  /* the next line starts here */
                  FZ1.eol = cp;
            }

            /* any other character is dropped */
      }
}



/* Finish fuzzy checksum #1 for a message.
 *
 * Nothing is done unless the FUZ1 slot of *cks is still typed DCC_CK_FUZ1.
 * If fewer than 30 bytes were summed, the slot is marked DCC_CK_INVALID.
 * Otherwise the MD5 digest is stored in the slot, its rpt field is set,
 * and DCC_CKS_HAVE_SUM is added to cks->flags.
 */
void
dcc_ck_fuz1_fin(DCC_GOT_CKS *cks)
{
      if (cks->sums[DCC_CK_FUZ1].type == DCC_CK_FUZ1) {
            if (FZ1.total >= 30) {
                  MD5Final(cks->sums[DCC_CK_FUZ1].sum, &FZ1.md5);
                  cks->sums[DCC_CK_FUZ1].rpt = 1;
                  cks->flags |= DCC_CKS_HAVE_SUM;
            } else {
                  /* an empty or nearly empty message cannot
                   * produce a useful checksum */
                  cks->sums[DCC_CK_FUZ1].type = DCC_CK_INVALID;
            }
      }
}

Generated by  Doxygen 1.6.0   Back to index