uint32_t w_hash_bytes()

in watchman/hash.cpp [103:347]


uint32_t w_hash_bytes(const void* key, size_t length, uint32_t initval) {
  uint32_t a, b, c; /* internal state */
  union {
    const void* ptr;
    size_t i;
  } u; /* needed for Mac Powerbook G4 */

  /* Set up the internal state */
  a = b = c = 0xdeadbeef + ((uint32_t)length) + initval;

  u.ptr = key;
  if (HASH_LITTLE_ENDIAN && ((u.i & 0x3) == 0)) {
    const uint32_t* k = (const uint32_t*)key; /* read 32-bit chunks */
    const uint8_t* k8;

    /*------ all but last block: aligned reads and affect 32 bits of (a,b,c) */
    while (length > 12) {
      a += k[0];
      b += k[1];
      c += k[2];
      mix(a, b, c);
      length -= 12;
      k += 3;
    }
    /*----------------------------- handle the last (probably partial) block */
    /*
     * "k[2]&0xffffff" actually reads beyond the end of the string, but
     * then masks off the part it's not allowed to read.  Because the
     * string is aligned, the masked-off tail is in the same word as the
     * rest of the string.  Every machine with memory protection I've seen
     * does it on word boundaries, so is OK with this.  But VALGRIND and ASAN
     * will still catch it and complain.  The masking trick does make the hash
     * noticeably faster for short strings (like English words).
     */
    if (!WATCHMAN_ASAN && !running_on_valgrind()) {
      switch (length) {
        case 12:
          c += k[2];
          b += k[1];
          a += k[0];
          break;
        case 11:
          c += k[2] & 0xffffff;
          b += k[1];
          a += k[0];
          break;
        case 10:
          c += k[2] & 0xffff;
          b += k[1];
          a += k[0];
          break;
        case 9:
          c += k[2] & 0xff;
          b += k[1];
          a += k[0];
          break;
        case 8:
          b += k[1];
          a += k[0];
          break;
        case 7:
          b += k[1] & 0xffffff;
          a += k[0];
          break;
        case 6:
          b += k[1] & 0xffff;
          a += k[0];
          break;
        case 5:
          b += k[1] & 0xff;
          a += k[0];
          break;
        case 4:
          a += k[0];
          break;
        case 3:
          a += k[0] & 0xffffff;
          break;
        case 2:
          a += k[0] & 0xffff;
          break;
        case 1:
          a += k[0] & 0xff;
          break;
        case 0:
          return c; /* zero length strings require no mixing */
      }

    } else {
      /* make valgrind and ASAN happy */

      k8 = (const uint8_t*)k;
      switch (length) {
        case 12:
          c += k[2];
          b += k[1];
          a += k[0];
          break;
        case 11:
          c += ((uint32_t)k8[10]) << 16; /* fall through */
        case 10:
          c += ((uint32_t)k8[9]) << 8; /* fall through */
        case 9:
          c += k8[8]; /* fall through */
        case 8:
          b += k[1];
          a += k[0];
          break;
        case 7:
          b += ((uint32_t)k8[6]) << 16; /* fall through */
        case 6:
          b += ((uint32_t)k8[5]) << 8; /* fall through */
        case 5:
          b += k8[4]; /* fall through */
        case 4:
          a += k[0];
          break;
        case 3:
          a += ((uint32_t)k8[2]) << 16; /* fall through */
        case 2:
          a += ((uint32_t)k8[1]) << 8; /* fall through */
        case 1:
          a += k8[0];
          break;
        case 0:
          return c;
      }
    }

  } else if (HASH_LITTLE_ENDIAN && ((u.i & 0x1) == 0)) {
    const uint16_t* k = (const uint16_t*)key; /* read 16-bit chunks */
    const uint8_t* k8;

    /*--------------- all but last block: aligned reads and different mixing */
    while (length > 12) {
      a += k[0] + (((uint32_t)k[1]) << 16);
      b += k[2] + (((uint32_t)k[3]) << 16);
      c += k[4] + (((uint32_t)k[5]) << 16);
      mix(a, b, c);
      length -= 12;
      k += 6;
    }

    /*----------------------------- handle the last (probably partial) block */
    k8 = (const uint8_t*)k;
    switch (length) {
      case 12:
        c += k[4] + (((uint32_t)k[5]) << 16);
        b += k[2] + (((uint32_t)k[3]) << 16);
        a += k[0] + (((uint32_t)k[1]) << 16);
        break;
      case 11:
        c += ((uint32_t)k8[10]) << 16; /* fall through */
      case 10:
        c += k[4];
        b += k[2] + (((uint32_t)k[3]) << 16);
        a += k[0] + (((uint32_t)k[1]) << 16);
        break;
      case 9:
        c += k8[8]; /* fall through */
      case 8:
        b += k[2] + (((uint32_t)k[3]) << 16);
        a += k[0] + (((uint32_t)k[1]) << 16);
        break;
      case 7:
        b += ((uint32_t)k8[6]) << 16; /* fall through */
      case 6:
        b += k[2];
        a += k[0] + (((uint32_t)k[1]) << 16);
        break;
      case 5:
        b += k8[4]; /* fall through */
      case 4:
        a += k[0] + (((uint32_t)k[1]) << 16);
        break;
      case 3:
        a += ((uint32_t)k8[2]) << 16; /* fall through */
      case 2:
        a += k[0];
        break;
      case 1:
        a += k8[0];
        break;
      case 0:
        return c; /* zero length requires no mixing */
    }

  } else { /* need to read the key one byte at a time */
    const uint8_t* k = (const uint8_t*)key;

    /*--------------- all but the last block: affect some 32 bits of (a,b,c) */
    while (length > 12) {
      a += k[0];
      a += ((uint32_t)k[1]) << 8;
      a += ((uint32_t)k[2]) << 16;
      a += ((uint32_t)k[3]) << 24;
      b += k[4];
      b += ((uint32_t)k[5]) << 8;
      b += ((uint32_t)k[6]) << 16;
      b += ((uint32_t)k[7]) << 24;
      c += k[8];
      c += ((uint32_t)k[9]) << 8;
      c += ((uint32_t)k[10]) << 16;
      c += ((uint32_t)k[11]) << 24;
      mix(a, b, c);
      length -= 12;
      k += 12;
    }

    /*-------------------------------- last block: affect all 32 bits of (c) */
    switch (length) /* all the case statements fall through */
    {
      case 12:
        c += ((uint32_t)k[11]) << 24; /* fall through */
      case 11:
        c += ((uint32_t)k[10]) << 16; /* fall through */
      case 10:
        c += ((uint32_t)k[9]) << 8; /* fall through */
      case 9:
        c += k[8]; /* fall through */
      case 8:
        b += ((uint32_t)k[7]) << 24; /* fall through */
      case 7:
        b += ((uint32_t)k[6]) << 16; /* fall through */
      case 6:
        b += ((uint32_t)k[5]) << 8; /* fall through */
      case 5:
        b += k[4]; /* fall through */
      case 4:
        a += ((uint32_t)k[3]) << 24; /* fall through */
      case 3:
        a += ((uint32_t)k[2]) << 16; /* fall through */
      case 2:
        a += ((uint32_t)k[1]) << 8; /* fall through */
      case 1:
        a += k[0];
        break;
      case 0:
        return c;
    }
  }

  final(a, b, c);
  return c;
}