size_t untrustedChunks()

in glean/rts/string.cpp [39:81]


/// Validate the mangled string at the start of `range` and report its chunks.
///
/// Layout accepted by this function:
///   - the string ends at the first NUL NUL byte pair;
///   - a NUL followed by 0x01 closes the current chunk (the chunk handed to
///     `chunk` includes that NUL; the 0x01 byte is skipped) and the next chunk
///     starts right after the 0x01;
///   - a NUL followed by any other byte is rejected;
///   - all bytes must form well-formed UTF-8.
///
/// @param range  bytes to scan; `range.data()` must not be null.
/// @param chunk  callable invoked as `chunk(pointer, length)` once per chunk,
///               in order. Chunks found before a later error are still
///               reported before the error is raised.
/// @return the number of bytes consumed, including the two-byte terminator.
///
/// Every failure is reported through rts::error, which this code expects not
/// to return.
size_t untrustedChunks(folly::ByteRange range, Chunk&& chunk) {
  const unsigned char * const p = range.data();
  const size_t size = range.size();

  assert(p != nullptr);

  // ICU's U8_NEXT works with int32_t offsets, so never scan further than an
  // int32_t can index. A string whose terminator lies beyond that limit is
  // reported as truncated instead of overflowing the index.
  constexpr size_t kMaxScan = 0x7fffffff;
  const int32_t limit = static_cast<int32_t>(size < kMaxScan ? size : kMaxScan);

  // Fast path: a run of non-NUL ASCII bytes immediately followed by the
  // NUL NUL terminator is a single chunk and needs no UTF-8 decoding.
  int32_t i = 0;
  while (i < limit && p[i] > 0 && p[i] < 0x80) {
    ++i;
  }
  // `limit - i >= 2` is `i + 1 < limit` written so that it cannot overflow.
  if (limit - i >= 2 && p[i] == 0 && p[i+1] == 0) {
    chunk(p, i);
    return static_cast<size_t>(i) + 2;
  }

  // Slow path: resume at the first byte the fast path could not accept and
  // decode code point by code point. `k` is the start of the current chunk.
  int32_t k = 0;
  while (true) {
    // U8_NEXT reads p[i] unconditionally and requires i < length, so running
    // out of input without having seen a terminator must be caught here.
    if (i >= limit) {
      rts::error("truncated mangled string");
    }

    // Must be UChar32 (signed, 32 bits): the 16-bit UChar can neither hold
    // the negative error value nor code points above U+FFFF.
    UChar32 c;
    // NOTE: U8_NEXT returns c<0 on overlong (invalid) points so this doesn't
    // transcode (and we don't have to worry about, say, overlong \NUL).
    U8_NEXT(p, i, limit, c);
    if (c == 0) {
      // The NUL just decoded is at p[i-1]; the byte after it decides what
      // the NUL means.
      if (i < limit) {
        switch (p[i]) {
          case 0:
            // NUL NUL: terminator. The final chunk excludes both NULs.
            chunk(p+k, i-k-1);
            return static_cast<size_t>(i) + 1;

          case 1:
            // NUL 0x01: the chunk keeps the NUL, the 0x01 is dropped.
            chunk(p+k, i-k);
            ++i;
            k = i;
            break;

          default:
            rts::error("invalid NUL in mangled string");
        }
      } else {
        rts::error("truncated terminator in mangled string");
      }
    } else if (c < 0) {
      rts::error("invalid UTF-8 string");
    }
  }
}