static Py_ssize_t _encode()

in eden/scm/edenscm/mercurial/cext/pathencode.c [198:484]


/*
 * Encode the bytes src[0..len) into dest by running a byte-at-a-time
 * state machine (enum path_state), and return the final destlen.
 *
 * All output goes through the helpers charcopy(), escape3() and
 * memcopy(), which receive dest, &destlen and destsize. They are defined
 * outside this block; presumably they append at offset destlen, advance
 * it, and use destsize as the bound -- confirm against their definitions.
 *
 * What the state machine does, as visible here:
 *  - At the start of the input and after every '/', a leading '.' or ' '
 *    is emitted with escape3() instead of being copied.
 *  - A path component starting with "aux", "con", "nul", "prn", or with
 *    "com"/"lpt" plus a digit '1'-'9', and followed directly by '.', '/'
 *    or '\0', gets its third letter emitted with escape3(). (These look
 *    like the Windows reserved device names -- confirm.)
 *  - A '.' or ' ' directly before '/' or '\0' is emitted as the literal
 *    "~2e" or "~20".
 *  - If encodedir is non-zero, ".hg" is inserted before a '/' that
 *    directly follows ".d", ".i" or ".hg".
 *  - Every other byte is copied unchanged if it is in the onebyte set,
 *    emitted as '_' plus a second byte if it is in the twobytes set, and
 *    emitted with escape3() otherwise.
 *
 * Parameters:
 *  twobytes   bitset queried with inset(); selects the two-byte encoding.
 *  onebyte    bitset queried with inset(); selects verbatim copying.
 *  dest       output buffer, only ever passed on to the helpers.
 *  destlen    output length on entry; updated by the helpers and returned.
 *  destsize   passed unchanged to every helper.
 *  src, len   input bytes; only indices below len are examined (plus
 *             look-behind at i - 1 and i - 2 on bytes already consumed).
 *  encodedir  non-zero enables the ".hg" insertion described above.
 */
static Py_ssize_t _encode(
    const uint32_t twobytes[8],
    const uint32_t onebyte[8],
    char* dest,
    Py_ssize_t destlen,
    size_t destsize,
    const char* src,
    Py_ssize_t len,
    int encodedir) {
  enum path_state state = START;
  Py_ssize_t i = 0;

  /*
   * Python strings end with a zero byte, which we use as a
   * terminal token as they are not valid inside path names.
   *
   * NOTE(review): the '\0' cases below can only fire if that zero byte
   * is inside src[0..len), so callers presumably pass a len that
   * includes the terminator -- confirm against the call sites.
   */

  while (i < len) {
    switch (state) {
      /* Beginning of a path component (initial state, and after '/'). */
      case START:
        switch (src[i]) {
          case '/':
            /* Empty component: copy the separator and stay in START. */
            charcopy(dest, &destlen, destsize, src[i++]);
            break;
          case '.':
            /* Leading dot is escaped; LDOT then looks for d, i or h. */
            state = LDOT;
            escape3(dest, &destlen, destsize, src[i++]);
            break;
          case ' ':
            /* Leading space is escaped. */
            state = DEFAULT;
            escape3(dest, &destlen, destsize, src[i++]);
            break;
          /*
           * First letter of one of the special names: it is copied
           * right away, only the third letter may need escaping.
           */
          case 'a':
            state = A;
            charcopy(dest, &destlen, destsize, src[i++]);
            break;
          case 'c':
            state = C;
            charcopy(dest, &destlen, destsize, src[i++]);
            break;
          case 'l':
            state = L;
            charcopy(dest, &destlen, destsize, src[i++]);
            break;
          case 'n':
            state = N;
            charcopy(dest, &destlen, destsize, src[i++]);
            break;
          case 'p':
            state = P;
            charcopy(dest, &destlen, destsize, src[i++]);
            break;
          default:
            /* Nothing consumed: the same byte is re-examined in DEFAULT. */
            state = DEFAULT;
            break;
        }
        break;
      /* Seen "a"; 'u' continues towards "aux". */
      case A:
        if (src[i] == 'u') {
          state = AU;
          charcopy(dest, &destlen, destsize, src[i++]);
        } else
          state = DEFAULT;
        break;
      /* Seen "au"; an 'x' is consumed but its output is deferred to THIRD. */
      case AU:
        if (src[i] == 'x') {
          state = THIRD;
          i++;
        } else
          state = DEFAULT;
        break;
      /*
       * The third letter of "aux", "con", "nul" or "prn" has been consumed
       * but not written yet; src[i] decides how it is written.
       */
      case THIRD:
        state = DEFAULT;
        switch (src[i]) {
          case '.':
          case '/':
          case '\0':
            /*
             * The name ends here: escape the pending third letter. src[i]
             * itself is not consumed and is handled by DEFAULT next.
             */
            escape3(dest, &destlen, destsize, src[i - 1]);
            break;
          default:
            /* Name continues: step back so DEFAULT emits the third letter. */
            i--;
            break;
        }
        break;
      /* Seen "c"; 'o' continues towards "con" / "com". */
      case C:
        if (src[i] == 'o') {
          state = CO;
          charcopy(dest, &destlen, destsize, src[i++]);
        } else
          state = DEFAULT;
        break;
      /* Seen "co"; 'm' or 'n' is consumed with its output deferred. */
      case CO:
        if (src[i] == 'm') {
          state = COMLPT;
          i++;
        } else if (src[i] == 'n') {
          state = THIRD;
          i++;
        } else
          state = DEFAULT;
        break;
      /*
       * Seen "com" or "lpt" with the third letter still unwritten; a digit
       * '1'-'9' is needed to keep matching.
       */
      case COMLPT:
        switch (src[i]) {
          case '1':
          case '2':
          case '3':
          case '4':
          case '5':
          case '6':
          case '7':
          case '8':
          case '9':
            /* Digit consumed; it and the third letter are both pending. */
            state = COMLPTn;
            i++;
            break;
          default:
            /* No match: write the pending third letter unchanged. */
            state = DEFAULT;
            charcopy(dest, &destlen, destsize, src[i - 1]);
            break;
        }
        break;
      /*
       * Seen "com<digit>" or "lpt<digit>"; src[i - 2] is the pending third
       * letter and src[i - 1] the pending digit.
       */
      case COMLPTn:
        state = DEFAULT;
        switch (src[i]) {
          case '.':
          case '/':
          case '\0':
            /* Name ends here: escape the third letter, copy the digit. */
            escape3(dest, &destlen, destsize, src[i - 2]);
            charcopy(dest, &destlen, destsize, src[i - 1]);
            break;
          default:
            /* Name continues: write both pending bytes unchanged. */
            memcopy(dest, &destlen, destsize, &src[i - 2], 2);
            break;
        }
        break;
      /* Seen "l"; 'p' continues towards "lpt". */
      case L:
        if (src[i] == 'p') {
          state = LP;
          charcopy(dest, &destlen, destsize, src[i++]);
        } else
          state = DEFAULT;
        break;
      /* Seen "lp"; a 't' is consumed with its output deferred to COMLPT. */
      case LP:
        if (src[i] == 't') {
          state = COMLPT;
          i++;
        } else
          state = DEFAULT;
        break;
      /* Seen "n"; 'u' continues towards "nul". */
      case N:
        if (src[i] == 'u') {
          state = NU;
          charcopy(dest, &destlen, destsize, src[i++]);
        } else
          state = DEFAULT;
        break;
      /* Seen "nu"; an 'l' is consumed with its output deferred to THIRD. */
      case NU:
        if (src[i] == 'l') {
          state = THIRD;
          i++;
        } else
          state = DEFAULT;
        break;
      /* Seen "p"; 'r' continues towards "prn". */
      case P:
        if (src[i] == 'r') {
          state = PR;
          charcopy(dest, &destlen, destsize, src[i++]);
        } else
          state = DEFAULT;
        break;
      /* Seen "pr"; an 'n' is consumed with its output deferred to THIRD. */
      case PR:
        if (src[i] == 'n') {
          state = THIRD;
          i++;
        } else
          state = DEFAULT;
        break;
      /*
       * A leading '.' was just escaped in START; check whether the
       * component begins ".d", ".i" or ".h".
       */
      case LDOT:
        switch (src[i]) {
          case 'd':
          case 'i':
            state = HGDI;
            charcopy(dest, &destlen, destsize, src[i++]);
            break;
          case 'h':
            state = H;
            charcopy(dest, &destlen, destsize, src[i++]);
            break;
          default:
            state = DEFAULT;
            break;
        }
        break;
      /* A '.' was consumed in DEFAULT and has not been written yet. */
      case DOT:
        switch (src[i]) {
          case '/':
          case '\0':
            /* Trailing dot: write "~2e" in its place, then the terminator. */
            state = START;
            memcopy(dest, &destlen, destsize, "~2e", 3);
            charcopy(dest, &destlen, destsize, src[i++]);
            break;
          case 'd':
          case 'i':
            /* ".d" / ".i": write the pending dot and the letter. */
            state = HGDI;
            charcopy(dest, &destlen, destsize, '.');
            charcopy(dest, &destlen, destsize, src[i++]);
            break;
          case 'h':
            /* ".h": write both bytes; H then checks for 'g'. */
            state = H;
            memcopy(dest, &destlen, destsize, ".h", 2);
            i++;
            break;
          default:
            /* Ordinary dot: write it and let DEFAULT handle src[i]. */
            state = DEFAULT;
            charcopy(dest, &destlen, destsize, '.');
            break;
        }
        break;
      /* Seen ".h"; 'g' completes ".hg". */
      case H:
        if (src[i] == 'g') {
          state = HGDI;
          charcopy(dest, &destlen, destsize, src[i++]);
        } else
          state = DEFAULT;
        break;
      /*
       * Just wrote ".d", ".i" or ".hg". If a '/' follows directly and
       * encodedir is set, ".hg" is inserted before the '/'.
       */
      case HGDI:
        if (src[i] == '/') {
          state = START;
          if (encodedir)
            memcopy(dest, &destlen, destsize, ".hg", 3);
          charcopy(dest, &destlen, destsize, src[i++]);
        } else
          state = DEFAULT;
        break;
      /* A ' ' was consumed in DEFAULT and has not been written yet. */
      case SPACE:
        switch (src[i]) {
          case '/':
          case '\0':
            /* Trailing space: write "~20" in its place, then the terminator. */
            state = START;
            memcopy(dest, &destlen, destsize, "~20", 3);
            charcopy(dest, &destlen, destsize, src[i++]);
            break;
          default:
            /* Ordinary space: write it and let DEFAULT handle src[i]. */
            state = DEFAULT;
            charcopy(dest, &destlen, destsize, ' ');
            break;
        }
        break;
      /* Inside a component, no special prefix or suffix pending. */
      case DEFAULT:
        /* Copy a run of bytes that are in the onebyte set unchanged. */
        while (inset(onebyte, src[i])) {
          charcopy(dest, &destlen, destsize, src[i++]);
          /* Stop before src[len] is read. */
          if (i == len)
            goto done;
        }
        /* src[i] is not in the onebyte set at this point. */
        switch (src[i]) {
          case '.':
            /* Defer the dot: its encoding depends on the next byte. */
            state = DOT;
            i++;
            break;
          case ' ':
            /* Defer the space: its encoding depends on the next byte. */
            state = SPACE;
            i++;
            break;
          case '/':
            state = START;
            charcopy(dest, &destlen, destsize, '/');
            i++;
            break;
          default:
            /*
             * NOTE(review): the loop above only exits when
             * inset(onebyte, src[i]) is false, so this first branch looks
             * unreachable as long as inset() has no side effects --
             * confirm before removing it.
             */
            if (inset(onebyte, src[i])) {
              do {
                charcopy(dest, &destlen, destsize, src[i++]);
              } while (i < len && inset(onebyte, src[i]));
            } else if (inset(twobytes, src[i])) {
              /*
               * Two-byte form: '_' followed by '_' for an underscore, or
               * by c + 32 otherwise (for an ASCII uppercase letter that
               * is its lowercase form).
               */
              char c = src[i++];
              charcopy(dest, &destlen, destsize, '_');
              charcopy(dest, &destlen, destsize, c == '_' ? '_' : c + 32);
            } else
              escape3(dest, &destlen, destsize, src[i++]);
            break;
        }
        break;
    }
  }
done:
  return destlen;
}