public static ByteString unescapeBytes()

in tajo-common/src/main/java/org/apache/tajo/datum/protobuf/TextUtils.java [381:457]


    /**
     * Converts a character sequence containing backslash escapes into the raw
     * bytes it denotes.
     *
     * <p>The input is first encoded as UTF-8; every byte that is not part of an
     * escape sequence is copied through unchanged. The recognized escapes are:
     * <ul>
     *   <li>octal: a backslash followed by one to three octal digits, whose
     *       value must not exceed {@code 0xFF} (octal 377);</li>
     *   <li>hexadecimal: a backslash and {@code x} followed by one or two hex
     *       digits;</li>
     *   <li>single-character: a backslash followed by one of
     *       {@code a b f n r t v}, a backslash, a single quote or a double
     *       quote.</li>
     * </ul>
     *
     * @param charString the escaped text to decode
     * @return the unescaped bytes
     * @throws IllegalArgumentException if the input ends with a lone backslash,
     *     contains an unknown escape character, contains a hex escape with no
     *     hex digit, or contains an octal escape whose value is larger than
     *     {@code 0xFF}
     */
    public static ByteString unescapeBytes(final CharSequence charString) {
      // First convert the Java character sequence to UTF-8 bytes.
      final ByteString input = ByteString.copyFromUtf8(charString.toString());
      final int length = input.size();
      // Then unescape certain byte sequences introduced by ASCII '\\'.  The valid
      // escapes can all be expressed with ASCII characters, so it is safe to
      // operate on bytes here.
      //
      // Unescaping the input byte array will result in a byte sequence that's no
      // longer than the input.  That's because each escape sequence is between
      // two and four bytes long and stands for a single byte.
      final byte[] result = new byte[length];
      int pos = 0;
      for (int i = 0; i < length; i++) {
        byte c = input.byteAt(i);
        if (c != '\\') {
          // Ordinary byte: copy through unchanged.
          result[pos++] = c;
          continue;
        }
        if (i + 1 >= length) {
          throw new IllegalArgumentException(
              "Invalid escape sequence: '\\' at end of string.");
        }

        // Step onto the character that selects the kind of escape.
        ++i;
        c = input.byteAt(i);

        if (isOctal((char) c)) {
          // Octal escape: one mandatory digit plus up to two optional ones.
          int code = digitValue((char) c);
          for (int extra = 0; extra < 2; extra++) {
            if (i + 1 < length && isOctal((char) input.byteAt(i + 1))) {
              ++i;
              code = code * 8 + digitValue((char) input.byteAt(i));
            } else {
              break;
            }
          }
          // Three octal digits can encode up to 0777 (511). Narrowing such a
          // value to a byte would silently drop the high bit, so reject it.
          if (code > 0xFF) {
            throw new IllegalArgumentException(
                "Invalid escape sequence: '\\" + Integer.toOctalString(code)
                    + "' exceeds the maximum byte value '\\377'");
          }
          result[pos++] = (byte) code;
          continue;
        }

        switch (c) {
          case 'a' : result[pos++] = 0x07; break;  // alert (bell)
          case 'b' : result[pos++] = '\b'; break;
          case 'f' : result[pos++] = '\f'; break;
          case 'n' : result[pos++] = '\n'; break;
          case 'r' : result[pos++] = '\r'; break;
          case 't' : result[pos++] = '\t'; break;
          case 'v' : result[pos++] = 0x0b; break;  // vertical tab
          case '\\': result[pos++] = '\\'; break;
          case '\'': result[pos++] = '\''; break;
          case '"' : result[pos++] = '\"'; break;

          case 'x': {
            // Hex escape: one mandatory digit plus one optional digit, so the
            // value always fits in a byte.
            int code = 0;
            if (i + 1 < length && isHex((char) input.byteAt(i + 1))) {
              ++i;
              code = digitValue((char) input.byteAt(i));
            } else {
              throw new IllegalArgumentException(
                  "Invalid escape sequence: '\\x' with no digits");
            }
            if (i + 1 < length && isHex((char) input.byteAt(i + 1))) {
              ++i;
              code = code * 16 + digitValue((char) input.byteAt(i));
            }
            result[pos++] = (byte) code;
            break;
          }

          default:
            throw new IllegalArgumentException(
                "Invalid escape sequence: '\\" + (char) c + '\'');
        }
      }

      // Only the first pos bytes of the scratch buffer were filled.
      return ByteString.copyFrom(result, 0, pos);
    }