in cdk/extra/protobuf/protobuf-3.19.6/src/google/protobuf/stubs/strutil.cc [1718:1951]
// Decodes base64 text from src_param into dest using the lookup table
// unbase64.
//
// Parameters:
//   src_param - input text; at most szsrc bytes are consumed. A NUL byte is
//               treated as end of input by the clean-up and padding loops.
//   szsrc     - number of input bytes available.
//   dest      - output buffer, or null. When null, nothing is written and
//               the function only validates the input and counts how many
//               bytes it would decode to.
//   szdest    - capacity of dest in bytes (only checked when dest is
//               non-null).
//   unbase64  - table indexed by the unsigned value of each input byte. A
//               negative entry marks a non-data character; a non-negative
//               entry is used as the 6-bit value of that character.
//               (Presumably 256 entries, since it is indexed by an unsigned
//               char value -- confirm against the callers.)
//
// Returns the number of decoded bytes, or -1 if the input contains an
// illegal character, has a dangling single data character, carries the
// wrong number of pad characters, or does not fit in szdest.
//
// Whitespace (per ascii_isspace) is skipped anywhere in the input. Both '='
// and '.' are accepted as pad characters, and omitting the padding entirely
// is also accepted.
int Base64UnescapeInternal(const char *src_param, int szsrc,
char *dest, int szdest,
const signed char* unbase64) {
static const char kPad64Equals = '=';
static const char kPad64Dot = '.';
// decode: table value of the most recently read character (< 0 if it was
// not a data character).
int decode = 0;
// destidx: number of output bytes produced (or counted) so far.
int destidx = 0;
// state: how many data characters of the current 4-character group have
// been accumulated into temp (0..3 once the loops have finished).
int state = 0;
// ch: the most recently read input character.
unsigned int ch = 0;
// temp: accumulator holding 6 bits per data character read in this group.
unsigned int temp = 0;
// If "char" is signed by default, using *src as an array index results in
// accessing negative array elements. Treat the input as a pointer to
// unsigned char to avoid this.
const unsigned char *src = reinterpret_cast<const unsigned char*>(src_param);
// The GET_INPUT macro gets the next input character, skipping
// over any whitespace, and stopping when we reach the end of the
// string or when we read any non-data character. The arguments are
// an arbitrary identifier (used as a label for goto) and the number
// of data bytes that must remain in the input to avoid aborting the
// loop.
//
// On a non-data character that is not skippable whitespace, the macro
// records in 'state' how many data characters of this group were already
// read (4 - remain) and executes 'break', which leaves the enclosing
// while loop. The offending character has already been consumed at that
// point (szsrc decremented, src advanced) and is left in 'ch'.
#define GET_INPUT(label, remain) \
label: \
--szsrc; \
ch = *src++; \
decode = unbase64[ch]; \
if (decode < 0) { \
if (ascii_isspace(ch) && szsrc >= remain) \
goto label; \
state = 4 - remain; \
break; \
}
// if dest is null, we're just checking to see if it's legal input
// rather than producing output. (I suspect this could just be done
// with a regexp...). We duplicate the loop so this test can be
// outside it instead of in every iteration.
if (dest) {
// This loop consumes 4 input bytes and produces 3 output bytes
// per iteration. We can't know at the start that there is enough
// data left in the string for a full iteration, so the loop may
// break out in the middle; if so 'state' will be set to the
// number of input bytes read.
while (szsrc >= 4) {
// We'll start by optimistically assuming that the next four
// bytes of the string (src[0..3]) are four good data bytes
// (that is, no nulls, whitespace, padding chars, or illegal
// chars). We need to test src[0..2] for nulls individually
// before constructing temp to preserve the property that we
// never read past a null in the string (no matter how long
// szsrc claims the string is).
if (!src[0] || !src[1] || !src[2] ||
(temp = ((unsigned(unbase64[src[0]]) << 18) |
(unsigned(unbase64[src[1]]) << 12) |
(unsigned(unbase64[src[2]]) << 6) |
(unsigned(unbase64[src[3]])))) & 0x80000000) {
// Iff any of those four characters was bad (null, illegal,
// whitespace, padding), then temp's high bit will be set
// (because unbase64[] is -1 for all bad characters).
//
// We'll back up and resort to the slower decoder, which knows
// how to handle those cases. Each GET_INPUT may 'break' out of
// the while loop, leaving the partial group in temp/state.
GET_INPUT(first, 4);
temp = decode;
GET_INPUT(second, 3);
temp = (temp << 6) | decode;
GET_INPUT(third, 2);
temp = (temp << 6) | decode;
GET_INPUT(fourth, 1);
temp = (temp << 6) | decode;
} else {
// We really did have four good data bytes, so advance four
// characters in the string.
szsrc -= 4;
src += 4;
// decode = -1 with ch = '\0' is the "no bad character seen" marker
// tested after the loop: it skips the error return there and falls
// through to the clean-up loop.
decode = -1;
ch = '\0';
}
// temp has 24 bits of input, so write that out as three bytes.
// The assignments to dest[] keep only the low 8 bits of temp each time.
if (destidx+3 > szdest) return -1;
dest[destidx+2] = temp;
temp >>= 8;
dest[destidx+1] = temp;
temp >>= 8;
dest[destidx] = temp;
destidx += 3;
}
} else {
// Validation-only copy of the loop above: identical input handling, but
// the decoded bits are not accumulated on the slow path and nothing is
// written; only destidx is advanced.
while (szsrc >= 4) {
if (!src[0] || !src[1] || !src[2] ||
(temp = ((unsigned(unbase64[src[0]]) << 18) |
(unsigned(unbase64[src[1]]) << 12) |
(unsigned(unbase64[src[2]]) << 6) |
(unsigned(unbase64[src[3]])))) & 0x80000000) {
GET_INPUT(first_no_dest, 4);
GET_INPUT(second_no_dest, 3);
GET_INPUT(third_no_dest, 2);
GET_INPUT(fourth_no_dest, 1);
} else {
szsrc -= 4;
src += 4;
decode = -1;
ch = '\0';
}
destidx += 3;
}
}
#undef GET_INPUT
// if the loop terminated because we read a bad character, return
// now. NUL, pad characters and whitespace are not errors here; they are
// handled by the code below.
//
// NOTE(review): ch == '\0' is both the fast-path "all good" marker and the
// value left behind when GET_INPUT consumes a real NUL byte. In the latter
// case control appears to reach the clean-up loop below, which keeps
// reading bytes after that NUL while szsrc > 0 -- at odds with the "never
// read past a null" intent stated in the fast loop. Confirm whether callers
// can pass input with an embedded NUL.
if (decode < 0 && ch != '\0' &&
ch != kPad64Equals && ch != kPad64Dot && !ascii_isspace(ch))
return -1;
if (ch == kPad64Equals || ch == kPad64Dot) {
// if we stopped by hitting an '=' or '.', un-read that character -- we'll
// look at it again when we count to check for the proper number of
// equals signs at the end.
++szsrc;
--src;
} else {
// This loop consumes 1 input byte per iteration. It's used to
// clean up the 0-3 input bytes remaining when the first, faster
// loop finishes. 'temp' contains the data from 'state' input
// characters read by the first loop.
while (szsrc > 0) {
--szsrc;
ch = *src++;
decode = unbase64[ch];
if (decode < 0) {
if (ascii_isspace(ch)) {
// Whitespace is ignored wherever it appears.
continue;
} else if (ch == '\0') {
// A NUL ends the input early, regardless of szsrc.
break;
} else if (ch == kPad64Equals || ch == kPad64Dot) {
// back up one character; we'll read it again when we check
// for the correct number of pad characters at the end.
++szsrc;
--src;
break;
} else {
// Any other non-data character is illegal.
return -1;
}
}
// Each input character gives us six bits of output.
temp = (temp << 6) | decode;
++state;
if (state == 4) {
// If we've accumulated 24 bits of output, write that out as
// three bytes.
if (dest) {
if (destidx+3 > szdest) return -1;
dest[destidx+2] = temp;
temp >>= 8;
dest[destidx+1] = temp;
temp >>= 8;
dest[destidx] = temp;
}
destidx += 3;
state = 0;
temp = 0;
}
}
}
// Process the leftover data contained in 'temp' at the end of the input.
// expected_equals is the number of pad characters a fully padded encoding
// would carry for this amount of leftover data.
int expected_equals = 0;
switch (state) {
case 0:
// Nothing left over; output is a multiple of 3 bytes.
break;
case 1:
// Bad input; we have 6 bits left over.
return -1;
case 2:
// Produce one more output byte from the 12 input bits we have left.
// The low 4 bits are discarded by the shift.
if (dest) {
if (destidx+1 > szdest) return -1;
temp >>= 4;
dest[destidx] = temp;
}
++destidx;
expected_equals = 2;
break;
case 3:
// Produce two more output bytes from the 18 input bits we have left.
// The low 2 bits are discarded by the shift.
if (dest) {
if (destidx+2 > szdest) return -1;
temp >>= 2;
dest[destidx+1] = temp;
temp >>= 8;
dest[destidx] = temp;
}
destidx += 2;
expected_equals = 1;
break;
default:
// state should have no other values at this point.
GOOGLE_LOG(FATAL) << "This can't happen; base64 decoder state = " << state;
}
// The remainder of the string should be all whitespace, mixed with
// exactly 0 equals signs, or exactly 'expected_equals' equals
// signs. (Always accepting 0 equals signs is a google extension
// not covered in the RFC, as is accepting dot as the pad character.)
// The scan stops at the first NUL byte or when szsrc is exhausted; '='
// and '.' are counted together.
int equals = 0;
while (szsrc > 0 && *src) {
if (*src == kPad64Equals || *src == kPad64Dot)
++equals;
else if (!ascii_isspace(*src))
return -1;
--szsrc;
++src;
}
return (equals == 0 || equals == expected_equals) ? destidx : -1;
}