in absl/strings/escaping.cc [434:665]
bool Base64UnescapeInternal(const char* src_param, size_t szsrc, char* dest,
size_t szdest, const signed char* unbase64,
size_t* len) {
static const char kPad64Equals = '=';
static const char kPad64Dot = '.';
size_t destidx = 0;
int decode = 0;
int state = 0;
unsigned int ch = 0;
unsigned int temp = 0;
// If "char" is signed by default, using *src as an array index results in
// accessing negative array elements. Treat the input as a pointer to
// unsigned char to avoid this.
const unsigned char* src = reinterpret_cast<const unsigned char*>(src_param);
// The GET_INPUT macro gets the next input character, skipping
// over any whitespace, and stopping when we reach the end of the
// string or when we read any non-data character. The arguments are
// an arbitrary identifier (used as a label for goto) and the number
// of data bytes that must remain in the input to avoid aborting the
// loop.
#define GET_INPUT(label, remain) \
label: \
--szsrc; \
ch = *src++; \
decode = unbase64[ch]; \
if (decode < 0) { \
if (absl::ascii_isspace(ch) && szsrc >= remain) goto label; \
state = 4 - remain; \
break; \
}
// if dest is null, we're just checking to see if it's legal input
// rather than producing output. (I suspect this could just be done
// with a regexp...). We duplicate the loop so this test can be
// outside it instead of in every iteration.
if (dest) {
// This loop consumes 4 input bytes and produces 3 output bytes
// per iteration. We can't know at the start that there is enough
// data left in the string for a full iteration, so the loop may
// break out in the middle; if so 'state' will be set to the
// number of input bytes read.
while (szsrc >= 4) {
// We'll start by optimistically assuming that the next four
// bytes of the string (src[0..3]) are four good data bytes
// (that is, no nulls, whitespace, padding chars, or illegal
// chars). We need to test src[0..2] for nulls individually
// before constructing temp to preserve the property that we
// never read past a null in the string (no matter how long
// szsrc claims the string is).
if (!src[0] || !src[1] || !src[2] ||
((temp = ((unsigned(unbase64[src[0]]) << 18) |
(unsigned(unbase64[src[1]]) << 12) |
(unsigned(unbase64[src[2]]) << 6) |
(unsigned(unbase64[src[3]])))) &
0x80000000)) {
// Iff any of those four characters was bad (null, illegal,
// whitespace, padding), then temp's high bit will be set
// (because unbase64[] is -1 for all bad characters).
//
// We'll back up and resort to the slower decoder, which knows
// how to handle those cases.
GET_INPUT(first, 4);
temp = decode;
GET_INPUT(second, 3);
temp = (temp << 6) | decode;
GET_INPUT(third, 2);
temp = (temp << 6) | decode;
GET_INPUT(fourth, 1);
temp = (temp << 6) | decode;
} else {
// We really did have four good data bytes, so advance four
// characters in the string.
szsrc -= 4;
src += 4;
}
// temp has 24 bits of input, so write that out as three bytes.
if (destidx + 3 > szdest) return false;
dest[destidx + 2] = temp;
temp >>= 8;
dest[destidx + 1] = temp;
temp >>= 8;
dest[destidx] = temp;
destidx += 3;
}
} else {
while (szsrc >= 4) {
if (!src[0] || !src[1] || !src[2] ||
((temp = ((unsigned(unbase64[src[0]]) << 18) |
(unsigned(unbase64[src[1]]) << 12) |
(unsigned(unbase64[src[2]]) << 6) |
(unsigned(unbase64[src[3]])))) &
0x80000000)) {
GET_INPUT(first_no_dest, 4);
GET_INPUT(second_no_dest, 3);
GET_INPUT(third_no_dest, 2);
GET_INPUT(fourth_no_dest, 1);
} else {
szsrc -= 4;
src += 4;
}
destidx += 3;
}
}
#undef GET_INPUT
// if the loop terminated because we read a bad character, return
// now.
if (decode < 0 && ch != kPad64Equals && ch != kPad64Dot &&
!absl::ascii_isspace(ch))
return false;
if (ch == kPad64Equals || ch == kPad64Dot) {
// if we stopped by hitting an '=' or '.', un-read that character -- we'll
// look at it again when we count to check for the proper number of
// equals signs at the end.
++szsrc;
--src;
} else {
// This loop consumes 1 input byte per iteration. It's used to
// clean up the 0-3 input bytes remaining when the first, faster
// loop finishes. 'temp' contains the data from 'state' input
// characters read by the first loop.
while (szsrc > 0) {
--szsrc;
ch = *src++;
decode = unbase64[ch];
if (decode < 0) {
if (absl::ascii_isspace(ch)) {
continue;
} else if (ch == kPad64Equals || ch == kPad64Dot) {
// back up one character; we'll read it again when we check
// for the correct number of pad characters at the end.
++szsrc;
--src;
break;
} else {
return false;
}
}
// Each input character gives us six bits of output.
temp = (temp << 6) | decode;
++state;
if (state == 4) {
// If we've accumulated 24 bits of output, write that out as
// three bytes.
if (dest) {
if (destidx + 3 > szdest) return false;
dest[destidx + 2] = temp;
temp >>= 8;
dest[destidx + 1] = temp;
temp >>= 8;
dest[destidx] = temp;
}
destidx += 3;
state = 0;
temp = 0;
}
}
}
// Process the leftover data contained in 'temp' at the end of the input.
int expected_equals = 0;
switch (state) {
case 0:
// Nothing left over; output is a multiple of 3 bytes.
break;
case 1:
// Bad input; we have 6 bits left over.
return false;
case 2:
// Produce one more output byte from the 12 input bits we have left.
if (dest) {
if (destidx + 1 > szdest) return false;
temp >>= 4;
dest[destidx] = temp;
}
++destidx;
expected_equals = 2;
break;
case 3:
// Produce two more output bytes from the 18 input bits we have left.
if (dest) {
if (destidx + 2 > szdest) return false;
temp >>= 2;
dest[destidx + 1] = temp;
temp >>= 8;
dest[destidx] = temp;
}
destidx += 2;
expected_equals = 1;
break;
default:
// state should have no other values at this point.
ABSL_RAW_LOG(FATAL, "This can't happen; base64 decoder state = %d",
state);
}
// The remainder of the string should be all whitespace, mixed with
// exactly 0 equals signs, or exactly 'expected_equals' equals
// signs. (Always accepting 0 equals signs is an Abseil extension
// not covered in the RFC, as is accepting dot as the pad character.)
int equals = 0;
while (szsrc > 0) {
if (*src == kPad64Equals || *src == kPad64Dot)
++equals;
else if (!absl::ascii_isspace(*src))
return false;
--szsrc;
++src;
}
const bool ok = (equals == 0 || equals == expected_equals);
if (ok) *len = destidx;
return ok;
}