cpp-ch/local-engine/Common/Base85Codec.cpp (101 lines of code) (raw):

/* * Licensed to the Apache Software Foundation (ASF) under one or more * contributor license agreements. See the NOTICE file distributed with * this work for additional information regarding copyright ownership. * The ASF licenses this file to You under the Apache License, Version 2.0 * (the "License"); you may not use this file except in compliance with * the License. You may obtain a copy of the License at * * http://www.apache.org/licenses/LICENSE-2.0 * * Unless required by applicable law or agreed to in writing, software * distributed under the License is distributed on an "AS IS" BASIS, * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. * See the License for the specific language governing permissions and * limitations under the License. */ #include "Base85Codec.h" #include <IO/ReadBufferFromString.h> #include <IO/ReadHelpers.h> #include <IO/WriteHelpers.h> namespace local_engine { String uuidToByteBuffer(const DB::UUID & uuid) { const UInt128 under_type = uuid.toUnderType(); long low = under_type.items[0]; long high = under_type.items[1]; String result(16, '\0'); #if __BYTE_ORDER__ == __ORDER_LITTLE_ENDIAN__ const Int64 low_swap = __builtin_bswap64(low); const Int64 high_swap = __builtin_bswap64(high); memcpy(result.data(), &low_swap, 8); memcpy(result.data() + 8, &high_swap, 8); #else memcpy(result.data(), &high, 8); memcpy(result.data() + 8, &low, 8); #endif return result; } DB::UUID uuidFromByteBuffer(const String & buffer) { chassert(buffer.size() >= 16); DB::ReadBufferFromString buf(buffer); Int64 lowBits; Int64 highBits; DB::readBinaryBigEndian(lowBits, buf); DB::readBinaryBigEndian(highBits, buf); DB::UUID uuid; uuid.toUnderType().items[0] = lowBits; uuid.toUnderType().items[1] = highBits; return uuid; } String Base85Codec::encodeUUID(const DB::UUID & uuid) { const String blocks = uuidToByteBuffer(uuid); return encodeBlocks(blocks); } DB::UUID Base85Codec::decodeUUID(const String & encoded) { const String blocks = decodeBlocks(encoded); return uuidFromByteBuffer(blocks); } String Base85Codec::encodeBlocks(const String & blocks) { chassert(blocks.size() % 4 == 0); auto numBlocks = blocks.size() / 4; // Every 4 byte block gets encoded into 5 bytes/chars const auto outputLength = numBlocks * 5; String output(outputLength, '\0'); size_t outputIndex = 0; DB::ReadBufferFromString rb(blocks); while (!rb.eof()) { Int32 readInt; DB::readBinaryBigEndian(readInt, rb); Int64 sum = readInt & 0x00000000ffffffffL; output[outputIndex] = ENCODE_MAP[static_cast<Int32>(sum / BASE_4TH_POWER)]; sum %= BASE_4TH_POWER; output[outputIndex + 1] = ENCODE_MAP[static_cast<Int32>(sum / BASE_3RD_POWER)]; sum %= BASE_3RD_POWER; output[outputIndex + 2] = ENCODE_MAP[static_cast<Int32>(sum / BASE_2ND_POWER)]; sum %= BASE_2ND_POWER; output[outputIndex + 3] = ENCODE_MAP[static_cast<Int32>(sum / BASE)]; output[outputIndex + 4] = ENCODE_MAP[static_cast<Int32>(sum % BASE)]; outputIndex += 5; } return output; } String Base85Codec::decodeBlocks(const String & encoded) { chassert(encoded.size() % 5 == 0); String result(encoded.size() / 5 * 4, '\0'); // A mechanism to detect invalid characters in the input while decoding, that only has a // single conditional at the very end, instead of branching for every character. Int32 canary = 0; auto decodeInputChar = [&encoded, &canary](Int32 i) -> Int64 { const auto c = encoded[i]; canary |= c; // non-ascii char has bits outside of ASCII_BITMASK const auto b = DECODE_MAP[c & ASCII_BITMASK]; canary |= b; // invalid char maps to -1, which has bits outside ASCII_BITMASK return static_cast<Int64>(b); }; Int32 inputIndex = 0; DB::WriteBufferFromString buf(result); while (buf.hasPendingData()) { Int64 sum = 0L; sum += decodeInputChar(inputIndex) * BASE_4TH_POWER; sum += decodeInputChar(inputIndex + 1) * BASE_3RD_POWER; sum += decodeInputChar(inputIndex + 2) * BASE_2ND_POWER; sum += decodeInputChar(inputIndex + 3) * BASE; sum += decodeInputChar(inputIndex + 4); DB::writeBinaryBigEndian(static_cast<Int32>(sum), buf); inputIndex += 5; } buf.finalize(); return result; } }