cpp-ch/local-engine/Storages/Serializations/ExcelBoolReader.cpp (152 lines of code) (raw):

/*
 * Licensed to the Apache Software Foundation (ASF) under one or more
 * contributor license agreements.  See the NOTICE file distributed with
 * this work for additional information regarding copyright ownership.
 * The ASF licenses this file to You under the Apache License, Version 2.0
 * (the "License"); you may not use this file except in compliance with
 * the License.  You may obtain a copy of the License at
 *
 *    http://www.apache.org/licenses/LICENSE-2.0
 *
 * Unless required by applicable law or agreed to in writing, software
 * distributed under the License is distributed on an "AS IS" BASIS,
 * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
 * See the License for the specific language governing permissions and
 * limitations under the License.
 */
#include <algorithm>
#include <functional>

#include <Columns/ColumnsNumber.h>
#include <IO/PeekableReadBuffer.h>
#include <IO/ReadHelpers.h>
#include "ExcelBoolReader.h"

namespace DB
{
namespace ErrorCodes
{
extern const int CANNOT_PARSE_BOOL;
extern const int ILLEGAL_COLUMN;
}
}

namespace local_engine
{
using namespace DB;

namespace
{
/// Drops the checkpoint of `buf` and throws CANNOT_PARSE_BOOL if the buffer still reports unread data afterwards.
/// Shared by every success path of deserializeImpl that has to release the checkpoint before returning.
void dropCheckpointOrThrowOnUnreadData(DB::PeekableReadBuffer & buf)
{
    buf.dropCheckpoint();
    if (buf.hasUnreadData())
        throw Exception(
            ErrorCodes::CANNOT_PARSE_BOOL,
            "Cannot continue parsing after parsed bool value because it will result in the loss of some data. It may happen if "
            "bool_true_representation or bool_false_representation contains some delimiters of input format");
}
}

/// Returns `column` as a ColumnUInt8 pointer.
/// Throws ILLEGAL_COLUMN when the column is of any other type.
DB::ColumnUInt8 * checkAndGetDeserializeColumnType(IColumn & column)
{
    /// The pointer form of typeid_cast is relied on to yield nullptr on a type mismatch,
    /// so a single null check is enough.
    auto * col = typeid_cast<DB::ColumnUInt8 *>(&column);
    if (!col)
        throw Exception(DB::ErrorCodes::ILLEGAL_COLUMN, "Bool type can only deserialize columns of type UInt8.{}", column.getName());
    return col;
}

/// Tries to read one of the built-in boolean spellings (case-insensitive) from `istr`:
///   true:  1, T, True, Y, Yes, On, Enable, Enabled
///   false: 0, F, False, N, No, Off, Disable, Disabled
///
/// On success the parsed value is appended to `column` and true is returned.
/// On failure nothing is appended and false is returned; `istr` may already have been advanced
/// past the characters that did match, so the caller is responsible for rolling the buffer back.
bool tryDeserializeAllVariants(ColumnUInt8 * column, ReadBuffer & istr)
{
    if (checkCharCaseInsensitive('1', istr))
    {
        column->insert(true);
    }
    else if (checkCharCaseInsensitive('0', istr))
    {
        column->insert(false);
    }
    /// 'True' and 'T'
    else if (checkCharCaseInsensitive('t', istr))
    {
        /// Check if it's just short form `T` or full form `True`
        if (checkCharCaseInsensitive('r', istr))
        {
            if (!checkStringCaseInsensitive("ue", istr))
                return false;
        }
        column->insert(true);
    }
    /// 'False' and 'F'
    else if (checkCharCaseInsensitive('f', istr))
    {
        /// Check if it's just short form `F` or full form `False`
        if (checkCharCaseInsensitive('a', istr))
        {
            if (!checkStringCaseInsensitive("lse", istr))
                return false;
        }
        column->insert(false);
    }
    /// 'Yes' and 'Y'
    else if (checkCharCaseInsensitive('y', istr))
    {
        /// Check if it's just short form `Y` or full form `Yes`
        if (checkCharCaseInsensitive('e', istr))
        {
            if (!checkCharCaseInsensitive('s', istr))
                return false;
        }
        column->insert(true);
    }
    /// 'No' and 'N'
    else if (checkCharCaseInsensitive('n', istr))
    {
        /// The trailing `o` is optional: both `N` and `No` are accepted.
        checkCharCaseInsensitive('o', istr);
        column->insert(false);
    }
    /// 'On' and 'Off'
    else if (checkCharCaseInsensitive('o', istr))
    {
        if (checkCharCaseInsensitive('n', istr))
            column->insert(true);
        else if (checkStringCaseInsensitive("ff", istr))
            column->insert(false);
        else
            return false;
    }
    /// 'Enable' and 'Enabled'
    else if (checkStringCaseInsensitive("enable", istr))
    {
        /// The trailing `d` is optional: both 'enable' and 'enabled' are accepted.
        checkCharCaseInsensitive('d', istr);
        column->insert(true);
    }
    /// 'Disable' and 'Disabled'
    else if (checkStringCaseInsensitive("disable", istr))
    {
        /// The trailing `d` is optional: both 'disable' and 'disabled' are accepted.
        checkCharCaseInsensitive('d', istr);
        column->insert(false);
    }
    else
    {
        return false;
    }

    return true;
}

/// Parses one boolean value from `istr` and appends it to `column`.
///
/// Candidates are tried in this order, rolling the buffer back to the start between attempts:
///   1. settings.bool_true_representation
///   2. settings.bool_false_representation
///   3. the built-in spellings accepted by tryDeserializeAllVariants
/// A candidate is accepted only if `check_end_of_value` returns true for the buffer positioned right after it.
///
/// Throws ILLEGAL_COLUMN if `column` is not a ColumnUInt8, and CANNOT_PARSE_BOOL if no candidate matches
/// or if dropping the checkpoint would leave unread data in the peekable buffer.
/// When no candidate matches, `column` is left with the same number of rows it had on entry.
void deserializeImpl(
    IColumn & column, ReadBuffer & istr, const FormatSettings & settings, std::function<bool(ReadBuffer &)> check_end_of_value)
{
    DB::ColumnUInt8 * col = checkAndGetDeserializeColumnType(column);

    DB::PeekableReadBuffer buf(istr);
    buf.setCheckpoint();
    if (checkString(settings.bool_true_representation, buf) && check_end_of_value(buf))
    {
        col->insert(true);
        return;
    }

    buf.rollbackToCheckpoint();
    if (checkString(settings.bool_false_representation, buf) && check_end_of_value(buf))
    {
        col->insert(false);
        dropCheckpointOrThrowOnUnreadData(buf);
        return;
    }

    buf.rollbackToCheckpoint();
    const size_t rows_before = col->size();
    if (tryDeserializeAllVariants(col, buf) && check_end_of_value(buf))
    {
        dropCheckpointOrThrowOnUnreadData(buf);
        return;
    }

    /// tryDeserializeAllVariants appends to the column *before* check_end_of_value runs, so an input such as
    /// `1abc` reaches this point with a spurious row already inserted. Remove it so that a caller which
    /// catches the exception below does not observe a column that is one row too long.
    if (col->size() > rows_before)
        col->popBack(col->size() - rows_before);

    buf.makeContinuousMemoryFromCheckpointToPos();
    buf.rollbackToCheckpoint();
    throw Exception(
        ErrorCodes::CANNOT_PARSE_BOOL,
        "Cannot parse boolean value here: '{}', should be '{}' or '{}' controlled by setting bool_true_representation and "
        "bool_false_representation or one of "
        "True/False/T/F/Y/N/Yes/No/On/Off/Enable/Disable/Enabled/Disabled/1/0",
        String(buf.position(), std::min<size_t>(10, buf.available())),
        settings.bool_true_representation,
        settings.bool_false_representation);
}

/// Reads one boolean CSV field from `istr` into `column`.
///
/// A value is considered complete when, after skipping any trailing spaces, the buffer is at EOF,
/// at settings.csv.delimiter, or at a line break ('\n' or '\r').
/// Throws CANNOT_PARSE_BOOL if `istr` is already at EOF or the field is not a recognised boolean.
void deserializeExcelBoolTextCSV(DB::IColumn & column, DB::ReadBuffer & istr, const DB::FormatSettings & settings)
{
    if (istr.eof())
        throw DB::Exception(DB::ErrorCodes::CANNOT_PARSE_BOOL, "Expected boolean value but get EOF.");

    deserializeImpl(
        column,
        istr,
        settings,
        [&](DB::ReadBuffer & buf)
        {
            /// Skip spaces that follow the value; the field delimiter and line breaks are not consumed.
            while (!buf.eof() && *buf.position() == ' ')
                ++buf.position();
            return buf.eof() || *buf.position() == settings.csv.delimiter || *buf.position() == '\n' || *buf.position() == '\r';
        });
}
}