sql_utils/public/strings.cc (53 lines of code) (raw):
/*
* Copyright 2023 Google LLC
*
* Licensed under the Apache License, Version 2.0 (the "License");
* you may not use this file except in compliance with the License.
* You may obtain a copy of the License at
*
* http://www.apache.org/licenses/LICENSE-2.0
*
* Unless required by applicable law or agreed to in writing, software
* distributed under the License is distributed on an "AS IS" BASIS,
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
* See the License for the specific language governing permissions and
* limitations under the License.
*/
#include "sql_utils/public/strings.h"
#include <ctype.h>
#include <iterator>
#include <string>
#include <vector>
#include "absl/strings/ascii.h"
#include "absl/strings/str_cat.h"
namespace bigquery_ml_utils {
// Lookup table mapping a nibble value (0-15) to its lowercase hexadecimal
// digit. Read-only, so it is declared constexpr to keep it out of mutable
// storage and to stop accidental writes from compiling.
static constexpr char hex_char[] = "0123456789abcdef";
// ----------------------------------------------------------------------
// CEscapeInternal()
//    Returns a copy of 'src' escaped with C-style escape sequences, so
//    that the result can be placed between quote characters.
//
//    Always escaped: newline (\n), carriage return (\r), tab (\t) and
//    backslash (\\). Any other byte for which absl::ascii_isprint() is
//    false is written as a two-digit lowercase hex escape (\xNN).
//
//    utf8_safe:
//      When true, bytes >= 0x80 are copied through unchanged instead of
//      being hex-escaped, so multi-byte UTF-8 sequences are left intact.
//
//    escape_quote_char:
//      When zero, every quote character (' " `) is backslash-escaped.
//      When non-zero, only the quote character equal to it is escaped and
//      the others are copied through unchanged. This allows writing
//      "ab'cd" or 'ab"cd' or `ab"cd` without extra escaping.
//
//    COPIED FROM strings/escaping.cc, with unnecessary modes removed, and
//    with the escape_quote_char feature added.
// ----------------------------------------------------------------------
static std::string CEscapeInternal(absl::string_view src, bool utf8_safe,
                                   char escape_quote_char) {
  std::string dest;
  // Every input byte yields at least one output byte, so this is a lower
  // bound that avoids the first few reallocations.
  dest.reserve(src.size());

  bool last_hex_escape = false;  // true if last output char was \xNN.

  // Range-for rather than pointer arithmetic: string_view iterators are not
  // guaranteed to be raw `const char*` on every standard library.
  for (const char ch : src) {
    // Work on the unsigned value so that bytes >= 0x80 compare and index
    // correctly regardless of whether plain char is signed.
    const unsigned char c = static_cast<unsigned char>(ch);
    bool is_hex_escape = false;

    switch (c) {
      case '\n':
        dest.append("\\n");
        break;
      case '\r':
        dest.append("\\r");
        break;
      case '\t':
        dest.append("\\t");
        break;
      case '\\':
        dest.append("\\\\");
        break;
      case '\'':
      case '\"':
      case '`':
        // Escape only quote chars that match escape_quote_char (or all of
        // them when escape_quote_char is zero).
        if (escape_quote_char == 0 || ch == escape_quote_char) {
          dest.push_back('\\');
        }
        dest.push_back(ch);
        break;
      default: {
        // In utf8_safe mode, high-bit bytes are never rewritten.
        const bool passthrough_high_byte = utf8_safe && c >= 0x80;
        // Note that if we emit \xNN and the src character after that is a
        // hex digit then that digit must be escaped too, to prevent it
        // being interpreted as part of the character code by C.
        const bool needs_hex =
            !absl::ascii_isprint(c) ||
            (last_hex_escape && absl::ascii_isxdigit(c));
        if (!passthrough_high_byte && needs_hex) {
          dest.append("\\x");
          dest.push_back(hex_char[c >> 4]);    // high nibble
          dest.push_back(hex_char[c & 0x0F]);  // low nibble
          is_hex_escape = true;
        } else {
          dest.push_back(ch);
        }
        break;
      }
    }
    last_hex_escape = is_hex_escape;
  }
  return dest;
}
// Returns 'str' as a quoted string literal.
//
// The delimiter is a single quote only when 'str' contains a double quote
// and no single quote; in every other case it is a double quote. The
// contents are escaped by CEscapeInternal() with utf8_safe enabled and with
// only the chosen delimiter treated as a quote that needs escaping.
std::string ToStringLiteral(absl::string_view str) {
  const bool has_double_quote = str.find('"') != absl::string_view::npos;
  const bool has_single_quote = str.find('\'') != absl::string_view::npos;
  const char delimiter = (has_double_quote && !has_single_quote) ? '\'' : '"';

  std::string literal(1, delimiter);
  literal.append(CEscapeInternal(str, /*utf8_safe=*/true, delimiter));
  literal.push_back(delimiter);
  return literal;
}
} // namespace bigquery_ml_utils