mysqlshdk/libs/utils/utils_string.h (394 lines of code) (raw):
/*
* Copyright (c) 2017, 2024, Oracle and/or its affiliates.
*
* This program is free software; you can redistribute it and/or modify
* it under the terms of the GNU General Public License, version 2.0,
* as published by the Free Software Foundation.
*
* This program is designed to work with certain software (including
* but not limited to OpenSSL) that is licensed under separate terms,
* as designated in a particular file or component or in included license
* documentation. The authors of MySQL hereby grant you an additional
* permission to link the program and your derivative works with the
* separately licensed software that they have either included with
* the program or referenced in the documentation.
*
* This program is distributed in the hope that it will be useful, but
* WITHOUT ANY WARRANTY; without even the implied warranty of
* MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See
* the GNU General Public License, version 2.0, for more details.
*
* You should have received a copy of the GNU General Public License
* along with this program; if not, write to the Free Software Foundation, Inc.,
* 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA
*/
#ifndef MYSQLSHDK_LIBS_UTILS_UTILS_STRING_H_
#define MYSQLSHDK_LIBS_UTILS_UTILS_STRING_H_
#include <algorithm>
#include <cctype>
#include <cstdint>
#include <cstring>
#include <cwchar>
#include <cwctype>
#include <functional>
#include <stdexcept>
#include <string>
#include <string_view>
#include <utility>
#include <vector>
#include "scripting/common.h"
namespace shcore {
/**
 * Clears the given buffer, passed either as a raw pointer plus size or as a
 * std::string.
 *
 * NOTE(review): defined out of line; presumably the contents are overwritten
 * so that sensitive data does not linger in memory - confirm against the
 * implementation.
 */
void clear_buffer(char *buffer, size_t size);
void clear_buffer(std::string &buffer);
namespace internal {

/**
 * Returns a copy of `s` in which every character `c` is replaced by `fun(c)`.
 *
 * @param s   Characters to be transformed.
 * @param fun Callable applied to each character; its result is stored back
 *            into the copy.
 */
template <typename Char, typename F>
inline std::basic_string<Char> transform(std::basic_string_view<Char> s,
                                         F fun) {
  std::basic_string<Char> r(s);
  std::transform(r.begin(), r.end(), r.begin(), fun);
  return r;
}

}  // namespace internal

/** Convert a copy of an ASCII string to uppercase and return */
inline std::string str_upper(std::string_view s) {
  // The <cctype> functions require a value representable as unsigned char
  // (or EOF); handing them a negative plain char is undefined behaviour, so
  // every byte goes through unsigned char first.
  return internal::transform(s, [](unsigned char c) {
    return static_cast<char>(std::toupper(c));
  });
}

/** Wide character variant of str_upper() */
inline std::wstring str_upper(std::wstring_view s) {
  return internal::transform(s, [](wchar_t c) {
    return static_cast<wchar_t>(std::towupper(static_cast<std::wint_t>(c)));
  });
}

/** Convert a copy of an ASCII string to lowercase and return */
inline std::string str_lower(std::string_view s) {
  // See str_upper(): bytes must reach std::tolower() as unsigned char.
  return internal::transform(s, [](unsigned char c) {
    return static_cast<char>(std::tolower(c));
  });
}

/** Wide character variant of str_lower() */
inline std::wstring str_lower(std::wstring_view s) {
  return internal::transform(s, [](wchar_t c) {
    return static_cast<wchar_t>(std::towlower(static_cast<std::wint_t>(c)));
  });
}
/**
 * Compares 2 NUL-terminated strings ignoring case (for ascii).
 *
 * @returns a negative value, zero or a positive value when `a` sorts
 * respectively before, equal to or after `b`.
 */
inline int str_casecmp(const char *a, const char *b) {
#ifdef _WIN32
  const int order = ::_stricmp(a, b);
#else
  const int order = ::strcasecmp(a, b);
#endif
  return order;
}
/** Wide character variant: compares 2 NUL-terminated strings ignoring case */
inline int str_casecmp(const wchar_t *a, const wchar_t *b) {
#ifdef _WIN32
  const int order = ::_wcsicmp(a, b);
#else
  const int order = ::wcscasecmp(a, b);
#endif
  return order;
}
/** Compares at most `n` characters of 2 strings ignoring case (for ascii) */
inline int str_casecmp(const char *a, const char *b, size_t n) {
#ifdef _WIN32
  const int order = ::_strnicmp(a, b, n);
#else
  const int order = ::strncasecmp(a, b, n);
#endif
  return order;
}
/** Wide character variant: compares at most `n` characters ignoring case */
inline int str_casecmp(const wchar_t *a, const wchar_t *b, size_t n) {
#ifdef _WIN32
  const int order = ::_wcsnicmp(a, b, n);
#else
  const int order = ::wcsncasecmp(a, b, n);
#endif
  return order;
}
/** Strict weak ordering of strings which takes the letter case into account */
struct Case_sensitive_comparator {
  bool operator()(const std::string &a, const std::string &b) const {
    return a < b;
  }

  bool operator()(const std::wstring &a, const std::wstring &b) const {
    return a < b;
  }
};
/**
 * Strict weak ordering of strings which ignores the letter case.
 *
 * The comparison is done on the C strings, via str_casecmp().
 */
struct Case_insensitive_comparator {
  bool operator()(const std::string &a, const std::string &b) const {
    const int order = str_casecmp(a.c_str(), b.c_str());
    return order < 0;
  }

  bool operator()(const std::wstring &a, const std::wstring &b) const {
    const int order = str_casecmp(a.c_str(), b.c_str());
    return order < 0;
  }
};
struct Case_comparator {
bool case_sensitive{true};
Case_comparator(bool p_case_sensitive) : case_sensitive{p_case_sensitive} {}
bool operator()(const std::string &a, const std::string &b) const {
return case_sensitive ? Case_sensitive_comparator()(a, b)
: Case_insensitive_comparator()(a, b);
}
bool operator()(const std::wstring &a, const std::wstring &b) const {
return case_sensitive ? Case_sensitive_comparator()(a, b)
: Case_insensitive_comparator()(a, b);
}
};
/**
 * Orders strings with std::lexicographical_compare(), i.e. the characters are
 * compared one by one using operator< of the character type.
 */
struct Lexicographical_comparator {
  bool operator()(const std::string &a, const std::string &b) const {
    return std::lexicographical_compare(a.cbegin(), a.cend(), b.cbegin(),
                                        b.cend());
  }

  bool operator()(const std::wstring &a, const std::wstring &b) const {
    return std::lexicographical_compare(a.cbegin(), a.cend(), b.cbegin(),
                                        b.cend());
  }
};
namespace internal {

/** Checks if two strings have the same length and are equal ignoring case */
template <typename Char>
inline bool str_caseeq_pair(std::basic_string_view<Char> a,
                            std::basic_string_view<Char> b) {
  const auto length = a.length();
  return length == b.length() &&
         str_casecmp(a.data(), b.data(), length) == 0;
}

/** Checks if `a` is equal (ignoring case) to at least one of the tokens */
template <typename Char, typename... T>
inline bool str_caseeq(std::basic_string_view<Char> a, T &&...tokens) {
  return (str_caseeq_pair<Char>(a, std::forward<T>(tokens)) || ...);
}

}  // namespace internal

/**
 * Checks if string `a` is equal, ignoring case, to any of the given tokens.
 *
 * @returns true if at least one token matches
 */
template <typename... T>
inline bool str_caseeq(std::string_view a, std::string_view token,
                       T &&...tokens) {
  const bool matched =
      internal::str_caseeq(a, token, std::forward<T>(tokens)...);
  return matched;
}

/** Wide character variant of str_caseeq() */
template <typename... T>
inline bool str_caseeq(std::wstring_view a, std::wstring_view token,
                       T &&...tokens) {
  const bool matched =
      internal::str_caseeq(a, token, std::forward<T>(tokens)...);
  return matched;
}
namespace internal {

/** Checks if the leading characters of `s` are equal to `prefix` */
template <typename Char>
inline bool str_beginswith_pair(std::basic_string_view<Char> s,
                                std::basic_string_view<Char> prefix) {
  // substr() clamps the count, a string shorter than the prefix never matches
  return s.substr(0, prefix.length()) == prefix;
}

/** Checks if `s` begins with at least one of the prefixes */
template <typename Char, typename... T>
inline bool str_beginswith(std::basic_string_view<Char> s, T &&...prefixes) {
  return (str_beginswith_pair<Char>(s, std::forward<T>(prefixes)) || ...);
}

}  // namespace internal

/**
 * Checks whether a string has another as a prefix.
 *
 * More than one prefix can be given, the result is true if any of them
 * matches.
 */
template <typename... T>
inline bool str_beginswith(std::string_view s, std::string_view prefix,
                           T &&...prefixes) {
  const bool matched =
      internal::str_beginswith(s, prefix, std::forward<T>(prefixes)...);
  return matched;
}

/** Wide character variant of str_beginswith() */
template <typename... T>
inline bool str_beginswith(std::wstring_view s, std::wstring_view prefix,
                           T &&...prefixes) {
  const bool matched =
      internal::str_beginswith(s, prefix, std::forward<T>(prefixes)...);
  return matched;
}
namespace internal {

/** Checks if the leading characters of `s` match `prefix`, ignoring case */
template <typename Char>
inline bool str_ibeginswith_pair(std::basic_string_view<Char> s,
                                 std::basic_string_view<Char> prefix) {
  const auto length = prefix.length();
  return s.length() >= length &&
         str_casecmp(s.data(), prefix.data(), length) == 0;
}

/** Checks if `s` begins (ignoring case) with at least one of the prefixes */
template <typename Char, typename... T>
inline bool str_ibeginswith(std::basic_string_view<Char> s, T &&...prefixes) {
  return (str_ibeginswith_pair<Char>(s, std::forward<T>(prefixes)) || ...);
}

}  // namespace internal

/**
 * Checks whether a string has another as a prefix, ignoring case.
 *
 * More than one prefix can be given, the result is true if any of them
 * matches.
 */
template <typename... T>
inline bool str_ibeginswith(std::string_view s, std::string_view prefix,
                            T &&...prefixes) {
  const bool matched =
      internal::str_ibeginswith(s, prefix, std::forward<T>(prefixes)...);
  return matched;
}

/** Wide character variant of str_ibeginswith() */
template <typename... T>
inline bool str_ibeginswith(std::wstring_view s, std::wstring_view prefix,
                            T &&...prefixes) {
  const bool matched =
      internal::str_ibeginswith(s, prefix, std::forward<T>(prefixes)...);
  return matched;
}
namespace internal {

/** Checks if the trailing characters of `s` are equal to `suffix` */
template <typename Char>
inline bool str_endswith_pair(std::basic_string_view<Char> s,
                              std::basic_string_view<Char> suffix) {
  const auto length = suffix.length();
  return length <= s.length() && s.substr(s.length() - length) == suffix;
}

/** Checks if `s` ends with at least one of the suffixes */
template <typename Char, typename... T>
inline bool str_endswith(std::basic_string_view<Char> s, T &&...suffixes) {
  return (str_endswith_pair<Char>(s, std::forward<T>(suffixes)) || ...);
}

}  // namespace internal

/**
 * Checks whether a string has another as a suffix.
 *
 * More than one suffix can be given, the result is true if any of them
 * matches.
 */
template <typename... T>
inline bool str_endswith(std::string_view s, std::string_view suffix,
                         T &&...suffixes) {
  const bool matched =
      internal::str_endswith(s, suffix, std::forward<T>(suffixes)...);
  return matched;
}

/** Wide character variant of str_endswith() */
template <typename... T>
inline bool str_endswith(std::wstring_view s, std::wstring_view suffix,
                         T &&...suffixes) {
  const bool matched =
      internal::str_endswith(s, suffix, std::forward<T>(suffixes)...);
  return matched;
}
namespace internal {

/** Checks if the trailing characters of `s` match `suffix`, ignoring case */
template <typename Char>
inline bool str_iendswith_pair(std::basic_string_view<Char> s,
                               std::basic_string_view<Char> suffix) {
  const auto length = suffix.length();
  if (length > s.length()) return false;

  // first character of the tail of `s` which is as long as the suffix
  const Char *tail = s.data() + s.length() - length;
  return str_casecmp(tail, suffix.data(), length) == 0;
}

/** Checks if `s` ends (ignoring case) with at least one of the suffixes */
template <typename Char, typename... T>
inline bool str_iendswith(std::basic_string_view<Char> s, T &&...suffixes) {
  return (str_iendswith_pair<Char>(s, std::forward<T>(suffixes)) || ...);
}

}  // namespace internal

/**
 * Checks whether a string has another as a suffix, ignoring case.
 *
 * More than one suffix can be given, the result is true if any of them
 * matches.
 */
template <typename... T>
inline bool str_iendswith(std::string_view s, std::string_view suffix,
                          T &&...suffixes) {
  const bool matched =
      internal::str_iendswith(s, suffix, std::forward<T>(suffixes)...);
  return matched;
}

/** Wide character variant of str_iendswith() */
template <typename... T>
inline bool str_iendswith(std::wstring_view s, std::wstring_view suffix,
                          T &&...suffixes) {
  const bool matched =
      internal::str_iendswith(s, suffix, std::forward<T>(suffixes)...);
  return matched;
}
/**
 * Searches for `needle` in `haystack`.
 *
 * NOTE(review): defined out of line; judging by the name this is presumably a
 * case-insensitive counterpart of strstr(), returning a pointer into
 * `haystack` or nullptr when there is no match - confirm against the
 * implementation.
 */
const char *str_casestr(const char *haystack, const char *needle);
/**
 * Finds the first position at which the two strings differ.
 *
 * @returns index of the first difference, std::string::npos if the strings
 * are identical. If one string is a proper prefix of the other, the length of
 * the shorter one is returned.
 */
inline size_t str_span(const std::string &s1, const std::string &s2) {
  const auto diff = std::mismatch(s1.begin(), s1.end(), s2.begin(), s2.end());

  if (diff.first == s1.end() && diff.second == s2.end()) {
    return std::string::npos;
  }

  return static_cast<size_t>(diff.first - s1.begin());
}
/**
 * Partition a string in 2 at the first occurrence of a separator, if present.
 *
 * @param s String to be partitioned.
 * @param sep Separator to look for.
 * @param found_sep If not null, receives true if the separator was found,
 *        false otherwise.
 *
 * @returns pair holding the text before and after the separator (separator
 * itself is not included); if there is no separator, the pair holds the whole
 * input and an empty string.
 */
inline std::pair<std::string, std::string> str_partition(
    const std::string &s, const std::string &sep, bool *found_sep = nullptr) {
  const auto pos = s.find(sep);
  const bool found = (pos != std::string::npos);

  if (found_sep != nullptr) *found_sep = found;

  if (!found) return {s, std::string{}};

  return {s.substr(0, pos), s.substr(pos + sep.length())};
}
/**
 * Partition a string in 2 right after the first occurrence of a separator, if
 * present. The input string is not modified.
 *
 * @returns pair holding the text up to and including the separator, and the
 * remaining text; if there is no separator, the pair holds the whole input
 * and an empty string.
 */
inline std::pair<std::string, std::string> str_partition_after(
    const std::string &s, const std::string &sep) {
  const auto pos = s.find(sep);

  if (pos == std::string::npos) return {s, std::string{}};

  const auto cut = pos + sep.length();
  return {s.substr(0, cut), s.substr(cut)};
}
/**
 * Partition a string in 2 right after the first occurrence of a separator, in
 * place.
 *
 * The returned part (text up to and including the separator) is removed from
 * `*s`. If the separator is not present, the whole string is returned and
 * `*s` becomes empty.
 */
inline std::string str_partition_after_inpl(std::string *s,
                                            const std::string &sep) {
  const auto pos = s->find(sep);
  // number of leading characters which are handed over to the caller
  const auto cut = (pos == std::string::npos) ? s->size() : pos + sep.length();

  std::string head = s->substr(0, cut);
  s->erase(0, cut);

  return head;
}
/**
 * Splits a string on any of the characters of the separator string.
 *
 * @param input The string to be split
 * @param separator_chars Set of characters, each one of them is a separator
 * @param maxsplit max number of times to split or -1 for no limit
 * @param compress If true, consecutive separators do not generate new (empty)
 * elements in the result, but they're still counted towards maxsplit. A
 * separator which is the very first character of the input still generates an
 * empty leading element.
 *
 * @returns vector of split strings, empty if the input is empty
 */
inline std::vector<std::string> str_split(
    const std::string &input, const std::string &separator_chars = " \r\n\t",
    int maxsplit = -1, bool compress = false) {
  std::vector<std::string> tokens;

  if (input.empty()) return tokens;

  size_t start = 0;

  for (;;) {
    // once the split limit is used up, the rest of the input is a single token
    const size_t hit = (maxsplit-- != 0)
                           ? input.find_first_of(separator_chars, start)
                           : std::string::npos;

    if (hit == std::string::npos) {
      tokens.push_back(input.substr(start));
      break;
    }

    // an empty token is dropped only in compress mode, and never if it's at
    // the very beginning of the input
    const bool keep = (hit != start) || !compress || (hit == 0);

    if (keep) tokens.push_back(input.substr(start, hit - start));

    start = hit + 1;
  }

  return tokens;
}
/**
 * Splits the given input string, calling the functor once for each token.
 *
 * @param input the string to be split
 * @param f a functor called once for each token. If it returns false, the
 * splitting is interrupted.
 * @param separator_chars Set of characters, each one of them is a separator
 * @param maxsplit max number of times to split or -1 for no limit
 * @param compress If true, consecutive separators do not generate new (empty)
 * tokens, but they're still counted towards maxsplit. A separator which is
 * the very first character of the input still generates an empty leading
 * token.
 *
 * @returns false if f returns false, true otherwise (including an empty
 * input, in which case f is never called)
 */
inline bool str_itersplit(const std::string &input,
                          const std::function<bool(const std::string &s)> &f,
                          const std::string &separator_chars = " \r\n\t",
                          int maxsplit = -1, bool compress = false) {
  if (input.empty()) return true;

  size_t start = 0;

  for (;;) {
    // once the split limit is used up, the rest of the input is a single token
    const size_t hit = (maxsplit-- != 0)
                           ? input.find_first_of(separator_chars, start)
                           : std::string::npos;

    // last token, its verdict is the result of the whole operation
    if (hit == std::string::npos) return f(input.substr(start));

    // an empty token is dropped only in compress mode, and never if it's at
    // the very beginning of the input
    const bool emit = (hit != start) || !compress || (hit == 0);

    if (emit && !f(input.substr(start, hit - start))) return false;

    start = hit + 1;
  }
}
/**
 * Strip a string out of blank chars.
 *
 * All variants take the string `s` and the characters to be stripped, which
 * by default are: space, CR, LF and TAB. The *_view variants return a
 * std::string_view, the remaining ones return a std::string.
 *
 * NOTE(review): judging by the names, the `l` variants presumably strip only
 * the beginning of the string, the `r` variants only the end, and the returned
 * views presumably refer to the storage of `s` (so they must not outlive it) -
 * confirm against the implementation.
 */
std::string_view SHCORE_PUBLIC
str_strip_view(std::string_view s, std::string_view chars = " \r\n\t");
std::string_view SHCORE_PUBLIC
str_lstrip_view(std::string_view s, std::string_view chars = " \r\n\t");
std::string_view SHCORE_PUBLIC
str_rstrip_view(std::string_view s, std::string_view chars = " \r\n\t");
std::string SHCORE_PUBLIC str_strip(std::string_view s,
std::string_view chars = " \r\n\t");
std::string SHCORE_PUBLIC str_lstrip(std::string_view s,
std::string_view chars = " \r\n\t");
std::string SHCORE_PUBLIC str_rstrip(std::string_view s,
std::string_view chars = " \r\n\t");
/**
 * Left-justifies a string: pads it on the right with `pad` until it is
 * `width` characters long. Strings which are already long enough are returned
 * unchanged.
 */
inline std::string str_ljust(const std::string &s, size_t width,
                             char pad = ' ') {
  std::string padded(s);

  if (padded.size() < width) padded.append(width - padded.size(), pad);

  return padded;
}
/**
 * Right-justifies a string: pads it on the left with `pad` until it is
 * `width` characters long. Strings which are already long enough are returned
 * unchanged.
 */
inline std::string str_rjust(const std::string &s, size_t width,
                             char pad = ' ') {
  if (s.size() >= width) return s;

  std::string padded(width - s.size(), pad);
  padded += s;

  return padded;
}
/** Return a string formatted using a printf-style format
Throws invalid_argument on encoding error
*/
#if __GNUC__ > 2 || (__GNUC__ == 2 && __GNUC_MINOR__ > 4)
std::string SHCORE_PUBLIC str_format(const char *formats, ...)
__attribute__((__format__(__printf__, 1, 2)));
#elif _MSC_VER
std::string SHCORE_PUBLIC
str_format(_In_z_ _Printf_format_string_ const char *format, ...);
#else
std::string SHCORE_PUBLIC str_format(const char *formats, ...);
#endif
/**
 * Joins the elements of the range [begin, end), placing `sep` between each
 * two consecutive elements.
 *
 * @returns the joined string, empty if the range is empty
 */
template <typename Iter>
inline std::string str_join(Iter begin, Iter end, std::string_view sep) {
  std::string joined;
  bool needs_separator = false;

  for (; begin != end; ++begin) {
    if (needs_separator) joined.append(sep);

    joined.append(*begin);
    needs_separator = true;
  }

  return joined;
}
/**
 * Joins the elements of the range [begin, end), placing `sep` between each
 * two consecutive elements. Each element is converted with `f` before it is
 * appended.
 *
 * @returns the joined string, empty if the range is empty
 */
template <typename Iter, typename CTransform>
inline std::string str_join(Iter begin, Iter end, std::string_view sep,
                            CTransform &&f) {
  std::string joined;
  bool needs_separator = false;

  for (; begin != end; ++begin) {
    if (needs_separator) joined.append(sep);

    joined.append(f(*begin));
    needs_separator = true;
  }

  return joined;
}
/**
 * Joins all elements of a container, placing `sep` between each two
 * consecutive elements.
 */
template <typename C>
inline std::string str_join(const C &container, std::string_view sep) {
  const auto first = container.begin();
  const auto last = container.end();

  return str_join(first, last, sep);
}
/**
 * Joins all elements of a container, placing `sep` between each two
 * consecutive elements. Each element is converted with `f` before it is
 * appended.
 */
template <typename C, typename CTransform>
inline std::string str_join(const C &container, std::string_view sep,
                            CTransform &&f) {
  const auto first = container.begin();
  const auto last = container.end();

  return str_join(first, last, sep, std::forward<CTransform>(f));
}
/**
 * Returns a copy of `s` with `from` replaced by `to`.
 *
 * NOTE(review): defined out of line; presumably all occurrences are replaced -
 * confirm against the implementation.
 */
std::string SHCORE_PUBLIC str_replace(std::string_view s, std::string_view from,
std::string_view to);
// Conversions between bit fields / byte strings and their textual
// representations.
// NOTE(review): defined out of line; judging by the signatures, `nbits` is
// presumably the number of significant bits in `bits` and string_to_bits()
// returns the (bits, nbits) pair - confirm against the implementation.
std::string SHCORE_PUBLIC bits_to_string(uint64_t bits, int nbits);
std::pair<uint64_t, int> SHCORE_PUBLIC string_to_bits(std::string_view s);
std::string SHCORE_PUBLIC bits_to_string_hex(uint64_t bits, int nbits);
size_t SHCORE_PUBLIC bits_to_string_hex_size(int nbits);
std::string SHCORE_PUBLIC string_to_hex(std::string_view s);
/**
* Escape `quote` and `\` chars.
*
* @param s String to escape.
* @param quote The quote character (`'`, `"`)
* @return Quote escaped string.
*/
std::string quote_string(const std::string &s, char quote);
/**
* Inverse of quote_string().
*
* If the first and the last characters in the given string match `quote`
* they are removed as well.
*
* @param s String to be processed.
* @param quote The quote character (`'`, `"`)
*
* @return Unquoted string.
*/
std::string unquote_string(std::string_view s, char quote);
// Macro to turn a symbol into a string.
// The extra level of indirection (STRINGIFY_) makes the preprocessor expand an
// argument which is itself a macro before it is stringified, i.e.
// STRINGIFY(FOO) gives the value of FOO as a string, not "FOO".
#define STRINGIFY(s) STRINGIFY_(s)
#define STRINGIFY_(s) #s
/** Breaks string into lines of specified width without breaking words.
*
* @param line long string to break.
* @param line_width maximum line width
* @return vector with split lines.
*/
std::vector<std::string> str_break_into_lines(const std::string &line,
std::size_t line_width);
/**
* Auxiliary function to get the quotes span (i.e., start and end positions)
* for the given string.
*
* If no quote is found then std::string::npos is returned for both elements
* in the pair. If only one quote is found (no ending quote) then
* std::string::npos is returned for the second position of the pair.
*
* @param quote_char character with the quote to look for.
* @param str target string to get the start and end quote position.
* @return return a pair with the position of the starting quote and ending
* quote.
*/
std::pair<std::string::size_type, std::string::size_type> get_quote_span(
const char quote_char, const std::string &str);
/**
* Convert UTF-8 string to UTF-16/UTF-32 (platform dependent) string.
*
* @param utf8 UTF-8 encoded string.
* @return std::wstring UTF-16/UTF-32 (platform dependent) string.
*/
std::wstring utf8_to_wide(const std::string &utf8);
/**
* Convert UTF-8 string to UTF-16/UTF-32 (platform dependent) string.
*
* @param utf8 Pointer to UTF-8 encoded string.
* @param utf8_length Length of UTF-8 string in bytes.
* @return std::wstring UTF-16/UTF-32 (platform dependent) string.
*/
std::wstring utf8_to_wide(const char *utf8, const size_t utf8_length);
/**
* Convert UTF-8 string to UTF-16/UTF-32 (platform dependent) string.
*
* @param utf8 Pointer to UTF-8 encoded string.
* @return std::wstring UTF-16/UTF-32 (platform dependent) string.
*/
std::wstring utf8_to_wide(const char *utf8);
/**
* Convert UTF-16/UTF-32 (platform dependent) string to UTF-8 string.
*
* @param wide UTF-16/UTF-32 (platform dependent) encoded string.
* @return std::string UTF-8 encoded string.
*/
std::string wide_to_utf8(const std::wstring &wide);
/**
* Convert UTF-16/UTF-32 (platform dependent) string to UTF-8 string.
*
* @param wide Pointer to UTF-16/UTF-32 (platform dependent) encoded string.
* @param wide_length Length of UTF-16/UTF-32 string in bytes.
* @return std::string UTF-8 encoded string.
*/
std::string wide_to_utf8(const wchar_t *wide, const size_t wide_length);
/**
* Convert UTF-16/UTF-32 (platform dependent) string to UTF-8 string.
*
* @param wide Pointer to UTF-16/UTF-32 (platform dependent) encoded string.
* @return std::string UTF-8 encoded string.
*/
std::string wide_to_utf8(const wchar_t *wide);
/**
* Truncates the given string to max_length code points.
*
* @param str UTF-8 string to be truncated.
* @param max_length Maximum number of code points.
*
* @return Input string truncated to max_length code points.
*/
std::string truncate(const std::string &str, const size_t max_length);
/**
* Truncates the given string to max_length code points.
*
* @param str UTF-8 string to be truncated.
* @param length Length of string in bytes.
* @param max_length Maximum number of code points.
*
* @return Input string truncated to max_length code points.
*/
std::string truncate(const char *str, const size_t length,
const size_t max_length);
/**
* Truncates the given string to max_length code points.
*
* @param str UTF-16/UTF-32 string to be truncated.
* @param max_length Maximum number of code points.
*
* @return Input string truncated to max_length code points.
*/
std::wstring truncate(const std::wstring &str, const size_t max_length);
/**
* Truncates the given string to max_length code points.
*
* @param str UTF-16/UTF-32 string to be truncated.
* @param length Length of string in bytes.
* @param max_length Maximum number of code points.
*
* @return Input string truncated to max_length code points.
*/
std::wstring truncate(const wchar_t *str, const size_t length,
const size_t max_length);
/**
* Checks if the given string contains only valid UTF-8 code points.
*
* @param s String to be checked.
*
* @returns true if the given string is a valid UTF-8 string
*/
bool is_valid_utf8(std::string_view s);
/**
* Generates a percent encoded string based on RFC-3986, only unreserved
* characters are not encoded.
*/
std::string pctencode(std::string_view s);
/**
* Decodes a string that is percent encoded based on RFC-3986.
*/
std::string pctdecode(std::string_view s);
/**
* Returns a string of the given size created with random characters from the
* provided source.
*/
std::string get_random_string(size_t size, const char *source);
} // namespace shcore
#endif // MYSQLSHDK_LIBS_UTILS_UTILS_STRING_H_