core/common/StringTools.h (268 lines of code) (raw):
/*
* Copyright 2022 iLogtail Authors
*
* Licensed under the Apache License, Version 2.0 (the "License");
* you may not use this file except in compliance with the License.
* You may obtain a copy of the License at
*
* http://www.apache.org/licenses/LICENSE-2.0
*
* Unless required by applicable law or agreed to in writing, software
* distributed under the License is distributed on an "AS IS" BASIS,
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
* See the License for the specific language governing permissions and
* limitations under the License.
*/
#pragma once
#include <algorithm>
#include <cctype>
#include <cerrno>
#include <charconv>
#include <cstddef>
#include <cstdint>
#include <cstdlib>
#include <cstring>
#include <iterator>
#include <limits>
#include <string>
#include <string_view>
#include <vector>
#pragma GCC diagnostic push
#pragma GCC diagnostic ignored "-Wunused-local-typedefs"
#include "boost/lexical_cast.hpp"
#pragma GCC diagnostic pop
#include "boost/regex.hpp"
#include "common/StringView.h"
namespace logtail {
inline bool StartWith(const std::string& input, StringView pattern) {
return input.find(pattern.data(), 0, pattern.size()) == 0;
}
// Returns true when @input ends with @pattern. An empty @pattern always matches.
// Only the last pattern.length() bytes of @input are compared; the previous rfind()-based
// check searched backwards through the whole of @input whenever the suffix did not match.
inline bool EndWith(const std::string& input, const std::string& pattern) {
    const auto inputLen = input.length();
    const auto patternLen = pattern.length();
    if (patternLen > inputLen) {
        return false;
    }
    return input.compare(inputLen - patternLen, patternLen, pattern) == 0;
}
// Case conversion and case-insensitive comparison helpers.
// NOTE(review): only the declarations are visible in this header; the notes below are inferred
// from the names and signatures and should be confirmed against the definitions.
// Presumably returns a lower-cased copy of @orig.
std::string ToLowerCaseString(const std::string& orig);
// Presumably returns an upper-cased copy of @orig.
std::string ToUpperCaseString(const std::string& orig);
// Presumably compares @s1 and @s2 ignoring case and returns a strcmp-style result
// (<0, 0, >0) -- TODO confirm.
int StringCaseInsensitiveCmp(const std::string& s1, const std::string& s2);
// Presumably compares at most @n characters of the C strings @s1 and @s2 ignoring case,
// with a strncmp-style result -- TODO confirm.
int CStringNCaseInsensitiveCmp(const char* s1, const char* s2, size_t n);
// Returns a copy of @str with every leading occurrence of @trimChar removed.
// The result is empty when @str consists solely of @trimChar (or is empty).
inline std::string LeftTrimString(const std::string& str, const char trimChar = ' ') {
    const std::string::size_type firstKept = str.find_first_not_of(trimChar);
    if (firstKept == std::string::npos) {
        return std::string();
    }
    return str.substr(firstKept);
}
// Returns a copy of @str with every trailing occurrence of @trimChar removed.
// The result is empty when @str consists solely of @trimChar (or is empty).
inline std::string RightTrimString(const std::string& str, const char trimChar = ' ') {
    const std::string::size_type lastKept = str.find_last_not_of(trimChar);
    if (lastKept == std::string::npos) {
        return std::string();
    }
    return str.substr(0, lastKept + 1);
}
// Returns a copy of @str with leading @leftTrimChar characters and trailing @rightTrimChar
// characters removed. The left side is trimmed first, then the right side of that result.
inline std::string TrimString(const std::string& str, const char leftTrimChar = ' ', const char rightTrimChar = ' ') {
    const std::string withoutHead = LeftTrimString(str, leftTrimChar);
    return RightTrimString(withoutHead, rightTrimChar);
}
// Generic conversion: forwards to std::to_string, so T must be a type std::to_string accepts.
template <typename T>
inline std::string ToString(const T& value) {
    std::string text = std::to_string(value);
    return text;
}
// A std::string converts to an identical copy of itself.
inline std::string ToString(const std::string& str) {
    return std::string(str);
}
// C string conversion; a null pointer yields an empty string.
inline std::string ToString(const char* str) {
    return str != nullptr ? std::string(str) : std::string("");
}
// Mutable C string: same handling as the const char* overload.
inline std::string ToString(char* str) {
    const char* readOnly = str;
    return ToString(readOnly);
}
// Booleans are rendered as the words "true" / "false".
inline std::string ToString(bool value) {
    if (value) {
        return "true";
    }
    return "false";
}
// Renders a vector of strings as a single string. Declared here, defined elsewhere.
// NOTE(review): the separator/format used to join the elements is not visible in this
// header -- check the definition before relying on the exact output.
std::string ToString(const std::vector<std::string>& vec);
// Renders an integral @value as upper-case hexadecimal without a prefix and without
// leading zeros (zero itself is "0").
// At most sizeof(T) * 2 digits are produced; the bit budget stops the loop for negative
// values, where right-shifting may never reach zero.
template <typename T>
std::string ToHexString(const T& value) {
    static constexpr char kHexDigits[] = "0123456789ABCDEF";
    T remaining = value;
    uint32_t bitsLeft = sizeof(T) * 8;
    // Digits are collected least-significant first and reversed on return.
    std::string reversed;
    for (;;) {
        const uint8_t nibble = remaining & 0x0f;
        reversed.push_back(kHexDigits[nibble]);
        remaining >>= 4;
        if (!remaining) {
            break;
        }
        bitsLeft -= 4;
        if (bitsLeft == 0) {
            break;
        }
    }
    return std::string(reversed.rbegin(), reversed.rend());
}
// Specialization for std::string input; declared here, defined elsewhere.
template <>
std::string ToHexString(const std::string& value);
// Splits @str by @delim (a single space by default) and returns the pieces.
// NOTE(review): how @delim is interpreted and whether empty pieces are kept is decided by the
// definition, which is not visible here; see StringSpliter for the whole-delimiter variant.
std::vector<std::string> SplitString(const std::string& str, const std::string& delim = " ");
// This function does not behave like SplitString(string, string).
// The difference is that it uses the whole @delim as one separator; it scans @str
// from beginning to end and drops empty substrings ("").
// @return the substrings of @str separated by @delim, never containing "".
std::vector<std::string> StringSpliter(const std::string& str, const std::string& delim);
// Replaces every occurrence of @src in @raw with @dst. @raw is modified in place.
void ReplaceString(std::string& raw, const std::string& src, const std::string& dst);
// Boost regex utilities.
// NOTE(review): only the declarations are visible in this header. From the names and
// signatures these look like wrappers around boost::regex_search / boost::regex_match that
// report failures through @exception rather than by throwing -- confirm against the definitions.
// Overloads without a length/size argument presumably require @buffer to be NUL-terminated.
// Overloads taking @what presumably fill it with the match results; @flags defaults to
// boost::match_default.
bool BoostRegexSearch(const char* buffer,
                      const boost::regex& reg,
                      std::string& exception,
                      boost::match_results<const char*>& what,
                      boost::match_flag_type flags = boost::match_default);
bool BoostRegexMatch(const char* buffer,
                     size_t length,
                     const boost::regex& reg,
                     std::string& exception,
                     boost::match_results<const char*>& what,
                     boost::match_flag_type flags = boost::match_default);
// Variants that only report whether the expression matched, without exposing match results.
bool BoostRegexMatch(const char* buffer, size_t size, const boost::regex& reg, std::string& exception);
bool BoostRegexMatch(const char* buffer, const boost::regex& reg, std::string& exception);
bool BoostRegexSearch(const char* buffer, size_t size, const boost::regex& reg, std::string& exception);
bool BoostRegexSearch(const char* buffer, const boost::regex& reg, std::string& exception);
// GetLittelEndianValue32 converts the little-endian bytes at @buffer to a uint32_t.
// NOTE(review): presumably reads exactly 4 bytes, so @buffer must point to at least that many --
// confirm against the definition. ("Littel" is a historical typo kept in the name so existing
// callers keep working.)
uint32_t GetLittelEndianValue32(const uint8_t* buffer);
// Extracts topic key/value pairs from @val according to @topicFormat, appending the results to
// the output vectors @keys and @values.
// NOTE(review): inferred from the signature only -- presumably @topicFormat is a regular
// expression and the return value reports whether @val matched; confirm against the definition.
bool ExtractTopics(const std::string& val,
                   const std::string& topicFormat,
                   std::vector<std::string>& keys,
                   std::vector<std::string>& values);
// Normalizes the topic regular-expression string @regStr in place.
// NOTE(review): the exact rewriting rules and the meaning of the returned bool are defined
// elsewhere -- presumably false signals an invalid format; confirm before relying on it.
bool NormalizeTopicRegFormat(std::string& regStr);
// Removes a trailing path separator from @path in place.
// NOTE(review): which separators are handled, and how a root path such as "/" is treated, is
// decided by the definition, which is not visible here.
void RemoveFilePathTrailingSlash(std::string& path);
// Reports whether the C string @sz represents an integer. Declared here, defined elsewhere.
bool IsInt(const char* sz);
// std::string convenience wrapper around IsInt(const char*).
inline bool IsInt(const std::string& str) {
    const char* const text = str.c_str();
    return IsInt(text);
}
#if defined(_MSC_VER)
// MSVC-only shim: declares a project-provided fnmatch() and defines FNM_PATHNAME as 0 so that
// code written against the POSIX interface still compiles (presumably because <fnmatch.h> is
// unavailable with this toolchain -- TODO confirm).
// TODO: Test it.
#define FNM_PATHNAME 0
int fnmatch(const char* pattern, const char* dirPath, int flag);
#endif
// Trims from the start: returns @s without its leading run of characters found in @blank.
// Nothing is copied; the returned view covers a suffix of the original characters.
static inline StringView Ltrim(StringView s, const StringView blank = " \t\n\r\f\v") {
    const auto firstKept = s.find_first_not_of(blank);
    // npos means every character is blank, so the whole view is dropped.
    const auto dropCount = (firstKept == StringView::npos) ? s.size() : firstKept;
    s.remove_prefix(dropCount);
    return s;
}
// Trims from the end: returns @s without its trailing run of characters found in @blank.
// Nothing is copied; the returned view covers a prefix of the original characters.
static inline StringView Rtrim(StringView s, const StringView blank = " \t\n\r\f\v") {
    const auto lastKept = s.find_last_not_of(blank);
    // npos means every character is blank, so the whole view is dropped.
    const auto dropCount = (lastKept == StringView::npos) ? s.size() : s.size() - lastKept - 1;
    s.remove_suffix(dropCount);
    return s;
}
// Trims from both ends using the default blank set of Rtrim and Ltrim.
// The tail is trimmed first, then the head of that result.
static inline StringView Trim(StringView s) {
    const StringView withoutTail = Rtrim(s);
    return Ltrim(withoutTail);
}
// A view over exactly one NUL character. The length is passed explicitly so the view has
// size 1 instead of being measured as an empty C string.
static constexpr StringView kNullSv = StringView("\0", 1);
// Forward iterator over the fields of a string that are separated by a delimiter.
// Every field is a StringView into the original characters; nothing is copied, so the
// underlying buffer must outlive the iterator (assuming StringView is a non-owning view).
//
// Field rules:
//  - A non-empty delimiter is matched as a whole. "a,b," split by "," yields "a", "b", "".
//  - An empty input yields a single empty field.
//  - An empty delimiter yields every character as its own field followed by one final empty
//    field, e.g. "ab" yields "a", "b", "".
//
// A default-constructed iterator is the end iterator. Equality only compares the scan
// position, so it is meaningful for iterators over the same string (or against the end iterator).
class StringViewSplitterIterator {
public:
    using iterator_category = std::forward_iterator_tag;
    using value_type = StringView;
    using difference_type = std::ptrdiff_t;
    using pointer = value_type*;
    using reference = value_type&;
    // Creates the end iterator.
    StringViewSplitterIterator() = default;
    // Creates an iterator positioned on the first field of @str.
    StringViewSplitterIterator(StringView str, StringView delimiter) : mStr(str), mDelimiter(delimiter), mPos(0) {
        findNext();
    }
    // Returns the current field by value.
    value_type operator*() const { return mField; }
    pointer operator->() { return &mField; }
    StringViewSplitterIterator& operator++() {
        findNext();
        return *this;
    }
    StringViewSplitterIterator operator++(int) {
        StringViewSplitterIterator tmp = *this;
        ++(*this);
        return tmp;
    }
    friend bool operator==(const StringViewSplitterIterator& a, const StringViewSplitterIterator& b) {
        return a.mPos == b.mPos;
    }
    friend bool operator!=(const StringViewSplitterIterator& a, const StringViewSplitterIterator& b) {
        return !(a == b);
    }

private:
    // Loads the next field into mField and advances mPos.
    // mPos states: [0, size]  -> offset where the next field starts;
    //              size + 1   -> the last field has been produced, the next call reaches the end;
    //              npos       -> end of iteration.
    void findNext() {
        if (mPos == StringView::npos) {
            mField = {};
            return;
        }
        size_t end = StringView::npos;
        if (mDelimiter.empty()) {
            // Empty delimiter: one character per field. Once mPos reaches the end of the input,
            // fall through to the "last field" handling below. Previously `end` was always
            // mPos + 1 here, so the end state was never reached and substr() was eventually
            // called with a start offset beyond size().
            if (mPos < mStr.size()) {
                end = mPos + 1;
            }
        } else {
            end = mStr.find(mDelimiter, mPos);
        }
        if (end == StringView::npos) {
            if (mPos <= mStr.size()) { // last field
                mField = mStr.substr(mPos);
                mPos = mStr.size() + 1;
            } else { // equivalent to end
                mField = {};
                mPos = StringView::npos;
            }
        } else {
            mField = mStr.substr(mPos, end - mPos);
            mPos = end + mDelimiter.size();
        }
    }
    StringView mStr; // the text being split
    StringView mDelimiter; // separator, matched as a whole
    StringView mField; // the field the iterator currently points at
    size_t mPos = StringView::npos; // scan position, see findNext()
};
class StringViewSplitter {
public:
using value_type = StringView;
using iterator = StringViewSplitterIterator;
StringViewSplitter(StringView str, StringView delimiter) : mStr(str), mDelimiter(delimiter) {}
iterator begin() const { return iterator(mStr, mDelimiter); }
iterator end() const { return iterator(); }
private:
StringView mStr;
StringView mDelimiter;
};
// Converts the character range [first, last) into @val.
//
// Generic (integral) version: parses with std::from_chars in the given @base. The whole range
// must be consumed; an empty range, a null @first, trailing characters or an out-of-range value
// make the conversion fail.
// @return true on success, false otherwise.
template <class T>
bool StringTo(const char* first, const char* last, T& val, int base = 10) {
    if (first == nullptr || first >= last) {
        return false; // empty input, conversion fails
    }
    const auto parsed = std::from_chars(first, last, val, base);
    // Succeed only when no error was raised and every character was consumed.
    return parsed.ec == std::errc() && parsed.ptr == last;
}
// double: parsed with std::strtod, @base is ignored.
// std::strtod only stops at a NUL byte or at the first character it cannot parse, but
// [first, last) is frequently a slice of a larger buffer that is not NUL-terminated at @last.
// Parsing the caller's memory directly could therefore read past @last and reject a valid
// slice (e.g. "12" taken from "123"). The range is copied into a NUL-terminated scratch buffer
// first: on the stack for short inputs, in a std::string otherwise.
// Note that @val is overwritten with the std::strtod result even when false is returned.
template <>
inline bool StringTo<double>(const char* first, const char* last, double& val, [[maybe_unused]] int base) {
    if (first == nullptr || first >= last) {
        return false; // empty input, conversion fails
    }
    const std::size_t length = static_cast<std::size_t>(last - first);
    char stackBuffer[64];
    std::string heapBuffer;
    const char* text = stackBuffer;
    if (length < sizeof(stackBuffer)) {
        std::copy(first, last, stackBuffer);
        stackBuffer[length] = '\0';
    } else {
        heapBuffer.assign(first, last);
        text = heapBuffer.c_str();
    }
    // Reset errno so that a range error raised by this conversion can be detected.
    errno = 0;
    char* end = nullptr;
    val = std::strtod(text, &end);
    if (end != text + length) {
        return false; // not every character was consumed
    }
    if (errno == ERANGE) {
        return false; // out of range
    }
    return true;
}
// float: parsed as double, then range-checked before narrowing. @base is ignored.
// @val is only written on success.
template <>
inline bool StringTo<float>(const char* first, const char* last, float& val, [[maybe_unused]] int base) {
    double result{};
    if (!StringTo(first, last, result)) {
        return false;
    }
    // Reject values that do not fit into a float.
    if (result > std::numeric_limits<float>::max() || result < std::numeric_limits<float>::lowest()) {
        return false; // out of float range
    }
    val = static_cast<float>(result);
    return true;
}
// bool: @val becomes true only for the four letters "true" in any letter case; every other
// input (including null or empty) sets @val to false. This conversion always returns true.
template <>
inline bool StringTo<bool>(const char* first, const char* last, bool& val, [[maybe_unused]] int base) {
    // Only a range of exactly four characters can spell "true".
    if (first == nullptr || last - first != 4) {
        val = false;
    } else { // compare character by character, ignoring case
        val = (std::tolower(static_cast<unsigned char>(first[0])) == 't'
               && std::tolower(static_cast<unsigned char>(first[1])) == 'r'
               && std::tolower(static_cast<unsigned char>(first[2])) == 'u'
               && std::tolower(static_cast<unsigned char>(first[3])) == 'e');
    }
    return true;
}
// std::string: copies the range into @val. An empty range or null @first fails and leaves
// @val untouched.
template <>
inline bool StringTo<std::string>(const char* first, const char* last, std::string& val, [[maybe_unused]] int base) {
    if (first == nullptr || first >= last) {
        return false; // empty input, conversion fails
    }
    val.assign(first, last);
    return true;
}
// Converts the NUL-terminated C string @str into @val by forwarding to the range overload.
// A null @str fails.
template <class T>
bool StringTo(const char* str, T& val, int base = 10) {
    if (str == nullptr) {
        return false;
    }
    const char* const last = str + std::char_traits<char>::length(str);
    return StringTo(str, last, val, base);
}
// Converts the contents of the std::string @str into @val by forwarding to the range overload.
template <class T>
bool StringTo(const std::string& str, T& val, int base = 10) {
    const char* const first = str.data();
    return StringTo(first, first + str.size(), val, base);
}
// Converts the characters viewed by the std::string_view @str into @val by forwarding to the
// range overload.
template <class T>
bool StringTo(const std::string_view& str, T& val, int base = 10) {
    const char* const first = str.data();
    return StringTo(first, first + str.size(), val, base);
}
// Converts the characters viewed by the project StringView @str into @val by forwarding to
// the range overload.
template <class T>
bool StringTo(const StringView& str, T& val, int base = 10) {
    const char* const first = str.data();
    return StringTo(first, first + str.size(), val, base);
}
} // namespace logtail