in Source/cmFileCommand.cxx [288:661]
// Implements the STRINGS mode of the file() command: extracts printable
// strings from a file and stores them, encoded as a ;-list, in a variable.
//
// args[1] is the input file (a relative name is resolved against the
// current source directory), args[2] is the output variable, and the
// remaining arguments are options:
//   LIMIT_INPUT <n>     stop once the read position reaches <n>
//   LIMIT_OUTPUT <n>    stop once the accumulated output size reaches <n>
//   LIMIT_COUNT <n>     stop after <n> strings (0 means unlimited)
//   LENGTH_MINIMUM <n>  drop strings shorter than <n>
//   LENGTH_MAXIMUM <n>  cut a string once it reaches <n> (0 means unlimited)
//   REGEX <re>          keep only strings matching <re>
//   NEWLINE_CONSUME     treat newlines as part of a string
//   NO_HEX_CONVERSION   skip the cmHexFileConverter::TryConvert() step
//   ENCODING <enc>      UTF-8, UTF-16LE, UTF-16BE, UTF-32LE or UTF-32BE
//
// Returns true on success.  On failure an error message is recorded with
// status.SetError() and false is returned.
bool HandleStringsCommand(std::vector<std::string> const& args,
                          cmExecutionStatus& status)
{
  if (args.size() < 3) {
    status.SetError("STRINGS requires a file name and output variable");
    return false;
  }

  // Get the file to read.
  std::string fileName = args[1];
  if (!cmsys::SystemTools::FileIsFullPath(fileName)) {
    fileName =
      cmStrCat(status.GetMakefile().GetCurrentSourceDirectory(), '/', args[1]);
  }

  // Get the variable in which to store the results.
  std::string const& outVar = args[2];

  // Parse the options.  arg_mode remembers which option keyword is still
  // waiting for its value.
  enum
  {
    arg_none,
    arg_limit_input,
    arg_limit_output,
    arg_limit_count,
    arg_length_minimum,
    arg_length_maximum,
    arg_maximum,
    arg_regex,
    arg_encoding
  };
  unsigned int minlen = 0;
  unsigned int maxlen = 0;
  // Negative means "no limit" for the input and output limits.
  int limit_input = -1;
  int limit_output = -1;
  unsigned int limit_count = 0;
  cmsys::RegularExpression regex;
  bool have_regex = false;
  // Whether accepted strings publish their matches via StoreMatches().
  // NOTE(review): this stays true when no REGEX option is given, so
  // ClearMatches()/StoreMatches() also run without a regex -- confirm
  // that this is intended.
  bool store_regex = true;
  bool newline_consume = false;
  bool hex_conversion_enabled = true;
  // The encoding values mirror the BOM constants so that the result of
  // ReadBOM() can be assigned to 'encoding' directly.
  enum
  {
    encoding_none = cmsys::FStream::BOM_None,
    encoding_utf8 = cmsys::FStream::BOM_UTF8,
    encoding_utf16le = cmsys::FStream::BOM_UTF16LE,
    encoding_utf16be = cmsys::FStream::BOM_UTF16BE,
    encoding_utf32le = cmsys::FStream::BOM_UTF32LE,
    encoding_utf32be = cmsys::FStream::BOM_UTF32BE
  };
  int encoding = encoding_none;
  int arg_mode = arg_none;
  for (unsigned int i = 3; i < args.size(); ++i) {
    if (args[i] == "LIMIT_INPUT") {
      arg_mode = arg_limit_input;
    } else if (args[i] == "LIMIT_OUTPUT") {
      arg_mode = arg_limit_output;
    } else if (args[i] == "LIMIT_COUNT") {
      arg_mode = arg_limit_count;
    } else if (args[i] == "LENGTH_MINIMUM") {
      arg_mode = arg_length_minimum;
    } else if (args[i] == "LENGTH_MAXIMUM") {
      arg_mode = arg_length_maximum;
    } else if (args[i] == "REGEX") {
      arg_mode = arg_regex;
    } else if (args[i] == "NEWLINE_CONSUME") {
      newline_consume = true;
      arg_mode = arg_none;
    } else if (args[i] == "NO_HEX_CONVERSION") {
      hex_conversion_enabled = false;
      arg_mode = arg_none;
    } else if (args[i] == "ENCODING") {
      arg_mode = arg_encoding;
    } else if (arg_mode == arg_limit_input) {
      if (sscanf(args[i].c_str(), "%d", &limit_input) != 1 ||
          limit_input < 0) {
        status.SetError(cmStrCat("STRINGS option LIMIT_INPUT value \"",
                                 args[i], "\" is not an unsigned integer."));
        return false;
      }
      arg_mode = arg_none;
    } else if (arg_mode == arg_limit_output) {
      if (sscanf(args[i].c_str(), "%d", &limit_output) != 1 ||
          limit_output < 0) {
        status.SetError(cmStrCat("STRINGS option LIMIT_OUTPUT value \"",
                                 args[i], "\" is not an unsigned integer."));
        return false;
      }
      arg_mode = arg_none;
    } else if (arg_mode == arg_limit_count) {
      int count;
      if (sscanf(args[i].c_str(), "%d", &count) != 1 || count < 0) {
        status.SetError(cmStrCat("STRINGS option LIMIT_COUNT value \"",
                                 args[i], "\" is not an unsigned integer."));
        return false;
      }
      limit_count = count;
      arg_mode = arg_none;
    } else if (arg_mode == arg_length_minimum) {
      int len;
      if (sscanf(args[i].c_str(), "%d", &len) != 1 || len < 0) {
        status.SetError(cmStrCat("STRINGS option LENGTH_MINIMUM value \"",
                                 args[i], "\" is not an unsigned integer."));
        return false;
      }
      minlen = len;
      arg_mode = arg_none;
    } else if (arg_mode == arg_length_maximum) {
      int len;
      if (sscanf(args[i].c_str(), "%d", &len) != 1 || len < 0) {
        status.SetError(cmStrCat("STRINGS option LENGTH_MAXIMUM value \"",
                                 args[i], "\" is not an unsigned integer."));
        return false;
      }
      maxlen = len;
      arg_mode = arg_none;
    } else if (arg_mode == arg_regex) {
      if (!regex.compile(args[i])) {
        status.SetError(cmStrCat("STRINGS option REGEX value \"", args[i],
                                 "\" could not be compiled."));
        return false;
      }
      have_regex = true;
      // Policy CMP0159 decides whether the matches are stored: NEW keeps
      // store_regex enabled, WARN optionally warns and then behaves like
      // OLD, which disables it.
      switch (status.GetMakefile().GetPolicyStatus(cmPolicies::CMP0159)) {
        case cmPolicies::NEW:
          // store_regex = true
          break;
        case cmPolicies::WARN:
          if (status.GetMakefile().PolicyOptionalWarningEnabled(
                "CMAKE_POLICY_WARNING_CMP0159")) {
            status.GetMakefile().IssueMessage(
              MessageType::AUTHOR_WARNING,
              cmStrCat(cmPolicies::GetPolicyWarning(cmPolicies::CMP0159), '\n',
                       "For compatibility, CMake is leaving CMAKE_MATCH_<n> "
                       "unchanged."));
          }
          CM_FALLTHROUGH;
        case cmPolicies::OLD:
          store_regex = false;
          break;
      }
      arg_mode = arg_none;
    } else if (arg_mode == arg_encoding) {
      if (args[i] == "UTF-8") {
        encoding = encoding_utf8;
      } else if (args[i] == "UTF-16LE") {
        encoding = encoding_utf16le;
      } else if (args[i] == "UTF-16BE") {
        encoding = encoding_utf16be;
      } else if (args[i] == "UTF-32LE") {
        encoding = encoding_utf32le;
      } else if (args[i] == "UTF-32BE") {
        encoding = encoding_utf32be;
      } else {
        status.SetError(cmStrCat("STRINGS option ENCODING \"", args[i],
                                 "\" not recognized."));
        return false;
      }
      arg_mode = arg_none;
    } else {
      status.SetError(
        cmStrCat("STRINGS given unknown argument \"", args[i], "\""));
      return false;
    }
  }

  if (hex_conversion_enabled) {
    // TODO: should work without temp file, but just on a memory buffer
    std::string binaryFileName =
      cmStrCat(status.GetMakefile().GetCurrentBinaryDirectory(),
               "/CMakeFiles/FileCommandStringsBinaryFile");
    // If the conversion succeeds, read the converted file instead.
    if (cmHexFileConverter::TryConvert(fileName, binaryFileName)) {
      fileName = binaryFileName;
    }
  }

  // Open the specified file (in binary mode on Windows and Cygwin).
#if defined(_WIN32) || defined(__CYGWIN__)
  cmsys::ifstream fin(fileName.c_str(), std::ios::in | std::ios::binary);
#else
  cmsys::ifstream fin(fileName.c_str());
#endif
  if (!fin) {
    status.SetError(
      cmStrCat("STRINGS file \"", fileName, "\" cannot be read."));
    return false;
  }

  // If BOM is found and encoding was not specified, use the BOM
  int bom_found = cmsys::FStream::ReadBOM(fin);
  if (encoding == encoding_none && bom_found != cmsys::FStream::BOM_None) {
    encoding = bom_found;
  }

  // Number of bytes to read after the first one to complete one code unit.
  unsigned int bytes_rem = 0;
  if (encoding == encoding_utf16le || encoding == encoding_utf16be) {
    bytes_rem = 1;
  }
  if (encoding == encoding_utf32le || encoding == encoding_utf32be) {
    bytes_rem = 3;
  }

  // Parse strings out of the file.  's' accumulates the string currently
  // being built; 'current_str' holds what a single loop iteration adds.
  int output_size = 0;
  std::vector<std::string> strings;
  std::string s;
  while ((!limit_count || strings.size() < limit_count) &&
         (limit_input < 0 || static_cast<int>(fin.tellg()) < limit_input) &&
         fin) {
    std::string current_str;

    // Read one code unit, assembling its bytes most significant first.
    int c = fin.get();
    for (unsigned int i = 0; i < bytes_rem; ++i) {
      int c1 = fin.get();
      if (!fin) {
        fin.putback(static_cast<char>(c1));
        break;
      }
      c = (c << 8) | c1;
    }

    // Little-endian input was assembled in the wrong order; swap the bytes.
    if (encoding == encoding_utf16le) {
      c = ((c & 0xFF) << 8) | ((c & 0xFF00) >> 8);
    } else if (encoding == encoding_utf32le) {
      c = (((c & 0xFF) << 24) | ((c & 0xFF00) << 8) | ((c & 0xFF0000) >> 8) |
           ((c & 0xFF000000) >> 24));
    }

    if (c == '\r') {
      // Ignore CR character to make output always have UNIX newlines.
      continue;
    }

    if (c >= 0 && c <= 0xFF &&
        (isprint(c) || c == '\t' || (c == '\n' && newline_consume))) {
      // This is an ASCII character that may be part of a string.
      // Cast added to avoid compiler warning. Cast is ok because
      // c is guaranteed to fit in char by the above if...
      current_str += static_cast<char>(c);
    } else if (encoding == encoding_utf8) {
      // Check for UTF-8 encoded string (up to 4 octets).  Each row is a
      // { mask, expected value } pair identifying the lead byte of a
      // 2-, 3- and 4-octet sequence respectively.
      static unsigned char const utf8_check_table[3][2] = {
        { 0xE0, 0xC0 },
        { 0xF0, 0xE0 },
        { 0xF8, 0xF0 },
      };

      // how many octets are there?
      unsigned int num_utf8_bytes = 0;
      for (unsigned int j = 0; num_utf8_bytes == 0 && j < 3; j++) {
        if ((c & utf8_check_table[j][0]) == utf8_check_table[j][1]) {
          num_utf8_bytes = j + 2;
        }
      }

      // get subsequent octets and check that they are valid
      //
      // Continuation octets are read into their own variable so that 'c'
      // keeps the lead byte.  A rejected octet is put back and will be
      // read again by the next iteration, so it must not also be
      // interpreted by the checks below: a rejected '\n' would otherwise
      // terminate a line twice and produce a spurious blank entry.
      for (unsigned int j = 0; j < num_utf8_bytes; j++) {
        int octet = c;
        if (j != 0) {
          octet = fin.get();
          if (!fin || (octet & 0xC0) != 0x80) {
            fin.putback(static_cast<char>(octet));
            break;
          }
        }
        current_str += static_cast<char>(octet);
      }

      // if this was an invalid utf8 sequence, discard the data, and put
      // back subsequent characters
      if ((current_str.length() != num_utf8_bytes)) {
        // Only the continuation octets go back (last one first); the lead
        // byte is dropped.  current_str is never empty here because the
        // lead byte is always appended when num_utf8_bytes is non-zero.
        for (unsigned int j = 0; j < current_str.size() - 1; j++) {
          fin.putback(current_str[current_str.size() - 1 - j]);
        }
        current_str.clear();
      }
    }

    if (c == '\n' && !newline_consume) {
      // The current line has been terminated.  Check if the current
      // string matches the requirements.  The length may now be as
      // low as zero since blank lines are allowed.
      if (s.length() >= minlen && (!have_regex || regex.find(s))) {
        if (store_regex) {
          status.GetMakefile().ClearMatches();
          status.GetMakefile().StoreMatches(regex);
        }
        // Each string is charged its length plus one towards LIMIT_OUTPUT.
        output_size += static_cast<int>(s.size()) + 1;
        if (limit_output >= 0 && output_size >= limit_output) {
          s.clear();
          break;
        }
        strings.push_back(s);
      }

      // Reset the string to empty.
      s.clear();
    } else if (current_str.empty()) {
      // A non-string character has been found.  Check if the current
      // string matches the requirements.  We require that the length
      // be at least one no matter what the user specified.
      if (s.length() >= minlen && !s.empty() &&
          (!have_regex || regex.find(s))) {
        if (store_regex) {
          status.GetMakefile().ClearMatches();
          status.GetMakefile().StoreMatches(regex);
        }
        output_size += static_cast<int>(s.size()) + 1;
        if (limit_output >= 0 && output_size >= limit_output) {
          s.clear();
          break;
        }
        strings.push_back(s);
      }

      // Reset the string to empty.
      s.clear();
    } else {
      s += current_str;
    }

    // Use >= rather than ==: a multi-byte UTF-8 sequence is appended as a
    // whole and can step over the limit, in which case an equality test
    // would never fire and the string would grow without bound.  Since a
    // sequence is never split, such a string may exceed maxlen by up to
    // three bytes.  Single-byte appends behave exactly as with ==.
    if (maxlen > 0 && s.size() >= maxlen) {
      // Terminate a string if the maximum length is reached.
      if (s.length() >= minlen && (!have_regex || regex.find(s))) {
        if (store_regex) {
          status.GetMakefile().ClearMatches();
          status.GetMakefile().StoreMatches(regex);
        }
        output_size += static_cast<int>(s.size()) + 1;
        if (limit_output >= 0 && output_size >= limit_output) {
          s.clear();
          break;
        }
        strings.push_back(s);
      }
      s.clear();
    }
  }

  // If there is a non-empty current string we have hit the end of the
  // input file or the input size limit.  Check if the current string
  // matches the requirements.
  if ((!limit_count || strings.size() < limit_count) && !s.empty() &&
      s.length() >= minlen && (!have_regex || regex.find(s))) {
    if (store_regex) {
      status.GetMakefile().ClearMatches();
      status.GetMakefile().StoreMatches(regex);
    }
    output_size += static_cast<int>(s.size()) + 1;
    if (limit_output < 0 || output_size < limit_output) {
      strings.push_back(s);
    }
  }

  // Encode the result in a CMake list.
  char const* sep = "";
  std::string output;
  for (std::string const& sr : strings) {
    // Separate the strings in the output to make it a list.
    output += sep;
    sep = ";";

    // Store the string in the output, but escape semicolons to
    // make sure it is a list.
    for (char i : sr) {
      if (i == ';') {
        output += '\\';
      }
      output += i;
    }
  }

  // Save the output in a makefile variable.
  status.GetMakefile().AddDefinition(outVar, output);
  return true;
}