bool HandleStringsCommand()

in Source/cmFileCommand.cxx [288:661]


bool HandleStringsCommand(std::vector<std::string> const& args,
                          cmExecutionStatus& status)
{
  if (args.size() < 3) {
    status.SetError("STRINGS requires a file name and output variable");
    return false;
  }

  // Get the file to read.
  std::string fileName = args[1];
  if (!cmsys::SystemTools::FileIsFullPath(fileName)) {
    fileName =
      cmStrCat(status.GetMakefile().GetCurrentSourceDirectory(), '/', args[1]);
  }

  // Get the variable in which to store the results.
  std::string const& outVar = args[2];

  // Parse the options.
  enum
  {
    arg_none,
    arg_limit_input,
    arg_limit_output,
    arg_limit_count,
    arg_length_minimum,
    arg_length_maximum,
    arg_maximum,
    arg_regex,
    arg_encoding
  };
  unsigned int minlen = 0;
  unsigned int maxlen = 0;
  int limit_input = -1;
  int limit_output = -1;
  unsigned int limit_count = 0;
  cmsys::RegularExpression regex;
  bool have_regex = false;
  bool store_regex = true;
  bool newline_consume = false;
  bool hex_conversion_enabled = true;
  enum
  {
    encoding_none = cmsys::FStream::BOM_None,
    encoding_utf8 = cmsys::FStream::BOM_UTF8,
    encoding_utf16le = cmsys::FStream::BOM_UTF16LE,
    encoding_utf16be = cmsys::FStream::BOM_UTF16BE,
    encoding_utf32le = cmsys::FStream::BOM_UTF32LE,
    encoding_utf32be = cmsys::FStream::BOM_UTF32BE
  };
  int encoding = encoding_none;
  int arg_mode = arg_none;
  for (unsigned int i = 3; i < args.size(); ++i) {
    if (args[i] == "LIMIT_INPUT") {
      arg_mode = arg_limit_input;
    } else if (args[i] == "LIMIT_OUTPUT") {
      arg_mode = arg_limit_output;
    } else if (args[i] == "LIMIT_COUNT") {
      arg_mode = arg_limit_count;
    } else if (args[i] == "LENGTH_MINIMUM") {
      arg_mode = arg_length_minimum;
    } else if (args[i] == "LENGTH_MAXIMUM") {
      arg_mode = arg_length_maximum;
    } else if (args[i] == "REGEX") {
      arg_mode = arg_regex;
    } else if (args[i] == "NEWLINE_CONSUME") {
      newline_consume = true;
      arg_mode = arg_none;
    } else if (args[i] == "NO_HEX_CONVERSION") {
      hex_conversion_enabled = false;
      arg_mode = arg_none;
    } else if (args[i] == "ENCODING") {
      arg_mode = arg_encoding;
    } else if (arg_mode == arg_limit_input) {
      if (sscanf(args[i].c_str(), "%d", &limit_input) != 1 ||
          limit_input < 0) {
        status.SetError(cmStrCat("STRINGS option LIMIT_INPUT value \"",
                                 args[i], "\" is not an unsigned integer."));
        return false;
      }
      arg_mode = arg_none;
    } else if (arg_mode == arg_limit_output) {
      if (sscanf(args[i].c_str(), "%d", &limit_output) != 1 ||
          limit_output < 0) {
        status.SetError(cmStrCat("STRINGS option LIMIT_OUTPUT value \"",
                                 args[i], "\" is not an unsigned integer."));
        return false;
      }
      arg_mode = arg_none;
    } else if (arg_mode == arg_limit_count) {
      int count;
      if (sscanf(args[i].c_str(), "%d", &count) != 1 || count < 0) {
        status.SetError(cmStrCat("STRINGS option LIMIT_COUNT value \"",
                                 args[i], "\" is not an unsigned integer."));
        return false;
      }
      limit_count = count;
      arg_mode = arg_none;
    } else if (arg_mode == arg_length_minimum) {
      int len;
      if (sscanf(args[i].c_str(), "%d", &len) != 1 || len < 0) {
        status.SetError(cmStrCat("STRINGS option LENGTH_MINIMUM value \"",
                                 args[i], "\" is not an unsigned integer."));
        return false;
      }
      minlen = len;
      arg_mode = arg_none;
    } else if (arg_mode == arg_length_maximum) {
      int len;
      if (sscanf(args[i].c_str(), "%d", &len) != 1 || len < 0) {
        status.SetError(cmStrCat("STRINGS option LENGTH_MAXIMUM value \"",
                                 args[i], "\" is not an unsigned integer."));
        return false;
      }
      maxlen = len;
      arg_mode = arg_none;
    } else if (arg_mode == arg_regex) {
      if (!regex.compile(args[i])) {
        status.SetError(cmStrCat("STRINGS option REGEX value \"", args[i],
                                 "\" could not be compiled."));
        return false;
      }
      have_regex = true;
      switch (status.GetMakefile().GetPolicyStatus(cmPolicies::CMP0159)) {
        case cmPolicies::NEW:
          // store_regex = true
          break;
        case cmPolicies::WARN:
          if (status.GetMakefile().PolicyOptionalWarningEnabled(
                "CMAKE_POLICY_WARNING_CMP0159")) {
            status.GetMakefile().IssueMessage(
              MessageType::AUTHOR_WARNING,
              cmStrCat(cmPolicies::GetPolicyWarning(cmPolicies::CMP0159), '\n',
                       "For compatibility, CMake is leaving CMAKE_MATCH_<n> "
                       "unchanged."));
          }
          CM_FALLTHROUGH;
        case cmPolicies::OLD:
          store_regex = false;
          break;
      }
      arg_mode = arg_none;
    } else if (arg_mode == arg_encoding) {
      if (args[i] == "UTF-8") {
        encoding = encoding_utf8;
      } else if (args[i] == "UTF-16LE") {
        encoding = encoding_utf16le;
      } else if (args[i] == "UTF-16BE") {
        encoding = encoding_utf16be;
      } else if (args[i] == "UTF-32LE") {
        encoding = encoding_utf32le;
      } else if (args[i] == "UTF-32BE") {
        encoding = encoding_utf32be;
      } else {
        status.SetError(cmStrCat("STRINGS option ENCODING \"", args[i],
                                 "\" not recognized."));
        return false;
      }
      arg_mode = arg_none;
    } else {
      status.SetError(
        cmStrCat("STRINGS given unknown argument \"", args[i], "\""));
      return false;
    }
  }

  if (hex_conversion_enabled) {
    // TODO: should work without temp file, but just on a memory buffer
    std::string binaryFileName =
      cmStrCat(status.GetMakefile().GetCurrentBinaryDirectory(),
               "/CMakeFiles/FileCommandStringsBinaryFile");
    if (cmHexFileConverter::TryConvert(fileName, binaryFileName)) {
      fileName = binaryFileName;
    }
  }

// Open the specified file.
#if defined(_WIN32) || defined(__CYGWIN__)
  cmsys::ifstream fin(fileName.c_str(), std::ios::in | std::ios::binary);
#else
  cmsys::ifstream fin(fileName.c_str());
#endif
  if (!fin) {
    status.SetError(
      cmStrCat("STRINGS file \"", fileName, "\" cannot be read."));
    return false;
  }

  // If BOM is found and encoding was not specified, use the BOM
  int bom_found = cmsys::FStream::ReadBOM(fin);
  if (encoding == encoding_none && bom_found != cmsys::FStream::BOM_None) {
    encoding = bom_found;
  }

  unsigned int bytes_rem = 0;
  if (encoding == encoding_utf16le || encoding == encoding_utf16be) {
    bytes_rem = 1;
  }
  if (encoding == encoding_utf32le || encoding == encoding_utf32be) {
    bytes_rem = 3;
  }

  // Parse strings out of the file.
  int output_size = 0;
  std::vector<std::string> strings;
  std::string s;
  while ((!limit_count || strings.size() < limit_count) &&
         (limit_input < 0 || static_cast<int>(fin.tellg()) < limit_input) &&
         fin) {
    std::string current_str;

    int c = fin.get();
    for (unsigned int i = 0; i < bytes_rem; ++i) {
      int c1 = fin.get();
      if (!fin) {
        fin.putback(static_cast<char>(c1));
        break;
      }
      c = (c << 8) | c1;
    }
    if (encoding == encoding_utf16le) {
      c = ((c & 0xFF) << 8) | ((c & 0xFF00) >> 8);
    } else if (encoding == encoding_utf32le) {
      c = (((c & 0xFF) << 24) | ((c & 0xFF00) << 8) | ((c & 0xFF0000) >> 8) |
           ((c & 0xFF000000) >> 24));
    }

    if (c == '\r') {
      // Ignore CR character to make output always have UNIX newlines.
      continue;
    }

    if (c >= 0 && c <= 0xFF &&
        (isprint(c) || c == '\t' || (c == '\n' && newline_consume))) {
      // This is an ASCII character that may be part of a string.
      // Cast added to avoid compiler warning. Cast is ok because
      // c is guaranteed to fit in char by the above if...
      current_str += static_cast<char>(c);
    } else if (encoding == encoding_utf8) {
      // Check for UTF-8 encoded string (up to 4 octets)
      static unsigned char const utf8_check_table[3][2] = {
        { 0xE0, 0xC0 },
        { 0xF0, 0xE0 },
        { 0xF8, 0xF0 },
      };

      // how many octets are there?
      unsigned int num_utf8_bytes = 0;
      for (unsigned int j = 0; num_utf8_bytes == 0 && j < 3; j++) {
        if ((c & utf8_check_table[j][0]) == utf8_check_table[j][1]) {
          num_utf8_bytes = j + 2;
        }
      }

      // get subsequent octets and check that they are valid
      for (unsigned int j = 0; j < num_utf8_bytes; j++) {
        if (j != 0) {
          c = fin.get();
          if (!fin || (c & 0xC0) != 0x80) {
            fin.putback(static_cast<char>(c));
            break;
          }
        }
        current_str += static_cast<char>(c);
      }

      // if this was an invalid utf8 sequence, discard the data, and put
      // back subsequent characters
      if ((current_str.length() != num_utf8_bytes)) {
        for (unsigned int j = 0; j < current_str.size() - 1; j++) {
          fin.putback(current_str[current_str.size() - 1 - j]);
        }
        current_str.clear();
      }
    }

    if (c == '\n' && !newline_consume) {
      // The current line has been terminated.  Check if the current
      // string matches the requirements.  The length may now be as
      // low as zero since blank lines are allowed.
      if (s.length() >= minlen && (!have_regex || regex.find(s))) {
        if (store_regex) {
          status.GetMakefile().ClearMatches();
          status.GetMakefile().StoreMatches(regex);
        }
        output_size += static_cast<int>(s.size()) + 1;
        if (limit_output >= 0 && output_size >= limit_output) {
          s.clear();
          break;
        }
        strings.push_back(s);
      }

      // Reset the string to empty.
      s.clear();
    } else if (current_str.empty()) {
      // A non-string character has been found.  Check if the current
      // string matches the requirements.  We require that the length
      // be at least one no matter what the user specified.
      if (s.length() >= minlen && !s.empty() &&
          (!have_regex || regex.find(s))) {
        if (store_regex) {
          status.GetMakefile().ClearMatches();
          status.GetMakefile().StoreMatches(regex);
        }
        output_size += static_cast<int>(s.size()) + 1;
        if (limit_output >= 0 && output_size >= limit_output) {
          s.clear();
          break;
        }
        strings.push_back(s);
      }

      // Reset the string to empty.
      s.clear();
    } else {
      s += current_str;
    }

    if (maxlen > 0 && s.size() == maxlen) {
      // Terminate a string if the maximum length is reached.
      if (s.length() >= minlen && (!have_regex || regex.find(s))) {
        if (store_regex) {
          status.GetMakefile().ClearMatches();
          status.GetMakefile().StoreMatches(regex);
        }
        output_size += static_cast<int>(s.size()) + 1;
        if (limit_output >= 0 && output_size >= limit_output) {
          s.clear();
          break;
        }
        strings.push_back(s);
      }
      s.clear();
    }
  }

  // If there is a non-empty current string we have hit the end of the
  // input file or the input size limit.  Check if the current string
  // matches the requirements.
  if ((!limit_count || strings.size() < limit_count) && !s.empty() &&
      s.length() >= minlen && (!have_regex || regex.find(s))) {
    if (store_regex) {
      status.GetMakefile().ClearMatches();
      status.GetMakefile().StoreMatches(regex);
    }
    output_size += static_cast<int>(s.size()) + 1;
    if (limit_output < 0 || output_size < limit_output) {
      strings.push_back(s);
    }
  }

  // Encode the result in a CMake list.
  char const* sep = "";
  std::string output;
  for (std::string const& sr : strings) {
    // Separate the strings in the output to make it a list.
    output += sep;
    sep = ";";

    // Store the string in the output, but escape semicolons to
    // make sure it is a list.
    for (char i : sr) {
      if (i == ';') {
        output += '\\';
      }
      output += i;
    }
  }

  // Save the output in a makefile variable.
  status.GetMakefile().AddDefinition(outVar, output);
  return true;
}