std::vector CmdLineConverter::Tokenize()

in Diagnostic/mdsd/mdsd/CmdLineConverter.cc [11:139]


/// Splits a command line into an argument vector using shell-like quoting rules.
///
/// Arguments are separated by runs of blanks (' ') or newlines ('\n'); any other
/// character, including a tab, is ordinary argument text. Within an argument:
///   - a backslash escapes a blank, newline, backslash, single quote, or double quote;
///     in front of any other character the backslash is kept literally;
///   - single quotes preserve everything up to the closing single quote verbatim;
///   - double quotes preserve their contents, except that a backslash escapes a
///     backslash or a double quote (in front of anything else it is kept literally).
/// Quoted and unquoted pieces that touch are concatenated into a single argument, and
/// an empty pair of quotes produces an empty argument regardless of where it appears.
///
/// @param cmdline          The command line to split.
/// @param ctxLogOnWarning  Callback that receives the warning text when the command line ends
///                         inside a quote or right after an escape character. An empty
///                         std::function is tolerated; the warning then only goes to the trace.
/// @return The arguments in order of appearance. An unterminated quote is closed implicitly and
///         a dangling escape character is dropped; in both cases the partial argument (possibly
///         empty) is still returned.
std::vector<std::string> CmdLineConverter::Tokenize(const std::string& cmdline, std::function<void(const std::string&)> ctxLogOnWarning)
{
    Trace trace(Trace::Extensions, "CmdLineConverter::Tokenize");

    enum class TokenizerState { outside, within, escape, singlequote, doublequote, doubleescape };

    // Characters that separate arguments when they appear unquoted and unescaped.
    const auto isSeparator = [](char c) { return c == ' ' || c == '\n'; };

    std::vector<std::string> argv;
    std::string element;
    TokenizerState state = TokenizerState::outside;

    for (const char ch : cmdline) {
        // Between arguments: skip separators. The first non-separator character starts a new
        // argument and is then handled by the "within" case below in this same iteration.
        if (state == TokenizerState::outside) {
            if (isSeparator(ch)) {
                continue;
            }
            state = TokenizerState::within;
        }

        switch (state) {
        case TokenizerState::outside:
            // Unreachable: handled before the switch. Listed to keep the switch exhaustive.
            break;

        case TokenizerState::within:
            switch (ch) {
            case '\\':                  // escape character
                state = TokenizerState::escape;
                break;
            case '\'':                  // opening single quote
                state = TokenizerState::singlequote;
                break;
            case '"':                   // opening double quote
                state = TokenizerState::doublequote;
                break;
            case ' ':                   // a separator terminates the argument
            case '\n':
                argv.emplace_back(std::move(element));
                element.clear();        // put the moved-from string back into a known state
                state = TokenizerState::outside;
                break;
            default:
                element.push_back(ch);
                break;
            }
            break;

        case TokenizerState::escape:
            // Only blank, newline, backslash, single quote, and double quote can be escaped.
            // For anything else the backslash is kept along with the character.
            if (!(isSeparator(ch) || ch == '\\' || ch == '\'' || ch == '"')) {
                element.push_back('\\');
            }
            element.push_back(ch);
            state = TokenizerState::within;
            break;

        case TokenizerState::singlequote:
            if (ch == '\'') {
                state = TokenizerState::within;
            } else {
                element.push_back(ch);
            }
            break;

        case TokenizerState::doublequote:
            switch (ch) {
            case '"':
                state = TokenizerState::within;
                break;
            case '\\':
                state = TokenizerState::doubleescape;
                break;
            default:
                element.push_back(ch);
                break;
            }
            break;

        case TokenizerState::doubleescape:
            // Inside double quotes only a backslash or a double quote can be escaped;
            // otherwise the backslash flows through literally.
            if (ch != '\\' && ch != '"') {
                element.push_back('\\');
            }
            element.push_back(ch);
            state = TokenizerState::doublequote;
            break;
        }
    }

    // End of input. Pick the warning (if any) that matches the state we stopped in.
    std::string warnMsg;
    switch (state) {
    case TokenizerState::outside:
    case TokenizerState::within:
        break;
    case TokenizerState::singlequote:
    case TokenizerState::doublequote:
        warnMsg = "Unterminated quote at the end of the command line";
        break;
    case TokenizerState::escape:
    case TokenizerState::doubleescape:
        warnMsg = "Incomplete escape at the end of the command line";
        break;
    }

    if (!warnMsg.empty()) {
        trace.NOTEWARN(warnMsg);
        // Calling an empty std::function throws std::bad_function_call; a missing callback
        // must not turn a recoverable parse warning into an exception.
        if (ctxLogOnWarning) {
            ctxLogOnWarning(warnMsg);
        }
    }

    // Every state except "outside" has an argument in progress, so flush it even if it is an
    // empty string. In the "within" state the element can only be empty after an empty pair of
    // quotes (e.g. a trailing ''), which is a real, empty argument - the same result the loop
    // produces when such a token is followed by a separator. Unterminated quotes are auto-closed
    // and an incomplete escape contributes nothing further.
    if (state != TokenizerState::outside) {
        argv.emplace_back(std::move(element));
    }

    return argv;
}