in libredex/ProguardLexer.cpp [527:821]
// Tokenize an entire ProGuard configuration.
//
// Dispatch order at each input position: '#' line comments, whitespace,
// single-character punctuation, the "[ ]" array-type marker, '-'
// prefixed commands (each command class lexes its own arguments:
// file paths, filter lists, a target version, or a package name),
// reserved words, identifiers, and finally unknownToken for anything
// else. The stream is always terminated with an eof_token.
//
// @param in  full configuration text. Returned tokens hold
//            string_views into this buffer, so `in` must outlive them.
// @return    token stream ending in TokenType::eof_token.
std::vector<Token> lex(const std::string_view& in) {
  // Punctuation that lexes as a single character.
  std::unordered_map<char, TokenType> simple_tokens{
      {'{', TokenType::openCurlyBracket},
      {'}', TokenType::closeCurlyBracket},
      {'(', TokenType::openBracket},
      {')', TokenType::closeBracket},
      {';', TokenType::semiColon},
      {':', TokenType::colon},
      {',', TokenType::comma},
      {'!', TokenType::notToken},
      {'/', TokenType::slash},
      {'@', TokenType::annotation_application},
  };
  using TokenMap = UnorderedStringViewIndexableMap<TokenType>;
  // Reserved words (modifiers and keywords) that may appear inside a
  // class specification.
  TokenMap word_tokens{
      {"includedescriptorclasses", TokenType::includedescriptorclasses_token},
      {"allowshrinking", TokenType::allowshrinking_token},
      {"allowoptimization", TokenType::allowoptimization_token},
      {"allowobfuscation", TokenType::allowobfuscation_token},
      {"class", TokenType::classToken},
      {"public", TokenType::publicToken},
      {"final", TokenType::final},
      {"abstract", TokenType::abstract},
      {"enum", TokenType::enumToken},
      {"private", TokenType::privateToken},
      {"protected", TokenType::protectedToken},
      {"static", TokenType::staticToken},
      {"volatile", TokenType::volatileToken},
      {"transient", TokenType::transient},
      {"synchronized", TokenType::synchronized},
      {"native", TokenType::native},
      {"strictfp", TokenType::strictfp},
      {"synthetic", TokenType::synthetic},
      {"bridge", TokenType::bridge},
      {"varargs", TokenType::varargs},
      {"extends", TokenType::extends},
      {"implements", TokenType::implements},
      {"return", TokenType::returns},
  };
  // "-command" options that take no arguments.
  TokenMap simple_commands{
      // Keep Options
      {"keep", TokenType::keep},
      {"keepclassmembers", TokenType::keepclassmembers},
      {"keepclasseswithmembers", TokenType::keepclasseswithmembers},
      {"keepnames", TokenType::keepnames},
      {"keepclassmembernames", TokenType::keepclassmembernames},
      {"keepclasseswithmembernames", TokenType::keepclasseswithmembernames},
      // Shrinking Options
      {"dontshrink", TokenType::dontshrink},
      {"whyareyoukeeping", TokenType::whyareyoukeeping},
      // Optimization Options
      {"assumenosideeffects", TokenType::assumenosideeffects},
      {"allowaccessmodification", TokenType::allowaccessmodification_token},
      {"dontoptimize", TokenType::dontoptimize},
      {"optimizationpasses", TokenType::optimizationpasses},
      {"mergeinterfacesaggressively", TokenType::mergeinterfacesaggressively},
      // Obfuscation Options
      {"dontobfuscate", TokenType::dontobfuscate},
      {"dontusemixedcaseclassnames",
       TokenType::dontusemixedcaseclassnames_token},
      {"dontskipnonpubliclibraryclasses",
       TokenType::dontskipnonpubliclibraryclasses},
      // Preverification Options.
      {"dontpreverify", TokenType::dontpreverify_token},
      // General Options
      {"verbose", TokenType::verbose_token},
  };
  // "-command" options followed by exactly one file path.
  TokenMap single_filepath_commands{
      // Input/Output Options
      {"include", TokenType::include},
      {"basedirectory", TokenType::basedirectory},
      {"dump", TokenType::dump},
      {"printmapping", TokenType::printmapping},
      {"printconfiguration", TokenType::printconfiguration},
      {"printseeds", TokenType::printseeds},
      // Shrinking Options
      {"printusage", TokenType::printusage},
  };
  // "-command" options followed by one or more file paths.
  TokenMap multi_filepaths_commands{
      // Input/Output Options
      {"injars", TokenType::injars},
      {"outjars", TokenType::outjars},
      {"libraryjars", TokenType::libraryjars},
      // Keep Options
      {"keepdirectories", TokenType::keepdirectories},
  };
  // "-command" options followed by a filter list.
  TokenMap filter_list_commands{
      // Optimization Options
      {"optimizations", TokenType::optimizations},
      // Obfuscation Options
      {"keepattributes", TokenType::keepattributes},
      // General Options
      {"dontwarn", TokenType::dontwarn},
      {"keeppackagenames", TokenType::keeppackagenames},
  };
  std::vector<Token> tokens;
  tokens.reserve(std::max(size_t{1}, in.size() / 20)); // 5% ratio.
  unsigned int line = 1;
  auto add_token = [&](TokenType type) { tokens.emplace_back(type, line); };
  auto add_token_data = [&](TokenType type, const std::string_view& data) {
    tokens.emplace_back(type, line, data);
  };
  // Some helpers (e.g. read_paths) report the line each datum started
  // on, which may differ from the current `line` by the time they
  // return; this variant lets the caller supply that line explicitly.
  auto add_token_line_data =
      [&](TokenType type, size_t t_line, const std::string_view& data) {
        tokens.emplace_back(type, t_line, data);
      };
  std::string_view data = in;
  while (!data.empty()) {
    char ch = data[0];
    // Skip comments.
    if (ch == '#') {
      auto eol_pos = data.find('\n');
      std::string_view comment_data;
      if (eol_pos != std::string_view::npos) {
        comment_data = data.substr(1, eol_pos - 1); // between '#' and '\n'
        data = data.substr(eol_pos + 1);
      } else {
        // Comment runs to end of input.
        comment_data = data.substr(1);
        data = std::string_view();
      }
      tokens.emplace_back(TokenType::comment, line, comment_data);
      // Only advance the line counter when a '\n' was actually
      // consumed; a comment terminated by end-of-input must not bump
      // it, or the eof_token would report one line too many.
      if (eol_pos != std::string_view::npos) {
        ++line;
      }
      continue;
    }
    // Drop leading whitespace from `data`, counting newlines.
    auto consume_ws = [&line, &data]() {
      size_t index = 0;
      for (; index != data.size(); ++index) {
        char c = data[index];
        if (c == '\n') {
          line++;
          continue;
        }
        // Cast through unsigned char: passing a negative plain-char
        // value (bytes >= 0x80 with a signed char) to isspace is UB.
        if (!isspace(static_cast<unsigned char>(c))) {
          break;
        }
      }
      data = data.substr(index);
    };
    // Skip whitespaces.
    if (isspace(static_cast<unsigned char>(ch))) {
      consume_ws();
      continue;
    }
    {
      auto it = simple_tokens.find(ch);
      if (it != simple_tokens.end()) {
        add_token(it->second);
        data = data.substr(1);
        continue;
      }
    }
    if (ch == '[') {
      auto old_view = data;
      data = data.substr(1);
      consume_ws(); // Consume any whitespace
      // Check for closing brace.
      if (data.empty()) {
        add_token_data(TokenType::unknownToken, old_view);
        continue;
      }
      if (data[0] == ']') {
        add_token(TokenType::arrayType);
        data = data.substr(1);
        continue;
      }
      // Any token other than a ']' next is a bad token.
    }
    // Check for commands.
    if (ch == '-') {
      data = data.substr(1);
      auto command =
          parse_part_fn</*kSkipWs=*/false>(data, &line, is_deliminator);
      {
        auto it = simple_commands.find(command);
        if (it != simple_commands.end()) {
          add_token(it->second);
          continue;
        }
      }
      {
        auto it = single_filepath_commands.find(command);
        if (it != single_filepath_commands.end()) {
          add_token(it->second);
          auto path = read_path(data, &line);
          if (!path.empty()) {
            add_token_data(TokenType::filepath, path);
          }
          continue;
        }
      }
      {
        auto it = multi_filepaths_commands.find(command);
        if (it != multi_filepaths_commands.end()) {
          add_token(it->second);
          auto paths = read_paths(data, &line);
          for (auto& path : paths) {
            // path.second is the line the path started on.
            add_token_line_data(TokenType::filepath, path.second, path.first);
          }
          continue;
        }
      }
      {
        auto it = filter_list_commands.find(command);
        if (it != filter_list_commands.end()) {
          add_token(it->second);
          for (auto& filter : lex_filter_list(data, &line)) {
            add_token_data(TokenType::filter_pattern, filter);
          }
          continue;
        }
      }
      // Input/Output Options
      if (command == "target") {
        add_token(TokenType::target);
        auto version = read_target_version(data, &line);
        if (!version.empty()) {
          add_token_data(TokenType::target_version_token, version);
        }
        continue;
      }
      // Obfuscation Options
      if (command == "repackageclasses") {
        add_token(TokenType::repackageclasses);
        auto package_name = parse_package_name(data, &line);
        if (!package_name.empty()) {
          add_token_data(TokenType::identifier, package_name);
        }
        continue;
      }
      // Some other command.
      add_token_data(TokenType::command, command);
      continue;
    }
    auto word = parse_part_fn</*kSkipWs=*/false>(data, &line, is_deliminator);
    {
      auto it = word_tokens.find(word);
      if (it != word_tokens.end()) {
        add_token(it->second);
        continue;
      }
    }
    if (word == "interface") {
      // If the previous symbol was a @ then this is really an annotation.
      if (!tokens.empty() &&
          tokens.back().type == TokenType::annotation_application) {
        tokens.pop_back();
        add_token(TokenType::annotation);
      } else {
        add_token(TokenType::interface);
      }
      continue;
    }
    if (is_identifier(word)) {
      add_token_data(TokenType::identifier, word);
      continue;
    }
    // This is an unrecognized token.
    add_token_data(TokenType::unknownToken, word);
  }
  add_token(TokenType::eof_token);
  return tokens;
}