void SetupConfParams()

in blingfiretools/fa_build_conf/fa_build_conf.cpp [74:275]


/// Teaches the global configuration parser (g_parser) its vocabulary.
///
/// Every section name and every parameter keyword is paired with the
/// FAFsmConst identifier it stands for. The vocabulary lives in two static
/// tables which are replayed top to bottom, so g_parser receives the
/// registrations in exactly the order the rows are listed: all sections
/// first, then all parameters.
///
/// Several keywords intentionally appear with the same identifier
/// (e.g. "min-len" / "min-comp-len"); each row is still registered
/// separately.
void SetupConfParams ()
{
    // one row per section: textual name --> FAFsmConst::FUNC_* id
    struct SectionRow {
        const char * pName;
        int FuncId;
    };

    // selects which g_parser method a parameter row is passed to
    enum RowKind {
        ROW_PLAIN,  // g_parser.AddParam (name, id)
        ROW_NUM,    // g_parser.AddNumParam (name, id)
        ROW_STR     // g_parser.AddStrParam (name, id, value, value-id)
    };

    // one row per parameter registration
    struct ParamRow {
        RowKind Kind;
        const char * pName;
        int ParamId;
        const char * pValue;  // accepted string value, ROW_STR only (0 otherwise)
        int ValueId;          // id of that string value, ROW_STR only (0 otherwise)
    };

    static const SectionRow s_Sections [] = {
        { "w2t",        FAFsmConst::FUNC_W2T },
        { "w2b",        FAFsmConst::FUNC_W2B },
        { "b2w",        FAFsmConst::FUNC_B2W },
        { "w2w",        FAFsmConst::FUNC_W2W },
        { "trs",        FAFsmConst::FUNC_TRS },
        { "w2s",        FAFsmConst::FUNC_W2S },
        { "wre",        FAFsmConst::FUNC_WRE },
        { "wt2b",       FAFsmConst::FUNC_WT2B },
        { "b2wt",       FAFsmConst::FUNC_B2WT },
        { "tag-dict",   FAFsmConst::FUNC_TAG_DICT },
        { "w2h",        FAFsmConst::FUNC_W2H },
        { "pos-dict",   FAFsmConst::FUNC_POS_DICT },
        { "b2t",        FAFsmConst::FUNC_B2T },
        { "t2tb",       FAFsmConst::FUNC_T2TB },
        { "tb2t",       FAFsmConst::FUNC_TB2T },
        { "w2tp",       FAFsmConst::FUNC_W2TP },
        { "w2tpl",      FAFsmConst::FUNC_W2TPL },
        { "w2tpr",      FAFsmConst::FUNC_W2TPR },
        { "wbd",        FAFsmConst::FUNC_WBD },
        { "norm-dict",  FAFsmConst::FUNC_NORM_DICT },
        { "global",     FAFsmConst::FUNC_GLOBAL },
        { "w2h-alt",    FAFsmConst::FUNC_W2H_ALT },
        { "t2p",        FAFsmConst::FUNC_T2P },
        { "tt2p",       FAFsmConst::FUNC_TT2P },
        { "ttt2p",      FAFsmConst::FUNC_TTT2P },
        { "norm-rules", FAFsmConst::FUNC_NORM_RULES },
        { "emit",       FAFsmConst::FUNC_EMIT },
        { "oic",        FAFsmConst::FUNC_OIC_RULES },
        { "css-rules",  FAFsmConst::FUNC_CSS_RULES },
        { "w2v",        FAFsmConst::FUNC_W2V },
        { "w2p",        FAFsmConst::FUNC_W2P },
        { "n2tp",       FAFsmConst::FUNC_N2TP },
        { "lad",        FAFsmConst::FUNC_LAD },
        { "u2l",        FAFsmConst::FUNC_U2L },
        { "i2w",        FAFsmConst::FUNC_I2W }
    };

    static const ParamRow s_Params [] = {
        // general parameters
        { ROW_NUM,   "trim",             FAFsmConst::PARAM_TRIM, 0, 0 },
        { ROW_STR,   "dir",              FAFsmConst::PARAM_DIRECTION, "r2l", FAFsmConst::DIR_R2L },
        { ROW_STR,   "dir",              FAFsmConst::PARAM_DIRECTION, "l2r", FAFsmConst::DIR_L2R },
        { ROW_STR,   "dir",              FAFsmConst::PARAM_DIRECTION, "aff", FAFsmConst::DIR_AFF },
        { ROW_NUM,   "fsm",              FAFsmConst::PARAM_FSM, 0, 0 },
        { ROW_NUM,   "action-map",       FAFsmConst::PARAM_ACTS, 0, 0 },
        { ROW_STR,   "action-map-mode",  FAFsmConst::PARAM_MAP_MODE, "triv-dump", FAFsmConst::MODE_PACK_TRIV },
        { ROW_STR,   "action-map-mode",  FAFsmConst::PARAM_MAP_MODE, "mph-dump", FAFsmConst::MODE_PACK_MPH },
        { ROW_STR,   "multi-map-mode",   FAFsmConst::PARAM_MAP_MODE, "triv-dump", FAFsmConst::MODE_PACK_TRIV },
        { ROW_STR,   "multi-map-mode",   FAFsmConst::PARAM_MAP_MODE, "mph-dump", FAFsmConst::MODE_PACK_MPH },
        { ROW_STR,   "multi-map-mode",   FAFsmConst::PARAM_MAP_MODE, "fixed-dump", FAFsmConst::MODE_PACK_FIXED },
        { ROW_NUM,   "min-len",          FAFsmConst::PARAM_MIN_LEN, 0, 0 },
        { ROW_NUM,   "min-comp-len",     FAFsmConst::PARAM_MIN_LEN, 0, 0 },
        { ROW_NUM,   "min-len2",         FAFsmConst::PARAM_MIN_LEN2, 0, 0 },
        { ROW_NUM,   "min-seg-len",      FAFsmConst::PARAM_MIN_LEN2, 0, 0 },
        { ROW_NUM,   "no-hyph-len",      FAFsmConst::PARAM_MIN_LEN2, 0, 0 },
        { ROW_NUM,   "min-len3",         FAFsmConst::PARAM_MIN_LEN3, 0, 0 },
        { ROW_NUM,   "min-ave-seg-len",  FAFsmConst::PARAM_MIN_LEN3, 0, 0 },
        { ROW_NUM,   "default-tag",      FAFsmConst::PARAM_DEFAULT_TAG, 0, 0 },
        { ROW_NUM,   "array",            FAFsmConst::PARAM_ARRAY, 0, 0 },
        { ROW_NUM,   "multi-map",        FAFsmConst::PARAM_MULTI_MAP, 0, 0 },
        { ROW_STR,   "fsm-type",         FAFsmConst::PARAM_FSM_TYPE, "rs-nfa", FAFsmConst::TYPE_RS_NFA },
        { ROW_STR,   "fsm-type",         FAFsmConst::PARAM_FSM_TYPE, "rs-dfa", FAFsmConst::TYPE_RS_DFA },
        { ROW_STR,   "fsm-type",         FAFsmConst::PARAM_FSM_TYPE, "moore-dfa", FAFsmConst::TYPE_MOORE_DFA },
        { ROW_STR,   "fsm-type",         FAFsmConst::PARAM_FSM_TYPE, "moore-mdfa", FAFsmConst::TYPE_MOORE_MULTI_DFA },
        { ROW_STR,   "fsm-type",         FAFsmConst::PARAM_FSM_TYPE, "mealy-nfa", FAFsmConst::TYPE_MEALY_NFA },
        { ROW_STR,   "fsm-type",         FAFsmConst::PARAM_FSM_TYPE, "mealy-dfa", FAFsmConst::TYPE_MEALY_DFA },
        { ROW_NUM,   "left-anchor",      FAFsmConst::PARAM_LEFT_ANCHOR, 0, 0 },
        { ROW_NUM,   "right-anchor",     FAFsmConst::PARAM_RIGHT_ANCHOR, 0, 0 },
        { ROW_STR,   "hyph-alg",         FAFsmConst::PARAM_HYPH_TYPE, "core", FAFsmConst::HYPH_TYPE_CORE },
        { ROW_STR,   "hyph-alg",         FAFsmConst::PARAM_HYPH_TYPE, "w2h-w2s", FAFsmConst::HYPH_TYPE_W2H_W2S },
        { ROW_STR,   "hyph-alg",         FAFsmConst::PARAM_HYPH_TYPE, "w2s-w2h", FAFsmConst::HYPH_TYPE_W2S_W2H },
        { ROW_PLAIN, "normalize",        FAFsmConst::PARAM_NORMALIZE, 0, 0 },
        { ROW_PLAIN, "no-tr",            FAFsmConst::PARAM_NO_TR, 0, 0 },
        { ROW_PLAIN, "ignore-case",      FAFsmConst::PARAM_IGNORE_CASE, 0, 0 },
        { ROW_PLAIN, "dict-mode",        FAFsmConst::PARAM_DICT_MODE, 0, 0 },
        { ROW_NUM,   "max-prob",         FAFsmConst::PARAM_MAX_PROB, 0, 0 },
        { ROW_NUM,   "depth",            FAFsmConst::PARAM_DEPTH, 0, 0 },
        { ROW_NUM,   "max-depth",        FAFsmConst::PARAM_DEPTH, 0, 0 },
        { ROW_NUM,   "max-pass-count",   FAFsmConst::PARAM_MAX_PASS_COUNT, 0, 0 },
        { ROW_NUM,   "max-score",        FAFsmConst::PARAM_MAX_SCORE, 0, 0 },
        { ROW_NUM,   "max-tag",          FAFsmConst::PARAM_MAX_TAG, 0, 0 },
        { ROW_PLAIN, "log-scale",        FAFsmConst::PARAM_LOG_SCALE, 0, 0 },
        { ROW_NUM,   "float-array",      FAFsmConst::PARAM_FLOAT_ARRAY, 0, 0 },
        { ROW_NUM,   "min-max",          FAFsmConst::PARAM_FLOAT_ARRAY, 0, 0 },
        { ROW_PLAIN, "use-nfst",         FAFsmConst::PARAM_USE_NFST, 0, 0 },
        { ROW_NUM,   "wre-conf",         FAFsmConst::PARAM_WRE_CONF, 0, 0 },
        { ROW_NUM,   "act-data",         FAFsmConst::PARAM_ACT_DATA, 0, 0 },
        { ROW_NUM,   "action-data",      FAFsmConst::PARAM_ACT_DATA, 0, 0 },
        { ROW_NUM,   "max-length",       FAFsmConst::PARAM_MAX_LENGTH, 0, 0 },
        { ROW_NUM,   "max-token-length", FAFsmConst::PARAM_MAX_LENGTH, 0, 0 },
        { ROW_NUM,   "string-array",     FAFsmConst::PARAM_STRING_ARRAY, 0, 0 },
        { ROW_NUM,   "token-id-min",     FAFsmConst::PARAM_TOKENID_MIN, 0, 0 },
        { ROW_NUM,   "token-id-max",     FAFsmConst::PARAM_TOKENID_MAX, 0, 0 },

        // parameters for the WRE compiler (not used at runtime)
        { ROW_NUM,   "fsm-count",        FAFsmConst::PARAM_FSM_COUNT, 0, 0 },
        { ROW_STR,   "token-type",       FAFsmConst::PARAM_TOKEN_TYPE, "txt", FAFsmConst::WRE_TT_TEXT },
        { ROW_STR,   "token-type",       FAFsmConst::PARAM_TOKEN_TYPE, "tag", FAFsmConst::WRE_TT_TAGS },
        { ROW_STR,   "token-type",       FAFsmConst::PARAM_TOKEN_TYPE, "dct", FAFsmConst::WRE_TT_DCTS },
        { ROW_NUM,   "type",             FAFsmConst::PARAM_TYPE, 0, 0 },
        { ROW_NUM,   "tag-ow-base",      FAFsmConst::PARAM_TAG_OW_BASE, 0, 0 },

        // parameters describing input / output transformations
        { ROW_STR,   "in-tr",            FAFsmConst::PARAM_IN_TR, "hyph-redup", FAFsmConst::TR_HYPH_REDUP },
        { ROW_STR,   "in-tr",            FAFsmConst::PARAM_IN_TR, "hyph-redup-rev", FAFsmConst::TR_HYPH_REDUP_REV },
        { ROW_STR,   "in-tr",            FAFsmConst::PARAM_IN_TR, "pref", FAFsmConst::TR_PREFIX },
        { ROW_STR,   "in-tr",            FAFsmConst::PARAM_IN_TR, "pref-rev", FAFsmConst::TR_PREFIX_REV },
        { ROW_STR,   "in-tr",            FAFsmConst::PARAM_IN_TR, "ucf", FAFsmConst::TR_UCF },
        { ROW_STR,   "in-tr",            FAFsmConst::PARAM_IN_TR, "ucf-rev", FAFsmConst::TR_UCF_REV },
        { ROW_STR,   "out-tr",           FAFsmConst::PARAM_OUT_TR, "hyph-redup", FAFsmConst::TR_HYPH_REDUP },
        { ROW_STR,   "out-tr",           FAFsmConst::PARAM_OUT_TR, "hyph-redup-rev", FAFsmConst::TR_HYPH_REDUP_REV },
        { ROW_STR,   "out-tr",           FAFsmConst::PARAM_OUT_TR, "pref", FAFsmConst::TR_PREFIX },
        { ROW_STR,   "out-tr",           FAFsmConst::PARAM_OUT_TR, "pref-rev", FAFsmConst::TR_PREFIX_REV },
        { ROW_STR,   "out-tr",           FAFsmConst::PARAM_OUT_TR, "ucf", FAFsmConst::TR_UCF },
        { ROW_STR,   "out-tr",           FAFsmConst::PARAM_OUT_TR, "ucf-rev", FAFsmConst::TR_UCF_REV },
        { ROW_NUM,   "redup-delim",      FAFsmConst::PARAM_REDUP_DELIM, 0, 0 },
        { ROW_NUM,   "pref-delim",       FAFsmConst::PARAM_PREF_DELIM, 0, 0 },
        { ROW_NUM,   "pref-fsm",         FAFsmConst::PARAM_PREF_FSM, 0, 0 },
        { ROW_NUM,   "suff-fsm",         FAFsmConst::PARAM_SUFFIX_FSM, 0, 0 },
        { ROW_NUM,   "ucf-delim",        FAFsmConst::PARAM_UCF_DELIM, 0, 0 },
        { ROW_NUM,   "charmap",          FAFsmConst::PARAM_CHARMAP, 0, 0 },
        { ROW_NUM,   "min-uni-prob",     FAFsmConst::PARAM_MIN_UNI_PROB, 0, 0 },
        { ROW_NUM,   "c2s-map",          FAFsmConst::PARAM_C2S_MAP, 0, 0 },
        { ROW_NUM,   "s2l-map",          FAFsmConst::PARAM_S2L_MAP, 0, 0 },
        { ROW_NUM,   "script-min-tag",   FAFsmConst::PARAM_SCRIPT_MIN, 0, 0 },
        { ROW_NUM,   "script-max-tag",   FAFsmConst::PARAM_SCRIPT_MAX, 0, 0 },

        // turns on W2B, e.g. reductive stemming for the word-breaker
        { ROW_PLAIN, "do-w2b",           FAFsmConst::PARAM_DO_W2B, 0, 0 },
        { ROW_NUM,   "punkt",            FAFsmConst::PARAM_PUNKT, 0, 0 },
        { ROW_NUM,   "word",             FAFsmConst::PARAM_WORD, 0, 0 },
        { ROW_NUM,   "eos",              FAFsmConst::PARAM_EOS, 0, 0 },
        { ROW_NUM,   "eop",              FAFsmConst::PARAM_EOP, 0, 0 },
        { ROW_NUM,   "xword",            FAFsmConst::PARAM_XWORD, 0, 0 },
        { ROW_NUM,   "seg",              FAFsmConst::PARAM_SEG, 0, 0 },
        { ROW_NUM,   "ignore",           FAFsmConst::PARAM_IGNORE, 0, 0 },

        { ROW_NUM,   "order",            FAFsmConst::PARAM_ORDER, 0, 0 },
        { ROW_NUM,   "min-order",        FAFsmConst::PARAM_MIN_ORDER, 0, 0 },
        { ROW_NUM,   "unknown",          FAFsmConst::PARAM_UNKNOWN, 0, 0 },
        { ROW_NUM,   "max-count",        FAFsmConst::PARAM_MAX_COUNT, 0, 0 },
        { ROW_NUM,   "ratio",            FAFsmConst::PARAM_RATIO, 0, 0 },
        { ROW_NUM,   "ratio2",           FAFsmConst::PARAM_RATIO2, 0, 0 },
        { ROW_NUM,   "word-ratio",       FAFsmConst::PARAM_RATIO2, 0, 0 },
        { ROW_NUM,   "max-distance",     FAFsmConst::PARAM_MAX_DISTANCE, 0, 0 },
        { ROW_NUM,   "max-ambiguous-distance", FAFsmConst::PARAM_MAX_DISTANCE, 0, 0 },
        { ROW_NUM,   "threshold",        FAFsmConst::PARAM_THRESHOLD, 0, 0 },
        { ROW_NUM,   "id-offset",        FAFsmConst::PARAM_ID_OFFSET, 0, 0 },
        { ROW_PLAIN, "use-byte-encoding", FAFsmConst::PARAM_USE_BYTE_ENCODING, 0, 0 },
        { ROW_PLAIN, "no-dummy-prefix",  FAFsmConst::PARAM_NO_DUMMY_PREFIX, 0, 0 },

        // demands that a CRC32-like check of the LDB file passes
        { ROW_PLAIN, "verify-ldb-bin",   FAFsmConst::PARAM_VERIFY_LDB_BIN, 0, 0 },

        // tokenization algorithm used at runtime
        { ROW_STR,   "tokalgo",          FAFsmConst::PARAM_TOKENIZATION_TYPE, "falex", FAFsmConst::TOKENIZE_DEFAULT },
        { ROW_STR,   "tokalgo",          FAFsmConst::PARAM_TOKENIZATION_TYPE, "wordpiece", FAFsmConst::TOKENIZE_WORDPIECE },
        { ROW_STR,   "tokalgo",          FAFsmConst::PARAM_TOKENIZATION_TYPE, "unilm", FAFsmConst::TOKENIZE_UNIGRAM_LM },
        { ROW_STR,   "tokalgo",          FAFsmConst::PARAM_TOKENIZATION_TYPE, "bpe", FAFsmConst::TOKENIZE_BPE },
        { ROW_STR,   "tokalgo",          FAFsmConst::PARAM_TOKENIZATION_TYPE, "bpe-opt", FAFsmConst::TOKENIZE_BPE_OPT },
        { ROW_STR,   "tokalgo",          FAFsmConst::PARAM_TOKENIZATION_TYPE, "bpe-opt-with-merges", FAFsmConst::TOKENIZE_BPE_OPT_WITH_MERGES }
    };

    // sections go in first, in table order
    const SectionRow * const pSectionsEnd =
        s_Sections + sizeof (s_Sections) / sizeof (s_Sections [0]);

    for (const SectionRow * pSec = s_Sections; pSec != pSectionsEnd; ++pSec) {
        g_parser.AddSection (pSec->pName, pSec->FuncId);
    }

    // then the parameters, each routed to the method its row kind names
    const ParamRow * const pParamsEnd =
        s_Params + sizeof (s_Params) / sizeof (s_Params [0]);

    for (const ParamRow * pRow = s_Params; pRow != pParamsEnd; ++pRow) {

        switch (pRow->Kind) {

        case ROW_NUM:
            g_parser.AddNumParam (pRow->pName, pRow->ParamId);
            break;

        case ROW_STR:
            g_parser.AddStrParam (pRow->pName, pRow->ParamId,
                                  pRow->pValue, pRow->ValueId);
            break;

        case ROW_PLAIN:
            g_parser.AddParam (pRow->pName, pRow->ParamId);
            break;
        }
    }
}