in blingfiretools/fa_ts2ps/fa_ts2ps.cpp [193:426]
/// Entry point of the tool.
///
/// Reads tagged text from the input stream, runs the parser selected by
/// g_AlgType (PARSER_TRIV, PARSER_NEST or PARSER_WRE_LEX) over every non-empty
/// chunk returned by g_txt_io.Read, and prints the text together with the
/// resulting parse tree to the output stream.
///
/// All settings (file names, flags, algorithm type) come from file-level
/// globals; presumably they are filled in by process_args -- confirm against
/// its definition.
///
/// Returns 0 on success, 2 if an FAException was caught and 1 if any other
/// exception was caught.
int __cdecl main (int argc, char ** argv)
{
    __PROG__ = argv [0];

    // skip the program name before handing the arguments over
    --argc, ++argv;

    ::FAIOSetup ();

    process_args (argc, argv);

    try {

        // Use the file-level allocator for the parse tree and the WRE lexer
        // too, so that the leak report at the end of main covers every
        // allocation made here.
        FAAllocatorA * const g_pool = &g_alloc;

        // IO
        FAMapIOTools g_map_io (&g_alloc);
        FACorpusIOTools_utf8 g_txt_io (&g_alloc);

        // tagset
        FATagSet g_tagset (&g_alloc);

        // PRM LDB
        FAImageDump g_PrmImg;
        FAMorphLDB_t < int > g_ldb;
        FADictInterpreter_t < int > g_tag_dict;

        // compiled grammar
        FAImageDump g_StageImg;
        FAWREConf_pack g_Wre;
        FAMultiMap_pack g_Acts;

        // parser
        FAParserConfKeeper g_conf;
        FAWreLexTools_t < int > g_wre_lex;
        FAParser_triv_t < int > g_parser_triv (&g_alloc);
        FAParser_nest_t < int > g_parser_nest (&g_alloc);
        FAParser_base_t < int > * g_pParser = NULL;

        // input tagged text
        FATaggedText g_text (&g_alloc);
        FATaggedTextA * pInText = &g_text;

        // output parse tree
        FAParseTree g_tree (g_pool);

        ///
        /// initialize
        ///

        g_txt_io.SetTagSet (&g_tagset);
        g_txt_io.SetNoPosTags (g_no_pos_tags);

        // adjust IO pointers
        if (g_pInFile) {
            g_ifs.open (g_pInFile, std::ios::in);
            FAAssertStream (&g_ifs, g_pInFile);
            g_pIs = &g_ifs;
        }
        if (g_pOutFile) {
            g_ofs.open (g_pOutFile, std::ios::out);
            // fail early instead of silently dropping all of the output
            FAAssert (g_ofs.is_open (), FAMsg::IOError);
            g_pOs = &g_ofs;
        }

        // g_pParser stays NULL for any other algorithm type; it is only
        // dereferenced below under the same TRIV / NEST condition
        if (FAFsmConst::PARSER_TRIV == g_AlgType) {
            g_pParser = &g_parser_triv;
        } else if (FAFsmConst::PARSER_NEST == g_AlgType) {
            g_pParser = &g_parser_nest;
        }

        // load POS tagset
        if (g_pTagsetFile) {
            std::ifstream tagset_ifs (g_pTagsetFile, std::ios::in);
            FAAssertStream (&tagset_ifs, g_pTagsetFile);
            g_map_io.Read (tagset_ifs, &g_tagset);
        }

        // load PRM LDB
        if (g_pInLDBFile) {

            g_PrmImg.Load (g_pInLDBFile);
            const unsigned char * pImg = g_PrmImg.GetImageDump ();
            FAAssert (pImg, FAMsg::IOError);

            g_ldb.SetImage (pImg);

            // the tag dictionary is optional: it is configured only if the
            // LDB provides a configuration for it
            const FADictConfKeeper * pConf = g_ldb.GetTagDictConf ();
            if (pConf) {
                g_tag_dict.SetConf (pConf, g_ldb.GetInTr ());
            }
        }

        // load and set up the compiled grammar
        if (g_pStageFile) {

            g_StageImg.Load (g_pStageFile);
            const unsigned char * pImg = g_StageImg.GetImageDump ();
            FAAssert (pImg, FAMsg::IOError);

            // The image starts with an array of ints: [0] is the entry count
            // (must be 2), [1] and [2] are byte offsets from the beginning of
            // the image to the WRE configuration and to the actions map.
            const int * pHeader = reinterpret_cast < const int * > (pImg);
            const int Count = pHeader [0];
            FAAssert (2 == Count, FAMsg::IOError);

            g_Wre.SetImage (pImg + pHeader [1]);
            g_Acts.SetImage (pImg + pHeader [2]);

            g_conf.SetWre (&g_Wre);
            g_conf.SetActs (&g_Acts);
            g_conf.SetIgnoreCase (g_ignore_case);
            g_conf.SetMaxDepth (g_MaxPassCount);

        } else {

            // no stage file: take the parser configuration from the FUNC_WRE
            // entry of the LDB header.
            // NOTE(review): this relies on g_ldb having received an image
            // above; presumably process_args guarantees that either the stage
            // file or the LDB file is given -- confirm.
            const int * pValues = NULL;
            const int Size =
                g_ldb.GetHeader ()->Get (FAFsmConst::FUNC_WRE, &pValues);
            FAAssert (0 < Size, FAMsg::IOError);

            g_conf.Initialize (&g_ldb, pValues, Size);
        }

        if (FAFsmConst::PARSER_TRIV == g_AlgType ||
            FAFsmConst::PARSER_NEST == g_AlgType) {

            const FAWREConfCA * pWre = g_conf.GetWre ();
            const FAMultiMapCA * pActs = g_conf.GetActs ();

            g_pParser->SetRules (pWre->GetDfa1 (), pWre->GetState2Ows ());
            g_pParser->SetDigitizer (pWre->GetTxtDigDfa (), pWre->GetTxtDigOws ());
            g_pParser->SetDctDigitizer (&g_tag_dict, pWre->GetDictDig ());
            g_pParser->SetActions (pActs);
            g_pParser->SetIgnoreCase (g_ignore_case);
            g_pParser->SetResume (g_resume);
            g_pParser->SetTokenType (pWre->GetTokenType ());
            g_pParser->SetTagOwBase (pWre->GetTagOwBase ());
            g_pParser->SetMaxPassCount (g_MaxPassCount);
            g_pParser->SetParseTree (&g_tree);

        } else if (FAFsmConst::PARSER_WRE_LEX == g_AlgType) {

            g_wre_lex.Initialize (g_pool, &g_conf, &g_tag_dict);
        }

        ///
        /// process input
        ///

        // Stop on end of input, and also on an unrecoverable stream error:
        // a bad stream never reaches EOF, so testing eof () alone could loop
        // forever.
        while (!g_pIs->eof () && !g_pIs->bad ()) {

            // read input; in resume mode the reader also fills in g_tree
            if (!g_resume) {
                g_txt_io.Read (*g_pIs, &g_text);
            } else {
                g_txt_io.Read (*g_pIs, &g_text, &g_tree);
            }

            const int WordCount = pInText->GetWordCount ();

            if (!g_no_process && 0 < WordCount) {

                if (FAFsmConst::PARSER_TRIV == g_AlgType ||
                    FAFsmConst::PARSER_NEST == g_AlgType) {

                    // feed all the words with their tags to the parser
                    for (int i = 0; i < WordCount; ++i) {

                        const int * pWord;
                        const int WordLen = pInText->GetWord (i, &pWord);
                        DebugLogAssert (0 < WordLen && pWord);

                        const int Tag = pInText->GetTag (i);
                        DebugLogAssert (0 < Tag);

                        g_pParser->AddWord (pWord, WordLen, Tag);
                    }

                    g_pParser->Process ();

                    if (false == g_no_output) {
                        // clear the no-POS-tags flag just for printing, then
                        // restore the configured value for the next read
                        g_txt_io.SetNoPosTags (false);
                        g_txt_io.Print (*g_pOs, pInText, &g_tree);
                        g_txt_io.SetNoPosTags (g_no_pos_tags);
                    }

                } else if (FAFsmConst::PARSER_WRE_LEX == g_AlgType) {

                    // in resume mode the tree was filled in by the reader and
                    // must not be re-initialized
                    if (false == g_resume) {
                        g_tree.Init (WordCount);
                    }

                    g_wre_lex.Reset (WordCount);

                    // feed all the words with their tags to the lexer
                    for (int i = 0; i < WordCount; ++i) {

                        const int * pWord;
                        const int WordLen = pInText->GetWord (i, &pWord);
                        DebugLogAssert (0 < WordLen && pWord);

                        const int Tag = pInText->GetTag (i);
                        DebugLogAssert (0 < Tag);

                        g_wre_lex.AddWord (pWord, WordLen, Tag);
                    }

                    g_wre_lex.SetParseTree (&g_tree);
                    g_wre_lex.Process ();

                    if (false == g_no_output) {
                        // clear the no-POS-tags flag just for printing, then
                        // restore the configured value for the next read
                        g_txt_io.SetNoPosTags (false);
                        g_txt_io.Print (*g_pOs, pInText, &g_tree);
                        g_txt_io.SetNoPosTags (g_no_pos_tags);
                    }
                }

            } // if (!g_no_process && 0 < WordCount) ...

        } // of while (!g_pIs->eof () && !g_pIs->bad ()) ...

        // report a read error that ended the loop before the end of input
        FAAssert (!g_pIs->bad (), FAMsg::IOError);

    } catch (const FAException & e) {

        const char * const pErrMsg = e.GetErrMsg ();
        const char * const pFile = e.GetSourceName ();
        const int Line = e.GetSourceLine ();

        std::cerr << "ERROR: " << pErrMsg << " in " << pFile
                  << " at line " << Line << " in program " << __PROG__ << '\n';

        return 2;

    } catch (...) {

        std::cerr << "ERROR: Unknown error in program " << __PROG__ << '\n';

        return 1;
    }

    // print out memory leaks, if any
    FAPrintLeaks(&g_alloc, std::cerr);

    return 0;
}