in blingfiretools/fa_line2chain_unicode/fa_line2chain_unicode.cpp [608:836]
/// Program entry point.
///
/// Reads stdin line by line, converts the data part of every non-empty line
/// into a chain of integers (decimal, hex, raw bytes or text in the input
/// encoding, depending on the command-line flags), optionally lower-cases,
/// normalizes and transforms the chain, and prints it together with the
/// optional tab-separated key(s) to stdout.
///
/// Returns 0 on success, 2 if an FAException was caught and 1 on any other
/// exception. The process is terminated with exit (1) if a line is longer
/// than MaxChainSize or if the conversion of a line fails.
int __cdecl main (int argc, char ** argv)
{
    __PROG__ = argv [0];

    // skip the program name
    --argc, ++argv;

    ::FAIOSetup ();

    // parse a command line
    process_args (argc, argv);

    if (g_fUInt8Enc)
    {
        // Switch stdin to binary mode to avoid premature file truncation problem.
        ::FAInputIOSetup ();
    }

    try {

        // choose the number width by output format if it is still -1
        // (presumably meaning it was not given on the command line)
        if (-1 == g_num_size) {
            if (g_hex_output)
                g_num_size = 4;
            else
                g_num_size = 5;
        }

        g_Key2Freq.SetAllocator (&g_alloc);
        g_Key2Freq.Create ();

        // load tagset, if needed
        if (NULL != pInTagSetFile) {
            std::ifstream tagset_ifs (pInTagSetFile, std::ios::in);
            FAAssertStream (&tagset_ifs, pInTagSetFile);
            g_map_io.Read (tagset_ifs, &g_tagset);
        }

        // load prefix automaton, if needed
        if (g_pInPrefFsmFile) {
            g_pref_dfa_image.Load (g_pInPrefFsmFile);
            const unsigned char * pImg = g_pref_dfa_image.GetImageDump ();
            DebugLogAssert (pImg);
            g_pref_fsm_dump.SetImage (pImg);
            g_pPrefFsm = &g_pref_fsm_dump;
            g_tr_pref.SetRsDfa (g_pPrefFsm);
        }

        // load normalization map, if needed
        if (g_pCharMapFile) {
            g_charmap_image.Load (g_pCharMapFile);
            const unsigned char * pImg = g_charmap_image.GetImageDump ();
            DebugLogAssert (pImg);
            g_charmap.SetImage (pImg);
        }

        // specify delimiters, if needed (-1 leaves the transformation as is)
        if (-1 != g_pref_delim) {
            g_tr_pref.SetDelim (g_pref_delim);
            g_tr_pref_rev.SetDelim (g_pref_delim);
        }
        if (-1 != g_redup_delim) {
            g_tr_hyph_redup.SetDelim (g_redup_delim);
            g_tr_hyph_redup_rev.SetDelim (g_redup_delim);
        }
        if (-1 != g_ucf_delim) {
            g_tr_ucf.SetDelim (g_ucf_delim);
            g_tr_ucf_rev.SetDelim (g_ucf_delim);
        }

        FAStr2Utf16 cp2utf16 (&g_alloc);

        // the text converter is only used when no numeric/binary input
        // encoding was requested
        if (!g_fDecEnc && !g_fHexEnc && !g_fUInt8Enc) {
            cp2utf16.SetEncodingName (g_pInEnc);
        }

        const char * pDelim = NULL;

        LineNum = 0;

        while (!std::cin.eof ()) {

            if (!std::getline (std::cin, line))
                break;

            LineNum++;

            // Strip trailing CR/LF characters. A line that consists of
            // nothing but CR/LF (e.g. the "\r" left over from a blank line
            // of CRLF input) has to become empty as well, otherwise it would
            // be processed as data and produce a bogus chain.
            const std::string::size_type EndOfLine = \
                line.find_last_not_of ("\r\n");
            if (EndOfLine != std::string::npos) {
                line.erase (EndOfLine + 1);
            } else {
                line.clear ();
            }

            // taken after the trimming, so both reflect the stripped line
            const char * pLine = line.c_str ();
            const int LineLen = static_cast <int> (line.length ());

            if (!line.empty ()) {

                // by default the whole line is the data to convert
                int DataLen = LineLen;

                // checked against the full line length, so DataLen can never
                // exceed MaxChainSize below
                if (MaxChainSize < DataLen) {
                    std::cerr << "ERROR: Line is too long, #"
                              << LineNum
                              << " in program " << __PROG__ << '\n';
                    exit (1);
                }

                // one key: the data ends at the first tab, if any
                if (g_use_keys) {
                    pDelim = strchr (pLine, '\t');
                    if (pDelim) {
                        DataLen = int (pDelim - pLine);
                    }
                }
                // key pairs: the data ends at the second tab, if any
                if (g_use_keypairs) {
                    pDelim = strchr (pLine, '\t');
                    if (pDelim) {
                        pDelim = strchr (pDelim + 1, '\t');
                        if (pDelim) {
                            DataLen = int (pDelim - pLine);
                        }
                    }
                }

                int Count = 0;

                if (g_fDecEnc) {
                    // make a chain from the decimal numbers in ASCII, e.g. 255 1 238 2
                    Count = ::FAReadIntegerChain
                        (pLine, DataLen, 10, Chain, MaxChainSize);
                } else if (g_fHexEnc) {
                    // make a chain from the hex in ASCII, e.g. FF 01 EE 02
                    Count = ::FAReadHexChain
                        (pLine, DataLen, Chain, MaxChainSize);
                } else if (g_fUInt8Enc) {
                    // make a chain from the binary byte sequence
                    for (int i = 0; i < DataLen && i < MaxChainSize; ++i) {
                        Chain [i] = (unsigned char) pLine [i];
                    }
                    Count = DataLen;
                } else {
                    // make a UTF-32 chain from the plain-text in other encoding
                    Count = cp2utf16.Process
                        (pLine, DataLen, Chain, MaxChainSize);
                }

                // input sequence is too long
                FAAssert (Count <= MaxChainSize, FAMsg::InternalError);

                // -1 is treated as a conversion failure
                if (-1 == Count) {
                    std::cerr << "ERROR: Conversion is not possible in line #"
                              << LineNum
                              << " in program " << __PROG__ << '\n';
                    exit (1);
                }

                // lower case, if needed
                if (g_ignore_case) {
                    ::FAUtf32StrLower (Chain, Count);
                }

                // normalize a word (in-place allowed)
                if (g_pCharMapFile) {
                    Count = ::FANormalizeWord (Chain, Count,
                        Chain, MaxChainSize, &g_charmap);
                }

                // apply transformation, if needed
                if (g_pInTr && false == g_compounds) {
                    const int NewCount =
                        g_pInTr->Process (Chain, Count, Chain, MaxChainSize);
                    // -1 leaves the chain and its length untouched
                    if (-1 != NewCount) {
                        DebugLogAssert (NewCount <= MaxChainSize);
                        Count = NewCount;
                    }
                }

                // print the output chain and the key
                if (false == g_no_output) {

                    // a key pair is printed in front of the chain
                    if (g_use_keypairs && pDelim) {
                        const char * pKey = pLine + DataLen + 1;
                        const int KeyLen = LineLen - DataLen - 1;
                        PrintKey (pKey, KeyLen);
                    }

                    if (false == g_compounds) {
                        PrintChain (Chain, Count);
                    } else {
                        PrintCompound (Chain, Count);
                    }

                    // a single key is printed after the chain
                    if (g_use_keys && pDelim) {
                        const char * pKey = pLine + DataLen + 1;
                        const int KeyLen = LineLen - DataLen - 1;
                        PrintKey (pKey, KeyLen);
                    }

                    std::cout << '\n';
                }

            } // of if (!line.empty ()) ...

        } // of while (!std::cin.eof ()) ...

        // print Key -> Freq array, if needed
        if (false == g_no_output && pKey2FreqFile) {
            const int Size = g_Key2Freq.size ();
            const int * pKey2Freq = g_Key2Freq.begin ();
            std::ofstream ofs_key2f (pKey2FreqFile, std::ios::out);
            g_map_io.Print (ofs_key2f, pKey2Freq, Size);
        }

    } catch (const FAException & e) {

        const char * const pErrMsg = e.GetErrMsg ();
        const char * const pFile = e.GetSourceName ();
        const int Line = e.GetSourceLine ();

        // report where the error was raised in the code ...
        std::cerr << "ERROR: " << pErrMsg << " in " << pFile
            << " at line " << Line << " in program " << __PROG__ << '\n';

        // ... and which input line was being processed
        std::cerr << "ERROR: in data at line: " << LineNum << " in \""
            << line << "\"\n";

        return 2;

    } catch (...) {

        std::cerr << "ERROR: Unknown error in program " << __PROG__ << '\n';
        return 1;
    }

    return 0;
}