int __cdecl main()

in blingfiretools/fa_line2chain_unicode/fa_line2chain_unicode.cpp [608:836]


// Program entry point.
//
// Parses the command line, loads the optional resources (tagset, prefix
// automaton, character normalization map), then reads std::cin line by line.
// Every non-empty line is converted into an integer chain (decimal, hex,
// raw bytes, or text decoded through FAStr2Utf16), optionally lower-cased,
// normalized and transformed, and then printed together with its optional
// key via PrintKey / PrintChain / PrintCompound, followed by '\n' on std::cout.
// After the input is exhausted the Key -> Freq array is printed to
// pKey2FreqFile, if that file name was given.
//
// Returns 0 on success, 2 when an FAException was caught and 1 for any other
// exception. Calls exit (1) directly when a line is longer than MaxChainSize
// or when the conversion of a line reports -1.
int __cdecl main (int argc, char ** argv)
{
    __PROG__ = argv [0];

    // skip the program name before parsing the arguments
    --argc, ++argv;

    ::FAIOSetup ();

    // parse a command line
    process_args (argc, argv);

    if (g_fUInt8Enc)
    {
        // Switch stdin to binary mode to avoid premature file truncation problem.
        ::FAInputIOSetup ();
    }

    try {

        // pick a default number size if none was specified (-1):
        // 4 for hex output, 5 otherwise
        if (-1 == g_num_size) {
          if (g_hex_output)
            g_num_size = 4;
          else
            g_num_size = 5;
        }

        g_Key2Freq.SetAllocator (&g_alloc);
        g_Key2Freq.Create ();

        // load tagset, if needed
        if (NULL != pInTagSetFile) {
            std::ifstream tagset_ifs (pInTagSetFile, std::ios::in);
            FAAssertStream (&tagset_ifs, pInTagSetFile);
            g_map_io.Read (tagset_ifs, &g_tagset);
        }
        // load prefix automaton, if needed
        if (g_pInPrefFsmFile) {
            g_pref_dfa_image.Load (g_pInPrefFsmFile);
            const unsigned char * pImg = g_pref_dfa_image.GetImageDump ();
            DebugLogAssert (pImg);
            g_pref_fsm_dump.SetImage (pImg);
            g_pPrefFsm = &g_pref_fsm_dump;
            g_tr_pref.SetRsDfa (g_pPrefFsm);
        }
        // load normalization map, if needed
        if (g_pCharMapFile) {
            g_charmap_image.Load (g_pCharMapFile);
            const unsigned char * pImg = g_charmap_image.GetImageDump ();
            DebugLogAssert (pImg);
            g_charmap.SetImage (pImg);
        }

        // specify delimiters, if needed (-1 means "not specified")
        if (-1 != g_pref_delim) {
            g_tr_pref.SetDelim (g_pref_delim);
            g_tr_pref_rev.SetDelim (g_pref_delim);
        }
        if (-1 != g_redup_delim) {
            g_tr_hyph_redup.SetDelim (g_redup_delim);
            g_tr_hyph_redup_rev.SetDelim (g_redup_delim);
        }
        if (-1 != g_ucf_delim) {
            g_tr_ucf.SetDelim (g_ucf_delim);
            g_tr_ucf_rev.SetDelim (g_ucf_delim);
        }

        // the text decoder is only configured when no numeric/binary
        // input encoding was requested
        FAStr2Utf16 cp2utf16 (&g_alloc);
        if (!g_fDecEnc && !g_fHexEnc && !g_fUInt8Enc) {
            cp2utf16.SetEncodingName (g_pInEnc);
        }

        const char * pDelim = NULL;
        LineNum = 0;

        while (!std::cin.eof ()) {

            if (!std::getline (std::cin, line))
                break;

            LineNum++;

            // Strip trailing CR/LF characters. A line made up of nothing but
            // CR/LF (e.g. a blank line of a CRLF file, which getline returns
            // as "\r") has no character to keep and must become empty, so
            // that it is skipped below instead of being emitted as a chain.
            const std::string::size_type LastKept = line.find_last_not_of ("\r\n");
            if (std::string::npos == LastKept) {
                line.clear ();
            } else {
                line.erase (LastKept + 1);
            }

            const char * pLine = line.c_str ();
            const int LineLen = static_cast <int> (line.length ());

            if (!line.empty ()) {

                // length of the data part of the line; the whole line
                // unless a key delimiter is found below
                int DataLen = LineLen;

                if (MaxChainSize < DataLen) {
                    std::cerr << "ERROR: Line is too long, #" \
                              << LineNum \
                              << " in program " << __PROG__ << '\n';
                    exit (1);
                }
                // DATA \t KEY : the data ends at the first tab
                if (g_use_keys) {

                    pDelim = strchr (pLine, '\t');
                    if (pDelim) {
                        DataLen = int (pDelim - pLine);
                    }
                }
                // the data ends at the second tab; pDelim stays NULL
                // when the line has fewer than two tabs
                if (g_use_keypairs) {
                    pDelim = strchr (pLine, '\t');
                    if (pDelim) {
                        pDelim = strchr (pDelim+1, '\t');
                        if (pDelim) {
                            DataLen = int (pDelim - pLine);
                        }
                    }
                }

                int Count = 0;

                if (g_fDecEnc) {
                    // make a chain from the decimal numbers in ASCII, e.g. 255 1 238 2
                    Count = ::FAReadIntegerChain \
                        (pLine, DataLen, 10, Chain, MaxChainSize);
                } else if (g_fHexEnc) {
                    // make a chain from the hex in ASCII, e.g. FF 01 EE 02
                    Count = ::FAReadHexChain \
                        (pLine, DataLen, Chain, MaxChainSize);
                } else if (g_fUInt8Enc) {
                    // make a chain from the binary byte sequence;
                    // DataLen cannot exceed MaxChainSize here because of
                    // the line length check above
                    for (int i = 0; i < DataLen && i < MaxChainSize; ++i) {
                        Chain [i] = (unsigned char) pLine [i];
                    }
                    Count = DataLen;
                } else {
                    // make a UTF-32 chain from the plain-text in other encoding
                    Count = cp2utf16.Process \
                        (pLine, DataLen, Chain, MaxChainSize);
                }

                // input sequence is too long
                FAAssert (Count <= MaxChainSize, FAMsg::InternalError);

                if (-1 == Count) {
                    std::cerr << "ERROR: Conversion is not possible in line #"\
                              << LineNum \
                              << " in program " << __PROG__ << '\n';
                    exit (1);
                }

                // lower case, if needed
                if (g_ignore_case) {
                    ::FAUtf32StrLower (Chain, Count);
                }
                // normalize a word (in-place allowed)
                if (g_pCharMapFile) {
                    Count = ::FANormalizeWord (Chain, Count, \
                        Chain, MaxChainSize, &g_charmap);
                }
                // apply transformation, if needed
                if (g_pInTr && false == g_compounds) {

                    const int NewCount = \
                        g_pInTr->Process (Chain, Count, Chain, MaxChainSize);

                    // a result of -1 leaves the chain length as it was
                    if (-1 != NewCount) {
                        DebugLogAssert (NewCount <= MaxChainSize);
                        Count = NewCount;
                    }
                }

                // print the output chain and the key
                if (false == g_no_output) {

                    // with key pairs the key is printed before the chain
                    if (g_use_keypairs && pDelim) {
                        const char * pKey = pLine + DataLen + 1;
                        const int KeyLen = LineLen - DataLen - 1;
                        PrintKey (pKey, KeyLen);
                    }

                    if (false == g_compounds) {
                        PrintChain (Chain, Count);
                    } else {
                        PrintCompound (Chain, Count);
                    }

                    // with plain keys the key is printed after the chain
                    if (g_use_keys && pDelim) {
                        const char * pKey = pLine + DataLen + 1;
                        const int KeyLen = LineLen - DataLen - 1;
                        PrintKey (pKey, KeyLen);
                    }

                    std::cout << '\n';
                }

            } // of if (!line.empty ()) ...

        } // of while (!std::cin.eof ()) ...

        // print Key -> Freq array, if needed
        if (false == g_no_output && pKey2FreqFile) {

            const int Size = g_Key2Freq.size ();
            const int * pKey2Freq = g_Key2Freq.begin ();

            // NOTE(review): unlike the tagset input stream, this output
            // stream is not checked after opening - confirm whether
            // g_map_io.Print reports a failed stream on its own
            std::ofstream ofs_key2f (pKey2FreqFile, std::ios::out);
            g_map_io.Print (ofs_key2f, pKey2Freq, Size);
        }

    } catch (const FAException & e) {

        const char * const pErrMsg = e.GetErrMsg ();
        const char * const pFile = e.GetSourceName ();
        const int Line = e.GetSourceLine ();

        // report where in the program the error was raised ...
        std::cerr << "ERROR: " << pErrMsg << " in " << pFile \
            << " at line " << Line << " in program " << __PROG__ << '\n';

        // ... and which input line was being processed
        std::cerr << "ERROR: in data at line: " << LineNum << " in \"" \
            << line << "\"\n";

        return 2;

    } catch (...) {

        std::cerr << "ERROR: Unknown error in program " << __PROG__ << '\n';
        return 1;
    }

    return 0;
}