void LogFileReaderUnittest::TestReadGBK()

in core/unittest/reader/LogFileReaderUnittest.cpp [91:335]


// Exercises LogFileReader::ReadGBK on the GBK fixture file (gbkFile) and compares the text
// left in LogBuffer::rawBuffer against the fixture's expectedContent.
//
// Every braced scope below is an independent scenario with its own reader and options.
// NOTE(review): several scopes assign the static LogFileReader::BUFFER_SIZE and none of them
// restores it, so each later scope runs with whatever value the previous scope left behind.
// Presumably the fixture resets it between test cases — confirm before reordering scopes.
void LogFileReaderUnittest::TestReadGBK() {
    { // buffer size big enough and match pattern
        MultilineOptions multilineOpts;
        FileReaderOptions readerOpts;
        readerOpts.mInputType = FileReaderOptions::InputType::InputFile;
        readerOpts.mFileEncoding = FileReaderOptions::Encoding::GBK;
        LogFileReader reader(logPathDir,
                             gbkFile,
                             DevInode(),
                             std::make_pair(&readerOpts, &ctx),
                             std::make_pair(&multilineOpts, &ctx),
                             std::make_pair(&fileTagOpts, &ctx));
        reader.UpdateReaderManual();
        reader.InitReader(true, LogFileReader::BACKWARD_TO_BEGINNING);
        reader.CheckFileSignatureAndOffset(true);
        LogBuffer logBuffer;
        bool moreData = false;
        // Ask for the whole file in one call; nothing should be left to read.
        reader.ReadGBK(logBuffer, reader.mLogFileOp.GetFileSize(), moreData);
        APSARA_TEST_FALSE_FATAL(moreData);
        APSARA_TEST_STREQ_FATAL(expectedContent.get(), logBuffer.rawBuffer.data());
    }
    { // buffer size big enough and match pattern, force read
        MultilineOptions multilineOpts;
        FileReaderOptions readerOpts;
        readerOpts.mInputType = FileReaderOptions::InputType::InputFile;
        readerOpts.mFileEncoding = FileReaderOptions::Encoding::GBK;
        LogFileReader reader(logPathDir,
                             gbkFile,
                             DevInode(),
                             std::make_pair(&readerOpts, &ctx),
                             std::make_pair(&multilineOpts, &ctx),
                             std::make_pair(&fileTagOpts, &ctx));
        reader.UpdateReaderManual();
        reader.InitReader(true, LogFileReader::BACKWARD_TO_BEGINNING);
        reader.CheckFileSignatureAndOffset(true);
        LogBuffer logBuffer;
        bool moreData = false;
        // Same read as the previous scope, but with the fourth argument set to false
        // (the "force read" variant named in the scope comment).
        reader.ReadGBK(logBuffer, reader.mLogFileOp.GetFileSize(), moreData, false);
        APSARA_TEST_FALSE_FATAL(moreData);
        // The expected text is compared unmodified: it ends at its NUL terminator, so no
        // byte beyond that terminator takes part in the comparison.
        APSARA_TEST_STREQ_FATAL(expectedContent.get(), logBuffer.rawBuffer.data());
    }
    { // buffer size not big enough and not match pattern
        Json::Value config;
        config["StartPattern"] = "no matching pattern";
        MultilineOptions multilineOpts;
        FileReaderOptions readerOpts;
        readerOpts.mInputType = FileReaderOptions::InputType::InputFile;
        readerOpts.mFileEncoding = FileReaderOptions::Encoding::GBK;
        multilineOpts.Init(config, ctx, "");
        LogFileReader reader(logPathDir,
                             gbkFile,
                             DevInode(),
                             std::make_pair(&readerOpts, &ctx),
                             std::make_pair(&multilineOpts, &ctx),
                             std::make_pair(&fileTagOpts, &ctx));
        // Shrink the read buffer to 14 bytes; the assertion below expects the first
        // 15 bytes of expectedContent to come back.
        LogFileReader::BUFFER_SIZE = 14;
        size_t BUFFER_SIZE_UTF8 = 15; // "ilogtail 为可"
        reader.UpdateReaderManual();
        reader.InitReader(true, LogFileReader::BACKWARD_TO_BEGINNING);
        reader.CheckFileSignatureAndOffset(true);
        LogBuffer logBuffer;
        bool moreData = false;
        reader.ReadGBK(logBuffer, reader.mLogFileOp.GetFileSize(), moreData);
        APSARA_TEST_TRUE_FATAL(moreData);
        APSARA_TEST_STREQ_FATAL(std::string(expectedContent.get(), BUFFER_SIZE_UTF8).c_str(),
                                logBuffer.rawBuffer.data());
    }
    { // buffer size not big enough and match pattern
        Json::Value config;
        config["StartPattern"] = "iLogtail.*";
        MultilineOptions multilineOpts;
        multilineOpts.Init(config, ctx, "");
        FileReaderOptions readerOpts;
        readerOpts.mInputType = FileReaderOptions::InputType::InputFile;
        readerOpts.mFileEncoding = FileReaderOptions::Encoding::GBK;
        LogFileReader reader(logPathDir,
                             gbkFile,
                             DevInode(),
                             std::make_pair(&readerOpts, &ctx),
                             std::make_pair(&multilineOpts, &ctx),
                             std::make_pair(&fileTagOpts, &ctx));
        // reader.mDiscardUnmatch = false;
        reader.UpdateReaderManual();
        reader.InitReader(true, LogFileReader::BACKWARD_TO_BEGINNING);
        int64_t fileSize = reader.mLogFileOp.GetFileSize();
        reader.CheckFileSignatureAndOffset(true);
        // Buffer is 11 bytes short of the file, so one call cannot return everything.
        LogFileReader::BUFFER_SIZE = fileSize - 11;
        LogBuffer logBuffer;
        bool moreData = false;
        reader.ReadGBK(logBuffer, fileSize, moreData);
        APSARA_TEST_TRUE_FATAL(moreData);
        // Expected output is everything before the last "iLogtail" occurrence.
        std::string expectedPart(expectedContent.get());
        expectedPart.resize(expectedPart.rfind("iLogtail") - 1); // exclude tailing \n
        APSARA_TEST_STREQ_FATAL(expectedPart.c_str(), logBuffer.rawBuffer.data());
    }
    { // read twice, multiline
        Json::Value config;
        config["StartPattern"] = "iLogtail.*";
        MultilineOptions multilineOpts;
        multilineOpts.Init(config, ctx, "");
        FileReaderOptions readerOpts;
        readerOpts.mInputType = FileReaderOptions::InputType::InputFile;
        readerOpts.mFileEncoding = FileReaderOptions::Encoding::GBK;
        LogFileReader reader(logPathDir,
                             gbkFile,
                             DevInode(),
                             std::make_pair(&readerOpts, &ctx),
                             std::make_pair(&multilineOpts, &ctx),
                             std::make_pair(&fileTagOpts, &ctx));
        // reader.mDiscardUnmatch = false;
        reader.UpdateReaderManual();
        reader.InitReader(true, LogFileReader::BACKWARD_TO_BEGINNING);
        int64_t fileSize = reader.mLogFileOp.GetFileSize();
        reader.CheckFileSignatureAndOffset(true);
        LogFileReader::BUFFER_SIZE = fileSize - 11;
        LogBuffer logBuffer;
        bool moreData = false;
        // first read, first part should be read
        reader.ReadGBK(logBuffer, fileSize, moreData);
        APSARA_TEST_TRUE_FATAL(moreData);
        std::string expectedPart(expectedContent.get());
        expectedPart.resize(expectedPart.rfind("iLogtail") - 1);
        APSARA_TEST_STREQ_FATAL(expectedPart.c_str(), logBuffer.rawBuffer.data());
        // NOTE(review): `size() >= 0` presumably always holds for an unsigned size, so this
        // check cannot fail — confirm whether a strict `> 0` was intended.
        APSARA_TEST_GE_FATAL(reader.mCache.size(), 0UL);
        // Remember the file position so the second read can be shown not to advance it.
        auto lastFilePos = reader.mLastFilePos;
        // second read, end of second part cannot be determined, nothing read
        reader.ReadGBK(logBuffer, fileSize, moreData);
        APSARA_TEST_FALSE_FATAL(moreData);
        APSARA_TEST_GE_FATAL(reader.mCache.size(), 0UL);
        APSARA_TEST_EQUAL_FATAL(lastFilePos, reader.mLastFilePos);
    }
    { // read twice, single line
        MultilineOptions multilineOpts;
        FileReaderOptions readerOpts;
        readerOpts.mInputType = FileReaderOptions::InputType::InputFile;
        readerOpts.mFileEncoding = FileReaderOptions::Encoding::GBK;
        LogFileReader reader(logPathDir,
                             gbkFile,
                             DevInode(),
                             std::make_pair(&readerOpts, &ctx),
                             std::make_pair(&multilineOpts, &ctx),
                             std::make_pair(&fileTagOpts, &ctx));
        // reader.mDiscardUnmatch = false;
        reader.UpdateReaderManual();
        reader.InitReader(true, LogFileReader::BACKWARD_TO_BEGINNING);
        int64_t fileSize = reader.mLogFileOp.GetFileSize();
        reader.CheckFileSignatureAndOffset(true);
        LogFileReader::BUFFER_SIZE = fileSize - 11;
        LogBuffer logBuffer;
        bool moreData = false;
        // first read, first part should be read
        reader.ReadGBK(logBuffer, fileSize, moreData);
        APSARA_TEST_TRUE_FATAL(moreData);
        std::string expectedPart(expectedContent.get());
        expectedPart.resize(expectedPart.rfind("iLogtail") - 1); // -1 for \n
        APSARA_TEST_STREQ_FATAL(expectedPart.c_str(), logBuffer.rawBuffer.data());
        // NOTE(review): `size() >= 0` presumably always holds for an unsigned size — confirm intent.
        APSARA_TEST_GE_FATAL(reader.mCache.size(), 0UL);
        // second read, second part should be read
        reader.ReadGBK(logBuffer, fileSize, moreData);
        APSARA_TEST_FALSE_FATAL(moreData);
        // Expected output is now the tail starting at the last "iLogtail" occurrence.
        expectedPart = expectedContent.get();
        expectedPart = expectedPart.substr(expectedPart.rfind("iLogtail"));
        APSARA_TEST_STREQ_FATAL(expectedPart.c_str(), logBuffer.rawBuffer.data());
        APSARA_TEST_EQUAL_FATAL(0UL, reader.mCache.size());
    }
    { // empty file
        MultilineOptions multilineOpts;
        FileReaderOptions readerOpts;
        readerOpts.mInputType = FileReaderOptions::InputType::InputFile;
        readerOpts.mFileEncoding = FileReaderOptions::Encoding::GBK;
        LogFileReader reader(logPathDir,
                             gbkFile,
                             DevInode(),
                             std::make_pair(&readerOpts, &ctx),
                             std::make_pair(&multilineOpts, &ctx),
                             std::make_pair(&fileTagOpts, &ctx));
        reader.UpdateReaderManual();
        reader.InitReader(true, LogFileReader::BACKWARD_TO_BEGINNING);
        LogBuffer logBuffer;
        bool moreData = false;
        // The same fixture file is opened; emptiness is simulated by passing an end offset of 0.
        reader.ReadGBK(logBuffer, 0, moreData);
        APSARA_TEST_FALSE_FATAL(moreData);
        APSARA_TEST_STREQ_FATAL(NULL, logBuffer.rawBuffer.data());
    }
    { // force read + \n, which case read bytes is 0
        Json::Value config;
        config["StartPattern"] = "iLogtail.*";
        MultilineOptions multilineOpts;
        multilineOpts.Init(config, ctx, "");
        FileReaderOptions readerOpts;
        readerOpts.mInputType = FileReaderOptions::InputType::InputFile;
        readerOpts.mFileEncoding = FileReaderOptions::Encoding::GBK;
        LogFileReader reader(logPathDir,
                             gbkFile,
                             DevInode(),
                             std::make_pair(&readerOpts, &ctx),
                             std::make_pair(&multilineOpts, &ctx),
                             std::make_pair(&fileTagOpts, &ctx));
        reader.UpdateReaderManual();
        reader.InitReader(true, LogFileReader::BACKWARD_TO_BEGINNING);
        int64_t fileSize = reader.mLogFileOp.GetFileSize();
        reader.CheckFileSignatureAndOffset(true);
        LogBuffer logBuffer;
        bool moreData = false;
        std::string expectedPart(expectedContent.get());
        // first read, read first line without \n and not allowRollback
        int64_t firstReadSize = expectedPart.find("\n");
        expectedPart.resize(firstReadSize);
        // NOTE(review): 127 is presumably the byte offset in the GBK file just before the
        // first line's \n — TODO confirm against the fixture file.
        reader.ReadGBK(logBuffer, 127, moreData, false); // first line without \n
        APSARA_TEST_FALSE_FATAL(moreData);
        APSARA_TEST_FALSE_FATAL(reader.mLastForceRead);
        reader.ReadGBK(logBuffer, 127, moreData, false); // force read, clear cache
        APSARA_TEST_TRUE_FATAL(reader.mLastForceRead);
        APSARA_TEST_EQUAL_FATAL(reader.mCache.size(), 0UL);
        APSARA_TEST_STREQ_FATAL(expectedPart.c_str(), logBuffer.rawBuffer.data());

        // second read, start with \n but with other lines
        reader.ReadGBK(logBuffer, fileSize - 1, moreData);
        APSARA_TEST_FALSE_FATAL(moreData);
        // NOTE(review): `size() >= 0` presumably always holds for an unsigned size — confirm intent.
        APSARA_TEST_GE_FATAL(reader.mCache.size(), 0UL);
        std::string expectedPart2(expectedContent.get() + firstReadSize + 1); // skip \n
        int64_t secondReadSize = expectedPart2.rfind("iLogtail") - 1;
        expectedPart2.resize(secondReadSize);
        APSARA_TEST_STREQ_FATAL(expectedPart2.c_str(), logBuffer.rawBuffer.data());
        APSARA_TEST_FALSE_FATAL(reader.mLastForceRead);

        // third read, force read cache
        reader.ReadGBK(logBuffer, fileSize - 1, moreData, false);
        // Expected text starts after the first line, its \n, the second part and its \n.
        std::string expectedPart3(expectedContent.get() + firstReadSize + 1 + secondReadSize + 1);
        APSARA_TEST_STREQ_FATAL(expectedPart3.c_str(), logBuffer.rawBuffer.data());
        APSARA_TEST_TRUE_FATAL(reader.mLastForceRead);

        // fourth read, only read \n
        LogBuffer logBuffer2;
        reader.ReadGBK(logBuffer2, fileSize, moreData);
        APSARA_TEST_FALSE_FATAL(moreData);
        APSARA_TEST_GE_FATAL(reader.mCache.size(), 0UL);
        APSARA_TEST_EQUAL_FATAL(fileSize, reader.mLastFilePos);
        APSARA_TEST_STREQ_FATAL(NULL, logBuffer2.rawBuffer.data());
    }
}