void LogFileReaderUnittest::TestReadUTF8()

in core/unittest/reader/LogFileReaderUnittest.cpp [337:572]


// Exercises LogFileReader::ReadUTF8 on the fixture file `utf8File`, comparing the produced
// LogBuffer::rawBuffer against the fixture string `expectedContent`. Each braced scope below is
// an independent scenario with its own reader: a full read, a full read with the fourth argument
// set to false, reads truncated by LogFileReader::BUFFER_SIZE (with and without a matching
// multiline StartPattern), two consecutive reads (multiline and single line), a zero-size read,
// and a sequence of forced reads.
//
// NOTE(review): several scenarios assign the static LogFileReader::BUFFER_SIZE and never restore
// it, so the scenarios that do not assign it afterwards run with the last value written here.
// Presumably the fixture resets it between tests -- confirm.
void LogFileReaderUnittest::TestReadUTF8() {
    { // buffer size big enough and match pattern
        MultilineOptions multilineOpts;
        FileReaderOptions readerOpts;
        readerOpts.mInputType = FileReaderOptions::InputType::InputFile;
        LogFileReader reader(logPathDir,
                             utf8File,
                             DevInode(),
                             std::make_pair(&readerOpts, &ctx),
                             std::make_pair(&multilineOpts, &ctx),
                             std::make_pair(&fileTagOpts, &ctx));
        reader.UpdateReaderManual();
        reader.InitReader(true, LogFileReader::BACKWARD_TO_BEGINNING);
        reader.CheckFileSignatureAndOffset(true);
        LogBuffer logBuffer;
        bool moreData = false;
        // Ask for the whole file in one call; nothing should be left over.
        reader.ReadUTF8(logBuffer, reader.mLogFileOp.GetFileSize(), moreData);
        APSARA_TEST_FALSE_FATAL(moreData);
        APSARA_TEST_STREQ_FATAL(expectedContent.get(), logBuffer.rawBuffer.data());
    }
    { // buffer size big enough, fourth ReadUTF8 argument false
        // (the last scenario's comments call this "not allowRollback" / "force read")
        MultilineOptions multilineOpts;
        FileReaderOptions readerOpts;
        readerOpts.mInputType = FileReaderOptions::InputType::InputFile;
        LogFileReader reader(logPathDir,
                             utf8File,
                             DevInode(),
                             std::make_pair(&readerOpts, &ctx),
                             std::make_pair(&multilineOpts, &ctx),
                             std::make_pair(&fileTagOpts, &ctx));
        reader.UpdateReaderManual();
        reader.InitReader(true, LogFileReader::BACKWARD_TO_BEGINNING);
        reader.CheckFileSignatureAndOffset(true);
        LogBuffer logBuffer;
        bool moreData = false;
        reader.ReadUTF8(logBuffer, reader.mLogFileOp.GetFileSize(), moreData, false);
        APSARA_TEST_FALSE_FATAL(moreData);
        // The expected text is the same NUL-terminated fixture string as in the first scenario;
        // the fixture buffer is compared as is and is not modified here.
        APSARA_TEST_STREQ_FATAL(expectedContent.get(), logBuffer.rawBuffer.data());
    }
    { // buffer size not big enough and not match pattern
        // should read buffer size
        Json::Value config;
        config["StartPattern"] = "no matching pattern";
        MultilineOptions multilineOpts;
        multilineOpts.Init(config, ctx, "");
        FileReaderOptions readerOpts;
        readerOpts.mInputType = FileReaderOptions::InputType::InputFile;
        LogFileReader reader(logPathDir,
                             utf8File,
                             DevInode(),
                             std::make_pair(&readerOpts, &ctx),
                             std::make_pair(&multilineOpts, &ctx),
                             std::make_pair(&fileTagOpts, &ctx));
        // Static setting: stays in effect until the next assignment further down.
        LogFileReader::BUFFER_SIZE = 15;
        reader.UpdateReaderManual();
        reader.InitReader(true, LogFileReader::BACKWARD_TO_BEGINNING);
        reader.CheckFileSignatureAndOffset(true);
        LogBuffer logBuffer;
        bool moreData = false;
        reader.ReadUTF8(logBuffer, reader.mLogFileOp.GetFileSize(), moreData);
        APSARA_TEST_TRUE_FATAL(moreData);
        // Expect exactly the first BUFFER_SIZE bytes of the fixture content.
        APSARA_TEST_STREQ_FATAL(std::string(expectedContent.get(), LogFileReader::BUFFER_SIZE).c_str(),
                                logBuffer.rawBuffer.data());
    }
    { // buffer size not big enough and match pattern
        // should read to match pattern
        Json::Value config;
        config["StartPattern"] = "iLogtail.*";
        MultilineOptions multilineOpts;
        multilineOpts.Init(config, ctx, "");
        FileReaderOptions readerOpts;
        readerOpts.mInputType = FileReaderOptions::InputType::InputFile;
        LogFileReader reader(logPathDir,
                             utf8File,
                             DevInode(),
                             std::make_pair(&readerOpts, &ctx),
                             std::make_pair(&multilineOpts, &ctx),
                             std::make_pair(&fileTagOpts, &ctx));
        reader.UpdateReaderManual();
        reader.InitReader(true, LogFileReader::BACKWARD_TO_BEGINNING);
        int64_t fileSize = reader.mLogFileOp.GetFileSize();
        reader.CheckFileSignatureAndOffset(true);
        // Buffer is 13 bytes short of the file.
        // NOTE(review): presumably chosen so the buffer ends inside the last log -- confirm
        // against the fixture file.
        LogFileReader::BUFFER_SIZE = fileSize - 13;
        LogBuffer logBuffer;
        bool moreData = false;
        reader.ReadUTF8(logBuffer, fileSize, moreData);
        APSARA_TEST_TRUE_FATAL(moreData);
        // Expected: everything before the last "iLogtail" occurrence, minus the separating \n.
        std::string expectedPart(expectedContent.get());
        expectedPart.resize(expectedPart.rfind("iLogtail") - 1);
        APSARA_TEST_STREQ_FATAL(expectedPart.c_str(), logBuffer.rawBuffer.data());
    }
    { // read twice, multiline
        Json::Value config;
        config["StartPattern"] = "iLogtail.*";
        MultilineOptions multilineOpts;
        multilineOpts.Init(config, ctx, "");
        FileReaderOptions readerOpts;
        readerOpts.mInputType = FileReaderOptions::InputType::InputFile;
        LogFileReader reader(logPathDir,
                             utf8File,
                             DevInode(),
                             std::make_pair(&readerOpts, &ctx),
                             std::make_pair(&multilineOpts, &ctx),
                             std::make_pair(&fileTagOpts, &ctx));
        reader.UpdateReaderManual();
        reader.InitReader(true, LogFileReader::BACKWARD_TO_BEGINNING);
        int64_t fileSize = reader.mLogFileOp.GetFileSize();
        reader.CheckFileSignatureAndOffset(true);
        LogFileReader::BUFFER_SIZE = fileSize - 13;
        LogBuffer logBuffer;
        bool moreData = false;
        // first read
        reader.ReadUTF8(logBuffer, fileSize, moreData);
        APSARA_TEST_TRUE_FATAL(moreData);
        std::string expectedPart(expectedContent.get());
        expectedPart.resize(expectedPart.rfind("iLogtail") - 1); // -1 for \n
        APSARA_TEST_STREQ_FATAL(expectedPart.c_str(), logBuffer.rawBuffer.data());
        // NOTE(review): if mCache.size() is unsigned, ">= 0" can never fail -- confirm whether a
        // stricter check (e.g. non-empty cache) was intended.
        APSARA_TEST_GE_FATAL(reader.mCache.size(), 0UL);
        auto lastFilePos = reader.mLastFilePos;
        // second read, end of second part cannot be determined, nothing read
        reader.ReadUTF8(logBuffer, fileSize, moreData);
        APSARA_TEST_FALSE_FATAL(moreData);
        // NOTE(review): same always-true comparison as above.
        APSARA_TEST_GE_FATAL(reader.mCache.size(), 0UL);
        // The read position must not have advanced.
        APSARA_TEST_EQUAL_FATAL(lastFilePos, reader.mLastFilePos);
    }
    { // read twice, singleline
        MultilineOptions multilineOpts;
        FileReaderOptions readerOpts;
        readerOpts.mInputType = FileReaderOptions::InputType::InputFile;
        LogFileReader reader(logPathDir,
                             utf8File,
                             DevInode(),
                             std::make_pair(&readerOpts, &ctx),
                             std::make_pair(&multilineOpts, &ctx),
                             std::make_pair(&fileTagOpts, &ctx));
        reader.UpdateReaderManual();
        reader.InitReader(true, LogFileReader::BACKWARD_TO_BEGINNING);
        int64_t fileSize = reader.mLogFileOp.GetFileSize();
        reader.CheckFileSignatureAndOffset(true);
        LogFileReader::BUFFER_SIZE = fileSize - 13;
        LogBuffer logBuffer;
        bool moreData = false;
        // first read
        reader.ReadUTF8(logBuffer, fileSize, moreData);
        APSARA_TEST_TRUE_FATAL(moreData);
        std::string expectedPart(expectedContent.get());
        expectedPart.resize(expectedPart.rfind("iLogtail") - 1);
        APSARA_TEST_STREQ_FATAL(expectedPart.c_str(), logBuffer.rawBuffer.data());
        // NOTE(review): always true if mCache.size() is unsigned -- confirm the intended check.
        APSARA_TEST_GE_FATAL(reader.mCache.size(), 0UL);
        // second read, second part should be read
        reader.ReadUTF8(logBuffer, fileSize, moreData);
        APSARA_TEST_FALSE_FATAL(moreData);
        // Expected: the tail of the fixture content starting at the last "iLogtail".
        expectedPart = expectedContent.get();
        expectedPart = expectedPart.substr(expectedPart.rfind("iLogtail"));
        APSARA_TEST_STREQ_FATAL(expectedPart.c_str(), logBuffer.rawBuffer.data());
        APSARA_TEST_EQUAL_FATAL(0UL, reader.mCache.size());
    }
    { // empty
        MultilineOptions multilineOpts;
        FileReaderOptions readerOpts;
        readerOpts.mInputType = FileReaderOptions::InputType::InputFile;
        LogFileReader reader(logPathDir,
                             utf8File,
                             DevInode(),
                             std::make_pair(&readerOpts, &ctx),
                             std::make_pair(&multilineOpts, &ctx),
                             std::make_pair(&fileTagOpts, &ctx));
        reader.UpdateReaderManual();
        reader.InitReader(true, LogFileReader::BACKWARD_TO_BEGINNING);
        LogBuffer logBuffer;
        bool moreData = false;
        // A zero-size read must leave the buffer unset (data() == NULL) and report no more data.
        reader.ReadUTF8(logBuffer, 0, moreData);
        APSARA_TEST_FALSE_FATAL(moreData);
        APSARA_TEST_STREQ_FATAL(NULL, logBuffer.rawBuffer.data());
    }
    { // force read + \n, which case read bytes is 0
        Json::Value config;
        config["StartPattern"] = "iLogtail.*";
        MultilineOptions multilineOpts;
        multilineOpts.Init(config, ctx, "");
        FileReaderOptions readerOpts;
        readerOpts.mInputType = FileReaderOptions::InputType::InputFile;
        LogFileReader reader(logPathDir,
                             utf8File,
                             DevInode(),
                             std::make_pair(&readerOpts, &ctx),
                             std::make_pair(&multilineOpts, &ctx),
                             std::make_pair(&fileTagOpts, &ctx));
        reader.UpdateReaderManual();
        reader.InitReader(true, LogFileReader::BACKWARD_TO_BEGINNING);
        int64_t fileSize = reader.mLogFileOp.GetFileSize();
        reader.CheckFileSignatureAndOffset(true);
        LogBuffer logBuffer;
        bool moreData = false;
        std::string expectedPart(expectedContent.get());
        // first read, read first line without \n and not allowRollback
        int64_t firstReadSize = expectedPart.find("\n");
        expectedPart.resize(firstReadSize);
        // Start from "last read was forced" so the first call is expected to clear the flag.
        reader.mLastForceRead = true;
        reader.ReadUTF8(logBuffer, firstReadSize, moreData, false);
        APSARA_TEST_FALSE_FATAL(moreData);
        APSARA_TEST_FALSE_FATAL(reader.mLastForceRead);
        reader.ReadUTF8(logBuffer, firstReadSize, moreData, false); // force read, clear cache
        APSARA_TEST_TRUE_FATAL(reader.mLastForceRead);
        APSARA_TEST_EQUAL_FATAL(reader.mCache.size(), 0UL);
        APSARA_TEST_STREQ_FATAL(expectedPart.c_str(), logBuffer.rawBuffer.data());

        // second read, start with \n but with other lines
        reader.ReadUTF8(logBuffer, fileSize - 1, moreData);
        APSARA_TEST_FALSE_FATAL(moreData);
        // NOTE(review): always true if mCache.size() is unsigned -- confirm the intended check.
        APSARA_TEST_GE_FATAL(reader.mCache.size(), 0UL);
        std::string expectedPart2(expectedContent.get() + firstReadSize + 1); // skip \n
        // Keep everything up to (not including) the \n before the last "iLogtail".
        int64_t secondReadSize = expectedPart2.rfind("iLogtail") - 1;
        expectedPart2.resize(secondReadSize);
        APSARA_TEST_STREQ_FATAL(expectedPart2.c_str(), logBuffer.rawBuffer.data());
        APSARA_TEST_FALSE_FATAL(reader.mLastForceRead);

        // third read, force read cache
        reader.ReadUTF8(logBuffer, fileSize - 1, moreData, false);
        // Expected: the remainder after the first two parts and their separating \n characters.
        std::string expectedPart3(expectedContent.get() + firstReadSize + 1 + secondReadSize + 1);
        APSARA_TEST_STREQ_FATAL(expectedPart3.c_str(), logBuffer.rawBuffer.data());
        APSARA_TEST_TRUE_FATAL(reader.mLastForceRead);

        // fourth read, only read \n
        LogBuffer logBuffer2;
        reader.ReadUTF8(logBuffer2, fileSize, moreData);
        APSARA_TEST_FALSE_FATAL(moreData);
        // NOTE(review): always true if mCache.size() is unsigned -- confirm the intended check.
        APSARA_TEST_GE_FATAL(reader.mCache.size(), 0UL);
        // The reader must have consumed the file to its end without producing any content.
        APSARA_TEST_EQUAL_FATAL(fileSize, reader.mLastFilePos);
        APSARA_TEST_STREQ_FATAL(NULL, logBuffer2.rawBuffer.data());
    }
}