in core/unittest/reader/LogFileReaderUnittest.cpp [337:572]
// Exercises LogFileReader::ReadUTF8 on the fixture file `utf8File` (in `logPathDir`) and compares
// what ends up in LogBuffer::rawBuffer against the fixture's `expectedContent`.
//
// The scenarios run in order, each with a freshly constructed reader. Several of them overwrite the
// static LogFileReader::BUFFER_SIZE and none restores it inside this function, so a scenario that
// does not set it runs with whatever value the previous scenario left behind.
// NOTE(review): presumably the fixture's TearDown resets BUFFER_SIZE for other tests - confirm.
//
// NOTE(review): the `APSARA_TEST_GE_FATAL(reader.mCache.size(), 0UL)` checks below cannot fail if
// mCache.size() is unsigned; they are kept unchanged because the intended bound is not visible here.
void LogFileReaderUnittest::TestReadUTF8() {
    { // buffer size big enough and match pattern
        // Default multiline options, BUFFER_SIZE untouched in this scope: a single read of the
        // whole file is expected to yield all of expectedContent with nothing left to read.
        MultilineOptions multilineOpts;
        FileReaderOptions readerOpts;
        readerOpts.mInputType = FileReaderOptions::InputType::InputFile;
        LogFileReader reader(logPathDir,
                             utf8File,
                             DevInode(),
                             std::make_pair(&readerOpts, &ctx),
                             std::make_pair(&multilineOpts, &ctx),
                             std::make_pair(&fileTagOpts, &ctx));
        reader.UpdateReaderManual();
        reader.InitReader(true, LogFileReader::BACKWARD_TO_BEGINNING);
        reader.CheckFileSignatureAndOffset(true);
        LogBuffer logBuffer;
        bool moreData = false;
        reader.ReadUTF8(logBuffer, reader.mLogFileOp.GetFileSize(), moreData);
        APSARA_TEST_FALSE_FATAL(moreData);
        APSARA_TEST_STREQ_FATAL(expectedContent.get(), logBuffer.rawBuffer.data());
    }
    { // buffer size big enough, fourth ReadUTF8 argument set to false
        // Same setup as the first scenario; the only difference is the explicit `false`
        // (presumably allowRollback - confirm against ReadUTF8's signature).
        MultilineOptions multilineOpts;
        FileReaderOptions readerOpts;
        readerOpts.mInputType = FileReaderOptions::InputType::InputFile;
        LogFileReader reader(logPathDir,
                             utf8File,
                             DevInode(),
                             std::make_pair(&readerOpts, &ctx),
                             std::make_pair(&multilineOpts, &ctx),
                             std::make_pair(&fileTagOpts, &ctx));
        reader.UpdateReaderManual();
        reader.InitReader(true, LogFileReader::BACKWARD_TO_BEGINNING);
        reader.CheckFileSignatureAndOffset(true);
        LogBuffer logBuffer;
        bool moreData = false;
        reader.ReadUTF8(logBuffer, reader.mLogFileOp.GetFileSize(), moreData, false);
        APSARA_TEST_FALSE_FATAL(moreData);
        // The comparison ends at expectedContent's NUL terminator, so the expected buffer is used
        // as-is; bytes after the terminator are never inspected and must not be written to.
        APSARA_TEST_STREQ_FATAL(expectedContent.get(), logBuffer.rawBuffer.data());
    }
    { // buffer size not big enough and not match pattern
        // should read buffer size
        Json::Value config;
        config["StartPattern"] = "no matching pattern";
        MultilineOptions multilineOpts;
        multilineOpts.Init(config, ctx, "");
        FileReaderOptions readerOpts;
        readerOpts.mInputType = FileReaderOptions::InputType::InputFile;
        LogFileReader reader(logPathDir,
                             utf8File,
                             DevInode(),
                             std::make_pair(&readerOpts, &ctx),
                             std::make_pair(&multilineOpts, &ctx),
                             std::make_pair(&fileTagOpts, &ctx));
        // Static setting: stays at 15 until a later scenario overwrites it.
        LogFileReader::BUFFER_SIZE = 15;
        reader.UpdateReaderManual();
        reader.InitReader(true, LogFileReader::BACKWARD_TO_BEGINNING);
        reader.CheckFileSignatureAndOffset(true);
        LogBuffer logBuffer;
        bool moreData = false;
        reader.ReadUTF8(logBuffer, reader.mLogFileOp.GetFileSize(), moreData);
        APSARA_TEST_TRUE_FATAL(moreData);
        // Expected: exactly the first BUFFER_SIZE bytes of the fixture content.
        APSARA_TEST_STREQ_FATAL(std::string(expectedContent.get(), LogFileReader::BUFFER_SIZE).c_str(),
                                logBuffer.rawBuffer.data());
    }
    { // buffer size not big enough and match pattern
        // should read to match pattern
        Json::Value config;
        config["StartPattern"] = "iLogtail.*";
        MultilineOptions multilineOpts;
        multilineOpts.Init(config, ctx, "");
        FileReaderOptions readerOpts;
        readerOpts.mInputType = FileReaderOptions::InputType::InputFile;
        LogFileReader reader(logPathDir,
                             utf8File,
                             DevInode(),
                             std::make_pair(&readerOpts, &ctx),
                             std::make_pair(&multilineOpts, &ctx),
                             std::make_pair(&fileTagOpts, &ctx));
        reader.UpdateReaderManual();
        reader.InitReader(true, LogFileReader::BACKWARD_TO_BEGINNING);
        int64_t fileSize = reader.mLogFileOp.GetFileSize();
        reader.CheckFileSignatureAndOffset(true);
        // NOTE(review): 13 bytes short of the file - presumably so the buffer ends inside the
        // fixture's last line; confirm against the fixture file.
        LogFileReader::BUFFER_SIZE = fileSize - 13;
        LogBuffer logBuffer;
        bool moreData = false;
        reader.ReadUTF8(logBuffer, fileSize, moreData);
        APSARA_TEST_TRUE_FATAL(moreData);
        // Expected: everything before the last "iLogtail" occurrence; -1 drops the preceding \n.
        std::string expectedPart(expectedContent.get());
        expectedPart.resize(expectedPart.rfind("iLogtail") - 1);
        APSARA_TEST_STREQ_FATAL(expectedPart.c_str(), logBuffer.rawBuffer.data());
    }
    { // read twice, multiline
        Json::Value config;
        config["StartPattern"] = "iLogtail.*";
        MultilineOptions multilineOpts;
        multilineOpts.Init(config, ctx, "");
        FileReaderOptions readerOpts;
        readerOpts.mInputType = FileReaderOptions::InputType::InputFile;
        LogFileReader reader(logPathDir,
                             utf8File,
                             DevInode(),
                             std::make_pair(&readerOpts, &ctx),
                             std::make_pair(&multilineOpts, &ctx),
                             std::make_pair(&fileTagOpts, &ctx));
        reader.UpdateReaderManual();
        reader.InitReader(true, LogFileReader::BACKWARD_TO_BEGINNING);
        int64_t fileSize = reader.mLogFileOp.GetFileSize();
        reader.CheckFileSignatureAndOffset(true);
        LogFileReader::BUFFER_SIZE = fileSize - 13;
        LogBuffer logBuffer;
        bool moreData = false;
        // first read
        reader.ReadUTF8(logBuffer, fileSize, moreData);
        APSARA_TEST_TRUE_FATAL(moreData);
        std::string expectedPart(expectedContent.get());
        expectedPart.resize(expectedPart.rfind("iLogtail") - 1); // -1 for \n
        APSARA_TEST_STREQ_FATAL(expectedPart.c_str(), logBuffer.rawBuffer.data());
        APSARA_TEST_GE_FATAL(reader.mCache.size(), 0UL);
        // Remember the position so the second read can be shown not to advance it.
        auto lastFilePos = reader.mLastFilePos;
        // second read, end of second part cannot be determined, nothing read
        reader.ReadUTF8(logBuffer, fileSize, moreData);
        APSARA_TEST_FALSE_FATAL(moreData);
        APSARA_TEST_GE_FATAL(reader.mCache.size(), 0UL);
        APSARA_TEST_EQUAL_FATAL(lastFilePos, reader.mLastFilePos);
    }
    { // read twice, singleline
        MultilineOptions multilineOpts;
        FileReaderOptions readerOpts;
        readerOpts.mInputType = FileReaderOptions::InputType::InputFile;
        LogFileReader reader(logPathDir,
                             utf8File,
                             DevInode(),
                             std::make_pair(&readerOpts, &ctx),
                             std::make_pair(&multilineOpts, &ctx),
                             std::make_pair(&fileTagOpts, &ctx));
        reader.UpdateReaderManual();
        reader.InitReader(true, LogFileReader::BACKWARD_TO_BEGINNING);
        int64_t fileSize = reader.mLogFileOp.GetFileSize();
        reader.CheckFileSignatureAndOffset(true);
        LogFileReader::BUFFER_SIZE = fileSize - 13;
        LogBuffer logBuffer;
        bool moreData = false;
        // first read
        reader.ReadUTF8(logBuffer, fileSize, moreData);
        APSARA_TEST_TRUE_FATAL(moreData);
        std::string expectedPart(expectedContent.get());
        expectedPart.resize(expectedPart.rfind("iLogtail") - 1); // -1 for \n
        APSARA_TEST_STREQ_FATAL(expectedPart.c_str(), logBuffer.rawBuffer.data());
        APSARA_TEST_GE_FATAL(reader.mCache.size(), 0UL);
        // second read, second part should be read
        reader.ReadUTF8(logBuffer, fileSize, moreData);
        APSARA_TEST_FALSE_FATAL(moreData);
        // Expected: the tail of the fixture starting at the last "iLogtail" occurrence.
        expectedPart = expectedContent.get();
        expectedPart = expectedPart.substr(expectedPart.rfind("iLogtail"));
        APSARA_TEST_STREQ_FATAL(expectedPart.c_str(), logBuffer.rawBuffer.data());
        APSARA_TEST_EQUAL_FATAL(0UL, reader.mCache.size());
    }
    { // empty
        // A zero-size read request: no more data is reported and rawBuffer stays null.
        // CheckFileSignatureAndOffset is not called in this scenario.
        MultilineOptions multilineOpts;
        FileReaderOptions readerOpts;
        readerOpts.mInputType = FileReaderOptions::InputType::InputFile;
        LogFileReader reader(logPathDir,
                             utf8File,
                             DevInode(),
                             std::make_pair(&readerOpts, &ctx),
                             std::make_pair(&multilineOpts, &ctx),
                             std::make_pair(&fileTagOpts, &ctx));
        reader.UpdateReaderManual();
        reader.InitReader(true, LogFileReader::BACKWARD_TO_BEGINNING);
        LogBuffer logBuffer;
        bool moreData = false;
        reader.ReadUTF8(logBuffer, 0, moreData);
        APSARA_TEST_FALSE_FATAL(moreData);
        APSARA_TEST_STREQ_FATAL(NULL, logBuffer.rawBuffer.data());
    }
    { // force read + \n, which case read bytes is 0
        // BUFFER_SIZE is not set here, so this scenario runs with the value left by the
        // "read twice, singleline" scenario above.
        Json::Value config;
        config["StartPattern"] = "iLogtail.*";
        MultilineOptions multilineOpts;
        multilineOpts.Init(config, ctx, "");
        FileReaderOptions readerOpts;
        readerOpts.mInputType = FileReaderOptions::InputType::InputFile;
        LogFileReader reader(logPathDir,
                             utf8File,
                             DevInode(),
                             std::make_pair(&readerOpts, &ctx),
                             std::make_pair(&multilineOpts, &ctx),
                             std::make_pair(&fileTagOpts, &ctx));
        reader.UpdateReaderManual();
        reader.InitReader(true, LogFileReader::BACKWARD_TO_BEGINNING);
        int64_t fileSize = reader.mLogFileOp.GetFileSize();
        reader.CheckFileSignatureAndOffset(true);
        LogBuffer logBuffer;
        bool moreData = false;
        std::string expectedPart(expectedContent.get());
        // first read, read first line without \n and not allowRollback
        int64_t firstReadSize = expectedPart.find('\n');
        expectedPart.resize(firstReadSize);
        reader.mLastForceRead = true;
        reader.ReadUTF8(logBuffer, firstReadSize, moreData, false);
        APSARA_TEST_FALSE_FATAL(moreData);
        APSARA_TEST_FALSE_FATAL(reader.mLastForceRead);
        reader.ReadUTF8(logBuffer, firstReadSize, moreData, false); // force read, clear cache
        APSARA_TEST_TRUE_FATAL(reader.mLastForceRead);
        APSARA_TEST_EQUAL_FATAL(reader.mCache.size(), 0UL);
        APSARA_TEST_STREQ_FATAL(expectedPart.c_str(), logBuffer.rawBuffer.data());
        // second read, start with \n but with other lines
        reader.ReadUTF8(logBuffer, fileSize - 1, moreData);
        APSARA_TEST_FALSE_FATAL(moreData);
        APSARA_TEST_GE_FATAL(reader.mCache.size(), 0UL);
        std::string expectedPart2(expectedContent.get() + firstReadSize + 1); // skip \n
        // Length of the middle section: up to, but excluding, the \n before the last "iLogtail".
        int64_t secondReadSize = expectedPart2.rfind("iLogtail") - 1;
        expectedPart2.resize(secondReadSize);
        APSARA_TEST_STREQ_FATAL(expectedPart2.c_str(), logBuffer.rawBuffer.data());
        APSARA_TEST_FALSE_FATAL(reader.mLastForceRead);
        // third read, force read cache
        reader.ReadUTF8(logBuffer, fileSize - 1, moreData, false);
        // Expected: the remainder after the first line, the middle section and their two \n.
        std::string expectedPart3(expectedContent.get() + firstReadSize + 1 + secondReadSize + 1);
        APSARA_TEST_STREQ_FATAL(expectedPart3.c_str(), logBuffer.rawBuffer.data());
        APSARA_TEST_TRUE_FATAL(reader.mLastForceRead);
        // fourth read, only read \n
        LogBuffer logBuffer2;
        reader.ReadUTF8(logBuffer2, fileSize, moreData);
        APSARA_TEST_FALSE_FATAL(moreData);
        APSARA_TEST_GE_FATAL(reader.mCache.size(), 0UL);
        APSARA_TEST_EQUAL_FATAL(fileSize, reader.mLastFilePos);
        APSARA_TEST_STREQ_FATAL(NULL, logBuffer2.rawBuffer.data());
    }
}