in core/unittest/reader/LogFileReaderUnittest.cpp [91:335]
void LogFileReaderUnittest::TestReadGBK() {
{ // buffer size big enough and match pattern
MultilineOptions multilineOpts;
FileReaderOptions readerOpts;
readerOpts.mInputType = FileReaderOptions::InputType::InputFile;
readerOpts.mFileEncoding = FileReaderOptions::Encoding::GBK;
LogFileReader reader(logPathDir,
gbkFile,
DevInode(),
std::make_pair(&readerOpts, &ctx),
std::make_pair(&multilineOpts, &ctx),
std::make_pair(&fileTagOpts, &ctx));
reader.UpdateReaderManual();
reader.InitReader(true, LogFileReader::BACKWARD_TO_BEGINNING);
reader.CheckFileSignatureAndOffset(true);
LogBuffer logBuffer;
bool moreData = false;
reader.ReadGBK(logBuffer, reader.mLogFileOp.GetFileSize(), moreData);
APSARA_TEST_FALSE_FATAL(moreData);
APSARA_TEST_STREQ_FATAL(expectedContent.get(), logBuffer.rawBuffer.data());
}
{ // buffer size big enough and match pattern, force read
MultilineOptions multilineOpts;
FileReaderOptions readerOpts;
readerOpts.mInputType = FileReaderOptions::InputType::InputFile;
readerOpts.mFileEncoding = FileReaderOptions::Encoding::GBK;
LogFileReader reader(logPathDir,
gbkFile,
DevInode(),
std::make_pair(&readerOpts, &ctx),
std::make_pair(&multilineOpts, &ctx),
std::make_pair(&fileTagOpts, &ctx));
reader.UpdateReaderManual();
reader.InitReader(true, LogFileReader::BACKWARD_TO_BEGINNING);
reader.CheckFileSignatureAndOffset(true);
LogBuffer logBuffer;
bool moreData = false;
reader.ReadGBK(logBuffer, reader.mLogFileOp.GetFileSize(), moreData, false);
APSARA_TEST_FALSE_FATAL(moreData);
char* expectedContentAll = expectedContent.get();
size_t tmp = strlen(expectedContentAll);
expectedContentAll[tmp + 1] = '\n';
APSARA_TEST_STREQ_FATAL(expectedContent.get(), logBuffer.rawBuffer.data());
expectedContentAll[tmp + 1] = '\0';
}
{ // buffer size not big enough and not match pattern
Json::Value config;
config["StartPattern"] = "no matching pattern";
MultilineOptions multilineOpts;
FileReaderOptions readerOpts;
readerOpts.mInputType = FileReaderOptions::InputType::InputFile;
readerOpts.mFileEncoding = FileReaderOptions::Encoding::GBK;
multilineOpts.Init(config, ctx, "");
LogFileReader reader(logPathDir,
gbkFile,
DevInode(),
std::make_pair(&readerOpts, &ctx),
std::make_pair(&multilineOpts, &ctx),
std::make_pair(&fileTagOpts, &ctx));
LogFileReader::BUFFER_SIZE = 14;
size_t BUFFER_SIZE_UTF8 = 15; // "ilogtail 为可"
reader.UpdateReaderManual();
reader.InitReader(true, LogFileReader::BACKWARD_TO_BEGINNING);
reader.CheckFileSignatureAndOffset(true);
LogBuffer logBuffer;
bool moreData = false;
reader.ReadGBK(logBuffer, reader.mLogFileOp.GetFileSize(), moreData);
APSARA_TEST_TRUE_FATAL(moreData);
APSARA_TEST_STREQ_FATAL(std::string(expectedContent.get(), BUFFER_SIZE_UTF8).c_str(),
logBuffer.rawBuffer.data());
}
{ // buffer size not big enough and match pattern
Json::Value config;
config["StartPattern"] = "iLogtail.*";
MultilineOptions multilineOpts;
multilineOpts.Init(config, ctx, "");
FileReaderOptions readerOpts;
readerOpts.mInputType = FileReaderOptions::InputType::InputFile;
readerOpts.mFileEncoding = FileReaderOptions::Encoding::GBK;
LogFileReader reader(logPathDir,
gbkFile,
DevInode(),
std::make_pair(&readerOpts, &ctx),
std::make_pair(&multilineOpts, &ctx),
std::make_pair(&fileTagOpts, &ctx));
// reader.mDiscardUnmatch = false;
reader.UpdateReaderManual();
reader.InitReader(true, LogFileReader::BACKWARD_TO_BEGINNING);
int64_t fileSize = reader.mLogFileOp.GetFileSize();
reader.CheckFileSignatureAndOffset(true);
LogFileReader::BUFFER_SIZE = fileSize - 11;
LogBuffer logBuffer;
bool moreData = false;
reader.ReadGBK(logBuffer, fileSize, moreData);
APSARA_TEST_TRUE_FATAL(moreData);
std::string expectedPart(expectedContent.get());
expectedPart.resize(expectedPart.rfind("iLogtail") - 1); // exclude tailing \n
APSARA_TEST_STREQ_FATAL(expectedPart.c_str(), logBuffer.rawBuffer.data());
}
{ // read twice, multiline
Json::Value config;
config["StartPattern"] = "iLogtail.*";
MultilineOptions multilineOpts;
multilineOpts.Init(config, ctx, "");
FileReaderOptions readerOpts;
readerOpts.mInputType = FileReaderOptions::InputType::InputFile;
readerOpts.mFileEncoding = FileReaderOptions::Encoding::GBK;
LogFileReader reader(logPathDir,
gbkFile,
DevInode(),
std::make_pair(&readerOpts, &ctx),
std::make_pair(&multilineOpts, &ctx),
std::make_pair(&fileTagOpts, &ctx));
// reader.mDiscardUnmatch = false;
reader.UpdateReaderManual();
reader.InitReader(true, LogFileReader::BACKWARD_TO_BEGINNING);
int64_t fileSize = reader.mLogFileOp.GetFileSize();
reader.CheckFileSignatureAndOffset(true);
LogFileReader::BUFFER_SIZE = fileSize - 11;
LogBuffer logBuffer;
bool moreData = false;
// first read, first part should be read
reader.ReadGBK(logBuffer, fileSize, moreData);
APSARA_TEST_TRUE_FATAL(moreData);
std::string expectedPart(expectedContent.get());
expectedPart.resize(expectedPart.rfind("iLogtail") - 1);
APSARA_TEST_STREQ_FATAL(expectedPart.c_str(), logBuffer.rawBuffer.data());
APSARA_TEST_GE_FATAL(reader.mCache.size(), 0UL);
auto lastFilePos = reader.mLastFilePos;
// second read, end of second part cannot be determined, nothing read
reader.ReadGBK(logBuffer, fileSize, moreData);
APSARA_TEST_FALSE_FATAL(moreData);
APSARA_TEST_GE_FATAL(reader.mCache.size(), 0UL);
APSARA_TEST_EQUAL_FATAL(lastFilePos, reader.mLastFilePos);
}
{ // read twice, single line
MultilineOptions multilineOpts;
FileReaderOptions readerOpts;
readerOpts.mInputType = FileReaderOptions::InputType::InputFile;
readerOpts.mFileEncoding = FileReaderOptions::Encoding::GBK;
LogFileReader reader(logPathDir,
gbkFile,
DevInode(),
std::make_pair(&readerOpts, &ctx),
std::make_pair(&multilineOpts, &ctx),
std::make_pair(&fileTagOpts, &ctx));
// reader.mDiscardUnmatch = false;
reader.UpdateReaderManual();
reader.InitReader(true, LogFileReader::BACKWARD_TO_BEGINNING);
int64_t fileSize = reader.mLogFileOp.GetFileSize();
reader.CheckFileSignatureAndOffset(true);
LogFileReader::BUFFER_SIZE = fileSize - 11;
LogBuffer logBuffer;
bool moreData = false;
// first read, first part should be read
reader.ReadGBK(logBuffer, fileSize, moreData);
APSARA_TEST_TRUE_FATAL(moreData);
std::string expectedPart(expectedContent.get());
expectedPart.resize(expectedPart.rfind("iLogtail") - 1); // -1 for \n
APSARA_TEST_STREQ_FATAL(expectedPart.c_str(), logBuffer.rawBuffer.data());
APSARA_TEST_GE_FATAL(reader.mCache.size(), 0UL);
// second read, second part should be read
reader.ReadGBK(logBuffer, fileSize, moreData);
APSARA_TEST_FALSE_FATAL(moreData);
expectedPart = expectedContent.get();
expectedPart = expectedPart.substr(expectedPart.rfind("iLogtail"));
APSARA_TEST_STREQ_FATAL(expectedPart.c_str(), logBuffer.rawBuffer.data());
APSARA_TEST_EQUAL_FATAL(0UL, reader.mCache.size());
}
{ // empty file
MultilineOptions multilineOpts;
FileReaderOptions readerOpts;
readerOpts.mInputType = FileReaderOptions::InputType::InputFile;
readerOpts.mFileEncoding = FileReaderOptions::Encoding::GBK;
LogFileReader reader(logPathDir,
gbkFile,
DevInode(),
std::make_pair(&readerOpts, &ctx),
std::make_pair(&multilineOpts, &ctx),
std::make_pair(&fileTagOpts, &ctx));
reader.UpdateReaderManual();
reader.InitReader(true, LogFileReader::BACKWARD_TO_BEGINNING);
LogBuffer logBuffer;
bool moreData = false;
reader.ReadGBK(logBuffer, 0, moreData);
APSARA_TEST_FALSE_FATAL(moreData);
APSARA_TEST_STREQ_FATAL(NULL, logBuffer.rawBuffer.data());
}
{ // force read + \n, which case read bytes is 0
Json::Value config;
config["StartPattern"] = "iLogtail.*";
MultilineOptions multilineOpts;
multilineOpts.Init(config, ctx, "");
FileReaderOptions readerOpts;
readerOpts.mInputType = FileReaderOptions::InputType::InputFile;
readerOpts.mFileEncoding = FileReaderOptions::Encoding::GBK;
LogFileReader reader(logPathDir,
gbkFile,
DevInode(),
std::make_pair(&readerOpts, &ctx),
std::make_pair(&multilineOpts, &ctx),
std::make_pair(&fileTagOpts, &ctx));
reader.UpdateReaderManual();
reader.InitReader(true, LogFileReader::BACKWARD_TO_BEGINNING);
int64_t fileSize = reader.mLogFileOp.GetFileSize();
reader.CheckFileSignatureAndOffset(true);
LogBuffer logBuffer;
bool moreData = false;
std::string expectedPart(expectedContent.get());
// first read, read first line without \n and not allowRollback
int64_t firstReadSize = expectedPart.find("\n");
expectedPart.resize(firstReadSize);
reader.ReadGBK(logBuffer, 127, moreData, false); // first line without \n
APSARA_TEST_FALSE_FATAL(moreData);
APSARA_TEST_FALSE_FATAL(reader.mLastForceRead);
reader.ReadGBK(logBuffer, 127, moreData, false); // force read, clear cache
APSARA_TEST_TRUE_FATAL(reader.mLastForceRead);
APSARA_TEST_EQUAL_FATAL(reader.mCache.size(), 0UL);
APSARA_TEST_STREQ_FATAL(expectedPart.c_str(), logBuffer.rawBuffer.data());
// second read, start with \n but with other lines
reader.ReadGBK(logBuffer, fileSize - 1, moreData);
APSARA_TEST_FALSE_FATAL(moreData);
APSARA_TEST_GE_FATAL(reader.mCache.size(), 0UL);
std::string expectedPart2(expectedContent.get() + firstReadSize + 1); // skip \n
int64_t secondReadSize = expectedPart2.rfind("iLogtail") - 1;
expectedPart2.resize(secondReadSize);
APSARA_TEST_STREQ_FATAL(expectedPart2.c_str(), logBuffer.rawBuffer.data());
APSARA_TEST_FALSE_FATAL(reader.mLastForceRead);
// third read, force read cache
reader.ReadGBK(logBuffer, fileSize - 1, moreData, false);
std::string expectedPart3(expectedContent.get() + firstReadSize + 1 + secondReadSize + 1);
APSARA_TEST_STREQ_FATAL(expectedPart3.c_str(), logBuffer.rawBuffer.data());
APSARA_TEST_TRUE_FATAL(reader.mLastForceRead);
// fourth read, only read \n
LogBuffer logBuffer2;
reader.ReadGBK(logBuffer2, fileSize, moreData);
APSARA_TEST_FALSE_FATAL(moreData);
APSARA_TEST_GE_FATAL(reader.mCache.size(), 0UL);
APSARA_TEST_EQUAL_FATAL(fileSize, reader.mLastFilePos);
APSARA_TEST_STREQ_FATAL(NULL, logBuffer2.rawBuffer.data());
}
}