generators/file-generator.ts (59 lines of code) (raw):
/*
* Copyright Amazon.com, Inc. or its affiliates. All Rights Reserved.
* SPDX-License-Identifier: Apache-2.0
*/
import { IBatchGenerator, ILogData } from "../core/ext-types.js"
import fs from 'fs';
import readline from 'readline';
/**
 * Prefix that the file generator prepends to the configured `data` path
 * before opening it. It is the empty string, so the configured path is
 * handed to `fs.createReadStream` unchanged.
 */
const DATA_PATH = '';
/*
* File Line-by-Line Generator
* This generator reads a file as input and emits its contents
* sequentially, one record per line, grouped into batches.
*/
/**
 * Options understood by the file generator.
 */
interface IGeneratorConfig {
    /** Path of the input file; it is appended to `DATA_PATH` when the file is opened. */
    data: string;
    /** Number of records collected into a batch before the batch is yielded. */
    batchSize: number;
    /** When true, the file is re-read from the start after each complete pass. */
    loop: boolean;
    /** When true, each line is passed through `JSON.parse`; otherwise it is wrapped under `logKey`. */
    isJSON: boolean;
    /** Property name under which a raw text line is stored when `isJSON` is false. */
    logKey: string;
}
/**
 * Baseline settings published as `fileGenerator.defaultConfig`:
 * replay the bundled example access log one plain-text line per batch,
 * looping indefinitely, with each line stored under the "log" key.
 */
const defaultConfig: IGeneratorConfig = {
    data: 'example/webserver-access.log',
    batchSize: 1,
    loop: true,
    isJSON: false,
    logKey: 'log',
};
/**
 * Batch generator that replays a text file line by line.
 *
 * `createConfiguredGenerator(config)` returns an object whose `makeInstance()`
 * produces an async generator. Each pass opens `DATA_PATH + config.data`,
 * turns every line into a record and yields the records in arrays of
 * `config.batchSize` items; a shorter final batch is yielded at end of file.
 *
 * - `config.isJSON`: each line is parsed with `JSON.parse`; a line that is not
 *   valid JSON makes the generator throw a `SyntaxError`.
 * - otherwise each line is wrapped as `{ [config.logKey]: line }`.
 * - `config.loop`: when true the file is replayed from the start after every
 *   pass; when false the generator finishes after one pass.
 *
 * The read stream and readline interface are released in a `finally` block, so
 * they are also closed when the consumer stops iterating early or a line fails
 * to parse. A pass that yields nothing (e.g. an empty file) ends the generator
 * even when `config.loop` is true, so the file is not re-read endlessly while
 * the consumer waits for a value that never arrives.
 */
const fileGenerator: IBatchGenerator = {
    name: "file",
    defaultConfig: defaultConfig,
    createConfiguredGenerator: function (config: IGeneratorConfig) {
        return {
            generatorTemplate: this,
            makeInstance: (() => (async function*() {
                let hasLooped = false;
                while (config.loop || !hasLooped) {
                    const fileStream = fs.createReadStream(DATA_PATH + config.data);
                    // crlfDelay: Infinity makes readline treat every CR LF ('\r\n')
                    // sequence in the input as a single line break.
                    const rl = readline.createInterface({
                        input: fileStream,
                        crlfDelay: Infinity
                    });

                    let batch: Array<ILogData> = [];
                    // Tracks whether this pass handed anything to the consumer.
                    let yieldedThisPass = false;
                    try {
                        for await (const line of rl) {
                            if (config.isJSON) {
                                batch.push(JSON.parse(line));
                            } else {
                                // Plain text: wrap the line in an object under the configured key.
                                batch.push({
                                    [config.logKey]: line
                                } as ILogData);
                            }
                            if (batch.length === config.batchSize) {
                                yieldedThisPass = true;
                                yield batch;
                                batch = [];
                            }
                        }
                        // Flush the records left over when the line count is not a
                        // multiple of batchSize.
                        if (batch.length !== 0) {
                            yieldedThisPass = true;
                            yield batch;
                        }
                    } finally {
                        // Runs on normal completion, on a thrown error, and when the
                        // consumer calls return() while suspended at a yield.
                        rl.close();
                        fileStream.destroy();
                    }
                    hasLooped = true;

                    // Nothing was produced: looping again would only re-read the
                    // same empty input forever without ever yielding.
                    if (!yieldedThisPass) {
                        break;
                    }
                }
            })()),
        }
    }
};
export default fileGenerator;