in sentiment/data.js [49:107]
/**
 * Load word-index sequences from a binary file and encode them as a tensor.
 *
 * The file is read as a flat stream of little-endian signed 32-bit integers.
 * A value of 1 marks the start of a new sequence; every other value is
 * appended to the current sequence, with values `>= numWords` replaced by
 * `OOV_INDEX`.
 *
 * @param {string} filePath Path of the binary features file.
 * @param {number} numWords Vocabulary size: word indices at or above this
 *   value are mapped to `OOV_INDEX`. Also the width of the multi-hot output.
 * @param {number} maxLen Sequence length handed to `padSequences` and used as
 *   the second dimension of the padded output. Unused when `multihot` is true.
 * @param {boolean} multihot If true, encode each sequence as a multi-hot row
 *   of length `numWords` instead of a padded index sequence.
 * @returns The tensor built by `tf.buffer(...).toTensor()` with shape
 *   `[numSequences, numWords]` when `multihot` is true; otherwise the
 *   `int32` tensor built by `tf.tensor2d` with shape `[numSequences, maxLen]`.
 * @throws {RangeError} If the file size is not a multiple of 4 bytes, i.e. the
 *   file cannot be a whole number of 32-bit values.
 */
function loadFeatures(filePath, numWords, maxLen, multihot = false) {
  const BYTES_PER_VALUE = 4;
  const SEQUENCE_START = 1;

  const fileBytes = fs.readFileSync(filePath);
  const numBytes = fileBytes.byteLength;

  // Fail fast with a clear message rather than parsing the whole file and
  // then hitting an out-of-range read on the trailing partial value.
  if (numBytes % BYTES_PER_VALUE !== 0) {
    throw new RangeError(
        `Invalid features file ${filePath}: ${numBytes} bytes is not a ` +
        `multiple of ${BYTES_PER_VALUE}.`);
  }

  const sequences = [];
  let seq = [];
  for (let offset = 0; offset < numBytes; offset += BYTES_PER_VALUE) {
    const value = fileBytes.readInt32LE(offset);
    if (value === SEQUENCE_START) {
      // A new sequence has started: flush the one collected so far, unless
      // this marker is the very first value in the file.
      if (offset > 0) {
        sequences.push(seq);
      }
      seq = [];
    } else {
      // Sequence continues; out-of-vocabulary indices collapse to OOV_INDEX.
      seq.push(value >= numWords ? OOV_INDEX : value);
    }
  }
  // Flush the final sequence (a trailing empty one is not recorded).
  if (seq.length > 0) {
    sequences.push(seq);
  }

  // Get some sequence length stats.
  let minLength = Infinity;
  let maxLength = -Infinity;
  for (const {length} of sequences) {
    minLength = Math.min(minLength, length);
    maxLength = Math.max(maxLength, length);
  }
  console.log(`Sequence length: min = ${minLength}; max = ${maxLength}`);

  if (multihot) {
    // If requested by the arg, encode the sequences as multi-hot vectors.
    // OOV entries are skipped, so they never set a bit.
    const multihotBuffer = tf.buffer([sequences.length, numWords]);
    sequences.forEach((wordIndices, row) => {
      for (const wordIndex of wordIndices) {
        if (wordIndex !== OOV_INDEX) {
          multihotBuffer.set(1, row, wordIndex);
        }
      }
    });
    return multihotBuffer.toTensor();
  }

  // NOTE(review): the two 'pre' arguments are presumably the padding and
  // truncating modes of padSequences -- confirm against its definition.
  const paddedSequences = padSequences(sequences, maxLen, 'pre', 'pre');
  return tf.tensor2d(
      paddedSequences, [paddedSequences.length, maxLen], 'int32');
}