in utils.py [0:0]

import numpy as np
import torch
def update_buffers(model, sample_bytes, shas, labels, seqlen, nlen, rpbuffer, det_file):
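    """Score each sample chunk-by-chunk, update the replay buffer, and log detections.

    Parameter meanings below are inferred from how they are used in this function:
        model        -- torch module emitting one score per n-gram position
        sample_bytes -- list of raw byte strings, one per sample
        shas         -- per-sample hashes, used as replay-buffer keys
        labels       -- per-sample integer labels
        seqlen       -- chunk size in bytes
        nlen         -- receptive field / signature length in bytes
        rpbuffer     -- replay buffer exposing add_block(sha, seqs, label, yps)
        det_file     -- open, writable detections log

    Returns (full_yps, full_labels): per-sample max scores and labels.
    Assumes every sample is non-empty (np.max over an empty array raises).
    """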
    full_yps = np.zeros(len(sample_bytes))
    full_labels = np.zeros(len(sample_bytes))
    for idx, (s, sha, l) in enumerate(zip(sample_bytes, shas, labels)):
        # chunk the sample up into seqlen-byte chunks
        offset_idxs = list(range(int(np.ceil(len(s) / seqlen))))
        seqs = [s[o * seqlen:(o + 1) * seqlen] for o in offset_idxs]
        # align the last chunk with the end of the file so we don't end up
        # with a short, mostly-empty final block (the last two chunks overlap
        # instead)
        if len(seqs) > 1:
            seqs[-1] = s[-seqlen:]
        # init storage for the max score of each chunk
        yps = np.zeros(len(seqs))
        # track the max output over the sample and its associated signature
        max_yp = None
        max_sig = b""
        # iterate through the chunks
        for b, seq in enumerate(seqs):
            # a chunk shorter than the convolutional receptive field produces
            # no scores, so skip it
            if len(seq) < nlen:
                continue
            # inference only: disable autograd so no graph is built, which
            # saves memory and some compute
            with torch.no_grad():
                # feed the chunk forward through the model
                ngrams = torch.from_numpy(unpackbits([seq], seqlen))
                yp = model(ngrams)
                # one score per n-gram start position within the chunk
                yp = yp.detach().cpu().numpy().squeeze()[:len(seq) - nlen + 1]
                # get the max score for the chunk and where it occurred
                max_sig_idx = np.argmax(yp)
                yp = yp[max_sig_idx]
                # update the sample-wide max score/sig
                if max_yp is None or yp > max_yp:
                    max_yp = yp
                    max_sig = seq[max_sig_idx:max_sig_idx + nlen]
                # fill in the max score for the chunk
                yps[b] = yp
        # final score of the sample: the max score over all chunks
        full_yps[idx] = np.max(yps)
        full_labels[idx] = l
        # add the whole sample to the replay buffer
        rpbuffer.add_block(sha, seqs, l, yps)
        # write a record to the detections file; guard against samples too
        # small to score (max_yp stays None and would crash the %f format)
        if max_yp is not None:
            det_file.write("%d %s %0.3f %s %s\n" % (l, sha, max_yp, max_sig.hex(), str(max_sig)))
    return full_yps, full_labels
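
# The function above assumes an `unpackbits` helper defined elsewhere in
# utils.py. For reference, here is a minimal sketch of what it presumably
# does, inferred purely from its call site: take a list of byte strings plus
# the chunk length and return a bit array zero-padded to seqlen bytes. The
# exact shape and dtype are assumptions, not this module's actual code.
def unpackbits_sketch(byte_seqs, seqlen):
    out = np.zeros((len(byte_seqs), seqlen * 8), dtype=np.float32)
    for i, s in enumerate(byte_seqs):
        # unpack each byte into its 8 bits, MSB first
        bits = np.unpackbits(np.frombuffer(s, dtype=np.uint8))
        out[i, :len(bits)] = bits
    return out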
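
# A hypothetical call, for illustration only. `model`, `buf`, the sample
# lists, and the seqlen/nlen values are stand-ins, not names or settings
# defined in this module:
#
#     with open("detections.txt", "w") as det_file:
#         full_yps, full_labels = update_buffers(
#             model, sample_bytes, shas, labels,
#             seqlen=4096, nlen=8, rpbuffer=buf, det_file=det_file)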