# lama/get_contextual_embeddings.py [0:0]
def main(args):
    """Demo entry point: build each requested language model and print the
    contextual embeddings it produces for two small example instances.

    Args:
        args: parsed CLI namespace; reads ``args.models_names`` (iterable of
            model identifiers) and ``args.cuda`` (bool).
    """
    # Two toy inputs: one single-sentence instance, one two-sentence instance.
    sentences = [
        ["the cat is on the table ."],  # single-sentence instance
        ["the dog is sleeping on the sofa .", "he makes happy noises ."],  # two-sentence
    ]
    print("Language Models: {}".format(args.models_names))

    # Instantiate every requested model up front, keyed by its name.
    models = {lm: build_model_by_name(lm, args) for lm in args.models_names}

    for model_name, model in models.items():
        print("\n{}:".format(model_name))
        if args.cuda:
            model.try_cuda()
        layer_reps, sentence_lengths, tokenized_text_list = model.get_contextual_embeddings(
            sentences)
        # `layer_reps` is a list with one tensor per layer; each tensor is
        # shaped (batch size, batch sequence length, layer vector length).
        print(f'Number of layers: {len(layer_reps)}')
        for layer_id, layer in enumerate(layer_reps):
            print(f'Layer {layer_id} has shape: {layer.shape}')
        print("sentence_lengths: {}".format(sentence_lengths))
        print("tokenized_text_list: {}".format(tokenized_text_list))