in sample.py
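"""Sample pose sequences from a trained encoder/decoder model.

Loads a clip of video frames plus per-frame homography and OpenPose data,
runs the encoder/decoder pipeline, and writes the sampled 3D poses to
per-frame text files.
"""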
import argparse
import pickle
import sys
import time

import numpy as np
import torch
from torchvision import transforms

# Project-local modules; the exact module names are assumed from the calls below.
from model import EncoderCNN, DecoderRNN
from utils import load_video, load_homography, load_openpose, show_upp

# Run on the GPU when available (the original presumably defines `device` at module level).
device = torch.device('cuda' if torch.cuda.is_available() else 'cpu')


def main(args):
    # Frame preprocessing: resize, convert to tensor, normalize with ImageNet statistics.
    transform = transforms.Compose([
        transforms.Resize(args.crop_size),
        transforms.ToTensor(),
        transforms.Normalize((0.485, 0.456, 0.406),
                             (0.229, 0.224, 0.225))])
    # Load the pose vocabulary and the sizes of the upper- and lower-body pose sets.
    with open(args.vocab_path, 'rb') as f:
        vocab = pickle.load(f)
    upp_size, low_size = vocab.get_shapes()
    start = time.time()

    # Build the models in evaluation mode (disables dropout and batch-norm updates).
    # The vocabulary size is offset by 1, presumably to reserve index 0 for a special token.
    encoder = EncoderCNN(args.embed_size).eval()
    if args.upp:
        decoder = DecoderRNN(args.embed_size, args.hidden_size,
                             upp_size + 1, args.num_layers)
    elif args.low:
        decoder = DecoderRNN(args.embed_size, args.hidden_size,
                             low_size + 1, args.num_layers)
    else:
        sys.exit('Please specify upper/lower body model to test')
    decoder.eval()

    encoder = encoder.to(device)
    decoder = decoder.to(device)

    # Load the trained weights; map_location keeps this working on CPU-only machines.
    encoder.load_state_dict(torch.load(args.encoder_path, map_location=device))
    decoder.load_state_dict(torch.load(args.decoder_path, map_location=device))
    # Load the input frames and the per-frame homography and OpenPose data.
    video = load_video(args.image_dir, args.seq_length, transform)
    video_tensor = video.to(device)
    homography = load_homography(args.image_dir, args.h_dir, args.seq_length)
    openpose = load_openpose(args.image_dir, args.openpose_dir, args.seq_length)

    # Run inference without tracking gradients.
    with torch.no_grad():
        feature = encoder(video_tensor)
        sampled_ids = decoder.sample(feature, homography, openpose)

    end = time.time()
    print('duration', end - start)

    sampled_ids = sampled_ids[0].cpu().numpy()
    # Map the sampled ids back to pose strings; ids are shifted by 1 relative
    # to the vocabulary lists (matching the size + 1 used above).
    poses = vocab.upp_poses if args.upp else vocab.low_poses
    sampled_poses = []
    for pose_id in sampled_ids:
        pose = poses[pose_id - 1]
        sampled_poses.append(pose)
        if args.visualize:
            # Each pose string is a comma-separated list of 3D joint coordinates.
            pose3d = [float(x) for x in pose.split(',')]
            pose3d = np.reshape(pose3d, (-1, 3))
            show_upp(pose3d)
    # Write each sampled pose to its own file: <output>r1.txt, <output>r2.txt, ...
    for i, pose in enumerate(sampled_poses, start=1):
        path = args.output + 'r' + str(i) + '.txt'
        with open(path, 'w') as f:
            f.write(pose + '\n')
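
# A minimal CLI entry point sketched from the attributes accessed in main();
# the flag names mirror the args.* usage above, but the defaults are
# assumptions, not the original configuration.
if __name__ == '__main__':
    parser = argparse.ArgumentParser()
    parser.add_argument('--image_dir', required=True, help='directory of input frames')
    parser.add_argument('--h_dir', required=True, help='directory of homography data')
    parser.add_argument('--openpose_dir', required=True, help='directory of OpenPose outputs')
    parser.add_argument('--encoder_path', required=True, help='trained encoder weights')
    parser.add_argument('--decoder_path', required=True, help='trained decoder weights')
    parser.add_argument('--vocab_path', required=True, help='pickled pose vocabulary')
    parser.add_argument('--output', default='./', help='prefix for the output pose files')
    parser.add_argument('--crop_size', type=int, default=224)
    parser.add_argument('--seq_length', type=int, default=16)
    parser.add_argument('--embed_size', type=int, default=256)
    parser.add_argument('--hidden_size', type=int, default=512)
    parser.add_argument('--num_layers', type=int, default=1)
    parser.add_argument('--upp', action='store_true', help='sample with the upper-body model')
    parser.add_argument('--low', action='store_true', help='sample with the lower-body model')
    parser.add_argument('--visualize', action='store_true', help='show each sampled 3D pose')
    main(parser.parse_args())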