# sample.py — inference entry point: main(args)

def main(args):
	"""Run pose-sequence inference on a video and write sampled poses to disk.

	Loads a pickled vocabulary, builds the encoder/decoder pair for either the
	upper- or lower-body model (exactly one of args.upp / args.low must be set),
	restores checkpoint weights, decodes a pose-id sequence for the input video,
	and writes each decoded pose string to ``args.output + 'r<i>.txt'``.
	Exits with status 1 if neither --upp nor --low is given.
	"""
	# Standard ImageNet normalization for the CNN encoder.
	transform = transforms.Compose([
		transforms.Resize(args.crop_size),
		transforms.ToTensor(),
		transforms.Normalize((0.485, 0.456, 0.406),
			(0.229, 0.224, 0.225))])

	with open(args.vocab_path, 'rb') as f:
		vocab = pickle.load(f)

	upp_size, low_size = vocab.get_shapes()
	start = time.time()
	encoder = EncoderCNN(args.embed_size).eval()

	# Vocabulary size is offset by +1, presumably to reserve id 0 for a
	# special (start/padding) token — see the matching pose_id-1 below.
	if args.upp:
		decoder = DecoderRNN(args.embed_size, args.hidden_size, upp_size+1, args.num_layers)
	elif args.low:
		decoder = DecoderRNN(args.embed_size, args.hidden_size, low_size+1, args.num_layers)
	else:
		print('Please specify upper/lower body model to test')
		exit(1)  # error path: exit non-zero (was 0, which signals success)

	decoder.train(False)
	encoder = encoder.to(device)
	decoder = decoder.to(device)

	# map_location lets GPU-saved checkpoints load on a CPU-only host.
	encoder.load_state_dict(torch.load(args.encoder_path, map_location=device))
	decoder.load_state_dict(torch.load(args.decoder_path, map_location=device))

	video = load_video(args.image_dir, args.seq_length, transform)
	video_tensor = video.to(device)
	feature = encoder(video_tensor)
	homography = load_homography(args.image_dir, args.h_dir, args.seq_length)
	openpose = load_openpose(args.image_dir, args.openpose_dir, args.seq_length)
	sampled_ids = decoder.sample(feature, homography, openpose)

	end = time.time()
	# Fixed: was a Python 2 print statement (SyntaxError under Python 3).
	print("duration", end - start)

	sampled_ids = sampled_ids[0].cpu().numpy()
	sampled_poses = []
	for pose_id in sampled_ids:
		# pose_id-1 undoes the +1 vocabulary offset above.
		# NOTE(review): a sampled id of 0 would index -1 (last pose) — confirm
		# the decoder never emits the reserved id here.
		if args.upp:
			pose = vocab.upp_poses[pose_id-1]
		elif args.low:
			pose = vocab.low_poses[pose_id-1]
		else:
			print('Please specify upper/lower body model to test')
			exit(1)  # error path: exit non-zero (was 0, which signals success)
		sampled_poses.append(pose)

		if args.visualize:
			# Pose strings are comma-separated floats, reshaped to (J, 3) joints.
			pose3d = [float(x) for x in pose.split(',')]
			pose3d = np.reshape(pose3d, (-1,3))
			# NOTE(review): show_upp is called for the lower-body model too —
			# verify a lower-body visualizer isn't intended here.
			show_upp(pose3d)

	# Write each decoded pose to its own file: <output>r1.txt, r2.txt, ...
	for i, pose_str in enumerate(sampled_poses, start=1):
		path = args.output + 'r' + str(i) + '.txt'
		with open(path, 'w') as f:
			f.write(pose_str + '\n')