in caffe2_customized_ops/video/customized_video_input_op.h [358:488]
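// DecodeAndTransform: decode one video clip from the serialized DB value,
// optionally apply scale augmentation, then crop / mirror / normalize the
// frames into clip_data. The label is written to label_data and the final
// spatial size is returned through height_out / width_out.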
template <class Context>
void CustomizedVideoInputOp<Context>::DecodeAndTransform(
const std::string value,
float* clip_data,
int* label_data,
const int crop_size, // -1 is uncrop
const bool mirror,
const float mean,
const float std,
std::mt19937* randgen,
std::bernoulli_distribution* mirror_this_clip,
int* height_out,
int* width_out
) {
float* buffer = nullptr;
// Decode the video from memory or read from a local file
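// GetClipAndLabelFromDBValue is expected to allocate `buffer` with the raw
// decoded frames; the buffer is released at the end of this function.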
int height_raw = -1;
int width_raw = -1;
int height_scaled = -1;
int width_scaled = -1;
CHECK(GetClipAndLabelFromDBValue(
value, buffer, label_data, randgen, height_raw, width_raw)
);
if ((height_raw <= 0) || (width_raw <= 0)) {
  // avoid leaking the decoded buffer on early exit
  delete[] buffer;
  return;
}
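// only a single clip is extracted per DB value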
const int num_clips = 1;
for (int i = 0; i < num_clips; i++) {
if (use_scale_augmentaiton_) {
const int buffer_sample_size = height_raw * width_raw * length_ * 3;
float* buffer_scaled = nullptr;
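// scale augmentation: rescale the decoded frames into a newly allocated
// buffer_scaled; the scaled dimensions are returned in height_scaled / width_scaled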
ScaleTransform(
buffer + buffer_sample_size * i,
3,
length_,
height_raw,
width_raw,
max_size_,
min_size_,
buffer_scaled,
randgen,
height_scaled,
width_scaled);
// determine the returned output size
if (i == 0) {
if (crop_size > 0) {
*height_out = crop_size;
*width_out = crop_size;
} else {
*height_out = height_scaled;
*width_out = width_scaled;
// avoid an extreme aspect ratio (even when the full, uncropped image is requested)
const float ratio_max = 1.6f;
const float ratio = (height_scaled > width_scaled) ?
((float)height_scaled / width_scaled) :
((float)width_scaled / height_scaled);
if (ratio > ratio_max) {
if (height_scaled > width_scaled) {
*height_out = (int)(ratio_max * width_scaled);
} else {
*width_out = (int)(ratio_max * height_scaled);
}
// LOG(INFO) << "Truncate image: "
// << "width: " << width_scaled << ", height: " << height_scaled
// << " to width: " << *width_out << ", height: " << *height_out;
} // if ratio > ratio_max
} // else crop_size > 0
} // if i
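// number of floats per output clip: 3 channels x length_ frames x H x W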
const int clip_size = *height_out * *width_out * length_ * 3;
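// spatial_pos selects the crop position along the longer side when
// multi-crop testing is enabled; -1 leaves the choice to ClipTransformFlex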
int spatial_pos = -1;
if (use_multi_crop_ > 0) {
// crop along the longer side
TensorProtos protos;
CAFFE_ENFORCE(protos.ParseFromString(value));
const TensorProto& spatial_pos_proto = protos.protos(3);
spatial_pos = spatial_pos_proto.int32_data(0);
}
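// crop, optionally mirror, and mean/std-normalize the scaled frames into clip_data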
ClipTransformFlex(
buffer_scaled,
3,
length_,
height_scaled,
width_scaled,
(*height_out),
(*width_out),
mirror,
mean,
std,
clip_data + clip_size * i,
randgen,
mirror_this_clip,
is_test_,
use_bgr_,
spatial_pos
);
if (buffer_scaled != nullptr) {
  delete[] buffer_scaled;
}
} else {
LOG(FATAL) << "We don't recommend using unrestricted input size, "
<< "as it is heavily dependent on dataset preparation.";
// const int buffer_sample_size = height * width * length_ * 3;
// it would cause a problem if the side < crop_size
// ClipTransform(
// buffer + buffer_sample_size * i,
// 3,
// length_,
// height,
// width,
// crop_size + ,
// mirror,
// mean,
// std,
// clip_data + clip_size * i,
// randgen,
// mirror_this_clip,
// is_test_);
} // else
} // i
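// release the raw decoded buffer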
delete[] buffer;
}