in caffe2_customized_ops/video/customized_video_io.cc [501:572]
// Crops, optionally mirrors horizontally, and normalizes a clip.
//
// Layout (as implied by the index arithmetic below):
//   clip_data        : dense [channels][length][height][width]
//   transformed_clip : dense [channels][length][h_crop][w_crop]
// Every output element is (input - mean) * (1 / std).
//
// Crop offset:
//   - use_center_crop == false: the h/w offsets are drawn uniformly from
//     [0, height - h_crop] and [0, width - w_crop] using *randgen.
//   - use_center_crop == true: the crop is centered. If, in addition,
//     spatial_pos >= 0, the centered offset is multiplied by
//     (spatial_pos % 3), i.e. 0 -> offset 0, 1 -> centered, 2 -> twice the
//     centered offset. The multiplier is applied to the vertical offset when
//     there is vertical slack (h_off > 0), otherwise to the horizontal one.
//
// Mirroring:
//   - spatial_pos < 0 : flip iff `mirror` is set and *mirror_this_clip draws
//     true from *randgen.
//   - spatial_pos >= 0: flip iff spatial_pos / 3 is non-zero; `mirror` and the
//     random draw are ignored for the decision.
//
// use_bgr: when true, output channel c is read from input channel
// (channels - 1 - c), i.e. the channel order is reversed.
//
// Preconditions: height >= h_crop and width >= w_crop (checked with assert
// only, so not enforced in NDEBUG builds). `randgen` must be valid whenever a
// random crop is requested, and `randgen`/`mirror_this_clip` must be valid
// whenever `mirror` is true.
void ClipTransformFlex(
    const float* clip_data,
    const int channels,
    const int length,
    const int height,
    const int width,
    const int h_crop,
    const int w_crop,
    const bool mirror,
    float mean,
    float std,
    float* transformed_clip,
    std::mt19937* randgen,
    std::bernoulli_distribution* mirror_this_clip,
    const bool use_center_crop,
    const bool use_bgr,
    const int spatial_pos
) {
  assert(height >= h_crop);
  assert(width >= w_crop);

  int h_off = 0;
  int w_off = 0;
  if (use_center_crop) {
    h_off = (height - h_crop) / 2;
    w_off = (width - w_crop) / 2;
    if (spatial_pos >= 0) {
      // Slide the crop window along a single axis: prefer the vertical axis
      // when it has slack, otherwise use the horizontal axis.
      const int now_pos = spatial_pos % 3;
      if (h_off > 0) {
        h_off *= now_pos;
      } else {
        w_off *= now_pos;
      }
    }
  } else {
    // Note: the parameter named `std` does not interfere with `std::` below;
    // qualified lookup on the left of `::` only considers namespaces/types.
    h_off = std::uniform_int_distribution<>(0, height - h_crop)(*randgen);
    w_off = std::uniform_int_distribution<>(0, width - w_crop)(*randgen);
  }

  // The Bernoulli draw is performed even when spatial_pos overrides the
  // result, so the amount of randomness consumed per clip stays the same.
  bool mirror_me = mirror && (*mirror_this_clip)(*randgen);
  if (spatial_pos >= 0) {
    mirror_me = (spatial_pos / 3) != 0;
  }

  const float inv_std = 1.f / std;

  for (int c = 0; c < channels; ++c) {
    // Source channel is loop-invariant for everything below.
    const int src_c = use_bgr ? (channels - c - 1) : c;
    for (int l = 0; l < length; ++l) {
      for (int h = 0; h < h_crop; ++h) {
        // Index of the first cropped pixel of this row in the input, and of
        // the first pixel of the corresponding output row.
        const int src_base =
            ((src_c * length + l) * height + h_off + h) * width + w_off;
        const int dst_base = ((c * length + l) * h_crop + h) * w_crop;
        if (mirror_me) {
          // Write the row right-to-left to flip it horizontally.
          for (int w = 0; w < w_crop; ++w) {
            transformed_clip[dst_base + (w_crop - 1 - w)] =
                (clip_data[src_base + w] - mean) * inv_std;
          }
        } else {
          for (int w = 0; w < w_crop; ++w) {
            transformed_clip[dst_base + w] =
                (clip_data[src_base + w] - mean) * inv_std;
          }
        }
      }
    }
  }
}