in caffe/src/caffe/util/octree.cpp [601:800]
// Merges a batch of serialized octrees into one batched octree stored in
// `octree_output`. Each input octree in `octree_buffer` is parsed in place
// (no copy of the raw buffer), validated for format consistency, and its
// per-depth node data (keys, children, features, labels, split labels) is
// concatenated layer-by-layer into the batch. Neighbor information for the
// batch is then recomputed from scratch. The copy phase is parallelized over
// input octrees with boost threads.
//
// Parameters:
//   octree_output - destination blob; reshaped here to hold the merged octree.
//   octree_buffer - one serialized octree per batch element.
//
// Requirements checked at runtime: every input octree passes
// OctreeInfo::check_format(), and all inputs share a consistent format
// (same depth/full_layer/property layout) as octree 0.
void merge_octrees(Blob<Dtype>& octree_output, const vector<vector<char> >& octree_buffer) {
/// parse the input octrees
int batch_size = octree_buffer.size();
// Parsers are lightweight views over the raw buffers; set_cpu does not copy.
vector<OctreeParser> octree_parsers(batch_size);
for (int i = 0; i < batch_size; ++i) {
octree_parsers[i].set_cpu(octree_buffer[i].data());
}
// get depth and full_layer information
// depth/full_layer are taken from octree 0; the consistency check below
// guarantees all other octrees agree.
string err_msg;
const int depth = octree_parsers[0].info().depth();
const int full_layer = octree_parsers[0].info().full_layer();
bool valid = octree_parsers[0].info().check_format(err_msg);
CHECK(valid) << err_msg;
for (int i = 1; i < batch_size; ++i) {
valid = octree_parsers[i].info().check_format(err_msg);
CHECK(valid) << err_msg;
CHECK(octree_parsers[0].info().is_consistent(octree_parsers[i].info()))
<< "The formats of input octrees are not consistent, check the database";
}
/// get the node number information
// node and non-empty node number in each octree
// Layout of nnum/nnum_nempty: row-major [octree i][depth d], i.e.
// index p = i * (depth + 1) + d.
int sz = (depth + 1) * batch_size;
vector<int> nnum(sz), nnum_nempty(sz);
for (int i = 0; i < batch_size; ++i) {
for (int d = 0; d < depth + 1; ++d) {
int p = i * (depth + 1) + d;
nnum[p] = octree_parsers[i].info().node_num(d);
nnum_nempty[p] = octree_parsers[i].info().node_num_nempty(d);
}
}
// cumulative node and non-empty node number in each layers
// nnum_cum_layer[i * (depth+1) + d] = total (non-empty) nodes at depth d
// contributed by octrees 0..i-1, i.e. the per-layer write offset for
// octree i. Row batch_size holds the per-layer totals for the whole batch.
sz = (depth + 1) * (batch_size + 1);
vector<int> nnum_cum_layer(sz), nnum_cum_nempty_layer(sz);
for (int d = 0; d < depth + 1; ++d) {
nnum_cum_layer[d] = 0;
nnum_cum_nempty_layer[d] = 0;
for (int i = 0; i < batch_size; ++i) {
int p = i * (depth + 1) + d;
int q = p + depth + 1;  // same depth d, next octree (i + 1)
nnum_cum_layer[q] = nnum[p] + nnum_cum_layer[p];
nnum_cum_nempty_layer[q] = nnum_nempty[p] + nnum_cum_nempty_layer[p];
}
}
// cumulative node number for each octree
// nnum_cum_octree[i * (depth+1) + d] = nodes of octree i in depths 0..d-1
// (prefix sum along the depth axis, computed independently per octree).
sz = (depth + 1) * batch_size;
vector<int> nnum_cum_octree(sz);
for (int i = 0; i < batch_size; ++i) {
nnum_cum_octree[i * (depth + 1)] = 0;
for (int d = 0; d < depth; ++d) {
int p = i * (depth + 1) + d;
nnum_cum_octree[p + 1] = nnum_cum_octree[p] + nnum[p];
}
}
// node and non-empty node number of the batch
// Read the totals out of the last row (index batch_size) of the per-layer
// cumulative tables built above.
vector<int> nnum_batch(depth + 1), nnum_nempty_batch(depth + 1);
for (int d = 0; d < depth + 1; ++d) {
int p = batch_size * (depth + 1) + d;
nnum_batch[d] = nnum_cum_layer[p];
nnum_nempty_batch[d] = nnum_cum_nempty_layer[p];
}
// cumulative node number of the batch
// nnum_cum_batch[d] = total batch nodes in depths 0..d-1.
vector<int> nnum_cum_batch(depth + 2);
nnum_cum_batch[0] = 0;
for (int d = 0; d < depth + 1; ++d) {
nnum_cum_batch[d + 1] = nnum_cum_batch[d] + nnum_batch[d];
}
/// set the octinfo
// Start from octree 0's info, then patch in batch size, the neighbor
// property, and the aggregated node counts.
OctreeInfo info_batch = octree_parsers[0].info();
info_batch.set_batch_size(batch_size);
// add the neighbor property
const int kNeighChannel = 8;
info_batch.set_property(OctreeInfo::kNeigh, kNeighChannel, -1);
// update nodenumber
// NOTE: the order matters — nnum/nempty must be set before the cumulative
// counts and the pointer displacements are derived from them.
info_batch.set_nnum(nnum_batch.data());
info_batch.set_nempty(nnum_nempty_batch.data());
info_batch.set_nnum_cum();
info_batch.set_ptr_dis();
valid = info_batch.check_format(err_msg);
CHECK(valid) << err_msg;
/// reshape the blobs
// +1 element of slack to cover a possible partial Dtype at the end of the
// byte-sized octree (integer division rounds down).
sz = info_batch.sizeof_octree() / sizeof(Dtype) + 1;
octree_output.Reshape(vector<int> {sz});
OctreeParser octbatch_parser;
octbatch_parser.set_cpu(octree_output.mutable_cpu_data(), &info_batch);
/// set data
// Each worker copies whole octrees (strided by thread_num), so no two
// threads ever write the same destination range.
// If the OpenMP is available, the code can be more concise.
//omp_set_num_threads(8);
//#pragma omp parallel for
//for (int i = 0; i < batch_size; ++i) {
auto worker = [&](int thread_id, int thread_num) {
for (int i = thread_id; i < batch_size; i += thread_num) {
// copy key
// TODO: optimize! TODO: IF DEPTH > 8
// the channel and location of key is 1 and -1 (todo: !!! channel 2 for deeper key)
for (int d = 0; d < depth + 1; ++d) {
if (!info_batch.has_property(OctreeInfo::kKey)) break;
int p = i * (depth + 1) + d;
unsigned int* des = octbatch_parser.mutable_key_cpu(d) + nnum_cum_layer[p];
const unsigned int* src = octree_parsers[i].key_cpu(d);
for (int j = 0; j < nnum[p]; ++j) {
des[j] = src[j];
// !!! todo: deal with octree depth > 8
// Tag each key with its batch index by overwriting byte 3 of the
// 32-bit key (the most significant byte on little-endian machines).
// NOTE(review): this silently truncates i to 8 bits — presumably
// batch_size <= 256 is assumed; confirm against callers.
unsigned char* ptr = reinterpret_cast<unsigned char*>(des + j);
ptr[3] = i;
}
}
// copy children
// by default, the channel and location of children is 1 and -1,
// A child entry of -1 marks an empty node and is kept as-is; otherwise
// the child index is shifted by the number of non-empty nodes
// contributed by the preceding octrees at this depth.
for (int d = 0; d < depth + 1; ++d) {
if (!info_batch.has_property(OctreeInfo::kChild)) break;
int p = i * (depth + 1) + d;
int* des = octbatch_parser.mutable_children_cpu(d) + nnum_cum_layer[p];
const int* src = octree_parsers[i].children_cpu(d);
for (int j = 0; j < nnum[p]; ++j) {
des[j] = -1 == src[j] ? src[j] : src[j] + nnum_cum_nempty_layer[p];
}
}
// copy data: !NOTE! the type of signal is float!!!
// Features are stored channel-major per depth: destination channel
// stride is the batch node count, source channel stride is this
// octree's node count. If features live only at the finest depth
// (location == depth), skip all shallower layers.
int feature_channel = info_batch.channel(OctreeInfo::kFeature);
int feature_location = info_batch.locations(OctreeInfo::kFeature);
int depth_start = feature_location == depth ? depth : 0;
for (int d = depth_start; d < depth + 1; ++d) {
if (!info_batch.has_property(OctreeInfo::kFeature)) break;
int p = i * (depth + 1) + d;
for (int c = 0; c < feature_channel; c++) {
float* des = octbatch_parser.mutable_feature_cpu(d) + c * nnum_batch[d] + nnum_cum_layer[p];
const float* src = octree_parsers[i].feature_cpu(d) + c * nnum[p];
for (int j = 0; j < nnum[p]; ++j) { des[j] = src[j]; }
}
}
// copy label: !NOTE! the type of label is float!!!
int label_location = info_batch.locations(OctreeInfo::kLabel);
depth_start = label_location == depth ? depth : 0;
for (int d = depth_start; d < depth + 1; ++d) {
if (!info_batch.has_property(OctreeInfo::kLabel)) break;
int p = i * (depth + 1) + d;
float* des = octbatch_parser.mutable_label_cpu(d) + nnum_cum_layer[p];
const float* src = octree_parsers[i].label_cpu(d);
for (int j = 0; j < nnum[p]; ++j) { des[j] = src[j]; }
}
// copy split label: !NOTE! the type of label is float!!!
int split_location = info_batch.locations(OctreeInfo::kSplit);
depth_start = split_location == depth ? depth : 0;
for (int d = depth_start; d < depth + 1; ++d) {
if (!info_batch.has_property(OctreeInfo::kSplit)) break;
int p = i * (depth + 1) + d;
float* des = octbatch_parser.mutable_split_cpu(d) + nnum_cum_layer[p];
const float* src = octree_parsers[i].split_cpu(d);
for (int j = 0; j < nnum[p]; ++j) des[j] = src[j];
}
}
};
int thread_num = 8;
#ifdef _DEBUG
thread_num = 1; // for debug only
#endif
// Spawn thread_num - 1 workers (slot 0 stays empty) and run the remaining
// share on the calling thread, then join.
vector<shared_ptr<boost::thread> > workers(thread_num);
for (int id = 1; id < thread_num; ++id) {
workers[id].reset(new boost::thread(worker, id, thread_num));
}
worker(0, thread_num); // for the master thread
for (int id = 1; id < thread_num; ++id) {
workers[id]->join();
}
// ==== v2 ====
// calc and set neighbor info
// Neighbors cannot be copied from the inputs because batching changes the
// node indexing; recompute them per depth. Full layers use the regular-grid
// overload; deeper layers derive neighbors from the parent layer's
// neighbors and children (hence the kChild requirement).
for (int d = 1; d < depth + 1; ++d) {
if (!info_batch.has_property(OctreeInfo::kNeigh)) break;
CHECK(info_batch.has_property(OctreeInfo::kChild));
if (d <= full_layer) {
octree::calc_neigh_cpu(
octbatch_parser.mutable_neighbor_cpu(d),
d, batch_size);
} else {
octree::calc_neigh_cpu(
octbatch_parser.mutable_neighbor_cpu(d),
octbatch_parser.neighbor_cpu(d - 1),
octbatch_parser.children_cpu(d - 1),
octbatch_parser.info().node_num(d - 1));
}
}
}