void merge_octrees()

in caffe/src/caffe/util/octree.cpp [601:800]


void merge_octrees(Blob<Dtype>& octree_output, const vector<vector<char> >& octree_buffer) {
  /// parse the input octrees
  int batch_size = octree_buffer.size();
  vector<OctreeParser> octree_parsers(batch_size);
  for (int i = 0; i < batch_size; ++i) {
    octree_parsers[i].set_cpu(octree_buffer[i].data());
  }

  // get depth and full_layer information
  string err_msg;
  const int depth = octree_parsers[0].info().depth();
  const int full_layer = octree_parsers[0].info().full_layer();
  bool valid = octree_parsers[0].info().check_format(err_msg);
  CHECK(valid) << err_msg;
  for (int i = 1; i < batch_size; ++i) {
    valid = octree_parsers[i].info().check_format(err_msg);
    CHECK(valid) << err_msg;
    CHECK(octree_parsers[0].info().is_consistent(octree_parsers[i].info()))
        << "The formats of input octrees are not consistent, check the database";
  }

  /// get the node number information
  // node and non-empty node number in each octree
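  // both arrays are laid out as [octree i][depth d], i.e. index p = i * (depth + 1) + d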
  int sz = (depth + 1) * batch_size;
  vector<int> nnum(sz), nnum_nempty(sz);
  for (int i = 0; i < batch_size; ++i) {
    for (int d = 0; d < depth + 1; ++d) {
      int p = i * (depth + 1) + d;
      nnum[p] = octree_parsers[i].info().node_num(d);
      nnum_nempty[p] = octree_parsers[i].info().node_num_nempty(d);
    }
  }

  // cumulative node and non-empty node numbers in each layer
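  // nnum_cum_layer[i * (depth + 1) + d] is the offset of octree i within the batched layer d;
  // the last row (i == batch_size) accumulates the per-layer totals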
  sz = (depth + 1) * (batch_size + 1);
  vector<int> nnum_cum_layer(sz), nnum_cum_nempty_layer(sz);
  for (int d = 0; d < depth + 1; ++d) {
    nnum_cum_layer[d] = 0;
    nnum_cum_nempty_layer[d] = 0;
    for (int i = 0; i < batch_size; ++i) {
      int p = i * (depth + 1) + d;
      int q = p + depth + 1;
      nnum_cum_layer[q] = nnum[p] + nnum_cum_layer[p];
      nnum_cum_nempty_layer[q] = nnum_nempty[p] + nnum_cum_nempty_layer[p];
    }
  }

  // cumulative node number for each octree
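  // nnum_cum_octree[i * (depth + 1) + d] is the node count of octree i accumulated over depths 0..d-1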
  sz = (depth + 1) * batch_size;
  vector<int> nnum_cum_octree(sz);
  for (int i = 0; i < batch_size; ++i) {
    nnum_cum_octree[i * (depth + 1)] = 0;
    for (int d = 0; d < depth; ++d) {
      int p = i * (depth + 1) + d;
      nnum_cum_octree[p + 1] = nnum_cum_octree[p] + nnum[p];
    }
  }

  // node and non-empty node number of the batch
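  // read from the last row (i == batch_size) of the cumulative tables above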
  vector<int> nnum_batch(depth + 1), nnum_nempty_batch(depth + 1);
  for (int d = 0; d < depth + 1; ++d) {
    int p = batch_size * (depth + 1) + d;
    nnum_batch[d] = nnum_cum_layer[p];
    nnum_nempty_batch[d] = nnum_cum_nempty_layer[p];
  }

  // cumulative node number of the batch
  vector<int> nnum_cum_batch(depth + 2);
  nnum_cum_batch[0] = 0;
  for (int d = 0; d < depth + 1; ++d) {
    nnum_cum_batch[d + 1] = nnum_cum_batch[d] + nnum_batch[d];
  }


  /// set the octree info
  OctreeInfo info_batch = octree_parsers[0].info();
  info_batch.set_batch_size(batch_size);
  // add the neighbor property
  const int kNeighChannel = 8;
  info_batch.set_property(OctreeInfo::kNeigh, kNeighChannel, -1);
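  // location -1 registers the property for every depth (cf. the depth_start logic in the copy loops below)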
  // update the node numbers
  info_batch.set_nnum(nnum_batch.data());
  info_batch.set_nempty(nnum_nempty_batch.data());
  info_batch.set_nnum_cum();
  info_batch.set_ptr_dis();
  valid = info_batch.check_format(err_msg);
  CHECK(valid) << err_msg;

  /// reshape the blobs
  sz = info_batch.sizeof_octree() / sizeof(Dtype) + 1;
  octree_output.Reshape(vector<int> {sz});
  OctreeParser octbatch_parser;
  octbatch_parser.set_cpu(octree_output.mutable_cpu_data(), &info_batch);
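  // octbatch_parser now reads and writes the memory of octree_output directly, with info_batch as its header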


  /// set data
  // If OpenMP is available, the code can be more concise:
  //omp_set_num_threads(8);
  //#pragma omp parallel for
  //for (int i = 0; i < batch_size; ++i) {
  auto worker = [&](int thread_id, int thread_num) {
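    // each thread processes the octrees i = thread_id, thread_id + thread_num, ... (strided over the batch)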
    for (int i = thread_id; i < batch_size; i += thread_num) {
      // copy key
      // TODO: optimize; TODO: handle octree depth > 8
      // the channel and location of the key are 1 and -1 (TODO: use channel 2 for deeper keys)
      for (int d = 0; d < depth + 1; ++d) {
        if (!info_batch.has_property(OctreeInfo::kKey)) break;
        int p = i * (depth + 1) + d;
        unsigned int* des = octbatch_parser.mutable_key_cpu(d) + nnum_cum_layer[p];
        const unsigned int* src = octree_parsers[i].key_cpu(d);
        for (int j = 0; j < nnum[p]; ++j) {
          des[j] = src[j];
          // TODO: deal with octree depth > 8
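          // for depth <= 8 the key occupies only the low 3 bytes, so the 4th byte stores the batch index i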
          unsigned char* ptr = reinterpret_cast<unsigned char*>(des + j);
          ptr[3] = i;
        }
      }

      // copy children
      // by default, the channel and location of the children are 1 and -1
      for (int d = 0; d < depth + 1; ++d) {
        if (!info_batch.has_property(OctreeInfo::kChild)) break;
        int p = i * (depth + 1) + d;
        int* des = octbatch_parser.mutable_children_cpu(d) + nnum_cum_layer[p];
        const int* src = octree_parsers[i].children_cpu(d);
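        // shift non-empty child indices by the non-empty-node offset of octree i in layer d; empty nodes keep -1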
        for (int j = 0; j < nnum[p]; ++j) {
          des[j] = -1 == src[j] ? src[j] : src[j] + nnum_cum_nempty_layer[p];
        }
      }

      // copy the feature data; NOTE: the signal type is float
      int feature_channel = info_batch.channel(OctreeInfo::kFeature);
      int feature_location = info_batch.locations(OctreeInfo::kFeature);
      int depth_start = feature_location == depth ? depth : 0;
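      // if the feature is stored only at the finest depth (location == depth), copy just that layer; otherwise copy all layers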
      for (int d = depth_start; d < depth + 1; ++d) {
        if (!info_batch.has_property(OctreeInfo::kFeature)) break;
        int p = i * (depth + 1) + d;
        for (int c = 0; c < feature_channel; c++) {
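        // features are channel-major within a layer: the channel stride is nnum_batch[d] in the batch and nnum[p] in octree i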
          float* des = octbatch_parser.mutable_feature_cpu(d) + c * nnum_batch[d] + nnum_cum_layer[p];
          const float* src = octree_parsers[i].feature_cpu(d) + c * nnum[p];
          for (int j = 0; j < nnum[p]; ++j) { des[j] = src[j]; }
        }
      }

      // copy the labels; NOTE: the label type is float
      int label_location = info_batch.locations(OctreeInfo::kLabel);
      depth_start = label_location == depth ? depth : 0;
      for (int d = depth_start; d < depth + 1; ++d) {
        if (!info_batch.has_property(OctreeInfo::kLabel)) break;
        int p = i * (depth + 1) + d;
        float* des = octbatch_parser.mutable_label_cpu(d) + nnum_cum_layer[p];
        const float* src = octree_parsers[i].label_cpu(d);
        for (int j = 0; j < nnum[p]; ++j) { des[j] = src[j]; }
      }

      // copy the split labels; NOTE: the split label type is float
      int split_location = info_batch.locations(OctreeInfo::kSplit);
      depth_start = split_location == depth ? depth : 0;
      for (int d = depth_start; d < depth + 1; ++d) {
        if (!info_batch.has_property(OctreeInfo::kSplit)) break;
        int p = i * (depth + 1) + d;
        float* des = octbatch_parser.mutable_split_cpu(d) + nnum_cum_layer[p];
        const float* src = octree_parsers[i].split_cpu(d);
        for (int j = 0; j < nnum[p]; ++j) des[j] = src[j];
      }
    }
  };

  int thread_num = 8;
#ifdef _DEBUG
  thread_num = 1;   // for debug only
#endif
  vector<shared_ptr<boost::thread> > workers(thread_num);
  for (int id = 1; id < thread_num; ++id) {
    workers[id].reset(new boost::thread(worker, id, thread_num));
  }
  worker(0, thread_num); // for the master thread
  for (int id = 1; id < thread_num; ++id) {
    workers[id]->join();
  }

  // ==== v2 ====
  // calc and set neighbor info
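  // layers up to full_layer are complete, so their neighbors are computed directly from the layer index and batch size;
  // deeper layers derive their neighbors from the parent layer's neighbors and children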
  for (int d = 1; d < depth + 1; ++d) {
    if (!info_batch.has_property(OctreeInfo::kNeigh)) break;
    CHECK(info_batch.has_property(OctreeInfo::kChild));

    if (d <= full_layer) {
      octree::calc_neigh_cpu(
          octbatch_parser.mutable_neighbor_cpu(d),
          d, batch_size);
    } else {
      octree::calc_neigh_cpu(
          octbatch_parser.mutable_neighbor_cpu(d),
          octbatch_parser.neighbor_cpu(d - 1),
          octbatch_parser.children_cpu(d - 1),
          octbatch_parser.info().node_num(d - 1));
    }
  }
}
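
For intuition, below is a minimal, self-contained sketch of the offset bookkeeping used above. It uses made-up node counts and has no Caffe dependencies; the names mirror nnum, nnum_cum_layer and nnum_batch from the listing, but the example itself is purely illustrative.

// Illustration only: mirrors the nnum_cum_layer bookkeeping of merge_octrees()
// with hypothetical node counts and no Caffe dependencies.
#include <cstdio>
#include <vector>

int main() {
  const int batch_size = 2, depth = 2;            // two tiny octrees, depths 0..2
  int nnum[2][3] = { {1, 8, 16}, {1, 8, 24} };    // nnum[i][d]: node count of octree i at depth d

  // cum[i][d]: offset of octree i inside the batched layer d;
  // row i == batch_size holds the per-layer totals (the nnum_batch of the code above).
  std::vector<std::vector<int> > cum(batch_size + 1, std::vector<int>(depth + 1, 0));
  for (int d = 0; d <= depth; ++d) {
    for (int i = 0; i < batch_size; ++i) {
      cum[i + 1][d] = cum[i][d] + nnum[i][d];
    }
  }

  // octree 1 at depth 2 is copied into [cum[1][2], cum[1][2] + nnum[1][2])
  // of a batched layer holding cum[batch_size][2] nodes in total.
  std::printf("offset %d, count %d, layer total %d\n",
              cum[1][2], nnum[1][2], cum[batch_size][2]);
  return 0;
}

With these made-up counts the sketch prints offset 16, count 24, layer total 40, which is exactly the role nnum_cum_layer and nnum_batch play in the copy loops of merge_octrees().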