void Compute()

in tensorflow_text/core/kernels/split_merge_tokenize_kernel.cc [124:206]


  // Tokenizes every string in `input_values` with TokenizeByLabel, using the
  // slice of `labels` selected by `row_splits` for that string, and emits the
  // flattened tokens together with their offsets.
  //
  // Inputs (looked up by name on `ctx`):
  //   input_values: vector of strings to tokenize, one per row.
  //   labels:       tensor handed to TokenizeByLabel; row i receives the slice
  //                 [row_splits[i], row_splits[i + 1]) along dimension 0.
  //   row_splits:   int32 vector of row boundaries into `labels`; it must have
  //                 exactly one more element than `input_values`.
  //
  // Outputs:
  //   output_values:     all tokens, concatenated across rows.
  //   output_row_splits: int64 running token counts, starting at 0, with one
  //                      entry appended per input string.
  //   start_values:      int64 begin offset of each token, as reported by
  //                      TokenizeByLabel.
  //   limit_values:      int64 end offset of each token, as reported by
  //                      TokenizeByLabel.
  //
  // Malformed inputs are reported on `ctx` as InvalidArgument and the method
  // returns early without producing outputs.
  void Compute(OpKernelContext* ctx) override {
    const Tensor* input_values;
    OP_REQUIRES_OK(ctx, ctx->input("input_values", &input_values));

    const Tensor* labels;
    OP_REQUIRES_OK(ctx, ctx->input("labels", &labels));
    const Tensor* row_splits;
    OP_REQUIRES_OK(ctx, ctx->input("row_splits", &row_splits));

    // dim_size(0) and the flat indexing below are only meaningful for vectors.
    // With a higher-rank `input_values` the flat element count can exceed
    // dim_size(0), which would index `row_splits` out of bounds.
    OP_REQUIRES(ctx, input_values->dims() == 1,
                errors::InvalidArgument(
                    "Expecting input_values to be a vector, got rank ",
                    input_values->dims()));
    OP_REQUIRES(ctx, row_splits->dims() == 1,
                errors::InvalidArgument(
                    "Expecting row_splits to be a vector, got rank ",
                    row_splits->dims()));
    // `labels` is only sliced along dimension 0 here, so that dimension must
    // exist.
    OP_REQUIRES(ctx, labels->dims() >= 1,
                errors::InvalidArgument(
                    "Expecting labels to have at least rank 1, got rank ",
                    labels->dims()));
    OP_REQUIRES(ctx, input_values->dim_size(0) == row_splits->dim_size(0) - 1,
                errors::InvalidArgument("Expecting row_splits have ",
                                        input_values->dim_size(0) + 1,
                                        " elements, got ",
                                        row_splits->dim_size(0)));

    const auto& values_vec = input_values->flat<tstring>();
    const auto& row_splits_vec = row_splits->flat<int32>();

    // Tensor::Slice CHECK-fails (aborting the process) unless
    // 0 <= start <= limit <= dim_size(0). Validate every boundary up front so
    // that bad input surfaces as an InvalidArgument status instead.
    const int64 num_labels = labels->dim_size(0);
    int32 previous_split = 0;
    for (int64 i = 0; i < row_splits_vec.size(); ++i) {
      const int32 split = row_splits_vec(i);
      OP_REQUIRES(
          ctx, split >= previous_split && split <= num_labels,
          errors::InvalidArgument(
              "Expecting row_splits to be non-negative, non-decreasing and "
              "at most ",
              num_labels, " (the number of labels), got ", split,
              " at index ", i));
      previous_split = split;
    }

    std::vector<string> tokens;
    std::vector<int> begin_offset;
    std::vector<int> end_offset;
    std::vector<int> output_row_splits(1, 0);
    output_row_splits.reserve(values_vec.size() + 1);

    // Iterate through all the values and tokenize them.
    for (int64 i = 0; i < values_vec.size(); ++i) {
      // Tokenize into tokens and record the offset locations.
      int num_tokens = 0;
      OP_REQUIRES_OK(
          ctx, TokenizeByLabel(
                   values_vec(i),
                   labels->Slice(row_splits_vec(i), row_splits_vec(i + 1)),
                   force_split_at_break_character_, &tokens, &begin_offset,
                   &end_offset, &num_tokens));

      // Record the row splits.
      output_row_splits.push_back(num_tokens + output_row_splits.back());
    }

    // All per-token outputs are allocated from tokens.size(); make sure the
    // offset vectors agree before writing through them.
    OP_REQUIRES(ctx,
                begin_offset.size() == tokens.size() &&
                    end_offset.size() == tokens.size(),
                errors::Internal("Tokenizer produced ", tokens.size(),
                                 " tokens but ", begin_offset.size(),
                                 " begin offsets and ", end_offset.size(),
                                 " end offsets"));

    std::vector<int64> output_tokens_shape;
    output_tokens_shape.push_back(tokens.size());

    std::vector<int64> output_row_splits_shape;
    output_row_splits_shape.push_back(output_row_splits.size());

    Tensor* output_values;
    OP_REQUIRES_OK(ctx, ctx->allocate_output("output_values",
                                             TensorShape(output_tokens_shape),
                                             &output_values));
    auto output_values_vec = output_values->vec<tstring>();

    Tensor* output_row_splits_tensor;
    OP_REQUIRES_OK(ctx,
                   ctx->allocate_output("output_row_splits",
                                        TensorShape(output_row_splits_shape),
                                        &output_row_splits_tensor));
    auto output_row_splits_vec = output_row_splits_tensor->vec<int64>();

    Tensor* start_values;
    OP_REQUIRES_OK(ctx, ctx->allocate_output("start_values",
                                             TensorShape(output_tokens_shape),
                                             &start_values));
    auto start_values_vec = start_values->vec<int64>();

    Tensor* limit_values;
    OP_REQUIRES_OK(ctx, ctx->allocate_output("limit_values",
                                             TensorShape(output_tokens_shape),
                                             &limit_values));
    auto limit_values_vec = limit_values->vec<int64>();

    // Tokens and their offsets are parallel arrays (verified above), so a
    // single pass fills all three per-token outputs.
    const int64 total_tokens = static_cast<int64>(tokens.size());
    for (int64 i = 0; i < total_tokens; ++i) {
      output_values_vec(i) = tokens[i];
      start_values_vec(i) = begin_offset[i];
      limit_values_vec(i) = end_offset[i];
    }

    const int64 total_splits = static_cast<int64>(output_row_splits.size());
    for (int64 i = 0; i < total_splits; ++i) {
      output_row_splits_vec(i) = output_row_splits[i];
    }
  }