void Compute()

in tensorflow_text/core/kernels/wordpiece_kernel.cc [198:286]


  // Wordpiece-tokenizes each element of the string tensor `input_values`.
  //
  // Emits four flat output tensors:
  //   output_values      - all subwords, concatenated across input tokens.
  //   output_row_lengths - row partition (lengths or splits, depending on
  //                        row_partition_type_) mapping subwords back to rows.
  //   start_values       - begin byte offset of each subword in its token.
  //   limit_values       - end byte offset of each subword in its token.
  void Compute(OpKernelContext* ctx) override {
    const Tensor* input_values;
    OP_REQUIRES_OK(ctx, ctx->input("input_values", &input_values));
    const auto& values_vec = input_values->flat<tstring>();

    lookup::LookupInterface* lookup_table;
    OP_REQUIRES_OK(ctx,
                   GetLookupTable("vocab_lookup_table", ctx, &lookup_table));
    // GetLookupTable takes a reference on the table; drop it on scope exit.
    core::ScopedUnref unref_me(lookup_table);
    LookupTableVocab vocab_map(lookup_table, ctx);

    // Accumulators appended to by WordpieceTokenize for every input token.
    std::vector<string> subwords;
    std::vector<int> begin_offset;
    std::vector<int> end_offset;
    std::vector<int> row_partition;

    if (row_partition_type_ == ROW_SPLITS) {
      // Row splits start with a leading 0 so that
      // splits[i + 1] - splits[i] is the length of row i.
      row_partition.push_back(0);
    }

    // Iterate through all the values and wordpiece tokenize them.
    // values_vec.size() is a signed 64-bit count, so use int64 for the index.
    for (int64 i = 0; i < values_vec.size(); ++i) {
      // Tokenize into subwords and record the offset locations.
      int num_wordpieces = 0;
      OP_REQUIRES_OK(
          ctx, ToStatus(WordpieceTokenize(
                   values_vec(i), max_bytes_per_word_, max_chars_per_token_,
                   suffix_indicator_, use_unknown_token_, unknown_token_,
                   split_unknown_characters_, &vocab_map, &subwords,
                   &begin_offset, &end_offset, &num_wordpieces)));

      // Record the row partition entry for this token.
      switch (row_partition_type_) {
        case ROW_LENGTHS:
          row_partition.push_back(num_wordpieces);
          break;
        case ROW_SPLITS:
          // Running total: each split is the previous split plus this
          // token's wordpiece count.
          row_partition.push_back(num_wordpieces + row_partition.back());
          break;
      }
    }

    // Three of the four outputs share the flat subword shape.
    const int64 num_subwords = static_cast<int64>(subwords.size());
    const int64 num_partitions = static_cast<int64>(row_partition.size());

    Tensor* output_values;
    OP_REQUIRES_OK(ctx, ctx->allocate_output("output_values",
                                             TensorShape({num_subwords}),
                                             &output_values));
    auto output_values_vec = output_values->vec<tstring>();

    Tensor* output_row_partition;
    OP_REQUIRES_OK(ctx, ctx->allocate_output("output_row_lengths",
                                             TensorShape({num_partitions}),
                                             &output_row_partition));
    auto output_row_partition_vec = output_row_partition->vec<int64>();

    Tensor* start_values;
    OP_REQUIRES_OK(ctx, ctx->allocate_output("start_values",
                                             TensorShape({num_subwords}),
                                             &start_values));
    auto start_values_vec = start_values->vec<int64>();

    Tensor* limit_values;
    OP_REQUIRES_OK(ctx, ctx->allocate_output("limit_values",
                                             TensorShape({num_subwords}),
                                             &limit_values));
    auto limit_values_vec = limit_values->vec<int64>();

    // Copy the accumulated results into the output tensors. The subword
    // strings are moved, not copied, since `subwords` is discarded after
    // this point (falls back to a copy if tstring has no move-from-string).
    for (size_t i = 0; i < subwords.size(); ++i) {
      output_values_vec(i) = std::move(subwords[i]);
    }

    for (size_t i = 0; i < row_partition.size(); ++i) {
      output_row_partition_vec(i) = row_partition[i];
    }

    for (size_t i = 0; i < begin_offset.size(); ++i) {
      start_values_vec(i) = begin_offset[i];
    }

    for (size_t i = 0; i < end_offset.size(); ++i) {
      limit_values_vec(i) = end_offset[i];
    }
  }