in tensorflow_text/core/kernels/regex_split_kernels.cc [34:110]
// Splits every string of the "input" tensor with the delimiter regex and emits
// four flat outputs: "tokens", "begin_offsets", "end_offsets" and
// "row_splits". Together they are the flat tensors needed to construct
// RaggedTensors for the tokens and their start/end offsets.
//
// row_splits has one more entry than there are input strings: entry 0 is 0 and
// entry i + 1 is the total number of offsets accumulated after input i.
//
// Errors are reported through `ctx`; on failure the method returns early
// without producing the remaining outputs.
void Compute(tensorflow::OpKernelContext* ctx) override {
  std::shared_ptr<RE2> delim_re;
  std::shared_ptr<RE2> keep_delim_re;
  GetRegexFromInput(ctx, &delim_re, &keep_delim_re);
  // NOTE(review): GetRegexFromInput is defined elsewhere; presumably it records
  // failures (e.g. an invalid pattern) on ctx and may leave the pointers unset.
  // Stop here instead of dereferencing a null regex below.
  if (!ctx->status().ok()) return;
  OP_REQUIRES(ctx, delim_re != nullptr && keep_delim_re != nullptr,
              tensorflow::errors::Internal(
                  "Regular expressions for splitting were not initialized."));
  // An empty keep-delimiter pattern means no delimiters are kept.
  const bool should_keep_delim = !keep_delim_re->pattern().empty();

  const Tensor* input_tensor;
  OP_REQUIRES_OK(ctx, ctx->input("input", &input_tensor));
  const auto& input_flat = input_tensor->flat<tstring>();
  const int64 num_inputs = static_cast<int64>(input_flat.size());

  std::vector<int64> begin_offsets;
  std::vector<int64> end_offsets;
  // The views point into the input tensor's strings, which outlive this call.
  std::vector<absl::string_view> tokens;
  std::vector<int64> row_splits;
  row_splits.reserve(static_cast<size_t>(num_inputs) + 1);
  row_splits.push_back(0);

  for (int64 i = 0; i < num_inputs; ++i) {
    const auto& text = input_flat(i);
    // Pass the explicit length: building the view from data() alone would stop
    // at the first NUL byte and silently drop the rest of the string.
    RegexSplit(absl::string_view(text.data(), text.size()), *delim_re,
               should_keep_delim, *keep_delim_re, &tokens, &begin_offsets,
               &end_offsets);
    // The output vectors accumulate across inputs, so the running size marks
    // where the next row starts.
    row_splits.push_back(static_cast<int64>(begin_offsets.size()));
  }

  // Emit the flat Tensors needed to construct RaggedTensors for tokens,
  // start, end offsets.
  std::vector<int64> tokens_shape;
  tokens_shape.push_back(static_cast<int64>(tokens.size()));
  std::vector<int64> offsets_shape;
  offsets_shape.push_back(static_cast<int64>(begin_offsets.size()));
  std::vector<int64> row_splits_shape;
  row_splits_shape.push_back(static_cast<int64>(row_splits.size()));

  Tensor* output_tokens_tensor = nullptr;
  OP_REQUIRES_OK(ctx,
                 ctx->allocate_output("tokens", TensorShape(tokens_shape),
                                      &output_tokens_tensor));
  auto output_tokens = output_tokens_tensor->flat<tstring>();

  Tensor* output_begin_offsets_tensor = nullptr;
  OP_REQUIRES_OK(
      ctx, ctx->allocate_output("begin_offsets", TensorShape(offsets_shape),
                                &output_begin_offsets_tensor));
  auto output_begin_offsets = output_begin_offsets_tensor->flat<int64>();

  // end_offsets shares the begin_offsets shape.
  Tensor* output_end_offsets_tensor = nullptr;
  OP_REQUIRES_OK(
      ctx, ctx->allocate_output("end_offsets", TensorShape(offsets_shape),
                                &output_end_offsets_tensor));
  auto output_end_offsets = output_end_offsets_tensor->flat<int64>();

  Tensor* output_row_splits_tensor = nullptr;
  OP_REQUIRES_OK(
      ctx, ctx->allocate_output("row_splits", TensorShape(row_splits_shape),
                                &output_row_splits_tensor));
  auto output_row_splits = output_row_splits_tensor->flat<int64>();

  // Copy outputs to Tensors.
  for (size_t i = 0; i < tokens.size(); ++i) {
    const absl::string_view token = tokens[i];
    output_tokens(i) = tstring(token.data(), token.length());
  }
  for (size_t i = 0; i < begin_offsets.size(); ++i) {
    output_begin_offsets(i) = begin_offsets[i];
  }
  for (size_t i = 0; i < end_offsets.size(); ++i) {
    output_end_offsets(i) = end_offsets[i];
  }
  for (size_t i = 0; i < row_splits.size(); ++i) {
    output_row_splits(i) = row_splits[i];
  }
}