in tensorflow_text/core/kernels/normalize_kernels.cc [222:289]
// Normalizes every string of the "input" tensor with the ICU normalizer
// selected by `normalization_form_`.
//
// Outputs (both allocated with the shape of the input tensor):
//   0: the normalized strings.
//   1: one OffsetMapVariant per input string, holding the icu::Edits that ICU
//      recorded while normalizing that string.
//
// Sets an error status on `context` and returns early when:
//   - the "input" tensor or an output cannot be obtained/allocated,
//   - `normalization_form_` is not one of NFKC, NFC, NFD, NFKD
//     (InvalidArgument),
//   - ICU cannot provide the normalizer or fails to normalize a string
//     (Internal).
void Compute(tensorflow::OpKernelContext* context) override {
  const tensorflow::Tensor* input_tensor;
  OP_REQUIRES_OK(context, context->input("input", &input_tensor));
  const auto& input_vec = input_tensor->flat<tstring>();

  tensorflow::Tensor* output_tensor;
  OP_REQUIRES_OK(context, context->allocate_output(0, input_tensor->shape(),
                                                   &output_tensor));
  tensorflow::Tensor* output_offsets_map_tensor;
  OP_REQUIRES_OK(context,
                 context->allocate_output(1, input_tensor->shape(),
                                          &output_offsets_map_tensor));
  auto output_vec = output_tensor->flat<tstring>();
  auto output_offsets_map_vec = output_offsets_map_tensor->flat<Variant>();

  // Pick the ICU normalizer matching the requested form. Every branch either
  // leaves `normalizer` set or reports an error and returns, so the loop
  // below never sees a null normalizer.
  icu::ErrorCode icu_error;
  const icu::Normalizer2* normalizer = nullptr;
  if (normalization_form_ == "NFKC") {
    normalizer = icu::Normalizer2::getNFKCInstance(icu_error);
    OP_REQUIRES(context, icu_error.isSuccess(),
                errors::Internal(absl::StrCat(
                    icu_error.errorName(),
                    ": Could not retrieve ICU NFKC normalizer")));
  } else if (normalization_form_ == "NFC") {
    normalizer = icu::Normalizer2::getNFCInstance(icu_error);
    OP_REQUIRES(context, icu_error.isSuccess(),
                errors::Internal(
                    absl::StrCat(icu_error.errorName(),
                                 ": Could not retrieve ICU NFC normalizer")));
  } else if (normalization_form_ == "NFD") {
    normalizer = icu::Normalizer2::getNFDInstance(icu_error);
    OP_REQUIRES(context, icu_error.isSuccess(),
                errors::Internal(
                    absl::StrCat(icu_error.errorName(),
                                 ": Could not retrieve ICU NFD normalizer")));
  } else if (normalization_form_ == "NFKD") {
    normalizer = icu::Normalizer2::getNFKDInstance(icu_error);
    OP_REQUIRES(context, icu_error.isSuccess(),
                errors::Internal(absl::StrCat(
                    icu_error.errorName(),
                    ": Could not retrieve ICU NFKD normalizer")));
  } else {
    OP_REQUIRES(context, false,
                errors::InvalidArgument(absl::StrCat(
                    "Offset not supported for this normalization form: ",
                    normalization_form_)));
  }

  for (int64 i = 0; i < input_vec.size(); ++i) {
    OffsetMapVariant variant;
    string output_text;
    icu::StringByteSink<string> byte_sink(&output_text);
    const auto& input = input_vec(i);
    // Let ICU record the edits straight into the variant that will be stored
    // in the output, so no intermediate icu::Edits object is needed.
    normalizer->normalizeUTF8(0, icu::StringPiece(input.data(), input.size()),
                              byte_sink, &variant.edits_, icu_error);
    OP_REQUIRES(
        context, icu_error.isSuccess(),
        errors::Internal(absl::StrCat(icu_error.errorName(),
                                      ": Could not normalize input string: ",
                                      absl::string_view(input))));
    output_vec(i) = output_text;
    // Move rather than copy: `variant` is not used again, and assigning it as
    // an lvalue would copy the recorded edits for every element.
    output_offsets_map_vec(i) = std::move(variant);
  }
}