std::shared_ptr make_compute_options()

in r/src/compute.cpp [129:583]


std::shared_ptr<arrow::compute::FunctionOptions> make_compute_options(
    std::string func_name, cpp11::list options) {
  if (func_name == "filter") {
    using Options = arrow::compute::FilterOptions;
    auto out = std::make_shared<Options>(Options::Defaults());
    SEXP keep_na = options["keep_na"];
    if (!Rf_isNull(keep_na) && cpp11::as_cpp<bool>(keep_na)) {
      out->null_selection_behavior = Options::EMIT_NULL;
    }
    return out;
  }

  if (func_name == "take") {
    using Options = arrow::compute::TakeOptions;
    auto out = std::make_shared<Options>(Options::Defaults());
    return out;
  }

  if (func_name == "array_sort_indices") {
    using Order = arrow::compute::SortOrder;
    using Options = arrow::compute::ArraySortOptions;
    // false means descending, true means ascending
    auto order = cpp11::as_cpp<bool>(options["order"]);
    auto out =
        std::make_shared<Options>(Options(order ? Order::Descending : Order::Ascending));
    return out;
  }

  if (func_name == "sort_indices") {
    using Key = arrow::compute::SortKey;
    using Order = arrow::compute::SortOrder;
    using Options = arrow::compute::SortOptions;
    auto names = cpp11::as_cpp<std::vector<std::string>>(options["names"]);
    // false means descending, true means ascending
    // cpp11 does not support bool here so use int
    auto orders = cpp11::as_cpp<std::vector<int>>(options["orders"]);
    std::vector<Key> keys;
    for (size_t i = 0; i < names.size(); i++) {
      keys.push_back(
          Key(names[i], (orders[i] > 0) ? Order::Descending : Order::Ascending));
    }
    auto out = std::make_shared<Options>(Options(keys));
    return out;
  }

  if (func_name == "all" || func_name == "hash_all" || func_name == "any" ||
      func_name == "hash_any" || func_name == "approximate_median" ||
      func_name == "hash_approximate_median" || func_name == "mean" ||
      func_name == "hash_mean" || func_name == "min_max" || func_name == "hash_min_max" ||
      func_name == "min" || func_name == "hash_min" || func_name == "max" ||
      func_name == "hash_max" || func_name == "sum" || func_name == "hash_sum" ||
      func_name == "product" || func_name == "hash_product") {
    using Options = arrow::compute::ScalarAggregateOptions;
    auto out = std::make_shared<Options>(Options::Defaults());
    if (!Rf_isNull(options["min_count"])) {
      out->min_count = cpp11::as_cpp<int>(options["min_count"]);
    }
    if (!Rf_isNull(options["skip_nulls"])) {
      out->skip_nulls = cpp11::as_cpp<bool>(options["skip_nulls"]);
    }
    return out;
  }

  if (func_name == "tdigest" || func_name == "hash_tdigest") {
    using Options = arrow::compute::TDigestOptions;
    auto out = std::make_shared<Options>(Options::Defaults());
    if (!Rf_isNull(options["q"])) {
      out->q = cpp11::as_cpp<std::vector<double>>(options["q"]);
    }
    if (!Rf_isNull(options["skip_nulls"])) {
      out->skip_nulls = cpp11::as_cpp<bool>(options["skip_nulls"]);
    }
    return out;
  }

  if (func_name == "count") {
    using Options = arrow::compute::CountOptions;
    auto out = std::make_shared<Options>(Options::Defaults());
    out->mode =
        cpp11::as_cpp<bool>(options["na.rm"]) ? Options::ONLY_VALID : Options::ONLY_NULL;
    return out;
  }

  if (func_name == "count_distinct" || func_name == "hash_count_distinct") {
    using Options = arrow::compute::CountOptions;
    auto out = std::make_shared<Options>(Options::Defaults());
    out->mode =
        cpp11::as_cpp<bool>(options["na.rm"]) ? Options::ONLY_VALID : Options::ALL;
    return out;
  }

  if (func_name == "min_element_wise" || func_name == "max_element_wise") {
    using Options = arrow::compute::ElementWiseAggregateOptions;
    bool skip_nulls = true;
    if (!Rf_isNull(options["skip_nulls"])) {
      skip_nulls = cpp11::as_cpp<bool>(options["skip_nulls"]);
    }
    return std::make_shared<Options>(skip_nulls);
  }

  if (func_name == "quantile") {
    using Options = arrow::compute::QuantileOptions;
    auto out = std::make_shared<Options>(Options::Defaults());
    SEXP q = options["q"];
    if (!Rf_isNull(q) && TYPEOF(q) == REALSXP) {
      out->q = cpp11::as_cpp<std::vector<double>>(q);
    }
    SEXP interpolation = options["interpolation"];
    if (!Rf_isNull(interpolation) && TYPEOF(interpolation) == INTSXP &&
        XLENGTH(interpolation) == 1) {
      out->interpolation =
          cpp11::as_cpp<enum arrow::compute::QuantileOptions::Interpolation>(
              interpolation);
    }
    if (!Rf_isNull(options["min_count"])) {
      out->min_count = cpp11::as_cpp<uint32_t>(options["min_count"]);
    }
    if (!Rf_isNull(options["skip_nulls"])) {
      out->skip_nulls = cpp11::as_cpp<bool>(options["skip_nulls"]);
    }
    return out;
  }

  if (func_name == "is_in" || func_name == "index_in") {
    using Options = arrow::compute::SetLookupOptions;
    return std::make_shared<Options>(cpp11::as_cpp<arrow::Datum>(options["value_set"]),
                                     cpp11::as_cpp<bool>(options["skip_nulls"]));
  }

  if (func_name == "index") {
    using Options = arrow::compute::IndexOptions;
    return std::make_shared<Options>(
        cpp11::as_cpp<std::shared_ptr<arrow::Scalar>>(options["value"]));
  }

  if (func_name == "is_null") {
    using Options = arrow::compute::NullOptions;
    auto out = std::make_shared<Options>(Options::Defaults());
    if (!Rf_isNull(options["nan_is_null"])) {
      out->nan_is_null = cpp11::as_cpp<bool>(options["nan_is_null"]);
    }
    return out;
  }

  if (func_name == "dictionary_encode") {
    using Options = arrow::compute::DictionaryEncodeOptions;
    auto out = std::make_shared<Options>(Options::Defaults());
    if (!Rf_isNull(options["null_encoding_behavior"])) {
      out->null_encoding_behavior = cpp11::as_cpp<
          enum arrow::compute::DictionaryEncodeOptions::NullEncodingBehavior>(
          options["null_encoding_behavior"]);
    }
    return out;
  }

  if (func_name == "cast") {
    return make_cast_options(options);
  }

  if (func_name == "binary_join_element_wise") {
    using Options = arrow::compute::JoinOptions;
    auto out = std::make_shared<Options>(Options::Defaults());
    if (!Rf_isNull(options["null_handling"])) {
      out->null_handling =
          cpp11::as_cpp<enum arrow::compute::JoinOptions::NullHandlingBehavior>(
              options["null_handling"]);
    }
    if (!Rf_isNull(options["null_replacement"])) {
      out->null_replacement = cpp11::as_cpp<std::string>(options["null_replacement"]);
    }
    return out;
  }

  if (func_name == "make_struct") {
    using Options = arrow::compute::MakeStructOptions;
    // TODO (ARROW-13371): accept `field_nullability` and `field_metadata` options
    return std::make_shared<Options>(
        cpp11::as_cpp<std::vector<std::string>>(options["field_names"]));
  }

  if (func_name == "match_substring" || func_name == "match_substring_regex" ||
      func_name == "find_substring" || func_name == "find_substring_regex" ||
      func_name == "match_like" || func_name == "starts_with" ||
      func_name == "ends_with" || func_name == "count_substring" ||
      func_name == "count_substring_regex") {
    using Options = arrow::compute::MatchSubstringOptions;
    bool ignore_case = false;
    if (!Rf_isNull(options["ignore_case"])) {
      ignore_case = cpp11::as_cpp<bool>(options["ignore_case"]);
    }
    return std::make_shared<Options>(cpp11::as_cpp<std::string>(options["pattern"]),
                                     ignore_case);
  }

  if (func_name == "replace_substring" || func_name == "replace_substring_regex") {
    using Options = arrow::compute::ReplaceSubstringOptions;
    int64_t max_replacements = -1;
    if (!Rf_isNull(options["max_replacements"])) {
      max_replacements = cpp11::as_cpp<int64_t>(options["max_replacements"]);
    }
    return std::make_shared<Options>(cpp11::as_cpp<std::string>(options["pattern"]),
                                     cpp11::as_cpp<std::string>(options["replacement"]),
                                     max_replacements);
  }

  if (func_name == "extract_regex") {
    using Options = arrow::compute::ExtractRegexOptions;
    return std::make_shared<Options>(cpp11::as_cpp<std::string>(options["pattern"]));
  }

  if (func_name == "day_of_week") {
    using Options = arrow::compute::DayOfWeekOptions;
    bool count_from_zero = false;
    if (!Rf_isNull(options["count_from_zero"])) {
      count_from_zero = cpp11::as_cpp<bool>(options["count_from_zero"]);
    }
    return std::make_shared<Options>(count_from_zero,
                                     cpp11::as_cpp<uint32_t>(options["week_start"]));
  }

  if (func_name == "iso_week") {
    return std::make_shared<arrow::compute::WeekOptions>(
        arrow::compute::WeekOptions::ISODefaults());
  }

  if (func_name == "us_week") {
    return std::make_shared<arrow::compute::WeekOptions>(
        arrow::compute::WeekOptions::USDefaults());
  }

  if (func_name == "week") {
    using Options = arrow::compute::WeekOptions;
    bool week_starts_monday = true;
    bool count_from_zero = false;
    bool first_week_is_fully_in_year = false;
    if (!Rf_isNull(options["week_starts_monday"])) {
      week_starts_monday = cpp11::as_cpp<bool>(options["week_starts_monday"]);
    }
    if (!Rf_isNull(options["count_from_zero"])) {
      count_from_zero = cpp11::as_cpp<bool>(options["count_from_zero"]);
    }
    if (!Rf_isNull(options["first_week_is_fully_in_year"])) {
      count_from_zero = cpp11::as_cpp<bool>(options["first_week_is_fully_in_year"]);
    }
    return std::make_shared<Options>(week_starts_monday, count_from_zero,
                                     first_week_is_fully_in_year);
  }

  if (func_name == "strptime") {
    using Options = arrow::compute::StrptimeOptions;
    bool error_is_null = false;
    if (!Rf_isNull(options["error_is_null"])) {
      error_is_null = cpp11::as_cpp<bool>(options["error_is_null"]);
    }
    return std::make_shared<Options>(
        cpp11::as_cpp<std::string>(options["format"]),
        cpp11::as_cpp<arrow::TimeUnit::type>(options["unit"]), error_is_null);
  }

  if (func_name == "strftime") {
    using Options = arrow::compute::StrftimeOptions;
    return std::make_shared<Options>(
        Options(cpp11::as_cpp<std::string>(options["format"]),
                cpp11::as_cpp<std::string>(options["locale"])));
  }

  if (func_name == "assume_timezone") {
    using Options = arrow::compute::AssumeTimezoneOptions;
    enum Options::Ambiguous ambiguous = Options::AMBIGUOUS_RAISE;
    enum Options::Nonexistent nonexistent = Options::NONEXISTENT_RAISE;

    if (!Rf_isNull(options["ambiguous"])) {
      ambiguous = cpp11::as_cpp<enum Options::Ambiguous>(options["ambiguous"]);
    }
    if (!Rf_isNull(options["nonexistent"])) {
      nonexistent = cpp11::as_cpp<enum Options::Nonexistent>(options["nonexistent"]);
    }

    return std::make_shared<Options>(cpp11::as_cpp<std::string>(options["timezone"]),
                                     ambiguous, nonexistent);
  }

  if (func_name == "split_pattern" || func_name == "split_pattern_regex") {
    using Options = arrow::compute::SplitPatternOptions;
    int64_t max_splits = -1;
    if (!Rf_isNull(options["max_splits"])) {
      max_splits = cpp11::as_cpp<int64_t>(options["max_splits"]);
    }
    bool reverse = false;
    if (!Rf_isNull(options["reverse"])) {
      reverse = cpp11::as_cpp<bool>(options["reverse"]);
    }
    return std::make_shared<Options>(cpp11::as_cpp<std::string>(options["pattern"]),
                                     max_splits, reverse);
  }

  if (func_name == "utf8_lpad" || func_name == "utf8_rpad" ||
      func_name == "utf8_center" || func_name == "ascii_lpad" ||
      func_name == "ascii_rpad" || func_name == "ascii_center") {
    using Options = arrow::compute::PadOptions;
    return std::make_shared<Options>(cpp11::as_cpp<int64_t>(options["width"]),
                                     cpp11::as_cpp<std::string>(options["padding"]));
  }

  if (func_name == "utf8_split_whitespace" || func_name == "ascii_split_whitespace") {
    using Options = arrow::compute::SplitOptions;
    int64_t max_splits = -1;
    if (!Rf_isNull(options["max_splits"])) {
      max_splits = cpp11::as_cpp<int64_t>(options["max_splits"]);
    }
    bool reverse = false;
    if (!Rf_isNull(options["reverse"])) {
      reverse = cpp11::as_cpp<bool>(options["reverse"]);
    }
    return std::make_shared<Options>(max_splits, reverse);
  }

  if (func_name == "utf8_trim" || func_name == "utf8_ltrim" ||
      func_name == "utf8_rtrim" || func_name == "ascii_trim" ||
      func_name == "ascii_ltrim" || func_name == "ascii_rtrim") {
    using Options = arrow::compute::TrimOptions;
    return std::make_shared<Options>(cpp11::as_cpp<std::string>(options["characters"]));
  }

  if (func_name == "utf8_slice_codeunits" || func_name == "binary_slice") {
    using Options = arrow::compute::SliceOptions;

    int64_t step = 1;
    if (!Rf_isNull(options["step"])) {
      step = cpp11::as_cpp<int64_t>(options["step"]);
    }

    int64_t stop = std::numeric_limits<int32_t>::max();
    if (!Rf_isNull(options["stop"])) {
      stop = cpp11::as_cpp<int64_t>(options["stop"]);
    }

    return std::make_shared<Options>(cpp11::as_cpp<int64_t>(options["start"]), stop,
                                     step);
  }

  if (func_name == "utf8_replace_slice" || func_name == "binary_replace_slice") {
    using Options = arrow::compute::ReplaceSliceOptions;

    return std::make_shared<Options>(cpp11::as_cpp<int64_t>(options["start"]),
                                     cpp11::as_cpp<int64_t>(options["stop"]),
                                     cpp11::as_cpp<std::string>(options["replacement"]));
  }

  if (func_name == "variance" || func_name == "stddev" || func_name == "hash_variance" ||
      func_name == "hash_stddev") {
    using Options = arrow::compute::VarianceOptions;
    auto out = std::make_shared<Options>();
    out->ddof = cpp11::as_cpp<int>(options["ddof"]);
    if (!Rf_isNull(options["min_count"])) {
      out->min_count = cpp11::as_cpp<uint32_t>(options["min_count"]);
    }
    if (!Rf_isNull(options["skip_nulls"])) {
      out->skip_nulls = cpp11::as_cpp<bool>(options["skip_nulls"]);
    }
    return out;
  }

  if (func_name == "mode") {
    using Options = arrow::compute::ModeOptions;
    auto out = std::make_shared<Options>(Options::Defaults());
    if (!Rf_isNull(options["n"])) {
      out->n = cpp11::as_cpp<int64_t>(options["n"]);
    }
    if (!Rf_isNull(options["min_count"])) {
      out->min_count = cpp11::as_cpp<uint32_t>(options["min_count"]);
    }
    if (!Rf_isNull(options["skip_nulls"])) {
      out->skip_nulls = cpp11::as_cpp<bool>(options["skip_nulls"]);
    }
    return out;
  }

  if (func_name == "partition_nth_indices") {
    using Options = arrow::compute::PartitionNthOptions;
    return std::make_shared<Options>(cpp11::as_cpp<int64_t>(options["pivot"]));
  }

  if (func_name == "round") {
    using Options = arrow::compute::RoundOptions;
    auto out = std::make_shared<Options>(Options::Defaults());
    if (!Rf_isNull(options["ndigits"])) {
      out->ndigits = cpp11::as_cpp<int64_t>(options["ndigits"]);
    }
    SEXP round_mode = options["round_mode"];
    if (!Rf_isNull(round_mode)) {
      out->round_mode = cpp11::as_cpp<enum arrow::compute::RoundMode>(round_mode);
    }
    return out;
  }

  if (func_name == "round_temporal" || func_name == "floor_temporal" ||
      func_name == "ceil_temporal") {
    using Options = arrow::compute::RoundTemporalOptions;

    int64_t multiple = 1;
    enum arrow::compute::CalendarUnit unit = arrow::compute::CalendarUnit::DAY;
    bool week_starts_monday = true;
    bool ceil_is_strictly_greater = true;
    bool calendar_based_origin = true;

    if (!Rf_isNull(options["multiple"])) {
      multiple = cpp11::as_cpp<int64_t>(options["multiple"]);
    }
    if (!Rf_isNull(options["unit"])) {
      unit = cpp11::as_cpp<enum arrow::compute::CalendarUnit>(options["unit"]);
    }
    if (!Rf_isNull(options["week_starts_monday"])) {
      week_starts_monday = cpp11::as_cpp<bool>(options["week_starts_monday"]);
    }
    if (!Rf_isNull(options["ceil_is_strictly_greater"])) {
      ceil_is_strictly_greater = cpp11::as_cpp<bool>(options["ceil_is_strictly_greater"]);
    }
    if (!Rf_isNull(options["calendar_based_origin"])) {
      calendar_based_origin = cpp11::as_cpp<bool>(options["calendar_based_origin"]);
    }
    return std::make_shared<Options>(multiple, unit, week_starts_monday,
                                     ceil_is_strictly_greater, calendar_based_origin);
  }

  if (func_name == "round_to_multiple") {
    using Options = arrow::compute::RoundToMultipleOptions;
    auto out = std::make_shared<Options>(Options::Defaults());
    if (!Rf_isNull(options["multiple"])) {
      out->multiple = std::make_shared<arrow::DoubleScalar>(
          cpp11::as_cpp<double>(options["multiple"]));
    }
    SEXP round_mode = options["round_mode"];
    if (!Rf_isNull(round_mode)) {
      out->round_mode = cpp11::as_cpp<enum arrow::compute::RoundMode>(round_mode);
    }
    return out;
  }

  if (func_name == "struct_field") {
    using Options = arrow::compute::StructFieldOptions;
    if (!Rf_isNull(options["indices"])) {
      return std::make_shared<Options>(
          cpp11::as_cpp<std::vector<int>>(options["indices"]));
    } else {
      // field_ref
      return std::make_shared<Options>(
          *cpp11::as_cpp<std::shared_ptr<arrow::compute::Expression>>(
               options["field_ref"])
               ->field_ref());
    }
  }

  return nullptr;
}