Status GetType()

in python/pyarrow/src/arrow/python/inference.cc [454:550]


  // Resolve the counters gathered by this inferrer into a single Arrow type.
  //
  // On success the chosen type is written to `*out` and an OK status is
  // returned. Failure statuses are returned when union creation was requested
  // (not implemented), when Validate() fails, when pyarrow scalars or
  // numpy.datetime64 scalars are mixed with other kinds of values, or when a
  // nested call (list value type, struct type, decimal construction, NumPy
  // dtype conversion) reports an error.
  //
  // Note: when NumPy scalars are mixed with other values, their count is
  // folded into bool_count_ / int_count_ / float_count_ as a side effect.
  Status GetType(std::shared_ptr<DataType>* out) {
    // TODO(wesm): handling forming unions
    if (make_unions_) {
      return Status::NotImplemented("Creating union types not yet supported");
    }

    RETURN_NOT_OK(Validate());

    // pyarrow scalars may only appear together with Nones/nulls
    if (arrow_scalar_count_ > 0) {
      const bool only_arrow_scalars_and_nulls =
          (arrow_scalar_count_ + none_count_ == total_count_);
      if (!only_arrow_scalars_and_nulls) {
        return Status::Invalid(
            "pyarrow scalars cannot be mixed "
            "with other Python scalar values currently");
      }
    }

    if (numpy_dtype_count_ > 0) {
      // Fast path: nothing but NumPy scalars and Nones/nulls, so the unified
      // NumPy dtype decides the result directly.
      const bool only_numpy_and_nulls =
          (numpy_dtype_count_ + none_count_ == total_count_);
      if (only_numpy_and_nulls) {
        return NumPyDtypeToArrow(numpy_unifier_.current_dtype()).Value(out);
      }

      // The "bad path": NumPy scalars are interleaved with other kinds of
      // scalars. This can happen innocuously because numpy.nan is not a
      // NumPy scalar (it's a built-in PyFloat).

      // TODO(ARROW-5564): Merge together type unification so this
      // hack is not necessary
      switch (numpy_unifier_.current_type_num()) {
        case NPY_DATETIME:
          return Status::Invalid(
              "numpy.datetime64 scalars cannot be mixed "
              "with other Python scalar values currently");
        case NPY_FLOAT32:
        case NPY_FLOAT64:
          float_count_ += numpy_dtype_count_;
          break;
        case NPY_INT8:
        case NPY_INT16:
        case NPY_INT32:
        case NPY_INT64:
        case NPY_UINT8:
        case NPY_UINT16:
        case NPY_UINT32:
        case NPY_UINT64:
          int_count_ += numpy_dtype_count_;
          break;
        case NPY_BOOL:
          bool_count_ += numpy_dtype_count_;
          break;
        default:
          // Any other type number leaves the counters untouched.
          break;
      }
    }

    // Store the selected type and report success.
    auto emit = [out](const std::shared_ptr<DataType>& chosen) {
      *out = chosen;
      return Status::OK();
    };

    // The checks below are ordered by precedence: the first non-zero counter
    // determines the resulting type.
    if (list_count_) {
      std::shared_ptr<DataType> item_type;
      RETURN_NOT_OK(list_inferrer_->GetType(&item_type));
      return emit(list(item_type));
    }
    if (struct_count_) {
      RETURN_NOT_OK(GetStructType(out));
      return Status::OK();
    }
    if (decimal_count_) {
      // Go through Make() because the default constructor does not validate
      // the precision and scale.
      const auto precision = max_decimal_metadata_.precision();
      const auto scale = max_decimal_metadata_.scale();
      if (precision <= Decimal128Type::kMaxPrecision) {
        ARROW_ASSIGN_OR_RAISE(*out, Decimal128Type::Make(precision, scale));
      } else {
        ARROW_ASSIGN_OR_RAISE(*out, Decimal256Type::Make(precision, scale));
      }
      return Status::OK();
    }
    // Floats are deliberately tested ahead of integers.
    if (float_count_) return emit(float64());
    if (int_count_) return emit(int64());
    if (date_count_) return emit(date32());
    if (time_count_) return emit(time64(TimeUnit::MICRO));
    if (timestamp_micro_count_) return emit(timestamp(TimeUnit::MICRO, timezone_));
    if (duration_count_) return emit(duration(TimeUnit::MICRO));
    if (bool_count_) return emit(boolean());
    if (binary_count_) return emit(binary());
    if (unicode_count_) return emit(utf8());
    if (interval_count_) return emit(month_day_nano_interval());
    if (arrow_scalar_count_) return emit(scalar_type_);

    // No counter was set: fall back to the null type.
    return emit(null());
  }