func()

in spark/sql/dataframe.go [1622:1664]


// ApproxQuantile builds a StatApproxQuantile relation on top of this DataFrame's
// relation, collects it, and unpacks the result into one []float64 per column.
//
// probabilities, relativeError and cols are forwarded unchanged into the plan;
// their validation and exact semantics are up to the server.
//
// The outer slice of the result follows the order of the nested arrays in the
// response (one per requested column); each inner slice holds the values of
// that nested array in order.
//
// An error is returned if collecting the plan fails, if the response contains
// no rows, or if the response does not have the expected nested-array-of-float64
// shape. Shape errors are tagged with sparkerrors.ExecutionError.
func (df *dataFrameImpl) ApproxQuantile(ctx context.Context, probabilities []float64,
	relativeError float64, cols ...string,
) ([][]float64, error) {
	rel := &proto.Relation{
		Common: &proto.RelationCommon{
			PlanId: newPlanId(),
		},
		RelType: &proto.Relation_ApproxQuantile{
			ApproxQuantile: &proto.StatApproxQuantile{
				Input:         df.relation,
				Probabilities: probabilities,
				RelativeError: relativeError,
				Cols:          cols,
			},
		},
	}
	data := NewDataFrame(df.session, rel)
	rows, err := data.Collect(ctx)
	if err != nil {
		return nil, err
	}

	// The result structure is a bit unusual: the plan is expected to return exactly one
	// row whose first value is a list of nested arrays. The first nested array holds the
	// quantiles of the first column, the second those of the second column, and so on.
	//
	// Guard the indexing and use checked type assertions so that an unexpected response
	// surfaces as an error instead of a panic.
	if len(rows) == 0 {
		return nil, sparkerrors.WithType(fmt.Errorf(
			"approx quantile returned no rows"), sparkerrors.ExecutionError)
	}

	first := rows[0].At(0)
	nested, ok := first.([]interface{})
	if !ok {
		return nil, sparkerrors.WithType(fmt.Errorf(
			"unexpected approx quantile result type %T", first), sparkerrors.ExecutionError)
	}

	result := make([][]float64, len(nested))
	for i, col := range nested {
		values, ok := col.([]interface{})
		if !ok {
			return nil, sparkerrors.WithType(fmt.Errorf(
				"unexpected approx quantile column type %T at index %d", col, i),
				sparkerrors.ExecutionError)
		}
		quantiles := make([]float64, len(values))
		for j, v := range values {
			f, ok := v.(float64)
			if !ok {
				return nil, sparkerrors.WithType(fmt.Errorf(
					"failed to cast to float64"), sparkerrors.ExecutionError)
			}
			quantiles[j] = f
		}
		result[i] = quantiles
	}
	return result, nil
}