in spark/sql/dataframe.go [1622:1664]
func (df *dataFrameImpl) ApproxQuantile(ctx context.Context, probabilities []float64,
	relativeError float64, cols ...string,
) ([][]float64, error) {
	rel := &proto.Relation{
		Common: &proto.RelationCommon{
			PlanId: newPlanId(),
		},
		RelType: &proto.Relation_ApproxQuantile{
			ApproxQuantile: &proto.StatApproxQuantile{
				Input:         df.relation,
				Probabilities: probabilities,
				RelativeError: relativeError,
				Cols:          cols,
			},
		},
	}
	data := NewDataFrame(df.session, rel)
	rows, err := data.Collect(ctx)
	if err != nil {
		return nil, err
	}
	// The result structure is a bit unusual: the query returns exactly one row containing the
	// quantiles. Inside that row is a list of nested arrays, one per requested column: the
	// first column maps to the first nested array, the second column to the second, and so on.
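	// For example (illustrative values), probabilities = {0.5, 0.9} and cols = {"a", "b"}
	// produce a 2x2 result where result[i][j] is the approximate quantile of cols[i] at
	// probabilities[j].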
	nested, ok := rows[0].At(0).([]interface{})
	if !ok {
		return nil, sparkerrors.WithType(fmt.Errorf(
			"failed to cast quantile result to a list"), sparkerrors.ExecutionError)
	}
	result := make([][]float64, len(nested))
	for i := 0; i < len(nested); i++ {
		tmp, ok := nested[i].([]interface{})
		if !ok {
			return nil, sparkerrors.WithType(fmt.Errorf(
				"failed to cast nested quantile list"), sparkerrors.ExecutionError)
		}
		result[i] = make([]float64, len(tmp))
		for j := 0; j < len(tmp); j++ {
			f, ok := tmp[j].(float64)
			if !ok {
				return nil, sparkerrors.WithType(fmt.Errorf(
					"failed to cast to float64"), sparkerrors.ExecutionError)
			}
			result[i][j] = f
		}
	}
	return result, nil
}
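
// The following is a hedged usage sketch and not part of the upstream file. It assumes
// ApproxQuantile is exposed on the DataFrame interface with the signature implemented above;
// the column names "a" and "b", the probabilities, and the 1% relative error are illustrative
// values only.
func exampleApproxQuantile(ctx context.Context, df DataFrame) error {
	// Request the approximate median and 90th percentile of columns "a" and "b".
	quantiles, err := df.ApproxQuantile(ctx, []float64{0.5, 0.9}, 0.01, "a", "b")
	if err != nil {
		return err
	}
	// quantiles[0] holds the values for "a", quantiles[1] the values for "b"; within each
	// inner slice the entries follow the order of the probabilities argument.
	fmt.Printf("a: median=%f, p90=%f\n", quantiles[0][0], quantiles[0][1])
	fmt.Printf("b: median=%f, p90=%f\n", quantiles[1][0], quantiles[1][1])
	return nil
}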