public static Query clusteringSliceQuery()

in harry-core/src/harry/operations/Query.java [292:402]


    public static Query clusteringSliceQuery(SchemaSpec schema, long pd, long cd, long queryDescriptor, boolean isGt, boolean isEquals, boolean reverse)
    {
        List<Relation> relations = new ArrayList<>();

        long[] sliced = schema.ckGenerator.slice(cd);
        long min;
        long max;
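        // nonEqFrom, derived deterministically from the query descriptor, is the index of the clustering column
        // that receives the single non-EQ (LT/GT/LTE/GTE) relation; every column before it is restricted with EQ,
        // and every column after it is constrained only through the computed bounds.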
        int nonEqFrom = RngUtils.asInt(queryDescriptor, 0, sliced.length - 1);

        long[] minBound = new long[sliced.length];
        long[] maxBound = new long[sliced.length];
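        // Per-component bounds for each clustering column; once filled in, they are stitched back into single
        // min/max clustering descriptors.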

        // Algorithm that determines boundaries for a clustering slice.
        //
        // The basic principles are not hard, but there are a few edge cases. I haven't figured out how to simplify
        // those, so there might be some room for improvement. In short, what we want to achieve is:
        //
        // 1. Every part that is restricted with an EQ relation goes into the bound verbatim.
        // 2. Every part that is restricted with a non-EQ relation (LT, GT, LTE, GTE) is taken into the bound
        //    if it is required to satisfy the relation. For example, in `ck1 = 0 AND ck2 < 5`, the value 5 goes
        //    into the _max_ bound for ck2, and ck2's minimum possible value goes into the _min_ bound, since we
        //    want to select every descriptor that is prefixed with ck1 = 0.
        // 3. Every other part (e.g., ones that are not explicitly mentioned in the query) has to be restricted
        //    according to equality. For example, in `ck1 = 0 AND ck2 < 5`, ck3, which is present in the schema but
        //    not mentioned in the query, has to be bounded so that any value between [0, min_value, min_value] and
        //    [0, 5, min_value] is matched.
        //
        // One edge case is a query on the first clustering key: `ck1 < 5`. In this case, we have to fix up the lower
        // value to the minimum possible value. We could simply use Long.MIN_VALUE, but in case we forget to
        // adjust entropy elsewhere, it'll be caught correctly here.
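        //
        // Worked example, following the rules above, for `ck1 = 0 AND ck2 < 5` over clustering keys
        // (ck1, ck2, ck3), all ASC:
        //   nonEqFrom = 1, relations = [ck1 EQ 0, ck2 LT 5]
        //   minBound  = [0, min_value, min_value]
        //   maxBound  = [0, 5,         min_value]
        // min and max are then stitched from these per-component bounds.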
        for (int i = 0; i < sliced.length; i++)
        {
            long v = sliced[i];
            DataGenerators.KeyGenerator gen = schema.ckGenerator;
            ColumnSpec column = schema.clusteringKeys.get(i);
            int idx = i;
            LongSupplier maxSupplier = () -> gen.maxValue(idx);
            LongSupplier minSupplier = () -> gen.minValue(idx);

            if (i < nonEqFrom)
            {
                relations.add(Relation.eqRelation(schema.clusteringKeys.get(i), v));
                minBound[i] = v;
                maxBound[i] = v;
            }
            else if (i == nonEqFrom)
            {
                relations.add(Relation.relation(relationKind(isGt, isEquals), schema.clusteringKeys.get(i), v));

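                // For a reversed (DESC) column, the CQL relation flips direction in descriptor space, so the
                // queried value bounds the opposite side of the slice compared to an ASC column.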
                if (column.isReversed())
                {
                    minBound[i] = isGt ? minSupplier.getAsLong() : v;
                    maxBound[i] = isGt ? v : maxSupplier.getAsLong();
                }
                else
                {
                    minBound[i] = isGt ? v : minSupplier.getAsLong();
                    maxBound[i] = isGt ? maxSupplier.getAsLong() : v;
                }
            }
            else
            {
                if (isEquals)
                {
                    minBound[i] = minSupplier.getAsLong();
                    maxBound[i] = maxSupplier.getAsLong();
                }
                // In the non-EQ case, all subsequent bounds have to correspond to the maximum value for a GT
                //    relation (or to the minimum for an LT one), with the choice flipped when the column holding
                //    the non-EQ relation is reversed.
                //
                // For example, if we have (ck1, ck2, ck3) as (ASC, DESC, ASC), and query ck1 > X, the min bound
                // will be:
                //  [xxxxx | max_value | max_value]
                //    ck1       ck2         ck3
                // which will exclude everything prefixed with xxxxx, but take every possible value with ck1 > xxxxx.
                //
                // Similarly, if we have (ck1, ck2, ck3) as (ASC, DESC, ASC), and query ck1 <= X, the max bound
                // will be:
                //  [xxxxx | max_value | max_value]
                // which will include every ck1 < xxxxx, and any clustering prefixed with xxxxx.
                else if (schema.clusteringKeys.get(nonEqFrom).isReversed())
                    maxBound[i] = minBound[i] = isGt ? minSupplier.getAsLong() : maxSupplier.getAsLong();
                else
                    maxBound[i] = minBound[i] = isGt ? maxSupplier.getAsLong() : minSupplier.getAsLong();
            }
        }

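        // If the column holding the non-EQ relation is reversed, the slice runs in the opposite direction in
        // descriptor space; flip isGt here, since it is used below to pick the nonEqFrom == 0 bounds and the
        // inclusiveness of the final min/max relations.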
        if (schema.clusteringKeys.get(nonEqFrom).isReversed())
            isGt = !isGt;

        min = schema.ckGenerator.stitch(minBound);
        max = schema.ckGenerator.stitch(maxBound);

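        // Edge case from the comment above: when only the first clustering key carries the non-EQ relation, open
        // the unrestricted side of the slice to the full range of the clustering key generator.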
        if (nonEqFrom == 0)
        {
            min = isGt ? min : schema.ckGenerator.minValue();
            max = !isGt ? max : schema.ckGenerator.maxValue();
        }

        // if we're about to create an "impossible" query, throw so the caller can bump the modifier and re-generate
        if (min == max && !isEquals)
            throw new IllegalArgumentException("Impossible Query");

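        // The side of the slice that carries the queried value keeps the inclusiveness of the original relation
        // (strict vs inclusive); the opposite side is padded with generator min/max values and is always inclusive.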
        return new Query.ClusteringSliceQuery(Query.QueryKind.CLUSTERING_SLICE,
                                              pd,
                                              min,
                                              max,
                                              relationKind(true, isGt ? isEquals : true),
                                              relationKind(false, !isGt ? isEquals : true),
                                              reverse,
                                              relations,
                                              schema);
    }
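
A minimal sketch (not part of Query.java) of how a caller might react to the "Impossible Query" signal by bumping the query descriptor and retrying; the wrapper name and the simple `+ 1` bump are illustrative assumptions, not the project's actual generator logic:

    // Hypothetical retry wrapper: regenerates the slice query with a bumped descriptor whenever
    // clusteringSliceQuery signals an empty, non-inclusive slice.
    public static Query clusteringSliceQueryWithRetry(SchemaSpec schema, long pd, long cd, long queryDescriptor,
                                                      boolean isGt, boolean isEquals, boolean reverse)
    {
        while (true)
        {
            try
            {
                return clusteringSliceQuery(schema, pd, cd, queryDescriptor, isGt, isEquals, reverse);
            }
            catch (IllegalArgumentException e)
            {
                // Naive modifier bump, for illustration only; a real generator would derive the next
                // descriptor from its rng rather than incrementing.
                queryDescriptor += 1;
            }
        }
    }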