public static Query clusteringRangeQuery()

in harry-core/src/harry/operations/Query.java [404:481]
47 lines of code
13 McCabe index (conditional complexity)

    /**
     * Builds a clustering range query: a leading run of clustering columns is pinned with EQ relations,
     * exactly one column receives a lower and an upper bound relation, and the columns after it get no
     * relation at all (their slots in the bounds are padded with the generator's min/max values).
     *
     * @param schema          schema providing the clustering keys and the {@code ckGenerator} used to
     *                        slice and stitch clustering descriptors
     * @param pd              descriptor handed to the resulting query unchanged (presumably the partition
     *                        descriptor; confirm against callers)
     * @param cd1             clustering descriptor that is sliced into the initial lower bound
     * @param cd2             clustering descriptor that is sliced into the initial upper bound
     * @param queryDescriptor value from which the index of the range-restricted clustering column is derived
     * @param isMinEq         whether the lower bound is inclusive
     * @param isMaxEq         whether the upper bound is inclusive
     * @param reverse         flag passed through to the resulting query
     * @return a {@code ClusteringRangeQuery} carrying the stitched bounds and the generated relations
     * @throws IllegalArgumentException if the range-restricted column is the first clustering key and both
     *                                  of its bound values coincide, or if both stitched bounds are equal
     */
    public static Query clusteringRangeQuery(SchemaSpec schema, long pd, long cd1, long cd2, long queryDescriptor, boolean isMinEq, boolean isMaxEq, boolean reverse)
    {
        List<Relation> restrictions = new ArrayList<>();

        long[] lower = schema.ckGenerator.slice(cd1);
        long[] upper = schema.ckGenerator.slice(cd2);

        final int ckCount = schema.clusteringKeys.size();
        // Index of the single clustering column that gets the pair of range relations.
        final int rangeIdx = RngUtils.asInt(queryDescriptor, 0, ckCount - 1);

        // This mirrors how clustering slices are built, except that here both the lower and the upper
        // bound are derived from sliced values. Each clustering column falls into one of three groups:
        //
        //   * before the range column: restricted with EQ, value taken from the lower bound.
        //     TODO: with hierarchical clustering generation we could pick from keys that are already
        //           locked; the current approach exercises more cases, though.
        //   * the range column itself: its two sliced values, ordered, become the lower and upper limits.
        //     For example, in `ck1 = 0 AND ck2 > 2 AND ck2 < 5`, 2 goes to the lower and 5 to the upper bound.
        //   * after the range column: not mentioned in the query, so the bound is padded according to
        //     inclusiveness. For example, for `ck1 = 0 AND ck2 > 2 AND ck2 <= 5` with an unmentioned ck3,
        //     the bounds are [0, 2, max_value] and [0, 5, max_value]: since ck2 = 2 is excluded, every ck3
        //     under the [0, 2] prefix is excluded as well; since ck2 = 5 is included, every ck3 under
        //     the [0, 5] prefix is allowed.
        for (int idx = 0; idx < ckCount; idx++)
        {
            ColumnSpec<?> column = schema.clusteringKeys.get(idx);

            if (idx < rangeIdx)
            {
                // Prefix column: equality on the lower-bound value, mirrored into the upper bound.
                restrictions.add(Relation.eqRelation(column, lower[idx]));
                upper[idx] = lower[idx];
                continue;
            }

            if (idx > rangeIdx)
            {
                // Trailing column: no relation, only padding of both bounds.
                if (isMinEq)
                    lower[idx] = schema.ckGenerator.minValue(idx);
                else
                    lower[idx] = schema.ckGenerator.maxValue(idx);

                if (isMaxEq)
                    upper[idx] = schema.ckGenerator.maxValue(idx);
                else
                    upper[idx] = schema.ckGenerator.minValue(idx);
                continue;
            }

            // idx == rangeIdx: order the two sliced values so that the lower limit never exceeds the upper one.
            long lowLimit = Math.min(lower[idx], upper[idx]);
            long highLimit = Math.max(lower[idx], upper[idx]);

            if (column.isReversed())
            {
                // For a reversed column, the limits and their inclusiveness flags swap sides in the relations.
                restrictions.add(Relation.relation(relationKind(true, isMaxEq), column, highLimit));
                restrictions.add(Relation.relation(relationKind(false, isMinEq), column, lowLimit));
            }
            else
            {
                restrictions.add(Relation.relation(relationKind(true, isMinEq), column, lowLimit));
                restrictions.add(Relation.relation(relationKind(false, isMaxEq), column, highLimit));
            }

            lower[idx] = lowLimit;
            upper[idx] = highLimit;

            // A range over the very first clustering key whose limits coincide is rejected as impossible.
            if (idx == 0 && lowLimit == highLimit)
                throw new IllegalArgumentException("impossible query");
        }

        long stitchedMin = schema.ckGenerator.stitch(lower);
        long stitchedMax = schema.ckGenerator.stitch(upper);

        // Both bounds stitched to the same value: reject the query as impossible.
        // TODO: signalling this with an exception is a stopgap; it should go away with the
        //       refactoring described below.
        if (stitchedMin == stitchedMax)
            throw new IllegalArgumentException("impossible query");

        // TODO: one way to get rid of garbage here, and possibly simplify the code, is to return only the
        //       bounds. Once bounds are created, they can be sliced and the query generated straight from
        //       them. Then -inf/+inf could be treated as special placeholder values, and relations would
        //       be easier to manipulate.
        return new Query.ClusteringRangeQuery(Query.QueryKind.CLUSTERING_RANGE,
                                              pd,
                                              stitchedMin,
                                              stitchedMax,
                                              relationKind(true, isMinEq),
                                              relationKind(false, isMaxEq),
                                              reverse,
                                              restrictions,
                                              schema);
    }