void addNumericField()

in x-pack/plugin/old-lucene-versions/src/main/java/org/elasticsearch/xpack/lucene/bwc/codecs/lucene54/Lucene54DocValuesConsumer.java [129:332]


    /**
     * Writes one numeric doc-values field to {@code meta} and {@code data}.
     * <p>
     * A first pass over {@code values} gathers statistics: entry count, minimum, maximum and the number of
     * missing entries, plus (for {@code NumberType.VALUE} only) the GCD of the offsets from the running
     * minimum and the set of distinct values while it holds at most 256 elements. Those statistics select
     * one of the constant, sparse, table, GCD or delta formats; afterwards the metadata entry, the
     * missing-value information and the encoded values are written. {@code values} is iterated more than once.
     *
     * @param field      field being written; its {@code number} is stored in the metadata entry
     * @param values     entries to encode; for {@code NumberType.VALUE} a {@code null} entry is counted as
     *                   missing and encoded as {@code 0}, for any other type entries are dereferenced without
     *                   a null check and {@code -1} is counted as missing
     * @param numberType selects between the value and the ordinal interpretation of {@code values}
     * @throws IOException declared for the writes performed on {@code meta} and {@code data}
     */
    void addNumericField(FieldInfo field, Iterable<Number> values, NumberType numberType) throws IOException {
        long total = 0;
        long lowest = Long.MAX_VALUE;
        long highest = Long.MIN_VALUE;
        long commonDivisor = 0;
        long nullCount = 0;
        long zeros = 0;
        long missingOrds = 0;
        // TODO: more efficient?
        HashSet<Long> distinct = null;

        if (numberType != NumberType.VALUE) {
            // ordinal pass: only min, max, entry count and the number of -1 entries are tracked
            for (Number ord : values) {
                final long current = ord.longValue();
                if (current == -1L) {
                    missingOrds++;
                }
                if (current < lowest) {
                    lowest = current;
                }
                if (current > highest) {
                    highest = current;
                }
                total++;
            }
        } else {
            distinct = new HashSet<>();
            for (Number boxed : values) {
                final boolean absent = boxed == null;
                // a missing entry is treated as 0, and therefore also counts as a zero
                final long current = absent ? 0 : boxed.longValue();
                if (absent) {
                    nullCount++;
                }
                if (current == 0) {
                    zeros++;
                }

                if (commonDivisor != 1) {
                    final boolean extreme = current < Long.MIN_VALUE / 2 || current > Long.MAX_VALUE / 2;
                    if (extreme) {
                        // current - lowest could overflow and corrupt the GCD; such values are rare enough
                        // that GCD compression is simply given up for the whole field
                        commonDivisor = 1;
                    } else if (total != 0) {
                        // lowest is only meaningful once at least one entry has been seen
                        commonDivisor = MathUtil.gcd(commonDivisor, current - lowest);
                    }
                }

                if (current < lowest) {
                    lowest = current;
                }
                if (current > highest) {
                    highest = current;
                }

                // stop tracking distinct values as soon as a 257th one shows up
                if (distinct != null && distinct.add(current) && distinct.size() > 256) {
                    distinct = null;
                }

                total++;
            }
        }

        final long range = highest - lowest;
        final int rangeBits = LegacyDirectWriter.unsignedBitsRequired(range);
        final int tableBits;
        if (distinct != null) {
            tableBits = LegacyDirectWriter.bitsRequired(distinct.size() - 1);
        } else {
            tableBits = Integer.MAX_VALUE;
        }

        // number of entries without a value, according to the convention of the number type
        final long absentDocs;
        switch (numberType) {
            case VALUE:
                absentDocs = nullCount;
                break;
            case ORDINAL:
                absentDocs = missingOrds;
                break;
            default:
                throw new AssertionError();
        }
        // sparse: 1% of docs or less have a value
        final boolean sparse = (double) absentDocs / total >= 0.99;

        final boolean singleValue = distinct != null && distinct.size() == 1;
        final boolean valueOrMissing = distinct != null && distinct.size() == 2 && nullCount > 0 && zeros == nullCount;

        final int format;
        if (total <= Integer.MAX_VALUE && (singleValue || valueOrMissing)) {
            // one unique value C, or exactly two unique values: "missing" and C
            format = CONST_COMPRESSED;
        } else if (sparse && total >= 1024) {
            // the 1024 docs floor avoids flipping back and forth when doing NRT search
            format = SPARSE_COMPRESSED;
        } else if (distinct != null && tableBits < rangeBits) {
            format = TABLE_COMPRESSED;
        } else if (commonDivisor != 0 && commonDivisor != 1) {
            final long quotientRange = (highest - lowest) / commonDivisor;
            final long quotientBits = LegacyDirectWriter.unsignedBitsRequired(quotientRange);
            format = quotientBits < rangeBits ? GCD_COMPRESSED : DELTA_COMPRESSED;
        } else {
            format = DELTA_COMPRESSED;
        }

        // metadata entry header
        meta.writeVInt(field.number);
        meta.writeByte(Lucene54DocValuesFormat.NUMERIC);
        meta.writeVInt(format);

        // missing-value information: a data offset followed by a bitset, or a marker when no bitset is needed
        if (format == SPARSE_COMPRESSED) {
            meta.writeLong(data.getFilePointer());
            final long docsInBitset = writeSparseMissingBitset(values, numberType, total - absentDocs);
            assert docsInBitset == total;
        } else if (nullCount != 0 && nullCount != total) {
            meta.writeLong(data.getFilePointer());
            writeMissingBitset(values);
        } else {
            meta.writeLong(nullCount == 0 ? ALL_LIVE : ALL_MISSING);
        }

        meta.writeLong(data.getFilePointer());
        meta.writeVLong(total);

        if (format == CONST_COMPRESSED) {
            // write the constant (nonzero value in the n=2 case, singleton value otherwise)
            final long constant = lowest < 0 ? Collections.min(distinct) : Collections.max(distinct);
            meta.writeLong(constant);
        } else if (format == GCD_COMPRESSED) {
            meta.writeLong(lowest);
            meta.writeLong(commonDivisor);
            final int quotientBits = LegacyDirectWriter.unsignedBitsRequired((highest - lowest) / commonDivisor);
            meta.writeVInt(quotientBits);
            final LegacyDirectWriter quotients = LegacyDirectWriter.getInstance(data, total, quotientBits);
            for (Number boxed : values) {
                final long raw = boxed != null ? boxed.longValue() : 0;
                quotients.add((raw - lowest) / commonDivisor);
            }
            quotients.finish();
        } else if (format == DELTA_COMPRESSED) {
            // when highest - lowest wrapped around, values are stored relative to 0 instead of the minimum
            final long base;
            if (range < 0) {
                base = 0;
            } else {
                base = lowest;
            }
            meta.writeLong(base);
            meta.writeVInt(rangeBits);
            final LegacyDirectWriter offsets = LegacyDirectWriter.getInstance(data, total, rangeBits);
            for (Number boxed : values) {
                final long raw = boxed != null ? boxed.longValue() : 0;
                offsets.add(raw - base);
            }
            offsets.finish();
        } else if (format == TABLE_COMPRESSED) {
            // sorted lookup table goes to meta; every entry is then stored as its index in that table
            final Long[] table = distinct.toArray(new Long[0]);
            Arrays.sort(table);
            final HashMap<Long, Integer> indexOf = new HashMap<>();
            meta.writeVInt(table.length);
            int slot = 0;
            for (Long entry : table) {
                meta.writeLong(entry);
                indexOf.put(entry, slot);
                slot++;
            }
            meta.writeVInt(tableBits);
            final LegacyDirectWriter ords = LegacyDirectWriter.getInstance(data, total, tableBits);
            for (Number boxed : values) {
                final long key = boxed == null ? 0 : boxed.longValue();
                ords.add(indexOf.get(key));
            }
            ords.finish();
        } else if (format == SPARSE_COMPRESSED) {
            // a one-byte tag for the number type, then the entries that have a value as a nested numeric field
            final Iterable<Number> presentValues;
            switch (numberType) {
                case VALUE:
                    meta.writeByte((byte) 0);
                    presentValues = () -> StreamSupport.stream(values.spliterator(), false).filter(entry -> entry != null).iterator();
                    break;
                case ORDINAL:
                    meta.writeByte((byte) 1);
                    presentValues = () -> StreamSupport.stream(values.spliterator(), false)
                        .filter(entry -> entry.longValue() != -1L)
                        .iterator();
                    break;
                default:
                    throw new AssertionError();
            }
            addNumericField(field, presentValues, numberType);
        } else {
            throw new AssertionError();
        }

        meta.writeLong(data.getFilePointer());
    }