# experiments/quantile_experiment.py (61 lines of code) (raw):
#
# Licensed to the Apache Software Foundation (ASF) under one
# or more contributor license agreements. See the NOTICE file
# distributed with this work for additional information
# regarding copyright ownership. The ASF licenses this file
# to you under the Apache License, Version 2.0 (the
# "License"); you may not use this file except in compliance
# with the License. You may obtain a copy of the License at
#
# http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing,
# software distributed under the License is distributed on an
# "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
# KIND, either express or implied. See the License for the
# specific language governing permissions and limitations
# under the License.
#
import sys
import os
# Absolute paths of the parent of the current working directory (`pp`) and of
# the current working directory itself (`p`). Both are resolved against the
# CWD at launch time, not against this file's location.
pp = os.path.abspath('..')
p = os.path.abspath('.')
# Put both directories at the front of the module search path before the
# imports below run; `pp` is inserted last, so it ends up first in sys.path.
# NOTE(review): presumably this is what lets the project-local modules below
# resolve when the script is launched from its own directory -- confirm.
sys.path.insert(0, p)
sys.path.insert(0, pp)
from DataGenerator import DistributionDataGenerator, Workload
from SyntheticDataGenerators import SyntheticStreamMaker
from QueryGenerator import *
import SketchExperiment
import logging
import Oracle
import random
import numpy as np
import scipy as sp
import copy
import types
import sketches.Sketches as Sketches
# Set the root logger's threshold to WARNING.
logging.getLogger().setLevel(logging.WARNING)
if __name__ == '__main__':
    # Script entry point: build the query generators, the three workloads,
    # the oracle and the per-sketch parameter grids, then run the experiment.

    # `import scipy as sp` alone does not guarantee that the `scipy.stats`
    # submodule is loaded, so import it explicitly instead of relying on some
    # other module having imported it first.
    from scipy import stats

    # query for quantiles 5, 10, ..., 95
    qg1 = ConfigQueryGenerator(
        queries='quantile',
        parameters=np.arange(0.05, 1, 0.05),
        indices=DataGeneratorSeq(length=10),
    )
    # Same parameter grid and indices, issued as 'cdf' queries.
    qg2 = ConfigQueryGenerator(
        queries='cdf',
        parameters=np.arange(0.05, 1, 0.05),
        indices=DataGeneratorSeq(length=10),
    )
    qg1.name = 'quantile'
    qg2.name = 'cdf'
    # Chain of both generators; used by the two distribution workloads.
    qg = ChainQueryGenerators(generators=[qg1, qg2])

    # Workload 1: 1e5 draws from Beta(1, 1), labelled 'Uniform'.
    dg1 = DistributionDataGenerator(
        length=int(1e5),
        distribution=stats.beta(1, 1),
        name='Uniform',
    )
    w1 = Workload(data_generator=dg1, query_generator=qg)

    # Workload 2: 1e5 draws from Normal(0.5, 0.2), labelled 'Normal'.
    dg2 = DistributionDataGenerator(
        length=int(1e5),
        distribution=stats.norm(0.5, 0.2),
        name='Normal',
    )
    w2 = Workload(data_generator=dg2, query_generator=qg)

    # Workload 3: synthetic stream with order 'zoomin'; it uses only the
    # quantile generator (qg1), not the chained one.
    # NOTE(review): `n` is passed as the float 1e5 here, whereas the other
    # generators receive int(1e5) -- confirm SyntheticStreamMaker accepts a
    # float length.
    dg3 = SyntheticStreamMaker(n=1e5, order='zoomin')
    w3 = Workload(data_generator=dg3, query_generator=qg1)

    oracle = Oracle.QuantileOracle(
        save_dir="/tmp/answers",
        as_json=True,
        read_cache=True,
    )

    # NOTE(review): only dg2 is connected to the chained generator although
    # `qg` is also used by w1 (dg1) -- confirm this is intended.
    qg.connectDataGenerator(dg2)

    opts = SketchExperiment.ExperimentOptions(
        nparallel=8,
        ndatasets=1,
        nrepetitions=8,
        save_answers=True,
    )
    e = SketchExperiment.SketchMetaExperiment(
        workloads=[w1, w2, w3],
        oracle=oracle,
        options=opts,
        result_file="tmp_quantile_exp_results.csv",
    )

    #oracle2 = copy.deepcopy(oracle)
    # SketchConfig(sketch=Sketches.KLLSketch, size=range(100,1000,100))

    # Parameter grids, one keyword-argument dict per sketch configuration:
    #   KLL size      in {100, 300, 500, 700, 900}
    #   REQ size      in {10, 30, 50, 70, 90}
    #   Tdigest delta in {1/20, 1/60, 1/100, 1/140, 1/180}
    KLL_params = [{'size': s} for s in range(100, 1000, 200)]
    REQ_params = [{'size': s} for s in range(10, 100, 20)]
    Tdigest_params = [{'delta': 1 / s} for s in range(20, 200, 40)]

    # Maps a sketch label to (sketch class, list of parameter dicts).
    sketches = {
        'KLL': (Sketches.KLLSketch, KLL_params),
        'REQ': (Sketches.REQSketch, REQ_params),
        'Tdigest': (Sketches.TDigestSketch, Tdigest_params),
        #'Oracle': Sketches.SketchFactory(Sketches.OracleSketch, oracle2),
    }
    e.addSketches(sketches)
    # e.prepare()
    e.execute()