experiments/quantile_experiment.py (61 lines of code) (raw):

#
# Licensed to the Apache Software Foundation (ASF) under one
# or more contributor license agreements.  See the NOTICE file
# distributed with this work for additional information
# regarding copyright ownership.  The ASF licenses this file
# to you under the Apache License, Version 2.0 (the
# "License"); you may not use this file except in compliance
# with the License.  You may obtain a copy of the License at
#
#   http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing,
# software distributed under the License is distributed on an
# "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
# KIND, either express or implied.  See the License for the
# specific language governing permissions and limitations
# under the License.
#
"""Driver script for a quantile / CDF sketch experiment.

When run as a script it:

* builds a 'quantile' and a 'cdf' query generator over the parameters
  ``np.arange(0.05, 1, 0.05)`` and chains them together;
* defines three workloads: a Beta(1, 1) stream named 'Uniform', a
  Normal(0.5, 0.2) stream named 'Normal', and a synthetic stream with
  ``order='zoomin'``;
* registers KLL, REQ and t-digest sketches, each with a small grid of
  constructor parameters, on a ``SketchMetaExperiment``;
* executes the experiment, with results directed to
  ``tmp_quantile_exp_results.csv`` and the oracle configured with
  ``save_dir="/tmp/answers"``.
"""

import sys
import os

# Make the current directory and its parent importable so that the
# project-local modules below resolve when the script is launched directly.
# These inserts must stay ahead of the project imports.
pp = os.path.abspath('..')
p = os.path.abspath('.')
sys.path.insert(0, p)
sys.path.insert(0, pp)

from DataGenerator import DistributionDataGenerator, Workload
from SyntheticDataGenerators import SyntheticStreamMaker
from QueryGenerator import *
import SketchExperiment
import logging
import Oracle
import random
import numpy as np
import scipy as sp
# `import scipy` alone is not guaranteed to load the `stats` subpackage
# (notably on older SciPy releases), so `sp.stats.beta` / `sp.stats.norm`
# below could raise AttributeError. Import the subpackage explicitly.
import scipy.stats
import copy
import types
import sketches.Sketches as Sketches

logging.getLogger().setLevel(logging.WARNING)


if __name__ == '__main__':
    # query for quantiles 5, 10, ..., 95
    qg1 = ConfigQueryGenerator(
        queries='quantile',
        parameters=np.arange(0.05, 1, 0.05),
        indices=DataGeneratorSeq(length=10),
    )
    # Same parameter grid, issued as 'cdf' queries.
    qg2 = ConfigQueryGenerator(
        queries='cdf',
        parameters=np.arange(0.05, 1, 0.05),
        indices=DataGeneratorSeq(length=10),
    )
    qg1.name = 'quantile'
    qg2.name = 'cdf'
    qg = ChainQueryGenerators(generators=[qg1, qg2])

    # Workload 1: Beta(1, 1) data, labelled 'Uniform'.
    dg1 = DistributionDataGenerator(
        length=int(1e5),
        distribution=sp.stats.beta(1, 1),
        name='Uniform',
    )
    w1 = Workload(data_generator=dg1, query_generator=qg)

    # Workload 2: Normal data with loc=0.5, scale=0.2.
    dg2 = DistributionDataGenerator(
        length=int(1e5),
        distribution=sp.stats.norm(0.5, 0.2),
        name='Normal',
    )
    w2 = Workload(data_generator=dg2, query_generator=qg)

    # Workload 3: synthetic 'zoomin' stream; only the quantile queries (qg1)
    # are attached to it, not the chained quantile+cdf generator.
    # NOTE(review): n is passed as the float 1e5 here while the other
    # generators use int(1e5) -- confirm SyntheticStreamMaker accepts a float.
    dg3 = SyntheticStreamMaker(n=1e5, order='zoomin')
    w3 = Workload(data_generator=dg3, query_generator=qg1)

    oracle = Oracle.QuantileOracle(
        save_dir="/tmp/answers",
        as_json=True,
        read_cache=True,
    )
    # NOTE(review): the chained generator is connected to dg2 only, although
    # it is also used by w1 -- confirm this is intentional.
    qg.connectDataGenerator(dg2)

    opts = SketchExperiment.ExperimentOptions(
        nparallel=8,
        ndatasets=1,
        nrepetitions=8,
        save_answers=True,
    )
    e = SketchExperiment.SketchMetaExperiment(
        workloads=[w1, w2, w3],
        oracle=oracle,
        options=opts,
        result_file="tmp_quantile_exp_results.csv",
    )

    #oracle2 = copy.deepcopy(oracle)
    # SketchConfig(sketch=Sketches.KLLSketch, size=range(100,1000,100))

    # Parameter grids, one dict of keyword arguments per configuration:
    #   KLL     size  in {100, 300, 500, 700, 900}
    #   REQ     size  in {10, 30, 50, 70, 90}
    #   Tdigest delta in {1/20, 1/60, 1/100, 1/140, 1/180}
    KLL_params = [{'size': s} for s in range(100, 1000, 200)]
    REQ_params = [{'size': s} for s in range(10, 100, 20)]
    Tdigest_params = [{'delta': 1/s} for s in range(20, 200, 40)]

    # Maps a display name to (sketch class, list of parameter dicts).
    sketches = {
        'KLL': (Sketches.KLLSketch, KLL_params),
        'REQ': (Sketches.REQSketch, REQ_params),
        'Tdigest': (Sketches.TDigestSketch, Tdigest_params),
        #'Oracle': Sketches.SketchFactory(Sketches.OracleSketch, oracle2),
    }
    e.addSketches(sketches)
    # e.prepare()
    e.execute()