go/zipf_distribution.go (69 lines of code) (raw):

/* * Licensed to the Apache Software Foundation (ASF) under one or more * contributor license agreements. See the NOTICE file distributed with * this work for additional information regarding copyright ownership. * The ASF licenses this file to You under the Apache License, Version 2.0 * (the "License"); you may not use this file except in compliance with * the License. You may obtain a copy of the License at * * http://www.apache.org/licenses/LICENSE-2.0 * * Unless required by applicable law or agreed to in writing, software * distributed under the License is distributed on an "AS IS" BASIS, * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. * See the License for the specific language governing permissions and * limitations under the License. */ package main import ( "math" "math/rand" ) const ( zipf_taylor_threshold = 1e-8 zipf_f_1_2 = 0.5 zipf_f_1_3 = 1.0 / 3 zipf_f_1_4 = 0.25 ) type zipfDistribution struct { numberOfElements int64 exponent float64 hIntegralX1 float64 hIntegralNumberOfElements float64 s float64 } func newZipfDistribution(numberOfElements int64, exponent float64) *zipfDistribution { d := &zipfDistribution{ numberOfElements: numberOfElements, exponent: exponent, hIntegralX1: hIntegral(1.5, exponent) - 1.0, hIntegralNumberOfElements: hIntegral(float64(numberOfElements)+zipf_f_1_2, exponent), s: 2 - hIntegralInverse(hIntegral(2.5, exponent)-h(2.0, exponent), exponent), } return d } func (z *zipfDistribution) sample() int64 { for true { u := z.hIntegralNumberOfElements + rand.Float64()*(z.hIntegralX1-z.hIntegralNumberOfElements) x := hIntegralInverse(u, z.exponent) k := int64(x + zipf_f_1_2) if k < 1 { k = 1 } else if k > z.numberOfElements { k = z.numberOfElements } if ((float64(k) - x) <= z.s) || (u >= (hIntegral(float64(k)+zipf_f_1_2, z.exponent) - h(float64(k), z.exponent))) { return k } } panic("this cannot happen") } func h(x float64, exponent float64) float64 { return math.Exp(-exponent * math.Log(x)) } func hIntegral(x float64, exponent float64) float64 { return helper2((1-exponent)*math.Log(x)) * math.Log(x) } func helper1(x float64) float64 { if math.Abs(x) > zipf_taylor_threshold { return math.Log1p(x) / x } return 1 - (x * (zipf_f_1_2 - (x * (zipf_f_1_3 - (zipf_f_1_4 * x))))) } func helper2(x float64) float64 { if math.Abs(x) > zipf_taylor_threshold { return math.Expm1(x) / x } return 1 + (x * zipf_f_1_2 * (1 + (x * zipf_f_1_3 * (1 + (zipf_f_1_4 * x))))) } func hIntegralInverse(x float64, exponent float64) float64 { t := x * (1 - exponent) if t < -1 { // Limit value to the reange [-1, +inf). // t could be smaller than -1 in some rare cases due to numerical errors. t = -1 } return math.Exp(helper1(t) * x) }