def _init_internal_params()

in elastic/shared/track_processors/data_generator.py [0:0]


    def _init_internal_params(self):
        """Initialize per-client generation state: RNG seed, corpus readers,
        per-corpus document counts/ratios, and this client's document share.

        Reads ``self._client_index``, ``self._client_count``, ``self._random_seed``,
        ``self._integration_ratios``, ``self._data_generation_gb`` and
        ``self._max_generation_size_gb``; sets ``self.readers``,
        ``self._corpora_doc_ratios``, ``self.total_docs`` and ``self.docs_per_client``.
        """
        # avoid zero seeds because of client_index == 0; a falsy _random_seed
        # means "no fixed seed" and random.seed(None) re-seeds from entropy
        seed = (self._client_index + 1) * self._random_seed if self._random_seed else None
        random.seed(seed)
        # NOTE: seed may be None here, so it must be formatted with %s —
        # %d would raise a TypeError inside the logging machinery
        self.logger.info(
            "Initializing generator [%d/%d] with seed [%s].",
            self._client_index,
            self._client_count,
            seed,
        )

        self.readers = self._create_readers(self._client_count, self._client_index)
        corpus_stats = self._sample_corpus_stats()
        # we will be sampling our corpora based on required doc ratios to satisfy the total gb.
        # Larger corpus need a smaller ratio of lines to satisfy the original user specified ratios in gb
        corpora_ratios = {
            corpus_name: ratio
            for integration_name, integration in self._integration_ratios.items()
            for corpus_name, ratio in integration["corpora"].items()
        }
        corpora_doc_counts = calculate_corpus_counts(
            corpus_stats,
            corpora_ratios,
            self._data_generation_gb,
            self._max_generation_size_gb,
        )
        self._corpora_doc_ratios = calculate_integration_ratios(corpora_doc_counts)
        self.total_docs = sum(corpora_doc_counts.values())
        # only the coordinating client (index 0) logs the global totals,
        # to avoid duplicate log lines across clients
        if self._client_index == 0:
            self.logger.info("Total Docs: [%s]", self.total_docs)
            self.logger.info("Corpora Counts: [%s]", json.dumps(corpora_doc_counts))
            self.logger.info("Corpora Ratios: [%s]", json.dumps(self._corpora_doc_ratios))

        # last client gets a little more from bounds function
        _, self.docs_per_client = bounds(self.total_docs, self._client_index, self._client_count)