def on_prepare_track()

in elastic/shared/track_processors/data_generator.py [0:0]


    def on_prepare_track(self, track, data_root_dir):
        """
        Prepare the source corpora of ``track`` and plan the data generation work.

        If the challenge parameter ``generate-data`` is falsy, nothing is done and an empty list
        is returned. Otherwise every corpus whose meta-data does not flag it as ``generated`` has
        its document sets prepared below ``<data_root_dir>/<track name>/<corpus name>``, provided
        that both ``self.downloader`` and ``self.decompressor`` are set. Afterwards the challenge
        parameter ``output-folder`` is set to ``<data_root_dir>/<track name>/generated/<track-id>``.

        :param track: The track to prepare. The challenge parameter ``track-id`` is read with a
                      plain subscript, so it has to be present when data generation is enabled.
        :param data_root_dir: Root directory below which the data of all tracks is stored.
        :return: A list of ``(generate, params)`` tuples, one per data generation client. The
                 number of clients is taken from the challenge parameter
                 ``data-generation-clients`` (default: 2).
        """
        params = track.selected_challenge_or_default.parameters
        if not params.get("generate-data", True):
            return []

        track_data_root = os.path.join(data_root_dir, track.name)
        for corpus in track.corpora:
            # corpora flagged as generated are skipped here
            if corpus.meta_data.get("generated", False):
                continue

            corpus_dir = os.path.join(track_data_root, corpus.name)
            self.logger.info(
                "Resolved data root directory for document corpus [%s] in track [%s] to [%s].",
                corpus.name,
                track.name,
                corpus_dir,
            )
            # only set for real benchmarks, not in unit tests
            if not (self.downloader and self.decompressor):
                continue

            preparator = DocumentSetPreparator(track.name, self.downloader, self.decompressor)
            for docs in corpus.documents:
                preparator.prepare_document_set(docs, corpus_dir)

        # data is now available locally, proceed with generating data
        num_clients = params.get("data-generation-clients", 2)
        track_id = params["track-id"]
        params["output-folder"] = os.path.join(track_data_root, "generated", track_id)

        # one work item per client; each item gets its own parameter dict
        return [
            (
                generate,
                {
                    "track": track,
                    "track_data_root": track_data_root,
                    "client_index": idx,
                    "client_count": num_clients,
                },
            )
            for idx in range(num_clients)
        ]