def __init__()

in bugbug/models/testselect.py [0:0]


    def __init__(self, lemmatization=False, granularity="label", failures_skip=None):
        """Set up databases, feature extraction and the classifier.

        Args:
            lemmatization: Passed through unchanged to ``Model.__init__``.
            granularity: ``"label"``, ``"group"`` or ``"config_group"``. It
                decides which scheduling database is added to
                ``self.training_dbs``, which entries are written into
                ``self.eval_dbs`` and which extra feature extractors are
                used. Any other value leaves only the commit databases and
                the ``PrevFailures`` extractor configured.
            failures_skip: Stored on the instance as given; not otherwise
                used by this constructor.
        """
        Model.__init__(self, lemmatization)

        self.granularity = granularity
        self.failures_skip = failures_skip

        # The commits database is registered regardless of granularity.
        # NOTE(review): ``self.eval_dbs`` is not created here; presumably
        # ``Model.__init__`` provides it - confirm against the base class.
        commits_db = repository.COMMITS_DB
        self.training_dbs = [commits_db]
        self.eval_dbs[commits_db] = (commits_db, repository.COMMIT_EXPERIENCES_DB)

        # Register the scheduling database matching the granularity.
        if granularity == "label":
            label_db = test_scheduling.TEST_LABEL_SCHEDULING_DB
            self.training_dbs.append(label_db)
            self.eval_dbs[label_db] = (
                test_scheduling.PAST_FAILURES_LABEL_DB,
                test_scheduling.FAILING_TOGETHER_LABEL_DB,
            )
        elif granularity == "group":
            group_db = test_scheduling.TEST_GROUP_SCHEDULING_DB
            self.training_dbs.append(group_db)
            self.eval_dbs[group_db] = (
                test_scheduling.PAST_FAILURES_GROUP_DB,
                test_scheduling.TOUCHED_TOGETHER_DB,
            )
            # The group granularity also registers the config/group
            # scheduling DB for evaluation, without training on it.
            self.eval_dbs[test_scheduling.TEST_CONFIG_GROUP_SCHEDULING_DB] = (
                test_scheduling.FAILING_TOGETHER_CONFIG_GROUP_DB,
            )
        elif granularity == "config_group":
            config_group_db = test_scheduling.TEST_CONFIG_GROUP_SCHEDULING_DB
            self.training_dbs.append(config_group_db)
            self.eval_dbs[config_group_db] = (
                test_scheduling.PAST_FAILURES_CONFIG_GROUP_DB,
                test_scheduling.TOUCHED_TOGETHER_DB,
            )

        self.cross_validation_enabled = False
        self.calculate_importance = False

        self.entire_dataset_training = True

        # ``PrevFailures`` is always used; the rest depends on granularity.
        feature_extractors = [test_scheduling_features.PrevFailures()]
        if granularity == "label":
            # NOTE: ``test_scheduling_features.chunk()`` is not enabled here.
            feature_extractors.extend(
                (
                    test_scheduling_features.Platform(),
                    test_scheduling_features.Suite(),
                )
            )
        elif granularity in ("group", "config_group"):
            feature_extractors.extend(
                (
                    test_scheduling_features.PathDistance(),
                    test_scheduling_features.CommonPathComponents(),
                    test_scheduling_features.TouchedTogether(),
                )
            )

        # Single-step extraction pipeline; the empty list is the second
        # positional argument of ``CommitExtractor``.
        commit_extractor = commit_features.CommitExtractor(feature_extractors, [])
        self.extraction_pipeline = Pipeline([("commit_extractor", commit_extractor)])

        # Classifier pipeline: vectorize the "data" column, undersample with
        # a fixed seed, then fit XGBoost using the physical CPU count.
        vectorizer = ColumnTransformer([("data", DictVectorizer(), "data")])
        undersampler = RandomUnderSampler(random_state=0)
        estimator = xgboost.XGBClassifier(n_jobs=utils.get_physical_cpu_count())
        self.clf = ImblearnPipeline(
            [
                ("union", vectorizer),
                ("sampler", undersampler),
                ("estimator", estimator),
            ]
        )