def __init__()

in bugbug/models/backout.py [0:0]


    def __init__(self, lemmatization=False, bug_data=False):
        """Configure the feature extraction pipeline and the classifier.

        Args:
            lemmatization: Forwarded unchanged to ``CommitModel.__init__``.
            bug_data: Forwarded to ``CommitModel.__init__``; when truthy, the
                bug-level feature extractors are appended after the
                commit-level ones.
        """
        CommitModel.__init__(self, lemmatization, bug_data)

        self.calculate_importance = False

        # Commit-level extractors; each factory is called with no arguments,
        # in the order listed here.
        commit_extractor_factories = (
            commit_features.SourceCodeFilesModifiedNum,
            commit_features.OtherFilesModifiedNum,
            commit_features.TestFilesModifiedNum,
            commit_features.SourceCodeFileSize,
            commit_features.OtherFileSize,
            commit_features.TestFileSize,
            commit_features.SourceCodeAdded,
            commit_features.OtherAdded,
            commit_features.TestAdded,
            commit_features.SourceCodeDeleted,
            commit_features.OtherDeleted,
            commit_features.TestDeleted,
            commit_features.AuthorExperience,
            commit_features.ReviewerExperience,
            commit_features.ReviewersNum,
            commit_features.ComponentTouchedPrev,
            commit_features.DirectoryTouchedPrev,
            commit_features.FileTouchedPrev,
            commit_features.Types,
            commit_features.Components,
            commit_features.Directories,
            commit_features.Files,
        )
        extractors = [make() for make in commit_extractor_factories]

        if bug_data:
            # Bug-level extractors are only used when bug data was requested.
            bug_extractor_factories = (
                bug_features.Product,
                bug_features.Component,
                bug_features.Severity,
                bug_features.Priority,
                bug_features.HasCrashSignature,
                bug_features.HasRegressionRange,
                bug_features.Whiteboard,
                bug_features.Keywords,
                bug_features.NumberOfBugDependencies,
                bug_features.BlockedBugsNumber,
            )
            extractors.extend(make() for make in bug_extractor_factories)

        # Cleanup callables handed to the extractor alongside the features.
        cleanup_factories = (
            feature_cleanup.fileref,
            feature_cleanup.url,
            feature_cleanup.synonyms,
        )
        cleaners = [make() for make in cleanup_factories]

        commit_extractor = commit_features.CommitExtractor(extractors, cleaners)
        self.extraction_pipeline = Pipeline(
            [("commit_extractor", commit_extractor)]
        )

        # One vectorizer per input column: "data", "desc" and "files".
        column_vectorizers = [
            ("data", DictVectorizer(), "data"),
            ("desc", self.text_vectorizer(), "desc"),
            (
                "files",
                # NOTE(review): keep_as_is presumably passes the already
                # tokenized file list through untouched - confirm in utils.
                CountVectorizer(
                    analyzer=utils.keep_as_is,
                    lowercase=False,
                    min_df=0.0014,
                ),
                "files",
            ),
        ]
        union = ColumnTransformer(column_vectorizers)

        # Fixed random_state keeps the under-sampling seed constant.
        sampler = RandomUnderSampler(random_state=0)
        estimator = xgboost.XGBClassifier(n_jobs=utils.get_physical_cpu_count())

        self.clf = ImblearnPipeline(
            [
                ("union", union),
                ("sampler", sampler),
                ("estimator", estimator),
            ]
        )