in bugbug/models/backout.py [0:0]
def __init__(self, lemmatization=False, bug_data=False):
    """Configure feature extraction and the classifier pipeline.

    Args:
        lemmatization: Forwarded unchanged to ``CommitModel.__init__``.
        bug_data: Forwarded to ``CommitModel.__init__``. When truthy, the
            bug-level feature extractors are appended after the
            commit-level ones.
    """
    CommitModel.__init__(self, lemmatization, bug_data)

    self.calculate_importance = False

    # Commit-level extractors, always enabled.
    extractors = [
        commit_features.SourceCodeFilesModifiedNum(),
        commit_features.OtherFilesModifiedNum(),
        commit_features.TestFilesModifiedNum(),
        commit_features.SourceCodeFileSize(),
        commit_features.OtherFileSize(),
        commit_features.TestFileSize(),
        commit_features.SourceCodeAdded(),
        commit_features.OtherAdded(),
        commit_features.TestAdded(),
        commit_features.SourceCodeDeleted(),
        commit_features.OtherDeleted(),
        commit_features.TestDeleted(),
        commit_features.AuthorExperience(),
        commit_features.ReviewerExperience(),
        commit_features.ReviewersNum(),
        commit_features.ComponentTouchedPrev(),
        commit_features.DirectoryTouchedPrev(),
        commit_features.FileTouchedPrev(),
        commit_features.Types(),
        commit_features.Components(),
        commit_features.Directories(),
        commit_features.Files(),
    ]

    # Bug-level extractors are only added when bug data was requested.
    if bug_data:
        extractors.extend(
            [
                bug_features.Product(),
                bug_features.Component(),
                bug_features.Severity(),
                bug_features.Priority(),
                bug_features.HasCrashSignature(),
                bug_features.HasRegressionRange(),
                bug_features.Whiteboard(),
                bug_features.Keywords(),
                bug_features.NumberOfBugDependencies(),
                bug_features.BlockedBugsNumber(),
            ]
        )

    cleanups = [
        feature_cleanup.fileref(),
        feature_cleanup.url(),
        feature_cleanup.synonyms(),
    ]

    # Single-step pipeline wrapping the commit extractor.
    commit_extractor = commit_features.CommitExtractor(extractors, cleanups)
    self.extraction_pipeline = Pipeline([("commit_extractor", commit_extractor)])

    # One vectorizer per extracted column: "data", "desc" and "files".
    data_vectorizer = DictVectorizer()
    desc_vectorizer = self.text_vectorizer()
    files_vectorizer = CountVectorizer(
        analyzer=utils.keep_as_is,
        lowercase=False,
        min_df=0.0014,
    )
    union = ColumnTransformer(
        [
            ("data", data_vectorizer, "data"),
            ("desc", desc_vectorizer, "desc"),
            ("files", files_vectorizer, "files"),
        ]
    )

    # Random under-sampling with a fixed seed, then the XGBoost estimator.
    sampler = RandomUnderSampler(random_state=0)
    estimator = xgboost.XGBClassifier(n_jobs=utils.get_physical_cpu_count())

    self.clf = ImblearnPipeline(
        [
            ("union", union),
            ("sampler", sampler),
            ("estimator", estimator),
        ]
    )