in bugbug/models/testselect.py [0:0]
def __init__(self, lemmatization=False, granularity="label", failures_skip=None):
    """Set up databases, feature extraction and the classifier pipeline.

    Args:
        lemmatization: Forwarded unchanged to ``Model.__init__``.
        granularity: Selects which scheduling databases and feature
            extractors are used. The recognised values are ``"label"``,
            ``"group"`` and ``"config_group"``. Any other value registers
            no scheduling database and leaves ``PrevFailures`` as the
            only feature extractor.
        failures_skip: Stored as ``self.failures_skip``; it is not
            otherwise used in this constructor.
    """
    Model.__init__(self, lemmatization)

    self.granularity = granularity
    self.failures_skip = failures_skip

    # The commits database is registered whatever the granularity.
    self.training_dbs = [repository.COMMITS_DB]
    self.eval_dbs[repository.COMMITS_DB] = (
        repository.COMMITS_DB,
        repository.COMMIT_EXPERIENCES_DB,
    )

    # Ordered (scheduling db, dbs registered under it in ``eval_dbs``)
    # pairs. The first pair's scheduling db is also the one appended to
    # ``training_dbs``.
    if granularity == "label":
        scheduling_dbs = (
            (
                test_scheduling.TEST_LABEL_SCHEDULING_DB,
                (
                    test_scheduling.PAST_FAILURES_LABEL_DB,
                    test_scheduling.FAILING_TOGETHER_LABEL_DB,
                ),
            ),
        )
    elif granularity == "group":
        # "group" additionally registers the config/group scheduling db,
        # for evaluation only (it is not added to ``training_dbs``).
        scheduling_dbs = (
            (
                test_scheduling.TEST_GROUP_SCHEDULING_DB,
                (
                    test_scheduling.PAST_FAILURES_GROUP_DB,
                    test_scheduling.TOUCHED_TOGETHER_DB,
                ),
            ),
            (
                test_scheduling.TEST_CONFIG_GROUP_SCHEDULING_DB,
                (test_scheduling.FAILING_TOGETHER_CONFIG_GROUP_DB,),
            ),
        )
    elif granularity == "config_group":
        scheduling_dbs = (
            (
                test_scheduling.TEST_CONFIG_GROUP_SCHEDULING_DB,
                (
                    test_scheduling.PAST_FAILURES_CONFIG_GROUP_DB,
                    test_scheduling.TOUCHED_TOGETHER_DB,
                ),
            ),
        )
    else:
        # Unrecognised granularity: nothing beyond the commits db.
        scheduling_dbs = ()

    if scheduling_dbs:
        self.training_dbs.append(scheduling_dbs[0][0])
    for scheduling_db, required_dbs in scheduling_dbs:
        self.eval_dbs[scheduling_db] = required_dbs

    self.cross_validation_enabled = False
    self.calculate_importance = False
    self.entire_dataset_training = True

    # ``PrevFailures`` is always present; the remaining extractors depend
    # on the granularity.
    extractors = [test_scheduling_features.PrevFailures()]
    if granularity == "label":
        extractors.extend(
            (
                test_scheduling_features.Platform(),
                # test_scheduling_features.chunk(),
                test_scheduling_features.Suite(),
            )
        )
    elif granularity in ("group", "config_group"):
        extractors.extend(
            (
                test_scheduling_features.PathDistance(),
                test_scheduling_features.CommonPathComponents(),
                test_scheduling_features.TouchedTogether(),
            )
        )

    commit_extractor = commit_features.CommitExtractor(extractors, [])
    self.extraction_pipeline = Pipeline([("commit_extractor", commit_extractor)])

    # Classifier steps, built in the order they run: vectorise the "data"
    # column, undersample with a fixed seed, then fit XGBoost.
    vectorizer = ColumnTransformer([("data", DictVectorizer(), "data")])
    undersampler = RandomUnderSampler(random_state=0)
    estimator = xgboost.XGBClassifier(n_jobs=utils.get_physical_cpu_count())
    self.clf = ImblearnPipeline(
        [
            ("union", vectorizer),
            ("sampler", undersampler),
            ("estimator", estimator),
        ]
    )