in bugbug/models/worksforme.py [0:0]
def __init__(self, lemmatization=False):
    """Configure the extraction pipeline and the classifier for this model.

    Args:
        lemmatization: Forwarded unchanged to ``BugModel.__init__``.
    """
    BugModel.__init__(self, lemmatization)

    self.calculate_importance = False

    # Per-bug feature extractors handed to the BugExtractor below.
    extractors = [
        bug_features.HasSTR(),
        bug_features.HasRegressionRange(),
        bug_features.Status(),
        bug_features.Severity(),
        bug_features.Priority(),
        bug_features.HasURL(),
        bug_features.Whiteboard(),
        bug_features.Product(),
        bug_features.Component(),
        bug_features.Keywords(),
        bug_features.TimeToClose(),
        bug_features.HasAttachment(),
        bug_features.CommentCount(),
        bug_features.CommentLength(),
        bug_features.NumWordsComments(),
    ]

    # Cleanup functions handed to the BugExtractor alongside the extractors.
    cleaners = [
        feature_cleanup.fileref(),
        feature_cleanup.url(),
        feature_cleanup.synonyms(),
        feature_cleanup.hex(),
    ]

    # Rollback is enabled, with self.rollback passed as ``rollback_when``.
    bug_extractor = bug_features.BugExtractor(
        extractors,
        cleaners,
        rollback=True,
        rollback_when=self.rollback,
    )
    self.extraction_pipeline = Pipeline([("bug_extractor", bug_extractor)])

    # One transformer per input column: "data" goes through a
    # DictVectorizer, "title" and "comments" each get their own text
    # vectorizer built with the same min_df.
    column_union = ColumnTransformer(
        [
            ("data", DictVectorizer(), "data"),
            ("title", self.text_vectorizer(min_df=0.0001), "title"),
            ("comments", self.text_vectorizer(min_df=0.0001), "comments"),
        ]
    )
    # Fixed random_state for the sampler step.
    oversampler = BorderlineSMOTE(random_state=0)
    booster = xgboost.XGBClassifier(n_jobs=utils.get_physical_cpu_count())

    self.clf = ImblearnPipeline(
        [
            ("union", column_union),
            ("sampler", oversampler),
            ("estimator", booster),
        ]
    )