in bugbug/bug_features.py [0:0]
def transform(self, bugs):
    """Turn bugs into a DataFrame of extracted features and cleaned-up text.

    Args:
        bugs: Zero-argument callable returning an iterable of bug dicts.
            Each bug is read through its "creator", "summary" and
            "comments" keys, and each comment through its "text" key.

    Returns:
        A pandas DataFrame with one row per bug and the columns "data"
        (dict of feature name to value), "title", "first_comment" and
        "comments" (all comments joined with a single space).
    """
    bugs_iter = iter(bugs())

    # Counts how many bugs from each creator have already been processed
    # during this call; handed to the extractors as `reporter_experience`.
    reporter_experience_map = defaultdict(int)
    author_ids = get_author_ids() if self.commit_data else None

    # Resolve each extractor's feature name once instead of once per bug:
    # an explicit `name` attribute wins, otherwise the class name is used.
    named_extractors = [
        (extractor, getattr(extractor, "name", extractor.__class__.__name__))
        for extractor in self.feature_extractors
    ]

    def apply_transform(bug):
        creator = bug["creator"]
        # Read before the increment below, so the current bug is not counted.
        reporter_experience = reporter_experience_map[creator]

        data = {}
        for extractor, extractor_name in named_extractors:
            res = extractor(
                bug,
                reporter_experience=reporter_experience,
                author_ids=author_ids,
            )

            # An extractor returning None contributes no feature.
            if res is None:
                continue

            # A list/set result becomes one boolean feature per item.
            # Keys are interned, presumably because the same key strings
            # recur across many bugs.
            if isinstance(res, (list, set)):
                for item in res:
                    data[sys.intern(f"{item} in {extractor_name}")] = True
                continue

            data[extractor_name] = res

        reporter_experience_map[creator] += 1

        summary = bug["summary"]
        comments = [c["text"] for c in bug["comments"]]
        # Every cleanup function is applied, in order, to the summary and
        # to each comment.
        for cleanup_function in self.cleanup_functions:
            summary = cleanup_function(summary)
            comments = [cleanup_function(comment) for comment in comments]

        return {
            "data": data,
            "title": summary,
            "first_comment": comments[0] if comments else "",
            "comments": " ".join(comments),
        }

    def apply_rollback(bugs_iter):
        # Pass every bug through bug_snapshot.rollback in a worker pool;
        # imap yields the results in input order.
        with Pool() as p:
            yield from p.imap(
                partial(bug_snapshot.rollback, when=self.rollback_when),
                bugs_iter,
                chunksize=1024,
            )

    if self.rollback:
        bugs_iter = apply_rollback(bugs_iter)

    return pd.DataFrame(apply_transform(bug) for bug in bugs_iter)