in mozetl/taar/taar_ensemble.py [0:0]
def build_stacked_datasets(dataset, folds):
    """Build, for every fold, the stacked RDDs of the remaining folds.

    Each fold is held out in turn. Every other fold (any entry of
    ``folds`` that compares ``!=`` to the held-out one) has its ``.rdd``
    mapped through ``to_stacked_row`` with the collaborative, similarity
    and locale recommenders, and rows for which that call returned
    ``None`` are filtered out.

    Args:
        dataset: Not used by this function; kept in the signature.
        folds: Sequence of objects exposing ``.rdd`` (presumably Spark
            DataFrames -- confirm against the caller).

    Returns:
        A list with one entry per fold, in ``folds`` order. Each entry is
        itself a list of RDDs, one per training fold; e.g. three mutually
        unequal folds give three lists of two RDDs each.
    """
    # Load the recommenders once and keep only their bound `recommend`
    # methods, so the row-mapping function below closes over this list
    # alone rather than over the whole recommender map.
    recommenders = load_recommenders()
    recommend_fns = [
        recommenders[COLLABORATIVE].recommend,  # Collaborative
        recommenders[SIMILARITY].recommend,  # Similarity
        recommenders[LOCALE].recommend,  # Locale
    ]

    def stack_client_row(client_row):
        # Delegate to the shared stacking function for a single row.
        return to_stacked_row(recommend_fns, client_row)

    def has_stacked_row(stacked_row):
        # Keep only rows for which the stacking function produced a result.
        return stacked_row is not None

    def stack_training_folds(held_out):
        # Pick the training folds first, then derive one RDD from each.
        training_folds = [
            candidate for candidate in folds if candidate != held_out
        ]
        return [
            frame.rdd.map(stack_client_row).filter(has_stacked_row)
            for frame in training_folds
        ]

    fold_count = len(folds)
    print("Number of folds: {}".format(fold_count))

    return [stack_training_folds(held_out) for held_out in folds]