in modules/ml-ext/ml/src/main/java/org/apache/ignite/ml/recommendation/RecommendationTrainer.java [157:208]
private <O extends Serializable, S extends Serializable> RecommendationModel<O, S> train(
    Dataset<EmptyContext, RecommendationDatasetData<O, S>> dataset, RecommendationModel<O, S> mdl) {
    // Builds the object and subject matrices and refines them with iterative gradient steps.
    // When mdl is null the matrices start fully random; otherwise they start from mdl's matrices.

    // Gather the identifiers of every object and subject present in the dataset.
    Set<O> objects = dataset.compute(RecommendationDatasetData::getObjects, RecommendationTrainer::join);
    Set<S> subjects = dataset.compute(RecommendationDatasetData::getSubjects, RecommendationTrainer::join);

    // Assigned exactly once on each branch so the lambda below can capture them.
    final Map<O, Vector> objMatrix;
    final Map<S, Vector> subjMatrix;

    if (mdl == null) {
        // No previous model: every object and every subject gets a random vector.
        objMatrix = generateRandomVectorForEach(objects, trainerEnvironment.randomNumbersGenerator());
        subjMatrix = generateRandomVectorForEach(subjects, trainerEnvironment.randomNumbersGenerator());
    }
    else {
        // Previous model given: work on copies of its maps rather than on the maps themselves.
        objMatrix = new HashMap<>(mdl.getObjMatrix());
        subjMatrix = new HashMap<>(mdl.getSubjMatrix());

        // Identifiers the previous model has no entry for receive a random vector of size k.
        for (O obj : objects) {
            if (objMatrix.containsKey(obj))
                continue;

            objMatrix.put(obj, randomVector(k, trainerEnvironment.randomNumbersGenerator()));
        }

        for (S subj : subjects) {
            if (subjMatrix.containsKey(subj))
                continue;

            subjMatrix.put(subj, randomVector(k, trainerEnvironment.randomNumbersGenerator()));
        }
    }

    // SGD steps. A maxIterations value of -1 disables the iteration limit.
    int iter = 0;

    while (maxIterations == -1 || iter < maxIterations) {
        // Lambdas can only capture effectively final locals, so snapshot the counter.
        final int seed = iter;

        // Calculate the gradient on each partition and aggregate the partial results.
        MatrixFactorizationGradient<O, S> grad = dataset.compute(
            (data, env) -> data.calculateGradient(
                objMatrix,
                subjMatrix,
                batchSize,
                // Mix the iteration number with the partition number to vary the seed.
                seed ^ env.partition(),
                regParam,
                learningRate
            ),
            RecommendationTrainer::sum
        );

        // A threshold of 0 turns the early-stop check off; otherwise stop as soon as the
        // improvement computed from this gradient falls below the threshold.
        boolean improvementTooSmall =
            minMdlImprovement != 0 && calculateImprovement(grad) < minMdlImprovement;

        if (improvementTooSmall)
            break;

        // Apply the aggregated gradient to both matrices.
        grad.applyGradient(objMatrix, subjMatrix);

        iter++;
    }

    return new RecommendationModel<>(objMatrix, subjMatrix);
}