in lightgbm/src/main/scala/com/microsoft/azure/synapse/ml/lightgbm/TrainUtils.scala [92:160]
/** Runs the boosting loop for one batch, with optional validation-based early stopping.
  *
  * Each pass of the loop: invokes `beforeTrainIteration`, asks `getLearningRate` for the
  * learning rate to use and resets it on the booster if it changed, performs one boosting
  * update via `updateOneIteration`, optionally collects training and validation metrics,
  * and finally invokes `afterTrainIteration` with those metrics.
  *
  * Early stopping is tracked independently per evaluation metric: the best score seen and
  * the iteration it was seen at are recorded, and the loop is ended once a metric has gone
  * `trainParams.earlyStoppingRound` iterations without an improvement larger than
  * `trainParams.improvementTolerance`.
  *
  * @param batchIndex  index of the batch being trained, forwarded to the iteration hooks
  * @param trainParams training configuration (iteration count, learning rate, early-stopping settings)
  * @param booster     booster that is updated in place and queried for evaluation results
  * @param log         logger used for progress and metric output
  * @param hasValid    whether validation metrics should be evaluated (and early stopping applied)
  * @return `Some(iteration)` holding the best iteration of the metric that triggered early
  *         stopping, or `None` if the loop ended without early stopping
  */
def trainCore(batchIndex: Int, trainParams: TrainParams, booster: LightGBMBooster,
              log: Logger, hasValid: Boolean): Option[Int] = {
  var isFinished = false
  var iters = 0
  val evalNames = booster.getEvalNames()
  val evalCounts = evalNames.length
  // Per-metric early-stopping state, indexed by the metric's position in the eval results.
  val bestScore = new Array[Double](evalCounts)
  // Snapshot of all validation scores at each metric's best iteration; a null slot also
  // serves as the "no score recorded yet" marker for that metric.
  val bestScores = new Array[Array[Double]](evalCounts)
  val bestIter = new Array[Int](evalCounts)
  val partitionId = TaskContext.getPartitionId
  var learningRate: Double = trainParams.learningRate
  var bestIterResult: Option[Int] = None

  // Metrics with these name prefixes are maximised; every other metric is minimised.
  def higherIsBetter(evalName: String): Boolean =
    evalName.startsWith("auc") || evalName.startsWith("ndcg@") || evalName.startsWith("map@") ||
      evalName.startsWith("average_precision")

  // True when `score` beats `best` by more than `tol` in the metric's preferred direction.
  // For minimised metrics the score must DROP by more than `tol`; comparing `score - best < tol`
  // instead would accept regressions smaller than `tol` as improvements.
  def isImprovement(evalName: String, score: Double, best: Double, tol: Double): Boolean =
    if (higherIsBetter(evalName)) score - best > tol
    else best - score > tol

  while (!isFinished && iters < trainParams.numIterations) {
    beforeTrainIteration(batchIndex, partitionId, iters, log, trainParams, booster, hasValid)

    val newLearningRate = getLearningRate(batchIndex, partitionId, iters, log, trainParams,
      learningRate)
    if (newLearningRate != learningRate) {
      log.info(s"LightGBM task calling booster.resetParameter to reset learningRate" +
        s" (newLearningRate: $newLearningRate)")
      booster.resetParameter(s"learning_rate=$newLearningRate")
      learningRate = newLearningRate
    }

    isFinished = updateOneIteration(trainParams, booster, log, iters)

    // Training metrics (data index 0) are only gathered when requested and the update
    // did not report that training is finished.
    val trainEvalResults: Option[Map[String, Double]] =
      if (trainParams.isProvideTrainingMetric.getOrElse(false) && !isFinished) {
        val evalResults: Array[(String, Double)] = booster.getEvalResults(evalNames, 0)
        evalResults.foreach { case (evalName: String, score: Double) => log.info(s"Train $evalName=$score") }
        Option(Map(evalResults: _*))
      } else {
        None
      }

    // Validation metrics (data index 1) drive early stopping.
    val validEvalResults: Option[Map[String, Double]] =
      if (hasValid && !isFinished) {
        val evalResults: Array[(String, Double)] = booster.getEvalResults(evalNames, 1)
        evalResults.zipWithIndex.foreach { case ((evalName, evalScore), index) =>
          log.info(s"Valid $evalName=$evalScore")
          val firstScore = bestScores(index) == null
          if (firstScore ||
              isImprovement(evalName, evalScore, bestScore(index), trainParams.improvementTolerance)) {
            bestScore(index) = evalScore
            bestIter(index) = iters
            bestScores(index) = evalResults.map(_._2)
          } else if (iters - bestIter(index) >= trainParams.earlyStoppingRound) {
            // NOTE(review): with a non-positive earlyStoppingRound this condition holds on the
            // first non-improving iteration — confirm callers only enable validation together
            // with a positive round count.
            isFinished = true
            log.info("Early stopping, best iteration is " + bestIter(index))
            bestIterResult = Some(bestIter(index))
          }
        }
        Option(Map(evalResults: _*))
      } else {
        None
      }

    afterTrainIteration(batchIndex, partitionId, iters, log, trainParams, booster, hasValid, isFinished,
      trainEvalResults, validEvalResults)
    iters = iters + 1
  }
  bestIterResult
}