def buildTrainValTest()

in models/metrics/nn_score.py [0:0]


def buildTrainValTest(pathAttrib,
                      shareTrain=0.8,
                      shareVal=0.2):
    """Greedily split a JSON attribute file into train / val / test subsets.

    The file at ``pathAttrib`` is loaded as JSON and is indexed here as
    ``data[name][category] -> label``. Per-category / per-label statistics
    are obtained from ``getStatsOnDataset`` and scaled by each split's share
    to form that split's targets. Items are then visited in random order
    (``random.shuffle``) and each one goes to the split whose targets are
    still the least satisfied for the item's labels.

    Args:
        pathAttrib: path of the JSON file to load.
        shareTrain: fraction of the statistics targeted by the train split.
        shareVal: fraction of the statistics targeted by the val split.
            The test split targets ``max(0, 1 - shareTrain - shareVal)``.

    Returns:
        A 4-tuple ``(outTrain, outVal, outTest, stats)``: the first three
        map item names to their attribute entries, and ``stats`` is
        ``{"Train": ..., "Val": ..., "Test": ...}`` holding the running
        statistics of each split.

    NOTE(review): the exact content of the statistics (presumably label
    counts) and the way ``updateStatsWithData`` updates them are defined
    outside this function -- confirm against those helpers.
    """

    with open(pathAttrib, 'rb') as file:
        data = json.load(file)

    datasetStats = getStatsOnDataset(data)

    shareTest = max(0., 1. - shareTrain - shareVal)

    def scaledTargets(share):
        # Same nested layout as datasetStats, every entry scaled by `share`.
        return {category: {label: datasetStats[category][label] * share
                           for label in datasetStats[category]}
                for category in datasetStats}

    def emptyStats():
        # Same nested layout as datasetStats, every entry reset to 0.
        return {category: {label: 0 for label in datasetStats[category]}
                for category in datasetStats}

    def missingScore(target, current, attributes):
        # Sum over the item's categories of the squared relative shortfall
        # between the split's target and what it has accumulated so far.
        # A plain running total is used on purpose (no sum()) so the float
        # accumulation order stays strictly left to right.
        total = 0
        for category in attributes:
            label = attributes[category]
            wanted = target[category][label]
            shortfall = max(0, wanted - current[category][label]) / (
                wanted + 1e-8)
            total += shortfall**2
        return total

    targetTrain = scaledTargets(shareTrain)
    targetVal = scaledTargets(shareVal)
    targetTest = scaledTargets(shareTest)

    names = list(data)
    random.shuffle(names)

    outTrain, outVal, outTest = {}, {}, {}
    trainStats, valStats, testStats = emptyStats(), emptyStats(), emptyStats()

    for name in names:
        attributes = data[name]

        scoreTrain = missingScore(targetTrain, trainStats, attributes)
        scoreVal = missingScore(targetVal, valStats, attributes)
        scoreTest = missingScore(targetTest, testStats, attributes)

        # Train wins when it is (almost) entirely unfilled for at least one
        # label, or on ties; val wins ties against test.
        if scoreTrain >= 0.999 or scoreTrain >= max(scoreVal, scoreTest):
            bucket, tally = outTrain, trainStats
        elif scoreVal >= scoreTest:
            bucket, tally = outVal, valStats
        else:
            bucket, tally = outTest, testStats

        bucket[name] = attributes
        updateStatsWithData(tally, attributes)

    return outTrain, outVal, outTest, {"Train": trainStats,
                                       "Val": valStats,
                                       "Test": testStats}