in models/metrics/nn_score.py [0:0]
def buildTrainValTest(pathAttrib,
                      shareTrain=0.8,
                      shareVal=0.2):
    """Greedily split an attribute file into train / val / test subsets.

    The JSON file at ``pathAttrib`` is loaded and indexed as
    ``data[name][category] -> label``. ``getStatsOnDataset(data)`` supplies a
    nested mapping indexed as ``stats[category][label]`` (presumably the number
    of samples carrying that label -- confirm against the helper). Each split
    gets a per-label target equal to that value times the split's share, the
    test share being whatever remains after train and val (never negative).

    Sample names are visited in a random order (``random.shuffle``). For each
    sample and each split, a score is accumulated over the sample's
    categories: the squared ratio of the still-missing amount
    (``target - current``, clipped at 0) to ``target + 1e-8``. The sample goes
    to:

    * train, if the train score is >= 0.999 or at least as large as both
      other scores;
    * otherwise val, if the val score is >= the test score;
    * otherwise test.

    The running statistics of the chosen split are then updated through
    ``updateStatsWithData``.

    Args:
        pathAttrib: path of the JSON attribute file.
        shareTrain: fraction of the dataset targeted for the train split.
        shareVal: fraction of the dataset targeted for the val split.

    Returns:
        A tuple ``(outTrain, outVal, outTest, stats)`` where the first three
        are dicts mapping sample name to its attributes and ``stats`` is
        ``{"Train": ..., "Val": ..., "Test": ...}`` holding the running
        statistics of each split.
    """
    with open(pathAttrib, 'rb') as handle:
        data = json.load(handle)

    datasetStats = getStatsOnDataset(data)
    shareTest = max(0., 1. - shareTrain - shareVal)

    def scaledTargets(share):
        # Per-label quota a split should reach, given its share.
        return {category: {label: datasetStats[category][label] * share
                           for label in datasetStats[category]}
                for category in datasetStats}

    def emptyCounters():
        # Same layout as the dataset statistics, every entry starting at 0.
        return {category: {label: 0 for label in datasetStats[category]}
                for category in datasetStats}

    targetTrain = scaledTargets(shareTrain)
    targetVal = scaledTargets(shareVal)
    targetTest = scaledTargets(shareTest)

    names = list(data.keys())
    random.shuffle(names)

    outTrain, outVal, outTest = {}, {}, {}
    trainStats = emptyCounters()
    valStats = emptyCounters()
    testStats = emptyCounters()

    # (targets, running statistics) in train / val / test order.
    splits = ((targetTrain, trainStats),
              (targetVal, valStats),
              (targetTest, testStats))

    for name in names:
        attributes = data[name]
        scores = [0, 0, 0]

        for category in attributes:
            label = attributes[category]
            for index, (targets, current) in enumerate(splits):
                goal = targets[category][label]
                shortfall = max(0, goal - current[category][label])
                # 1e-8 keeps the ratio finite when the target is 0.
                ratio = shortfall / (goal + 1e-8)
                # Plain accumulation on purpose: the float additions must
                # happen one by one, in category order.
                scores[index] += ratio**2

        scoreTrain, scoreVal, scoreTest = scores

        if scoreTrain >= 0.999 or scoreTrain >= max(scoreVal, scoreTest):
            destination, destinationStats = outTrain, trainStats
        elif scoreVal >= scoreTest:
            destination, destinationStats = outVal, valStats
        else:
            destination, destinationStats = outTest, testStats

        destination[name] = attributes
        updateStatsWithData(destinationStats, attributes)

    splitStats = {"Train": trainStats, "Val": valStats, "Test": testStats}
    return outTrain, outVal, outTest, splitStats