def load_training_data()

in datasets/bbbc-021/scripts/bbbc021-1-train-script.py [0:0]


def _load_and_concatenate(npy_paths):
    """Load every ``.npy`` file in *npy_paths* and join them along axis 0."""
    arrays = []
    for npy_path in npy_paths:
        print("Loading {}".format(npy_path))
        arrays.append(np.load(npy_path))
    print("Concatenating...")
    return np.concatenate(arrays, axis=0)


def load_training_data(prefixListPath):
    """Load the image, label and subclass arrays listed in a prefix file.

    The directory named by the ``SM_CHANNEL_TRAINING`` environment variable
    is used as the data root (and becomes the current working directory).
    ``prefixListPath`` is read relative to that root; every non-blank line in
    it is a prefix ``P`` for which the files ``P-train.npy``, ``P-label.npy``
    and ``P-subclass.npy`` are loaded from the same root.

    Args:
        prefixListPath: Path, relative to the training channel directory, of
            a text file holding one file prefix per line. Blank lines are
            ignored.

    Returns:
        A tuple ``(x_train, y_train, z_train)``:
          * ``x_train`` -- ``uint16`` array of shape ``(N, 3, 128, 128)``
            where ``N`` is the number of labels.
          * ``y_train`` -- the label arrays concatenated along axis 0.
          * ``z_train`` -- the subclass arrays concatenated along axis 0.

    Raises:
        KeyError: If ``SM_CHANNEL_TRAINING`` is not set.
        ValueError: If the prefix list contains no prefixes.
        IndexError: If the train files hold more samples than there are
            labels, or a train file has fewer than three channels.
    """
    sm_channel_prefix = os.environ['SM_CHANNEL_TRAINING'] + "/"
    if not os.path.isabs(sm_channel_prefix):
        # Resolve a relative channel directory *before* chdir; otherwise the
        # relative prefix would be applied a second time on top of the new
        # working directory and no file would be found.
        sm_channel_prefix = os.path.join(
            os.path.abspath(sm_channel_prefix), "")
    os.chdir(sm_channel_prefix)

    # Diagnostic dump of the channel directory contents.
    print("cwd path=")
    print(os.getcwd())
    print("==")
    for entry in os.listdir('.'):
        print(entry)
    print("==")

    # Context manager guarantees the handle is closed even if reading fails.
    with open(sm_channel_prefix + prefixListPath, "r") as prefix_file:
        stripped_lines = [line.rstrip() for line in prefix_file]
    # Skip blank lines (e.g. a trailing newline) instead of turning them into
    # bogus "<dir>/-train.npy" paths.
    prefixes = [prefix for prefix in stripped_lines if prefix]

    train_paths = [sm_channel_prefix + p + "-train.npy" for p in prefixes]
    label_paths = [sm_channel_prefix + p + "-label.npy" for p in prefixes]
    subclass_paths = [
        sm_channel_prefix + p + '-subclass.npy' for p in prefixes
    ]

    path_count = len(train_paths)
    print("Prefix path list has {} entries".format(path_count))
    if path_count == 0:
        # np.concatenate would raise a ValueError with an opaque message.
        raise ValueError(
            "Prefix list {!r} contains no prefixes".format(prefixListPath))

    y_train = _load_and_concatenate(label_paths)
    train_count = y_train.shape[0]
    print("Label count={}".format(train_count))

    z_train = _load_and_concatenate(subclass_paths)
    subclass_count = z_train.shape[0]
    print("Subclass count={}".format(subclass_count))

    ##########################################################################
    # Todo: create dynamic sizing model based on input dimensions.
    # This script assumes input with 4 dimensions. It assumes 2D rather than
    # 3D data, with 3 channels:
    #    image#, channels, y, x
    #
    # With channels=3, x and y = 128
    #
    # However, input is 3D and will be <#>, 3, 1, 128, 128
    #
    ##########################################################################
    x_train = np.zeros((train_count, 3, 128, 128), dtype=np.uint16)
    next_row = 0
    for file_index, train_path in enumerate(train_paths):
        if file_index % 10 == 0:
            print("Loaded {} of {} train files".format(file_index, path_count))
        print("Loading {}".format(train_path))
        x_data = np.load(train_path)
        sample_count = x_data.shape[0]
        if sample_count == 0:
            continue
        end_row = next_row + sample_count
        if end_row > train_count:
            raise IndexError(
                "Train files hold more samples than the {} labels "
                "(overflow while loading {})".format(train_count, train_path))
        # Drop the singleton depth axis: take z-slice 0 of each channel for
        # all samples of this file at once.
        for channel in range(3):
            x_train[next_row:end_row, channel] = x_data[:, channel, 0]
        next_row = end_row

    if next_row != train_count:
        # The remaining rows keep their zero initialisation; make that visible.
        print("Warning: loaded {} train samples but have {} labels".format(
            next_row, train_count))

    print("x_shape=")
    print(x_train.shape)
    print("==")
    print("y_shape=")
    print(y_train.shape)
    print("==")
    print("z_shape=")
    print(z_train.shape)
    print("==")
    return x_train, y_train, z_train