in datasets/bbbc-021/scripts/bbbc021-1-train-script.py [0:0]
def _load_and_concatenate(paths):
    """Load every ``.npy`` file in *paths* and concatenate them along axis 0."""
    arrays = []
    for path in paths:
        print("Loading {}".format(path))
        arrays.append(np.load(path))
    print("Concatenating...")
    return np.concatenate(arrays, axis=0)


def load_training_data(prefixListPath):
    """Load the image, label and subclass arrays named by a prefix list file.

    The directory given by the ``SM_CHANNEL_TRAINING`` environment variable is
    used as the data root. ``prefixListPath`` is resolved relative to that
    directory and is read as a text file with one prefix per line; for each
    prefix the files ``<prefix>-train.npy``, ``<prefix>-label.npy`` and
    ``<prefix>-subclass.npy`` are loaded from the same directory.

    Side effects: the process working directory is changed to the data root,
    and progress/diagnostic information is printed to stdout.

    Args:
        prefixListPath: Path of the prefix list file, relative to the
            ``SM_CHANNEL_TRAINING`` directory.

    Returns:
        A tuple ``(x_train, y_train, z_train)``:
        ``x_train`` is a ``uint16`` array of shape ``(N, 3, 128, 128)`` where
        ``N`` is the number of labels; ``y_train`` and ``z_train`` are the
        label and subclass arrays concatenated along axis 0.

    Raises:
        KeyError: If ``SM_CHANNEL_TRAINING`` is not set.
        ValueError: If the prefix list contains no prefixes, or a train file
            is not 5-dimensional.
        IndexError: If the train files hold more images than there are labels.
    """
    # TODO: size the image buffer dynamically from the input dimensions.
    # The train files are expected to be 5-D: (image#, channel, z, y, x) with
    # a single z-plane; only plane 0 of each channel is copied, which yields a
    # 4-D (image#, channel, y, x) result.
    channel_count = 3
    image_height = 128
    image_width = 128

    sm_channel_prefix = os.environ['SM_CHANNEL_TRAINING'] + "/"
    # Kept from the original implementation: the working directory of the
    # whole process is switched to the training channel directory.
    os.chdir(sm_channel_prefix)
    print("cwd path=")
    print(os.getcwd())
    print("==")
    for entry in os.listdir('.'):
        print(entry)
    print("==")

    prefixListPath = sm_channel_prefix + prefixListPath
    # ``with`` guarantees the handle is closed even if reading fails.
    with open(prefixListPath, "r") as prefix_file:
        stripped_lines = (line.rstrip() for line in prefix_file)
        # Blank lines (e.g. a trailing newline) would otherwise produce
        # bogus paths such as "<dir>/-train.npy".
        prefixes = [prefix for prefix in stripped_lines if prefix]
    if not prefixes:
        raise ValueError("No prefixes found in {}".format(prefixListPath))

    trainPathList = [sm_channel_prefix + p + "-train.npy" for p in prefixes]
    labelPathList = [sm_channel_prefix + p + "-label.npy" for p in prefixes]
    subclassPathList = [sm_channel_prefix + p + "-subclass.npy" for p in prefixes]
    pathListLength = len(trainPathList)
    print("Prefix path list has {} entries".format(pathListLength))

    y_train = _load_and_concatenate(labelPathList)
    trainCount = y_train.shape[0]
    print("Label count={}".format(trainCount))

    z_train = _load_and_concatenate(subclassPathList)
    subclassCount = z_train.shape[0]
    print("Subclass count={}".format(subclassCount))

    # The label count fixes the number of images, so the image buffer can be
    # allocated once instead of concatenating large arrays afterwards.
    x_train = np.zeros(
        (trainCount, channel_count, image_height, image_width),
        dtype=np.uint16,
    )
    tIndex = 0
    for lc, trainPath in enumerate(trainPathList):
        if lc % 10 == 0:
            print("Loaded {} of {} train files".format(lc, pathListLength))
        print("Loading {}".format(trainPath))
        x_data = np.load(trainPath)
        if x_data.ndim != 5:
            # A lower-rank array would be silently broadcast row-wise.
            raise ValueError(
                "Expected 5-D train data in {}, got shape {}".format(
                    trainPath, x_data.shape
                )
            )
        stop = tIndex + x_data.shape[0]
        if stop > trainCount:
            raise IndexError(
                "{} holds more images than the {} available labels".format(
                    trainPath, trainCount
                )
            )
        # Copy z-plane 0 of every channel for the whole file at once.
        for channel in range(channel_count):
            x_train[tIndex:stop, channel] = x_data[:, channel, 0]
        tIndex = stop

    if tIndex != trainCount:
        # Not fatal (the original silently left zero-filled images), but a
        # mismatch means some labels have no image data.
        print(
            "Warning: loaded {} images for {} labels".format(tIndex, trainCount)
        )

    print("x_shape=")
    print(x_train.shape)
    print("==")
    print("y_shape=")
    print(y_train.shape)
    print("==")
    print("z_shape=")
    print(z_train.shape)
    print("==")
    return x_train, y_train, z_train