in cost-based-ml/cost_based_ml.py [0:0]
def read_test_predictions(bucket, key):
    """Fetch a compressed predictions CSV from S3 and parse two of its columns.

    The object stored at ``bucket``/``key`` is downloaded in full,
    decompressed, decoded as UTF-8 and treated as comma-separated text whose
    first line is a header. Exactly two columns are kept: positions 1 and 3
    when the first header field is ``tag``, otherwise positions 0 and 2.

    Args:
        bucket: Name of the S3 bucket that holds the object.
        key: Key of the object within the bucket.

    Returns:
        The result of ``np.loadtxt`` using a two-field structured dtype. The
        fields are named after the selected header columns; the first is
        parsed as ``bool`` and the second as ``float``.
    """
    body = boto3.resource('s3').Object(bucket, key).get()['Body']

    # Adding 32 to the window size lets zlib auto-detect a gzip or zlib header.
    auto_header_wbits = 15 + 32
    raw_text = zlib.decompress(body.read(), auto_header_wbits).decode('utf-8')

    # Only the header line is needed to decide which columns to load.
    header_fields = raw_text.partition('\n')[0].split(',')

    # TODO: a bit hacky, find a better way to parse.
    # An optional leading "tag" column shifts the wanted columns right by one.
    # NOTE(review): presumably these are the true-label and score columns of
    # the prediction output -- confirm against the file format.
    if header_fields[0] == 'tag':
        wanted = (1, 3)
    else:
        wanted = (0, 2)

    field_names = [header_fields[position] for position in wanted]
    record_type = {'names': field_names, 'formats': ('bool', 'float')}

    return np.loadtxt(
        StringIO(raw_text),
        dtype=record_type,
        delimiter=',',
        skiprows=1,
        usecols=wanted,
    )