def read_test_predictions()

in cost-based-ml/cost_based_ml.py [0:0]


def read_test_predictions(bucket, key):
    """Load a compressed CSV predictions file from S3 into a structured array.

    The object at ``bucket``/``key`` is downloaded, decompressed, decoded as
    UTF-8 and parsed as comma-separated text whose first line is a header.
    Only two columns are kept: the first and third, or the second and fourth
    when the header starts with a ``tag`` column.

    Args:
        bucket: Name of the S3 bucket holding the predictions object.
        key: Key of the predictions object within the bucket.

    Returns:
        A NumPy structured array with two fields named after the selected
        header columns; the first is parsed as ``bool`` and the second as
        ``float``.
    """
    s3 = boto3.resource('s3')
    obj = s3.Object(bucket, key)
    # wbits = 15 + 32 makes zlib auto-detect either a zlib or a gzip header.
    predictions_str = zlib.decompress(obj.get()['Body'].read(), 15 + 32).decode('utf-8')
    names = predictions_str.split('\n', 1)[0].split(',')
    # TODO a bit hacky, find a better way to parse
    # if tag column is present, skip it
    cols = (1, 3) if names[0] == 'tag' else (0, 2)
    formats = ('bool', 'float')
    # Keep only the header names of the columns that are actually loaded.
    names = [names[index] for index in cols]
    data = np.loadtxt(
        StringIO(predictions_str),
        dtype={'names': names, 'formats': formats},
        delimiter=',',
        skiprows=1,  # the header line was already consumed above
        usecols=cols,
    )
    return data