in src/pipeline/prepare.py [0:0]
import io

import boto3
import sagemaker.amazon.common as smac


def save_as_protobuf(X, Y, bucket, key):
"""Converts features and predictions matrices to recordio protobuf and
writes to S3
Args:
X:
2D numpy matrix with features
Y:
1D numpy matrix with predictions
bucket:
s3 bucket where recordio protobuf file will be staged
prefix:
s3 url prefix to stage prepared data to use for training the model
key:
protobuf file name to be staged
Returns:
s3 url with key to the protobuf data
"""
    buf = io.BytesIO()
    # Serialize the sparse feature matrix and labels to RecordIO protobuf.
    smac.write_spmatrix_to_sparse_tensor(buf, X, Y)
    buf.seek(0)  # rewind so the upload reads from the start of the buffer
    boto3.resource('s3').Bucket(bucket).Object(key).upload_fileobj(buf)
    return 's3://{}/{}'.format(bucket, key)
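

# A minimal usage sketch (illustrative only): the bucket name, key, and
# synthetic data below are assumptions, not values from this pipeline.
# Assumes numpy and scipy are available alongside the imports above.
if __name__ == '__main__':
    import numpy as np
    import scipy.sparse

    # 100 samples with 50 sparse features, plus binary labels.
    X = scipy.sparse.random(100, 50, density=0.1, format='csr', dtype='float32')
    Y = np.random.randint(0, 2, size=100).astype('float32')

    url = save_as_protobuf(X, Y, 'my-training-bucket', 'train/data.protobuf')
    print(url)  # s3://my-training-bucket/train/data.protobuf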