in benchmarking/pipemode_benchmark/dataset.py [0:0]
def _upload_to_s3(self):
    """Upload every generated recordio file copy to S3.

    Objects land under ``s3://<bucket_name>/<prefix>/<name>/file_<index>.recordio``.
    Uploads are fanned out across a thread pool (S3 uploads are I/O-bound,
    and a local closure over ``self`` cannot be pickled for a process pool).
    Raises if any individual upload fails.
    """
    print("Creating Dataset: {}".format(self))

    def upload(local_file, copy_index):
        """Upload one copy of *local_file*; raise on failure."""
        # Each worker builds its own session/resource: boto3 sessions are
        # not documented as safe to share across threads.
        boto_session = boto3.Session()
        s3 = boto_session.resource('s3')
        bucket = s3.Bucket(self.bucket_name)
        key = '{}/{}/file_{}.recordio'.format(
            self.prefix, self.name, str(copy_index).zfill(6))
        try:
            # Context manager ensures the file handle is closed promptly.
            with open(local_file, 'rb') as body:
                bucket.put_object(Key=key, Body=body)
            if copy_index % 50 == 0:
                # Lightweight progress indicator: one dot per 50 uploads.
                sys.stdout.write('.')
                sys.stdout.flush()
        except Exception as ex:
            print('Error uploading: {}'.format(local_file))
            # ex.message does not exist in Python 3; str(ex) works everywhere.
            print(str(ex))
            raise Exception("Upload Failed: " + local_file)

    # BUG FIX: was ProcessPoolExecutor with submit(upload(...)) — that
    # invoked upload synchronously and submitted its None result, so the
    # pool never ran anything. A thread pool is used because the closure
    # (and the bound `self`) cannot be pickled into worker processes.
    executor = cf.ThreadPoolExecutor()
    futures = []
    uploaded_file_index = 0
    for file_index in range(self.num_files):
        for copy_index in range(self.num_copies):
            local_file = os.path.join(
                self.root_dir, '{}-{}.recordio'.format(self.name, str(file_index)))
            # Pass the callable and its arguments separately so the
            # executor runs the upload, not its return value.
            futures.append(executor.submit(upload, local_file, uploaded_file_index))
            uploaded_file_index += 1
    # Wait for completion; result() re-raises the first upload failure
    # instead of silently dropping it.
    for future in futures:
        future.result()
    executor.shutdown()