perf_testing/scripts/highspeed_write_nonzero.py (54 lines of code) (raw):
import os
import shutil
import time
import argparse
import subprocess
import mmap
import io
from multiprocessing import Pool, cpu_count
# Function to create files using dd command
def create_file_dd(file_index, folder, source_file, timestamp):
filename = os.path.join(folder, f'ddFile_{timestamp}_{file_index}')
block_size = 1 # in GB
count = 36
file_size_gb = (block_size * count)
command = f"dd if={source_file} of={filename} bs={block_size}G count={count} oflag=direct"
start_time = time.time()
result = subprocess.run(command, shell=True, stdout=subprocess.PIPE, stderr=subprocess.PIPE)
end_time = time.time()
if result.returncode != 0:
return (filename, 0, 0.0, f"Error creating file {filename}: {result.stderr.decode().strip()}")
write_time = end_time - start_time
write_speed = (file_size_gb * 1024) / write_time # MB/s
return (filename, write_time, write_speed, file_size_gb, None)
# Main function to handle parallel execution
def main(folder, num_files, source_file):
if not os.path.exists(folder):
os.makedirs(folder)
timestamp = int(time.time()) # Get current timestamp for file naming
start_time = time.time()
results = []
with Pool(processes=cpu_count()) as pool: # Pool of workers based on the CPU count
futures=[]
futures += [pool.apply_async(create_file_dd, (i, folder, source_file, timestamp)) for i in range(num_files)]
# Collect results from async operations
for future in futures:
result = future.get()
if result[4] is None: # No error
results.append(result)
else:
print(result[4]) # Print error messages
end_time = time.time()
total_time = end_time - start_time
total_data_written = sum(r[3] for r in results) # in GB
speed_gbps = (total_data_written *8 ) / total_time # Convert GB to Gigabits (1 GB = 8 Gb)
throughput = (total_data_written * 1024) / total_time
print(f"Number of files written: {num_files}")
print(f"Total amount of data written: {total_data_written:.2f} GB")
print(f"Total time taken: {total_time:.2f} seconds")
print(f"Overall Speed: {speed_gbps:.2f} Gbps")
print(f"Throughput: {throughput:.2f} MiB/s")
if __name__ == "__main__":
parser = argparse.ArgumentParser(description='Create multiple files using various methods in parallel.')
parser.add_argument('folder', type=str, help='The folder where the files will be written.')
parser.add_argument('num_files', type=int, help='The number of files to create.')
parser.add_argument('source_file', type=str, help='The source file to copy data from.')
args = parser.parse_args()
main(args.folder, args.num_files, args.source_file)
# python3 highspeed_write_nonzero.py <mntPath>~/drs/random_data_test/ <noOfFiles>5 <sourceFile>/mnt/azcopy_test_180GB.log