in src/nanotron/helpers.py [0:0]
def write_to_csv(csv_filename, table_log, model_tflops, slurm_job_id):
    """Append one row of benchmark results to a CSV file under an fcntl lock.

    The row is built by calling ``get_formatted_value`` on every item of
    ``table_log``; the header is built from each item's ``tag`` attribute and
    is written only when the file is empty at the time the lock is held.

    Args:
        csv_filename: Path of the CSV file to append to. A falsy value logs a
            warning and skips the write. The parent directory is created if
            the path has a directory component.
        table_log: Items exposing a ``tag`` attribute. It is iterated twice
            (once for the header, once for the row), so it must be
            re-iterable (e.g. a list), not a one-shot generator.
        model_tflops: Not used by this function; kept for caller
            compatibility.
        slurm_job_id: Not used by this function; kept for caller
            compatibility.

    Returns:
        None. Failures are logged rather than raised: any ``Exception`` is
        caught and reported through ``logger.error``. The one exception is a
        failing ``import fcntl`` (the module is Unix-only), which happens
        before the ``try`` block and therefore propagates.
    """
    import fcntl

    try:
        # Nothing to do without a destination path.
        if not csv_filename:
            logger.warning("No benchmark CSV path specified - skipping CSV output")
            return

        # Create the output directory only when the path has a directory part;
        # os.makedirs("") would raise.
        csv_dir = os.path.dirname(csv_filename)
        if csv_dir:
            os.makedirs(csv_dir, exist_ok=True)

        # Build header and row up front so no work is done while holding the lock.
        header = [item.tag for item in table_log]
        row = [get_formatted_value(item) for item in table_log]

        max_attempts = 10
        retry_delay_s = 0.1  # Wait 100ms between lock attempts

        log_rank(
            f"Attempting to write benchmark results to CSV file: {csv_filename}",
            logger=logger,
            level=logging.INFO,
        )

        for attempt in range(max_attempts):
            try:
                with open(csv_filename, mode="a+", newline="") as f:
                    # Non-blocking exclusive lock: raises BlockingIOError when
                    # another process currently holds it.
                    fcntl.flock(f.fileno(), fcntl.LOCK_EX | fcntl.LOCK_NB)
                    log_rank(
                        f"Acquired lock for CSV file: {csv_filename}",
                        logger=logger,
                        level=logging.INFO,
                    )
                    try:
                        # Emptiness is checked while holding the lock so that
                        # only one writer emits the header.
                        f.seek(0)
                        file_is_empty = not f.read(1)

                        # Go to end of file for append
                        f.seek(0, os.SEEK_END)
                        writer = csv.writer(f)
                        if file_is_empty:
                            writer.writerow(header)
                        writer.writerow(row)

                        # Push the data to disk before the lock is released.
                        f.flush()
                        os.fsync(f.fileno())

                        # Report success only once the row is actually on
                        # disk; a failed write must not log success.
                        log_rank(
                            f"Successfully wrote to CSV file: {csv_filename}. Releasing lock...",
                            logger=logger,
                            level=logging.INFO,
                        )
                    finally:
                        # Always release the lock, even if the write failed.
                        fcntl.flock(f.fileno(), fcntl.LOCK_UN)
                return
            except BlockingIOError:
                # Another process has the lock, wait and retry
                log_rank(
                    f"Another process has the lock for CSV file: {csv_filename}, waiting and retrying attempt {attempt + 1} of {max_attempts}...",
                    logger=logger,
                    level=logging.INFO,
                )
                # No point sleeping after the final attempt.
                if attempt + 1 < max_attempts:
                    time.sleep(retry_delay_s)

        # Reached only when every attempt found the file locked.
        logger.error(f"Failed to acquire lock for {csv_filename} after {max_attempts} attempts")
    except Exception as e:
        # Best-effort output: benchmark logging must not crash the caller.
        logger.error(f"Unexpected error writing to {csv_filename}: {str(e)}")