in dags/inference/utils/maxtext_gpu_microbenchmark_jsonl_converter.py [0:0]
def extract_and_write_to_jsonl_pattern(input_file, output_file):
    """Extract AutoRegressive benchmark metrics from a log into a JSONL file.

    Every line of ``input_file`` is checked against a fixed set of regular
    expressions; the first pattern that matches a line contributes one entry
    to the ``metrics`` mapping (a later line matching the same pattern
    overwrites the earlier value). Captured values containing a ``.`` are
    stored as ``float``, all others as ``int``.

    When at least one metric was found, a single record shaped like
    ``{"dimensions": {}, "metrics": {...}}`` is written to ``output_file``.
    When nothing matched, no file is written and a notice is printed.

    Failures (missing input file, I/O or serialization errors) are reported
    on stdout and are not raised to the caller.

    Args:
      input_file (str): Path to the input text file.
      output_file (str): Path to the output JSONL file.
    """
    # Compiled once up front: the same patterns are applied to every line.
    extraction_patterns = {
        "ar_step_average_time_ms": re.compile(
            r"AR step average time: (\d+\.\d+) ms"
        ),
        "ar_step_average_time_per_seq_ms": re.compile(
            r"AR step average time per seq: (\d+\.\d+) ms"
        ),
        "ar_global_batch_size": re.compile(r"AR global batch size: (\d+)"),
        "ar_throughput_tokens_per_second": re.compile(
            r"AR throughput: (\d+\.\d+) tokens/second"
        ),
        "ar_memory_bandwidth_gb_per_second": re.compile(
            r"AR memory bandwidth per device: (\d+\.\d+) GB/s"
        ),
    }
    results = {"dimensions": {}, "metrics": {}}
    metrics = results["metrics"]

    # Phase 1: read and parse. Kept in its own try so that a missing *input*
    # file is the only thing reported as "Input file ... not found".
    try:
        with open(input_file, "r") as f:
            for line in f:
                line = line.strip()
                for key, pattern in extraction_patterns.items():
                    match = pattern.search(line)
                    if match:
                        raw_value = match.group(1)
                        metrics[key] = (
                            float(raw_value) if "." in raw_value else int(raw_value)
                        )
                        break  # Move to the next line once a match is found
    except FileNotFoundError:
        print(f"Error: Input file '{input_file}' not found.")
        return
    except Exception as e:
        print(f"An error occurred: {e}")
        return

    # `results` itself is never empty (it always holds the two top-level
    # keys), so the emptiness check has to look at the extracted metrics.
    if not metrics:
        print("No AutoRegressive results found in the input file.")
        return

    # Phase 2: write the single JSONL record.
    try:
        with jsonlines.open(output_file, "w") as writer:
            writer.write(results)
        print(f"Extracted results written to {output_file}")
    except Exception as e:
        print(f"An error occurred: {e}")