in scripts/weka.py [0:0]
def _sum_tier_location_sizes(output: str) -> tuple:
    """Sum the size and SSD-cached columns of `weka fs tier location` output.

    Each data line is expected to look like
    ``<path> <size> B <cached> B`` (e.g. ``/path/to/file 1234 B 1234 B``).
    Fields are read from the end of the line so that a path containing
    spaces does not shift the numeric columns.

    Args:
        output: Raw stdout text of the weka command.

    Returns:
        tuple: ``(total_size, total_cached)`` as floats, in bytes.

    Raises:
        ValueError: If a line with enough fields has non-numeric values
            in the size or cached position.
    """
    total_size = 0.0
    total_cached = 0.0
    for line in output.strip().split("\n"):
        parts = line.split()
        # Blank or truncated lines carry no usable data; skip them.
        if len(parts) < 5:
            continue
        total_size += float(parts[-4])  # value preceding the first unit token
        total_cached += float(parts[-2])  # value preceding the last unit token
    return total_size, total_cached


def get_warmup_percentages(input_paths: Union[str, List[str]]) -> dict:
    """Calculate the warm-up percentage for each folder by checking all files within.

    For every folder, all entries matched by ``<folder>/*`` are passed to
    ``weka fs tier location`` and the ratio of SSD-cached bytes to total
    bytes is reported.

    Args:
        input_paths: Either a list of paths or a YAML-style string with paths.

    Returns:
        dict: Maps each folder path (trailing slashes removed) to its
        warm-up percentage rounded to two decimals. The value is ``0.0``
        when the folder has no files or reports no data, and ``-1`` when
        the weka command could not be run, exited with an error, or
        produced output that could not be parsed.
    """
    folder_paths = parse_input_paths(input_paths)
    print(f"Checking warmup status for {len(folder_paths)} folders...")
    results = {}
    for raw_path in folder_paths:
        folder_path = raw_path.rstrip("/")
        all_files = glob.glob(f"{folder_path}/*")
        if not all_files:
            print(f"No files found in {folder_path}")
            results[folder_path] = 0.0
            continue

        # Every file is passed as its own argument to a single weka call.
        cmd = (
            ["weka", "fs", "tier", "location"]
            + all_files
            + ["--no-header", "--raw-units", "-o", "path,size,ssdRead"]
        )
        try:
            output = subprocess.check_output(cmd, text=True)
            total_size, total_cached = _sum_tier_location_sizes(output)
        except (subprocess.CalledProcessError, OSError, ValueError) as e:
            # OSError covers a missing `weka` binary or an oversized argument
            # list; ValueError covers unparsable output. A failure in one
            # folder must not abort the remaining folders.
            print(f"Error processing {folder_path}: {e}")
            results[folder_path] = -1
            continue

        if total_size > 0:
            warmup_percentage = (total_cached / total_size) * 100
            results[folder_path] = round(warmup_percentage, 2)
            print(
                f"{folder_path}: {results[folder_path]}% warmed up ({total_cached/1e9:.2f}GB / {total_size/1e9:.2f}GB)"
            )
        else:
            results[folder_path] = 0.0
            print(f"{folder_path}: No data found")
    return results