in scheduler.py [0:0]
def extract_movement_rep_and_sample(log_file):
    """Extract data-movement, replication and sample-rate figures from a log.

    The file is scanned line by line; for every metric the value from the
    last matching line wins.

    Args:
        log_file: Path of the log file to scan.

    Returns:
        A 4-tuple ``(movement_ingress_bytes, movement_egress_bytes,
        replication_size, sample_rate)``:

        * the two movement values default to ``0`` when no
          ``Data movement:`` line is found;
        * ``replication_size`` is ``None`` when no ``Storage: ... overlap``
          line is found;
        * ``sample_rate`` is a ``float`` taken from ``k=0.xxx`` / ``k=1.000``
          or ``None`` when neither appears.

        Size values are whatever ``parse_size`` returns for the matched
        token (presumably a byte count -- confirm against ``parse_size``).
        If ``log_file`` does not exist the defaults are returned unchanged.
    """
    movement_ingress_bytes = movement_egress_bytes = 0
    replication_size = None
    sample_rate = None

    if not os.path.exists(log_file):
        return movement_ingress_bytes, movement_egress_bytes, replication_size, sample_rate

    # Compile once per call instead of re-resolving each pattern on every line.
    movement_re = re.compile(
        r"Data movement:\s([\d.]+\s*[A-Z]*B) ingress,\s([\d.]+\s*[A-Z]*B) .* egress"
    )
    # Accept the same kind of size token as the movement pattern (integer or
    # decimal number, any run of upper-case unit letters before the final B)
    # so that values such as "512 B" or "3 GB" are not silently skipped.
    replication_re = re.compile(r"Storage: .*?(\d+(?:\.\d+)?\s*[A-Z]*B)\s+overlap")
    full_sample_re = re.compile(r"k=(1\.000)")
    partial_sample_re = re.compile(r"k=(0\.\d+)")

    # Logs may contain stray non-UTF-8 bytes; replace them rather than letting
    # a UnicodeDecodeError abort the whole extraction. All patterns are ASCII,
    # so replacement characters cannot affect matching.
    with open(log_file, "r", encoding="utf-8", errors="replace") as f:
        for line in f:
            # Data movement: ingress and egress sizes.
            movement_match = movement_re.search(line)
            if movement_match:
                movement_ingress_bytes = parse_size(movement_match.group(1))
                movement_egress_bytes = parse_size(movement_match.group(2))

            # Replication size (the "overlap" figure on the Storage line).
            replication_match = replication_re.search(line)
            if replication_match:
                replication_size = parse_size(replication_match.group(1))

            # Sample rate: k=1.000 is checked first, then k=0.xxx, so a line
            # containing both ends up with the fractional value.
            if full_sample_re.search(line):
                sample_rate = 1.0
            partial_match = partial_sample_re.search(line)
            if partial_match:
                sample_rate = float(partial_match.group(1))

    return movement_ingress_bytes, movement_egress_bytes, replication_size, sample_rate