in aiops/RCRank/model/modules/QueryFormer/utils.py [0:0]
def get_job_table_sample(workload_file_name, num_materialized_samples = 1000):
    """Load per-query table sample bitmaps for a workload.

    Two files are read, both derived from ``workload_file_name``:

    * ``<workload_file_name>.csv`` -- one query per row, fields separated by
      ``#``.  Field 0 is a comma-separated list of table entries; field 3 is
      parsed as an integer and must be >= 1.  (Presumably field 3 is the
      query cardinality, going by the error message -- confirm against the
      workload generator.)
    * ``<workload_file_name>.bitmaps`` -- binary.  For each query, in CSV
      order: a 4-byte little-endian count, followed by that many bitmaps of
      ``(num_materialized_samples + 7) >> 3`` bytes each.

    Args:
        workload_file_name: Path prefix of the workload, without extension.
        num_materialized_samples: Number of sample bits stored per bitmap.

    Returns:
        A list with one dict per query.  Each dict maps the text before the
        first space of a table entry (presumably the table name with its
        alias stripped) to a 1-D ``np.uint8`` array of unpacked bits of
        length ``num_bytes_per_bitmap * 8``.  Table entries and bitmaps are
        paired positionally; if a query repeats a key, the later bitmap wins.

    Raises:
        SystemExit: With code 1 (after printing a message) if a query has a
            value < 1 in field 3, or if the bitmap file ends before all
            expected bytes could be read.
    """
    tables = []
    with open(workload_file_name + ".csv", 'r') as f:
        for row in csv.reader(f, delimiter='#'):
            tables.append(row[0].split(','))
            if int(row[3]) < 1:
                print("Queries must have non-zero cardinalities")
                # SystemExit is what the site-provided exit() raises; raising
                # it directly also works where the site module is not loaded.
                raise SystemExit(1)
    print("Loaded queries with len ", len(tables))

    # One bit per sample, rounded up to a whole number of bytes.
    num_bytes_per_bitmap = int((num_materialized_samples + 7) >> 3)
    samples = []
    with open(workload_file_name + ".bitmaps", 'rb') as f:
        for _ in tables:
            four_bytes = f.read(4)
            # A short (1-3 byte) read means the file is truncated; decoding
            # it would silently yield a bogus bitmap count.
            if len(four_bytes) != 4:
                print("Error while reading 'four_bytes'")
                raise SystemExit(1)
            num_bitmaps_curr_query = int.from_bytes(four_bytes, byteorder='little')
            bitmaps = np.empty(
                (num_bitmaps_curr_query, num_bytes_per_bitmap * 8),
                dtype=np.uint8,
            )
            for j in range(num_bitmaps_curr_query):
                bitmap_bytes = f.read(num_bytes_per_bitmap)
                # Reject empty and partial reads alike, so truncation is
                # reported here rather than as a NumPy shape error below.
                if not bitmap_bytes or len(bitmap_bytes) != num_bytes_per_bitmap:
                    print("Error while reading 'bitmap_bytes'")
                    raise SystemExit(1)
                bitmaps[j] = np.unpackbits(np.frombuffer(bitmap_bytes, dtype=np.uint8))
            samples.append(bitmaps)
    print("Loaded bitmaps")

    table_sample = []
    for query_tables, query_bitmaps in zip(tables, samples):
        # Key each bitmap row by the part of the table entry before the
        # first space.
        table_sample.append({
            entry.split(' ')[0]: bitmap
            for entry, bitmap in zip(query_tables, query_bitmaps)
        })
    return table_sample