in torchci/log_classifier/classify_log.py [0:0]
def run(self, lines):
    """Find the highest-priority matching rule from this log.

    This uses multiple processes to match lines in parallel. Certain logs
    (long logs with long lines, e.g. windows logs) cause a non-parallel
    implementation to timeout on lambda.

    Args:
        lines: iterable of log lines. Each line is paired with its index
            and handed to ``self.process_bucket`` in a worker process.

    Returns:
        None. ``self._best_match`` is replaced in place whenever a worker
        reports a match whose ``rule.priority`` is strictly greater than
        the current one.
    """
    # Split the work into buckets so we can parallelize.
    num_buckets = 6  # hard-coded because AWS Lambda supports max 6 vcpus.
    buckets = [[] for _ in range(num_buckets)]
    # Deal the (line_number, line) pairs out round-robin so every bucket
    # gets a similar share of the log.
    for numbered_line, bucket in zip(enumerate(lines), cycle(buckets)):
        bucket.append(numbered_line)

    processes = []
    parent_connections = []
    # Create one process per bucket.
    for bucket in buckets:
        # We create the pipes manually because AWS lambda doesn't have shm
        # (and thus can't use most higher-order multiprocessing primitives).
        parent_conn, child_conn = Pipe()
        parent_connections.append(parent_conn)
        processes.append(
            Process(target=self.process_bucket, args=(bucket, child_conn))
        )

    for process in processes:
        process.start()

    # Drain every pipe BEFORE joining. A worker whose result does not fit in
    # the OS pipe buffer blocks in send() until the parent reads it; joining
    # first would then deadlock (parent waits on join, child waits on send).
    # NOTE(review): assumes process_bucket sends exactly one object (a match
    # or None) on its connection -- confirm against its implementation.
    for parent_conn in parent_connections:
        match = parent_conn.recv()
        if match is None:
            continue
        # Strictly greater: on equal priority the existing match is kept.
        if match.rule.priority > self._best_match.rule.priority:
            self._best_match = match

    # All results are in; now it is safe to reap the workers.
    for process in processes:
        process.join()