def run()

in torchci/log_classifier/classify_log.py [0:0]


    def run(self, lines):
        """Find the highest-priority matching rule from this log.

        This uses multiple processes to match lines in parallel. Certain logs
        (long logs with long lines, e.g. Windows logs) cause a non-parallel
        implementation to time out on Lambda.
        """
        # Split the work into buckets so we can parallelize.
        num_buckets = 6  # hard-coded because AWS Lambda supports at most 6 vCPUs.
        buckets = [[] for _ in range(num_buckets)]
        lines_with_num = list(enumerate(lines))
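        # cycle() (imported from itertools) repeats the buckets forever,
        # so the (line_number, line) pairs are dealt out round-robin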
        for elem, bucket in zip(lines_with_num, cycle(buckets)):
            bucket.append(elem)

        # keep track of all worker processes
        processes = []

        # keep track of the parent end of each result pipe
        parent_connections = []

        # create a process per bucket
        for bucket in buckets:
            # create a multiprocessing.Pipe for returning the result
            # we are doing this manually because AWS Lambda doesn't provide /dev/shm
            # (and thus can't use most higher-level multiprocessing primitives,
            # e.g. Pool or Queue)
            parent_conn, child_conn = Pipe()
            parent_connections.append(parent_conn)

            # hand this bucket to a worker process; its result comes back over the pipe
            process = Process(
                target=self.process_bucket,
                args=(
                    bucket,
                    child_conn,
                ),
            )
            processes.append(process)

        # start all the workers, then wait for every one of them to finish
        for process in processes:
            process.start()
        for process in processes:
            process.join()

        # get the best match from all the processes
        for parent_conn in parent_connections:
            match = parent_conn.recv()
            if match is None:
                continue
            if match.rule.priority > self._best_match.rule.priority:
                self._best_match = match
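
For reference, the worker side of this pattern looks roughly like the sketch below. The
real process_bucket is defined elsewhere in classify_log.py and is not shown in this
section, so the body here is an assumption: each worker scans its bucket of
(line_number, line) pairs, keeps the highest-priority match it finds, and sends that
single result (or None) back through the child end of the pipe. The self.rules attribute
and the rule.match(...) call are hypothetical names used only for illustration.

    # Illustrative sketch only -- not the actual implementation.
    def process_bucket(self, bucket, child_conn):
        best = None
        for line_number, line in bucket:
            for rule in self.rules:  # self.rules assumed: list of rule objects
                match = rule.match(line_number, line)  # hypothetical API
                if match is None:
                    continue
                if best is None or match.rule.priority > best.rule.priority:
                    best = match
        # exactly one send per pipe; None signals "no match in this bucket"
        child_conn.send(best)
        child_conn.close()

Note that run() joins every worker before reading from the pipes. That ordering is safe
here because each worker sends exactly one small, picklable object, which comfortably
fits in the OS pipe buffer, so a child never blocks on send() waiting for the parent to
read.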