def gaudi_launcher()

in integration_tests/gaudi/conftest.py [0:0]


def gaudi_launcher(event_loop):
    @contextlib.contextmanager
    def docker_launcher(
        model_id: str,
        test_name: str,
    ):
        logger.info(
            f"Starting docker launcher for model {model_id} and test {test_name}"
        )


        port = 8080

        client = docker.from_env()

        container_name = f"tei-hpu-test-{test_name.replace('/', '-')}"

        try:
            container = client.containers.get(container_name)
            logger.info(
                f"Stopping existing container {container_name} for test {test_name}"
            )
            container.stop()
            container.wait()
        except NotFound:
            pass
        except Exception as e:
            logger.error(f"Error handling existing container: {str(e)}")

        tei_args = TEST_CONFIGS[test_name]["args"].copy()

        # add model_id to tei args
        tei_args.append("--model-id")
        tei_args.append(model_id)

        env = BASE_ENV.copy()
        env["HF_TOKEN"] = os.getenv("HF_TOKEN")

        # Add env config that is definied in the fixture parameter
        if "env_config" in TEST_CONFIGS[test_name]:
            env.update(TEST_CONFIGS[test_name]["env_config"].copy())

        volumes = [f"{DOCKER_VOLUME}:/data"]
        logger.debug(f"Using volume {volumes}")

        try:
            logger.info(f"Creating container with name {container_name}")

            # Log equivalent docker run command for debugging, this is not actually executed
            container = client.containers.run(
                DOCKER_IMAGE,
                command=tei_args,
                name=container_name,
                environment=env,
                detach=True,
                volumes=volumes,
                ports={"80/tcp": port},
                **HABANA_RUN_ARGS,
            )

            logger.info(f"Container {container_name} started successfully")

            # Start log streaming in a background thread
            log_thread = threading.Thread(
                target=stream_container_logs,
                args=(container, test_name),
                daemon=True,  # This ensures the thread will be killed when the main program exits
            )
            log_thread.start()

            # Add a small delay to allow container to initialize
            time.sleep(2)

            # Check container status after creation
            status = container.status
            logger.debug(f"Initial container status: {status}")
            if status not in ["running", "created"]:
                logs = container.logs().decode("utf-8")
                logger.error(f"Container failed to start properly. Logs:\n{logs}")

            yield ContainerLauncherHandle(client, container.name, port)

        except Exception as e:
            logger.error(f"Error starting container: {str(e)}")
            # Get full traceback for debugging
            import traceback

            logger.error(f"Full traceback:\n{traceback.format_exc()}")
            raise
        finally:
            try:
                container = client.containers.get(container_name)
                logger.info(f"Stopping container {container_name}")
                container.stop()
                container.wait()

                container_output = container.logs().decode("utf-8")
                print(container_output, file=sys.stderr)

                container.remove()
                logger.info(f"Container {container_name} removed successfully")
            except NotFound:
                pass
            except Exception as e:
                logger.warning(f"Error cleaning up container: {str(e)}")

    return docker_launcher