#  Copyright 2024 Alibaba, Inc. or its affiliates.
#
#  Licensed under the Apache License, Version 2.0 (the "License");
#  you may not use this file except in compliance with the License.
#  You may obtain a copy of the License at
#
#       https://www.apache.org/licenses/LICENSE-2.0
#
#  Unless required by applicable law or agreed to in writing, software
#  distributed under the License is distributed on an "AS IS" BASIS,
#  WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
#  See the License for the specific language governing permissions and
#  limitations under the License.

import os
import posixpath
import time
import typing
from concurrent.futures import ThreadPoolExecutor
from enum import Enum
from typing import Any, Dict, List, Optional, Union

from pydantic import BaseModel, ConfigDict, Field
from pydantic.alias_generators import to_pascal
from Tea.exceptions import TeaException

from ..api.base import PaginatedResult
from ..common.consts import StoragePathCategory
from ..common.logging import get_logger
from ..common.oss_utils import OssUriObj, is_oss_uri, upload
from ..common.utils import (
    is_dataset_id,
    is_filesystem_uri,
    is_odps_table_uri,
    name_from_base,
    print_table,
    random_str,
    retry,
    to_plain_text,
)
from ..exception import UnexpectedStatusException
from ..session import Session, get_default_session

if typing.TYPE_CHECKING:
    from ..estimator import FileSystemInputBase

logger = get_logger(__name__)


def as_oss_dir_uri(uri: str) -> str:
    """Ensure the given OSS URI ends with a slash so that it refers to a directory."""
    return uri if uri.endswith("/") else uri + "/"


DEFAULT_OUTPUT_MODEL_CHANNEL_NAME = "model"
DEFAULT_CHECKPOINT_CHANNEL_NAME = "checkpoints"
DEFAULT_TENSORBOARD_CHANNEL_NAME = "tensorboard"


class SpotStrategy(str, Enum):
    """Bidding strategy for spot instances."""

    SpotWithPriceLimit = "SpotWithPriceLimit"
    SpotAsPriceGo = "SpotAsPriceGo"

    def __repr__(self):
        return self.value


class ResourceType(str, Enum):
    """Type of compute resource used to run the training job."""

    Lingjun = "Lingjun"
    General = "General"


class BaseAPIModel(BaseModel):
    """Base class for PAI API models; fields serialize with PascalCase aliases."""

    model_config = ConfigDict(
        alias_generator=to_pascal,
        populate_by_name=True,
    )

    def model_dump(self, **kwargs) -> Dict[str, Any]:
        kwargs.update({"by_alias": True, "exclude_none": True})
        return super().model_dump(**kwargs)

    def to_dict(self):
        return self.model_dump()
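
    # A minimal sketch (not part of the API) of the aliasing behavior this base
    # class provides to its subclasses:
    #
    #   class _Demo(BaseAPIModel):
    #       job_name: str
    #
    #   _Demo(job_name="demo").model_dump()         # -> {"JobName": "demo"}
    #   _Demo.model_validate({"JobName": "demo"})   # PascalCase keys accepted
    #   _Demo.model_validate({"job_name": "demo"})  # field names also accepted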


class TrainingJobStatus(object):
    """Possible statuses of a training job."""

    CreateFailed = "CreateFailed"
    InitializeFailed = "InitializeFailed"
    Succeed = "Succeed"
    Failed = "Failed"
    Terminated = "Terminated"
    Creating = "Creating"
    Created = "Created"
    Initializing = "Initializing"
    Submitted = "Submitted"
    Running = "Running"

    @classmethod
    def completed_status(cls):
        return [
            cls.InitializeFailed,
            cls.Succeed,
            cls.Failed,
            cls.Terminated,
        ]

    @classmethod
    def failed_status(cls):
        return [
            cls.InitializeFailed,
            cls.Failed,
            cls.CreateFailed,
        ]


class UserVpcConfig(BaseAPIModel):
    """UserVpcConfig represents the VPC configuration for the training job instance."""

    vpc_id: str = Field(
        ...,
        description="The ID of the VPC that the training job instance connects to.",
    )
    security_group_id: str = Field(
        ...,
        description="The ID of the security group that training job instances belong to.",
    )
    switch_id: Optional[str] = Field(
        None,
        description="The ID of the vSwitch to which the instance belongs. Defaults to None.",
    )
    extended_cidrs: Optional[List[str]] = Field(
        None,
        description="The CIDR blocks configured for the ENI of the training job "
        "instance. If not specified, the CIDR block defaults to that of the VPC, "
        "which means the training job instance can access all resources in the "
        "VPC. Defaults to None.",
    )
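
    # Construction sketch (the resource IDs below are placeholders):
    #
    #   vpc_config = UserVpcConfig(
    #       vpc_id="vpc-xxxxxxxx",
    #       security_group_id="sg-xxxxxxxx",
    #   )
    #   vpc_config.model_dump()
    #   # -> {"VpcId": "vpc-xxxxxxxx", "SecurityGroupId": "sg-xxxxxxxx"}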


class ExperimentConfig(BaseAPIModel):
    """ExperimentConfig is used to configure the experiment to which the job belongs."""

    experiment_id: str = Field(
        ...,
        description="The ID of the experiment that the training job belongs to.",
    )


class OssLocation(BaseAPIModel):
    """OSS location."""

    bucket: str = Field(..., description="OSS bucket name.")
    key: str = Field(..., description="Object key in the OSS bucket.")
    endpoint: Optional[str] = Field(None, description="OSS service endpoint URL.")


class CodeDir(BaseAPIModel):
    """Source code location"""

    location_value: Union[OssLocation, Dict[str, Any]] = Field(
        ..., description="Location of the code directory."
    )
    location_type: str = Field(
        ..., description="Type of the code directory location, e.g., OSS."
    )


# HyperParameter
class HyperParameter(BaseAPIModel):
    """A hyperparameter for a training job."""

    value: str = Field(..., description="Value of the hyperparameter.")
    name: str = Field(..., description="Name of the hyperparameter.")


class InstanceSpec(BaseAPIModel):
    """Instance resource configuration"""

    memory: str = Field(..., description="Memory allocation for the instance.")
    cpu: str = Field(..., alias="CPU", description="CPU allocation for the instance.")
    gpu: str = Field(..., alias="GPU", description="GPU allocation for the instance.")
    shared_memory: Optional[str] = Field(
        None, description="Shared memory allocation, if applicable."
    )
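
    # Sketch (illustrative values); note the explicit upper-case aliases used
    # when the spec is serialized:
    #
    #   InstanceSpec(cpu="8", memory="32Gi", gpu="1").model_dump()
    #   # -> {"Memory": "32Gi", "CPU": "8", "GPU": "1"}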


class ComputeResource(BaseAPIModel):
    """Compute Resource Configuration."""

    ecs_count: Optional[int] = Field(None, description="Number of ECS instances.")
    ecs_spec: Optional[str] = Field(None, description="Specification of ECS instances.")
    instance_count: Optional[int] = Field(None, description="Number of instances.")
    instance_spec: Optional[InstanceSpec] = Field(
        None, description="Specification for instances."
    )


# URI Input and Output
class UriInput(BaseAPIModel):
    """URI Input for a training job."""

    name: str = Field(..., description="Name of the input.")
    input_uri: str = Field(..., description="URI of the input data.")


class UriOutput(BaseAPIModel):
    """URI Output for a training job."""

    name: str = Field(..., description="Name of the output.")
    output_uri: str = Field(..., description="URI of the output data.")


class DatasetConfig(BaseAPIModel):
    """Dataset Configuration"""

    dataset_id: str = Field(..., description="Unique ID of the dataset.")
    name: Optional[str] = Field(None, description="Name of the dataset.")
    dataset_name: Optional[str] = Field(
        None, description="Alternative name of the dataset."
    )


class Channel(BaseAPIModel):
    """Channel Configuration."""

    name: str = Field(..., description="Name of the channel.")
    description: Optional[str] = Field(None, description="Description of the channel.")
    required: Optional[bool] = Field(
        None, description="Indicates if the channel is required."
    )
    supported_channel_types: Optional[List[str]] = Field(
        None, description="Supported types for this channel."
    )
    properties: Optional[Dict[str, Any]] = Field(
        None, description="Additional properties of the channel."
    )


# HyperParameter Definition
class HyperParameterDefinition(BaseAPIModel):
    """HyerParameter Definition."""

    name: str = Field(..., description="Name of the hyperparameter.")
    type: Optional[str] = Field(None, description="Type of the hyperparameter.")
    default_value: Optional[str] = Field(
        None, description="Default value of the hyperparameter."
    )
    description: Optional[str] = Field(
        None, description="Description of the hyperparameter."
    )
    required: bool = Field(
        False, description="Indicates if the hyperparameter is required."
    )


class SchedulerConfig(BaseAPIModel):
    """Scheduler configuration for a training job."""

    max_running_time_in_seconds: Optional[int] = Field(
        None, description="Maximum running time of the job, in seconds."
    )


class MetricDefinition(BaseAPIModel):
    """Definition of a metric captured from training job logs."""

    description: Optional[str] = Field(None, description="Description of the metric.")
    name: str = Field(..., description="Name of the metric.")
    regex: str = Field(
        ..., description="Regular expression used for capturing the metric."
    )


class AlgorithmSpec(BaseAPIModel):
    """Algorithm Specification."""

    command: List[str] = Field(..., description="Command to run the training job.")
    image: str = Field(..., description="Docker image for the training job.")
    supported_channel_types: List[str] = Field(default_factory=list)
    output_channels: List[Channel] = Field(
        default_factory=list, description="Output channels."
    )
    input_channels: List[Channel] = Field(
        default_factory=list, description="Input channels."
    )
    supports_distributed_training: Optional[bool] = Field(
        True, description="Whether the algorithm supports distributed training."
    )
    supported_instance_types: Optional[List[str]] = Field(
        None, description="Supported instance types."
    )
    metric_definitions: Optional[List[MetricDefinition]] = Field(
        None, description="Metric definitions."
    )
    hyperparameter_definitions: List[HyperParameterDefinition] = Field(
        default_factory=list,
        alias="HyperParameters",
        description="Hyperparameter definitions.",
    )
    job_type: str = Field(default="PyTorchJob")
    code_dir: Optional[CodeDir] = Field(None, description="Source code location.")
    customization: Optional[Dict[str, Any]] = Field(
        None, description="Whether the algorithm supports customized user code."
    )
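
    # Construction sketch (the image URI and command are placeholders):
    #
    #   algorithm_spec = AlgorithmSpec(
    #       command=["python", "train.py"],
    #       image="<registry>/<namespace>/<training-image>:<tag>",
    #   )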


class ModelRecipeSpec(BaseAPIModel):
    """Specification of a model training recipe."""

    compute_resource: Optional[ComputeResource] = None
    hyperparameters: List[HyperParameter] = Field(
        default_factory=list, alias="HyperParameters"
    )
    inputs: List[Union[UriInput, DatasetConfig]] = Field(
        default_factory=list, alias="InputChannels"
    )
    scheduler: Optional[SchedulerConfig] = None
    supported_instance_types: Optional[List[str]] = None
    algorithm_spec: Optional[AlgorithmSpec] = None
    algorithm_version: Optional[str] = None
    algorithm_provider: Optional[str] = None
    algorithm_name: Optional[str] = None
    environments: Optional[Dict[str, str]] = None
    requirements: Optional[List[str]] = None


class SpotSpec(BaseAPIModel):
    """Spot instance configuration for a training job."""

    spot_strategy: SpotStrategy = Field(
        ...,
        description="Spot instance strategy; supported values are "
        "'SpotWithPriceLimit' and 'SpotAsPriceGo'.",
    )
    spot_discount_limit: Optional[float] = Field(
        None,
        description="Spot instance discount limit with at most 2 decimal places; "
        "required when spot_strategy is 'SpotWithPriceLimit'. "
        "For example, 0.5 means 50% off the original price.",
    )
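
    # Sketch: bid for spot capacity at up to 50% off the on-demand price
    # (values are illustrative):
    #
    #   spot_spec = SpotSpec(
    #       spot_strategy=SpotStrategy.SpotWithPriceLimit,
    #       spot_discount_limit=0.5,
    #   )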


class TrainingJob(BaseAPIModel):
    """TrainingJob represents a training job in the PAI service."""

    algorithm_id: Optional[str] = None
    algorithm_name: Optional[str] = None
    algorithm_provider: Optional[str] = None
    algorithm_version: Optional[str] = None
    algorithm_spec: Optional[AlgorithmSpec] = None
    compute_resource: Optional[ComputeResource] = None
    scheduler: Optional[SchedulerConfig] = None
    experiment_config: Optional[Dict[str, Any]] = None
    inputs: List[Union[UriInput, DatasetConfig]] = Field(
        default_factory=list, alias="InputChannels"
    )
    outputs: List[Union[UriOutput, DatasetConfig]] = Field(
        default_factory=list, alias="OutputChannels"
    )
    hyperparameters: List[HyperParameter] = Field(
        default_factory=list, alias="HyperParameters"
    )
    labels: Optional[List[Dict[str, str]]] = Field(default_factory=list)
    training_job_description: Optional[str] = None
    training_job_id: Optional[str] = None
    training_job_name: Optional[str] = None
    workspace_id: Optional[str] = None
    training_job_url: Optional[str] = None
    status: Optional[str] = None
    reason_code: Optional[str] = None
    reason_message: Optional[str] = None

    def __hash__(self):
        return hash(self.training_job_id)

    def __eq__(self, other: "TrainingJob"):
        return (
            isinstance(other, TrainingJob)
            and self.training_job_id == other.training_job_id
        )

    @property
    def id(self):
        return self.training_job_id

    @classmethod
    def get(
        cls, training_job_id, session: Optional[Session] = None
    ) -> "TrainingJob":
        session = session or get_default_session()
        res = session.training_job_api.get(training_job_id=training_job_id)
        return cls.model_validate(res)

    @classmethod
    def list(
        cls,
        status: Optional[str] = None,
        session: Optional[Session] = None,
        page_size: int = 50,
        page_number: int = 1,
    ):
        session = session or get_default_session()
        res = session.training_job_api.list(
            status=status, page_size=page_size, page_number=page_number
        )
        return [cls.model_validate(item) for item in res.items]

    def output_path(self, channel_name="model"):
        for output_channel in self.outputs:
            if output_channel.name == channel_name:
                return output_channel.output_uri
        raise RuntimeError(
            f"Output channel is not specified: channel_name={channel_name}"
        )

    @property
    def console_uri(self):
        if not self.training_job_id:
            raise ValueError("The TrainingJob is not submitted")

        return self.training_job_url

    def wait(self, interval: int = 5, show_logs: bool = True):
        session = get_default_session()
        self._refresh_status()

        if show_logs:
            job_log_printer = _TrainingJobLogPrinter(
                training_job_id=self.training_job_id, page_size=20, session=session
            )
            job_log_printer.start()
        else:
            job_log_printer = None
        try:
            while not self.is_completed():
                time.sleep(interval)
        finally:
            if job_log_printer:
                job_log_printer.stop(wait=True)

        self._on_job_completed()

    def _on_job_completed(self):
        # Print an empty line to separate the training job logs and the following logs
        print()
        if self.status == TrainingJobStatus.Succeed:
            print(
                f"Training job ({self.training_job_id}) succeeded, you can check the"
                f" logs/metrics/output in the console:\n{self.console_uri}"
            )
        elif self.status == TrainingJobStatus.Terminated:
            print(
                f"Training job is ended with status {self.status}: "
                f"reason_code={self.reason_code}, reason_message={self.reason_message}."
                f" Check the training job in the console:\n{self.console_uri}"
            )
        elif self.status in TrainingJobStatus.failed_status():
            print(
                f"Training job ({self.training_job_id}) failed, please check the logs"
                f" in the console:\n{self.console_uri}"
            )

            message = f"TrainingJob failed: name={self.training_job_name}, "
            f"training_job_id={self.training_job_id}, "
            f"reason_code={self.reason_code}, status={self.status}, "
            f"reason_message={self.reason_message}"

            raise UnexpectedStatusException(message=message, status=self.status)

    def _refresh_status(self):
        """Reload the training job from the PAI Service,"""
        session = get_default_session()
        training_job = type(self).model_validate(
            session.training_job_api.get(training_job_id=self.training_job_id)
        )
        self.status = training_job.status

    def is_succeeded(self):
        """Return True if the training job is succeeded"""
        self._refresh_status()
        return self.status == TrainingJobStatus.Succeed

    @retry(wait_secs=10)
    def is_completed(self):
        """Return True if the training job is completed, including failed status"""
        if self.status in TrainingJobStatus.completed_status():
            return True
        self._refresh_status()

        return self.status in TrainingJobStatus.completed_status()
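
    # End-to-end usage sketch (the job ID is a placeholder; assumes a default
    # session has been configured):
    #
    #   job = TrainingJob.get("train-xxxxxxxx")
    #   job.wait(interval=10, show_logs=True)
    #   model_uri = job.output_path(channel_name="model")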


class _TrainingJobLogPrinter(object):
    """A class used to print logs for a training job"""

    executor = ThreadPoolExecutor(5)

    def __init__(
        self, training_job_id: str, page_size=10, session: Optional[Session] = None
    ):
        self.training_job_id = training_job_id
        self.session = session
        self.page_size = page_size
        self._future = None
        self._stop = False

    def _list_logs_api(self, page_number: int = 1):
        try:
            res = self.session.training_job_api.list_logs(
                self.training_job_id,
                page_number=page_number,
                page_size=self.page_size,
            )
            return res
        except TeaException as e:
            # hack: Backend service may raise an exception when the training job
            # instance is not found.
            if e.code == "TRAINING_JOB_INSTANCE_NOT_FOUND":
                return PaginatedResult(items=[], total_count=0)
            else:
                raise e

    def _list_logs(self):
        page_number, page_offset = 1, 0
        # print training job logs.
        while not self._stop:
            res = self._list_logs_api(page_number=page_number)
            # 1. move to next page
            if len(res.items) == self.page_size:
                # print new logs starting from page_offset
                self._print_logs(logs=res.items[page_offset:])
                page_number += 1
                page_offset = 0
            # 2. stay at the current page.
            else:
                if len(res.items) > page_offset:
                    # print new logs starting from page_offset
                    self._print_logs(logs=res.items[page_offset:])
                    page_offset = len(res.items)
                time.sleep(1)

        # When _stop is True, wait and print remaining logs.
        time.sleep(10)
        while True:
            res = self._list_logs_api(page_number=page_number)
            # There may be more logs in the next page.
            if len(res.items) == self.page_size:
                self._print_logs(logs=res.items[page_offset:])
                page_number += 1
                page_offset = 0
            # No more logs in the next page.
            else:
                if len(res.items) > page_offset:
                    self._print_logs(logs=res.items[page_offset:])
                break

    def _print_logs(self, logs: List[str]):
        for log in logs:
            print(log)

    def start(self):
        if self._future:
            raise ValueError("The training job log printer is already started")
        self._stop = False
        self._future = self.executor.submit(self._list_logs)

    def stop(self, wait: bool = True):
        self._stop = True
        if self._future and wait:
            self._future.result()
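
    # Usage sketch (internal helper; the job ID is a placeholder):
    #
    #   printer = _TrainingJobLogPrinter("train-xxxxxxxx", session=session)
    #   printer.start()
    #   ...                      # poll the job status elsewhere
    #   printer.stop(wait=True)  # drain and print any remaining logs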


class _TrainingJobSubmitter(object):
    """A class used to submit a training job to the PAI service."""

    def __init__(
        self,
        base_job_name: Optional[str] = None,
        output_path: Optional[str] = None,
        experiment_config: Optional[ExperimentConfig] = None,
        user_vpc_config: Optional[UserVpcConfig] = None,
        max_run_time: Optional[int] = None,
        instance_type: Optional[str] = None,
        instance_spec: Optional[Dict] = None,
        instance_count: Optional[int] = None,
        resource_id: Optional[Dict] = None,
        resource_type: Optional[Union[str, ResourceType]] = None,
        spot_spec: Optional[SpotSpec] = None,
        environments: Optional[Dict] = None,
        requirements: Optional[List[str]] = None,
        labels: Optional[Dict[str, str]] = None,
        settings: Optional[Dict[str, Any]] = None,
    ):
        self.session = get_default_session()
        self._training_jobs = []
        self.base_job_name = base_job_name or type(self).__name__.lower()
        self.output_path = output_path
        self.user_vpc_config = user_vpc_config
        self.spot_spec = spot_spec
        self.experiment_config = experiment_config
        self.max_run_time = max_run_time
        self.instance_type = instance_type
        self.instance_spec = instance_spec
        self.instance_count = instance_count or 1
        self.resource_id = resource_id
        self.resource_type = ResourceType(resource_type) if resource_type else None
        self.environments = environments
        self.requirements = requirements
        self.settings = settings
        self.labels = labels

    def wait(self, interval: int = 5, show_logs: bool = True, all_jobs: bool = False):
        """Block until the jobs is completed.

        Args:
            interval(int): Interval to reload job status
            show_logs(bool): Specifies whether to fetch and print the logs produced by
                the job.
            all_jobs(bool): Wait latest job or wait all jobs in processor, show_logs disabled while
                wait all jobs.

        Raises:
            RuntimeError: If no job is submitted.

        """
        if all_jobs:
            if not self._training_jobs:
                raise RuntimeError("Could not find any submitted job.")
            remains = set(self._training_jobs)
            while remains:
                for job in self._training_jobs:
                    if job in remains and job.is_completed():
                        remains.remove(job)

                time.sleep(interval)
            self._generate_jobs_report()
        else:
            latest_job = self.latest_job
            if not latest_job:
                raise RuntimeError("Could not find a submitted job.")
            latest_job.wait(interval=interval, show_logs=show_logs)
            return latest_job

    def _generate_jobs_report(self):
        """Generate current jobs report and output to stdout"""
        print(f"Jobs status report, total jobs count: {len(self._training_jobs)}")
        rows = []
        headers = ["JobName", "JobID", "Status"]
        for job in self._training_jobs:
            rows.append([job.training_job_name, job.id, job.status])
        print_table(headers, rows)

    def job_name(self, job_name: Optional[str] = None):
        if job_name:
            return job_name
        sep = "-"
        base_name = self.base_job_name
        return name_from_base(base_name, sep)

    def build_inputs(
        self,
        inputs: Dict[str, Any],
        input_channels: List[Channel],
        default_inputs: Optional[Dict[str, Any]] = None,
    ) -> List[Dict[str, str]]:
        res = []
        inputs = inputs or dict()
        input_channels = input_channels or []
        default_inputs = default_inputs or {}

        inputs = {**default_inputs, **inputs}
        requires = {ch.name for ch in input_channels if ch.required} - set(
            inputs.keys()
        )
        if requires:
            raise ValueError(
                "Required input channels are not provided: {}".format(
                    ",".join(requires)
                )
            )
        for name, item in inputs.items():
            input_config = self._get_input_config(name, item)
            res.append(input_config.model_dump())

        return res
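
    # Sketch of the accepted input forms (names, URIs and paths are
    # illustrative; ``submitter`` is a hypothetical instance):
    #
    #   submitter.build_inputs(
    #       inputs={
    #           "train": "oss://a-bucket/path/train/",  # OSS URI, used as-is
    #           "test": "/home/me/data/test/",          # local path, uploaded to OSS
    #       },
    #       input_channels=[Channel(name="train", required=True)],
    #   )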

    @staticmethod
    def _default_training_output_channels() -> List[Channel]:
        channels = [
            Channel(
                name=DEFAULT_OUTPUT_MODEL_CHANNEL_NAME,
                description="Training output models",
                required=True,
            ),
            Channel(
                name=DEFAULT_CHECKPOINT_CHANNEL_NAME,
                description="Training checkpoints channel",
                required=False,
            ),
            Channel(
                name=DEFAULT_TENSORBOARD_CHANNEL_NAME,
                properties={"ossAppendable": "true"},
                description="TensorBoard logs channel",
                required=False,
            ),
        ]

        return channels

    def _training_job_base_output(self, job_name):
        job_name = to_plain_text(job_name)
        if self.output_path:
            if not is_oss_uri(self.output_path):
                raise ValueError("Output path should be an OSS URI.")
            return posixpath.join(self.output_path, f"{job_name}_{random_str(6)}")

        session = get_default_session()
        bucket_name = session.oss_bucket.bucket_name
        storage_path = session.get_storage_path_by_category(
            StoragePathCategory.TrainingJob,
            f"{job_name}_{random_str(6)}",
        )
        return f"oss://{bucket_name}/{storage_path}"

    def build_outputs(
        self,
        job_name: str,
        output_channels: List[Channel],
        outputs: Optional[Dict[str, Any]] = None,
    ) -> List[Dict[str, str]]:
        base_output_path = self._training_job_base_output(job_name)
        res = []
        outputs = outputs or dict()

        for ch in output_channels:
            if ch.name in outputs:
                output = self._get_output_config(name=ch.name, item=outputs[ch.name])
            else:
                output_uri = as_oss_dir_uri(posixpath.join(base_output_path, ch.name))
                output = UriOutput(name=ch.name, output_uri=output_uri)
            res.append(output)

        extra_outputs = set(outputs.keys()) - {ch.name for ch in output_channels}

        for name in extra_outputs:
            output = self._get_output_config(
                name=name,
                item=outputs[name],
            )
            res.append(output)

        return [item.model_dump() for item in res]
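
    # For a channel without an explicit output, the URI is derived from the
    # job's base output path, e.g. (illustrative):
    #
    #   oss://<bucket>/<storage-path>/<job_name>_<random>/model/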

    # TODO: get arguments, such as VPCConfig, instance_type etc, from self instance.
    def _submit(
        self,
        job_name: str,
        algorithm_spec: Optional[AlgorithmSpec] = None,
        algorithm_name: Optional[str] = None,
        algorithm_version: Optional[str] = None,
        algorithm_provider: Optional[str] = None,
        instance_count: int = 1,
        instance_type: Optional[str] = None,
        instance_spec: Optional[InstanceSpec] = None,
        resource_id: Optional[str] = None,
        inputs: Optional[List[Dict[str, Any]]] = None,
        outputs: Optional[List[Dict[str, Any]]] = None,
        hyperparameters: Optional[Dict[str, str]] = None,
        max_run_time: Optional[int] = None,
        environments: Optional[Dict[str, str]] = None,
        user_vpc_config: Optional[Dict[str, str]] = None,
        requirements: Optional[List[str]] = None,
        experiment_config: Optional[Union[ExperimentConfig, Dict[str, Any]]] = None,
        labels: Optional[Dict[str, str]] = None,
        wait: bool = True,
        show_logs: bool = False,
    ):
        session = get_default_session()

        if not self.resource_type or self.resource_type == ResourceType.General:
            resource_type = None
        else:
            resource_type = self.resource_type.value

        if self.spot_spec:
            spot_spec = {
                "SpotStrategy": self.spot_spec.spot_strategy.value,
            }
            if self.spot_spec.spot_discount_limit:
                spot_spec["SpotDiscountLimit"] = self.spot_spec.spot_discount_limit
        else:
            spot_spec = None

        # user vpc
        if self.user_vpc_config:
            user_vpc_config = {
                "VpcId": self.user_vpc_config.vpc_id,
                "SecurityGroupId": self.user_vpc_config.security_group_id,
            }
        else:
            user_vpc_config = None

        training_job_id = session.training_job_api.create(
            instance_count=instance_count,
            instance_spec=instance_spec.model_dump() if instance_spec else None,
            algorithm_name=algorithm_name,
            algorithm_provider=algorithm_provider,
            experiment_config=(
                experiment_config.model_dump()
                if experiment_config and isinstance(experiment_config, ExperimentConfig)
                else experiment_config
            ),
            spot_spec=spot_spec,
            algorithm_version=algorithm_version,
            instance_type=instance_type,
            resource_id=resource_id,
            resource_type=resource_type,
            job_name=job_name,
            hyperparameters=hyperparameters,
            max_running_in_seconds=max_run_time,
            input_channels=inputs,
            output_channels=outputs,
            algorithm_spec=algorithm_spec.model_dump() if algorithm_spec else None,
            requirements=requirements,
            user_vpc_config=user_vpc_config,
            labels=labels,
            environments=environments,
            settings=self.settings,
        )
        training_job = TrainingJob.get(training_job_id)
        self._training_jobs.append(training_job)
        print(
            f"View the job detail by accessing the console URI: {training_job.console_uri}"
        )
        if wait:
            training_job.wait(show_logs=show_logs)
        return training_job

    @classmethod
    def _get_input_config(
        cls, name: str, item: Union[str, "FileSystemInputBase", DatasetConfig]
    ) -> Union[UriInput, DatasetConfig]:
        """Get input uri for training_job from given input."""
        from pai.estimator import FileSystemInputBase

        if not isinstance(item, (str, FileSystemInputBase, DatasetConfig)):
            raise ValueError(f"Input data of type {type(item)} is not supported.")

        if isinstance(item, FileSystemInputBase):
            input_ = UriInput(
                name=name,
                input_uri=item.to_input_uri(),
            )
        elif isinstance(item, DatasetConfig):
            input_ = DatasetConfig(
                name=name,
                dataset_id=item.dataset_id,
            )
        elif is_oss_uri(item) or is_filesystem_uri(item) or is_odps_table_uri(item):
            input_ = UriInput(
                name=name,
                input_uri=item,
            )
        elif isinstance(item, str):
            if os.path.exists(item):
                store_path = Session.get_storage_path_by_category(
                    StoragePathCategory.InputData
                )
                input_ = UriInput(name=name, input_uri=upload(item, store_path))
            else:
                raise ValueError("Invalid input data path, file not found: {item}.")
        else:
            raise ValueError(
                f"Invalid input data, supported inputs are OSS, NAS, MaxCompute "
                f"table or local path: {type(item)}."
            )
        return input_
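
    # Dispatch examples for ``_get_input_config`` (values are illustrative):
    #
    #   ("train", "oss://a-bucket/data/")              -> UriInput
    #   ("train", DatasetConfig(dataset_id="d-xxxx"))  -> DatasetConfig
    #   ("train", "./local/data/")                     -> UriInput (after upload)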

    @classmethod
    def _get_output_config(
        cls, name: str, item: Union[str, "FileSystemInputBase", DatasetConfig]
    ) -> Union[UriOutput, DatasetConfig]:
        from pai.estimator import FileSystemInputBase

        if not isinstance(item, (str, FileSystemInputBase, DatasetConfig)):
            raise ValueError(f"Output data of type {type(item)} is not supported.")

        if isinstance(item, FileSystemInputBase):
            output = UriOutput(
                name=name,
                output_uri=item.to_input_uri(),
            )
        elif isinstance(item, DatasetConfig):
            output = DatasetConfig(name=name, dataset_id=item.dataset_id)
        elif is_oss_uri(item) or is_filesystem_uri(item) or is_odps_table_uri(item):
            output = UriOutput(
                name=name,
                output_uri=as_oss_dir_uri(item),
            )
        else:
            raise ValueError(
                "Invalid output data, supported outputs are OSS, NAS or "
                "MaxCompute table URIs."
            )

        return output

    @property
    def latest_job(self) -> "TrainingJob":
        return self._training_jobs[-1] if self._training_jobs else None

    def _build_code_input(
        self, job_name: str, source_dir: Optional[str], code_dest: Optional[str] = None
    ) -> Optional[CodeDir]:
        """Upload source files to OSS and return the code input for training job."""
        if not source_dir:
            return
        if is_oss_uri(source_dir):
            code_uri = source_dir
        elif not os.path.exists(source_dir):
            raise ValueError(f"Source directory {source_dir} does not exist.")
        else:
            code_dest = code_dest or self.session.get_storage_path_by_category(
                StoragePathCategory.TrainingSrc, to_plain_text(job_name)
            )
            code_uri = upload(
                source_path=source_dir,
                oss_path=code_dest,
                bucket=self.session.oss_bucket,
            )
        oss_uri_obj = OssUriObj(uri=self.session.patch_oss_endpoint(code_uri))
        code_dir = CodeDir(
            location_type="oss",
            location_value=OssLocation(
                bucket=oss_uri_obj.bucket_name,
                key=oss_uri_obj.object_key,
                endpoint=oss_uri_obj.endpoint,
            ),
        )

        return code_dir
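
    # Sketch: a local source directory is uploaded to OSS and wrapped as a
    # ``CodeDir`` (the paths are illustrative; ``submitter`` is a hypothetical
    # instance):
    #
    #   code_dir = submitter._build_code_input(
    #       job_name="train-job",
    #       source_dir="./train_src/",
    #   )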
