# Licensed to the Apache Software Foundation (ASF) under one
# or more contributor license agreements.  See the NOTICE file
# distributed with this work for additional information
# regarding copyright ownership.  The ASF licenses this file
# to you under the Apache License, Version 2.0 (the
# "License"); you may not use this file except in compliance
# with the License.  You may obtain a copy of the License at
#
# http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing,
# software distributed under the License is distributed on an
# "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
# KIND, either express or implied.  See the License for the
# specific language governing permissions and limitations
# under the License.


from collections import defaultdict
from dataclasses import dataclass
import logging
import os
import pyarrow as pa
import asyncio
import ray
import time

from .friendly import new_friendly_name

from datafusion_ray._datafusion_ray_internal import (
    DFRayContext as DFRayContextInternal,
    DFRayDataFrame as DFRayDataFrameInternal,
    prettify,
)


def setup_logging():
    """Configure python-side logging for the current process.

    Registers the name ``TRACE`` for numeric level 5, then applies the level
    named by the ``DATAFUSION_RAY_LOG_LEVEL`` environment variable (upper-cased,
    ``WARN`` when the variable is unset) to the ``core_py`` logger and finally
    calls ``logging.basicConfig()``.
    """
    import logging

    trace_level = 5
    logging.addLevelName(trace_level, "TRACE")

    requested_level = os.environ.get("DATAFUSION_RAY_LOG_LEVEL", "WARN").upper()

    # this logger gets captured and routed to rust.   See src/lib.rs
    core_logger = logging.getLogger("core_py")
    core_logger.setLevel(requested_level)

    logging.basicConfig()


# configure logging for the importing process as soon as this module is loaded
setup_logging()

# values read from this process' environment; they are copied into the
# "env_vars" section below (DATAFUSION_RAY_LOG_LEVEL falls back to ERROR here)
_log_level = os.getenv("DATAFUSION_RAY_LOG_LEVEL", "ERROR").upper()
_rust_backtrace = os.getenv("RUST_BACKTRACE", "0")

# describes the setup hook and environment variables for worker processes
# NOTE(review): presumably handed to ray as a runtime_env by callers -- confirm
df_ray_runtime_env = {
    "worker_process_setup_hook": setup_logging,
    "env_vars": {
        "DATAFUSION_RAY_LOG_LEVEL": _log_level,
        "RAY_worker_niceness": "0",
        "RUST_BACKTRACE": _rust_backtrace,
    },
}

# module wide logger, the same one that setup_logging() configures
log = logging.getLogger("core_py")


def call_sync(coro):
    """Run ``coro`` to completion and synchronously return its result.

    When no event loop is running in the calling thread the coroutine is driven
    with ``asyncio.run``.  When a loop *is* already running in this thread (for
    example inside a notebook or when called from async code),
    ``loop.run_until_complete`` cannot be used because it raises
    ``RuntimeError: This event loop is already running``.  In that case the
    coroutine is driven by ``asyncio.run`` on a short lived helper thread while
    the calling thread blocks on the result.

    Note that in the second case the caller's event loop is blocked until the
    coroutine finishes, so ``coro`` must not depend on progress of that loop.

    Parameters:
    coro: the coroutine to execute.

    Returns whatever ``coro`` returns; exceptions raised by ``coro`` propagate
    to the caller.
    """
    try:
        asyncio.get_running_loop()
    except RuntimeError:
        # no loop is running in this thread, we are free to drive one ourselves
        return asyncio.run(coro)

    # imported locally so this function does not rely on additional module level
    # imports
    from concurrent.futures import ThreadPoolExecutor

    # a loop is running in this thread: use a private loop in another thread
    with ThreadPoolExecutor(max_workers=1) as executor:
        return executor.submit(asyncio.run, coro).result()


# work around for https://github.com/ray-project/ray/issues/31606
async def _ensure_coro(maybe_obj_ref):
    """Await ``maybe_obj_ref`` from inside a real coroutine and return its result.

    Wrapping the awaitable this way lets callers hand it to
    ``asyncio.create_task``, which only accepts coroutines.
    """
    outcome = await maybe_obj_ref
    return outcome


async def wait_for(coros, name=""):
    """Wait for all coros to complete and return their results.
    Does not preserve ordering.

    Parameters:
    coros: an iterable of awaitables.  It is consumed exactly once, so sets and
        one-shot iterators are fine.
    name (str): a label used in the timing and error log messages.

    Returns a list with one result per awaitable, in arbitrary order.  An empty
    ``coros`` yields an empty list without waiting.

    Raises the exception of a failed awaitable.  If several failed, the one
    encountered first while iterating the completed set is raised.
    """
    # materialize once so we can test for emptiness and iterate safely
    pending = list(coros)
    if not pending:
        # asyncio.wait raises ValueError when handed an empty collection
        return []

    # wrap the coro in a task to work with python 3.10 and 3.11+ where asyncio.wait semantics
    # changed to not accept any awaitable
    start = time.time()
    done, _ = await asyncio.wait(
        [asyncio.create_task(_ensure_coro(c)) for c in pending]
    )
    end = time.time()
    log.info(f"waiting for {name} took {end - start}s")

    return_values = []
    for d in done:
        e = d.exception()
        if e is not None:
            log.error(f"Exception waiting {name}: {e}")
            raise e
        return_values.append(d.result())
    return return_values


class DFRayProcessorPool:
    """A pool of DFRayProcessor actors that can be acquired and released

    Processors are identified by a processor key (a friendly random name).  The
    pool creates ``min_processors`` actors up front and grows on demand up to
    ``max_processors`` when ``acquire`` needs more than are available.
    """

    # TODO: We can probably manage this set in a better way
    # This is not a threadsafe implementation, though the DFRayContextSupervisor accesses it
    # from a single thread
    #
    # This is simple though and will suffice for now

    def __init__(self, min_processors: int, max_processors: int):
        """Create the pool and kick off creation of ``min_processors`` actors.

        The actors are only created here; ``start`` must be awaited before they
        are listening and serving.
        """
        self.min_processors = min_processors
        self.max_processors = max_processors

        # a map of processor_key (a random identifier) to stage actor reference
        self.pool = {}
        # a map of processor_key to listening address
        self.addrs = {}

        # holds object references from the start_up method for each processor
        # we know all processors are listening when all of these refs have
        # been waited on.  When they are ready we remove them from this set
        self.processors_started = set()

        # an event that is set when all processors are ready to serve
        self.processors_ready = asyncio.Event()

        # processors that are started but we need to get their address
        self.need_address = set()

        # processors that we have the address for but need to start serving
        self.need_serving = set()

        # processors in use
        self.acquired = set()

        # processors available
        self.available = set()

        for _ in range(min_processors):
            self._new_processor()

        log.info(
            f"created ray processor pool (min_processors: {min_processors}, max_processors: {max_processors})"
        )

    async def start(self):
        """Bring every created-but-not-yet-ready processor to the serving state.

        Does nothing when the pool is already marked ready.
        """
        if not self.processors_ready.is_set():
            await self._wait_for_processors_started()
            await self._wait_for_get_addrs()
            await self._wait_for_serve()
            self.processors_ready.set()

    async def wait_for_ready(self):
        """Block until ``start`` has marked the pool ready."""
        await self.processors_ready.wait()

    async def acquire(self, need=1):
        """Reserve ``need`` processors, creating additional ones when required.

        Returns a tuple ``(processors, processor_keys, addrs)`` of three
        parallel lists: actor handles, their keys and their listening
        addresses.

        Raises Exception when satisfying the request would exceed
        ``max_processors``.
        """
        processor_keys = []

        have = len(self.available)
        total = len(self.available) + len(self.acquired)
        can_make = self.max_processors - total

        need_to_make = need - have

        if need_to_make > can_make:
            raise Exception(
                f"Cannot allocate processors above {self.max_processors}"
            )

        if need_to_make > 0:
            log.debug(f"creating {need_to_make} additional processors")
            for _ in range(need_to_make):
                self._new_processor()
            await wait_for([self.start()], "waiting for created processors")

        assert len(self.available) >= need

        for _ in range(need):
            processor_key = self.available.pop()
            self.acquired.add(processor_key)

            processor_keys.append(processor_key)

        processors = [self.pool[sk] for sk in processor_keys]
        addrs = [self.addrs[sk] for sk in processor_keys]
        return (processors, processor_keys, addrs)

    def release(self, processor_keys: list[str]):
        """Return previously acquired processors to the available set.

        Raises KeyError if one of the keys is not currently acquired.
        """
        for processor_key in processor_keys:
            self.acquired.remove(processor_key)
            self.available.add(processor_key)

    def _new_processor(self):
        """Create one processor actor and register it as available.

        Clears the ready event because the new actor still has to be started;
        the object ref of its ``start_up`` call is remembered so that ``start``
        can wait on it.
        """
        self.processors_ready.clear()
        processor_key = new_friendly_name()
        log.debug(f"starting processor: {processor_key}")
        processor = DFRayProcessor.options(
            name=f"Processor : {processor_key}"
        ).remote(processor_key)
        self.pool[processor_key] = processor
        self.processors_started.add(processor.start_up.remote())
        self.available.add(processor_key)

    async def _wait_for_processors_started(self):
        """Wait for all outstanding ``start_up`` calls and queue the processors
        for address lookup."""
        if not self.processors_started:
            # nothing was created since the last start (e.g. min_processors == 0);
            # waiting on an empty collection is an error, so there is nothing to do
            return

        log.info("waiting for processors to be ready")
        started_keys = await wait_for(
            self.processors_started, "processors to be started"
        )
        # we need the addresses of these processors still
        self.need_address.update(set(started_keys))
        # we've started all the processors we know about
        self.processors_started = set()
        log.info("processors are all listening")

    async def _wait_for_get_addrs(self):
        """Fetch the listening address of every processor in ``need_address``
        and queue those processors for serving."""
        if not self.need_address:
            # no processor is waiting for an address lookup
            return

        # get the addresses in a pipelined fashion
        refs = []
        for processor_key in self.need_address:
            processor = self.pool[processor_key]
            refs.append(processor.addr.remote())

            self.need_serving.add(processor_key)

        addrs = await wait_for(refs, "processor addresses")

        # each result is the (processor_key, address) tuple returned by addr()
        for key, addr in addrs:
            self.addrs[key] = addr

        self.need_address = set()

    async def _wait_for_serve(self):
        """Tell every processor in ``need_serving`` to start serving.

        The ``serve`` calls are submitted without waiting on their results.
        """
        log.info("running processors")
        try:
            for processor_key in self.need_serving:
                log.info(f"starting serving of processor {processor_key}")
                processor = self.pool[processor_key]
                processor.serve.remote()
            self.need_serving = set()

        except Exception as e:
            log.error(f"ProcessorPool: Uhandled Exception in serve: {e}")
            raise

    async def all_done(self):
        """Call ``all_done`` on every processor in the pool and wait for all of
        the calls to finish."""
        log.info("calling processor all done")
        refs = [
            processor.all_done.remote() for processor in self.pool.values()
        ]
        if refs:
            # an empty pool has nothing to wait for
            await wait_for(refs, "processors to be all done")
        log.info("all processors shutdown")


@ray.remote(num_cpus=0.01, scheduling_strategy="SPREAD")
class DFRayProcessor:
    """Ray actor wrapping one ``DFRayProcessorService`` from the rust extension.

    Every method forwards to the wrapped service; the actor itself only
    remembers the processor key it was created with.
    """

    def __init__(self, processor_key):
        # import this here so ray doesn't try to serialize the rust extension
        from datafusion_ray._datafusion_ray_internal import (
            DFRayProcessorService,
        )

        self.processor_key = processor_key
        self.processor_service = DFRayProcessorService(processor_key)

    async def start_up(self):
        """Start the wrapped service and return this processor's key."""
        service = self.processor_service
        # this method is sync
        service.start_up()
        return self.processor_key

    async def all_done(self):
        """Await the wrapped service's ``all_done``."""
        service = self.processor_service
        await service.all_done()

    async def addr(self):
        """Return ``(processor_key, address)`` where address is whatever the
        wrapped service reports from ``addr()``."""
        key = self.processor_key
        listening_addr = self.processor_service.addr()
        return (key, listening_addr)

    async def update_plan(
        self,
        stage_id: int,
        stage_addrs: dict[int, dict[int, list[str]]],
        partition_group: list[int],
        plan_bytes: bytes,
    ):
        """Forward a new plan assignment, unchanged, to the wrapped service."""
        plan_args = (stage_id, stage_addrs, partition_group, plan_bytes)
        await self.processor_service.update_plan(*plan_args)

    async def serve(self):
        """Await the wrapped service's ``serve``, logging before and after."""
        service = self.processor_service
        log.info(
            f"[{self.processor_key}] serving on {self.processor_service.addr()}"
        )
        await service.serve()
        log.info(f"[{self.processor_key}] done serving")


@dataclass
class StageData:
    """Description of one unit of work handed to the supervisor for a query.

    DFRayDataFrame.create_ray_stages builds one instance per partition group of
    each stage, so several instances may share the same ``stage_id``.
    """

    # id of the numbered stage this unit of work belongs to
    stage_id: int
    # the stage's plan as returned by its plan_bytes() method
    # (presumably a serialized execution plan -- confirm against the rust side)
    plan_bytes: bytes
    # the partition group of the stage covered by this unit of work
    partition_group: list[int]
    # ids of the stages this stage reads from
    child_stage_ids: list[int]
    # number of output partitions of the stage
    num_output_partitions: int
    # when true, the supervisor treats the processor running this unit as the
    # single place to read each output partition; otherwise each output
    # partition is gathered from all processors with the same stage_id
    full_partitions: bool


@dataclass
class InternalStageData:
    """Supervisor side record for one unit of work of the current query.

    Carries the same six leading fields as StageData plus the processor actor
    that was assigned to the work and that processor's listening address.
    """

    # fields mirroring StageData, in the same order
    stage_id: int
    plan_bytes: bytes
    partition_group: list[int]
    child_stage_ids: list[int]
    num_output_partitions: int
    full_partitions: bool
    # handle of the processor actor assigned to this work
    remote_processor: ...  # ray.actor.ActorHandle[DFRayProcessor]
    # listening address of that processor
    remote_addr: str

    def __str__(self):
        """One line summary used in debug logs."""
        summary = f"""Stage: {self.stage_id}, pg: {self.partition_group}, child_stages:{self.child_stage_ids}, listening addr:{self.remote_addr}"""
        return summary


@ray.remote(num_cpus=0.01, scheduling_strategy="SPREAD")
class DFRayContextSupervisor:
    """Ray actor that owns the processor pool and assigns query stages to it.

    For every query it acquires one processor per StageData, works out which
    addresses each stage must read its child stages from, and sends each
    processor its plan.
    """

    def __init__(
        self,
        processor_pool_min: int,
        processor_pool_max: int,
    ) -> None:
        """Create the processor pool with the given minimum and maximum size."""
        log.info(
            f"Creating DFRayContextSupervisor processor_pool_min: {processor_pool_min}"
        )
        self.pool = DFRayProcessorPool(processor_pool_min, processor_pool_max)
        # processor_key -> stage data of the query currently assigned to it
        self.stages: dict[str, InternalStageData] = {}
        log.info("Created DFRayContextSupervisor")

    async def start(self):
        """Start the processor pool."""
        await self.pool.start()

    async def wait_for_ready(self):
        """Block until the processor pool reports ready."""
        await self.pool.wait_for_ready()

    async def get_stage_addrs(self, stage_id: int):
        """Return the listening addresses of all processors currently assigned
        to ``stage_id`` (one per partition group)."""
        return [
            sd.remote_addr
            for sd in self.stages.values()
            if sd.stage_id == stage_id
        ]

    async def new_query(
        self,
        stage_datas: list[StageData],
    ):
        """Assign a new query to the pool, replacing the previous one.

        Releases the processors of the previous query, acquires one processor
        per entry of ``stage_datas`` and sends every processor its plan together
        with the addresses of its child stages.

        Raises whatever the pool raises when it cannot provide enough
        processors, and any exception raised while updating plans.
        """
        if self.stages:
            self.pool.release(list(self.stages.keys()))
            # forget the released processors right away: if acquire() below
            # raises, a later new_query must not try to release them again
            self.stages = {}

        remote_processors, remote_processor_keys, remote_addrs = (
            await self.pool.acquire(len(stage_datas))
        )
        self.stages = {}

        for sd, remote_processor, remote_processor_key, remote_addr in zip(
            stage_datas, remote_processors, remote_processor_keys, remote_addrs
        ):
            self.stages[remote_processor_key] = InternalStageData(
                sd.stage_id,
                sd.plan_bytes,
                sd.partition_group,
                sd.child_stage_ids,
                sd.num_output_partitions,
                sd.full_partitions,
                remote_processor,
                remote_addr,
            )

        # sort out the mess of who talks to whom and ensure we can supply the correct
        # addresses to each of them
        addrs_by_stage_key = await self.sort_out_addresses()
        # isEnabledFor honours the effective level, and avoids building the
        # message when debug output would be dropped anyway
        if log.isEnabledFor(logging.DEBUG):
            lines = []
            for stage_key, stage in self.stages.items():
                lines.append(f"[{stage_key}]: {stage}\n")
                lines.append(f"child addrs: {addrs_by_stage_key[stage_key]}\n")
            log.debug("".join(lines))

        refs = []
        # now tell the stages what they are doing for this query
        for stage_key, isd in self.stages.items():
            log.info(f"going to update plan for {stage_key}")
            kid = addrs_by_stage_key[stage_key]
            refs.append(
                isd.remote_processor.update_plan.remote(
                    isd.stage_id,
                    {
                        stage_id: val["child_addrs"]
                        for (stage_id, val) in kid.items()
                    },
                    isd.partition_group,
                    isd.plan_bytes,
                )
            )
        log.info("that's all of them")

        await wait_for(refs, "updating plans")

    async def sort_out_addresses(self):
        """Iterate through our stages and gather all of their listening addresses.
        Then, provide the addresses of peer stages to each stage.

        Returns a dict keyed by processor key.  Each value maps a child stage id
        to a dict with ``"child_addrs"`` (output partition -> list of addresses
        to read it from) and ``"stage_keys"`` (processor keys of that child
        stage, kept for debug logs).

        Raises ValueError when a stage refers to a child stage id that no stage
        of the current query has.
        """
        addrs_by_stage_key = {}
        # using "isd" as shorthand to denote InternalStageData as a reminder
        for stage_key, isd in self.stages.items():
            stage_addrs = defaultdict(dict)

            for child_stage_id in isd.child_stage_ids:
                addrs = defaultdict(list)
                children = [
                    (key, c_isd)
                    for key, c_isd in self.stages.items()
                    if c_isd.stage_id == child_stage_id
                ]
                if not children:
                    # without this check the unpacking below fails with an
                    # unhelpful "not enough values to unpack"
                    raise ValueError(
                        f"stage {isd.stage_id} refers to unknown child stage {child_stage_id}"
                    )
                child_stage_keys, child_stage_datas = zip(*children)

                output_partitions = [
                    c_isd.num_output_partitions for c_isd in child_stage_datas
                ]

                # sanity check
                assert all(op == output_partitions[0] for op in output_partitions)
                output_partitions = output_partitions[0]

                # addresses of every processor running this child stage
                gathered = [c.remote_addr for c in child_stage_datas]

                for child_stage_isd in child_stage_datas:
                    if child_stage_isd.full_partitions:
                        for partition in range(output_partitions):
                            # this stage is the definitive place to read this output partition
                            addrs[partition] = [child_stage_isd.remote_addr]
                    else:
                        for partition in range(output_partitions):
                            # this output partition must be gathered from all stages with this stage_id
                            addrs[partition] = list(gathered)

                stage_addrs[child_stage_id]["child_addrs"] = addrs
                # not necessary but useful for debug logs
                stage_addrs[child_stage_id]["stage_keys"] = child_stage_keys

            addrs_by_stage_key[stage_key] = stage_addrs

        return addrs_by_stage_key

    async def all_done(self):
        """Forward ``all_done`` to the processor pool."""
        await self.pool.all_done()


class DFRayDataFrame:
    """A query result handle whose execution is distributed over ray actors.

    Wraps the internal (rust) data frame and the supervisor actor handle.
    Stages and result batches are computed on first use and cached.
    """

    def __init__(
        self,
        internal_df: DFRayDataFrameInternal,
        supervisor,  # ray.actor.ActorHandle[DFRayContextSupervisor],
        batch_size=8192,
        partitions_per_processor: int | None = None,
        prefetch_buffer_size=0,
    ):
        """Store the wrapped data frame, the supervisor handle and the
        execution settings; nothing is executed yet."""
        self.df = internal_df
        self.supervisor = supervisor
        # caches, None means "not computed yet"
        self._stages = None
        self._batches = None
        self.batch_size = batch_size
        self.partitions_per_processor = partitions_per_processor
        self.prefetch_buffer_size = prefetch_buffer_size

    def stages(self):
        """Return the stages of this query, computing them on first call."""
        # compare against None so an empty result is cached as well
        if self._stages is None:
            self._stages = self.df.stages(
                self.batch_size,
                self.prefetch_buffer_size,
                self.partitions_per_processor,
            )

        return self._stages

    def schema(self):
        """Return the schema reported by the wrapped data frame."""
        return self.df.schema()

    def execution_plan(self):
        """Return the execution plan reported by the wrapped data frame."""
        return self.df.execution_plan()

    def logical_plan(self):
        """Return the logical plan reported by the wrapped data frame."""
        return self.df.logical_plan()

    def optimized_logical_plan(self):
        """Return the optimized logical plan reported by the wrapped data frame."""
        return self.df.optimized_logical_plan()

    def collect(self) -> list[pa.RecordBatch]:
        """Execute the query and return all result batches.

        The result is cached, so the query runs at most once per data frame,
        including when it produces no batches at all.
        """
        # "is None" rather than truthiness: an empty result list is a valid,
        # cached result and must not trigger a second execution
        if self._batches is None:
            t1 = time.time()
            stages = self.stages()
            t2 = time.time()
            log.debug(f"creating stages took {t2 - t1}s")

            last_stage_id = max(stage.stage_id for stage in stages)
            log.debug(f"last stage is {last_stage_id}")

            self.create_ray_stages()

            last_stage_addrs = ray.get(
                self.supervisor.get_stage_addrs.remote(last_stage_id)
            )
            log.debug(f"last stage addrs {last_stage_addrs}")

            # the final result is read from the first address of the last stage
            reader = self.df.read_final_stage(
                last_stage_id, last_stage_addrs[0]
            )
            log.debug("got reader")
            self._batches = list(reader)
        return self._batches

    def show(self) -> None:
        """Execute the query (if needed) and print the prettified batches."""
        batches = self.collect()
        print(prettify(batches))

    def create_ray_stages(self):
        """Describe this query to the supervisor and wait until every
        processor has received its plan."""
        stage_datas = []

        # note, whereas the PyDataFrameStage object contained in self.stages()
        # holds information for a numbered stage,
        # when we tell the supervisor about our query, it wants a StageData
        # object per actor that will be created.  Hence the loop over partition_groups
        for stage in self.stages():
            for partition_group in stage.partition_groups:
                stage_datas.append(
                    StageData(
                        stage.stage_id,
                        stage.plan_bytes(),
                        partition_group,
                        stage.child_stage_ids,
                        stage.num_output_partitions,
                        stage.full_partitions,
                    )
                )

        ref = self.supervisor.new_query.remote(stage_datas)
        call_sync(wait_for([ref], "creating ray stages"))


class DFRayContext:
    """Entry point for running SQL queries distributed over ray.

    Creating a context creates and starts a DFRayContextSupervisor actor; the
    context then registers tables with the internal (rust) context and turns
    SQL text into DFRayDataFrame objects.
    """

    def __init__(
        self,
        batch_size: int = 8192,
        prefetch_buffer_size: int = 0,
        partitions_per_processor: int | None = None,
        processor_pool_min: int = 1,
        processor_pool_max: int = 100,
    ) -> None:
        """Create the internal context and the supervisor actor, then block
        until the supervisor has started its processor pool.

        ``batch_size``, ``prefetch_buffer_size`` and
        ``partitions_per_processor`` are stored and handed to every data frame
        created by ``sql``; the two pool parameters are passed to the
        supervisor.
        """
        self.ctx = DFRayContextInternal()
        self.batch_size = batch_size
        self.partitions_per_processor = partitions_per_processor
        self.prefetch_buffer_size = prefetch_buffer_size

        self.supervisor = DFRayContextSupervisor.options(
            name="RayContextSupersisor",
        ).remote(
            processor_pool_min,
            processor_pool_max,
        )

        # ask the supervisor to start its processors
        start_ref = self.supervisor.start.remote()

        # ensure we are ready: block here until the start call has completed
        s = time.time()
        call_sync(wait_for([start_ref], "RayContextSupervisor start"))
        e = time.time()
        log.info(
            f"RayContext::__init__ waiting for supervisor to be ready took {e - s}s"
        )

    def register_parquet(self, name: str, path: str):
        """
        Register a Parquet file with the given name and path.
        The path can be a local filesystem path, absolute filesystem path, or a url.

        If the path is an object store url, the appropriate object store will be registered.
        Configuration of the object store will be gathered from the environment.

        For example for s3:// urls, credentials will be looked for by the AWS SDK,
        which will check environment variables, credential files, etc

        Parameters:
        name (str): The name to register the Parquet file under.
        path (str): The file path to the Parquet file.
        """
        self.ctx.register_parquet(name, path)

    def register_csv(self, name: str, path: str):
        """
        Register a csv file with the given name and path.
        The path can be a local filesystem path, absolute filesystem path, or a url.

        If the path is an object store url, the appropriate object store will be registered.
        Configuration of the object store will be gathered from the environment.

        For example for s3:// urls, credentials will be looked for by the AWS SDK,
        which will check environment variables, credential files, etc

        Parameters:
        name (str): The name to register the csv file under.
        path (str): The file path to the csv file.
        """
        self.ctx.register_csv(name, path)

    def register_listing_table(
        self, name: str, path: str, file_extention="parquet"
    ):
        """
        Register a directory of files with the given name.
        The path can be a local filesystem path, absolute filesystem path, or a url.

        If the path is an object store url, the appropriate object store will be registered.
        Configuration of the object store will be gathered from the environment.

        For example for s3:// urls, credentials will be looked for by the AWS SDK,
        which will check environment variables, credential files, etc

        Parameters:
        name (str): The name to register the table under.
        path (str): The path to the directory of files.
        file_extention (str): Passed through to the internal context, defaults
            to "parquet" (presumably the extension of the files to include).
        """

        self.ctx.register_listing_table(name, path, file_extention)

    def sql(self, query: str) -> DFRayDataFrame:
        """Plan ``query`` with the internal context and wrap the result in a
        DFRayDataFrame that carries this context's supervisor and settings."""
        df = self.ctx.sql(query)

        return DFRayDataFrame(
            df,
            self.supervisor,
            self.batch_size,
            self.partitions_per_processor,
            self.prefetch_buffer_size,
        )

    def set(self, option: str, value: str) -> None:
        """Forward a configuration option to the internal context."""
        self.ctx.set(option, value)

    def __del__(self):
        """Tell the supervisor that all work is done when the context goes away."""
        # __init__ may have failed before the supervisor was created, in which
        # case there is nothing to clean up
        supervisor = getattr(self, "supervisor", None)
        if supervisor is None:
            return

        log.info("DFRayContext, cleaning up remote resources")
        ref = supervisor.all_done.remote()
        call_sync(wait_for([ref], "DFRayContextSupervisor all done"))