# Licensed to the Apache Software Foundation (ASF) under one or more
# contributor license agreements.  See the NOTICE file distributed with
# this work for additional information regarding copyright ownership.
# The ASF licenses this file to You under the Apache License, Version 2.0
# (the "License"); you may not use this file except in compliance with
# the License.  You may obtain a copy of the License at
#
#   http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.

import io
import os
import csv
import json
from datetime import datetime
import time
import calendar
import tempfile
import itertools
import importlib_metadata
from collections import defaultdict
from decimal import Decimal

import numpy as np
from pytz import timezone, UTC
from netCDF4 import Dataset

from . import config
from . import geo
from webservice.algorithms.NexusCalcHandler import NexusCalcHandler
from webservice.webmodel import NexusResults

EPOCH = timezone('UTC').localize(datetime(1970, 1, 1))
ISO_8601 = '%Y-%m-%dT%H:%M:%S%z'


class BaseDomsQueryCalcHandler(NexusCalcHandler):
    def __init__(self, tile_service_factory):
        NexusCalcHandler.__init__(self, tile_service_factory)

    def getDataSourceByName(self, source):
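        """
        Return the endpoint entry from config.ENDPOINTS whose name matches
        ``source``, or None if no such data source is configured.
        """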
        for s in config.ENDPOINTS:
            if s["name"] == source:
                return s
        return None

    def _does_datasource_exist(self, ds):
        for endpoint in config.ENDPOINTS:
            if endpoint["name"] == ds:
                return True
        return False


class DomsEncoder(json.JSONEncoder):
    def __init__(self, **args):
        json.JSONEncoder.__init__(self, **args)

    def default(self, obj):
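        """
        Serialise values the default JSON encoder cannot handle: NaN becomes
        null, datetimes become integer seconds since the Unix epoch, Decimals
        become strings, and numpy float32 values become plain floats.

        Illustrative example (a minimal sketch, assuming a UTC-aware datetime):

        >>> json.dumps({'t': datetime(1970, 1, 1, 0, 0, 1, tzinfo=UTC)}, cls=DomsEncoder)
        '{"t": 1}'
        """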
        if isinstance(obj, (float, np.floating)) and np.isnan(obj):
            return None  # represent NaN as JSON null
        elif isinstance(obj, datetime):
            return int((obj - EPOCH).total_seconds())
        elif isinstance(obj, Decimal):
            return str(obj)
        elif isinstance(obj, np.float32):
            return float(obj)
        else:
            return json.JSONEncoder.default(self, obj)


class DomsQueryResults(NexusResults):
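    """
    Container for one page of CDMS matchup results. Holds the request
    parameters, bounds, counts and execution details alongside the data, and
    serialises them to JSON, CSV or NetCDF.
    """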
    def __init__(self, results=None, args=None, bounds=None, count=None, details=None, computeOptions=None,
                 executionId=None, status_code=200, page_num=None, page_size=None):
        NexusResults.__init__(self, results=results, meta=None, stats=None, computeOptions=computeOptions,
                              status_code=status_code)
        self.__args = args
        self.__bounds = bounds
        self.__count = count
        self.__details = details
        self.__executionId = str(executionId)

        if self.__details is None:
            self.__details = {}

        # Add page num and size to details block
        self.__details['pageNum'] = page_num
        self.__details['pageSize'] = page_size

    def toJson(self):
        bounds = self.__bounds.toMap() if self.__bounds is not None else {}
        return json.dumps(
            {"executionId": self.__executionId, "data": self.results(), "params": self.__args, "bounds": bounds,
             "count": self.__count, "details": self.__details}, indent=4, cls=DomsEncoder)

    def toCSV(self):
        return DomsCSVFormatter.create(self.__executionId, self.results(), self.__args, self.__details)

    def toNetCDF(self):
        return DomsNetCDFFormatter.create(self.__executionId, self.results(), self.__args, self.__details)

    def filename(self):
        return f'CDMS_{self.__executionId}_page{self.__details["pageNum"]}'


class DomsCSVFormatter:
    @staticmethod
    def create(executionId, results, params, details):
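        """
        Render the matchup results as CSV text: a block of constant global
        attributes, a block of per-execution attributes, a blank row, then one
        data row per (primary, secondary) match pair.
        """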

        csv_mem_file = io.StringIO()
        try:
            DomsCSVFormatter.__addConstants(csv_mem_file)
            DomsCSVFormatter.__addDynamicAttrs(csv_mem_file, executionId, results, params, details)
            csv.writer(csv_mem_file).writerow([])

            DomsCSVFormatter.__packValues(csv_mem_file, results)

            csv_out = csv_mem_file.getvalue()
        finally:
            csv_mem_file.close()

        return csv_out

    @staticmethod
    def __get_variable_name(variable):
        def is_empty(s):
            return s is None or s == ''

        name = variable['cf_variable_name']

        return name if not is_empty(name) else variable['variable_name']

    @staticmethod
    def __packValues(csv_mem_file, results):
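        """
        Write the header row and data rows. Columns are the non-variable keys
        of each record followed by the (CF) names of the primary and secondary
        variables; one row is written for every (primary, match) pair.
        """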
        primary_headers = list(dict.fromkeys(
            key for result in results for key in result if key not in ['matches', 'primary']
        ))

        primary_headers.extend(list(dict.fromkeys(
            DomsCSVFormatter.__get_variable_name(variable) for result in results for variable in result['primary']
        )))

        secondary_headers = list(dict.fromkeys(
            key for result in results for match in result['matches'] for key in match if key != 'secondary'
        ))

        secondary_headers.extend(list(dict.fromkeys(
            DomsCSVFormatter.__get_variable_name(variable) for result in results for match in result['matches'] for variable in match['secondary']
        )))

        writer = csv.writer(csv_mem_file)
        writer.writerow(list(itertools.chain(primary_headers, secondary_headers)))

        for primaryValue in results:
            for matchup in primaryValue["matches"]:
                # Primary
                primary_row = [None for _ in range(len(primary_headers))]
                for key, value in primaryValue.items():
                    if key == 'matches':
                        continue

                    if key != 'primary':
                        index = primary_headers.index(key)
                        primary_row[index] = value
                    else:
                        for variable in value:
                            index = primary_headers.index(DomsCSVFormatter.__get_variable_name(variable))
                            primary_row[index] = variable['variable_value']
                # Secondary
                secondary_row = [None for _ in range(len(secondary_headers))]
                for key, value in matchup.items():
                    if key != 'secondary':
                        index = secondary_headers.index(key)
                        secondary_row[index] = value
                    else:
                        for variable in value:
                            index = secondary_headers.index(DomsCSVFormatter.__get_variable_name(variable))
                            secondary_row[index] = variable['variable_value']
                writer.writerow(list(itertools.chain(primary_row, secondary_row)))

    @staticmethod
    def __addConstants(csvfile):
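        """
        Write the fixed global-attribute rows (product and project metadata
        that does not depend on the request) to the CSV file.
        """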

        version = importlib_metadata.distribution('nexusanalysis').version

        global_attrs = [
            {"Global Attribute": "product_version", "Value": "1.0"},
            {"Global Attribute": "Conventions", "Value": "CF-1.6, ACDD-1.3"},
            {"Global Attribute": "title", "Value": "CDMS satellite-insitu machup output file"},
            {"Global Attribute": "history",
             "Value": f"Processing_Version = V1.0, Software_Name = CDMS, Software_Version = {version}"},
            {"Global Attribute": "institution", "Value": "JPL, FSU, NCAR, Saildrone"},
            {"Global Attribute": "source", "Value": "doms.jpl.nasa.gov"},
            {"Global Attribute": "standard_name_vocabulary",
             "Value": "CF Standard Name Table v27, BODC controlled vocabulary"},
            {"Global Attribute": "cdm_data_type", "Value": "trajectory, station, point, swath, grid"},
            {"Global Attribute": "processing_level", "Value": "4"},
            {"Global Attribute": "project", "Value": "Cloud-based Data Matchup Service (CDMS)"},
            {"Global Attribute": "keywords_vocabulary",
             "Value": "NASA Global Change Master Directory (GCMD) Science Keywords"},
            # TODO What should the keywords be?
            {"Global Attribute": "keywords", "Value": "SATELLITES, OCEAN PLATFORMS, SHIPS, BUOYS, MOORINGS, AUVS, ROV, "
                                                      "NASA/JPL/PODAAC, FSU/COAPS, UCAR/NCAR, SALINITY, "
                                                      "SEA SURFACE TEMPERATURE, SURFACE WINDS"},
            {"Global Attribute": "creator_name", "Value": "Cloud-Based Data Matchup Service (CDMS)"},
            {"Global Attribute": "creator_email", "Value": "cdms@jpl.nasa.gov"},
            {"Global Attribute": "creator_url", "Value": "https://doms.jpl.nasa.gov/"},
            {"Global Attribute": "publisher_name",  "Value": "CDMS"},
            {"Global Attribute": "publisher_email", "Value": "cdms@jpl.nasa.gov"},
            {"Global Attribute": "publisher_url", "Value": "https://doms.jpl.nasa.gov"},
            {"Global Attribute": "acknowledgment", "Value": "CDMS is a NASA/ACCESS funded project with prior support from NASA/AIST"},
        ]

        writer = csv.DictWriter(csvfile, sorted(next(iter(global_attrs)).keys()))

        writer.writerows(global_attrs)

    @staticmethod
    def __addDynamicAttrs(csvfile, executionId, results, params, details):
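        """
        Write the per-execution global-attribute rows (platforms, spatial and
        temporal coverage, tolerances, match counts and paging) derived from
        the request parameters and match details.
        """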

        platforms = set()
        for primaryValue in results:
            platforms.add(primaryValue['platform'])
            for match in primaryValue['matches']:
                platforms.add(match['platform'])

        insituDatasets = params["matchup"]
        insituLinks = set()
        for insitu in insituDatasets:
            if insitu in config.METADATA_LINKS:
                insituLinks.add(config.METADATA_LINKS[insitu])


        global_attrs = [
            {"Global Attribute": "Platform", "Value": ', '.join(platforms)},
            {"Global Attribute": "time_coverage_start",
             "Value": params["startTime"].strftime(ISO_8601)},
            {"Global Attribute": "time_coverage_end",
             "Value": params["endTime"].strftime(ISO_8601)},

            {"Global Attribute": "geospatial_lon_min", "Value": params["bbox"].split(',')[0]},
            {"Global Attribute": "geospatial_lat_min", "Value": params["bbox"].split(',')[1]},
            {"Global Attribute": "geospatial_lon_max", "Value": params["bbox"].split(',')[2]},
            {"Global Attribute": "geospatial_lat_max", "Value": params["bbox"].split(',')[3]},
            {"Global Attribute": "geospatial_lat_units", "Value": "degrees_north"},
            {"Global Attribute": "geospatial_lon_units", "Value": "degrees_east"},

            {"Global Attribute": "geospatial_vertical_min", "Value": params["depthMin"]},
            {"Global Attribute": "geospatial_vertical_max", "Value": params["depthMax"]},
            {"Global Attribute": "geospatial_vertical_units", "Value": "m"},
            {"Global Attribute": "geospatial_vertical_positive", "Value": "down"},

            {"Global Attribute": "CDMS_matchID", "Value": executionId},
            {"Global Attribute": "CDMS_TimeWindow", "Value": params["timeTolerance"] / 60 / 60},
            {"Global Attribute": "CDMS_TimeWindow_Units", "Value": "hours"},

            {"Global Attribute": "CDMS_platforms", "Value": params["platforms"]},
            {"Global Attribute": "CDMS_SearchRadius", "Value": params["radiusTolerance"]},
            {"Global Attribute": "CDMS_SearchRadius_Units", "Value": "m"},

            {"Global Attribute": "CDMS_DatasetMetadata", "Value": ', '.join(insituLinks)},
            {"Global Attribute": "CDMS_primary", "Value": params["primary"]},
            {"Global Attribute": "CDMS_secondary", "Value": ','.join(params['matchup']) if isinstance(params["matchup"], list) else params['matchup']},
            {"Global Attribute": "CDMS_ParameterPrimary", "Value": params.get("parameter", "")},

            {"Global Attribute": "CDMS_time_to_complete", "Value": details["timeToComplete"]},
            {"Global Attribute": "CDMS_time_to_complete_units", "Value": "seconds"},
            {"Global Attribute": "CDMS_num_secondary_matched", "Value": details["numSecondaryMatched"]},
            {"Global Attribute": "CDMS_num_primary_matched", "Value": details["numPrimaryMatched"]},

            {"Global Attribute": "date_modified", "Value": datetime.utcnow().replace(tzinfo=UTC).strftime(ISO_8601)},
            {"Global Attribute": "date_created", "Value": datetime.utcnow().replace(tzinfo=UTC).strftime(ISO_8601)},

            {"Global Attribute": "URI_Matchup", "Value": "https://doms.jpl.nasa.gov/domsresults?id=" + executionId + "&output=CSV"}, # TODO how to replace with actual req URL

            {"Global Attribute": "CDMS_page_num", "Value": details["pageNum"]},
            {"Global Attribute": "CDMS_page_size", "Value": details["pageSize"]},
        ]

        writer = csv.DictWriter(csvfile, sorted(next(iter(global_attrs)).keys()))

        writer.writerows(global_attrs)


class DomsNetCDFFormatter:
    @staticmethod
    def create(executionId, results, params, details):
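        """
        Build a NETCDF4 file for the matchup results and return its raw bytes.
        The file carries the CDMS/ACDD global attributes, a PrimaryData group,
        a SecondaryData group, and a matchIDs variable of
        (MatchedRecords, MatchedGroups) index pairs linking the two groups.
        """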

        t = tempfile.mkstemp(prefix="cdms_", suffix=".nc")
        tempFileName = t[1]

        dataset = Dataset(tempFileName, "w", format="NETCDF4")
        dataset.CDMS_matchID = executionId
        DomsNetCDFFormatter.__addNetCDFConstants(dataset)

        dataset.date_modified = datetime.utcnow().replace(tzinfo=UTC).strftime(ISO_8601)
        dataset.date_created = datetime.utcnow().replace(tzinfo=UTC).strftime(ISO_8601)
        dataset.time_coverage_start = params["startTime"].strftime(ISO_8601)
        dataset.time_coverage_end = params["endTime"].strftime(ISO_8601)
        dataset.time_coverage_resolution = "point"
        dataset.CDMS_secondary = params["matchup"]
        dataset.CDMS_num_matchup_matched = details["numSecondaryMatched"]
        dataset.CDMS_num_primary_matched = details["numPrimaryMatched"]

        bbox = geo.BoundingBox(asString=params["bbox"])
        dataset.geospatial_lat_max = bbox.north
        dataset.geospatial_lat_min = bbox.south
        dataset.geospatial_lon_max = bbox.east
        dataset.geospatial_lon_min = bbox.west
        dataset.geospatial_lat_units = "degrees_north"
        dataset.geospatial_lon_units = "degrees_east"
        dataset.geospatial_vertical_min = float(params["depthMin"])
        dataset.geospatial_vertical_max = float(params["depthMax"])
        dataset.geospatial_vertical_units = "m"
        dataset.geospatial_vertical_positive = "down"

        dataset.CDMS_TimeWindow = params["timeTolerance"] / 60 / 60
        dataset.CDMS_TimeWindow_Units = "hours"
        dataset.CDMS_SearchRadius = float(params["radiusTolerance"])
        dataset.CDMS_SearchRadius_Units = "m"
        dataset.URI_Matchup = "https://doms.jpl.nasa.gov/domsresults?id=" + executionId + "&output=NETCDF"

        dataset.CDMS_ParameterPrimary = params["parameter"] if ("parameter" in params and params['parameter'] is not None) else ""
        dataset.CDMS_platforms = params["platforms"]
        dataset.CDMS_primary = params["primary"]
        dataset.CDMS_time_to_complete = details["timeToComplete"]
        dataset.CDMS_time_to_complete_units = "seconds"
        dataset.CDMS_page_num = details["pageNum"]
        dataset.CDMS_page_size = details["pageSize"]

        insituDatasets = params["matchup"]
        insituLinks = set()
        for insitu in insituDatasets:
            if insitu in config.METADATA_LINKS:
                insituLinks.add(config.METADATA_LINKS[insitu])
        if insituLinks:
            dataset.CDMS_DatasetMetadata = ', '.join(insituLinks)

        platforms = set()
        for primaryValue in results:
            platforms.add(primaryValue['platform'])
            for match in primaryValue['matches']:
                platforms.add(match['platform'])
        dataset.platform = ', '.join(platforms)

        satellite_group_name = 'PrimaryData'
        insitu_group_name = "SecondaryData"

        # Create Satellite group, variables, and attributes
        satelliteGroup = dataset.createGroup(satellite_group_name)
        satelliteWriter = DomsNetCDFValueWriter(satelliteGroup, params["parameter"])

        # Create InSitu group, variables, and attributes
        insituGroup = dataset.createGroup(insitu_group_name)
        insituWriter = DomsNetCDFValueWriter(insituGroup, params["parameter"])

        # Add data to Insitu and Satellite groups, generate array of match ID pairs
        matches = DomsNetCDFFormatter.__writeResults(results, satelliteWriter, insituWriter)
        dataset.createDimension("MatchedRecords", size=None)
        dataset.createDimension("MatchedGroups", size=2)
        matchArray = dataset.createVariable("matchIDs", "f4", ("MatchedRecords", "MatchedGroups"))
        matchArray[:] = matches

        dataset.close()
        f = open(tempFileName, "rb")
        data = f.read()
        f.close()
        os.unlink(tempFileName)
        return data

    @staticmethod
    def __addNetCDFConstants(dataset):
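        """
        Set the fixed ACDD/CF global attributes on the output dataset.
        """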
        dataset.product_version = "1.0"
        dataset.Conventions = "CF-1.8, ACDD-1.3"
        dataset.title = "CDMS satellite-insitu matchup output file"
        version = importlib_metadata.distribution('nexusanalysis').version
        dataset.history = f"Processing_Version = V1.0, Software_Name = CDMS, Software_Version = {version}"
        dataset.institution = "JPL, FSU, NCAR, Saildrone"
        dataset.source = "doms.jpl.nasa.gov"
        dataset.standard_name_vocabulary = "CF Standard Name Table v27, BODC controlled vocabulary"
        dataset.cdm_data_type = "Point/Profile, Swath/Grid"
        dataset.processing_level = "4"
        dataset.project = "Cloud-Based Data Matchup Service (CDMS)"
        dataset.keywords_vocabulary = "NASA Global Change Master Directory (GCMD) Science Keywords"
        dataset.keywords = "SATELLITES, OCEAN PLATFORMS, SHIPS, BUOYS, MOORINGS, AUVS, ROV, NASA/JPL/PODAAC, " \
                           "FSU/COAPS, UCAR/NCAR, SALINITY, SEA SURFACE TEMPERATURE, SURFACE WINDS"
        dataset.creator_name = "Cloud-Based Data Matchup Service (CDMS)"
        dataset.creator_email = "cdms@jpl.nasa.gov"
        dataset.creator_url = "https://doms.jpl.nasa.gov/"
        dataset.publisher_name = "Cloud-Based Data Matchup Service (CDMS)"
        dataset.publisher_email = "cdms@jpl.nasa.gov"
        dataset.publisher_url = "https://doms.jpl.nasa.gov"
        dataset.acknowledgment = "CDMS is a NASA/ACCESS funded project with prior support from NASA/AIST"

    @staticmethod
    def __writeResults(results, satelliteWriter, insituWriter):
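        """
        Add each primary record to the satellite writer and each distinct
        secondary record (deduplicated by id and depth) to the in situ writer,
        and return the list of (satellite index, in situ index) match pairs.
        """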
        ids = {}
        matches = []
        insituIndex = 0

        #
        # Loop through all of the results, add each satellite data point to the array
        #
        for r, result in enumerate(results):
            satelliteWriter.addData(result)

            # Add each match only if it is not already in the array of in situ points
            for match in result["matches"]:
                depth_str = ''
                if match['depth'] is not None:
                    depth_str = f'{match["depth"]:.4}'
                key = (match['id'], depth_str)

                if key not in ids:
                    ids[key] = insituIndex
                    insituIndex += 1
                    insituWriter.addData(match)

                # Append an index pair of (satellite, in situ) to the array of matches
                matches.append((r, ids[key]))

        # Add data/write to the netCDF file
        satelliteWriter.writeGroup()
        insituWriter.writeGroup()

        return matches


class DomsNetCDFValueWriter:
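    """
    Buffers the lat/lon/time/depth coordinates and science variables of the
    records added to it, then writes them as netCDF variables into the group
    supplied at construction time.
    """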
    def __init__(self, group, matchup_parameter):
        group.createDimension("dim", size=None)
        self.group = group

        self.lat = []
        self.lon = []
        self.time = []
        self.depth = []

        self.primary_group_name = "PrimaryData"
        self.secondary_group_name = "SecondaryData"
        self.data_map = defaultdict(list)

    def addData(self, result_item):
        """
        Populate DomsNetCDFValueWriter fields from matchup results dict
        """
        non_data_fields = [
            'id', 'lon', 'lat',
            'source', 'device',
            'platform', 'time', 'matches',
            'point', 'fileurl'
        ]
        self.lat.append(result_item.get('lat', None))
        self.lon.append(result_item.get('lon', None))
        # timegm interprets the time tuple as UTC, matching the
        # "seconds since 1970-01-01" units declared on the time variable.
        self.time.append(calendar.timegm(result_item.get('time').timetuple()))

        # All other variables are assumed to be science variables.
        # Add DataPoints accordingly.
        for key, value in result_item.items():
            if 'depth' in key:
                self.depth.append(result_item.get(key))
                continue
            if key not in non_data_fields:
                if len(self.data_map[key]) != len(self.lat) - 1:
                    # If the counts mismatch, fill this variable with
                    # None so the data matches the size
                    size_diff = len(self.lat) - len(self.data_map[key]) - 1
                    self.data_map[key].extend([None] * size_diff)
                self.data_map[key].append(value)

        # Check if there are any variables that were not appended to.
        # Append None, meaning that value is empty.
        for var_name in set(self.data_map.keys()) - set(result_item.keys()):
            self.data_map[var_name].append(None)

    def writeGroup(self):
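        """
        Create the coordinate variables (lon, lat, time and, when present,
        depth), then one netCDF variable per science variable collected in
        data_map, masking missing values to the fill value.
        """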
        #
        # Create variables, enrich with attributes, and add data
        #
        lonVar = self.group.createVariable('lon', 'f4', ('dim',), fill_value=-32767.0)
        latVar = self.group.createVariable('lat', 'f4', ('dim',), fill_value=-32767.0)
        timeVar = self.group.createVariable('time', 'f4', ('dim',), fill_value=-32767.0)

        self.__enrichLon(lonVar, min(self.lon), max(self.lon))
        self.__enrichLat(latVar, min(self.lat), max(self.lat))
        self.__enrichTime(timeVar)

        latVar[:] = self.lat
        lonVar[:] = self.lon
        timeVar[:] = self.time

        # Add depth variable, if any depths were recorded
        if self.depth and any(d is not None for d in self.depth):
            depthVar = self.group.createVariable('depth', 'f4', ('dim',), fill_value=-32767.0)
            self.__enrichDepth(depthVar, self.__calcMin(self.depth), max(d for d in self.depth if d is not None))
            # None entries become NaN and are masked to the fill value
            depthVar[:] = np.ma.masked_invalid(np.array(self.depth, dtype=np.float64))

        for variable_name, data in self.data_map.items():
            units = {}

            variables = dict.fromkeys(
                ((variable['variable_name'], variable['cf_variable_name']) for match in data for variable in match),
                None
            )

            for variable in variables:
                variables[variable] = np.repeat(np.nan, len(data))

            for i, match in enumerate(data):
                for variable in match:
                    key = (variable['variable_name'], variable['cf_variable_name'])
                    unit = variable['variable_unit']
                    units[key] = str(unit) if unit is not None else 'UNKNOWN'
                    variables[key][i] = variable['variable_value']

            for variable in variables:
                # Create a netCDF variable for each science variable
                name = variable[0]
                cf_name = variable[1]

                data_variable = self.group.createVariable(
                    cf_name if cf_name is not None and cf_name != '' else name, 'f4', ('dim',), fill_value=-32767.0)
                # Find min/max for data variables. Unmatched entries are NaN,
                # so use the NaN-aware reductions.
                min_data = np.nanmin(variables[variable])
                max_data = np.nanmax(variables[variable])
                self.__enrichVariable(data_variable, min_data, max_data, has_depth=None, unit=units[variable])
                data_variable[:] = np.ma.masked_invalid(variables[variable])
                data_variable.long_name = name
                if cf_name:
                    data_variable.standard_name = cf_name

    #
    # Lists may include 'None" values, to calc min these must be filtered out
    #
    @staticmethod
    def __calcMin(var):
        return min(x for x in var if x is not None)

    @staticmethod
    def __enrichVariable(var, var_min, var_max, has_depth, unit='UNKNOWN'):
        coordinates = ['lat', 'lon', 'depth', 'time']

        if not has_depth:
            coordinates = ['lat', 'lon', 'time']

        var.units = unit
        var.valid_min = var_min
        var.valid_max = var_max
        var.coordinates = ' '.join(coordinates)

    #
    # Add attributes to each variable
    #
    @staticmethod
    def __enrichLon(var, var_min, var_max):
        var.long_name = "Longitude"
        var.standard_name = "longitude"
        var.axis = "X"
        var.units = "degrees_east"
        var.valid_min = var_min
        var.valid_max = var_max

    @staticmethod
    def __enrichLat(var, var_min, var_max):
        var.long_name = "Latitude"
        var.standard_name = "latitude"
        var.axis = "Y"
        var.units = "degrees_north"
        var.valid_min = var_min
        var.valid_max = var_max

    @staticmethod
    def __enrichTime(var):
        var.long_name = "Time"
        var.standard_name = "time"
        var.axis = "T"
        var.units = "seconds since 1970-01-01 00:00:00 0:00"

    @staticmethod
    def __enrichDepth(var, var_min, var_max):
        var.valid_min = var_min
        var.valid_max = var_max
        var.units = "m"
        var.long_name = "Depth"
        var.standard_name = "depth"
        var.axis = "Z"
        var.positive = "Down"

