# Licensed to the Apache Software Foundation (ASF) under one or more
# contributor license agreements.  See the NOTICE file distributed with
# this work for additional information regarding copyright ownership.
# The ASF licenses this file to You under the Apache License, Version 2.0
# (the "License"); you may not use this file except in compliance with
# the License.  You may obtain a copy of the License at
#
#   http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.

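"""
CLI entry point to ingest one or more S3 files from a bucket/prefix into
CDMS Parquet storage via the CDMS Flask ingestion endpoint. See the sample
usage at the bottom of this file.
"""
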
import argparse
import base64
import logging
import os

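# parquet_flask configuration reads these environment variables at import
# time, so they must exist (even as empty strings) before the imports below.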
os.environ['master_spark_url'] = ''
os.environ['spark_app_name'] = ''
os.environ['parquet_file_name'] = ''
os.environ['in_situ_schema'] = ''
os.environ['authentication_type'] = ''
os.environ['authentication_key'] = ''
os.environ['parquet_metadata_tbl'] = ''

from parquet_flask.cdms_lambda_func.lambda_func_env import LambdaFuncEnv


class IngestS3Entry:
    BUCKET_NAME_KEY = 'BUCKET_NAME'
    KEY_PREFIX_KEY = 'KEY_PREFIX'

    def __get_args(self) -> argparse.Namespace:
        parser = argparse.ArgumentParser(description="Ingest one or more S3 files into Parquet. Note that AWS environment variables should be set before running this")
        parser.add_argument(f'--{LambdaFuncEnv.CDMS_DOMAIN}',
                            help="CDMS Flask domain where the ingestion endpoint resides. Must include the `/insitu` prefix",
                            metavar="http://localhost:9801/insitu",
                            required=True)
        parser.add_argument(f'--{LambdaFuncEnv.CDMS_BEARER_TOKEN}',
                            help="plain-text security token set in the CDMS Flask pod during K8s deployment. See the Dockerfile",
                            metavar="mock-token",
                            required=True)
        parser.add_argument(f'--{LambdaFuncEnv.PARQUET_META_TBL_NAME}',
                            help="DynamoDB table where Parquet file ingestion records are stored. See Values.yaml",
                            metavar="cdms_parquet_meta_dev_v1",
                            required=True)
        parser.add_argument(f'--{self.BUCKET_NAME_KEY}',
                            help="name of the S3 bucket",
                            metavar="icoads-bucket",
                            required=True)
        parser.add_argument(f'--{self.KEY_PREFIX_KEY}',
                            help="S3 key prefix. All files starting with this prefix are ingested. Pass an empty value to ingest all files, or an exact file path to ingest a single file",
                            metavar='2021/01/01/samplefile.json.gz',
                            required=True)
        parser.add_argument(f'--{LambdaFuncEnv.LOG_LEVEL}',
                            help="Python log level as an integer (e.g. 10 = DEBUG)",
                            default='10',
                            metavar='10',
                            required=False)
        return parser.parse_args()

    def start(self):
        options = self.__get_args()
        logging.basicConfig(level=int(getattr(options, LambdaFuncEnv.LOG_LEVEL)),
                            format="%(asctime)s [%(levelname)s] [%(name)s::%(lineno)d] %(message)s")
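        # propagate CLI arguments into the environment variables that the
        # lambda ingestion code reads its configuration from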
        os.environ[LambdaFuncEnv.LOG_LEVEL] = getattr(options, LambdaFuncEnv.LOG_LEVEL)
        os.environ[LambdaFuncEnv.CDMS_DOMAIN] = getattr(options, LambdaFuncEnv.CDMS_DOMAIN)
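        # the bearer token is stored base64-encoded; the downstream ingestion
        # code is assumed to decode it before using it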
        os.environ[LambdaFuncEnv.CDMS_BEARER_TOKEN] = base64.standard_b64encode(getattr(options, LambdaFuncEnv.CDMS_BEARER_TOKEN).encode()).decode()
        os.environ[LambdaFuncEnv.PARQUET_META_TBL_NAME] = getattr(options, LambdaFuncEnv.PARQUET_META_TBL_NAME)
        bucket_name = getattr(options, self.BUCKET_NAME_KEY)
        key_prefix = getattr(options, self.KEY_PREFIX_KEY)

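        # deferred imports: these modules read configuration from the
        # environment, so they are imported only after it is populated above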
        from parquet_flask.cdms_lambda_func.ingest_s3_to_cdms.ingest_s3_to_cdms import IngestS3ToCdms
        from parquet_flask.aws.aws_s3 import AwsS3

        s3 = AwsS3()
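        # walk the bucket, keeping only .json and .json.gz objects under the prefix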
        for key, size in s3.get_child_s3_files(bucket_name, key_prefix,
                                               lambda x: x['Key'].endswith('.json') or x['Key'].endswith('.json.gz')):
            try:
                print(f'working on: {key}')
                IngestS3ToCdms().start(event={'s3_url': f's3://{bucket_name}/{key}'})
            except Exception as e:
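                # log the failure and continue; one bad file should not abort the batch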
                print(f'error while processing: s3://{bucket_name}/{key}. details: {str(e)}')
        return


if __name__ == '__main__':
    """
    Sample usage: 
    
    python3 -m parquet_cli.ingest_s3 \
      --LOG_LEVEL 30 \
      --CDMS_DOMAIN https://doms.jpl.nasa.gov/insitu  \
      --CDMS_BEARER_TOKEN Mock-Token  \
      --PARQUET_META_TBL_NAME cdms_parquet_meta_dev_v1  \
      --BUCKET_NAME cdms-dev-ncar-in-situ-stage  \
      --KEY_PREFIX cdms_icoads_2017-01-01.json
  
  
    """
    IngestS3Entry().start()
