one_offs/trigger.s3.ingest.py (19 lines of code) (raw):
# Licensed to the Apache Software Foundation (ASF) under one or more
# contributor license agreements. See the NOTICE file distributed with
# this work for additional information regarding copyright ownership.
# The ASF licenses this file to You under the Apache License, Version 2.0
# (the "License"); you may not use this file except in compliance with
# the License. You may obtain a copy of the License at
#
# http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.
import os
from flask import Flask
from parquet_flask.cdms_lambda_func.lambda_func_env import LambdaFuncEnv
if __name__ == '__main__':
# app = Flask('my_app')
# os.environ['master_spark_url'] = ''
# os.environ['spark_app_name'] = ''
# os.environ['parquet_file_name'] = ''
# os.environ['aws_access_key_id'] = ''
# os.environ['aws_secret_access_key'] = ''
# os.environ['aws_session_token'] = ''
# os.environ['in_situ_schema'] = ''
# os.environ['authentication_type'] = ''
# os.environ['authentication_key'] = ''
# os.environ['parquet_metadata_tbl'] = ''
os.environ[LambdaFuncEnv.LOG_LEVEL] = '20'
os.environ[LambdaFuncEnv.CDMS_DOMAIN] = 'http://localhost:9801/insitu'
os.environ[LambdaFuncEnv.CDMS_BEARER_TOKEN] = 'aaa'
os.environ[LambdaFuncEnv.PARQUET_META_TBL_NAME] = 'cdms_parquet_meta_dev_v1'
from parquet_flask.cdms_lambda_func.ingest_s3_to_cdms.ingest_s3_to_cdms import IngestS3ToCdms
from parquet_flask.aws.aws_s3 import AwsS3
# local_file_path = s3.set_s3_url('s3://cdms-dev-fsu-in-situ-stage/KAOU_20170222.json.gz').download('/tmp')
# bucket_name = 'nasa-cdms.saildrone.com'
# key_prefix = '1021_atlantic'
# bucket_name = 'cdms-dev-fsu-in-situ-stage'
# key_prefix = ''
bucket_name = 'cdms-dev-ncar-in-situ-stage'
key_prefix = ''
s3 = AwsS3()
for key, size in s3.get_child_s3_files(bucket_name, key_prefix, lambda x: x['Key'].endswith('.json') or x['Key'].endswith('.json.gz')):
try:
print(f'working on: {key}')
# print(s3.set_s3_url(f's3://cdms-dev-fsu-in-situ-stage/{key}').get_s3_obj_size())
print(IngestS3ToCdms().start(event={'s3_url': f's3://{bucket_name}/{key}'}))
except Exception as e:
print(e)