In [1]:
import requests
from getpass import getpass
import pandas as pd
from datetime import datetime, timedelta
from elasticsearch import Elasticsearch, helpers

In [2]:
def connect_to_nasa():
    url = "https://api.nasa.gov/neo/rest/v1/feed"
    nasa_api_key = getpass("NASA API Key: ")
    today = datetime.now()
    params = {
        "api_key": nasa_api_key,
        "start_date": today - timedelta(days=7),
        "end_date": datetime.now(),
    }
    return requests.get(url, params).json()

In [3]:
response = connect_to_nasa()

In [4]:
def create_df(response):
    all_objects = []
    for date, objects in response["near_earth_objects"].items():
        for obj in objects:
            obj["close_approach_date"] = date
            all_objects.append(obj)
    df = pd.json_normalize(all_objects)
    return df.drop("close_approach_data", axis=1)

In [5]:
df = create_df(response)
df.head()

Unnamed: 0,id,neo_reference_id,name,nasa_jpl_url,absolute_magnitude_h,is_potentially_hazardous_asteroid,is_sentry_object,close_approach_date,links.self,estimated_diameter.kilometers.estimated_diameter_min,estimated_diameter.kilometers.estimated_diameter_max,estimated_diameter.meters.estimated_diameter_min,estimated_diameter.meters.estimated_diameter_max,estimated_diameter.miles.estimated_diameter_min,estimated_diameter.miles.estimated_diameter_max,estimated_diameter.feet.estimated_diameter_min,estimated_diameter.feet.estimated_diameter_max
0,2137924,2137924,137924 (2000 BD19),https://ssd.jpl.nasa.gov/tools/sbdb_lookup.htm...,17.51,False,False,2024-02-16,http://api.nasa.gov/neo/rest/v1/neo/2137924?ap...,0.836672,1.870854,836.671502,1870.854353,0.519883,1.162495,2744.98533,6137.973796
1,2355046,2355046,355046 (2006 SO19),https://ssd.jpl.nasa.gov/tools/sbdb_lookup.htm...,19.66,False,False,2024-02-16,http://api.nasa.gov/neo/rest/v1/neo/2355046?ap...,0.310853,0.695088,310.852938,695.088301,0.193155,0.431908,1019.858754,2280.4735
2,3092138,3092138,(1995 FO),https://ssd.jpl.nasa.gov/tools/sbdb_lookup.htm...,20.8,False,False,2024-02-16,http://api.nasa.gov/neo/rest/v1/neo/3092138?ap...,0.183889,0.411188,183.888672,411.187571,0.114263,0.2555,603.309311,1349.040631
3,3274166,3274166,(2005 EL169),https://ssd.jpl.nasa.gov/tools/sbdb_lookup.htm...,22.04,False,False,2024-02-16,http://api.nasa.gov/neo/rest/v1/neo/3274166?ap...,0.103886,0.232295,103.88551,232.295062,0.064551,0.144341,340.831737,762.122933
4,3743895,3743895,(2016 CK246),https://ssd.jpl.nasa.gov/tools/sbdb_lookup.htm...,21.74,False,False,2024-02-16,http://api.nasa.gov/neo/rest/v1/neo/3743895?ap...,0.119277,0.26671,119.276525,266.710417,0.074115,0.165726,391.327193,875.034205


In [6]:
def connect_to_elastic():
    elastic_cloud_id = getpass("Elastic Cloud ID: ")
    elastic_api_key = getpass("Elastic API Key: ")
    return Elasticsearch(cloud_id=elastic_cloud_id, api_key=elastic_api_key)

In [7]:
es = connect_to_elastic()

In [8]:
index_name = "asteroid_data_set"
es.indices.create(index=index_name)

ObjectApiResponse({'acknowledged': True, 'shards_acknowledged': True, 'index': 'asteroid_data_set'})

In [9]:
def doc_generator(df, index_name):
    for index, document in df.iterrows():
        yield {
            "_index": index_name,
            "_id": f"{document['id']}",
            "_source": document.to_dict(),
        }

In [10]:
helpers.bulk(es, doc_generator(df, index_name))

(146, [])

In [11]:
def updated_last(es, index_name):
    query = {
        "size": 0,
        "aggs": {"last_date": {"max": {"field": "close_approach_date"}}},
    }
    response = es.search(index=index_name, body=query)
    last_updated_date_string = response["aggregations"]["last_date"]["value_as_string"]
    datetime_obj = datetime.strptime(last_updated_date_string, "%Y-%m-%dT%H:%M:%S.%fZ")
    return datetime_obj.strftime("%Y-%m-%d")

In [12]:
last_update_date = updated_last(es, index_name)
print(last_update_date)

2024-02-23


In [13]:
def update_new_data(df, es, last_update_date, index_name):
    if isinstance(last_update_date, str):
        last_update_date = datetime.strptime(last_update_date, "%Y-%m-%d")

    last_update_date = pd.Timestamp(last_update_date).normalize()

    if not df.empty and "close_approach_date" in df.columns:
        df["close_approach_date"] = pd.to_datetime(df["close_approach_date"])

    today = pd.Timestamp(datetime.now().date()).normalize()

    if df is not None and not df.empty:
        update_range = df.loc[
            (df["close_approach_date"] > last_update_date)
            & (df["close_approach_date"] < today)
        ]
        if not update_range.empty:
            helpers.bulk(es, doc_generator(update_range, index_name))
        else:
            print("No new data to update.")
    else:
        print("The DataFrame is None.")

In [14]:
try:
    if df is None:
        raise ValueError("DataFrame is None. There may be a problem.")
    update_new_data(df, es, last_update_date, index_name)
    print(updated_last(es, index_name))
except Exception as e:
    print(f"An error occurred: {e}")

No new data to update.
2024-02-23
