supporting-blog-content/keeping-your-index-current/local_testing.ipynb (507 lines of code) (raw):
{
"cells": [
{
"cell_type": "code",
"execution_count": 1,
"id": "ef344114-c02b-449b-8711-ab9be00c0080",
"metadata": {},
"outputs": [],
"source": [
"import requests\n",
"from getpass import getpass\n",
"import pandas as pd\n",
"from datetime import datetime, timedelta\n",
"from elasticsearch import Elasticsearch, helpers"
]
},
{
"cell_type": "code",
"execution_count": 2,
"id": "6baca258-b116-4bbc-9cba-2841cb754e46",
"metadata": {},
"outputs": [],
"source": [
"def connect_to_nasa():\n",
" url = \"https://api.nasa.gov/neo/rest/v1/feed\"\n",
" nasa_api_key = getpass(\"NASA API Key: \")\n",
" today = datetime.now()\n",
" params = {\n",
" \"api_key\": nasa_api_key,\n",
" \"start_date\": today - timedelta(days=7),\n",
" \"end_date\": datetime.now(),\n",
" }\n",
" return requests.get(url, params).json()"
]
},
{
"cell_type": "code",
"execution_count": 3,
"id": "f6e2cfce-505b-4e58-aabc-555d80ccee9e",
"metadata": {},
"outputs": [],
"source": [
"response = connect_to_nasa()"
]
},
{
"cell_type": "code",
"execution_count": 4,
"id": "6fcb5d7b-f0c7-4973-a005-c0a724447068",
"metadata": {},
"outputs": [],
"source": [
"def create_df(response):\n",
" all_objects = []\n",
" for date, objects in response[\"near_earth_objects\"].items():\n",
" for obj in objects:\n",
" obj[\"close_approach_date\"] = date\n",
" all_objects.append(obj)\n",
" df = pd.json_normalize(all_objects)\n",
" return df.drop(\"close_approach_data\", axis=1)"
]
},
{
"cell_type": "code",
"execution_count": 5,
"id": "ad60bb3a-46ad-48d6-849d-d33457a7e0ea",
"metadata": {},
"outputs": [
{
"data": {
"text/html": [
"<div>\n",
"<style scoped>\n",
" .dataframe tbody tr th:only-of-type {\n",
" vertical-align: middle;\n",
" }\n",
"\n",
" .dataframe tbody tr th {\n",
" vertical-align: top;\n",
" }\n",
"\n",
" .dataframe thead th {\n",
" text-align: right;\n",
" }\n",
"</style>\n",
"<table border=\"1\" class=\"dataframe\">\n",
" <thead>\n",
" <tr style=\"text-align: right;\">\n",
" <th></th>\n",
" <th>id</th>\n",
" <th>neo_reference_id</th>\n",
" <th>name</th>\n",
" <th>nasa_jpl_url</th>\n",
" <th>absolute_magnitude_h</th>\n",
" <th>is_potentially_hazardous_asteroid</th>\n",
" <th>is_sentry_object</th>\n",
" <th>close_approach_date</th>\n",
" <th>links.self</th>\n",
" <th>estimated_diameter.kilometers.estimated_diameter_min</th>\n",
" <th>estimated_diameter.kilometers.estimated_diameter_max</th>\n",
" <th>estimated_diameter.meters.estimated_diameter_min</th>\n",
" <th>estimated_diameter.meters.estimated_diameter_max</th>\n",
" <th>estimated_diameter.miles.estimated_diameter_min</th>\n",
" <th>estimated_diameter.miles.estimated_diameter_max</th>\n",
" <th>estimated_diameter.feet.estimated_diameter_min</th>\n",
" <th>estimated_diameter.feet.estimated_diameter_max</th>\n",
" </tr>\n",
" </thead>\n",
" <tbody>\n",
" <tr>\n",
" <th>0</th>\n",
" <td>2137924</td>\n",
" <td>2137924</td>\n",
" <td>137924 (2000 BD19)</td>\n",
" <td>https://ssd.jpl.nasa.gov/tools/sbdb_lookup.htm...</td>\n",
" <td>17.51</td>\n",
" <td>False</td>\n",
" <td>False</td>\n",
" <td>2024-02-16</td>\n",
" <td>http://api.nasa.gov/neo/rest/v1/neo/2137924?ap...</td>\n",
" <td>0.836672</td>\n",
" <td>1.870854</td>\n",
" <td>836.671502</td>\n",
" <td>1870.854353</td>\n",
" <td>0.519883</td>\n",
" <td>1.162495</td>\n",
" <td>2744.985330</td>\n",
" <td>6137.973796</td>\n",
" </tr>\n",
" <tr>\n",
" <th>1</th>\n",
" <td>2355046</td>\n",
" <td>2355046</td>\n",
" <td>355046 (2006 SO19)</td>\n",
" <td>https://ssd.jpl.nasa.gov/tools/sbdb_lookup.htm...</td>\n",
" <td>19.66</td>\n",
" <td>False</td>\n",
" <td>False</td>\n",
" <td>2024-02-16</td>\n",
" <td>http://api.nasa.gov/neo/rest/v1/neo/2355046?ap...</td>\n",
" <td>0.310853</td>\n",
" <td>0.695088</td>\n",
" <td>310.852938</td>\n",
" <td>695.088301</td>\n",
" <td>0.193155</td>\n",
" <td>0.431908</td>\n",
" <td>1019.858754</td>\n",
" <td>2280.473500</td>\n",
" </tr>\n",
" <tr>\n",
" <th>2</th>\n",
" <td>3092138</td>\n",
" <td>3092138</td>\n",
" <td>(1995 FO)</td>\n",
" <td>https://ssd.jpl.nasa.gov/tools/sbdb_lookup.htm...</td>\n",
" <td>20.80</td>\n",
" <td>False</td>\n",
" <td>False</td>\n",
" <td>2024-02-16</td>\n",
" <td>http://api.nasa.gov/neo/rest/v1/neo/3092138?ap...</td>\n",
" <td>0.183889</td>\n",
" <td>0.411188</td>\n",
" <td>183.888672</td>\n",
" <td>411.187571</td>\n",
" <td>0.114263</td>\n",
" <td>0.255500</td>\n",
" <td>603.309311</td>\n",
" <td>1349.040631</td>\n",
" </tr>\n",
" <tr>\n",
" <th>3</th>\n",
" <td>3274166</td>\n",
" <td>3274166</td>\n",
" <td>(2005 EL169)</td>\n",
" <td>https://ssd.jpl.nasa.gov/tools/sbdb_lookup.htm...</td>\n",
" <td>22.04</td>\n",
" <td>False</td>\n",
" <td>False</td>\n",
" <td>2024-02-16</td>\n",
" <td>http://api.nasa.gov/neo/rest/v1/neo/3274166?ap...</td>\n",
" <td>0.103886</td>\n",
" <td>0.232295</td>\n",
" <td>103.885510</td>\n",
" <td>232.295062</td>\n",
" <td>0.064551</td>\n",
" <td>0.144341</td>\n",
" <td>340.831737</td>\n",
" <td>762.122933</td>\n",
" </tr>\n",
" <tr>\n",
" <th>4</th>\n",
" <td>3743895</td>\n",
" <td>3743895</td>\n",
" <td>(2016 CK246)</td>\n",
" <td>https://ssd.jpl.nasa.gov/tools/sbdb_lookup.htm...</td>\n",
" <td>21.74</td>\n",
" <td>False</td>\n",
" <td>False</td>\n",
" <td>2024-02-16</td>\n",
" <td>http://api.nasa.gov/neo/rest/v1/neo/3743895?ap...</td>\n",
" <td>0.119277</td>\n",
" <td>0.266710</td>\n",
" <td>119.276525</td>\n",
" <td>266.710417</td>\n",
" <td>0.074115</td>\n",
" <td>0.165726</td>\n",
" <td>391.327193</td>\n",
" <td>875.034205</td>\n",
" </tr>\n",
" </tbody>\n",
"</table>\n",
"</div>"
],
"text/plain": [
" id neo_reference_id name \\\n",
"0 2137924 2137924 137924 (2000 BD19) \n",
"1 2355046 2355046 355046 (2006 SO19) \n",
"2 3092138 3092138 (1995 FO) \n",
"3 3274166 3274166 (2005 EL169) \n",
"4 3743895 3743895 (2016 CK246) \n",
"\n",
" nasa_jpl_url absolute_magnitude_h \\\n",
"0 https://ssd.jpl.nasa.gov/tools/sbdb_lookup.htm... 17.51 \n",
"1 https://ssd.jpl.nasa.gov/tools/sbdb_lookup.htm... 19.66 \n",
"2 https://ssd.jpl.nasa.gov/tools/sbdb_lookup.htm... 20.80 \n",
"3 https://ssd.jpl.nasa.gov/tools/sbdb_lookup.htm... 22.04 \n",
"4 https://ssd.jpl.nasa.gov/tools/sbdb_lookup.htm... 21.74 \n",
"\n",
" is_potentially_hazardous_asteroid is_sentry_object close_approach_date \\\n",
"0 False False 2024-02-16 \n",
"1 False False 2024-02-16 \n",
"2 False False 2024-02-16 \n",
"3 False False 2024-02-16 \n",
"4 False False 2024-02-16 \n",
"\n",
" links.self \\\n",
"0 http://api.nasa.gov/neo/rest/v1/neo/2137924?ap... \n",
"1 http://api.nasa.gov/neo/rest/v1/neo/2355046?ap... \n",
"2 http://api.nasa.gov/neo/rest/v1/neo/3092138?ap... \n",
"3 http://api.nasa.gov/neo/rest/v1/neo/3274166?ap... \n",
"4 http://api.nasa.gov/neo/rest/v1/neo/3743895?ap... \n",
"\n",
" estimated_diameter.kilometers.estimated_diameter_min \\\n",
"0 0.836672 \n",
"1 0.310853 \n",
"2 0.183889 \n",
"3 0.103886 \n",
"4 0.119277 \n",
"\n",
" estimated_diameter.kilometers.estimated_diameter_max \\\n",
"0 1.870854 \n",
"1 0.695088 \n",
"2 0.411188 \n",
"3 0.232295 \n",
"4 0.266710 \n",
"\n",
" estimated_diameter.meters.estimated_diameter_min \\\n",
"0 836.671502 \n",
"1 310.852938 \n",
"2 183.888672 \n",
"3 103.885510 \n",
"4 119.276525 \n",
"\n",
" estimated_diameter.meters.estimated_diameter_max \\\n",
"0 1870.854353 \n",
"1 695.088301 \n",
"2 411.187571 \n",
"3 232.295062 \n",
"4 266.710417 \n",
"\n",
" estimated_diameter.miles.estimated_diameter_min \\\n",
"0 0.519883 \n",
"1 0.193155 \n",
"2 0.114263 \n",
"3 0.064551 \n",
"4 0.074115 \n",
"\n",
" estimated_diameter.miles.estimated_diameter_max \\\n",
"0 1.162495 \n",
"1 0.431908 \n",
"2 0.255500 \n",
"3 0.144341 \n",
"4 0.165726 \n",
"\n",
" estimated_diameter.feet.estimated_diameter_min \\\n",
"0 2744.985330 \n",
"1 1019.858754 \n",
"2 603.309311 \n",
"3 340.831737 \n",
"4 391.327193 \n",
"\n",
" estimated_diameter.feet.estimated_diameter_max \n",
"0 6137.973796 \n",
"1 2280.473500 \n",
"2 1349.040631 \n",
"3 762.122933 \n",
"4 875.034205 "
]
},
"execution_count": 5,
"metadata": {},
"output_type": "execute_result"
}
],
"source": [
"df = create_df(response)\n",
"df.head()"
]
},
{
"cell_type": "code",
"execution_count": 6,
"id": "d2bbb54b-0fbe-4c47-bb19-5286f8410779",
"metadata": {},
"outputs": [],
"source": [
"def connect_to_elastic():\n",
" elastic_cloud_id = getpass(\"Elastic Cloud ID: \")\n",
" elastic_api_key = getpass(\"Elastic API Key: \")\n",
" return Elasticsearch(cloud_id=elastic_cloud_id, api_key=elastic_api_key)"
]
},
{
"cell_type": "code",
"execution_count": 7,
"id": "8e04aa93-89c1-4719-9912-e09fa401dc0c",
"metadata": {},
"outputs": [],
"source": [
"es = connect_to_elastic()"
]
},
{
"cell_type": "code",
"execution_count": 8,
"id": "a675884d-ef9f-4ddd-9c2f-72f577cd6785",
"metadata": {},
"outputs": [
{
"data": {
"text/plain": [
"ObjectApiResponse({'acknowledged': True, 'shards_acknowledged': True, 'index': 'asteroid_data_set'})"
]
},
"execution_count": 8,
"metadata": {},
"output_type": "execute_result"
}
],
"source": [
"index_name = \"asteroid_data_set\"\n",
"es.indices.create(index=index_name)"
]
},
{
"cell_type": "code",
"execution_count": 9,
"id": "91d13bd6-fef5-458d-a533-4508a2096186",
"metadata": {},
"outputs": [],
"source": [
"def doc_generator(df, index_name):\n",
" for index, document in df.iterrows():\n",
" yield {\n",
" \"_index\": index_name,\n",
" \"_id\": f\"{document['id']}\",\n",
" \"_source\": document.to_dict(),\n",
" }"
]
},
{
"cell_type": "code",
"execution_count": 10,
"id": "aa23b918-b7b7-4d61-975b-1d6d7e6ca2d4",
"metadata": {},
"outputs": [
{
"data": {
"text/plain": [
"(146, [])"
]
},
"execution_count": 10,
"metadata": {},
"output_type": "execute_result"
}
],
"source": [
"helpers.bulk(es, doc_generator(df, index_name))"
]
},
{
"cell_type": "code",
"execution_count": 11,
"id": "62be67d5-d880-4d97-882c-b4403dcde4e8",
"metadata": {},
"outputs": [],
"source": [
"def updated_last(es, index_name):\n",
" query = {\n",
" \"size\": 0,\n",
" \"aggs\": {\"last_date\": {\"max\": {\"field\": \"close_approach_date\"}}},\n",
" }\n",
" response = es.search(index=index_name, body=query)\n",
" last_updated_date_string = response[\"aggregations\"][\"last_date\"][\"value_as_string\"]\n",
" datetime_obj = datetime.strptime(last_updated_date_string, \"%Y-%m-%dT%H:%M:%S.%fZ\")\n",
" return datetime_obj.strftime(\"%Y-%m-%d\")"
]
},
{
"cell_type": "code",
"execution_count": 12,
"id": "7d3e9c0a-af38-4e29-92d6-afe6a1aa489b",
"metadata": {},
"outputs": [
{
"name": "stdout",
"output_type": "stream",
"text": [
"2024-02-23\n"
]
}
],
"source": [
"last_update_date = updated_last(es, index_name)\n",
"print(last_update_date)"
]
},
{
"cell_type": "code",
"execution_count": 13,
"id": "c547f76a-8ecf-407a-abce-6295ce6f8445",
"metadata": {},
"outputs": [],
"source": [
"def update_new_data(df, es, last_update_date, index_name):\n",
" if isinstance(last_update_date, str):\n",
" last_update_date = datetime.strptime(last_update_date, \"%Y-%m-%d\")\n",
"\n",
" last_update_date = pd.Timestamp(last_update_date).normalize()\n",
"\n",
" if not df.empty and \"close_approach_date\" in df.columns:\n",
" df[\"close_approach_date\"] = pd.to_datetime(df[\"close_approach_date\"])\n",
"\n",
" today = pd.Timestamp(datetime.now().date()).normalize()\n",
"\n",
" if df is not None and not df.empty:\n",
" update_range = df.loc[\n",
" (df[\"close_approach_date\"] > last_update_date)\n",
" & (df[\"close_approach_date\"] < today)\n",
" ]\n",
" if not update_range.empty:\n",
" helpers.bulk(es, doc_generator(update_range, index_name))\n",
" else:\n",
" print(\"No new data to update.\")\n",
" else:\n",
" print(\"The DataFrame is None.\")"
]
},
{
"cell_type": "code",
"execution_count": 14,
"id": "c5f94741-47a7-4546-bc3d-5962d293f182",
"metadata": {},
"outputs": [
{
"name": "stdout",
"output_type": "stream",
"text": [
"No new data to update.\n",
"2024-02-23\n"
]
}
],
"source": [
"try:\n",
" if df is None:\n",
" raise ValueError(\"DataFrame is None. There may be a problem.\")\n",
" update_new_data(df, es, last_update_date, index_name)\n",
" print(updated_last(es, index_name))\n",
"except Exception as e:\n",
" print(f\"An error occurred: {e}\")"
]
}
],
"metadata": {
"kernelspec": {
"display_name": "Python 3 (ipykernel)",
"language": "python",
"name": "python3"
},
"language_info": {
"codemirror_mode": {
"name": "ipython",
"version": 3
},
"file_extension": ".py",
"mimetype": "text/x-python",
"name": "python",
"nbconvert_exporter": "python",
"pygments_lexer": "ipython3",
"version": "3.12.1"
}
},
"nbformat": 4,
"nbformat_minor": 5
}