supporting-blog-content/keeping-your-index-current/local_testing.ipynb (507 lines of code) (raw):

{ "cells": [ { "cell_type": "code", "execution_count": 1, "id": "ef344114-c02b-449b-8711-ab9be00c0080", "metadata": {}, "outputs": [], "source": [ "import requests\n", "from getpass import getpass\n", "import pandas as pd\n", "from datetime import datetime, timedelta\n", "from elasticsearch import Elasticsearch, helpers" ] }, { "cell_type": "code", "execution_count": 2, "id": "6baca258-b116-4bbc-9cba-2841cb754e46", "metadata": {}, "outputs": [], "source": [ "def connect_to_nasa():\n", " url = \"https://api.nasa.gov/neo/rest/v1/feed\"\n", " nasa_api_key = getpass(\"NASA API Key: \")\n", " today = datetime.now()\n", " params = {\n", " \"api_key\": nasa_api_key,\n", " \"start_date\": today - timedelta(days=7),\n", " \"end_date\": datetime.now(),\n", " }\n", " return requests.get(url, params).json()" ] }, { "cell_type": "code", "execution_count": 3, "id": "f6e2cfce-505b-4e58-aabc-555d80ccee9e", "metadata": {}, "outputs": [], "source": [ "response = connect_to_nasa()" ] }, { "cell_type": "code", "execution_count": 4, "id": "6fcb5d7b-f0c7-4973-a005-c0a724447068", "metadata": {}, "outputs": [], "source": [ "def create_df(response):\n", " all_objects = []\n", " for date, objects in response[\"near_earth_objects\"].items():\n", " for obj in objects:\n", " obj[\"close_approach_date\"] = date\n", " all_objects.append(obj)\n", " df = pd.json_normalize(all_objects)\n", " return df.drop(\"close_approach_data\", axis=1)" ] }, { "cell_type": "code", "execution_count": 5, "id": "ad60bb3a-46ad-48d6-849d-d33457a7e0ea", "metadata": {}, "outputs": [ { "data": { "text/html": [ "<div>\n", "<style scoped>\n", " .dataframe tbody tr th:only-of-type {\n", " vertical-align: middle;\n", " }\n", "\n", " .dataframe tbody tr th {\n", " vertical-align: top;\n", " }\n", "\n", " .dataframe thead th {\n", " text-align: right;\n", " }\n", "</style>\n", "<table border=\"1\" class=\"dataframe\">\n", " <thead>\n", " <tr style=\"text-align: right;\">\n", " <th></th>\n", " <th>id</th>\n", " <th>neo_reference_id</th>\n", " <th>name</th>\n", " <th>nasa_jpl_url</th>\n", " <th>absolute_magnitude_h</th>\n", " <th>is_potentially_hazardous_asteroid</th>\n", " <th>is_sentry_object</th>\n", " <th>close_approach_date</th>\n", " <th>links.self</th>\n", " <th>estimated_diameter.kilometers.estimated_diameter_min</th>\n", " <th>estimated_diameter.kilometers.estimated_diameter_max</th>\n", " <th>estimated_diameter.meters.estimated_diameter_min</th>\n", " <th>estimated_diameter.meters.estimated_diameter_max</th>\n", " <th>estimated_diameter.miles.estimated_diameter_min</th>\n", " <th>estimated_diameter.miles.estimated_diameter_max</th>\n", " <th>estimated_diameter.feet.estimated_diameter_min</th>\n", " <th>estimated_diameter.feet.estimated_diameter_max</th>\n", " </tr>\n", " </thead>\n", " <tbody>\n", " <tr>\n", " <th>0</th>\n", " <td>2137924</td>\n", " <td>2137924</td>\n", " <td>137924 (2000 BD19)</td>\n", " <td>https://ssd.jpl.nasa.gov/tools/sbdb_lookup.htm...</td>\n", " <td>17.51</td>\n", " <td>False</td>\n", " <td>False</td>\n", " <td>2024-02-16</td>\n", " <td>http://api.nasa.gov/neo/rest/v1/neo/2137924?ap...</td>\n", " <td>0.836672</td>\n", " <td>1.870854</td>\n", " <td>836.671502</td>\n", " <td>1870.854353</td>\n", " <td>0.519883</td>\n", " <td>1.162495</td>\n", " <td>2744.985330</td>\n", " <td>6137.973796</td>\n", " </tr>\n", " <tr>\n", " <th>1</th>\n", " <td>2355046</td>\n", " <td>2355046</td>\n", " <td>355046 (2006 SO19)</td>\n", " <td>https://ssd.jpl.nasa.gov/tools/sbdb_lookup.htm...</td>\n", " <td>19.66</td>\n", " <td>False</td>\n", " <td>False</td>\n", " <td>2024-02-16</td>\n", " <td>http://api.nasa.gov/neo/rest/v1/neo/2355046?ap...</td>\n", " <td>0.310853</td>\n", " <td>0.695088</td>\n", " <td>310.852938</td>\n", " <td>695.088301</td>\n", " <td>0.193155</td>\n", " <td>0.431908</td>\n", " <td>1019.858754</td>\n", " <td>2280.473500</td>\n", " </tr>\n", " <tr>\n", " <th>2</th>\n", " <td>3092138</td>\n", " <td>3092138</td>\n", " <td>(1995 FO)</td>\n", " <td>https://ssd.jpl.nasa.gov/tools/sbdb_lookup.htm...</td>\n", " <td>20.80</td>\n", " <td>False</td>\n", " <td>False</td>\n", " <td>2024-02-16</td>\n", " <td>http://api.nasa.gov/neo/rest/v1/neo/3092138?ap...</td>\n", " <td>0.183889</td>\n", " <td>0.411188</td>\n", " <td>183.888672</td>\n", " <td>411.187571</td>\n", " <td>0.114263</td>\n", " <td>0.255500</td>\n", " <td>603.309311</td>\n", " <td>1349.040631</td>\n", " </tr>\n", " <tr>\n", " <th>3</th>\n", " <td>3274166</td>\n", " <td>3274166</td>\n", " <td>(2005 EL169)</td>\n", " <td>https://ssd.jpl.nasa.gov/tools/sbdb_lookup.htm...</td>\n", " <td>22.04</td>\n", " <td>False</td>\n", " <td>False</td>\n", " <td>2024-02-16</td>\n", " <td>http://api.nasa.gov/neo/rest/v1/neo/3274166?ap...</td>\n", " <td>0.103886</td>\n", " <td>0.232295</td>\n", " <td>103.885510</td>\n", " <td>232.295062</td>\n", " <td>0.064551</td>\n", " <td>0.144341</td>\n", " <td>340.831737</td>\n", " <td>762.122933</td>\n", " </tr>\n", " <tr>\n", " <th>4</th>\n", " <td>3743895</td>\n", " <td>3743895</td>\n", " <td>(2016 CK246)</td>\n", " <td>https://ssd.jpl.nasa.gov/tools/sbdb_lookup.htm...</td>\n", " <td>21.74</td>\n", " <td>False</td>\n", " <td>False</td>\n", " <td>2024-02-16</td>\n", " <td>http://api.nasa.gov/neo/rest/v1/neo/3743895?ap...</td>\n", " <td>0.119277</td>\n", " <td>0.266710</td>\n", " <td>119.276525</td>\n", " <td>266.710417</td>\n", " <td>0.074115</td>\n", " <td>0.165726</td>\n", " <td>391.327193</td>\n", " <td>875.034205</td>\n", " </tr>\n", " </tbody>\n", "</table>\n", "</div>" ], "text/plain": [ " id neo_reference_id name \\\n", "0 2137924 2137924 137924 (2000 BD19) \n", "1 2355046 2355046 355046 (2006 SO19) \n", "2 3092138 3092138 (1995 FO) \n", "3 3274166 3274166 (2005 EL169) \n", "4 3743895 3743895 (2016 CK246) \n", "\n", " nasa_jpl_url absolute_magnitude_h \\\n", "0 https://ssd.jpl.nasa.gov/tools/sbdb_lookup.htm... 17.51 \n", "1 https://ssd.jpl.nasa.gov/tools/sbdb_lookup.htm... 19.66 \n", "2 https://ssd.jpl.nasa.gov/tools/sbdb_lookup.htm... 20.80 \n", "3 https://ssd.jpl.nasa.gov/tools/sbdb_lookup.htm... 22.04 \n", "4 https://ssd.jpl.nasa.gov/tools/sbdb_lookup.htm... 21.74 \n", "\n", " is_potentially_hazardous_asteroid is_sentry_object close_approach_date \\\n", "0 False False 2024-02-16 \n", "1 False False 2024-02-16 \n", "2 False False 2024-02-16 \n", "3 False False 2024-02-16 \n", "4 False False 2024-02-16 \n", "\n", " links.self \\\n", "0 http://api.nasa.gov/neo/rest/v1/neo/2137924?ap... \n", "1 http://api.nasa.gov/neo/rest/v1/neo/2355046?ap... \n", "2 http://api.nasa.gov/neo/rest/v1/neo/3092138?ap... \n", "3 http://api.nasa.gov/neo/rest/v1/neo/3274166?ap... \n", "4 http://api.nasa.gov/neo/rest/v1/neo/3743895?ap... \n", "\n", " estimated_diameter.kilometers.estimated_diameter_min \\\n", "0 0.836672 \n", "1 0.310853 \n", "2 0.183889 \n", "3 0.103886 \n", "4 0.119277 \n", "\n", " estimated_diameter.kilometers.estimated_diameter_max \\\n", "0 1.870854 \n", "1 0.695088 \n", "2 0.411188 \n", "3 0.232295 \n", "4 0.266710 \n", "\n", " estimated_diameter.meters.estimated_diameter_min \\\n", "0 836.671502 \n", "1 310.852938 \n", "2 183.888672 \n", "3 103.885510 \n", "4 119.276525 \n", "\n", " estimated_diameter.meters.estimated_diameter_max \\\n", "0 1870.854353 \n", "1 695.088301 \n", "2 411.187571 \n", "3 232.295062 \n", "4 266.710417 \n", "\n", " estimated_diameter.miles.estimated_diameter_min \\\n", "0 0.519883 \n", "1 0.193155 \n", "2 0.114263 \n", "3 0.064551 \n", "4 0.074115 \n", "\n", " estimated_diameter.miles.estimated_diameter_max \\\n", "0 1.162495 \n", "1 0.431908 \n", "2 0.255500 \n", "3 0.144341 \n", "4 0.165726 \n", "\n", " estimated_diameter.feet.estimated_diameter_min \\\n", "0 2744.985330 \n", "1 1019.858754 \n", "2 603.309311 \n", "3 340.831737 \n", "4 391.327193 \n", "\n", " estimated_diameter.feet.estimated_diameter_max \n", "0 6137.973796 \n", "1 2280.473500 \n", "2 1349.040631 \n", "3 762.122933 \n", "4 875.034205 " ] }, "execution_count": 5, "metadata": {}, "output_type": "execute_result" } ], "source": [ "df = create_df(response)\n", "df.head()" ] }, { "cell_type": "code", "execution_count": 6, "id": "d2bbb54b-0fbe-4c47-bb19-5286f8410779", "metadata": {}, "outputs": [], "source": [ "def connect_to_elastic():\n", " elastic_cloud_id = getpass(\"Elastic Cloud ID: \")\n", " elastic_api_key = getpass(\"Elastic API Key: \")\n", " return Elasticsearch(cloud_id=elastic_cloud_id, api_key=elastic_api_key)" ] }, { "cell_type": "code", "execution_count": 7, "id": "8e04aa93-89c1-4719-9912-e09fa401dc0c", "metadata": {}, "outputs": [], "source": [ "es = connect_to_elastic()" ] }, { "cell_type": "code", "execution_count": 8, "id": "a675884d-ef9f-4ddd-9c2f-72f577cd6785", "metadata": {}, "outputs": [ { "data": { "text/plain": [ "ObjectApiResponse({'acknowledged': True, 'shards_acknowledged': True, 'index': 'asteroid_data_set'})" ] }, "execution_count": 8, "metadata": {}, "output_type": "execute_result" } ], "source": [ "index_name = \"asteroid_data_set\"\n", "es.indices.create(index=index_name)" ] }, { "cell_type": "code", "execution_count": 9, "id": "91d13bd6-fef5-458d-a533-4508a2096186", "metadata": {}, "outputs": [], "source": [ "def doc_generator(df, index_name):\n", " for index, document in df.iterrows():\n", " yield {\n", " \"_index\": index_name,\n", " \"_id\": f\"{document['id']}\",\n", " \"_source\": document.to_dict(),\n", " }" ] }, { "cell_type": "code", "execution_count": 10, "id": "aa23b918-b7b7-4d61-975b-1d6d7e6ca2d4", "metadata": {}, "outputs": [ { "data": { "text/plain": [ "(146, [])" ] }, "execution_count": 10, "metadata": {}, "output_type": "execute_result" } ], "source": [ "helpers.bulk(es, doc_generator(df, index_name))" ] }, { "cell_type": "code", "execution_count": 11, "id": "62be67d5-d880-4d97-882c-b4403dcde4e8", "metadata": {}, "outputs": [], "source": [ "def updated_last(es, index_name):\n", " query = {\n", " \"size\": 0,\n", " \"aggs\": {\"last_date\": {\"max\": {\"field\": \"close_approach_date\"}}},\n", " }\n", " response = es.search(index=index_name, body=query)\n", " last_updated_date_string = response[\"aggregations\"][\"last_date\"][\"value_as_string\"]\n", " datetime_obj = datetime.strptime(last_updated_date_string, \"%Y-%m-%dT%H:%M:%S.%fZ\")\n", " return datetime_obj.strftime(\"%Y-%m-%d\")" ] }, { "cell_type": "code", "execution_count": 12, "id": "7d3e9c0a-af38-4e29-92d6-afe6a1aa489b", "metadata": {}, "outputs": [ { "name": "stdout", "output_type": "stream", "text": [ "2024-02-23\n" ] } ], "source": [ "last_update_date = updated_last(es, index_name)\n", "print(last_update_date)" ] }, { "cell_type": "code", "execution_count": 13, "id": "c547f76a-8ecf-407a-abce-6295ce6f8445", "metadata": {}, "outputs": [], "source": [ "def update_new_data(df, es, last_update_date, index_name):\n", " if isinstance(last_update_date, str):\n", " last_update_date = datetime.strptime(last_update_date, \"%Y-%m-%d\")\n", "\n", " last_update_date = pd.Timestamp(last_update_date).normalize()\n", "\n", " if not df.empty and \"close_approach_date\" in df.columns:\n", " df[\"close_approach_date\"] = pd.to_datetime(df[\"close_approach_date\"])\n", "\n", " today = pd.Timestamp(datetime.now().date()).normalize()\n", "\n", " if df is not None and not df.empty:\n", " update_range = df.loc[\n", " (df[\"close_approach_date\"] > last_update_date)\n", " & (df[\"close_approach_date\"] < today)\n", " ]\n", " if not update_range.empty:\n", " helpers.bulk(es, doc_generator(update_range, index_name))\n", " else:\n", " print(\"No new data to update.\")\n", " else:\n", " print(\"The DataFrame is None.\")" ] }, { "cell_type": "code", "execution_count": 14, "id": "c5f94741-47a7-4546-bc3d-5962d293f182", "metadata": {}, "outputs": [ { "name": "stdout", "output_type": "stream", "text": [ "No new data to update.\n", "2024-02-23\n" ] } ], "source": [ "try:\n", " if df is None:\n", " raise ValueError(\"DataFrame is None. There may be a problem.\")\n", " update_new_data(df, es, last_update_date, index_name)\n", " print(updated_last(es, index_name))\n", "except Exception as e:\n", " print(f\"An error occurred: {e}\")" ] } ], "metadata": { "kernelspec": { "display_name": "Python 3 (ipykernel)", "language": "python", "name": "python3" }, "language_info": { "codemirror_mode": { "name": "ipython", "version": 3 }, "file_extension": ".py", "mimetype": "text/x-python", "name": "python", "nbconvert_exporter": "python", "pygments_lexer": "ipython3", "version": "3.12.1" } }, "nbformat": 4, "nbformat_minor": 5 }