backend/time-series-forecasting/services/dataset_service.py (81 lines of code) (raw):
# Copyright 2022 Google LLC
#
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
# http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.
import logging
from typing import List, Optional
from models import dataset
# Sample datasets exposed by this service.
#
# The recommended parameters are grouped per dataset below. Their top-level
# keys ("bqml_arimaplus", "automl-forecasting") are presumably the ids of the
# forecasting methods they apply to -- confirm against the callers.

# --- Retail Sales ----------------------------------------------------------
_RETAIL_SALES_MODEL_PARAMETERS = {
    "bqml_arimaplus": {
        "targetColumn": "sales",
        "timeColumn": "date",
        "timeSeriesIdentifierColumn": "product_at_store",
        "dataGranularityUnit": "day",
        "dataGranularityCount": 1,
    },
    "automl-forecasting": {
        "targetColumn": "sales",
        "timeColumn": "date",
        "timeSeriesIdentifierColumn": "product_at_store",
        "dataGranularityUnit": "day",
        "dataGranularityCount": 1,
        "timeSeriesAttributeColumns": [
            "product_type",
            "product_category",
            "store",
            "product",
        ],
        "columnSpecs": {
            "date": "timestamp",
            "sales": "numeric",
            "product_type": "categorical",
            "product_category": "categorical",
            "product": "categorical",
            "store": "categorical",
        },
    },
}

_RETAIL_SALES_PREDICTION_PARAMETERS = {
    "bqml_arimaplus": {
        "forecastHorizon": 120,
    },
    "automl-forecasting": {"forecastHorizon": 30, "contextWindow": 30},
}

# --- Iowa Liquor Sales -----------------------------------------------------
# NOTE(review): this entry uses "dataFrequency" where the retail entry uses
# "dataGranularityUnit"/"dataGranularityCount" -- confirm this is intended.
_IOWA_LIQUOR_SALES_MODEL_PARAMETERS = {
    "bqml_arimaplus": {
        "targetColumn": "sale_dollars",
        "timeColumn": "date",
        "timeSeriesIdentifierColumn": "county_and_city",
        "dataFrequency": "daily",
    }
}

_IOWA_LIQUOR_SALES_PREDICTION_PARAMETERS = {
    "bqml_arimaplus": {
        "forecastHorizon": 120,
    }
}

# Registry returned by get_datasets(); order matches the original listing.
DATASETS = [
    dataset.CSVDataset(
        "sample_data/sales_forecasting.csv",
        display_name="Retail Sales",
        time_column="date",
        description="This is sales data from a fictional sporting goods company with several stores across the city. It includes sales data for several products, grouped in several categories.",
        icon="storefront",
        recommended_model_parameters=_RETAIL_SALES_MODEL_PARAMETERS,
        recommended_prediction_parameters=_RETAIL_SALES_PREDICTION_PARAMETERS,
    ),
    dataset.CSVDataset(
        "sample_data/iowa_liquor_sales.csv",
        display_name="Iowa Liquor Sales",
        time_column="date",
        description="This dataset contains the spirits purchase information of Iowa Class “E” liquor licensees by product and date of purchase. This dataset was simplified for demonstration purposes.",
        icon="liquor",
        recommended_model_parameters=_IOWA_LIQUOR_SALES_MODEL_PARAMETERS,
        recommended_prediction_parameters=_IOWA_LIQUOR_SALES_PREDICTION_PARAMETERS,
    ),
]
def get_datasets() -> List[dataset.Dataset]:
    """Return all datasets known to this service.

    Returns:
        List[dataset.Dataset]: The module-level ``DATASETS`` list itself
        (the same list object, not a copy).
    """
    return DATASETS
def get_dataset(dataset_id: str) -> Optional[dataset.Dataset]:
    """Get the dataset given the dataset_id.

    Each dataset's ``id`` is converted with ``str()`` before it is compared
    to ``dataset_id``.

    Args:
        dataset_id (str): Dataset id.

    Returns:
        Optional[dataset.Dataset]: The first dataset returned by
        ``get_datasets()`` whose id matches, or ``None`` (after logging an
        error) when no dataset matches.
    """
    # The loop variable is deliberately not named ``dataset``: binding that
    # name here would make it function-local and hide the imported
    # ``dataset`` module for the whole function body.
    for candidate in get_datasets():
        if str(candidate.id) == dataset_id:
            return candidate

    logging.error(f"Dataset id {dataset_id} does not exist!")
    return None