src/gluonts/dataset/artificial/_base.py [47:282]:
- - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - -
)


class DatasetInfo(NamedTuple):
    """
    Information stored on a dataset. When downloading from the repository, the
    dataset repository checks that the obtained version matches the one
    declared in dataset_info/dataset_name.json.
    """

    # Dataset name (presumably the ``dataset_name`` used in
    # dataset_info/dataset_name.json -- confirm against the repository code).
    name: str
    # Metadata object attached to the dataset.
    metadata: MetaData
    # Prediction length recorded for the dataset (presumably the number of
    # time steps to forecast -- confirm).
    prediction_length: int
    # Statistics object for the training data.
    train_statistics: DatasetStatistics
    # Statistics object for the test data.
    test_statistics: DatasetStatistics


class ArtificialDataset:
    """
    Base class for datasets that are generated from code.

    The ``metadata``, ``train`` and ``test`` properties defined here are
    placeholders that evaluate to ``None``; they are meant to be overridden
    by subclasses before ``generate`` is used.
    """

    def __init__(self, freq) -> None:
        # Frequency handed to every ``ListDataset`` built by ``generate``.
        self.freq = freq

    @property
    def metadata(self) -> MetaData:
        """Metadata of the dataset (placeholder, evaluates to ``None``)."""

    @property
    def train(self) -> List[DataEntry]:
        """Training entries (placeholder, evaluates to ``None``)."""

    @property
    def test(self) -> List[DataEntry]:
        """Test entries (placeholder, evaluates to ``None``)."""

    # todo return the same type as dataset repo for better usability
    def generate(self) -> TrainDatasets:
        """
        Bundle ``metadata``, ``train`` and ``test`` into a ``TrainDatasets``.

        The train and test entries are each wrapped in a ``ListDataset``
        created with ``self.freq``.
        """
        dataset_metadata = self.metadata
        train_split = ListDataset(self.train, self.freq)
        test_split = ListDataset(self.test, self.freq)
        return TrainDatasets(
            metadata=dataset_metadata,
            train=train_split,
            test=test_split,
        )


class ConstantDataset(ArtificialDataset):
    def __init__(
        self,
        num_timeseries: int = 10,
        num_steps: int = 30,
        freq: str = "1H",
        start: str = "2000-01-01 00:00:00",
        is_nan: bool = False,  # Generates constant dataset of 0s with explicit NaN missing values
        is_random_constant: bool = False,  # Inserts random constant value for each time series
        is_different_scales: bool = False,  # Generates constants on various scales
        is_piecewise: bool = False,  # Determines whether the time series in the test
        # and train set should have different constant values
        is_noise: bool = False,  # Determines whether to add Gaussian noise to the constant dataset
        is_long: bool = False,  # Determines whether some time series will have very long lengths
        is_short: bool = False,  # Determines whether some time series will have very short lengths
        is_trend: bool = False,  # Determines whether to add linear trends
        num_missing_middle: int = 0,  # Number of missing values in the middle of the time series
        is_promotions: bool = False,  # Determines whether to add promotions to the target time series
        # and to store in metadata
        holidays: Optional[
            List[pd.Timestamp]
        ] = None,  # Determines whether to add holidays to the target time series
        # and to store in metadata
    ) -> None:
        super(ConstantDataset, self).__init__(freq)
        self.num_timeseries = num_timeseries
        self.num_steps = num_steps
        self.num_training_steps = self.num_steps // 10 * 8
        self.prediction_length = self.num_steps - self.num_training_steps
        self.start = start
        self.is_nan = is_nan
        self.is_random_constant = is_random_constant
        self.is_different_scales = is_different_scales
        self.is_piecewise = is_piecewise
        self.is_noise = is_noise
        self.is_long = is_long
        self.is_short = is_short
        self.is_trend = is_trend
        self.num_missing_middle = num_missing_middle
        self.is_promotions = is_promotions
        self.holidays = holidays

    @property
    def metadata(self) -> MetaData:
        """
        Metadata describing the generated dataset.

        Always declares one static categorical feature (whose cardinality is
        ``num_timeseries`` rendered as a string) and one static real feature.
        A dynamic real feature is declared as well when promotions or
        holidays are enabled.
        """
        static_cat = [
            {
                "name": "feat_static_cat_000",
                "cardinality": str(self.num_timeseries),
            }
        ]
        static_real = [{"name": "feat_static_real_000"}]

        if not (self.is_promotions or self.holidays):
            return MetaData(
                freq=self.freq,
                feat_static_cat=static_cat,
                feat_static_real=static_real,
                prediction_length=self.prediction_length,
            )

        return MetaData(
            freq=self.freq,
            feat_static_cat=static_cat,
            feat_static_real=static_real,
            feat_dynamic_real=[
                BasicFeatureInfo(name=FieldName.FEAT_DYNAMIC_REAL)
            ],
            prediction_length=self.prediction_length,
        )

    def determine_constant(
        self, index: int, constant: Optional[float] = None, seed: int = 1
    ) -> Optional[float]:
        if self.is_random_constant:
            my_random = random.Random(seed)
            constant = (index + 1) * my_random.random()
        elif self.is_different_scales:
            if index == 0:
                constant = 1e-8
            elif constant is not None:
                constant *= 100
        else:
            constant = float(index)
        return constant

    def compute_data_from_recipe(
        self,
        num_steps: int,
        constant: Optional[float] = None,
        one_to_zero: float = 0.1,
        zero_to_one: float = 0.1,
        scale_features: float = 200,
    ) -> TrainDatasets:
        """
        Assemble a recipe for one time series and generate it.

        The target starts as ``Constant(constant)``; Gaussian noise, a linear
        trend, a promotion signal and a holiday signal are added on top
        depending on the flags stored on the instance.

        Parameters
        ----------
        num_steps
            Number of time steps. Holiday dates are computed for this many
            steps and ``num_steps - prediction_length`` is used as
            ``max_train_length``.
        constant
            Value passed to ``Constant``.
        one_to_zero
            First argument of ``BinaryMarkovChain`` (presumably the 1 -> 0
            transition probability -- confirm against its definition).
        zero_to_one
            Second argument of ``BinaryMarkovChain`` (presumably the 0 -> 1
            transition probability -- confirm against its definition).
        scale_features
            Factor applied to the lag-0 binary promotion/holiday series
            before it is added to the target.

        Returns
        -------
        TrainDatasets
            Whatever ``RecipeDataset.generate`` returns for the recipe.
        """
        recipe = []
        recipe_type = Constant(constant)
        if self.is_noise:
            recipe_type += RandomGaussian()  # Use default stddev = 1.0
        if self.is_trend:
            recipe_type += LinearTrend()
        if self.is_promotions:
            recipe.append(
                ("binary_causal", BinaryMarkovChain(one_to_zero, zero_to_one))
            )
            recipe.append(
                (FieldName.FEAT_DYNAMIC_REAL, Stack(["binary_causal"]))
            )
            recipe_type += scale_features * Lag("binary_causal", lag=0)
        # NOTE(review): when both promotions and holidays are enabled,
        # FieldName.FEAT_DYNAMIC_REAL is appended to the recipe twice; how
        # RecipeDataset resolves the duplicate name is not visible here.
        if self.holidays:
            timestamp = self.init_date()
            # Adding a bare integer to a Timestamp was removed in pandas 1.0,
            # so step by an explicit offset. Prefer the frequency attached to
            # the timestamp (older pandas); otherwise derive it from
            # ``self.freq``.
            step = getattr(timestamp, "freq", None)
            if step is None:
                if self.freq == "W":
                    # Un-anchored week: exactly seven days from the start
                    # date, matching a week anchored on the start's weekday.
                    step = pd.offsets.Week()
                else:
                    step = pd.tseries.frequencies.to_offset(self.freq)
            # Compute dates array
            dates = []
            for _ in range(num_steps):
                dates.append(timestamp)
                timestamp = timestamp + step
            recipe.append(
                ("binary_holidays", BinaryHolidays(dates, self.holidays))
            )
            recipe.append(
                (FieldName.FEAT_DYNAMIC_REAL, Stack(["binary_holidays"]))
            )
            recipe_type += scale_features * Lag("binary_holidays", lag=0)
        recipe.append((FieldName.TARGET, recipe_type))
        max_train_length = num_steps - self.prediction_length
        data = RecipeDataset(
            recipe=recipe,
            metadata=self.metadata,
            max_train_length=max_train_length,
            prediction_length=self.prediction_length,
            # Add 1 time series at a time in the loop for different constant
            # values per time series
            num_timeseries=1,
        )
        return data.generate()

    def piecewise_constant(self, index: int, num_steps: int) -> List:
        target = []
        for j in range(num_steps):
            if j < self.num_training_steps:
                constant = self.determine_constant(index=index)
            else:
                constant = self.determine_constant(index=index, seed=2)
            target.append(constant)
        return target

    def get_num_steps(
        self,
        index: int,
        num_steps_max: int = 10000,
        long_freq: int = 4,
        num_steps_min: int = 2,
        short_freq: int = 4,
    ) -> int:
        num_steps = self.num_steps
        if self.is_long and index % long_freq == 0:
            num_steps = num_steps_max
        elif self.is_short and index % short_freq == 0:
            num_steps = num_steps_min
        return num_steps

    def init_date(self) -> pd.Timestamp:
        week_dict = {
            0: "MON",
            1: "TUE",
            2: "WED",
            3: "THU",
            4: "FRI",
            5: "SAT",
            6: "SUN",
        }
        timestamp = pd.Timestamp(self.start)
        freq_week_start = self.freq
        if freq_week_start == "W":
            freq_week_start = f"W-{week_dict[timestamp.weekday()]}"
        return pd.Timestamp(self.start, freq=freq_week_start)

    @staticmethod
    def insert_nans_and_zeros(ts_len: int) -> List:
        target = []
        for j in range(ts_len):
            # Place NaNs at even indices. Use convention no NaNs before start date.
            if j != 0 and j % 2 == 0:
                target.append(np.nan)
            # Place zeros at odd indices
            else:
                target.append(0.0)
        return target

    def insert_missing_vals_middle(
        self, ts_len: int, constant: Optional[float]
- - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - -



src/gluonts/nursery/SCott/pts/dataset/artificial.py [46:282]:
- - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - -
)
from .stat import DatasetStatistics, calculate_dataset_statistics


class DatasetInfo(NamedTuple):
    """
    Information stored on a dataset. When downloading from the repository, the
    dataset repository checks that the obtained version matches the one
    declared in dataset_info/dataset_name.json.
    """

    # Dataset name (presumably the ``dataset_name`` used in
    # dataset_info/dataset_name.json -- confirm against the repository code).
    name: str
    # Metadata object attached to the dataset.
    metadata: MetaData
    # Prediction length recorded for the dataset (presumably the number of
    # time steps to forecast -- confirm).
    prediction_length: int
    # Statistics object for the training data.
    train_statistics: DatasetStatistics
    # Statistics object for the test data.
    test_statistics: DatasetStatistics


class ArtificialDataset:
    """
    Base class for datasets that are generated from code.

    The ``metadata``, ``train`` and ``test`` properties defined here are
    placeholders that evaluate to ``None``; they are meant to be overridden
    by subclasses before ``generate`` is used.
    """

    def __init__(self, freq) -> None:
        # Frequency handed to every ``ListDataset`` built by ``generate``.
        self.freq = freq

    @property
    def metadata(self) -> MetaData:
        """Metadata of the dataset (placeholder, evaluates to ``None``)."""

    @property
    def train(self) -> List[DataEntry]:
        """Training entries (placeholder, evaluates to ``None``)."""

    @property
    def test(self) -> List[DataEntry]:
        """Test entries (placeholder, evaluates to ``None``)."""

    # todo return the same type as dataset repo for better usability
    def generate(self) -> TrainDatasets:
        """
        Bundle ``metadata``, ``train`` and ``test`` into a ``TrainDatasets``.

        The train and test entries are each wrapped in a ``ListDataset``
        created with ``self.freq``.
        """
        dataset_metadata = self.metadata
        train_split = ListDataset(self.train, self.freq)
        test_split = ListDataset(self.test, self.freq)
        return TrainDatasets(
            metadata=dataset_metadata,
            train=train_split,
            test=test_split,
        )


class ConstantDataset(ArtificialDataset):
    def __init__(
        self,
        num_timeseries: int = 10,
        num_steps: int = 30,
        freq: str = "1H",
        start: str = "2000-01-01 00:00:00",
        is_nan: bool = False,  # Generates constant dataset of 0s with explicit NaN missing values
        is_random_constant: bool = False,  # Inserts random constant value for each time series
        is_different_scales: bool = False,  # Generates constants on various scales
        is_piecewise: bool = False,  # Determines whether the time series in the test
        # and train set should have different constant values
        is_noise: bool = False,  # Determines whether to add Gaussian noise to the constant dataset
        is_long: bool = False,  # Determines whether some time series will have very long lengths
        is_short: bool = False,  # Determines whether some time series will have very short lengths
        is_trend: bool = False,  # Determines whether to add linear trends
        num_missing_middle: int = 0,  # Number of missing values in the middle of the time series
        is_promotions: bool = False,  # Determines whether to add promotions to the target time series
        # and to store in metadata
        holidays: Optional[
            List[pd.Timestamp]
        ] = None,  # Determines whether to add holidays to the target time series
        # and to store in metadata
    ) -> None:
        super(ConstantDataset, self).__init__(freq)
        self.num_timeseries = num_timeseries
        self.num_steps = num_steps
        self.num_training_steps = self.num_steps // 10 * 8
        self.prediction_length = self.num_steps - self.num_training_steps
        self.start = start
        self.is_nan = is_nan
        self.is_random_constant = is_random_constant
        self.is_different_scales = is_different_scales
        self.is_piecewise = is_piecewise
        self.is_noise = is_noise
        self.is_long = is_long
        self.is_short = is_short
        self.is_trend = is_trend
        self.num_missing_middle = num_missing_middle
        self.is_promotions = is_promotions
        self.holidays = holidays

    @property
    def metadata(self) -> MetaData:
        """
        Metadata describing the generated dataset.

        Always declares one static categorical feature (whose cardinality is
        ``num_timeseries`` rendered as a string) and one static real feature.
        A dynamic real feature is declared as well when promotions or
        holidays are enabled.
        """
        static_cat = [
            {
                "name": "feat_static_cat_000",
                "cardinality": str(self.num_timeseries),
            }
        ]
        static_real = [{"name": "feat_static_real_000"}]

        if not (self.is_promotions or self.holidays):
            return MetaData(
                freq=self.freq,
                feat_static_cat=static_cat,
                feat_static_real=static_real,
                prediction_length=self.prediction_length,
            )

        return MetaData(
            freq=self.freq,
            feat_static_cat=static_cat,
            feat_static_real=static_real,
            feat_dynamic_real=[
                BasicFeatureInfo(name=FieldName.FEAT_DYNAMIC_REAL)
            ],
            prediction_length=self.prediction_length,
        )

    def determine_constant(
        self, index: int, constant: Optional[float] = None, seed: int = 1
    ) -> Optional[float]:
        if self.is_random_constant:
            my_random = random.Random(seed)
            constant = (index + 1) * my_random.random()
        elif self.is_different_scales:
            if index == 0:
                constant = 1e-8
            elif constant is not None:
                constant *= 100
        else:
            constant = float(index)
        return constant

    def compute_data_from_recipe(
        self,
        num_steps: int,
        constant: Optional[float] = None,
        one_to_zero: float = 0.1,
        zero_to_one: float = 0.1,
        scale_features: float = 200,
    ) -> TrainDatasets:
        """
        Assemble a recipe for one time series and generate it.

        The target starts as ``Constant(constant)``; Gaussian noise, a linear
        trend, a promotion signal and a holiday signal are added on top
        depending on the flags stored on the instance.

        Parameters
        ----------
        num_steps
            Number of time steps. Holiday dates are computed for this many
            steps and ``num_steps - prediction_length`` is used as
            ``max_train_length``.
        constant
            Value passed to ``Constant``.
        one_to_zero
            First argument of ``BinaryMarkovChain`` (presumably the 1 -> 0
            transition probability -- confirm against its definition).
        zero_to_one
            Second argument of ``BinaryMarkovChain`` (presumably the 0 -> 1
            transition probability -- confirm against its definition).
        scale_features
            Factor applied to the lag-0 binary promotion/holiday series
            before it is added to the target.

        Returns
        -------
        TrainDatasets
            Whatever ``RecipeDataset.generate`` returns for the recipe.
        """
        recipe = []
        recipe_type = Constant(constant)
        if self.is_noise:
            recipe_type += RandomGaussian()  # Use default stddev = 1.0
        if self.is_trend:
            recipe_type += LinearTrend()
        if self.is_promotions:
            recipe.append(
                ("binary_causal", BinaryMarkovChain(one_to_zero, zero_to_one))
            )
            recipe.append(
                (FieldName.FEAT_DYNAMIC_REAL, Stack(["binary_causal"]))
            )
            recipe_type += scale_features * Lag("binary_causal", lag=0)
        # NOTE(review): when both promotions and holidays are enabled,
        # FieldName.FEAT_DYNAMIC_REAL is appended to the recipe twice; how
        # RecipeDataset resolves the duplicate name is not visible here.
        if self.holidays:
            timestamp = self.init_date()
            # Adding a bare integer to a Timestamp was removed in pandas 1.0,
            # so step by an explicit offset. Prefer the frequency attached to
            # the timestamp (older pandas); otherwise derive it from
            # ``self.freq``.
            step = getattr(timestamp, "freq", None)
            if step is None:
                if self.freq == "W":
                    # Un-anchored week: exactly seven days from the start
                    # date, matching a week anchored on the start's weekday.
                    step = pd.offsets.Week()
                else:
                    step = pd.tseries.frequencies.to_offset(self.freq)
            # Compute dates array
            dates = []
            for _ in range(num_steps):
                dates.append(timestamp)
                timestamp = timestamp + step
            recipe.append(
                ("binary_holidays", BinaryHolidays(dates, self.holidays))
            )
            recipe.append(
                (FieldName.FEAT_DYNAMIC_REAL, Stack(["binary_holidays"]))
            )
            recipe_type += scale_features * Lag("binary_holidays", lag=0)
        recipe.append((FieldName.TARGET, recipe_type))
        max_train_length = num_steps - self.prediction_length
        data = RecipeDataset(
            recipe=recipe,
            metadata=self.metadata,
            max_train_length=max_train_length,
            prediction_length=self.prediction_length,
            # Add 1 time series at a time in the loop for different constant
            # values per time series
            num_timeseries=1,
        )
        return data.generate()

    def piecewise_constant(self, index: int, num_steps: int) -> List:
        target = []
        for j in range(num_steps):
            if j < self.num_training_steps:
                constant = self.determine_constant(index=index)
            else:
                constant = self.determine_constant(index=index, seed=2)
            target.append(constant)
        return target

    def get_num_steps(
        self,
        index: int,
        num_steps_max: int = 10000,
        long_freq: int = 4,
        num_steps_min: int = 2,
        short_freq: int = 4,
    ) -> int:
        num_steps = self.num_steps
        if self.is_long and index % long_freq == 0:
            num_steps = num_steps_max
        elif self.is_short and index % short_freq == 0:
            num_steps = num_steps_min
        return num_steps

    def init_date(self) -> pd.Timestamp:
        week_dict = {
            0: "MON",
            1: "TUE",
            2: "WED",
            3: "THU",
            4: "FRI",
            5: "SAT",
            6: "SUN",
        }
        timestamp = pd.Timestamp(self.start)
        freq_week_start = self.freq
        if freq_week_start == "W":
            freq_week_start = f"W-{week_dict[timestamp.weekday()]}"
        return pd.Timestamp(self.start, freq=freq_week_start)

    @staticmethod
    def insert_nans_and_zeros(ts_len: int) -> List:
        target = []
        for j in range(ts_len):
            # Place NaNs at even indices. Use convention no NaNs before start date.
            if j != 0 and j % 2 == 0:
                target.append(np.nan)
            # Place zeros at odd indices
            else:
                target.append(0.0)
        return target

    def insert_missing_vals_middle(
        self, ts_len: int, constant: Optional[float]
- - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - -



