in src/gluonts/nursery/SCott/pts/dataset/artificial.py [0:0]
def make_timeseries(self, seed: int = 1) -> List[DataEntry]:
    """
    Generate ``self.num_series`` synthetic seasonal time series.

    Each series is built as follows:

    1. a sinusoid with a random phase and the period returned by
       ``self._get_period()``;
    2. plus a randomly weighted indicator of "special" time points
       (``self._special_time_point_indicator``);
    3. optionally multiplied by a random scale (``self.is_scale``);
    4. shifted by a random level;
    5. optionally perturbed by Gaussian noise (``self.is_noise``);
    6. multiplied by a sigmoid envelope that ramps up over the first
       time steps;
    7. either clipped to ``[self.min_val, self.max_val]``
       (``self.clip_values``) or shifted/rescaled into that interval;
    8. optionally rounded to integers (``self.is_integer``);
    9. optionally punched with missing-value markers
       (``self.proportion_missing_values``).

    Parameters
    ----------
    seed
        Seed for both the ``random.Random`` instance handed to
        ``self._get_start`` and the ``np.random.RandomState`` used for
        every other draw. Two independent calls with the same seed (e.g.
        one for the training set and one for the test set) therefore
        consume identical random streams, provided the helper methods are
        themselves deterministic.

    Returns
    -------
    List[DataEntry]
        One dict per series with the keys ``start`` (a ``pd.Timestamp``),
        ``target`` (``np.array`` built from the generated values, which
        may contain ``None`` / ``"NaN"`` markers) and ``item_id`` (the
        series index).

    Raises
    ------
    AssertionError
        If ``self.proportion_missing_values`` is ``>= 1.0`` while at
        least one series is generated.
    """

    def sigmoid(x: np.ndarray) -> np.ndarray:
        return 1.0 / (1.0 + np.exp(-x))

    # Fix the seeds so that the training set is the same as the test set
    # (minus the forecast horizon) for two independent calls, and so that
    # both calls get the same start dates.
    my_random = random.Random(seed)
    state = np.random.RandomState(seed)

    res = []
    # NOTE: the order of the draws from `state` / `my_random` below defines
    # the generated data for a given seed -- do not reorder them.
    for i in range(self.num_series):
        val_range = self.max_val - self.min_val
        # `randint` excludes `high`: length is in [length_low, length_high).
        length = state.randint(low=self.length_low, high=self.length_high)
        start = self._get_start(i, my_random)
        envelope = sigmoid((np.arange(length) - 20.0) / 10.0)
        level = 0.3 * val_range * (state.random_sample() - 0.5)
        phi = 2 * np.pi * state.random_sample()
        period = self._get_period()
        w = 2 * np.pi / period
        t = np.arange(length)
        time_index = pd.date_range(
            start=start, freq=self.freq_str, periods=length
        )
        special_tp_indicator = self._special_time_point_indicator(time_index)
        # Random additive bump applied on the special time points only.
        sunday_effect = state.random_sample() * special_tp_indicator
        v = np.sin(w * t + phi) + sunday_effect

        if self.is_scale:
            scale = 0.1 * val_range * state.random_sample()
            v *= scale
        v += level
        if self.is_noise:
            noise_range = 0.02 * val_range * state.random_sample()
            noise = noise_range * state.normal(size=length)
            v += noise
        v = envelope * v

        if self.clip_values:
            np.clip(v, a_min=self.min_val, a_max=self.max_val, out=v)
        else:
            # Rather than mapping [v_min, v_max] to
            # [self.min_val, self.max_val], which would lead to all the
            # time series having the same min and max, we want to keep the
            # same interval length (v_max - v_min). We thus shift the
            # interval [v_min, v_max] into [self.min_val, self.max_val]
            # and clip it if needed.
            v_min, v_max = v.min(), v.max()
            p_min, p_max = (
                max(self.min_val, v_min),
                min(self.max_val, v_max),
            )
            shifted_min = np.clip(
                p_min + (p_max - v_max),
                a_min=self.min_val,
                a_max=self.max_val,
            )
            shifted_max = np.clip(
                p_max + (p_min - v_min),
                a_min=self.min_val,
                a_max=self.max_val,
            )
            # A constant series (e.g. a single point) has v_max == v_min;
            # dividing by that span would turn every value into NaN. With
            # a unit divisor the numerator (v - v_min) is all zeros, so the
            # series simply lands on `shifted_min`.
            span = v_max - v_min
            divisor = span if span > 0 else 1.0
            v = shifted_min + (shifted_max - shifted_min) * (v - v_min) / (
                divisor
            )

        if self.is_integer:
            # Clip to the integers inside the allowed range before rounding
            # so that rounding cannot step outside of it.
            np.clip(
                v,
                a_min=np.ceil(self.min_val),
                a_max=np.floor(self.max_val),
                out=v,
            )
            v = np.round(v).astype(int)

        values = v.tolist()
        if self.proportion_missing_values > 0:
            assert (
                self.proportion_missing_values < 1.0
            ), "Please chose a number 0 < x < 1.0"
            positions = np.arange(len(values))
            state.shuffle(positions)
            num_missing_values = (
                int(len(values) * self.proportion_missing_values) + 1
            )  # Add one in case this gets zero
            for j in positions[:num_missing_values]:
                # Using convention that there are no missing values before
                # the start date, so index 0 is never blanked out.
                if j != 0:
                    # Both spellings of "missing" are emitted on purpose,
                    # each with probability 1/2.
                    values[j] = None if state.rand() < 0.5 else "NaN"

        res.append(
            dict(
                # NOTE(review): the `freq` keyword of `pd.Timestamp` was
                # removed in pandas 2.0 -- confirm the pinned pandas
                # version before upgrading.
                start=pd.Timestamp(start, freq=self.freq_str),
                target=np.array(values),
                item_id=i,
            )
        )
    return res