in src/sagemaker_sklearn_extension/feature_extraction/sequences.py [0:0]
def fit(self, X, y=None):
    """Fit one ``TSFreshFeatureExtractor`` per column of ``X``.

    Each column of ``X`` is flattened with ``TSFlattener`` and used to fit a
    dedicated ``TSFreshFeatureExtractor``. The fitted extractors are stored,
    in column order, in ``self.tsfresh_feature_extractors_``.

    Parameters
    ----------
    X : array-like, 2-D
        Input data; validated with ``check_array`` (any dtype, NaN allowed).
        Every column is handed to ``TSFlattener.transform`` as an
        ``(n_samples, 1)`` array.
    y : ignored
        Not used by this method.

    Returns
    -------
    self

    Raises
    ------
    ValueError
        If ``sequences_lengths_q25`` was provided and its length differs
        from the number of columns in ``X``.
    """
    X = check_array(X, dtype=None, force_all_finite="allow-nan")
    n_columns = X.shape[1]

    # Resolve the default into a local instead of overwriting the
    # hyperparameter: mutating ``self.sequences_lengths_q25`` here would pin
    # its length to the first dataset seen, so a later ``fit`` on data with a
    # different number of columns would fail the length check below, and
    # ``get_params()`` / ``clone()`` would stop reflecting the user's input.
    sequences_lengths_q25 = self.sequences_lengths_q25
    if sequences_lengths_q25 is None:
        sequences_lengths_q25 = [DEFAULT_INPUT_SEQUENCE_LENGTH] * n_columns
    if len(sequences_lengths_q25) != n_columns:
        raise ValueError(
            f"length of sequences_lengths_q25 should be equal to number of columns in X (={n_columns})."
        )

    ts_flattener = TSFlattener(max_allowed_length=self.max_allowed_length, trim_beginning=self.trim_beginning)
    tsfresh_feature_extractors = []
    for sequence_column_i, sequence_column in enumerate(X.T):
        # The flattener receives a single-column 2-D array.
        numeric_sequences = ts_flattener.transform(sequence_column.reshape(-1, 1))
        tsfresh_feature_extractor = TSFreshFeatureExtractor(
            augment=self.augment,
            interpolation_method=self.interpolation_method,
            extraction_type=self.extraction_type,
            extraction_seed=self.extraction_seed,
            sequence_length_q25=sequences_lengths_q25[sequence_column_i],
        )
        tsfresh_feature_extractor.fit(numeric_sequences)
        tsfresh_feature_extractors.append(tsfresh_feature_extractor)

    # Assigned only after every column fitted successfully, so a failure
    # part-way through never leaves a partial list on the instance.
    self.tsfresh_feature_extractors_ = tsfresh_feature_extractors
    return self