def __post_init__

def __post_init__()

in libs/libcommon/src/libcommon/state.py [0:0]
61 lines of code
4 McCabe index (conditional complexity)

    def __post_init__(self, pending_jobs_df: pd.DataFrame, cache_entries_df: pd.DataFrame) -> None:
        """Build the dataset-level artifact states, the config names and the per-config states.

        Sets `self.artifact_state_by_step`, `self.config_names` and `self.config_states`.

        Args:
            pending_jobs_df: dataframe of pending jobs. The "revision", "config", "split" and "type"
                columns are read here to select the rows passed to each child state.
            cache_entries_df: dataframe of cache entries. The "kind", "config" and "split" columns
                are read here to select the rows passed to each child state.

        Raises:
            UnexceptedConfigNamesError: if `cache_entries_df` contains a non-null config that is not
                part of the fetched config names.
        """
        with StepProfiler(
            method="DatasetState.__post_init__",
            step="get_dataset_level_artifact_states",
        ):
            # These row filters do not depend on the processing step: compute them once instead of
            # once per step (and, for the revision filter, once per config below).
            pending_revision_mask = pending_jobs_df["revision"] == self.revision
            dataset_level_pending_mask = (
                pending_revision_mask & pending_jobs_df["config"].isnull() & pending_jobs_df["split"].isnull()
            )
            dataset_level_cache_mask = cache_entries_df["config"].isnull() & cache_entries_df["split"].isnull()

            self.artifact_state_by_step = {
                processing_step.name: ArtifactState(
                    processing_step=processing_step,
                    dataset=self.dataset,
                    revision=self.revision,
                    config=None,
                    split=None,
                    pending_jobs_df=pending_jobs_df[
                        dataset_level_pending_mask & (pending_jobs_df["type"] == processing_step.job_type)
                    ],
                    cache_entries_df=cache_entries_df[
                        (cache_entries_df["kind"] == processing_step.cache_kind) & dataset_level_cache_mask
                    ],
                )
                for processing_step in self.processing_graph.get_input_type_processing_steps(input_type="dataset")
            }

            with StepProfiler(
                method="DatasetState.__post_init__",
                step="get_config_names",
            ):
                # Note that we use the cached content even if the revision is different
                # (i.e. it may be obsolete).
                self.config_names = fetch_names(
                    dataset=self.dataset,
                    config=None,
                    cache_kind=DATASET_CONFIG_NAMES_KIND,
                    names_field="config_names",
                    name_field="config",
                )

            # A null config marks a dataset-level entry. Drop every null marker (None, NaN, pd.NA),
            # as `.isnull()` does in the filters above, so that only real config names are compared.
            unexpected_config_names = set(cache_entries_df["config"].dropna().unique()).difference(
                self.config_names
            )
            if unexpected_config_names:
                raise UnexceptedConfigNamesError(
                    f"Unexpected config names ({len(unexpected_config_names)}): {list(islice(unexpected_config_names, 10))}{'' if len(unexpected_config_names) <= 10 else '...'}"
                )

            with StepProfiler(
                method="DatasetState.__post_init__",
                step="get_config_states",
            ):
                self.config_states = [
                    ConfigState(
                        dataset=self.dataset,
                        revision=self.revision,
                        config=config_name,
                        processing_graph=self.processing_graph,
                        pending_jobs_df=pending_jobs_df[
                            pending_revision_mask & (pending_jobs_df["config"] == config_name)
                        ],
                        cache_entries_df=cache_entries_df[cache_entries_df["config"] == config_name],
                    )
                    for config_name in self.config_names
                ]