in libs/libcommon/src/libcommon/state.py [0:0]
def __post_init__(self, pending_jobs_df: pd.DataFrame, cache_entries_df: pd.DataFrame) -> None:
    """Populate the dataset-level artifact states, the config names and the config states.

    Sets three attributes on ``self``:

    - ``artifact_state_by_step``: one ``ArtifactState`` per processing step whose input type is
      "dataset", keyed by the step name.
    - ``config_names``: the names returned by ``fetch_names`` for ``DATASET_CONFIG_NAMES_KIND``.
    - ``config_states``: one ``ConfigState`` per entry of ``config_names``, in the same order.

    Args:
        pending_jobs_df: pending jobs. The "revision", "config", "split" and "type" columns are
            read here. Only rows whose revision equals ``self.revision`` are forwarded.
        cache_entries_df: cache entries. The "kind", "config" and "split" columns are read here.
            Rows are NOT filtered by revision.

    Raises:
        UnexceptedConfigNamesError: if ``cache_entries_df`` references a (non-null) config that is
            not listed in ``self.config_names``.
    """
    with StepProfiler(
        method="DatasetState.__post_init__",
        step="get_dataset_level_artifact_states",
    ):
        # These masks do not depend on the processing step (nor, for the revision, on the config):
        # compute them once instead of once per step / per config.
        revision_jobs_df = pending_jobs_df[pending_jobs_df["revision"] == self.revision]
        # Dataset-level rows are the ones with neither a config nor a split.
        dataset_jobs_df = revision_jobs_df[
            revision_jobs_df["config"].isnull() & revision_jobs_df["split"].isnull()
        ]
        dataset_cache_entries_df = cache_entries_df[
            cache_entries_df["config"].isnull() & cache_entries_df["split"].isnull()
        ]
        self.artifact_state_by_step = {
            processing_step.name: ArtifactState(
                processing_step=processing_step,
                dataset=self.dataset,
                revision=self.revision,
                config=None,
                split=None,
                pending_jobs_df=dataset_jobs_df[dataset_jobs_df["type"] == processing_step.job_type],
                cache_entries_df=dataset_cache_entries_df[
                    dataset_cache_entries_df["kind"] == processing_step.cache_kind
                ],
            )
            for processing_step in self.processing_graph.get_input_type_processing_steps(input_type="dataset")
        }
    with StepProfiler(
        method="DatasetState.__post_init__",
        step="get_config_names",
    ):
        self.config_names = fetch_names(
            dataset=self.dataset,
            config=None,
            cache_kind=DATASET_CONFIG_NAMES_KIND,
            names_field="config_names",
            name_field="config",
        )  # Note that we use the cached content even if the revision is different (i.e. it may be obsolete)
        # Null configs denote dataset-level entries, not config names. Drop them with dropna() so
        # that None and NaN are treated alike, consistently with the isnull() filters above.
        cached_config_names = set(cache_entries_df["config"].dropna().unique())
        unexpected_config_names = cached_config_names.difference(self.config_names)
        if unexpected_config_names:
            # Only the first 10 names are listed, to keep the message short.
            raise UnexceptedConfigNamesError(
                f"Unexpected config names ({len(unexpected_config_names)}): {list(islice(unexpected_config_names, 10))}{'' if len(unexpected_config_names) <= 10 else '...'}"
            )
    with StepProfiler(
        method="DatasetState.__post_init__",
        step="get_config_states",
    ):
        self.config_states = [
            ConfigState(
                dataset=self.dataset,
                revision=self.revision,
                config=config_name,
                processing_graph=self.processing_graph,
                # revision_jobs_df is already restricted to self.revision.
                pending_jobs_df=revision_jobs_df[revision_jobs_df["config"] == config_name],
                cache_entries_df=cache_entries_df[cache_entries_df["config"] == config_name],
            )
            for config_name in self.config_names
        ]