in datasets/base_video_dataset.py [0:0]
def __init__(
self,
df,
root: Union[Sequence[Path], Path] = Path(''),
frames_per_clip: int = 32,
frame_rate: float = None,
subclips_options: Dict[str, float] = None,
load_seg_labels: bool = False,
load_long_term_future_labels: int = 0,
reader_fn: TargetConf = {
'_target_': 'datasets.reader_fns.DefaultReader'
},
transform: torchvision.transforms.Compose = None,
# verb, noun, action
label_type: Union[str, Sequence[str]] = 'verb',
return_future_clips_too: bool = False,
sample_strategy: str = SAMPLE_STRAT_RAND,
sample_strategy_future: str = SAMPLE_STRAT_FIRST,
conv_to_anticipate_fn: TargetConf = None,
conv_to_anticipate_fn_runtime: TargetConf = None,
process_df_before_read_fn: TargetConf = None,
sample_clips_densely: bool = False,
sample_clips_densely_fn: TargetConf = None,
random_seed: int = 42,
verb_classes: dict = {},
noun_classes: dict = {},
action_classes: dict = {},
repeat_data_times: float = 1.0,
dummy_label: Union[list, int] = -1,
class_balanced_sampling: bool = False,
return_unsampled_video: bool = False,
uid_subset: list = None):
"""
Args:
df: DataFrame of all the data (see a subclass for an example/format).
Must be passed in through super() when initializing the subclass.
root: The path where all the videos are stored, will be
prepended to video path.
load_seg_labels: Set to true to load frame level segmentation
labels that can be jointly used to finetune the model for
classification as well.
load_long_term_future_labels: Set to the number of future labels
to also return, from where load_seg_labels stops. This is
used for long-term rollout visualization and getting GT for
those.
transform: The video transform function
return_future_clips_too: Set to true to also return future, actual
action clips along with the tau_o clips. This is used for SSL.
sample_strategy_future: Sampling strategy used to return future
clips, if return_future_clips_too is set.
conv_to_anticipate_fn: The function that converts to anticipation.
conv_to_anticipate_fn_runtime: Same as conv_to_anticipate_fn, but
applied in the getitem function. Useful if you don't want to do
the conversion upfront, e.g. for large datasets like HowTo.
sample_clips_densely: Add clips to the data frame by sampling the
videos densely between the first and the last labeled clip.
The class label for those clips is -1; this is mostly used
for SSL.
sample_clips_densely_fn: If this function is set, there is no need
to set sample_clips_densely to true; this fn will be used
to densify.
process_df_before_read_fn: A function that is applied to the
data frame[idx] before it's used for reading the video etc.
repeat_data_times: Set to the number of times to repeat the data in
the DF. This is used if the epoch is too small, so the data can
be rolled through more than once during a single epoch. Also
helps if the preprocessing at read time (random cropping etc.)
effectively means each data item corresponds to more than one
data item.
class_balanced_sampling: If true, it will sample from the data
such that each class appears approximately equally -- so using
the distribution of labels, it will try to enforce uniformity.
This is independent of adding loss weights based on how
often a class appears, which is done in train_eval_ops.
return_unsampled_video (bool): If true, return the video clip
before it was sub-sampled to match the FPS requirements.
So if experimenting at 1FPS, this will also return the
original frame rate clip that could be used for visualization.
MUST use batch size = 1 if using this, since it will return
different length videos which won't be batch-able.
uid_subset: Make a dataset keeping only those UIDs. This is useful
for visualization code when I just want to visualize on
specific clips.
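
Example:
A minimal sketch of how a subclass might call this __init__. The
DataFrame columns and paths below are hypothetical (only
'verb_class' is referenced by this class, e.g. for class-balanced
sampling); the keyword arguments are actual parameters of this
method.

    df = pd.DataFrame({
        'video_path': ['P01/P01_01.MP4'],  # hypothetical column
        'start_sec': [0.0],  # hypothetical column
        'end_sec': [2.0],  # hypothetical column
        'verb_class': [3],  # read when label_type includes 'verb'
    })
    super().__init__(
        df,
        root=Path('/path/to/videos'),  # hypothetical path
        frames_per_clip=32,
        frame_rate=1.0,
        label_type='verb',
        verb_classes={'take': 0, 'put': 1})  # name -> id (assumed format)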
"""
super().__init__()
# Based on https://github.com/pytorch/pytorch/issues/13246#issuecomment-612396143,
# trying to avoid mem leaks by wrapping lists and dicts in
# multiprocessing Manager objects
manager = Manager()
self.root = root
# Convert to list if not already
if OmegaConf.get_type(self.root) != list:
self.root = [self.root]
self.root = [Path(el) for el in self.root]
self.subclips_options = subclips_options
self.load_seg_labels = load_seg_labels
self.load_long_term_future_labels = load_long_term_future_labels
# TODO: Move away from DataFrames... based on
# https://github.com/pytorch/pytorch/issues/5902#issuecomment-374611523
# it seems data frames are not ideal and cause memory leaks...
self.df = df # Data frame that will contain all info
# To be consistent with EPIC, add a uid column if not already present
if 'uid' not in self.df.columns:
self.df.loc[:, 'uid'] = range(1, len(self.df) + 1)
if sample_clips_densely or sample_clips_densely_fn:
if sample_clips_densely_fn is None:
# Use the default parameters. Keeping this sample_clips_densely
# param to be backward compatible.
sample_clips_densely_fn = {
'_target_':
'datasets.base_video_dataset.dense_clip_sampler',
}
self.df, _ = hydra.utils.call(sample_clips_densely_fn, self.df,
self.root)
assert not (conv_to_anticipate_fn and conv_to_anticipate_fn_runtime), (
'At max only one of these should be set.')
self.conv_to_anticipate_fn = conv_to_anticipate_fn
self.discarded_df = None
if conv_to_anticipate_fn is not None:
self.df, self.discarded_df = hydra.utils.call(
conv_to_anticipate_fn, self.df, self.root)
logging.info('Discarded %d elements in anticipate conversion',
len(self.discarded_df))
# This is an alternate implementation of conv_to_anticipate_fn, run in
# getitem; useful for large datasets like HowTo, but won't work for
# any dataset where you want to run testing.
self.conv_to_anticipate_fn_runtime = conv_to_anticipate_fn_runtime
# This is used in the output files for EPIC submissions
self.challenge_type = 'action_recognition'
if conv_to_anticipate_fn or conv_to_anticipate_fn_runtime:
# If either of these are set, this must be an anticipation setup
self.challenge_type = 'action_anticipation'
self.repeat_data_times = repeat_data_times
self.process_df_before_read_fn = process_df_before_read_fn
self.frames_per_clip = frames_per_clip
self.frame_rate = frame_rate
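# Build the reader object from its Hydra/OmegaConf config:
# instantiate() constructs the class named in the '_target_' key
# (datasets.reader_fns.DefaultReader by default).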
self.reader_fn = hydra.utils.instantiate(reader_fn)
self.transform = transform
self.label_type = label_type
if OmegaConf.get_type(self.label_type) != list:
# Will use the first one for the balancing etc
self.label_type = [self.label_type]
self.verb_classes = manager.dict(verb_classes)
self.noun_classes = manager.dict(noun_classes)
self.action_classes = manager.dict(action_classes)
self.return_future_clips_too = return_future_clips_too
self.sample_strategy = sample_strategy
self.sample_strategy_future = sample_strategy_future
self.random_seed = random_seed
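# Dedicated numpy Generator so any random clip/frame sampling below is
# reproducible for a given random_seed.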
self.rng = np.random.default_rng(self.random_seed)
self.dummy_label = dummy_label
if isinstance(self.dummy_label, list):
self.dummy_label = manager.list(self.dummy_label)
# Precompute some commonly useful stats
self.classes_counts = manager.dict(self._compute_stats_cls_counts())
self.class_balanced_sampling = class_balanced_sampling
if self.class_balanced_sampling:
# sort the data frame by labels, to allow for the runtime
# remapping of idx
assert len(self.label_type) == 1, (
'Class-balanced sampling with multiple label types not supported yet')
self.df.sort_values(by=self.label_type[0] + '_class', inplace=True)
self.return_unsampled_video = return_unsampled_video
if self.return_unsampled_video:
logging.warning('Make sure to use batch size = 1 since '
'return_unsampled_video is set to True.')
# store the full DF so far in df_before_subset, since I will now keep a
# subset that may be used for testing etc. df_before_subset will be
# used to get intermediate labels for L_cls etc still (even during
# visualizations sometimes I want to show that)
self.df_before_subset = self.df
if uid_subset is not None:
# Select a subset in the order of the list
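# get_indexer returns, for each uid in uid_subset, its position in
# df.uid, so iloc picks rows in the order they appear in uid_subset.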
self.df = self.df.iloc[pd.Index(
self.df.uid).get_indexer(uid_subset)].reset_index(drop=True)