in datasets/base_video_dataset.py [0:0]
import logging
from pathlib import Path
from typing import Sequence, Tuple

import pandas as pd


def convert_to_anticipation(df: pd.DataFrame,
                            root_dir: Sequence[Path],
                            tau_a: float,
                            tau_o: float,
                            future_clip_ratios: Sequence[float] = (1.0, ),
                            drop_style: str = 'correct'
                            ) -> Tuple[pd.DataFrame, pd.DataFrame]:
"""
Based on the definition in the original paper
https://arxiv.org/pdf/1804.02748.pdf, convert the start and end
video times to as used in anticipation.
tau_a (float): Anticipation time in seconds. By default -1, since
we train the model to do action recognition, in which case
the model sees a clip that finishes tau_a seconds before
the action to be anticipated starts. This is as per defn
in https://arxiv.org/pdf/1804.02748.pdf (pg 15)
tau_o (float): The amount of video to see before doing the
anticipation. In the original paper they used 1s
(https://arxiv.org/pdf/1804.02748.pdf), but in further ones
they use 3.5 (https://arxiv.org/pdf/1905.09035.pdf).
future_clip_ratios: A list of ratios (< 1.0) of tau_a, to define what clips
to set as the future clips. These will be used when returning future
clips. Ideally the labels should be adjusted to match this too, but
not doing that for now.
"""
    del root_dir
    if tau_a == -999:
        # No anticipation, just simple recognition. Still add orig_start,
        # orig_end, the future clips etc., so the future prediction
        # baseline can handle the case where no future is predicted.
        # This will ensure the future clip ends up being the same as the
        # current clip.
        tau_a = df.loc[:, 'start'] - df.loc[:, 'end']
        tau_o = df.loc[:, 'end'] - df.loc[:, 'start']
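    # With these per-row values the conversion below is a no-op:
    #   end'   = start - (start - end) = end
    #   start' = end' - (end - start) = start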
    logging.debug(
        'Converting data to anticipation with tau_a=%s and '
        'tau_o=%s.', tau_a, tau_o)
    # Copy over the current start and end times
    df.loc[:, 'orig_start'] = df.start
    df.loc[:, 'orig_end'] = df.end
    # Convert using tau_o and tau_a
    df.loc[:, 'end'] = df.loc[:, 'start'] - tau_a
    df.loc[:, 'start'] = df.loc[:, 'end'] - tau_o
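    # [start, end] is now the observed clip: tau_o seconds of video
    # ending tau_a seconds before the action starts (orig_start).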
    # Add the future clips
    for i, future_clip_ratio in enumerate(future_clip_ratios):
        if future_clip_ratio == -999:
            # A special value to use the exact current clip as the future
            df.loc[:, f'{FUTURE_PREFIX}_{i}_start'] = df.loc[:, 'start']
            df.loc[:, f'{FUTURE_PREFIX}_{i}_end'] = df.loc[:, 'end']
        elif -10 < future_clip_ratio < 10:
            eff_tau_a = tau_a * future_clip_ratio
            df.loc[:, f'{FUTURE_PREFIX}_{i}_start'] = (df.loc[:, 'end'] +
                                                       eff_tau_a)
            df.loc[:, f'{FUTURE_PREFIX}_{i}_end'] = (
                df.loc[:, f'{FUTURE_PREFIX}_{i}_start'] + tau_o)
        else:
            raise ValueError(
                f'future_clip_ratio {future_clip_ratio} is out of bounds')
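    # E.g. with the default future_clip_ratios=(1.0,), future_0 spans
    # [orig_start, orig_start + tau_o]: a tau_o-long window starting
    # exactly when the action does.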
    # Time of the first frame, in seconds
    f1_sec = 1 / RULSTM_TSN_FPS
    old_df = df
    if drop_style == 'correct':
        # Keep clips with at least 1 frame in the video
        df = df[df.end >= f1_sec]
    elif drop_style == 'full_context_in':
        # All frames of the observed context should be in the video
        df = df[df.start >= f1_sec]
    elif drop_style == 'action_banks':
        # Based on their dataset_anticipation:__get_snippet_features()
        df = df[df.end >= 2]
    else:
        raise NotImplementedError(f'Unknown drop_style {drop_style}')
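    # After the concat, kept rows appear twice and discarded rows once;
    # drop_duplicates(keep=False) thus leaves only the discarded clips.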
    discarded_df = pd.concat([old_df, df]).drop_duplicates(subset=['uid'],
                                                           keep=False)
    df.reset_index(inplace=True, drop=True)
    return df, discarded_df
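

# A minimal usage sketch (not from the original repo): assumes a toy
# annotations DataFrame with 'uid', 'start' and 'end' columns, and that
# FUTURE_PREFIX and RULSTM_TSN_FPS are the module-level constants defined
# elsewhere in this file (f1_sec is positive for any FPS, which is all
# the example relies on).
if __name__ == '__main__':
    toy = pd.DataFrame({
        'uid': [0, 1],
        'start': [10.0, 0.5],  # annotated action starts, in seconds
        'end': [12.5, 2.0],    # annotated action ends, in seconds
    })
    kept, discarded = convert_to_anticipation(
        toy, root_dir=[], tau_a=1.0, tau_o=3.5)
    # uid 0: observed clip [5.5, 9.0], future_0 = [10.0, 13.5].
    # uid 1: its observed clip would end at -0.5s, before the video
    # starts, so it is discarded under the default drop_style='correct'.
    print(kept[['uid', 'start', 'end', 'orig_start', 'orig_end']])
    print(discarded.uid.tolist())  # -> [1]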