in datasets/base_video_dataset.py [0:0]
import logging
from pathlib import Path
from typing import Sequence, Tuple

import pandas as pd


def convert_to_anticipation(df: pd.DataFrame,
                            root_dir: Sequence[Path],
                            tau_a: float,
                            tau_o: float,
                            future_clip_ratios: Sequence[float] = (1.0, ),
                            drop_style: str = 'correct'
                            ) -> Tuple[pd.DataFrame, pd.DataFrame]:
"""
Based on the definition in the original paper
https://arxiv.org/pdf/1804.02748.pdf, convert the start and end
video times to as used in anticipation.
tau_a (float): Anticipation time in seconds. By default -1, since
we train the model to do action recognition, in which case
the model sees a clip that finishes tau_a seconds before
the action to be anticipated starts. This is as per defn
in https://arxiv.org/pdf/1804.02748.pdf (pg 15)
tau_o (float): The amount of video to see before doing the
anticipation. In the original paper they used 1s
(https://arxiv.org/pdf/1804.02748.pdf), but in further ones
they use 3.5 (https://arxiv.org/pdf/1905.09035.pdf).
future_clip_ratios: A list of ratios (< 1.0) of tau_a, to define what clips
to set as the future clips. These will be used when returning future
clips. Ideally the labels should be adjusted to match this too, but
not doing that for now.
"""
    del root_dir
    if tau_a == -999:
        # No anticipation, just simple recognition. Still add orig_start,
        # orig_end, the future clips etc., so the future prediction
        # baseline can handle the case where no future is predicted.
        # This will ensure the future clip ends up being the same as the
        # current clip.
        tau_a = df.loc[:, 'start'] - df.loc[:, 'end']
        tau_o = df.loc[:, 'end'] - df.loc[:, 'start']
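    # With these per-row values the conversion below is a no-op:
    #   end'   = start - (start - end) = end
    #   start' = end' - (end - start) = start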
    logging.debug(
        'Converting data to anticipation with tau_a=%s and '
        'tau_o=%s.', tau_a, tau_o)
    # Copy over the current start and end times
    df.loc[:, 'orig_start'] = df.start
    df.loc[:, 'orig_end'] = df.end
    # Convert using tau_o and tau_a
    df.loc[:, 'end'] = df.loc[:, 'start'] - tau_a
    df.loc[:, 'start'] = df.loc[:, 'end'] - tau_o
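    # [start, end] is now the observed clip: tau_o seconds of video
    # ending tau_a seconds before the action starts (orig_start).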
    # Add the future clips
    for i, future_clip_ratio in enumerate(future_clip_ratios):
        if future_clip_ratio == -999:
            # A special value to use the exact current clip as the future
            df.loc[:, f'{FUTURE_PREFIX}_{i}_start'] = df.loc[:, 'start']
            df.loc[:, f'{FUTURE_PREFIX}_{i}_end'] = df.loc[:, 'end']
        elif -10 < future_clip_ratio < 10:
            eff_tau_a = tau_a * future_clip_ratio
            df.loc[:, f'{FUTURE_PREFIX}_{i}_start'] = (df.loc[:, 'end'] +
                                                       eff_tau_a)
            df.loc[:, f'{FUTURE_PREFIX}_{i}_end'] = (
                df.loc[:, f'{FUTURE_PREFIX}_{i}_start'] + tau_o)
        else:
            raise ValueError(
                f'future_clip_ratio {future_clip_ratio} is out of bounds')
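    # E.g. with the default future_clip_ratios=(1.0,), future_0 spans
    # [orig_start, orig_start + tau_o]: a tau_o-long window starting
    # exactly when the action does.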
    # Time of the first frame, in seconds
    f1_sec = 1 / RULSTM_TSN_FPS
    old_df = df
    if drop_style == 'correct':
        # Keep clips with at least 1 frame in the video
        df = df[df.end >= f1_sec]
    elif drop_style == 'full_context_in':
        # All frames of the observed context should be in the video
        df = df[df.start >= f1_sec]
    elif drop_style == 'action_banks':
        # Based on their dataset_anticipation:__get_snippet_features()
        df = df[df.end >= 2]
    else:
        raise NotImplementedError(f'Unknown drop_style {drop_style}')
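    # After the concat, kept rows appear twice and discarded rows once;
    # drop_duplicates(keep=False) thus leaves only the discarded clips.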
    discarded_df = pd.concat([old_df, df]).drop_duplicates(subset=['uid'],
                                                           keep=False)
    df.reset_index(inplace=True, drop=True)
    return df, discarded_df
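

# A minimal usage sketch (not from the original repo): assumes a toy
# annotations DataFrame with 'uid', 'start' and 'end' columns, and that
# FUTURE_PREFIX and RULSTM_TSN_FPS are the module-level constants defined
# elsewhere in this file (f1_sec is positive for any FPS, which is all
# the example relies on).
if __name__ == '__main__':
    toy = pd.DataFrame({
        'uid': [0, 1],
        'start': [10.0, 0.5],  # annotated action starts, in seconds
        'end': [12.5, 2.0],    # annotated action ends, in seconds
    })
    kept, discarded = convert_to_anticipation(
        toy, root_dir=[], tau_a=1.0, tau_o=3.5)
    # uid 0: observed clip [5.5, 9.0], future_0 = [10.0, 13.5].
    # uid 1: its observed clip would end at -0.5s, before the video
    # starts, so it is discarded under the default drop_style='correct'.
    print(kept[['uid', 'start', 'end', 'orig_start', 'orig_end']])
    print(discarded.uid.tolist())  # -> [1]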