def parse_annotations()

in build_graph/data/gtea.py [0:0]
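Parses the GTEA verb, noun, and action index files plus the interaction annotations from both split files under data_dir/annotations into a single dict (verbs, nouns, actions, interactions, videos, per-video lengths, and an alternate train/test split), then serializes it to build_graph/data/gtea/gtea_data.pth with torch.save.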


import collections
import json

import torch


def parse_annotations(data_dir):

    # verb_idx.txt: one verb per line with its index as the last token;
    # multi-word verbs are re-joined with '-'
    with open(f'{data_dir}/annotations/verb_idx.txt') as f:
        verbs = f.read().strip().split('\n')
        verbs = ['-'.join(line.split(' ')[:-1]).lower() for line in verbs]
    # noun_idx.txt: the first space-separated token on each line is the noun
    with open(f'{data_dir}/annotations/noun_idx.txt') as f:
        nouns = f.read().strip().split('\n')
        nouns = [line.split(' ')[0].lower() for line in nouns]
    # action_idx.txt: verb token(s), then the noun(s), then the index;
    # nouns may be comma-separated alternatives, of which the first is kept
    with open(f'{data_dir}/annotations/action_idx.txt') as f:
        lines = f.read().strip().split('\n')
        actions = []
        for line in lines:
            split = line.split(' ')
            noun = split[-2].split(',')[0].lower()
            verb = '-'.join(split[:-2]).lower()
            # store each action as a (verb index, noun index) pair
            actions.append((verbs.index(verb), nouns.index(noun)))

    annotations = {'verbs': verbs, 'nouns': nouns, 'actions': actions}

    def parse_interaction_annotations(lines):
        entries = []
        for line in lines:

            # each line: clip name, action/verb/noun labels, optional second noun
            split = line.split(' ')
            clip, action, verb, noun = split[:4]
            noun2 = split[4] if len(split) == 5 else '0'

            # clip names look like
            # <P>-<R>-<dish>-<clip_start>-<clip_stop>-<fstart>-<fstop>;
            # the frame fields carry a one-character prefix (presumably 'F'),
            # which the [1:] below strips
            P, R, dish, clip_start, clip_stop, fstart, fstop = clip.split('-')
            v_id = f'{P}-{R}-{dish}'
            # labels in the file are 1-indexed; shift to 0-indexed
            # (a missing second noun therefore becomes -1)
            entry = {'v_id': v_id,
                     'start': int(fstart[1:]), 'stop': int(fstop[1:]),
                     'clip_start': int(clip_start), 'clip_stop': int(clip_stop),
                     'verb': int(verb)-1, 'noun': int(noun)-1,
                     'noun2': int(noun2)-1, 'action': int(action)-1}
            entries.append(entry)

        # order interactions by their start frame
        entries = sorted(entries, key=lambda entry: entry['start'])

        return entries

    # both provided splits contribute interaction annotations
    with open(f'{data_dir}/annotations/train_split1.txt') as f:
        lines = f.read().strip().split('\n')
    with open(f'{data_dir}/annotations/test_split1.txt') as f:
        lines += f.read().strip().split('\n')
    interactions = parse_interaction_annotations(lines)
    annotations['interactions'] = interactions


    videos = {entry['v_id'] for entry in interactions}
    annotations['videos'] = sorted(videos)

    # interactions in the provided train/test splits come from the same
    # videos, so an alternate split is loaded instead
    with open(f'{data_dir}/train_test_splits.json') as f:
        annotations.update(json.load(f))

    # approximate each video's length as the largest stop frame seen
    # among its interactions
    video_lengths = collections.defaultdict(int)
    for entry in interactions:
        video_lengths[entry['v_id']] = max(video_lengths[entry['v_id']], entry['stop'])
    annotations['vid_lengths'] = video_lengths

    # serialize everything for downstream graph-building code
    torch.save(annotations, 'build_graph/data/gtea/gtea_data.pth')
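
A minimal usage sketch, not part of the original file: it assumes the module is importable as build_graph.data.gtea, that data_dir (here the hypothetical path 'data/gtea') contains the annotation files referenced above, and that the output directory build_graph/data/gtea/ already exists.

import torch

from build_graph.data.gtea import parse_annotations

# parse and serialize the annotations ('data/gtea' is a placeholder path)
parse_annotations('data/gtea')

# reload the saved dict; weights_only=False because the file stores plain
# Python containers, not just tensors (older torch versions omit the flag)
annotations = torch.load('build_graph/data/gtea/gtea_data.pth', weights_only=False)
print(sorted(annotations.keys()))
print(len(annotations['interactions']), 'interactions across',
      len(annotations['videos']), 'videos')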