def create_an_lmdb_database()

in process_data/kinetics/create_video_lmdb_test_multicrop.py [0:0]


def create_an_lmdb_database(list_file, output_file, use_local_file=True):
    """Write a multi-crop test LMDB for the videos listed in ``list_file``.

    Each non-blank line of ``list_file`` is split on whitespace and its first
    token is taken as the path of a video file.  Any further tokens (such as
    a label column) are ignored: the label slot of every record carries the
    zero-based position of the video in the list instead (presumably so that
    per-clip results can be grouped back by video -- confirm with the
    consumer of this database).

    For every video, ``crop_times * test_start_frame_num`` (3 * 10) records
    are written.  Each record is a serialized ``caffe2_pb2.TensorProtos``
    holding four tensors, in this order:

    1. string tensor: the video payload (see ``use_local_file``),
    2. int32 tensor: index of the video in the list,
    3. int32 tensor: start-frame index, ``0 .. test_start_frame_num - 1``,
    4. int32 tensor: spatial crop index, ``0 .. crop_times - 1``.

    Records are keyed by a running counter rendered as ASCII decimal digits.
    All records are written inside a single write transaction.

    Args:
        list_file: Path of the text file listing the videos.
        output_file: Path handed to ``lmdb.open`` for the output database.
        use_local_file: When true (the default) each record stores the path
            of the video; otherwise the raw bytes of the video file are read
            and stored in the record.

    Returns:
        The accumulated size estimate: for every record written, the payload
        length plus ``sys.getsizeof(int)``.
    """
    print("Write video to a lmdb...")
    LMDB_MAP_SIZE = 1 << 40   # MODIFY
    test_start_frame_num = 10
    crop_times = 3

    # NOTE(review): this is the size of the ``int`` *type object*, not of an
    # integer value.  It is kept as-is so the returned estimate does not
    # change; confirm whether a per-integer size was intended.
    per_record_overhead = sys.getsizeof(int)

    # Read the list before touching the database so that a missing or
    # unreadable list file does not leave an empty environment behind.
    video_paths = []
    with open(list_file, 'r') as data:
        for line in data:
            tokens = line.split()
            if not tokens:
                # Blank line: nothing to index.
                continue
            video_paths.append(tokens[0])

    total_size = 0
    index = 0

    env = lmdb.open(output_file, map_size=LMDB_MAP_SIZE)
    try:
        with env.begin(write=True) as txn:
            for i, video_path in enumerate(video_paths):
                if use_local_file:
                    # store the full path to local video file
                    video_data = video_path
                else:
                    # read raw video data and store to db
                    with open(video_path, mode='rb') as file:
                        video_data = file.read()

                # ``string_data`` is a protobuf bytes field, which only
                # accepts ``bytes`` on Python 3; encode text paths up front.
                if not isinstance(video_data, bytes):
                    video_data = video_data.encode('utf-8')

                if i % 1000 == 0:
                    print(i)

                for ct in range(crop_times):
                    for j in range(test_start_frame_num):
                        tensor_protos = caffe2_pb2.TensorProtos()

                        video_tensor = tensor_protos.protos.add()
                        video_tensor.data_type = 4  # string data
                        video_tensor.string_data.append(video_data)

                        # The label slot holds the video index, not the
                        # label column of the list file.
                        label_tensor = tensor_protos.protos.add()
                        label_tensor.data_type = 2  # int32 data
                        label_tensor.int32_data.append(i)

                        start_frame_tensor = tensor_protos.protos.add()
                        start_frame_tensor.data_type = 2  # int32 data
                        start_frame_tensor.int32_data.append(j)

                        spatial_pos_tensor = tensor_protos.protos.add()
                        spatial_pos_tensor.data_type = 2  # int32 data
                        spatial_pos_tensor.int32_data.append(ct)

                        txn.put(
                            '{}'.format(index).encode('ascii'),
                            tensor_protos.SerializeToString()
                        )
                        index += 1
                        total_size += len(video_data) + per_record_overhead
    finally:
        # Release the environment handle even if a write fails part-way.
        env.close()

    # The count reported here is the number of listed videos; the number of
    # records actually written is ``index``.
    print(
        "Done writing {} clips into database with a total size of {}".format(
            len(video_paths),
            total_size
        )
    )
    return total_size