def load_glove()

in ludwig/utils/data_utils.py [0:0]


def load_glove(file_path):
    logger.info('  Loading Glove format file {}'.format(file_path))
    embeddings = {}
    embedding_size = 0

    # collect embeddings size assuming the first line is correct
    with open(file_path, 'r', encoding='utf-8') as f:
        found_line = False
        while not found_line:
            line = f.readline()
            if line:
                embedding_size = len(line.split()) - 1
                found_line = True

    # collect embeddings
    with open(file_path, 'r', encoding='utf-8') as f:
        for line_number, line in enumerate(f):
            if line:
                try:
                    split = line.split()
                    if len(split) != embedding_size + 1:
                        raise ValueError
                    word = split[0]
                    embedding = np.array(
                        [float(val) for val in split[-embedding_size:]]
                    )
                    embeddings[word] = embedding
                except ValueError:
                    logger.warning(
                        'Line {} in the GloVe file {} is malformed, '
                        'skipping it'.format(
                            line_number, file_path
                        )
                    )
    logger.info('  {0} embeddings loaded'.format(len(embeddings)))
    return embeddings