def __init__()

in utils.py [0:0]


    def __init__(self, uri_path, balance=True):
        """Load labelled URIs from a text file, optionally balancing the classes.

        Every non-blank line of ``uri_path`` must look like ``<uri>,<label>``,
        where ``<label>`` parses as a float. Label 0 marks a benign URI and
        label 1 a malicious one; rows carrying any other label value are
        dropped by the class split below.

        Args:
            uri_path: Path of the file to read.
            balance: If true, randomly subsample the larger class (using the
                global ``np.random`` state) down to the size of the smaller one.

        Attributes set:
            s3_client: Initialised to ``None``.
            uris: Array with the malicious URIs first, then the benign ones.
            labels: ``float32`` array aligned with ``uris`` (1 = malicious,
                0 = benign).

        Raises:
            ValueError: If a non-blank line has no comma or its label is not
                a valid float.
        """
        # NOTE(review): presumably the client is created lazily elsewhere in
        # the class -- confirm against the code that reads `s3_client`.
        self.s3_client = None

        # get uris and labels to train on
        uris, labels = [], []
        with open(uri_path) as uri_file:
            for line in uri_file:
                # Strip only line terminators; blindly dropping the last
                # character corrupts a final row that has no trailing newline.
                line = line.rstrip("\r\n")
                if not line:
                    continue
                # The label is the last field, so split from the right to
                # tolerate commas inside the URI itself.
                uri, label = line.rsplit(",", 1)
                uris.append(uri)
                labels.append(float(label))

        # convert to np arrays for easier (boolean-mask) indexing
        uris = np.asarray(uris)
        labels = np.asarray(labels)

        # split into benign and malicious uris
        benign_uris = uris[labels == 0]
        malicious_uris = uris[labels == 1]

        # balance the dataset by throwing away samples in the majority class
        if balance:
            if len(benign_uris) > len(malicious_uris):
                benign_idxs = np.random.permutation(len(benign_uris))
                benign_uris = benign_uris[benign_idxs[:len(malicious_uris)]]
            else:
                # Truncate to the *benign* count; slicing by the malicious
                # count would keep every sample and leave the set unbalanced.
                malicious_idxs = np.random.permutation(len(malicious_uris))
                malicious_uris = malicious_uris[malicious_idxs[:len(benign_uris)]]

        # finally, stitch everything together
        self.uris = np.concatenate([malicious_uris, benign_uris])
        self.labels = np.concatenate(
            [np.ones(len(malicious_uris)), np.zeros(len(benign_uris))]
        ).astype(np.float32)