def __init__()

in example/speech-demo/io_func/feat_io.py [0:0]


    def __init__(self, dataset_args, n_ins):
        """Initialize the feature reader from a dataset-configuration dict.

        Parameters
        ----------
        dataset_args : dict
            Configuration options. Required key: "lst_file" (path to the
            list of feature/label files; gzipped if it ends in ".gz").
            Optional keys: "train_stat", "separate_lines", "has_labels",
            "file_format", "offset_labels", "gpu_chunk", "max_feats",
            "shuffle", "seed", and the pair "_num_splits"/"_split_id".
        n_ins : int
            Feature dimensionality (number of input columns per frame).

        Raises
        ------
        Exception
            If only one of "_split_id"/"_num_splits" is supplied.
        """

        # stats: optional global mean / inverse-stddev loaded from a
        # pre-computed statistics file
        self.mean = None
        self.std = None
        if 'train_stat' in dataset_args:
            train_stat = dataset_args['train_stat']
            featureStats = stats.FeatureStats()
            featureStats.Load(train_stat)
            self.mean = featureStats.GetMean()
            # NOTE(review): despite the attribute name, this holds the
            # *inverse* standard deviation (GetInvStd)
            self.std = featureStats.GetInvStd()

        # open the list file (transparently handle gzip)
        file_path = dataset_args["lst_file"]
        if file_path.endswith('.gz'):
            file_read = gzip.open(file_path, 'r')
        else:
            file_read = open(file_path, 'r')
        try:
            # strip whitespace and drop blank lines
            lines = [ln.strip() for ln in file_read]
            lines = [ln for ln in lines if ln != ""]
        finally:
            # BUG FIX: the original never closed the list file
            file_read.close()

        separate_lines = False
        if "separate_lines" in dataset_args:
            separate_lines = to_bool(dataset_args["separate_lines"])

        self.has_labels = True
        if "has_labels" in dataset_args:
            self.has_labels = to_bool(dataset_args["has_labels"])

        # parse it, orig_file_lst is a list of (featureFile, labelFile)
        # pairs in the input set
        if self.has_labels:
            if separate_lines:
                # feature file and label file alternate on separate lines
                if len(lines) % 2 != 0:
                    print("List has mis-matched number of feature files and label files")
                    sys.exit(1)
                self.orig_file_lst = []
                for i in range(0, len(lines), 2):
                    self.orig_file_lst.append((lines[i], lines[i + 1]))
            else:
                # "featureFile labelFile" on one whitespace-separated line
                self.orig_file_lst = []
                for line in lines:
                    pair = re.split(r"\s+", line)
                    if len(pair) != 2:
                        print(line)
                        print("Each line in the train and eval lists must contain feature file and label file separated by space character")
                        sys.exit(1)
                    self.orig_file_lst.append(pair)
        else:
            # no labels: pair every feature file with None
            self.orig_file_lst = [(ln, None) for ln in lines]

        # save arguments

        self.n_ins = n_ins

        # BUG FIX: the original unconditionally read
        # dataset_args['file_format'] before applying the default below,
        # raising KeyError whenever the key was absent
        self.file_format = "htk"
        if 'file_format' in dataset_args:
            self.file_format = dataset_args['file_format']

        self.offsetLabels = False
        if 'offset_labels' in dataset_args:
            self.offsetLabels = to_bool(dataset_args['offset_labels'])

        # number of frames buffered per chunk
        self.chunk_size = 32768
        if 'gpu_chunk' in dataset_args:
            self.chunk_size = int(dataset_args['gpu_chunk'])

        # cap on total frames; 0 means "unlimited"
        self.maxFeats = 0
        if "max_feats" in dataset_args:
            self.maxFeats = int(dataset_args["max_feats"])
        if self.maxFeats == 0:
            self.maxFeats = sys.maxsize  # was sys.maxint (Python 2 only)

        self.shuffle = True
        if 'shuffle' in dataset_args:
            self.shuffle = to_bool(dataset_args['shuffle'])

        # optional RNG seed; None lets numpy seed itself
        self.seed = None
        if "seed" in dataset_args:
            self.seed = int(dataset_args["seed"])

        # _split_id and _num_splits must be supplied together (XOR check)
        if ("_split_id" in dataset_args) != ("_num_splits" in dataset_args):
            raise Exception("_split_id must be used with _num_splits")
        self.num_splits = 0
        if "_num_splits" in dataset_args:
            # BUG FIX: was dataset_Args (NameError when _num_splits is set)
            self.num_splits = int(dataset_args["_num_splits"])
            # NOTE(review): kept as the raw value (likely str) to preserve
            # behavior — confirm whether downstream expects an int
            self.split_id = dataset_args["_split_id"]

        # internal state
        self.split_parts = False
        self.by_matrix = False
        # pre-allocated chunk buffers for features (x) and labels (y)
        self.x = numpy.zeros((self.chunk_size, self.n_ins), dtype=numpy.float32)
        if self.has_labels:
            self.y = numpy.zeros((self.chunk_size,), dtype=numpy.int32)
        else:
            self.y = None
        self.numpy_rng = numpy.random.RandomState(self.seed)

        #self.make_shared()
        self.initialize_read()