in sagemaker-voice-classification/notebook/coswara_dataset.py [0:0]
def __init__(self, csv_path: Path, file_path: Path, new_sr=8000, audio_len=20, sampling_ratio=5):
    """Build the Coswara dataset index from a metadata csv.

    Only file paths and label indices are collected here. Channel 0 is
    assumed for every recording.

    Args:
        csv_path (Path): Path to the dataset metadata csv.
        file_path (Path): Root of the data folders; each csv row supplies
            the remainder of the path (first column).
        new_sr (int, optional): New sampling rate. Defaults to 8000.
        audio_len (int, optional): Audio length in seconds, based on the
            new sampling rate. Defaults to 20.
        sampling_ratio (int, optional): Additional downsampling ratio.
            Defaults to 5.
    """
    metadata = pd.read_csv(csv_path)

    # Resampling / cropping configuration stored on the instance.
    self.new_sr = new_sr
    self.audio_len = audio_len
    self.sampling_ratio = sampling_ratio
    self.channel = 0

    # Parallel lists: file_names[k] is the full path whose label is labels[k].
    self.file_names = []
    self.labels = []

    # Columns are read by position: 0 = path relative to file_path,
    # 1 = size value (presumably bytes -- TODO confirm), 2 = status key
    # looked up in status2idx.
    for row in metadata.itertuples(index=False, name=None):
        if int(row[1]) <= 1024:
            # Skip small files: they may cause RuntimeError or
            # ExecuteUserScriptError in the __getitem__ function.
            continue
        full_path = file_path / row[0]
        status_idx = status2idx[row[2]]
        self.file_names.append(full_path)
        self.labels.append(status_idx)