in data_preparation/download_librivox.py [0:0]
def handle_starttag(self, tag, attrs):
    """Track chapter names and reader ids while scanning start tags.

    NOTE(review): this looks like an ``html.parser.HTMLParser`` override;
    the enclosing class is not visible here -- confirm.

    Args:
        tag: Name of the tag being opened.
        attrs: Sequence of ``(name, value)`` pairs for the tag; may be
            empty, and a value may be ``None``.

    Raises:
        RuntimeError: If a second ``chapter-download`` table is opened, or
            if one row holds two ``chapter-name`` links.

    State touched on ``self``: ``tableBegin``, ``tableFound``, ``isInBody``,
    ``nChapters``, ``chapterNames``, ``chapterReaders``, ``currNameFound``
    and ``currIDFound``.
    """
    reader_prefix = 'https://librivox.org/reader/'

    # Guard against attribute-less <table> tags before indexing attrs[0].
    if tag == "table" and attrs and attrs[0] == ('class', 'chapter-download'):
        if self.tableFound:
            raise RuntimeError("Two speakers tables ??")
        self.tableBegin = True
        self.tableFound = True

    # Nothing below applies until the chapter table has been opened.
    if not self.tableBegin:
        return

    if tag == 'tbody':
        self.isInBody = True

    if tag == 'tr' and self.isInBody:
        # Each body row opens a new chapter slot, filled in by later <a> tags.
        self.nChapters += 1
        self.chapterNames.append(None)
        self.chapterReaders.append(None)
        self.currNameFound = False
        self.currIDFound = False

    if tag == 'a':
        if len(attrs) == 2 and attrs[1] == ("class", "chapter-name"):
            if self.currNameFound:
                raise RuntimeError("Two names for the same chapter !")
            # Chapter name is the link's last path component, minus extension.
            name = attrs[0][1].split('/')[-1]
            self.chapterNames[-1] = os.path.splitext(name)[0]
            self.currNameFound = True
        elif len(attrs) == 1:
            _, link = attrs[0]
            # A valueless attribute yields None, so check before matching.
            if link is not None and link.startswith(reader_prefix):
                reader_id = link[len(reader_prefix):]
                if self.currIDFound:
                    # Additional reader for this chapter: keep earlier ones.
                    self.chapterReaders[-1].append(reader_id)
                else:
                    self.chapterReaders[-1] = [reader_id]
                self.currIDFound = True