def handle_starttag()

in data_preparation/download_librivox.py [0:0]


    def handle_starttag(self, tag, attrs):
        """Collect chapter names and reader IDs from the chapter table.

        The signature matches the start-tag callback of
        ``html.parser.HTMLParser`` (presumably the base class -- the class
        header is not visible from this method; confirm).

        Nothing is recorded until a ``<table>`` whose first attribute is
        ``class="chapter-download"`` has been seen. From then on, every
        ``<tr>`` seen after a ``<tbody>`` opens a new chapter entry, and
        ``<a>`` tags fill in the current entry:

        * an ``<a>`` with exactly two attributes, the second being
          ``class="chapter-name"``, provides the chapter name: the last
          ``/``-separated component of the first attribute's value, with
          its file extension removed;
        * an ``<a>`` with exactly one attribute whose value starts with
          ``https://librivox.org/reader/`` provides a reader ID (the part
          of the value after that prefix). A chapter may have several.

        Args:
            tag: Name of the tag being opened.
            attrs: Sequence of ``(name, value)`` attribute pairs; may be
                empty, and ``value`` may be ``None``.

        Raises:
            RuntimeError: If a second ``chapter-download`` table is found,
                or if a chapter row contains two ``chapter-name`` links.
        """
        # Guard on ``attrs`` so an attribute-less <table> does not raise
        # IndexError.
        is_chapter_table = (tag == "table" and attrs
                            and attrs[0] == ('class', 'chapter-download'))
        if is_chapter_table:
            if self.tableFound:
                raise RuntimeError("Two speakers tables ??")
            self.tableBegin = True
            self.tableFound = True

        if not self.tableBegin:
            return

        if tag == 'tbody':
            self.isInBody = True

        if tag == 'tr' and self.isInBody:
            # New chapter row: add placeholders and reset per-row flags.
            self.nChapters += 1
            self.chapterNames.append(None)
            self.chapterReaders.append(None)
            self.currNameFound = False
            self.currIDFound = False

        if tag == 'a':
            if len(attrs) == 2 and attrs[1] == ("class", "chapter-name"):
                if self.currNameFound:
                    raise RuntimeError("Two names for the same chapter !")
                name = attrs[0][1].split('/')[-1]
                self.chapterNames[-1] = os.path.splitext(name)[0]
                self.currNameFound = True
            elif len(attrs) == 1:
                _, link = attrs[0]
                reader_prefix = 'https://librivox.org/reader/'
                # ``link`` is None for an attribute without a value.
                if link is not None and link.startswith(reader_prefix):
                    reader_id = link[len(reader_prefix):]
                    if self.currIDFound:
                        # Additional reader for the same chapter: extend
                        # the existing list rather than replacing it.
                        self.chapterReaders[-1].append(reader_id)
                    else:
                        self.chapterReaders[-1] = [reader_id]
                    self.currIDFound = True