def import_page()

in data_preparation/download_librivox.py [0:0]


def import_page(baseUrl, offset, limit, dirOut, language):
    """Download one page of the book catalogue served at ``baseUrl``.

    The catalogue is queried as JSON with the given ``offset`` and ``limit``.
    For every returned book whose ``language`` field equals ``language`` the
    following files are written to ``dirOut`` (``<name>`` is the base name of
    the book's zip archive without extension):

    * ``<name>_speaker_data.json`` -- chapter names and readers as returned
      by ``get_reader_data`` (both ``null`` if that lookup failed);
    * ``<name>_metadata.json`` -- the raw catalogue entry of the book;
    * ``<name>_text.txt`` -- the text returned by ``get_text_data``
      (skipped, with an error message, if it cannot be retrieved);
    * ``<name>.zip`` -- the audio archive, unless that file already exists.

    Args:
        baseUrl: URL of the catalogue API.
        offset: Index of the first book to request.
        limit: Maximum number of books to request.
        dirOut: Directory receiving all the output files.
        language: Only books with exactly this language are downloaded.

    Returns:
        -1 if the server answered with an error or returned no book,
        otherwise the total size in bytes of the text files and audio
        archives successfully downloaded by this call. Archives already
        present on disk are not downloaded again and are not counted.

    Raises:
        RuntimeError: If the response contains neither an ``"error"`` nor a
            ``"books"`` entry.
    """
    urlRequest = f'{baseUrl}/?offset={offset}&format=json&limit={limit}'
    response = requests.get(urlRequest).json()

    if "error" in response:
        return -1

    if "books" not in response:
        raise RuntimeError(f"Invalid url {baseUrl}")

    bookList = response['books']
    if not bookList:
        return -1

    fullSize = 0

    for bookData in bookList:
        if bookData['language'] != language:
            continue

        title = bookData['title']
        print(f'Loading title {title}...')

        # Speaker data is best effort: on failure the JSON file is still
        # written, with null entries. Only Exception is caught so that
        # Ctrl-C and sys.exit() are not swallowed here.
        chapterReaders, chaptersNames = None, None
        try:
            chaptersNames, chapterReaders = \
                get_reader_data(bookData['url_librivox'])
        except Exception:
            print(colored(f'Error when loading title {title} metadata', 'red'))
            print(colored(sys.exc_info(), 'red'))

        zipUrl = bookData['url_zip_file']
        name = os.path.splitext(os.path.basename(zipUrl))[0]

        outSpeaker = os.path.join(dirOut, f'{name}_speaker_data.json')
        with open(outSpeaker, 'w') as file:
            json.dump({"names": chaptersNames,
                       "readers": chapterReaders},
                      file, indent=2)

        print(f'{title}\'s speaker data loaded')

        outMetadata = os.path.join(dirOut, f'{name}_metadata.json')
        with open(outMetadata, 'w') as file:
            json.dump(bookData, file, indent=2)
        print(f'{title}\'s metadata loaded')

        # The text is optional as well: report the failure and carry on with
        # the audio archive.
        try:
            txtData = get_text_data(bookData['url_text_source'])
            outTxt = os.path.join(dirOut, f'{name}_text.txt')
            # Explicit UTF-8 so that non-ASCII book text can be written
            # whatever the platform's default encoding is.
            with open(outTxt, 'w', encoding='utf-8') as file:
                file.write(txtData)
            print('... text data loaded')
            fullSize += os.path.getsize(outTxt)
        except Exception:
            print(colored(f'Error when loading {title}\'s text data', 'red'))

        print(f'Loading audio data at {zipUrl}')
        outPath = os.path.join(dirOut, name + ".zip")
        if not os.path.isfile(outPath):
            try:
                urllib.request.urlretrieve(zipUrl, outPath, RequestPBar())
                # Measure the archive once it is fully on disk: this needs no
                # extra HTTP request, does not depend on the server sending a
                # Content-Length header and never counts a failed download.
                fullSize += os.path.getsize(outPath)
            except KeyboardInterrupt:
                # Do not leave a truncated archive behind: the isfile() check
                # above would make the next run skip it.
                if os.path.isfile(outPath):
                    os.remove(outPath)
                sys.exit()
            except Exception:
                if os.path.isfile(outPath):
                    os.remove(outPath)
                print(colored(f'Error when loading {title}\'s audio data',
                              'red'))

        print('')

    return fullSize