static HsInt loadArchive_()

in rts/linker/LoadArchive.c [242:624]


static HsInt loadArchive_ (pathchar *path)
{
    ObjectCode* oc = NULL;
    char *image = NULL;
    HsInt retcode = 0;
    int memberSize;
    FILE *f = NULL;
    int n;
    size_t thisFileNameSize = (size_t)-1; /* shut up bogus GCC warning */
    char *fileName;
    size_t fileNameSize;
    int isObject, isGnuIndex, isThin, isImportLib;
    char tmp[20];
    char *gnuFileIndex;
    int gnuFileIndexSize;
    int misalignment = 0;

    DEBUG_LOG("start\n");
    DEBUG_LOG("Loading archive `%" PATH_FMT "'\n", path);

    /* Check that we haven't already loaded this archive.
       Ignore requests to load multiple times */
    if (isAlreadyLoaded(path)) {
        IF_DEBUG(linker,
                 debugBelch("ignoring repeated load of %" PATH_FMT "\n", path));
        return 1; /* success */
    }

    gnuFileIndex = NULL;
    gnuFileIndexSize = 0;

    fileNameSize = 32;
    fileName = stgMallocBytes(fileNameSize, "loadArchive(fileName)");

    isThin = 0;
    isImportLib = 0;

    f = pathopen(path, WSTR("rb"));
    if (!f)
        FAIL("loadObj: can't read `%" PATH_FMT "'", path);

    /* Check if this is an archive by looking for the magic "!<arch>\n"
     * string.  Usually, if this fails, we belch an error and return.  On
     * Darwin however, we may have a fat archive, which contains archives for
     * more than one architecture.  Fat archives start with the magic number
     * 0xcafebabe, always stored big endian.  If we find a fat_header, we scan
     * through the fat_arch structs, searching through for one for our host
     * architecture.  If a matching struct is found, we read the offset
     * of our archive data (nfat_offset) and seek forward nfat_offset bytes
     * from the start of the file.
     *
     * A subtlety is that all of the members of the fat_header and fat_arch
     * structs are stored big endian, so we need to call byte order
     * conversion functions.
     *
     * If we find the appropriate architecture in a fat archive, we gobble
     * its magic "!<arch>\n" string and continue processing just as if
     * we had a single architecture archive.
     */

    n = fread ( tmp, 1, 8, f );
    if (n != 8) {
        FAIL("Failed reading header from `%" PATH_FMT "'", path);
    }
    if (strncmp(tmp, "!<arch>\n", 8) == 0) {}
    /* Check if this is a thin archive by looking for the magic string "!<thin>\n"
     *
     * ar thin libraries have the exact same format as normal archives except they
     * have a different magic string and they don't copy the object files into the
     * archive.
     *
     * Instead each header entry points to the location of the object file on disk.
     * This is useful when a library is only created to satisfy a compile time dependency
     * instead of to be distributed. This saves the time required for copying.
     *
     * Thin archives are always flattened. They always only contain simple headers
     * pointing to the object file and so we need not allocate more memory than needed
     * to find the object file.
     *
     */
    else if (strncmp(tmp, "!<thin>\n", 8) == 0) {
        isThin = 1;
    }
    else {
        StgBool success = checkFatArchive(tmp, f, path);
        if (!success)
            goto fail;
    }
    DEBUG_LOG("loading archive contents\n");

    while (1) {
        DEBUG_LOG("reading at %ld\n", ftell(f));
        n = fread ( fileName, 1, 16, f );
        if (n != 16) {
            if (feof(f)) {
                DEBUG_LOG("EOF while reading from '%" PATH_FMT "'\n", path);
                break;
            }
            else {
                FAIL("Failed reading file name from `%" PATH_FMT "'", path);
            }
        }

#if defined(darwin_HOST_OS) || defined(ios_HOST_OS)
        if (strncmp(fileName, "!<arch>\n", 8) == 0) {
            DEBUG_LOG("found the start of another archive, breaking\n");
            break;
        }
#endif

        n = fread ( tmp, 1, 12, f );
        if (n != 12)
            FAIL("Failed reading mod time from `%" PATH_FMT "'", path);
        n = fread ( tmp, 1, 6, f );
        if (n != 6)
            FAIL("Failed reading owner from `%" PATH_FMT "'", path);
        n = fread ( tmp, 1, 6, f );
        if (n != 6)
            FAIL("Failed reading group from `%" PATH_FMT "'", path);
        n = fread ( tmp, 1, 8, f );
        if (n != 8)
            FAIL("Failed reading mode from `%" PATH_FMT "'", path);
        n = fread ( tmp, 1, 10, f );
        if (n != 10)
            FAIL("Failed reading size from `%" PATH_FMT "'", path);
        tmp[10] = '\0';
        for (n = 0; isdigit(tmp[n]); n++);
        tmp[n] = '\0';
        memberSize = atoi(tmp);

        DEBUG_LOG("size of this archive member is %d\n", memberSize);
        n = fread ( tmp, 1, 2, f );
        if (n != 2)
            FAIL("Failed reading magic from `%" PATH_FMT "'", path);
        if (strncmp(tmp, "\x60\x0A", 2) != 0)
            FAIL("Failed reading magic from `%" PATH_FMT "' at %ld. Got %c%c",
                 path, ftell(f), tmp[0], tmp[1]);

        isGnuIndex = 0;
        /* Check for BSD-variant large filenames */
        if (0 == strncmp(fileName, "#1/", 3)) {
            size_t n = 0;
            fileName[16] = '\0';
            if (isdigit(fileName[3])) {
                for (n = 4; isdigit(fileName[n]); n++)
                    ;

                fileName[n] = '\0';
                thisFileNameSize = atoi(fileName + 3);
                memberSize -= thisFileNameSize;
                if (thisFileNameSize >= fileNameSize) {
                    /* Double it to avoid potentially continually
                       increasing it by 1 */
                    fileNameSize = thisFileNameSize * 2;
                    fileName = stgReallocBytes(fileName, fileNameSize,
                                               "loadArchive(fileName)");
                }
                n = fread(fileName, 1, thisFileNameSize, f);
                if (n != thisFileNameSize) {
                    errorBelch("Failed reading filename from `%" PATH_FMT "'",
                               path);
                    goto fail;
                }
                fileName[thisFileNameSize] = 0;
                /* On OS X at least, thisFileNameSize is the size of the
                   fileName field, not the length of the fileName
                   itself. */
                thisFileNameSize = strlen(fileName);
            } else {
                errorBelch("BSD-variant filename size not found "
                           "while reading filename from `%" PATH_FMT "'", path);
                goto fail;
            }
        }
        /* Check for GNU file index file */
        else if (0 == strncmp(fileName, "//", 2)) {
            fileName[0] = '\0';
            thisFileNameSize = 0;
            isGnuIndex = 1;
        }
        /* Check for a file in the GNU file index */
        else if (fileName[0] == '/') {
            if (!lookupGNUArchiveIndex(gnuFileIndexSize, &fileName,
                     gnuFileIndex, path, &thisFileNameSize, &fileNameSize)) {
                goto fail;
            }
        }
        /* Finally, the case where the filename field actually contains
           the filename */
        else {
            /* GNU ar terminates filenames with a '/', this allowing
               spaces in filenames. So first look to see if there is a
               terminating '/'. */
            for (thisFileNameSize = 0;
                 thisFileNameSize < 16;
                 thisFileNameSize++) {
                if (fileName[thisFileNameSize] == '/') {
                    fileName[thisFileNameSize] = '\0';
                    break;
                }
            }
            /* If we didn't find a '/', then a space teminates the
               filename. Note that if we don't find one, then
               thisFileNameSize ends up as 16, and we already have the
               '\0' at the end. */
            if (thisFileNameSize == 16) {
                for (thisFileNameSize = 0;
                     thisFileNameSize < 16;
                     thisFileNameSize++) {
                    if (fileName[thisFileNameSize] == ' ') {
                        fileName[thisFileNameSize] = '\0';
                        break;
                    }
                }
            }
        }

        DEBUG_LOG("Found member file `%s'\n", fileName);

        /* TODO: Stop relying on file extensions to determine input formats.
                 Instead try to match file headers. See Trac #13103.  */
        isObject = (thisFileNameSize >= 2 && strncmp(fileName + thisFileNameSize - 2, ".o"  , 2) == 0)
                || (thisFileNameSize >= 4 && strncmp(fileName + thisFileNameSize - 4, ".p_o", 4) == 0)
                || (thisFileNameSize >= 4 && strncmp(fileName + thisFileNameSize - 4, ".obj", 4) == 0);

#if defined(OBJFORMAT_PEi386)
        /*
        * Note [MSVC import files (ext .lib)]
        * MSVC compilers store the object files in
        * the import libraries with extension .dll
        * so on Windows we should look for those too.
        * The PE COFF format doesn't specify any specific file name
        * for sections. So on windows, just try to load it all.
        *
        * Linker members (e.g. filename / are skipped since they are not needed)
        */
        isImportLib = thisFileNameSize >= 4 && strncmp(fileName + thisFileNameSize - 4, ".dll", 4) == 0;
#endif // windows

        DEBUG_LOG("\tthisFileNameSize = %d\n", (int)thisFileNameSize);
        DEBUG_LOG("\tisObject = %d\n", isObject);

        if (isObject) {
            char *archiveMemberName;

            DEBUG_LOG("Member is an object file...loading...\n");

#if defined(darwin_HOST_OS) || defined(ios_HOST_OS)
            if (RTS_LINKER_USE_MMAP)
                image = mmapForLinker(memberSize, MAP_ANONYMOUS, -1, 0);
            else {
                /* See loadObj() */
                misalignment = machoGetMisalignment(f);
                image = stgMallocBytes(memberSize + misalignment,
                                        "loadArchive(image)");
                image += misalignment;
            }

#else // not darwin
            image = stgMallocBytes(memberSize, "loadArchive(image)");
#endif
            if (isThin) {
                if (!readThinArchiveMember(n, memberSize, path,
                        fileName, image)) {
                    goto fail;
                }
            }
            else
            {
                n = fread ( image, 1, memberSize, f );
                if (n != memberSize) {
                    FAIL("error whilst reading `%" PATH_FMT "'", path);
                }
            }

            archiveMemberName = stgMallocBytes(pathlen(path) + thisFileNameSize + 3,
                                               "loadArchive(file)");
            sprintf(archiveMemberName, "%" PATH_FMT "(%.*s)",
                    path, (int)thisFileNameSize, fileName);

            oc = mkOc(path, image, memberSize, false, archiveMemberName
                     , misalignment);
#if defined(OBJFORMAT_MACHO)
            ocInit_MachO( oc );
#endif
#if defined(OBJFORMAT_ELF)
            ocInit_ELF( oc );
#endif

            stgFree(archiveMemberName);

            if (0 == loadOc(oc)) {
                stgFree(fileName);
                fclose(f);
                return 0;
            } else {
                oc->next = objects;
                objects = oc;
            }
        }
        else if (isGnuIndex) {
            if (gnuFileIndex != NULL) {
                FAIL("GNU-variant index found, but already have an index, \
while reading filename from `%" PATH_FMT "'", path);
            }
            DEBUG_LOG("Found GNU-variant file index\n");
#if RTS_LINKER_USE_MMAP
            gnuFileIndex = mmapForLinker(memberSize + 1, MAP_ANONYMOUS, -1, 0);
#else
            gnuFileIndex = stgMallocBytes(memberSize + 1, "loadArchive(image)");
#endif
            n = fread ( gnuFileIndex, 1, memberSize, f );
            if (n != memberSize) {
                FAIL("error whilst reading `%" PATH_FMT "'", path);
            }
            gnuFileIndex[memberSize] = '/';
            gnuFileIndexSize = memberSize;
        }
        else if (isImportLib) {
#if defined(OBJFORMAT_PEi386)
            if (checkAndLoadImportLibrary(path, fileName, f)) {
                DEBUG_LOG("Member is an import file section... "
                          "Corresponding DLL has been loaded...\n");
            }
            else {
                DEBUG_LOG("Member is not a valid import file section... "
                          "Skipping...\n");
                n = fseek(f, memberSize, SEEK_CUR);
                if (n != 0)
                    FAIL("error whilst seeking by %d in `%" PATH_FMT "'",
                    memberSize, path);
            }
#endif
        }
        else {
            DEBUG_LOG("`%s' does not appear to be an object file\n",
                      fileName);
            if (!isThin || thisFileNameSize == 0) {
                n = fseek(f, memberSize, SEEK_CUR);
                if (n != 0)
                    FAIL("error whilst seeking by %d in `%" PATH_FMT "'",
                         memberSize, path);
            }
        }

        /* .ar files are 2-byte aligned */
        if (!(isThin && thisFileNameSize > 0) && memberSize % 2) {
            DEBUG_LOG("trying to read one pad byte\n");
            n = fread ( tmp, 1, 1, f );
            if (n != 1) {
                if (feof(f)) {
                    DEBUG_LOG("found EOF while reading one pad byte\n");
                    break;
                }
                else {
                    FAIL("Failed reading padding from `%" PATH_FMT "'", path);
                }
            }
            DEBUG_LOG("successfully read one pad byte\n");
        }
        DEBUG_LOG("reached end of archive loading while loop\n");
    }
    retcode = 1;
fail:
    if (f != NULL)
        fclose(f);

    if (fileName != NULL)
        stgFree(fileName);
    if (gnuFileIndex != NULL) {
#if RTS_LINKER_USE_MMAP
        munmap(gnuFileIndex, gnuFileIndexSize + 1);
#else
        stgFree(gnuFileIndex);
#endif
    }

    if (RTS_LINKER_USE_MMAP)
        m32_allocator_flush();

    DEBUG_LOG("done\n");
    return retcode;
}