HRESULT OptimizeFacesImpl()

in DirectXMesh/DirectXMeshOptimizeLRU.cpp [196:498]


    HRESULT OptimizeFacesImpl(
        _In_reads_(indexCount) const IndexType* indexList, uint32_t indexCount,
        _Out_writes_(indexCount / 3) uint32_t* faceRemap, uint32_t lruCacheSize, uint32_t offset)
    {
        std::unique_ptr<OptimizeVertexData<IndexType>[]> vertexDataList(new (std::nothrow) OptimizeVertexData<IndexType>[indexCount]);
        if (!vertexDataList)
            return E_OUTOFMEMORY;

        std::unique_ptr<uint32_t[]> vertexRemap(new (std::nothrow) uint32_t[indexCount]);
        std::unique_ptr<uint32_t[]> activeFaceList(new (std::nothrow) uint32_t[indexCount]);
        if (!vertexRemap || !activeFaceList)
            return E_OUTOFMEMORY;

        const uint32_t faceCount = indexCount / 3;

        std::unique_ptr<uint8_t[]> processedFaceList(new (std::nothrow) uint8_t[faceCount]);
        std::unique_ptr<uint32_t[]> faceSorted(new (std::nothrow) uint32_t[faceCount]);
        std::unique_ptr<uint32_t[]> faceReverseLookup(new (std::nothrow) uint32_t[faceCount]);
        if (!processedFaceList || !faceSorted || !faceReverseLookup)
            return E_OUTOFMEMORY;

        memset(processedFaceList.get(), 0, sizeof(uint8_t) * faceCount);

        // build the vertex remap table
        uint32_t uniqueVertexCount = 0;
        uint32_t unused = 0;
        {
            using indexSorter = IndexSortCompareIndexed<uint32_t, IndexType>;

            std::unique_ptr<uint32_t[]> indexSorted(new (std::nothrow) uint32_t[indexCount]);
            if (!indexSorted)
                return E_OUTOFMEMORY;

            for (uint32_t i = 0; i < indexCount; i++)
            {
                indexSorted[i] = i;
            }

            indexSorter sortFunc(indexList);
            std::sort(indexSorted.get(), indexSorted.get() + indexCount, sortFunc);

            bool first = false;
            for (uint32_t i = 0; i < indexCount; i++)
            {
                uint32_t idx = indexSorted[i];
                if (indexList[idx] == IndexType(-1))
                {
                    unused++;
                    vertexRemap[idx] = UNUSED32;
                    continue;
                }

                if (!i || first || sortFunc(indexSorted[i - 1], idx))
                {
                    // it's not a duplicate
                    vertexRemap[idx] = uniqueVertexCount;
                    uniqueVertexCount++;
                    first = false;
                }
                else
                {
                    vertexRemap[indexSorted[i]] = vertexRemap[indexSorted[i - 1]];
                }
            }
        }

        // compute face count per vertex
        for (uint32_t i = 0; i < indexCount; ++i)
        {
            if (vertexRemap[i] == UNUSED32)
                continue;

            OptimizeVertexData<IndexType>& vertexData = vertexDataList[vertexRemap[i]];
            vertexData.activeFaceListSize++;
        }

        const IndexType kEvictedCacheIndex = std::numeric_limits<IndexType>::max();
        {
            // allocate face list per vertex
            uint32_t curActiveFaceListPos = 0;
            for (uint32_t i = 0; i < uniqueVertexCount; ++i)
            {
                OptimizeVertexData<IndexType>& vertexData = vertexDataList[i];
                vertexData.cachePos0 = kEvictedCacheIndex;
                vertexData.cachePos1 = kEvictedCacheIndex;
                vertexData.activeFaceListStart = curActiveFaceListPos;
                curActiveFaceListPos += vertexData.activeFaceListSize;
                vertexData.score = FindVertexScore(vertexData.activeFaceListSize, vertexData.cachePos0, lruCacheSize);

                vertexData.activeFaceListSize = 0;
            }

            assert(curActiveFaceListPos == (indexCount - unused));
        }

        // sort unprocessed faces by highest score
        for (uint32_t f = 0; f < faceCount; f++)
        {
            faceSorted[f] = f;
        }

        FaceValenceSort<uint32_t, IndexType> faceValenceSort(vertexDataList.get());
        std::sort(faceSorted.get(), faceSorted.get() + faceCount, faceValenceSort);

        for (uint32_t f = 0; f < faceCount; f++)
        {
            faceReverseLookup[faceSorted[f]] = f;
        }

        // fill out face list per vertex
        for (uint32_t i = 0; i < indexCount; i += 3)
        {
            for (uint32_t j = 0; j < 3; ++j)
            {
                uint32_t v = vertexRemap[size_t(i) + size_t(j)];
                if (v == UNUSED32)
                    continue;

                OptimizeVertexData<IndexType>& vertexData = vertexDataList[v];
                activeFaceList[size_t(vertexData.activeFaceListStart) + vertexData.activeFaceListSize] = i;
                vertexData.activeFaceListSize++;
            }
        }

        uint32_t vertexCacheBuffer[(kMaxVertexCacheSize + 3) * 2] = {};
        uint32_t *cache0 = vertexCacheBuffer;
        uint32_t *cache1 = vertexCacheBuffer + (kMaxVertexCacheSize + 3);
        uint32_t entriesInCache0 = 0;

        uint32_t bestFace = 0;
        for (size_t i = 0; i < indexCount; i += 3)
        {
            if (vertexRemap[i] == UNUSED32
                || vertexRemap[i + 1] == UNUSED32
                || vertexRemap[i + 2] == UNUSED32)
            {
                ++bestFace;
                continue;
            }
            else
                break;
        }

        float bestScore = -1.f;

        uint32_t nextBestFace = 0;

        uint32_t curFace = 0;
        for (size_t i = 0; i < indexCount; i += 3)
        {
            if (vertexRemap[i] == UNUSED32
                || vertexRemap[i + 1] == UNUSED32
                || vertexRemap[i + 2] == UNUSED32)
            {
                continue;
            }

            if (bestScore < 0.f)
            {
                // no verts in the cache are used by any unprocessed faces so
                // search all unprocessed faces for a new starting point
                while (nextBestFace < faceCount)
                {
                    uint32_t faceIndex = faceSorted[nextBestFace++];
                    if (processedFaceList[faceIndex] == 0)
                    {
                        uint32_t face = faceIndex * 3;
                        uint32_t i0 = vertexRemap[face];
                        uint32_t i1 = vertexRemap[size_t(face) + 1];
                        uint32_t i2 = vertexRemap[size_t(face) + 2];
                        if (i0 != UNUSED32 && i1 != UNUSED32 && i2 != UNUSED32)
                        {
                            // we're searching a pre-sorted list, first one we find will be the best
                            bestFace = face;
                            bestScore = vertexDataList[i0].score
                                + vertexDataList[i1].score
                                + vertexDataList[i2].score;
                            break;
                        }
                    }
                }
                assert(bestScore >= 0.f);
            }

            processedFaceList[bestFace / 3] = 1;
            uint16_t entriesInCache1 = 0;

            faceRemap[curFace] = (bestFace / 3) + offset;
            curFace++;

            // add bestFace to LRU cache
            assert(vertexRemap[bestFace] != UNUSED32);
            assert(vertexRemap[size_t(bestFace) + 1] != UNUSED32);
            assert(vertexRemap[size_t(bestFace) + 2] != UNUSED32);

            for (size_t v = 0; v < 3; ++v)
            {
                OptimizeVertexData<IndexType>& vertexData = vertexDataList[vertexRemap[bestFace + v]];

                if (vertexData.cachePos1 >= entriesInCache1)
                {
                    vertexData.cachePos1 = entriesInCache1;
                    cache1[entriesInCache1++] = vertexRemap[bestFace + v];

                    if (vertexData.activeFaceListSize == 1)
                    {
                        --vertexData.activeFaceListSize;
                        continue;
                    }
                }

                assert(vertexData.activeFaceListSize > 0);
                uint32_t* begin = activeFaceList.get() + vertexData.activeFaceListStart;
                uint32_t* end = activeFaceList.get() + (size_t(vertexData.activeFaceListStart) + vertexData.activeFaceListSize);
                uint32_t* it = std::find(begin, end, bestFace);

                assert(it != end);

                std::swap(*it, *(end - 1));

                --vertexData.activeFaceListSize;
                vertexData.score = FindVertexScore(vertexData.activeFaceListSize, vertexData.cachePos1, lruCacheSize);

                // need to re-sort the faces that use this vertex, as their score will change due to activeFaceListSize shrinking
                for (const uint32_t *fi = begin; fi != end - 1; ++fi)
                {
                    uint32_t faceIndex = *fi / 3;
                    uint32_t n = faceReverseLookup[faceIndex];
                    assert(faceSorted[n] == faceIndex);

                    // found it, now move it up
                    while (n > 0)
                    {
                        if (faceValenceSort(n, n - 1))
                        {
                            faceReverseLookup[faceSorted[n]] = n - 1;
                            faceReverseLookup[faceSorted[n - 1]] = n;
                            std::swap(faceSorted[n], faceSorted[n - 1]);
                            n--;
                        }
                        else
                        {
                            break;
                        }
                    }
                }
            }

            // move the rest of the old verts in the cache down and compute their new scores
            for (uint32_t c0 = 0; c0 < entriesInCache0; ++c0)
            {
                OptimizeVertexData<IndexType>& vertexData = vertexDataList[cache0[c0]];

                if (vertexData.cachePos1 >= entriesInCache1)
                {
                    vertexData.cachePos1 = entriesInCache1;
                    cache1[entriesInCache1++] = cache0[c0];
                    vertexData.score = FindVertexScore(vertexData.activeFaceListSize, vertexData.cachePos1, lruCacheSize);

                    // don't need to re-sort this vertex... once it gets out of the cache, it'll have its original score
                }
            }

            // find the best scoring triangle in the current cache (including up to 3 that were just evicted)
            bestScore = -1.f;

            for (uint32_t c1 = 0; c1 < entriesInCache1; ++c1)
            {
                OptimizeVertexData<IndexType>& vertexData = vertexDataList[cache1[c1]];
                vertexData.cachePos0 = vertexData.cachePos1;
                vertexData.cachePos1 = kEvictedCacheIndex;

                for (uint32_t j = 0; j < vertexData.activeFaceListSize; ++j)
                {
                    uint32_t face = activeFaceList[size_t(vertexData.activeFaceListStart) + j];
                    float faceScore = 0.f;

                    for (uint32_t v = 0; v < 3; v++)
                    {
                        OptimizeVertexData<IndexType>& faceVertexData = vertexDataList[vertexRemap[size_t(face) + v]];
                        faceScore += faceVertexData.score;
                    }

                    if (faceScore > bestScore)
                    {
                        bestScore = faceScore;
                        bestFace = face;
                    }
                }
            }

            std::swap(cache0, cache1);

            entriesInCache0 = std::min<uint32_t>(entriesInCache1, lruCacheSize);
        }

        for (; curFace < faceCount; ++curFace)
        {
            faceRemap[curFace] = UNUSED32;
        }

        return S_OK;
    }