static ITensor CreateTensor()

in Tools/WinMLRunner/src/BindingUtilities.cpp [453:647]


    // Creates a TensorValue<TKind> from the already-decoded input buffer and returns it
    // bound either as a CPU tensor or as a GPU tensor backed by a D3D12 buffer resource.
    //
    // args             - command-line options (input source, tensorize function, garbage-data mode)
    // tensorShape      - shape the model expects; treated as NCHW when tensorizing image/CSV input
    // inputBindingType - CPU returns the tensor directly; GPU uploads the data into a
    //                    default-heap D3D12 buffer and wraps it via ITensorStaticsNative
    // inputBufferDesc  - layout (element format, stride, total size) of the decoded input buffer
    //
    // Throws winrt hresult errors on shape/size mismatches, unknown formats, or D3D12 failures.
    static ITensor CreateTensor(const CommandLineArgs& args, const std::vector<int64_t>& tensorShape,
                                const InputBindingType inputBindingType, const InputBufferDesc& inputBufferDesc)
    {
        using TensorValue = typename TensorKindToValue<TKind>::Type;
        using WriteType = typename TensorKindToPointerType<TKind>::Type;

        // Map the incoming Tensor as a TensorNative to get the actual data buffer.
        auto tensorValue = TensorValue::Create(tensorShape);

        com_ptr<ITensorNative> spTensorValueNative;
        tensorValue.as(spTensorValueNative);

        WriteType* actualData;
        uint32_t actualSizeInBytes;
        THROW_IF_FAILED(spTensorValueNative->GetBuffer(reinterpret_cast<BYTE**>(&actualData), &actualSizeInBytes));

        if (args.IsCSVInput() || args.IsImageInput())
        {
            // Assumes NCHW; validate before indexing dimensions 1..3 (previously this was
            // unchecked and indexed out of bounds for models with fewer than 4 dimensions).
            if (tensorShape.size() < 4)
            {
                throw hresult_invalid_argument(L"CreateTensor<TKind>: expected a 4 dimensional (NCHW) input shape");
            }
            uint32_t channels = static_cast<uint32_t>(tensorShape[1]);
            uint32_t tensorHeight = static_cast<uint32_t>(tensorShape[2]);
            uint32_t tensorWidth = static_cast<uint32_t>(tensorShape[3]);

            // Check to make sure the sizes are right
            uint32_t inputElementCount = inputBufferDesc.totalSizeInBytes / inputBufferDesc.elementStrideInBytes;
            uint32_t outputElementCount = actualSizeInBytes / (channels * sizeof(WriteType));
            if (inputElementCount != outputElementCount)
            {
                throw hresult_invalid_argument(L"Input size / shape is different from what the model expects");
            }

            // Per-channel normalization parameters consumed by CopyTensorFromBuffer.
            // Identity uses scale 1 / mean 0 / stddev 1 so values pass through untouched.
            float scale;
            std::vector<float> means = {};
            std::vector<float> stddevs = {};

            const auto& tensorizeArgs = args.TensorizeArgs();
            const auto& normalizeParams = tensorizeArgs.Normalize;
            switch (tensorizeArgs.Func)
            {
                case TensorizeFuncs::Identity:
                    scale = 1.0f;
                    means.resize(channels, 0.0f);
                    stddevs.resize(channels, 1.0f);
                    break;
                case TensorizeFuncs::Normalize:
                    switch (inputBufferDesc.elementFormat)
                    {
                        case BitmapPixelFormat::Gray8:
                        case BitmapPixelFormat::Gray16:
                        case BitmapPixelFormat::Rgba8:
                        case BitmapPixelFormat::Rgba16:
                            scale = normalizeParams.Scale;
                            means.resize(channels);
                            stddevs.resize(channels);
                            // NOTE(review): assumes Means/StdDevs hold at least `channels`
                            // entries - confirm the command-line parser enforces this.
                            for (uint32_t i = 0; i < channels; ++i)
                            {
                                means[i] = normalizeParams.Means[i];
                                stddevs[i] = normalizeParams.StdDevs[i];
                            }
                            break;
                        case BitmapPixelFormat::Bgra8:
                            scale = normalizeParams.Scale;
                            means.resize(channels);
                            stddevs.resize(channels);
                            // BGRA stores channels reversed relative to RGBA, so mirror the
                            // user-supplied means/stddevs into reversed channel order.
                            for (uint32_t i = 0; i < channels; ++i)
                            {
                                means[channels - 1 - i] = normalizeParams.Means[i];
                                stddevs[channels - 1 - i] = normalizeParams.StdDevs[i];
                            }
                            break;

                        default:
                            throw hresult_invalid_argument(
                                L"CreateTensor<TKind>: Unhandled SoftwareBitmap pixel format");
                    }
                    break;
                default:
                    throw hresult_invalid_argument(L"CreateTensor<TKind>: Unknown Tensorize Function");
            }

            // Tensorize from the source buffer's element type into the model's WriteType.
            switch (inputBufferDesc.channelFormat)
            {
                case TensorKind::UInt8:
                    CopyTensorFromBuffer<TKind, uint8_t>(actualData, tensorHeight, tensorWidth, inputBufferDesc, scale,
                                                         means, stddevs);
                    break;
                case TensorKind::Float:
                    CopyTensorFromBuffer<TKind, float>(actualData, tensorHeight, tensorWidth, inputBufferDesc, scale,
                                                       means, stddevs);
                    break;
                default:
                    throw hresult_not_implemented(L"Creating Tensors for Input Images with unhandled channel format!");
            }
        }
        // Garbage Data
        else if (args.IsGarbageDataRange())
        {
            GenerateRandomData<TKind>(actualData, actualSizeInBytes, args.GarbageDataMaxValue());
        }

        if (inputBindingType == InputBindingType::CPU)
        {
            return tensorValue;
        }
        else // GPU Tensor
        {
            com_ptr<ID3D12Resource> pGPUResource = nullptr;
            try
            {
                // Create the d3d device. All D3D12 creation calls below are now wrapped in
                // THROW_IF_FAILED: previously their HRESULTs were ignored, so a failure
                // surfaced later as a null-pointer crash instead of a clear error.
                com_ptr<ID3D12Device> pD3D12Device = nullptr;
                THROW_IF_FAILED(D3D12CreateDevice(nullptr, D3D_FEATURE_LEVEL::D3D_FEATURE_LEVEL_11_0,
                                                  __uuidof(ID3D12Device), pD3D12Device.put_void()));
                auto heapType = CD3DX12_HEAP_PROPERTIES(D3D12_HEAP_TYPE_DEFAULT);
                auto resourceDesc =
                    CD3DX12_RESOURCE_DESC::Buffer(actualSizeInBytes, D3D12_RESOURCE_FLAG_ALLOW_UNORDERED_ACCESS);
                THROW_IF_FAILED(pD3D12Device->CreateCommittedResource(
                    &heapType, D3D12_HEAP_FLAG_NONE, &resourceDesc, D3D12_RESOURCE_STATE_COMMON, nullptr,
                    __uuidof(ID3D12Resource), pGPUResource.put_void()));
                if (!args.IsGarbageInput())
                {
                    com_ptr<ID3D12Resource> imageUploadHeap;
                    heapType = CD3DX12_HEAP_PROPERTIES(D3D12_HEAP_TYPE_UPLOAD);
                    resourceDesc = CD3DX12_RESOURCE_DESC::Buffer(actualSizeInBytes);
                    // Create the GPU upload buffer.
                    THROW_IF_FAILED(pD3D12Device->CreateCommittedResource(
                        &heapType, D3D12_HEAP_FLAG_NONE, &resourceDesc, D3D12_RESOURCE_STATE_GENERIC_READ, nullptr,
                        __uuidof(ID3D12Resource), imageUploadHeap.put_void()));

                    // create the command queue.
                    com_ptr<ID3D12CommandQueue> dxQueue = nullptr;
                    D3D12_COMMAND_QUEUE_DESC commandQueueDesc = {};
                    commandQueueDesc.Type = D3D12_COMMAND_LIST_TYPE_DIRECT;
                    THROW_IF_FAILED(pD3D12Device->CreateCommandQueue(&commandQueueDesc, __uuidof(ID3D12CommandQueue),
                                                                     dxQueue.put_void()));
                    com_ptr<ILearningModelDeviceFactoryNative> devicefactory =
                        get_activation_factory<LearningModelDevice, ILearningModelDeviceFactoryNative>();
                    // NOTE(review): the device created here is never used afterwards - presumably
                    // it registers the queue with WinML; confirm whether it is actually required.
                    com_ptr<::IUnknown> spUnk;
                    THROW_IF_FAILED(devicefactory->CreateFromD3D12CommandQueue(dxQueue.get(), spUnk.put()));

                    // Create ID3D12GraphicsCommandList and Allocator
                    D3D12_COMMAND_LIST_TYPE queuetype = dxQueue->GetDesc().Type;
                    com_ptr<ID3D12CommandAllocator> alloctor;
                    com_ptr<ID3D12GraphicsCommandList> cmdList;
                    THROW_IF_FAILED(pD3D12Device->CreateCommandAllocator(
                        queuetype, winrt::guid_of<ID3D12CommandAllocator>(), alloctor.put_void()));
                    THROW_IF_FAILED(pD3D12Device->CreateCommandList(0, queuetype, alloctor.get(), nullptr,
                                                                    winrt::guid_of<ID3D12CommandList>(),
                                                                    cmdList.put_void()));

                    // Copy from Cpu to GPU: record the upload into the command list.
                    D3D12_SUBRESOURCE_DATA CPUData = {};
                    CPUData.pData = actualData;
                    CPUData.RowPitch = actualSizeInBytes;
                    CPUData.SlicePitch = actualSizeInBytes;
                    UpdateSubresources(cmdList.get(), pGPUResource.get(), imageUploadHeap.get(), 0, 0, 1, &CPUData);

                    // Close the command list and execute it to begin the initial GPU setup.
                    THROW_IF_FAILED(cmdList->Close());
                    ID3D12CommandList* ppCommandLists[] = { cmdList.get() };
                    dxQueue->ExecuteCommandLists(_countof(ppCommandLists), ppCommandLists);

                    // Create Event (now null-checked; previously an allocation failure went unnoticed).
                    HANDLE directEvent = CreateEvent(nullptr, FALSE, FALSE, nullptr);
                    if (directEvent == nullptr)
                    {
                        THROW_IF_FAILED(HRESULT_FROM_WIN32(GetLastError()));
                    }

                    // Create Fence (winrt::com_ptr for consistency with the rest of this function).
                    com_ptr<ID3D12Fence> spDirectFence = nullptr;
                    THROW_IF_FAILED(pD3D12Device->CreateFence(0, D3D12_FENCE_FLAG_NONE, __uuidof(ID3D12Fence),
                                                              spDirectFence.put_void()));
                    // Adds fence to queue
                    THROW_IF_FAILED(dxQueue->Signal(spDirectFence.get(), 1));
                    THROW_IF_FAILED(spDirectFence->SetEventOnCompletion(1, directEvent));

                    // Wait for signal so the upload heap (scoped to this block) outlives the GPU
                    // copy, then close the event handle - it previously leaked on every call.
                    DWORD retVal = WaitForSingleObject(directEvent, INFINITE);
                    CloseHandle(directEvent);
                    if (retVal != WAIT_OBJECT_0)
                    {
                        THROW_IF_FAILED(E_UNEXPECTED);
                    }
                }
            }
            catch (...)
            {
                std::cout << "Couldn't create and copy CPU tensor resource to GPU resource" << std::endl;
                throw;
            }
            // Wrap the D3D12 resource as an ITensor. `as` (rather than try_as) so a failed
            // QI throws instead of silently handing the caller a null tensor.
            com_ptr<ITensorStaticsNative> tensorfactory = get_activation_factory<TensorValue, ITensorStaticsNative>();
            com_ptr<::IUnknown> spUnkTensor;
            THROW_IF_FAILED(tensorfactory->CreateFromD3D12Resource(pGPUResource.get(),
                                                                   const_cast<int64_t*>(tensorShape.data()),
                                                                   static_cast<int>(tensorShape.size()),
                                                                   spUnkTensor.put()));
            TensorValue returnTensor(nullptr);
            spUnkTensor.as(returnTensor);
            return returnTensor;
        }
    }