in Tools/WinMLRunner/src/BindingUtilities.cpp [453:647]
template <TensorKind TKind>
static ITensor CreateTensor(const CommandLineArgs& args, const std::vector<int64_t>& tensorShape,
const InputBindingType inputBindingType, const InputBufferDesc& inputBufferDesc)
{
using TensorValue = typename TensorKindToValue<TKind>::Type;
using WriteType = typename TensorKindToPointerType<TKind>::Type;
// Create the tensor value, then query ITensorNative to get direct access to its underlying data buffer.
auto tensorValue = TensorValue::Create(tensorShape);
com_ptr<ITensorNative> spTensorValueNative;
tensorValue.as(spTensorValueNative);
WriteType* actualData;
uint32_t actualSizeInBytes;
THROW_IF_FAILED(spTensorValueNative->GetBuffer(reinterpret_cast<BYTE**>(&actualData), &actualSizeInBytes));
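// CSV and image inputs are tensorized from the prepared input buffer below.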
if (args.IsCSVInput() || args.IsImageInput())
{
// The tensor shape is assumed to be NCHW: [batch, channels, height, width].
uint32_t channels = static_cast<uint32_t>(tensorShape[1]);
uint32_t tensorHeight = static_cast<uint32_t>(tensorShape[2]);
uint32_t tensorWidth = static_cast<uint32_t>(tensorShape[3]);
// Verify that the input buffer contains the same number of elements per channel as the tensor expects.
uint32_t inputElementCount = inputBufferDesc.totalSizeInBytes / inputBufferDesc.elementStrideInBytes;
uint32_t outputElementCount = actualSizeInBytes / (channels * sizeof(WriteType));
if (inputElementCount != outputElementCount)
{
throw hresult_invalid_argument(L"Input size / shape is different from what the model expects");
}
float scale;
std::vector<float> means = {};
std::vector<float> stddevs = {};
const auto& tensorizeArgs = args.TensorizeArgs();
const auto& normalizeParams = tensorizeArgs.Normalize;
switch (tensorizeArgs.Func)
{
case TensorizeFuncs::Identity:
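// Identity: pass pixel values through unchanged (scale of 1, zero mean, unit standard deviation).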
scale = 1.0f;
means.resize(channels, 0.0f);
stddevs.resize(channels, 1.0f);
break;
case TensorizeFuncs::Normalize:
switch (inputBufferDesc.elementFormat)
{
case BitmapPixelFormat::Gray8:
case BitmapPixelFormat::Gray16:
case BitmapPixelFormat::Rgba8:
case BitmapPixelFormat::Rgba16:
scale = normalizeParams.Scale;
means.resize(channels);
stddevs.resize(channels);
for (uint32_t i = 0; i < channels; ++i)
{
means[i] = normalizeParams.Means[i];
stddevs[i] = normalizeParams.StdDevs[i];
}
break;
case BitmapPixelFormat::Bgra8:
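// Bgra8 stores channels in reverse (BGR) order, so reverse the per-channel means/stddevs to match.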
scale = normalizeParams.Scale;
means.resize(channels);
stddevs.resize(channels);
for (uint32_t i = 0; i < channels; ++i)
{
means[channels - 1 - i] = normalizeParams.Means[i];
stddevs[channels - 1 - i] = normalizeParams.StdDevs[i];
}
break;
default:
throw hresult_invalid_argument(
L"CreateTensor<TKind>: Unhandled SoftwareBitmap pixel format");
}
break;
default:
throw hresult_invalid_argument(L"CreateTensor<TKind>: Unknown Tensorize Function");
}
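// Copy from the source buffer into the tensor, converting from the buffer's channel element type
// and applying the scale/mean/stddev normalization per channel.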
switch (inputBufferDesc.channelFormat)
{
case TensorKind::UInt8:
CopyTensorFromBuffer<TKind, uint8_t>(actualData, tensorHeight, tensorWidth, inputBufferDesc, scale,
means, stddevs);
break;
case TensorKind::Float:
CopyTensorFromBuffer<TKind, float>(actualData, tensorHeight, tensorWidth, inputBufferDesc, scale,
means, stddevs);
break;
default:
throw hresult_not_implemented(L"Creating Tensors for Input Images with unhandled channel format!");
}
}
// Garbage data: fill the tensor with randomly generated values.
else if (args.IsGarbageDataRange())
{
GenerateRandomData<TKind>(actualData, actualSizeInBytes, args.GarbageDataMaxValue());
}
if (inputBindingType == InputBindingType::CPU)
{
return tensorValue;
}
else // GPU Tensor
{
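// GPU path: create a D3D12 buffer, upload the CPU tensor data into it, and wrap the resource as a
// tensor via ITensorStaticsNative::CreateFromD3D12Resource.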
com_ptr<ID3D12Resource> pGPUResource = nullptr;
try
{
// Create the D3D12 device on the default adapter.
com_ptr<ID3D12Device> pD3D12Device = nullptr;
THROW_IF_FAILED(D3D12CreateDevice(nullptr, D3D_FEATURE_LEVEL::D3D_FEATURE_LEVEL_11_0,
__uuidof(ID3D12Device), pD3D12Device.put_void()));
// Create the default-heap buffer that will back the GPU tensor.
auto heapType = CD3DX12_HEAP_PROPERTIES(D3D12_HEAP_TYPE_DEFAULT);
auto resourceDesc =
CD3DX12_RESOURCE_DESC::Buffer(actualSizeInBytes, D3D12_RESOURCE_FLAG_ALLOW_UNORDERED_ACCESS);
THROW_IF_FAILED(pD3D12Device->CreateCommittedResource(&heapType, D3D12_HEAP_FLAG_NONE, &resourceDesc,
D3D12_RESOURCE_STATE_COMMON, nullptr,
__uuidof(ID3D12Resource), pGPUResource.put_void()));
if (!args.IsGarbageInput())
{
// Create the upload-heap staging buffer for the CPU data.
com_ptr<ID3D12Resource> imageUploadHeap;
heapType = CD3DX12_HEAP_PROPERTIES(D3D12_HEAP_TYPE_UPLOAD);
resourceDesc = CD3DX12_RESOURCE_DESC::Buffer(actualSizeInBytes);
THROW_IF_FAILED(pD3D12Device->CreateCommittedResource(&heapType, D3D12_HEAP_FLAG_NONE, &resourceDesc,
D3D12_RESOURCE_STATE_GENERIC_READ, nullptr,
__uuidof(ID3D12Resource), imageUploadHeap.put_void()));
// Create the direct command queue used for the upload.
com_ptr<ID3D12CommandQueue> dxQueue = nullptr;
D3D12_COMMAND_QUEUE_DESC commandQueueDesc = {};
commandQueueDesc.Type = D3D12_COMMAND_LIST_TYPE_DIRECT;
THROW_IF_FAILED(pD3D12Device->CreateCommandQueue(&commandQueueDesc, __uuidof(ID3D12CommandQueue),
dxQueue.put_void()));
// Create a LearningModelDevice from the command queue.
com_ptr<ILearningModelDeviceFactoryNative> devicefactory =
get_activation_factory<LearningModelDevice, ILearningModelDeviceFactoryNative>();
com_ptr<::IUnknown> spUnk;
THROW_IF_FAILED(devicefactory->CreateFromD3D12CommandQueue(dxQueue.get(), spUnk.put()));
// Create the command allocator and the ID3D12GraphicsCommandList used to record the copy.
D3D12_COMMAND_LIST_TYPE queueType = dxQueue->GetDesc().Type;
com_ptr<ID3D12CommandAllocator> allocator;
com_ptr<ID3D12GraphicsCommandList> cmdList;
THROW_IF_FAILED(pD3D12Device->CreateCommandAllocator(queueType, winrt::guid_of<ID3D12CommandAllocator>(),
allocator.put_void()));
THROW_IF_FAILED(pD3D12Device->CreateCommandList(0, queueType, allocator.get(), nullptr,
winrt::guid_of<ID3D12GraphicsCommandList>(), cmdList.put_void()));
// Record the copy from the CPU upload buffer into the GPU tensor resource.
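// The destination is a buffer resource, so the data is one contiguous block: RowPitch and
// SlicePitch are both the full byte size.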
D3D12_SUBRESOURCE_DATA CPUData = {};
CPUData.pData = actualData;
CPUData.RowPitch = actualSizeInBytes;
CPUData.SlicePitch = actualSizeInBytes;
UpdateSubresources(cmdList.get(), pGPUResource.get(), imageUploadHeap.get(), 0, 0, 1, &CPUData);
// Close the command list and execute it to start the upload on the GPU.
THROW_IF_FAILED(cmdList->Close());
ID3D12CommandList* ppCommandLists[] = { cmdList.get() };
dxQueue->ExecuteCommandLists(_countof(ppCommandLists), ppCommandLists);
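// Block until the copy has completed on the GPU: signal a fence after the copy and wait on an
// event tied to that fence value.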
// Create the completion event.
HANDLE directEvent = CreateEvent(nullptr, FALSE, FALSE, nullptr);
if (directEvent == nullptr)
{
throw hresult_error(HRESULT_FROM_WIN32(GetLastError()));
}
// Create the fence.
::Microsoft::WRL::ComPtr<ID3D12Fence> spDirectFence = nullptr;
THROW_IF_FAILED(pD3D12Device->CreateFence(0, D3D12_FENCE_FLAG_NONE,
IID_PPV_ARGS(spDirectFence.ReleaseAndGetAddressOf())));
// Signal the fence on the queue and have it set the event once the GPU reaches that value.
THROW_IF_FAILED(dxQueue->Signal(spDirectFence.Get(), 1));
THROW_IF_FAILED(spDirectFence->SetEventOnCompletion(1, directEvent));
// Wait for the signal, then release the event handle.
DWORD retVal = WaitForSingleObject(directEvent, INFINITE);
CloseHandle(directEvent);
if (retVal != WAIT_OBJECT_0)
{
THROW_IF_FAILED(E_UNEXPECTED);
}
}
}
catch (...)
{
std::cout << "Couldn't create and copy CPU tensor resource to GPU resource" << std::endl;
throw;
}
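// Wrap the filled GPU resource as a WinML tensor of the requested shape.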
com_ptr<ITensorStaticsNative> tensorfactory = get_activation_factory<TensorValue, ITensorStaticsNative>();
com_ptr<::IUnknown> spUnkTensor;
THROW_IF_FAILED(tensorfactory->CreateFromD3D12Resource(pGPUResource.get(),
const_cast<int64_t*>(tensorShape.data()),
static_cast<int>(tensorShape.size()), spUnkTensor.put()));
TensorValue returnTensor(nullptr);
spUnkTensor.as(returnTensor);
return returnTensor;
}
}