컴퓨트 파이프라인은 그래픽스 파이프라인보다 더 쉽습니다.
리소스와 연산이 분리되어 있어서 더 쉽게 이해할 수 있습니다.
컴퓨트 파이프라인을 살펴봅시다.
Resources
래스터라이제이션, 레이트레이싱 파이프라인과 마찬가지로 계산에도 삼각형 데이터를 담은 구조화 버퍼, 렌더링에 관여되지 않는 순서 없는 액세스 뷰(UAV, Unordered Access View), 상수 버퍼 뷰와 같은 리소스들이 필요합니다.
// Create the root signature.
D3D12_FEATURE_DATA_ROOT_SIGNATURE featureData = {};
// This is the highest version the sample supports. If
// CheckFeatureSupport succeeds, the HighestVersion returned will not be
// greater than this.
featureData.HighestVersion = D3D_ROOT_SIGNATURE_VERSION_1_1;
if (FAILED(device->CheckFeatureSupport(D3D12_FEATURE_ROOT_SIGNATURE,
&featureData,
sizeof(featureData))))
{
featureData.HighestVersion = D3D_ROOT_SIGNATURE_VERSION_1_0;
}
D3D12_DESCRIPTOR_RANGE1 ranges[1];
ranges[0].BaseShaderRegister = 0;
ranges[0].RangeType = D3D12_DESCRIPTOR_RANGE_TYPE_UAV;
ranges[0].NumDescriptors = 1;
ranges[0].RegisterSpace = 0;
ranges[0].OffsetInDescriptorsFromTableStart = 0;
ranges[0].Flags = D3D12_DESCRIPTOR_RANGE_FLAG_DATA_VOLATILE;
D3D12_ROOT_PARAMETER1 rootParameters[1];
rootParameters[0].ParameterType =
D3D12_ROOT_PARAMETER_TYPE_DESCRIPTOR_TABLE;
rootParameters[0].ShaderVisibility = D3D12_SHADER_VISIBILITY_ALL;
rootParameters[0].DescriptorTable.NumDescriptorRanges = 1;
rootParameters[0].DescriptorTable.pDescriptorRanges = ranges;
D3D12_VERSIONED_ROOT_SIGNATURE_DESC rootSignatureDesc;
rootSignatureDesc.Version = D3D_ROOT_SIGNATURE_VERSION_1_1;
rootSignatureDesc.Desc_1_1.Flags = D3D12_ROOT_SIGNATURE_FLAG_NONE;
rootSignatureDesc.Desc_1_1.NumParameters = 1;
rootSignatureDesc.Desc_1_1.pParameters = rootParameters;
rootSignatureDesc.Desc_1_1.NumStaticSamplers = 0;
rootSignatureDesc.Desc_1_1.pStaticSamplers = nullptr;
ID3DBlob* signatureBlob;
ID3DBlob* error;
try
{
ThrowIfFailed(D3D12SerializeVersionedRootSignature(
&rootSignatureDesc, &signatureBlob, &error));
ThrowIfFailed(mDevice->CreateRootSignature(
0, signature->GetBufferPointer(), signatureBlob->GetBufferSize(),
IID_PPV_ARGS(&rootSignature)));
rootSignature->SetName(L"Hello Compute Root Signature");
}
catch (std::exception e)
{
const char* errStr = (const char*)error->GetBufferPointer();
std::cout << errStr;
error->Release();
error = nullptr;
}
if (signatureBlob)
{
signatureBlob->Release();
signatureBlob = nullptr;
}
C++
복사
Compute Shaders
컴퓨트 셰이더는 기존의 렌더링 파이프라인들보다 간단합니다. 오직 하나의 스테이지만 존재합니다.
// Output texture written by the compute shader, bound as a UAV at register u0.
RWTexture2D<float4> tOutput : register(u0);

// Each thread group is 16x16x1 threads; every thread writes one texel.
[numthreads(16, 16, 1)]
void main(uint3 groupThreadID : SV_GroupThreadID,        // Index of this thread inside its thread group (range set by `numthreads`)
          uint3 groupID : SV_GroupID,                    // Index of this thread's group inside the grid passed to `Dispatch(x,y,z)`
          uint groupIndex : SV_GroupIndex,               // Flattened (linear) index of this thread inside its thread group
          uint3 dispatchThreadID : SV_DispatchThreadID)  // Global thread index across the whole dispatch; used here as the pixel coordinate
{
    // Read the real texture size instead of assuming a 1280-pixel-wide target.
    uint outputWidth;
    uint outputHeight;
    tOutput.GetDimensions(outputWidth, outputHeight);

    // If the host rounds the group count up to cover the whole texture,
    // threads in the edge groups can fall outside it; those have nothing to write.
    if (dispatchThreadID.x >= outputWidth || dispatchThreadID.y >= outputHeight)
    {
        return;
    }

    // Red/green: position of the thread inside its 16x16 group.
    // Blue: horizontal position across the texture. Alpha: opaque.
    tOutput[dispatchThreadID.xy] = float4(float(groupThreadID.x) / 16.0,
                                          float(groupThreadID.y) / 16.0,
                                          float(dispatchThreadID.x) / float(outputWidth),
                                          1.0);
}
C++
복사
Pipeline State
컴퓨트 파이프라인은 셰이더와 루트 시그니처만 사용합니다. 그게 전부입니다.
// Create the compute pipeline state object. A compute PSO description only
// needs the root signature and the compute shader bytecode.
D3D12_COMPUTE_PIPELINE_STATE_DESC psoDesc = {};
psoDesc.pRootSignature = rootSignature;
psoDesc.CS.pShaderBytecode = compShader->GetBufferPointer();
psoDesc.CS.BytecodeLength = compShader->GetBufferSize();

try
{
    ThrowIfFailed(mDevice->CreateComputePipelineState(
        &psoDesc, IID_PPV_ARGS(&pipelineState)));
}
catch (const std::exception&)
{
    // Caught by const reference so a derived exception thrown by
    // ThrowIfFailed is neither copied nor sliced.
    std::cout << "Failed to create Compute Pipeline!";
}

// The shader blob is released here, once pipeline creation has been attempted.
if (compShader)
{
    compShader->Release();
    compShader = nullptr;
}
C++
복사
Unordered Access View
이제 데이터를 기록할 수 있는 언오더드 액세스 뷰(UAV)를 만들어 보겠습니다.
// Create the temporary texture the compute shader writes to, plus the
// descriptor heap and unordered access view (UAV) used to bind it.

// Shader-visible CBV/SRV/UAV heap with room for a single descriptor.
D3D12_DESCRIPTOR_HEAP_DESC heapDesc = {};
heapDesc.Type = D3D12_DESCRIPTOR_HEAP_TYPE_CBV_SRV_UAV;
heapDesc.NumDescriptors = 1;
heapDesc.Flags = D3D12_DESCRIPTOR_HEAP_FLAG_SHADER_VISIBLE;
ThrowIfFailed(
    mDevice->CreateDescriptorHeap(&heapDesc, IID_PPV_ARGS(&mUavHeap)));

// 2D RGBA8 texture of mWidth x mHeight, single mip, no multisampling,
// created with the unordered-access flag so a UAV can be made for it.
D3D12_RESOURCE_DESC texResourceDesc = {};
texResourceDesc.Dimension = D3D12_RESOURCE_DIMENSION_TEXTURE2D;
texResourceDesc.Alignment = 0;
texResourceDesc.Width = mWidth;
texResourceDesc.Height = mHeight;
texResourceDesc.DepthOrArraySize = 1;
texResourceDesc.MipLevels = 1;
texResourceDesc.Format = DXGI_FORMAT_R8G8B8A8_UNORM;
texResourceDesc.SampleDesc.Count = 1;
texResourceDesc.SampleDesc.Quality = 0;
texResourceDesc.Layout = D3D12_TEXTURE_LAYOUT_UNKNOWN;
texResourceDesc.Flags = D3D12_RESOURCE_FLAG_ALLOW_UNORDERED_ACCESS;

D3D12_HEAP_PROPERTIES heapProps = {};
heapProps.Type = D3D12_HEAP_TYPE_DEFAULT;
heapProps.CPUPageProperty = D3D12_CPU_PAGE_PROPERTY_UNKNOWN;
heapProps.MemoryPoolPreference = D3D12_MEMORY_POOL_UNKNOWN;
heapProps.CreationNodeMask = 1;
heapProps.VisibleNodeMask = 1;

// No optimized clear value is passed (nullptr): the resource is created
// without the render-target or depth-stencil flags, and an optimized clear
// value only applies to those. The texture starts in the UNORDERED_ACCESS
// state, ready for the compute shader to write to it.
ThrowIfFailed(mDevice->CreateCommittedResource(
    &heapProps, D3D12_HEAP_FLAG_NONE, &texResourceDesc,
    D3D12_RESOURCE_STATE_UNORDERED_ACCESS, nullptr,
    IID_PPV_ARGS(&mTexResource)));
mTexResource->SetName(L"Compute Target");

mUAVDescriptorSize = mDevice->GetDescriptorHandleIncrementSize(
    D3D12_DESCRIPTOR_HEAP_TYPE_CBV_SRV_UAV);

// Writes the CPU handle of a heap slot into *cpuDescriptor and returns the
// slot index that was used. If the requested index is outside the heap, the
// next index from mDescriptorsAllocated is taken instead.
auto AllocateDescriptor =
    [&](D3D12_CPU_DESCRIPTOR_HANDLE* cpuDescriptor,
        UINT descriptorIndexToUse)
{
    auto descriptorHeapCpuBase =
        mUavHeap->GetCPUDescriptorHandleForHeapStart();
    if (descriptorIndexToUse >= mUavHeap->GetDesc().NumDescriptors)
    {
        descriptorIndexToUse = mDescriptorsAllocated++;
    }
    *cpuDescriptor = D3D12_CPU_DESCRIPTOR_HANDLE{
        descriptorHeapCpuBase.ptr +
        INT64(descriptorIndexToUse) * INT64(mUAVDescriptorSize)};
    return descriptorIndexToUse;
};
heapIndex = AllocateDescriptor(&uavCPUHandle, heapIndex);

// The GPU handle must address the same heap slot as the CPU handle, so it is
// offset by the allocated index rather than by a hard-coded 0.
uavGPUHandle = D3D12_GPU_DESCRIPTOR_HANDLE{
    mUavHeap->GetGPUDescriptorHandleForHeapStart().ptr +
    INT64(heapIndex) * INT64(mUAVDescriptorSize)};

// Create the UAV for the texture in the allocated slot.
D3D12_UNORDERED_ACCESS_VIEW_DESC uavDesc = {};
uavDesc.Format = DXGI_FORMAT_R8G8B8A8_UNORM;
uavDesc.ViewDimension = D3D12_UAV_DIMENSION_TEXTURE2D;
mDevice->CreateUnorderedAccessView(mTexResource, nullptr, &uavDesc,
                                   uavCPUHandle);
C++
복사
Compute Calls
컴퓨트 호출은 스레드 그룹의 개수를 인수로 하여 Dispatch를 호출해 실행합니다. 화면 공간 컴퓨트 셰이더를 실행할 것이므로, 화면의 16x16 블록마다 스레드 그룹 하나씩을 인수로 전달합니다.
// Records the command list for one frame: dispatches the compute shader over
// the whole target, then copies the compute output texture into the current
// frame's render target. Throws (via ThrowIfFailed) if resetting or closing
// the command list fails.
void setupCommands()
{
    ThrowIfFailed(commandAllocator->Reset());
    ThrowIfFailed(commandList->Reset(commandAllocator, mPipelineState));

    // Set necessary state: root signature, descriptor heap, and the UAV table
    // at root parameter 0.
    commandList->SetComputeRootSignature(rootSignature);
    ID3D12DescriptorHeap* pDescriptorHeaps[] = {mUavHeap};
    commandList->SetDescriptorHeaps(_countof(pDescriptorHeaps),
                                    pDescriptorHeaps);
    commandList->SetComputeRootDescriptorTable(0, uavGPUHandle);

    // Ceiling division: one 16x16 thread group per 16x16 block, rounding up
    // so partial blocks at the right/bottom edges are still covered.
    auto divCeil = [](unsigned val, unsigned x) -> unsigned
    { return val / x + ((val % x) > 0 ? 1 : 0); };
    commandList->Dispatch(divCeil(width, 16), divCeil(height, 16), 1);

    // Prepare for the copy: render target PRESENT -> COPY_DEST,
    // compute texture UNORDERED_ACCESS -> COPY_SOURCE.
    D3D12_RESOURCE_BARRIER preCopyBarriers[2];
    preCopyBarriers[0] = CD3DX12_RESOURCE_BARRIER::Transition(
        mRenderTargets[frameIndex], D3D12_RESOURCE_STATE_PRESENT,
        D3D12_RESOURCE_STATE_COPY_DEST);
    preCopyBarriers[1] = CD3DX12_RESOURCE_BARRIER::Transition(
        mTexResource, D3D12_RESOURCE_STATE_UNORDERED_ACCESS,
        D3D12_RESOURCE_STATE_COPY_SOURCE);
    commandList->ResourceBarrier(ARRAYSIZE(preCopyBarriers), preCopyBarriers);

    // Copy the same resources the barriers above transitioned.
    commandList->CopyResource(mRenderTargets[frameIndex], mTexResource);

    // Restore both resources to their original states.
    D3D12_RESOURCE_BARRIER postCopyBarriers[2];
    postCopyBarriers[0] = CD3DX12_RESOURCE_BARRIER::Transition(
        mRenderTargets[frameIndex], D3D12_RESOURCE_STATE_COPY_DEST,
        D3D12_RESOURCE_STATE_PRESENT);
    postCopyBarriers[1] = CD3DX12_RESOURCE_BARRIER::Transition(
        mTexResource, D3D12_RESOURCE_STATE_COPY_SOURCE,
        D3D12_RESOURCE_STATE_UNORDERED_ACCESS);
    commandList->ResourceBarrier(ARRAYSIZE(postCopyBarriers),
                                 postCopyBarriers);

    ThrowIfFailed(commandList->Close());
}
C++
복사