Search
Duplicate

Compute Pipeline(컴퓨트 파이프라인)

컴퓨트 파이프라인은 그래픽스 파이프라인보다 더 단순합니다.
리소스와 연산이 분리되어 있어서 더 쉽게 이해할 수 있습니다.
컴퓨트 파이프라인을 살펴봅시다.

Resources

래스터라이제이션, 레이트레이싱 파이프라인과 마찬가지로 컴퓨트 파이프라인에도 삼각형 데이터를 담은 구조화 버퍼, 결과를 기록하기 위한 순서 없는 액세스 뷰(Unordered Access View, UAV), 상수 버퍼 뷰와 같은 리소스들이 필요합니다.
// Create the root signature. D3D12_FEATURE_DATA_ROOT_SIGNATURE featureData = {}; // This is the highest version the sample supports. If // CheckFeatureSupport succeeds, the HighestVersion returned will not be // greater than this. featureData.HighestVersion = D3D_ROOT_SIGNATURE_VERSION_1_1; if (FAILED(device->CheckFeatureSupport(D3D12_FEATURE_ROOT_SIGNATURE, &featureData, sizeof(featureData)))) { featureData.HighestVersion = D3D_ROOT_SIGNATURE_VERSION_1_0; } D3D12_DESCRIPTOR_RANGE1 ranges[1]; ranges[0].BaseShaderRegister = 0; ranges[0].RangeType = D3D12_DESCRIPTOR_RANGE_TYPE_UAV; ranges[0].NumDescriptors = 1; ranges[0].RegisterSpace = 0; ranges[0].OffsetInDescriptorsFromTableStart = 0; ranges[0].Flags = D3D12_DESCRIPTOR_RANGE_FLAG_DATA_VOLATILE; D3D12_ROOT_PARAMETER1 rootParameters[1]; rootParameters[0].ParameterType = D3D12_ROOT_PARAMETER_TYPE_DESCRIPTOR_TABLE; rootParameters[0].ShaderVisibility = D3D12_SHADER_VISIBILITY_ALL; rootParameters[0].DescriptorTable.NumDescriptorRanges = 1; rootParameters[0].DescriptorTable.pDescriptorRanges = ranges; D3D12_VERSIONED_ROOT_SIGNATURE_DESC rootSignatureDesc; rootSignatureDesc.Version = D3D_ROOT_SIGNATURE_VERSION_1_1; rootSignatureDesc.Desc_1_1.Flags = D3D12_ROOT_SIGNATURE_FLAG_NONE; rootSignatureDesc.Desc_1_1.NumParameters = 1; rootSignatureDesc.Desc_1_1.pParameters = rootParameters; rootSignatureDesc.Desc_1_1.NumStaticSamplers = 0; rootSignatureDesc.Desc_1_1.pStaticSamplers = nullptr; ID3DBlob* signatureBlob; ID3DBlob* error; try { ThrowIfFailed(D3D12SerializeVersionedRootSignature( &rootSignatureDesc, &signatureBlob, &error)); ThrowIfFailed(mDevice->CreateRootSignature( 0, signature->GetBufferPointer(), signatureBlob->GetBufferSize(), IID_PPV_ARGS(&rootSignature))); rootSignature->SetName(L"Hello Compute Root Signature"); } catch (std::exception e) { const char* errStr = (const char*)error->GetBufferPointer(); std::cout << errStr; error->Release(); error = nullptr; } if (signatureBlob) { signatureBlob->Release(); 
signatureBlob = nullptr; }
C++
복사

Compute Shaders

컴퓨트 셰이더는 기존의 렌더링 파이프라인들보다 간단합니다. 오직 하나의 스테이지만 존재합니다.
// Output image written by the compute shader, bound to UAV register u0.
RWTexture2D<float4> tOutput : register(u0);

// Entry point. Each thread group is 16x16x1 threads and every thread writes
// exactly one texel of tOutput.
[numthreads(16, 16, 1)]
void main(
    // Offset of this thread inside its own group (range set by `numthreads`).
    uint3 groupThreadID : SV_GroupThreadID,
    // Index of this thread's group within the grid passed to `Dispatch(x, y, z)`.
    uint3 groupID : SV_GroupID,
    // Flattened (linear) index of this thread inside its group.
    uint groupIndex : SV_GroupIndex,
    // Global thread index across the whole dispatch; used here as the pixel coordinate.
    uint3 dispatchThreadID : SV_DispatchThreadID)
{
    // Red/green show the thread's position inside its 16x16 group; blue is a
    // horizontal gradient. NOTE(review): 1280.0 looks like a hard-coded output
    // width - confirm it matches the texture the application creates.
    tOutput[dispatchThreadID.xy] = float4(
        float(groupThreadID.x) / 16.0,
        float(groupThreadID.y) / 16.0,
        dispatchThreadID.x / 1280.0,
        1.0);
}
HLSL
복사

Pipeline State

컴퓨트 파이프라인은 셰이더와 루트 시그니처만 사용합니다. 그게 전부입니다.
// Describe the compute pipeline state: only a root signature and the compiled
// compute shader bytecode are set; every other field stays zero-initialized.
D3D12_COMPUTE_PIPELINE_STATE_DESC psoDesc = {};
psoDesc.pRootSignature = rootSignature;

// NOTE(review): compShader is dereferenced here but only null-checked further
// down; this assumes shader compilation succeeded - confirm at the call site.
D3D12_SHADER_BYTECODE csBytecode = {};
csBytecode.pShaderBytecode = compShader->GetBufferPointer();
csBytecode.BytecodeLength = compShader->GetBufferSize();
psoDesc.CS = csBytecode;

try
{
    ThrowIfFailed(mDevice->CreateComputePipelineState(
        &psoDesc, IID_PPV_ARGS(&pipelineState)));
}
catch (const std::exception&)
{
    // Caught by const reference so a derived exception type is not sliced.
    std::cout << "Failed to create Compute Pipeline!";
}

// Release the shader blob once pipeline creation has been attempted.
if (compShader)
{
    compShader->Release();
    compShader = nullptr;
}
C++
복사

Unordered Access View

이제 데이터를 기록할 수 있는 순서 없는 액세스 뷰(Unordered Access View, UAV)를 만들어 보겠습니다.
// Create the Temp Texture D3D12_DESCRIPTOR_HEAP_DESC heapDesc = {}; heapDesc.NumDescriptors = 1; heapDesc.Flags = D3D12_DESCRIPTOR_HEAP_FLAG_SHADER_VISIBLE; heapDesc.Type = D3D12_DESCRIPTOR_HEAP_TYPE_CBV_SRV_UAV; ThrowIfFailed( mDevice->CreateDescriptorHeap(&heapDesc, IID_PPV_ARGS(&mUavHeap))); D3D12_RESOURCE_DESC texResourceDesc = {}; texResourceDesc.Dimension = D3D12_RESOURCE_DIMENSION_TEXTURE2D; texResourceDesc.Alignment = 0; texResourceDesc.Width = mWidth; texResourceDesc.Height = mHeight; texResourceDesc.DepthOrArraySize = 1; texResourceDesc.MipLevels = 1; texResourceDesc.Format = DXGI_FORMAT_R8G8B8A8_UNORM; texResourceDesc.SampleDesc.Count = 1; texResourceDesc.SampleDesc.Quality = 0; texResourceDesc.Layout = D3D12_TEXTURE_LAYOUT_UNKNOWN; texResourceDesc.Flags = D3D12_RESOURCE_FLAG_ALLOW_UNORDERED_ACCESS; D3D12_CLEAR_VALUE clearValue = {}; clearValue.Format = DXGI_FORMAT_R8G8B8A8_UNORM; clearValue.Color[0] = clearValue.Color[1] = clearValue.Color[2] = clearValue.Color[3] = 1.f; D3D12_HEAP_PROPERTIES heapProps; heapProps.Type = D3D12_HEAP_TYPE_DEFAULT; heapProps.CPUPageProperty = D3D12_CPU_PAGE_PROPERTY_UNKNOWN; heapProps.MemoryPoolPreference = D3D12_MEMORY_POOL_UNKNOWN; heapProps.CreationNodeMask = 1; heapProps.VisibleNodeMask = 1; ThrowIfFailed(mDevice->CreateCommittedResource( &heapProps, D3D12_HEAP_FLAG_NONE, &texResourceDesc, D3D12_RESOURCE_STATE_UNORDERED_ACCESS, nullptr, IID_PPV_ARGS(&mTexResource))); mTexResource->SetName(L"Compute Target"); mUAVDescriptorSize = mDevice->GetDescriptorHandleIncrementSize( D3D12_DESCRIPTOR_HEAP_TYPE_CBV_SRV_UAV); auto AllocateDescriptor = [&](D3D12_CPU_DESCRIPTOR_HANDLE* cpuDescriptor, UINT descriptorIndexToUse) { auto descriptorHeapCpuBase = mUavHeap->GetCPUDescriptorHandleForHeapStart(); if (descriptorIndexToUse >= mUavHeap->GetDesc().NumDescriptors) { descriptorIndexToUse = mDescriptorsAllocated++; } *cpuDescriptor = D3D12_CPU_DESCRIPTOR_HANDLE{ descriptorHeapCpuBase.ptr + INT64(descriptorIndexToUse) * 
INT64(mUAVDescriptorSize)}; return descriptorIndexToUse; }; heapIndex = AllocateDescriptor(&uavCPUHandle, heapIndex); uavGPUHandle = D3D12_GPU_DESCRIPTOR_HANDLE{ mUavHeap->GetGPUDescriptorHandleForHeapStart().ptr + INT64(0) * INT64(mUAVDescriptorSize)}; D3D12_UNORDERED_ACCESS_VIEW_DESC uavDesc = {}; uavDesc.Format = DXGI_FORMAT_R8G8B8A8_UNORM; uavDesc.ViewDimension = D3D12_UAV_DIMENSION_TEXTURE2D; mDevice->CreateUnorderedAccessView(mTexResource, nullptr, &uavDesc, uavCPUHandle);
C++
복사

Compute Calls

컴퓨트 작업은 실행할 스레드 그룹의 개수를 인수로 하여 Dispatch를 호출하는 방식으로 실행합니다. 여기서는 화면 공간 컴퓨트 셰이더를 실행할 것이므로, 화면을 16x16 블록으로 나눈 개수만큼의 그룹을 인수로 전달합니다.
void setupCommands() { ThrowIfFailed(commandAllocator->Reset()); ThrowIfFailed(commandList->Reset(commandAllocator, mPipelineState)); // Set necessary state. commandList->SetComputeRootSignature(rootSignature); ID3D12DescriptorHeap* pDescriptorHeaps[] = {mUavHeap}; commandList->SetDescriptorHeaps(_countof(pDescriptorHeaps), pDescriptorHeaps); commandList->SetComputeRootDescriptorTable(0, uavGPUHandle); auto divCiel = [](unsigned val, unsigned x) -> unsigned { return val / x + ((val % x) > 0 ? 1 : 0); }; commandList->Dispatch(divCiel(width, 16), divCiel(height, 16), 1); D3D12_RESOURCE_BARRIER preCopyBarriers[2]; preCopyBarriers[0] = CD3DX12_RESOURCE_BARRIER::Transition( mRenderTargets[frameIndex], D3D12_RESOURCE_STATE_PRESENT, D3D12_RESOURCE_STATE_COPY_DEST); preCopyBarriers[1] = CD3DX12_RESOURCE_BARRIER::Transition( mTexResource, D3D12_RESOURCE_STATE_UNORDERED_ACCESS, D3D12_RESOURCE_STATE_COPY_SOURCE); commandList->ResourceBarrier(ARRAYSIZE(preCopyBarriers), preCopyBarriers); commandList->CopyResource(renderTargets[frameIndex], texResource); D3D12_RESOURCE_BARRIER postCopyBarriers[2]; postCopyBarriers[0] = CD3DX12_RESOURCE_BARRIER::Transition( mRenderTargets[fameIndex], D3D12_RESOURCE_STATE_COPY_DEST, D3D12_RESOURCE_STATE_PRESENT); postCopyBarriers[1] = CD3DX12_RESOURCE_BARRIER::Transition( mTexResource, D3D12_RESOURCE_STATE_COPY_SOURCE, D3D12_RESOURCE_STATE_UNORDERED_ACCESS); CD3DX12_RESOURCE_BARRIER result = {}; D3D12_RESOURCE_BARRIER& barrier = result; result.Type = D3D12_RESOURCE_BARRIER_TYPE_TRANSITION; result.Flags = D3D12_RESOURCE_BARRIER_FLAG_NONE; barrier.Transition.pResource = mTexResource; barrier.Transition.StateBefore = D3D12_RESOURCE_STATE_COPY_SOURCE; barrier.Transition.StateAfter = D3D12_RESOURCE_STATE_UNORDERED_ACCESS; barrier.Transition.Subresource = D3D12_RESOURCE_BARRIER_ALL_SUBRESOURCES; commandList->ResourceBarrier(ARRAYSIZE(postCopyBarriers), postCopyBarriers); ThrowIfFailed(commandList->Close()); }
C++
복사

참고자료