vulkan-playground/reconstructor.h

637 lines
30 KiB
C++

/*
* Copyright (c) 2024 mittorn
*
* Permission is hereby granted, free of charge, to any person obtaining a
* copy of this software and associated documentation files (the
* "Software"), to deal in the Software without restriction, including
* without limitation the rights to use, copy, modify, merge, publish,
* distribute, sub license, and/or sell copies of the Software, and to
* permit persons to whom the Software is furnished to do so, subject to
* the following conditions:
*
* The above copyright notice and this permission notice (including the
* next paragraph) shall be included in all copies or substantial portions
* of the Software.
*
* THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS
* OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
* MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NON-INFRINGEMENT.
* IN NO EVENT SHALL PRECISION INSIGHT AND/OR ITS SUPPLIERS BE LIABLE FOR
* ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT,
* TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE
* SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE.
*/
#ifndef RECONSTRUCTOR_H
#define RECONSTRUCTOR_H
#include "vulkan_utl.h"
#include "vulkan_pipeline_utl.h"
#include "vulkan_framebuffer_utl.h"
#include "vulkan_contructors.h"
#include "vulkan_texture_utl.h"
#include <pthread.h>
#define USE_SAMPLER 1
struct ReconstructionComputePipeline: BaseVulkanPipeline
{
void Init(VkDevice dev, VkRenderPass renderPass, VkSampler *pImmutableSamplers, int count1, int count2, int count3, unsigned int width, unsigned int height, unsigned int fwidth, unsigned int fheight)
{
device = dev;
CreateDescriptorSetLayout(
BasicBinding(0, VK_DESCRIPTOR_TYPE_COMBINED_IMAGE_SAMPLER,1,VK_SHADER_STAGE_COMPUTE_BIT, pImmutableSamplers),
#if USE_SAMPLER
BasicBinding(1, VK_DESCRIPTOR_TYPE_COMBINED_IMAGE_SAMPLER,1,VK_SHADER_STAGE_COMPUTE_BIT),
#else
BasicBinding(1, VK_DESCRIPTOR_TYPE_STORAGE_IMAGE,1,VK_SHADER_STAGE_COMPUTE_BIT),
#endif
BasicBinding(2, VK_DESCRIPTOR_TYPE_STORAGE_IMAGE,1,VK_SHADER_STAGE_COMPUTE_BIT)
);
CreatePool((count1 + count3) * count2,
BasicPoolSize(VK_DESCRIPTOR_TYPE_COMBINED_IMAGE_SAMPLER, count2 * count3*3),
#if USE_SAMPLER
// BasicPoolSize(VK_DESCRIPTOR_TYPE_SAMPLED_IMAGE, count3 * count2 * count1),
BasicPoolSize(VK_DESCRIPTOR_TYPE_STORAGE_IMAGE, count3 * count2 * count1)
#else
BasicPoolSize(VK_DESCRIPTOR_TYPE_STORAGE_IMAGE, count3 * count2 * count1*2)
#endif
);
// todo: wrapper for specialization arrays...
float specData[2] = {(float)width,(float)height};
VkSpecializationMapEntry specs[2] = {{0, 0, sizeof(float)},1,sizeof(float),sizeof(float)};
VkSpecializationInfo sinfo = {2, specs, sizeof(specData), specData };
VkShaderModule shader;
// todo: combined reconstruction/foveation shader? (maybe ineffective for compute pipeline)
CreateComputePipeline(ShaderFromFile(shader, "reconstruction.comp.spv", VK_SHADER_STAGE_COMPUTE_BIT, &sinfo));
// todo: should not we destroy shader internally?
vkDestroyShaderModule(device, shader, NULL);
}
void UpdateDescriptors(VkDescriptorSet dstSet, VkImageView imageView, VkSampler sampler, VkImageView imageView1, VkImageView imageView2, VkSampler sampler2)
{
WriteDescriptors(
ImageWrite(dstSet, 0, ImageDescriptor(imageView, VK_IMAGE_LAYOUT_GENERAL, sampler), VK_DESCRIPTOR_TYPE_COMBINED_IMAGE_SAMPLER),
ImageWrite(dstSet, 1, ImageDescriptor(imageView1, VK_IMAGE_LAYOUT_GENERAL, sampler2), USE_SAMPLER?VK_DESCRIPTOR_TYPE_COMBINED_IMAGE_SAMPLER:VK_DESCRIPTOR_TYPE_STORAGE_IMAGE),
ImageWrite(dstSet, 2, ImageDescriptor(imageView2, VK_IMAGE_LAYOUT_GENERAL)));
}
};
struct ReconstructionGraphicsPipeline: BaseVulkanPipeline
{
void Init(VkDevice dev, VkRenderPass renderPass, VkSampler *pImmutableSamplers, int count)
{
device = dev;
CreateDescriptorSetLayout(
//BasicBinding(0, VK_DESCRIPTOR_TYPE_UNIFORM_BUFFER,1,VK_SHADER_STAGE_VERTEX_BIT)
BasicBinding(0, VK_DESCRIPTOR_TYPE_COMBINED_IMAGE_SAMPLER,1,VK_SHADER_STAGE_FRAGMENT_BIT, pImmutableSamplers)
);
CreatePool(count,
//BasicPoolSize(VK_DESCRIPTOR_TYPE_UNIFORM_BUFFER, 1)
BasicPoolSize(VK_DESCRIPTOR_TYPE_COMBINED_IMAGE_SAMPLER, count)
);
VkShaderModule vp, fp;
CreateGraphicsPipeline(renderPass,
Stages(
ShaderFromFile(vp, "quad.vert.spv", VK_SHADER_STAGE_VERTEX_BIT),
ShaderFromFile(fp, "quad.frag.spv", VK_SHADER_STAGE_FRAGMENT_BIT)),
VertexBindings(),
VertexAttributes(),
DynamicStates(
VK_DYNAMIC_STATE_VIEWPORT,
VK_DYNAMIC_STATE_SCISSOR
),
DepthStencil(true, true),
AssemblyTopology(VK_PRIMITIVE_TOPOLOGY_TRIANGLE_FAN)
//RasterMode(VK_FRONT_FACE_CLOCKWISE)
);
vkDestroyShaderModule(device, vp, NULL);
vkDestroyShaderModule(device, fp, NULL);
}
void UpdateDescriptors(VkDescriptorSet dstSet, VkImageView imageView, VkSampler sampler)
{
WriteDescriptors(
//BufferWrite(dstSet, 0, buffer)
ImageWrite(dstSet, 0, ImageDescriptor(imageView, VK_IMAGE_LAYOUT_GENERAL, sampler), VK_DESCRIPTOR_TYPE_COMBINED_IMAGE_SAMPLER)
);
}
};
// Capacity of the per-swapchain-image arrays in Reconstructor (swapchainFbs, swapchainImages, ...).
constexpr static size_t MAX_SWAPCHAIN_IMAGES = 8;
// Number of decoder images (decoder.images[]) that get descriptor sets and command buffers.
constexpr static size_t MAX_DECODER_FRAMES = 4;
// Number of intermediate reconstruction target images kept by Reconstructor.
// NOTE(review): frame slots are selected with `& 3` elsewhere in this header, so this
// presumably has to stay 4 - confirm before changing.
constexpr static size_t RECONSTRUCTION_TARGET_FRAMES = 4;
// Vulkan objects of one reconstruction source frame slot.
// All members are filled in by ReconstructionSource::Setup.
struct ReconsructionSourceImage
{
// Image holding the reconstruction data of one frame
VkImage image;
// Memory bound to `image`
VkDeviceMemory image_memory;
// 2D view of `image`
VkImageView image_view;
// Sampler for `image`; only created when USE_SAMPLER is enabled
VkSampler image_sampler;
// Host mapping of image_memory; only mapped when no staging transfer is needed
unsigned char *pMappedData;
// Pre-recorded staging-buffer -> image copy; only allocated when a staging transfer is needed
VkCommandBuffer copyCmdBuf;
};
// Owns the source images that receive per-frame reconstruction data and the thread
// that fills them (see ReconstructionReadThread).
//   TReconstructionReader  - used as reader.Read(buffer, frameNum)
//   TLosslessDecompressor  - used as dec.Decompress(src, srcLength, dst, dstCapacity)
//   Frames                 - number of source image slots
template <typename TReconstructionReader, typename TLosslessDecompressor, size_t Frames>
struct ReconstructionSource
{
constexpr static size_t MaxFrames = Frames;
// Frame slots. NOTE(review): the reader thread picks a slot with `frameNum & 3`,
// so Frames is presumably expected to be 4 - confirm.
ReconsructionSourceImage sources[Frames];
// Packet source polled by the reader thread
TReconstructionReader &reader;
//unsigned int frame_idx;
// Only referenced from commented-out code in this struct
FILE *reconstructionStream;
// Host-visible upload buffer; only created when a staging transfer is needed
VulkanBuffer stagingBuffer;
// Set at the end of Setup, before the reader thread is started
VulkanDevice *device;
// Timeline semaphore (initial value 0) signaled with the frame number once that frame's data is in place
VkSemaphore inputSemaphore;
// Timeline semaphore (initial value 4) the reader thread waits on before writing frame `frameNum`
VkSemaphore outputSemaphore;
// Queue the copy command buffers are submitted to.
// Not assigned anywhere in this struct - presumably set by the caller before Setup; TODO confirm.
VkQueue cpyQueue;
// Timeline semaphore entry points, looked up in Setup
PFN_vkWaitSemaphoresKHR pvkWaitSemaphoresKHR;
PFN_vkSignalSemaphoreKHR pvkSignalSemaphoreKHR;
// Reader thread started by Setup
pthread_t reconstructionThread;
// Command pool for the copy command buffers; only created when a staging transfer is needed
VkCommandPool pool;
// Queue family used for `pool`.
// Not assigned anywhere in this struct - presumably set by the caller before Setup; TODO confirm.
uint familyIndex;
// Binds the reader; all Vulkan state is created later by Setup.
ReconstructionSource(TReconstructionReader &r) : reader(r){}
template <bool needTransfer>
static void *ReconstructionReadThread(void *arg)
{
ReconstructionSource &src = *((ReconstructionSource*)arg);
TLosslessDecompressor dec;
int frameNum = 0;
while(true)
{
uint32_t length = 0;
//if(fread(&length, sizeof(int), 1, src.reconstructionStream) != 1)
//break;
//frameNum++; // todo: get from packet headers
static char buffer[((1024 + 3) >> 2) * ((540 + 3) >> 2)*8+16];
//fread(buffer, 1, length, src.reconstructionStream);
length = src.reader.Read(buffer, frameNum);
CallWith($M(VkSemaphoreWaitInfoKHR{VK_STRUCTURE_TYPE_SEMAPHORE_WAIT_INFO_KHR}, $(semaphoreCount),
$(pSemaphores) = &src.outputSemaphore, $(pValues) &= (uint64_t)frameNum ),
src.pvkWaitSemaphoresKHR(src.device->device,&ref, 100000000000));
printf("recinp %d\n", frameNum);
void *mem = needTransfer?src.stagingBuffer.mapped:src.sources[frameNum & 3].pMappedData;
int len = dec.Decompress(buffer, length, mem, ((1024 + 3) >> 2) * ((540 + 3) >> 2)*8+16);
ReconsructionSourceImage &image = src.sources[frameNum & 3];
if constexpr(needTransfer)
{
VK_CHECK_RESULT(CallWith($Sc(
SubmitInfo(image.copyCmdBuf, $(signalSemaphoreCount),
$(pSignalSemaphores) = &src.inputSemaphore),
$M(VkTimelineSemaphoreSubmitInfoKHR{VK_STRUCTURE_TYPE_TIMELINE_SEMAPHORE_SUBMIT_INFO_KHR},
$(signalSemaphoreValueCount), $(pSignalSemaphoreValues) &= (uint64_t)frameNum)),
vkQueueSubmit(src.cpyQueue, 1, &ref, NULL)));
}
else
{
CallWith($M(VkSemaphoreSignalInfoKHR{VK_STRUCTURE_TYPE_SEMAPHORE_SIGNAL_INFO_KHR},
$(semaphore) = src.inputSemaphore, $(value) = (uint64_t)frameNum),
src.pvkSignalSemaphoreKHR(src.device->device, &ref));
}
}
return NULL;
}
// todo: shared reconstruction info descriptor
// Creates the two timeline semaphores and the source images, then starts the reader thread.
// When `format` cannot be used with linear tiling, a staging buffer, a command pool and one
// pre-recorded copy command buffer per slot are created as well.
//   context      - provides the instance used to look up the timeline semaphore entry points
//   dev          - device wrapper; a pointer to it is stored in `device`
//   format       - format of the source images
//   width/height - extent of the source images
// cpyQueue and familyIndex are read here but not assigned in this struct; presumably the caller
// sets them before calling Setup - TODO confirm.
void Setup(VulkanContext &context, VulkanDevice &dev, VkFormat format, unsigned int width, unsigned int height)
{
    VkFormatProperties props;
    vkGetPhysicalDeviceFormatProperties(dev.physicalDevice, format, &props);
    pvkWaitSemaphoresKHR = (PFN_vkWaitSemaphoresKHR)vkGetInstanceProcAddr(context.instance, "vkWaitSemaphoresKHR");
    pvkSignalSemaphoreKHR = (PFN_vkSignalSemaphoreKHR)vkGetInstanceProcAddr(context.instance, "vkSignalSemaphoreKHR");

    VK_CHECK_RESULT(CallWith($Sc(
        VkSemaphoreCreateInfo{VK_STRUCTURE_TYPE_SEMAPHORE_CREATE_INFO},
        $M(VkSemaphoreTypeCreateInfoKHR{VK_STRUCTURE_TYPE_SEMAPHORE_TYPE_CREATE_INFO_KHR},
            $(semaphoreType) = VK_SEMAPHORE_TYPE_TIMELINE_KHR,
            $(initialValue) = 0)
        ), vkCreateSemaphore(dev.device, &ref, NULL, &inputSemaphore)));
    VK_CHECK_RESULT(CallWith($Sc(
        VkSemaphoreCreateInfo{VK_STRUCTURE_TYPE_SEMAPHORE_CREATE_INFO},
        $M(VkSemaphoreTypeCreateInfoKHR{VK_STRUCTURE_TYPE_SEMAPHORE_TYPE_CREATE_INFO_KHR},
            $(semaphoreType) = VK_SEMAPHORE_TYPE_TIMELINE_KHR,
            $(initialValue) = 4)
        ), vkCreateSemaphore(dev.device, &ref, NULL, &outputSemaphore)));

    // A staging copy is needed when the format cannot be sampled/stored from a linear image.
    bool needTransfer = !(props.linearTilingFeatures & (USE_SAMPLER?VK_FORMAT_FEATURE_SAMPLED_IMAGE_BIT:VK_FORMAT_FEATURE_STORAGE_IMAGE_BIT));
    printf("needTransfer %d\n", needTransfer);
    if(needTransfer)
    {
        if(cpyQueue == dev.defaultQueue)
            printf("Warning: using default queue for transfer\n");

        // The reader thread hands the decompressor this capacity for the staging buffer,
        // so the buffer must never be smaller than that.
        const unsigned int decompressCapacity = ((1024 + 3) >> 2) * ((540 + 3) >> 2)*8+16;
        unsigned int stagingSize = ((1024 + 3) >> 2) * ((height + 3) >> 2)*8;
        if(stagingSize < decompressCapacity)
            stagingSize = decompressCapacity;
        dev.CreateBuffer(stagingBuffer, VK_BUFFER_USAGE_TRANSFER_SRC_BIT, VK_MEMORY_PROPERTY_HOST_VISIBLE_BIT|VK_MEMORY_PROPERTY_HOST_COHERENT_BIT, stagingSize);

        VkCommandPoolCreateInfo cmdPoolInfo = {VK_STRUCTURE_TYPE_COMMAND_POOL_CREATE_INFO};
        cmdPoolInfo.queueFamilyIndex = familyIndex;
        VK_CHECK_RESULT(vkCreateCommandPool(dev.device, &cmdPoolInfo, NULL, &pool));
        stagingBuffer.Map();
    }

    for(size_t i = 0; i < Frames; i++)
    {
        //uint32_t sharedQueues[2] = {dev.defaultFamilyIndex, familyIndex};
        ReconsructionSourceImage &image = sources[i];

        VkImageUsageFlags flags = (USE_SAMPLER?VK_IMAGE_USAGE_SAMPLED_BIT:VK_IMAGE_USAGE_STORAGE_BIT);
        if(needTransfer)
            flags |= VK_IMAGE_USAGE_TRANSFER_DST_BIT;
        VK_CHECK_RESULT(CallWith(Image2dInfo(flags, format, width, height,
            $(tiling) = needTransfer?VK_IMAGE_TILING_OPTIMAL:VK_IMAGE_TILING_LINEAR,
            $(initialLayout) = VK_IMAGE_LAYOUT_UNDEFINED/*, $(sharingMode), $(queueFamilyIndexCount) = 2, $(pQueueFamilyIndices) = sharedQueues*/),
            vkCreateImage(dev.device, &ref,NULL, &image.image)));

        VkMemoryRequirements mem_reqs;
        vkGetImageMemoryRequirements(dev.device, image.image, &mem_reqs);
        VkMemoryAllocateInfo info = AllocateInfo(mem_reqs.size);
        // Directly written images live in host-visible, cached memory.
        // NOTE(review): HOST_COHERENT is not requested and no flush is visible in this header - confirm
        // the chosen memory type is coherent. The mapped size is also not checked against the
        // decompression capacity used by the reader thread.
        VkMemoryPropertyFlags mflags = 0;
        if(!needTransfer)
            mflags = VK_MEMORY_PROPERTY_HOST_VISIBLE_BIT | VK_MEMORY_PROPERTY_HOST_CACHED_BIT;
        if(!dev.GetMemoryType(mem_reqs.memoryTypeBits, mflags, &info.memoryTypeIndex))
            printf("Could not find memory type.\n");
        VK_CHECK_RESULT(vkAllocateMemory(dev.device, &info, NULL, &image.image_memory));
        if(!needTransfer)
            VK_CHECK_RESULT(vkMapMemory(dev.device, image.image_memory, 0, mem_reqs.size, 0, (void**)&image.pMappedData));
        VK_CHECK_RESULT(vkBindImageMemory(dev.device, image.image, image.image_memory, 0));

        VK_CHECK_RESULT(CallWith($M(VkImageViewCreateInfo{VK_STRUCTURE_TYPE_IMAGE_VIEW_CREATE_INFO},
            $(image) = image.image, $(viewType) = VK_IMAGE_VIEW_TYPE_2D,
            $(format) = format,
            $(subresourceRange) = SubresourceRange()),
            vkCreateImageView(dev.device, &ref, NULL, &image.image_view)));
#if USE_SAMPLER
        VK_CHECK_RESULT(CallWith($M(VkSamplerCreateInfo{VK_STRUCTURE_TYPE_SAMPLER_CREATE_INFO},
            $(magFilter) = VK_FILTER_NEAREST,
            $(minFilter) = VK_FILTER_NEAREST,
            $(mipmapMode) = VK_SAMPLER_MIPMAP_MODE_NEAREST, // zero
            $(addressModeU) = VK_SAMPLER_ADDRESS_MODE_CLAMP_TO_EDGE,
            $(addressModeV) = VK_SAMPLER_ADDRESS_MODE_CLAMP_TO_EDGE,
            $(addressModeW) = VK_SAMPLER_ADDRESS_MODE_CLAMP_TO_EDGE,
            $(maxLod) = 1.0f,
            $(borderColor) = VK_BORDER_COLOR_FLOAT_OPAQUE_BLACK),
            vkCreateSampler(dev.device, &ref, NULL, &image.image_sampler)));
#endif
        // Move the fresh image to GENERAL once, on the default queue.
        VkCommandBuffer cbuf = dev.CreateCommandBuffer();
        VulkanTexture::SetImageLayout(cbuf, image.image, VK_IMAGE_LAYOUT_UNDEFINED, VK_IMAGE_LAYOUT_GENERAL, { VK_IMAGE_ASPECT_COLOR_BIT, 0, 1, 0, 1 });
        dev.FlushCommandBuffer(cbuf,dev.defaultQueue);

        if(needTransfer)
        {
            // Pre-record GENERAL -> TRANSFER_DST, staging copy, TRANSFER_DST -> GENERAL;
            // the reader thread resubmits this buffer for every frame stored in this slot.
            //image.copyCmdBuf = dev.CreateCommandBuffer();
            VkCommandBufferAllocateInfo cmdBufAllocateInfo = {VK_STRUCTURE_TYPE_COMMAND_BUFFER_ALLOCATE_INFO};
            cmdBufAllocateInfo.commandPool = pool;
            cmdBufAllocateInfo.level = VK_COMMAND_BUFFER_LEVEL_PRIMARY;
            cmdBufAllocateInfo.commandBufferCount = 1;
            VkCommandBufferBeginInfo cmdBufInfo = {VK_STRUCTURE_TYPE_COMMAND_BUFFER_BEGIN_INFO};
            VK_CHECK_RESULT(vkAllocateCommandBuffers(dev.device, &cmdBufAllocateInfo, &image.copyCmdBuf));
            VK_CHECK_RESULT(vkBeginCommandBuffer(image.copyCmdBuf, &cmdBufInfo));
            VulkanTexture::SetImageLayout(image.copyCmdBuf, image.image, VK_IMAGE_LAYOUT_GENERAL, VK_IMAGE_LAYOUT_TRANSFER_DST_OPTIMAL, { VK_IMAGE_ASPECT_COLOR_BIT, 0, 1, 0, 1 });
            VkBufferImageCopy cp = VkBufferImageCopy {
                .bufferOffset = 0,
                .bufferRowLength = 1024,
                .imageSubresource =
                {
                    .aspectMask = VK_IMAGE_ASPECT_COLOR_BIT,
                    .mipLevel = 0,
                    .baseArrayLayer = 0,
                    .layerCount = 1,
                },
                .imageExtent = {
                    .width = width,
                    .height = height,
                    .depth = 1,
                },
            };
            vkCmdCopyBufferToImage(image.copyCmdBuf, stagingBuffer.buffer, image.image, VK_IMAGE_LAYOUT_TRANSFER_DST_OPTIMAL,
                1, &cp);
            VulkanTexture::SetImageLayout(image.copyCmdBuf, image.image, VK_IMAGE_LAYOUT_TRANSFER_DST_OPTIMAL, VK_IMAGE_LAYOUT_GENERAL, { VK_IMAGE_ASPECT_COLOR_BIT, 0, 1, 0, 1 });
            VK_CHECK_RESULT(vkEndCommandBuffer(image.copyCmdBuf));
        }
    }
    //reconstructionStream = fopen("reconstruction.bin", "rb");

    // The reader thread dereferences `device`, so publish it before the thread starts.
    device = &dev;
    int threadError = needTransfer
        ? pthread_create(&reconstructionThread, NULL, &ReconstructionReadThread<true>, this)
        : pthread_create(&reconstructionThread, NULL, &ReconstructionReadThread<false>, this);
    if(threadError != 0)
        printf("Could not create reconstruction read thread: %d\n", threadError);
}
};
// Combines decoder output with reconstruction source images and delivers the result to the swapchain.
//   TVideoDecoder         - used through decoder.images[n].image_view and decoder.ycbcr_sampler
//   TReconstructionSource - used through Setup(), sources[], inputSemaphore and outputSemaphore
template <typename TVideoDecoder, typename TReconstructionSource>
struct Reconstructor
{
VulkanDevice &dev;
VulkanContext &context;
TVideoDecoder& decoder;
TReconstructionSource &reconstruction;
// Only stores the references; GPU objects are created by Setup.
Reconstructor(VulkanDevice &dev_, VulkanContext &ctx, TVideoDecoder &decoder_, TReconstructionSource &src_)
: dev(dev_), context(ctx), decoder(decoder_), reconstruction(src_){}
VulkanFramebuffer swapchainFbs[MAX_SWAPCHAIN_IMAGES];
// Read by Setup but never written in this struct - presumably filled by the caller beforehand; TODO confirm.
VkImage swapchainImages[MAX_SWAPCHAIN_IMAGES];
//VkFence chainFences[MAX_SWAPCHAIN_IMAGES];
// direct reconstruction to swapchain
// (only recorded inside a disabled `#if 0` region of Setup)
VkCommandBuffer commandBuffers[MAX_SWAPCHAIN_IMAGES * MAX_DECODER_FRAMES];
// reconstruction to temporary framebuffer
VulkanFramebuffer reconstructionFbs[RECONSTRUCTION_TARGET_FRAMES];
VkImage reconstructionImages[RECONSTRUCTION_TARGET_FRAMES];
VkDeviceMemory reconstructionImagesMem[RECONSTRUCTION_TARGET_FRAMES];
// Submitted by SubmitFrame as [reconstruction slot * MAX_DECODER_FRAMES + decoder frame]
VkCommandBuffer reconstructionCommandBuffers[MAX_DECODER_FRAMES * RECONSTRUCTION_TARGET_FRAMES];
// Indexed as [swapchain image * RECONSTRUCTION_TARGET_FRAMES + reconstruction slot]
VkCommandBuffer presentCommandBuffers[MAX_SWAPCHAIN_IMAGES * RECONSTRUCTION_TARGET_FRAMES];
// todo: maybe we need timeline semaphore here
// Created in Setup; not used by SubmitFrame.
VkSemaphore reconstructionSemaphore[RECONSTRUCTION_TARGET_FRAMES];
// todo
ReconstructionGraphicsPipeline graphicsPipeline;
ReconstructionComputePipeline computePipeline;
VkDescriptorSet descriptorSet;
VkDescriptorSet swapchainDescriptorSets[MAX_SWAPCHAIN_IMAGES*MAX_DECODER_FRAMES];
VkDescriptorSet reconstructionDescriptorSets[RECONSTRUCTION_TARGET_FRAMES*MAX_DECODER_FRAMES];
VkDescriptorSet decodeDescriptorSet[MAX_DECODER_FRAMES];
// todo
// true: reconstruct into an intermediate image, then copy it to the swapchain (see SubmitFrame)
bool separateReconstruction = true;
// Layout the swapchain image is left in by the recorded command buffers
VkImageLayout swapchainLayout = VK_IMAGE_LAYOUT_PRESENT_SRC_KHR;
// unused stuff
//VulkanBuffer uboBuf;
//VulkanBuffer stagingVert;
//VulkanBuffer stagingInd;
//VkFence swapchainFence;
// Number of swapchain framebuffers created by Setup; consumed by Destroy
uint32_t numFramebuffers;
// Maps a frame counter onto one of four reconstruction slots by keeping its two lowest bits.
int GetReconstructionFrameIndex(int frame_idx)
{
    const int slot_mask = 3;
    return frame_idx & slot_mask;
}
// Disabled (#if 0) single-threaded reader kept for reference. It reads a length-prefixed
// packet from gReconstruction.reconstructionStream and uploads it.
// `dec` and `gReconstruction` are not declared anywhere in the visible part of this header,
// so this code cannot be enabled as is.
#if 0
// todo: external threaded reconstruction source interface
int ReadReconstructionFrame()
{
if(!dec)
dec = xpack_alloc_decompressor();
uint32_t length = 0;
ReconsructionSourceImage &image = gReconstruction.sources[++gReconstruction.frame_idx & 3];
fread(&length, sizeof(int), 1, gReconstruction.reconstructionStream);
#if 1
#if 1
static char buffer[((1024 + 3) >> 2) * ((540 + 3) >> 2)*8+16];
fread(buffer, 1, length, gReconstruction.reconstructionStream);
//LZ4_decompress_safe(buffer,(char*)gReconstruction.stagingBuffer.mapped, length, ((1024 + 3) >> 2) * ((540 + 3) >> 2)*8);
size_t length2;
xpack_decompress(dec, buffer, length, gReconstruction.stagingBuffer.mapped, ((1024 + 3) >> 2) * ((540 + 3) >> 2)*8+16, &length2);
#else
fread(gReconstruction.stagingBuffer.mapped, 1, length, gReconstruction.reconstructionStream);
#endif
CallWith(SubmitInfo(
image.copyCmdBuf),
vkQueueSubmit(gReconstruction.device->defaultQueue, 1, &ref, NULL));
#else
fread(image.pMappedData, 1, length, gReconstruction.reconstructionStream);
#endif
return gReconstruction.frame_idx;
}
#endif
bool Setup(bool compute, int width, int height, size_t numSwapchainImages)
{
// todo: reconstruction frames source resolution may be lower (another way doing foveated rendering)
// this might be useful, because foveated stream high-frequency part is much detailed
// on edges and may increase bandwidth to much
int rwidth_out = width; // fov
int rheight_out = height; // fov
int rwidth_in = rwidth_out/2;
int rheight_in = rheight_out/2;
//dev.CreateAndMap(uboBuf, sizeof(UBO));
reconstruction.Setup(context, dev, VK_FORMAT_BC1_RGB_UNORM_BLOCK, rwidth_in, rheight_in);
for(int i = 0; i < numSwapchainImages; i++)
{
swapchainFbs[i].Create(dev.device);
if(!compute)
{
swapchainFbs[i].CreateDepthAttachment(dev, VK_FORMAT_D32_SFLOAT, width, height);
swapchainFbs[i].Init(swapchainImages[i], VK_FORMAT_B8G8R8A8_UNORM, width, height);
}
else
{
CallWith($M(VkImageViewCreateInfo{VK_STRUCTURE_TYPE_IMAGE_VIEW_CREATE_INFO},
$(image) = swapchainImages[i], $(viewType) = VK_IMAGE_VIEW_TYPE_2D,
$(format) = VK_FORMAT_B8G8R8A8_UNORM,
$(subresourceRange) = SubresourceRange()),
vkCreateImageView(dev.device, &ref, NULL, &swapchainFbs[i].color_view));
}
}
for(int i = 0; i < RECONSTRUCTION_TARGET_FRAMES; i++)
{
VkSemaphoreCreateInfo semaphoreCreateInfo = {VK_STRUCTURE_TYPE_SEMAPHORE_CREATE_INFO};
VK_CHECK_RESULT(vkCreateSemaphore(dev.device, &semaphoreCreateInfo, nullptr, &reconstructionSemaphore[i]));
CallWith(Image2dInfo(VK_IMAGE_USAGE_STORAGE_BIT|VK_IMAGE_USAGE_TRANSFER_SRC_BIT|VK_IMAGE_USAGE_COLOR_ATTACHMENT_BIT,
VK_FORMAT_B8G8R8A8_UNORM, rwidth_out, rheight_out),
vkCreateImage(dev.device, &ref,NULL, &reconstructionImages[i]));
VkMemoryRequirements mem_reqs;
vkGetImageMemoryRequirements(dev.device, reconstructionImages[i], &mem_reqs);
VkMemoryAllocateInfo mem_alloc = AllocateInfo(mem_reqs.size);
if (!dev.GetMemoryType(mem_reqs.memoryTypeBits, VK_MEMORY_PROPERTY_DEVICE_LOCAL_BIT,
&mem_alloc.memoryTypeIndex))
printf("Could not find memory type.\n");
VK_CHECK_RESULT(vkAllocateMemory(dev.device, &mem_alloc, NULL, &reconstructionImagesMem[i]));
VK_CHECK_RESULT(vkBindImageMemory(dev.device, reconstructionImages[i], reconstructionImagesMem[i], 0));
}
if(compute)
{
computePipeline.Init(dev.device, swapchainFbs[0].render_pass, &decoder.ycbcr_sampler,
numSwapchainImages, MAX_DECODER_FRAMES, RECONSTRUCTION_TARGET_FRAMES,
rwidth_in, rheight_in, width, height);
for(int i = 0; i < numSwapchainImages; i++)
{
for(int j = 0; j < MAX_DECODER_FRAMES; j++)
{
int ci = MAX_DECODER_FRAMES * i + j;
//swapchainDescriptorSets[ci] = computePipeline.AllocateSingleDescriptorSet();
// todo: reconstruction sources are incorrect here!
//computePipeline.UpdateDescriptors(swapchainDescriptorSets[ci], decoder.images[j].image_view, decoder.ycbcr_sampler, reconstruction.sources[i].image_view, swapchainFbs[i].color_view, reconstruction.sources[i].image_sampler);
}
}
for(int i = 0; i < RECONSTRUCTION_TARGET_FRAMES; i++)
{
CallWith($M(VkImageViewCreateInfo{VK_STRUCTURE_TYPE_IMAGE_VIEW_CREATE_INFO},
$(image) = reconstructionImages[i], $(viewType) = VK_IMAGE_VIEW_TYPE_2D,
$(format) = VK_FORMAT_B8G8R8A8_UNORM,
$(subresourceRange) = SubresourceRange()),
vkCreateImageView(dev.device, &ref, NULL, &reconstructionFbs[i].color_view));
for(int j = 0; j < MAX_DECODER_FRAMES; j++)
{
int ci = MAX_DECODER_FRAMES * i + j;
reconstructionDescriptorSets[ci] = computePipeline.AllocateSingleDescriptorSet();
computePipeline.UpdateDescriptors(reconstructionDescriptorSets[ci], decoder.images[j].image_view, decoder.ycbcr_sampler, reconstruction.sources[i].image_view, reconstructionFbs[i].color_view, reconstruction.sources[i].image_sampler);
}
}
}
else
{
graphicsPipeline.Init(dev.device, swapchainFbs[0].render_pass, &decoder.ycbcr_sampler, MAX_DECODER_FRAMES);
for(int j = 0; j < MAX_DECODER_FRAMES; j++)
{
decodeDescriptorSet[j] = graphicsPipeline.AllocateSingleDescriptorSet();
graphicsPipeline.UpdateDescriptors(decodeDescriptorSet[j], decoder.images[j].image_view, decoder.ycbcr_sampler);
}
for(int i = 0; i < RECONSTRUCTION_TARGET_FRAMES; i++)
{
reconstructionFbs[i].Create(dev.device);
reconstructionFbs[i].CreateDepthAttachment(dev,VK_FORMAT_D32_SFLOAT, rwidth_out, rheight_out);
reconstructionFbs[i].Init(reconstructionImages[i],VK_FORMAT_B8G8R8A8_UNORM, rwidth_out, rheight_out);
}
}
for(int i = 0; i < numSwapchainImages; i++)
{
#if 0
for(int j = 0; j < MAX_DECODER_FRAMES; j++)
{
int ci = MAX_DECODER_FRAMES * i + j;
commandBuffers[ci] = dev.CreateCommandBuffer();
//VulkanTexture::SetImageLayout(commandBuffers[i], swapchainImages[i], VK_IMAGE_LAYOUT_UNDEFINED, VK_IMAGE_LAYOUT_COLOR_ATTACHMENT_OPTIMAL, { VK_IMAGE_ASPECT_COLOR_BIT, 0, 1, 0, 1 });
//vkCmdClearColorImage(commandBuffers[i], swapchainImages[i],VK_IMAGE_LAYOUT_COLOR_ATTACHMENT_OPTIMAL, &color, 1, &range );
if(!compute)
{
swapchainFbs[i].BeginRenderPass(commandBuffers[ci]);
swapchainFbs[i].SetViewportAndScissor(commandBuffers[ci]);
vkCmdBindDescriptorSets(commandBuffers[ci],VK_PIPELINE_BIND_POINT_GRAPHICS,graphicsPipeline.pipelineLayout, 0, 1, &decodeDescriptorSet[j], 0, nullptr);
vkCmdBindPipeline(commandBuffers[ci], VK_PIPELINE_BIND_POINT_GRAPHICS,graphicsPipeline.pipeline);
// VkDeviceSize offset = 0;
//vkCmdBindVertexBuffers(commandBuffers[i],0, 1, &stagingVert.buffer, &offset);
//vkCmdBindIndexBuffer(commandBuffers[i], stagingInd.buffer, 0, VK_INDEX_TYPE_UINT32);
//vkCmdDrawIndexed(commandBuffers[i], 3, 1, 0, 0, 0);
vkCmdDraw(commandBuffers[ci], 4, 1, 0, 0);
vkCmdEndRenderPass(commandBuffers[ci]);
if(swapchainLayout != VK_IMAGE_LAYOUT_COLOR_ATTACHMENT_OPTIMAL)
VulkanTexture::SetImageLayout(commandBuffers[ci], swapchainImages[i], VK_IMAGE_LAYOUT_COLOR_ATTACHMENT_OPTIMAL, swapchainLayout, { VK_IMAGE_ASPECT_COLOR_BIT, 0, 1, 0, 1 });
}
else
{
const int WIDTH = rwidth_in;
const int HEIGHT = rheight_in;
const int WORKGROUP_SIZE = 32; // Workgroup size in compute shader.
VulkanTexture::SetImageLayout(commandBuffers[ci], swapchainImages[i], VK_IMAGE_LAYOUT_UNDEFINED, VK_IMAGE_LAYOUT_GENERAL, { VK_IMAGE_ASPECT_COLOR_BIT, 0, 1, 0, 1 });
vkCmdBindPipeline(commandBuffers[ci], VK_PIPELINE_BIND_POINT_COMPUTE, computePipeline.pipeline);
vkCmdBindDescriptorSets(commandBuffers[ci], VK_PIPELINE_BIND_POINT_COMPUTE, computePipeline.pipelineLayout, 0, 1, &swapchainDescriptorSets[ci], 0, NULL);
vkCmdDispatch(commandBuffers[ci], (uint32_t)ceil(WIDTH / float(WORKGROUP_SIZE)), (uint32_t)ceil(HEIGHT / float(WORKGROUP_SIZE)), 1);
if(swapchainLayout != VK_IMAGE_LAYOUT_GENERAL)
VulkanTexture::SetImageLayout(commandBuffers[ci], swapchainImages[i], VK_IMAGE_LAYOUT_GENERAL, swapchainLayout, { VK_IMAGE_ASPECT_COLOR_BIT, 0, 1, 0, 1 });
}
vkEndCommandBuffer(commandBuffers[ci]);
}
#endif
//VK_CHECK_RESULT(CallWith(FenceInfo(VK_FENCE_CREATE_SIGNALED_BIT),
// vkCreateFence(dev.device, &ref, NULL, &chainFences[i])));
}
for(int i = 0; i < numSwapchainImages; i++)
{
for(int j = 0; j < RECONSTRUCTION_TARGET_FRAMES; j++)
{
int ci = RECONSTRUCTION_TARGET_FRAMES*i+j;
presentCommandBuffers[ci] = dev.CreateCommandBuffer();
//VulkanTexture::SetImageLayout(presentCommandBuffers[ci], reconstructionImages[j], VK_IMAGE_LAYOUT_GENERAL, VK_IMAGE_LAYOUT_TRANSFER_SRC_OPTIMAL, { VK_IMAGE_ASPECT_COLOR_BIT, 0, 1, 0, 1 });
VulkanTexture::SetImageLayout(presentCommandBuffers[ci], swapchainImages[i], VK_IMAGE_LAYOUT_UNDEFINED, VK_IMAGE_LAYOUT_TRANSFER_DST_OPTIMAL, { VK_IMAGE_ASPECT_COLOR_BIT, 0, 1, 0, 1 });
VkImageCopy regions = $M(VkImageCopy{}, $(srcSubresource) = SubresourceLayers(), $(dstSubresource) = SubresourceLayers(), $(extent) = VkExtent3D{(unsigned int)rwidth_out, (unsigned int)rheight_out, 1});
vkCmdCopyImage(presentCommandBuffers[ci], reconstructionImages[j],VK_IMAGE_LAYOUT_TRANSFER_SRC_OPTIMAL, swapchainImages[i], VK_IMAGE_LAYOUT_TRANSFER_DST_OPTIMAL, 1, &regions);
if(swapchainLayout != VK_IMAGE_LAYOUT_TRANSFER_DST_OPTIMAL)
VulkanTexture::SetImageLayout(presentCommandBuffers[ci], swapchainImages[i], VK_IMAGE_LAYOUT_TRANSFER_DST_OPTIMAL, swapchainLayout, { VK_IMAGE_ASPECT_COLOR_BIT, 0, 1, 0, 1 });
vkEndCommandBuffer(presentCommandBuffers[ci]);
}
}
for(int i = 0; i < RECONSTRUCTION_TARGET_FRAMES; i++)
{
for(int j = 0; j < MAX_DECODER_FRAMES; j++)
{
int ci = RECONSTRUCTION_TARGET_FRAMES*i+j;
reconstructionCommandBuffers[ci] = dev.CreateCommandBuffer();
if(!compute)
{
VulkanTexture::SetImageLayout(reconstructionCommandBuffers[ci], reconstructionImages[i], VK_IMAGE_LAYOUT_UNDEFINED, VK_IMAGE_LAYOUT_COLOR_ATTACHMENT_OPTIMAL, { VK_IMAGE_ASPECT_COLOR_BIT, 0, 1, 0, 1 });
reconstructionFbs[i].BeginRenderPass(reconstructionCommandBuffers[ci]);
reconstructionFbs[i].SetViewportAndScissor(reconstructionCommandBuffers[ci]);
vkCmdBindDescriptorSets(reconstructionCommandBuffers[ci],VK_PIPELINE_BIND_POINT_GRAPHICS,graphicsPipeline.pipelineLayout, 0, 1, &decodeDescriptorSet[j], 0, nullptr);
vkCmdBindPipeline(reconstructionCommandBuffers[ci], VK_PIPELINE_BIND_POINT_GRAPHICS,graphicsPipeline.pipeline);
// VkDeviceSize offset = 0;
//vkCmdBindVertexBuffers(commandBuffers[i],0, 1, &stagingVert.buffer, &offset);
//vkCmdBindIndexBuffer(commandBuffers[i], stagingInd.buffer, 0, VK_INDEX_TYPE_UINT32);
//vkCmdDrawIndexed(commandBuffers[i], 3, 1, 0, 0, 0);
vkCmdDraw(reconstructionCommandBuffers[ci], 4, 1, 0, 0);
vkCmdEndRenderPass(reconstructionCommandBuffers[ci]);
VulkanTexture::SetImageLayout(reconstructionCommandBuffers[ci], reconstructionImages[i], VK_IMAGE_LAYOUT_COLOR_ATTACHMENT_OPTIMAL, VK_IMAGE_LAYOUT_TRANSFER_SRC_OPTIMAL, { VK_IMAGE_ASPECT_COLOR_BIT, 0, 1, 0, 1 });
}
else
{
const int WIDTH = rwidth_in;
const int HEIGHT = rheight_in;
const int WORKGROUP_SIZE = 32; // Workgroup size in compute shader.
//VulkanTexture::SetImageLayout(reconstructionCommandBuffers[ci], swapchainImages[i], VK_IMAGE_LAYOUT_UNDEFINED, VK_IMAGE_LAYOUT_GENERAL, { VK_IMAGE_ASPECT_COLOR_BIT, 0, 1, 0, 1 });
VulkanTexture::SetImageLayout(reconstructionCommandBuffers[ci], reconstructionImages[i], VK_IMAGE_LAYOUT_UNDEFINED, VK_IMAGE_LAYOUT_GENERAL, { VK_IMAGE_ASPECT_COLOR_BIT, 0, 1, 0, 1 });
vkCmdBindPipeline(reconstructionCommandBuffers[ci], VK_PIPELINE_BIND_POINT_COMPUTE, computePipeline.pipeline);
vkCmdBindDescriptorSets(reconstructionCommandBuffers[ci], VK_PIPELINE_BIND_POINT_COMPUTE, computePipeline.pipelineLayout, 0, 1, &reconstructionDescriptorSets[ci], 0, NULL);
vkCmdDispatch(reconstructionCommandBuffers[ci], (uint32_t)ceil(WIDTH / float(WORKGROUP_SIZE)), (uint32_t)ceil(HEIGHT / float(WORKGROUP_SIZE)), 1);
VulkanTexture::SetImageLayout(reconstructionCommandBuffers[ci], reconstructionImages[i], VK_IMAGE_LAYOUT_GENERAL, VK_IMAGE_LAYOUT_TRANSFER_SRC_OPTIMAL, { VK_IMAGE_ASPECT_COLOR_BIT, 0, 1, 0, 1 });
}
vkEndCommandBuffer(reconstructionCommandBuffers[ci]);
}
}
numFramebuffers = numSwapchainImages;
return true;
}
void SubmitFrame(uint32_t frame_idx, uint32_t idx, uint32_t decoder_idx, VkFence fence, VkSemaphore renderSemaphore, VkSemaphore presentSemaphore)
{
VkPipelineStageFlags waitDstStageMask[2] = {VK_PIPELINE_STAGE_TOP_OF_PIPE_BIT, VK_PIPELINE_STAGE_TOP_OF_PIPE_BIT};
VkSemaphore semaphores[2] = {reconstruction.inputSemaphore, renderSemaphore};
uint64_t wait_vals[2] = {(uint64_t)frame_idx};
if(separateReconstruction)
{
int ridx = GetReconstructionFrameIndex(frame_idx);//ReadReconstructionFrame() & 3;
CallWith($Sc(
SubmitInfo(
reconstructionCommandBuffers[ridx * MAX_DECODER_FRAMES + decoder_idx],$(waitSemaphoreCount) = 1 + !!renderSemaphore, $(signalSemaphoreCount),
$(pWaitSemaphores) = semaphores,
$(pSignalSemaphores) = &reconstruction.outputSemaphore,
$(pWaitDstStageMask) = &waitDstStageMask),
$M(VkTimelineSemaphoreSubmitInfoKHR{VK_STRUCTURE_TYPE_TIMELINE_SEMAPHORE_SUBMIT_INFO_KHR},
$(waitSemaphoreValueCount) = 1 + !!renderSemaphore, $(signalSemaphoreValueCount),
$(pWaitSemaphoreValues) = wait_vals,
$(pSignalSemaphoreValues) &= (uint64_t)frame_idx + 4)),
vkQueueSubmit(dev.defaultQueue, 1, &ref, NULL));
CallWith($Sc(
SubmitInfo(
presentCommandBuffers[idx * RECONSTRUCTION_TARGET_FRAMES + ridx],$(waitSemaphoreCount), $(signalSemaphoreCount) = !!presentSemaphore,
$(pWaitSemaphores) = &reconstruction.outputSemaphore,
$(pSignalSemaphores) = &presentSemaphore,
$(pWaitDstStageMask) = &waitDstStageMask),
$M(VkTimelineSemaphoreSubmitInfoKHR{VK_STRUCTURE_TYPE_TIMELINE_SEMAPHORE_SUBMIT_INFO_KHR}, $(waitSemaphoreValueCount),
$(pWaitSemaphoreValues) &=(uint64_t)frame_idx + 4)),
vkQueueSubmit(dev.defaultQueue, 1, &ref, fence));
//ridx = (ridx + 1) & 3;
}
else
{
CallWith(
SubmitInfo(
commandBuffers[idx*MAX_DECODER_FRAMES + decoder_idx],$(waitSemaphoreCount), $(signalSemaphoreCount),
$(pWaitSemaphores) = &renderSemaphore,
$(pSignalSemaphores) = &presentSemaphore,
$(pWaitDstStageMask) = &waitDstStageMask),
vkQueueSubmit(dev.defaultQueue, 1, &ref, fence));
}
}
// Destroys the swapchain framebuffers counted by numFramebuffers.
// Nothing else created by Setup is released here yet.
void Destroy()
{
    for(uint32_t fb = 0; fb != numFramebuffers; ++fb)
    {
        swapchainFbs[fb].Destroy();
    }
    // todo
    //gReconstruction.Destroy();
}
};
#endif // RECONSTRUCTOR_H