Specialize compute pipeline resolution, split different resolution constants preparing for real reconstruction pipeline

This commit is contained in:
mittorn 2024-10-29 22:32:50 +03:00
parent 13ff80a9d0
commit 7ec9f972ac
3 changed files with 58 additions and 68 deletions

View file

@ -1,16 +1,18 @@
#version 450
#extension GL_ARB_separate_shader_objects : enable
#define WIDTH 1920
#define HEIGHT 1080
#define WORKGROUP_SIZE 32
layout (local_size_x = WORKGROUP_SIZE, local_size_y = WORKGROUP_SIZE, local_size_z = 1 ) in;
layout (binding = 0) uniform sampler2D ycbcrSampler;
layout (binding = 1, rgba8) uniform image2D resultImage;
layout (constant_id = 0) const float width = 1920;
layout (constant_id = 1) const float height = 1080;
const vec2 in_res = vec2(width,height);
void main()
{
vec2 uv = vec2(gl_GlobalInvocationID.xy)/vec2(WIDTH,HEIGHT);
vec2 uv = vec2(gl_GlobalInvocationID.xy)/in_res;
vec3 yuv = texture(ycbcrSampler,uv).xyz;
float y = yuv.y;
float u = yuv.z;// - 0.5;

View file

@ -21,9 +21,6 @@ extern "C" {
#include <GLFW/glfw3.h>
#define GLFW_EXPOSE_NATIVE_X11
#include <GLFW/glfw3native.h>
// todo: REMOVE THIS. Why "simpliest triangle" demos even use this????
// It does not "HELP" drawing 2D triangle!!! It is not useful in engines using own matrix transform code!
//#include <glm/gtc/matrix_transform.hpp>
#define MAX_SWAPCHAIN_IMAGES 8
// workaround for ffmpeg stable api nonsense with incompatible pointers between versions
struct PointerWrap
@ -57,10 +54,9 @@ struct DecoderImage
};
void CreateSoftwareImage(VkInstance inst, VulkanDevice &dev, DecoderImage &image, const VkSamplerYcbcrConversionInfo &ycbcr_info, VkFormat format)
void CreateSoftwareImage(VkInstance inst, VulkanDevice &dev, DecoderImage &image, const VkSamplerYcbcrConversionInfo &ycbcr_info, VkFormat format, unsigned int width, unsigned int height)
{
CallWith(Image2dInfo(VK_IMAGE_USAGE_SAMPLED_BIT, format, 1920, 1080,
CallWith(Image2dInfo(VK_IMAGE_USAGE_SAMPLED_BIT, format, width, height,
$(flags) = VK_IMAGE_CREATE_DISJOINT_BIT,
$(tiling) = VK_IMAGE_TILING_LINEAR,
$(initialLayout) = VK_IMAGE_LAYOUT_PREINITIALIZED ),
@ -219,7 +215,7 @@ static void *DecoderThread(void*)
return NULL;
}
template <bool p010>
static void SetupFFMpeg(VkInstance inst, VulkanDevice &dev)
static void SetupFFMpeg(VkInstance inst, VulkanDevice &dev, unsigned int width, unsigned int height)
{
PFN_vkCreateSamplerYcbcrConversion pvkCreateSamplerYcbcrConversion = (PFN_vkCreateSamplerYcbcrConversion)vkGetInstanceProcAddr(inst, "vkCreateSamplerYcbcrConversionKHR");
@ -251,18 +247,28 @@ static void SetupFFMpeg(VkInstance inst, VulkanDevice &dev)
vkCreateSampler(dev.device, &ref, NULL, &gFF.ycbcr_sampler);
);
for(int i = 0; i < MAX_DECODER_FRAMES; i++)
CreateSoftwareImage(inst, dev, gFF.images[i], ycbcr_info, format);
CreateSoftwareImage(inst, dev, gFF.images[i], ycbcr_info, format, width, height);
pthread_mutex_init(&gFF.mutex, NULL);
pthread_cond_init(&gFF.cond, NULL);
pthread_create(&gFF.thread, NULL, &DecoderThread<p010>, NULL);
}
// decoder outputs frames mapped to random VkImage from decoder pool
// reconstruction source frames match decoder frame index (but not image index)
#define RECONSTRUCTION_SOURCE_FRAMES 4
// if separate reconstruction is disabled, each decoder/reconstruction source combo generates a command buffer for each swapchain image
// separate reconstruction:
// reconstruction target frame index always matches reconstruction source frames
#define RECONSTRUCTION_TARGET_FRAMES 4
// each reconstruction target builds a command buffer for each decoder image
// foveation pipeline builds a command buffer for each swapchain image/reprojection target pair
// reconstruction
// sample one decoder pixel and one reconstruction pixel
// map decoder pixels to 0-511 (or 0-2) and reconstruction to -256-255 (-1-1) (divide by two?)
// graphics
// depending on even/odd coordinates, add or subtract pixels and write to output
struct GraphicsApplicationPipeline: BaseVulkanPipeline
{
void Init(VkDevice dev, VkRenderPass renderPass, VkSampler *pImmutableSamplers, int count)
{
@ -301,11 +307,11 @@ struct GraphicsApplicationPipeline: BaseVulkanPipeline
);
}
};
// compute
// write 4 output pixels for each source pair
struct ComputeApplicationPipeline: BaseVulkanPipeline
{
void Init(VkDevice dev, VkRenderPass renderPass, VkSampler *pImmutableSamplers, int count1, int count2, int count3)
void Init(VkDevice dev, VkRenderPass renderPass, VkSampler *pImmutableSamplers, int count1, int count2, int count3, unsigned int width, unsigned int height, unsigned int fwidth, unsigned int fheight)
{
device = dev;
@ -316,8 +322,13 @@ struct ComputeApplicationPipeline: BaseVulkanPipeline
CreatePool((count1 + count3) * count2,
BasicPoolSize(VK_DESCRIPTOR_TYPE_COMBINED_IMAGE_SAMPLER, count2 * count3),
BasicPoolSize(VK_DESCRIPTOR_TYPE_STORAGE_IMAGE, count3 * count2 * count1));
// todo: wrapper for specialization arrays...
float specData[2] = {(float)width,(float)height};
VkSpecializationMapEntry specs[2] = {{0, 0, sizeof(float)},1,sizeof(float),sizeof(float)};
VkSpecializationInfo sinfo = {2, specs, sizeof(specData), specData };
VkShaderModule shader;
CreateComputePipeline(ShaderFromFile(shader, "quad.comp.spv", VK_SHADER_STAGE_COMPUTE_BIT));
// todo: combined reconstruction/foveation shader? (maybe ineffective for compute pipeline)
CreateComputePipeline(ShaderFromFile(shader, "quad.comp.spv", VK_SHADER_STAGE_COMPUTE_BIT, &sinfo));
// todo: shouldn't we destroy the shader internally?
vkDestroyShaderModule(device, shader, NULL);
}
@ -329,6 +340,8 @@ struct ComputeApplicationPipeline: BaseVulkanPipeline
}
};
// todo: how should compute defoveation be done (input/output resolution?)
struct GraphicsApplication
{
VulkanContext context;
@ -357,7 +370,6 @@ struct GraphicsApplication
struct UBO
{
//glm::mat4 transformationMatrix;
};
GraphicsApplicationPipeline graphicsPipeline;
@ -471,28 +483,6 @@ struct GraphicsApplication
vkDestroySurfaceKHR(context.instance,surface,NULL);
}
// todo: NUKE THIS!!!
void updateUniformData() {
static unsigned int frameNum;
// Rotate based on time
long long millis = frameNum++;
#if 0
float angle = (millis % 400) / 400.0f * glm::radians(360.f);
glm::mat4 modelMatrix = glm::identity<glm::mat4>();
modelMatrix = glm::rotate(modelMatrix, angle, glm::vec3(0, 0, 1));
//((UBO*)uboBuf.mapped)->transformationMatrix = modelMatrix;
modelMatrix = glm::translate(modelMatrix, glm::vec3(0.5f / 3.0f, -0.5f / 3.0f, 0.0f));
// Set up view
auto viewMatrix = glm::lookAt(glm::vec3(1, 1, 1), glm::vec3(0, 0, 0), glm::vec3(0, 0, -1));
// Set up projection
auto projMatrix = glm::perspective(glm::radians(70.f), 800.0f / 600.0f, 0.1f, 10.0f);
((UBO*)uboBuf.mapped)->transformationMatrix = projMatrix * viewMatrix * modelMatrix;
#endif
}
void waitFence(int chidx)
{
VK_CHECK_RESULT(vkWaitForFences(dev.device, 1, &chainFences[chidx], VK_TRUE, 100000000000));
@ -505,24 +495,32 @@ struct GraphicsApplication
#else
#define ENABLE_VALIDATION_LAYERS 0
#endif
bool compute = false;
bool separateReconstruction = false;
bool compute = true;
bool separateReconstruction = true;
context.Create("streamingengine", "vulkan-playground-server", ENABLE_VALIDATION_LAYERS);
dev.Create(context.FindPhysicalDevice(), compute?VK_QUEUE_COMPUTE_BIT : VK_QUEUE_GRAPHICS_BIT);
dev.CreateDevice(context);
int width = 1920;
int height = 1080;
// todo: reconstruction frames source resolution may be lower (another way of doing foveated rendering)
// this might be useful, because the high-frequency part of a foveated stream is very detailed
// on edges and may increase bandwidth too much
int rwidth_out = 1920; // fov
int rheight_out = 1080; // fov
int rwidth_in = 1920; // /2
int rheight_in = 1080; // /2
CreateWindow("demo", width, height, false);
CreateSwapchain(width,height, !compute);
printf("%d swapchain images\n", numSwapchainImages);
dev.CreateAndMap(uboBuf, sizeof(UBO));
SetupFFMpeg<false>(context.instance, dev);
SetupFFMpeg<false>(context.instance, dev, rwidth_in, rheight_in);
for(int i = 0; i < RECONSTRUCTION_TARGET_FRAMES; i++)
{
VkSemaphoreCreateInfo semaphoreCreateInfo = {VK_STRUCTURE_TYPE_SEMAPHORE_CREATE_INFO};
VK_CHECK_RESULT(vkCreateSemaphore(dev.device, &semaphoreCreateInfo, nullptr, &reconstructionSemaphore[i]));
CallWith(Image2dInfo(VK_IMAGE_USAGE_STORAGE_BIT|VK_IMAGE_USAGE_TRANSFER_SRC_BIT|VK_IMAGE_USAGE_COLOR_ATTACHMENT_BIT,
VK_FORMAT_B8G8R8A8_UNORM, width, height),
VK_FORMAT_B8G8R8A8_UNORM, rwidth_out, rheight_out),
vkCreateImage(dev.device, &ref,NULL, &reconstructionImages[i]));
VkMemoryRequirements mem_reqs;
@ -539,7 +537,9 @@ struct GraphicsApplication
}
if(compute)
{
computePipeline.Init(dev.device, swapchainFbs[0].render_pass, &gFF.ycbcr_sampler, numSwapchainImages, MAX_DECODER_FRAMES, RECONSTRUCTION_TARGET_FRAMES);
computePipeline.Init(dev.device, swapchainFbs[0].render_pass, &gFF.ycbcr_sampler,
numSwapchainImages, MAX_DECODER_FRAMES, RECONSTRUCTION_TARGET_FRAMES,
rwidth_in, rheight_in, width, height);
for(int i = 0; i < numSwapchainImages; i++)
{
for(int j = 0; j < MAX_DECODER_FRAMES; j++)
@ -576,23 +576,11 @@ struct GraphicsApplication
for(int i = 0; i < RECONSTRUCTION_TARGET_FRAMES; i++)
{
reconstructionFbs[i].Create(dev.device);
reconstructionFbs[i].CreateDepthAttachment(dev,VK_FORMAT_D32_SFLOAT, width, height);
reconstructionFbs[i].Init(reconstructionImages[i],VK_FORMAT_B8G8R8A8_UNORM, width, height);
reconstructionFbs[i].CreateDepthAttachment(dev,VK_FORMAT_D32_SFLOAT, rwidth_out, rheight_out);
reconstructionFbs[i].Init(reconstructionImages[i],VK_FORMAT_B8G8R8A8_UNORM, rwidth_out, rheight_out);
}
}
/*updateUniformData();
Vertex vertices[] = {
{ { -0.5f, -0.5f, 0.0f }, { 1.0f, 0.0f, 0.0f } },
{ { -0.5f, 0.5f, 0.0f }, { 0.0f, 1.0f, 0.0f } },
{ { 0.5f, 0.5f, 0.0f }, { 0.0f, 0.0f, 1.0f } }
};
uint32_t indices[] = { 0, 1, 2 };
dev.CreateBuffer(stagingVert, VK_BUFFER_USAGE_VERTEX_BUFFER_BIT | VK_BUFFER_USAGE_TRANSFER_DST_BIT, VK_MEMORY_PROPERTY_HOST_VISIBLE_BIT, sizeof(vertices), vertices);
dev.CreateBuffer(stagingInd, VK_BUFFER_USAGE_INDEX_BUFFER_BIT | VK_BUFFER_USAGE_TRANSFER_DST_BIT, VK_MEMORY_PROPERTY_HOST_VISIBLE_BIT, sizeof(indices), indices);
*/
for(int i = 0; i < numSwapchainImages; i++)
{
for(int j = 0; j < MAX_DECODER_FRAMES; j++)
@ -618,8 +606,8 @@ struct GraphicsApplication
}
else
{
const int WIDTH = 1920; // Size of rendered mandelbrot set.
const int HEIGHT = 1080; // Size of renderered mandelbrot set.
const int WIDTH = rwidth_in;
const int HEIGHT = rheight_in;
const int WORKGROUP_SIZE = 32; // Workgroup size in compute shader.
VulkanTexture::SetImageLayout(commandBuffers[ci], swapchainImages[i], VK_IMAGE_LAYOUT_UNDEFINED, VK_IMAGE_LAYOUT_GENERAL, { VK_IMAGE_ASPECT_COLOR_BIT, 0, 1, 0, 1 });
vkCmdBindPipeline(commandBuffers[ci], VK_PIPELINE_BIND_POINT_COMPUTE, computePipeline.pipeline);
@ -640,7 +628,7 @@ struct GraphicsApplication
presentCommandBuffers[ci] = dev.CreateCommandBuffer();
//VulkanTexture::SetImageLayout(presentCommandBuffers[ci], reconstructionImages[j], VK_IMAGE_LAYOUT_GENERAL, VK_IMAGE_LAYOUT_TRANSFER_SRC_OPTIMAL, { VK_IMAGE_ASPECT_COLOR_BIT, 0, 1, 0, 1 });
VulkanTexture::SetImageLayout(presentCommandBuffers[ci], swapchainImages[i], VK_IMAGE_LAYOUT_UNDEFINED, VK_IMAGE_LAYOUT_TRANSFER_DST_OPTIMAL, { VK_IMAGE_ASPECT_COLOR_BIT, 0, 1, 0, 1 });
VkImageCopy regions = $M(VkImageCopy{}, $(srcSubresource) = SubresourceLayers(), $(dstSubresource) = SubresourceLayers(), $(extent) = VkExtent3D{1920, 1080, 1});
VkImageCopy regions = $M(VkImageCopy{}, $(srcSubresource) = SubresourceLayers(), $(dstSubresource) = SubresourceLayers(), $(extent) = VkExtent3D{(unsigned int)rwidth_out, (unsigned int)rheight_out, 1});
vkCmdCopyImage(presentCommandBuffers[ci], reconstructionImages[j],VK_IMAGE_LAYOUT_TRANSFER_SRC_OPTIMAL, swapchainImages[i], VK_IMAGE_LAYOUT_TRANSFER_DST_OPTIMAL, 1, &regions);
VulkanTexture::SetImageLayout(presentCommandBuffers[ci], swapchainImages[i], VK_IMAGE_LAYOUT_TRANSFER_DST_OPTIMAL, VK_IMAGE_LAYOUT_PRESENT_SRC_KHR, { VK_IMAGE_ASPECT_COLOR_BIT, 0, 1, 0, 1 });
vkEndCommandBuffer(presentCommandBuffers[ci]);
@ -671,8 +659,8 @@ struct GraphicsApplication
}
else
{
const int WIDTH = 1920; // Size of rendered mandelbrot set.
const int HEIGHT = 1080; // Size of renderered mandelbrot set.
const int WIDTH = rwidth_in;
const int HEIGHT = rheight_in;
const int WORKGROUP_SIZE = 32; // Workgroup size in compute shader.
//VulkanTexture::SetImageLayout(reconstructionCommandBuffers[ci], swapchainImages[i], VK_IMAGE_LAYOUT_UNDEFINED, VK_IMAGE_LAYOUT_GENERAL, { VK_IMAGE_ASPECT_COLOR_BIT, 0, 1, 0, 1 });
VulkanTexture::SetImageLayout(reconstructionCommandBuffers[ci], reconstructionImages[i], VK_IMAGE_LAYOUT_UNDEFINED, VK_IMAGE_LAYOUT_GENERAL, { VK_IMAGE_ASPECT_COLOR_BIT, 0, 1, 0, 1 });
@ -690,7 +678,6 @@ struct GraphicsApplication
int ridx = 0;
while(true)
{
updateUniformData();
sem_idx = (sem_idx + 1) % numSwapchainImages;
waitFence(sem_idx);
idx = AcquireImage(sem_idx);

View file

@ -111,7 +111,7 @@ struct BaseVulkanPipeline
return ret;
}
VkPipelineShaderStageCreateInfo ShaderFromFile(VkShaderModule &outShaderModule, const char *filename, VkShaderStageFlagBits stage, const char *entrypoint = "main")
VkPipelineShaderStageCreateInfo ShaderFromFile(VkShaderModule &outShaderModule, const char *filename, VkShaderStageFlagBits stage, const VkSpecializationInfo *sinfo = NULL, const char *entrypoint = "main")
{
// todo: rewrite this in safer way
FILE* fp = fopen(filename, "rb");
@ -145,7 +145,8 @@ struct BaseVulkanPipeline
return $M(VkPipelineShaderStageCreateInfo{VK_STRUCTURE_TYPE_PIPELINE_SHADER_STAGE_CREATE_INFO},
$(stage) = stage,
$(module) = outShaderModule,
$(pName) = entrypoint);
$(pName) = entrypoint,
$(pSpecializationInfo) = sinfo);
}
static VkPipelineInputAssemblyStateCreateInfo AssemblyTopology(VkPrimitiveTopology topology = VK_PRIMITIVE_TOPOLOGY_TRIANGLE_LIST, bool restart = false, VkPipelineInputAssemblyStateCreateFlags flags = 0)
{