// Source listing: vulkan-playground/vkcompute2.cpp (304 lines, 12 KiB, C++)

/*
* Copyright (c) 2017 Eric Arnebäck
* Copyright (c) 2024 mittorn
*
* Permission is hereby granted, free of charge, to any person obtaining a
* copy of this software and associated documentation files (the
* "Software"), to deal in the Software without restriction, including
* without limitation the rights to use, copy, modify, merge, publish,
* distribute, sub license, and/or sell copies of the Software, and to
* permit persons to whom the Software is furnished to do so, subject to
* the following conditions:
*
* The above copyright notice and this permission notice (including the
* next paragraph) shall be included in all copies or substantial portions
* of the Software.
*
* THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS
* OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
* MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NON-INFRINGEMENT.
* IN NO EVENT SHALL PRECISION INSIGHT AND/OR ITS SUPPLIERS BE LIABLE FOR
* ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT,
* TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE
* SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE.
*/
#include "vulkan_pipeline_utl.h"
#include "vulkan_utl.h"
#include "vulkan_texture_utl.h"
#include "vulkan_contructors.h"
#include <string.h>
#include <math.h>
#include <sys/mman.h>
#include <drm_fourcc.h>
#include <fcntl.h>
#include <sys/ioctl.h>
//#include "vaapi-recorder.h"
#include "vaapi_encoder_h264.h"
#include "vaapi_encoder_hevc.h"
#define CHAIN_SIZE 4
struct ComputeApplicationPipeline: BaseVulkanPipeline
{
void Init(VkDevice dev, uint32_t width, uint32_t height)
{
device = dev;
CreateDescriptorSetLayout(
BasicBinding(0, VK_DESCRIPTOR_TYPE_STORAGE_IMAGE,1,VK_SHADER_STAGE_COMPUTE_BIT),
BasicBinding(1, VK_DESCRIPTOR_TYPE_STORAGE_IMAGE,1,VK_SHADER_STAGE_COMPUTE_BIT),
BasicBinding(2, VK_DESCRIPTOR_TYPE_STORAGE_IMAGE,1,VK_SHADER_STAGE_COMPUTE_BIT),
BasicBinding(3, VK_DESCRIPTOR_TYPE_UNIFORM_BUFFER,1,VK_SHADER_STAGE_COMPUTE_BIT)
);
CreatePool(CHAIN_SIZE,
BasicPoolSize(VK_DESCRIPTOR_TYPE_STORAGE_IMAGE,CHAIN_SIZE*3),
BasicPoolSize(VK_DESCRIPTOR_TYPE_UNIFORM_BUFFER, CHAIN_SIZE*1));
float specData[2] = {(float)width,(float)height};
VkSpecializationMapEntry specs[2] = {{0, 0, sizeof(float)},1,sizeof(float),sizeof(float)};
VkSpecializationInfo sinfo = {2, specs, sizeof(specData), specData };
VkShaderModule shader;
CreateComputePipeline(ShaderFromFile(shader, "image-decomposite.comp.spv", VK_SHADER_STAGE_COMPUTE_BIT, &sinfo));
// todo: should not we destroy shader internally?
vkDestroyShaderModule(device, shader, NULL);
}
void UpdateDescriptors(VkDescriptorSet dstSet, VkImageView imageView0, VkImageView imageView1, VkImageView imageView2, const VkDescriptorBufferInfo &buffer)
{
WriteDescriptors(
ImageWrite(dstSet, 0, ImageDescriptor(imageView0, VK_IMAGE_LAYOUT_GENERAL)),
ImageWrite(dstSet, 1, ImageDescriptor(imageView1, VK_IMAGE_LAYOUT_GENERAL)),
ImageWrite(dstSet, 2, ImageDescriptor(imageView2, VK_IMAGE_LAYOUT_GENERAL)),
BufferWrite(dstSet, 3, buffer));
}
};
// Full frame size in pixels. These values are handed to the compute pipeline;
// the imported and reconstruction images and the encoder use half of each.
constexpr int WIDTH = 1920;
constexpr int HEIGHT = 1080;
// Workgroup edge length used to size the dispatch grid (workgroup size in the compute shader).
constexpr int WORKGROUP_SIZE = 32;
struct ComputeApplication {
// The pixels of the rendered mandelbrot set are in this format:
struct Pixel {
float r, g, b, a;
};
VulkanContext context;
VulkanDevice dev;
ComputeApplicationPipeline computePipeline;
struct UBO{
float frameNum;
};
struct FrameContext
{
VkDescriptorSet descriptorSet;
VulkanBuffer uboBuf;
VulkanTexture texture0, texture1, texture2;
VkCommandBuffer commandBuffer;
VkFence fence;
bool running = false;
uint8_t *pReconstructionData;
} chain[CHAIN_SIZE];
void CreateReconstructionImage(VulkanTexture &image, VkFormat format, unsigned int width, unsigned int height, uint8_t **ppReconstructionData)
{
CallWith(Image2dInfo(VK_IMAGE_USAGE_STORAGE_BIT, format, width, height,
$(tiling) = VK_IMAGE_TILING_LINEAR,
$(initialLayout) = VK_IMAGE_LAYOUT_UNDEFINED ),
vkCreateImage(dev.device, &ref,NULL, &image.image));
VkMemoryRequirements mem_reqs;
vkGetImageMemoryRequirements(dev.device, image.image, &mem_reqs);
VkMemoryAllocateInfo info = AllocateInfo(mem_reqs.size);
dev.GetMemoryType(mem_reqs.memoryTypeBits, VK_MEMORY_PROPERTY_HOST_VISIBLE_BIT, &info.memoryTypeIndex);
vkAllocateMemory(dev.device, &info, NULL, &image.device_memory);
vkMapMemory(dev.device, image.device_memory, 0, mem_reqs.size, 0, (void**)ppReconstructionData);
vkBindImageMemory(dev.device, image.image, image.device_memory, 0);
CallWith($M(VkImageViewCreateInfo{VK_STRUCTURE_TYPE_IMAGE_VIEW_CREATE_INFO},
$(image) = image.image, $(viewType) = VK_IMAGE_VIEW_TYPE_2D,
$(format) = format,
$(subresourceRange) = SubresourceRange()),
vkCreateImageView(dev.device, &ref, NULL, &image.view));
image.owning_device = dev.device;
}
void ImportVaapiImage(VulkanTexture &texture0, VulkanTexture &texture1, unsigned int width, unsigned int height, int fd, uint64_t mod, uint32_t size, uint32_t offset, uint32_t pitch1, uint32_t pitch2, bool p010)
{
$Sc layout = SubresourceLayout(pitch1);
$Sc iinfo{Image2dInfo(
VK_IMAGE_USAGE_STORAGE_BIT,
p010?VK_FORMAT_R16_UNORM:VK_FORMAT_R8_UNORM,
width, height,
$(tiling) = VK_IMAGE_TILING_DRM_FORMAT_MODIFIER_EXT),
$Vk<VkExternalMemoryImageCreateInfo>(
$(handleTypes) = VK_EXTERNAL_MEMORY_HANDLE_TYPE_DMA_BUF_BIT_EXT),
$Vk<VkImageDrmFormatModifierExplicitCreateInfoEXT>(
$(drmFormatModifierPlaneCount), $(drmFormatModifier) = mod,
$(pPlaneLayouts) &= layout)
};
VK_CHECK_RESULT(vkCreateImage(dev.device, &iinfo, NULL, &texture0.image)); // create image.
$F(iinfo,$(format) = p010?VK_FORMAT_R16G16_UNORM:VK_FORMAT_R8G8_UNORM,
$(extent) = VkExtent3D{ width/2, height/2, 1 });
$F(layout,$(offset) = 0, $(rowPitch) = pitch2 );
VK_CHECK_RESULT(vkCreateImage(dev.device, &iinfo, NULL, &texture1.image)); // create image.
VkMemoryRequirements memoryRequirements;
vkGetImageMemoryRequirements(dev.device, texture0.image, &memoryRequirements);
VkMemoryFdPropertiesKHR fdProps = $Vk<VkMemoryFdPropertiesKHR>();
PFN_vkGetMemoryFdPropertiesKHR vkGetMemoryFdProperties = (PFN_vkGetMemoryFdPropertiesKHR)vkGetInstanceProcAddr(context.instance, "vkGetMemoryFdPropertiesKHR");
vkGetMemoryFdProperties(dev.device,VK_EXTERNAL_MEMORY_HANDLE_TYPE_DMA_BUF_BIT_EXT, fd, &fdProps);
$Sc ainfo{
AllocateInfo(size),
$Vk<VkImportMemoryFdInfoKHR>(
$(handleType) = VK_EXTERNAL_MEMORY_HANDLE_TYPE_DMA_BUF_BIT_EXT,
$(fd) = fd)
};
dev.GetMemoryType(memoryRequirements.memoryTypeBits & fdProps.memoryTypeBits, 0, &ainfo.memoryTypeIndex);//VK_MEMORY_PROPERTY_HOST_COHERENT_BIT | VK_MEMORY_PROPERTY_HOST_VISIBLE_BIT);
VK_CHECK_RESULT(vkAllocateMemory(dev.device, &ainfo, NULL, &texture0.device_memory)); // allocate memory on device.
// Now associate that allocated memory with the buffer. With that, the buffer is backed by actual memory.
VK_CHECK_RESULT(vkBindImageMemory(dev.device, texture0.image, texture0.device_memory, 0));
VK_CHECK_RESULT(vkBindImageMemory(dev.device, texture1.image, texture0.device_memory, offset));
$F(texture0,
$(width) = width, $(height) = height,
$(mip_levels), $(layer_count));
$F(texture1,
$(width) = width/2, $(height) = height/2,
$(mip_levels), $(layer_count));
texture0.CreateImageView(dev.device, p010?VK_FORMAT_R16_UNORM:VK_FORMAT_R8_UNORM);
texture1.CreateImageView(dev.device, p010?VK_FORMAT_R16G16_UNORM:VK_FORMAT_R8G8_UNORM);
}
void createDescriptorSet(int chidx) {
chain[chidx].descriptorSet = computePipeline.AllocateSingleDescriptorSet();
computePipeline.UpdateDescriptors(chain[chidx].descriptorSet, chain[chidx].texture0.view, chain[chidx].texture1.view, chain[chidx].texture2.view, chain[chidx].uboBuf.descriptor);
}
void prepareImage(int chidx)
{
VkCommandBuffer commandBuffer = dev.CreateCommandBuffer();
VulkanTexture::SetImageLayout(commandBuffer,chain[chidx].texture0.image, VK_IMAGE_LAYOUT_UNDEFINED, VK_IMAGE_LAYOUT_GENERAL, { VK_IMAGE_ASPECT_COLOR_BIT, 0, 1, 0, 1 });
VulkanTexture::SetImageLayout(commandBuffer,chain[chidx].texture1.image, VK_IMAGE_LAYOUT_UNDEFINED, VK_IMAGE_LAYOUT_GENERAL, { VK_IMAGE_ASPECT_COLOR_BIT, 0, 1, 0, 1 });
VulkanTexture::SetImageLayout(commandBuffer,chain[chidx].texture2.image, VK_IMAGE_LAYOUT_UNDEFINED, VK_IMAGE_LAYOUT_GENERAL, { VK_IMAGE_ASPECT_COLOR_BIT, 0, 1, 0, 1 });
dev.FlushCommandBuffer(commandBuffer, dev.defautQueue);
}
void createCommandBuffer(int chidx) {
chain[chidx].commandBuffer = dev.CreateCommandBuffer();
/*
We need to bind a pipeline, AND a descriptor set before we dispatch.
The validation layer will NOT give warnings if you forget these, so be very careful not to forget them.
*/
vkCmdBindPipeline(chain[chidx].commandBuffer, VK_PIPELINE_BIND_POINT_COMPUTE, computePipeline.pipeline);
vkCmdBindDescriptorSets(chain[chidx].commandBuffer, VK_PIPELINE_BIND_POINT_COMPUTE, computePipeline.pipelineLayout, 0, 1, &chain[chidx].descriptorSet, 0, NULL);
vkCmdDispatch(chain[chidx].commandBuffer, (uint32_t)ceil(WIDTH/4 / float(WORKGROUP_SIZE)), (uint32_t)ceil(HEIGHT/4 / float(WORKGROUP_SIZE)), 1);
VK_CHECK_RESULT(vkEndCommandBuffer(chain[chidx].commandBuffer)); // end recording commands.
$Sc fenceCreateInfo = FenceInfo();
VK_CHECK_RESULT(vkCreateFence(dev.device, &fenceCreateInfo, NULL, &chain[chidx].fence));
}
void waitFence(int chidx)
{
if(chain[chidx].running)
VK_CHECK_RESULT(vkWaitForFences(dev.device, 1, &chain[chidx].fence, VK_TRUE, 100000000000));
chain[chidx].running = false;
}
void runCommandBuffer(int chidx) {
$Sc submitInfo = SubmitInfo(chain[chidx].commandBuffer);
waitFence(chidx);
vkResetFences(dev.device, 1, &chain[chidx].fence);
VK_CHECK_RESULT(vkQueueSubmit(dev.defautQueue, 1, &submitInfo, chain[chidx].fence));
chain[chidx].running = true;
}
template <typename Codec>
void run(bool p010, const char *filename) {
#ifndef NDEBUG
#define ENABLE_VALIDATION_LAYERS 1
#else
#define ENABLE_VALIDATION_LAYERS 0
#endif
context.Create("streamingengine", "vulkan-playground", ENABLE_VALIDATION_LAYERS);
dev.Create(context.FindPhysicalDevice(), VK_QUEUE_COMPUTE_BIT);
dev.CreateDevice(context);
int drm_fd = drm_fd = open("/dev/dri/renderD128", O_RDWR);
uint64_t mod;
uint32_t size, offset, pitch1, pitch2;
int fd[CHAIN_SIZE];
uint64_t modifiers[32];
int count = dev.GetAvailiableModifiersList(modifiers, 32, p010?VK_FORMAT_R16_UNORM:VK_FORMAT_R8_UNORM);
Codec enc = {};
enc.Setup(drm_fd, WIDTH/2, HEIGHT/2, filename, fd, &mod, &size, &offset, &pitch1, &pitch2, modifiers, count, p010);
for(int i = 0; i < CHAIN_SIZE; i++)
{
dev.CreateAndMap(chain[i].uboBuf, sizeof(UBO));
ImportVaapiImage(chain[i].texture0, chain[i].texture1, WIDTH/2, HEIGHT/2, fd[i], mod, size, offset, pitch1, pitch2, p010);
CreateReconstructionImage(chain[i].texture2, VK_FORMAT_R4G4B4A4_UNORM_PACK16, WIDTH/2, HEIGHT/2, &chain[i].pReconstructionData);
}
computePipeline.Init(dev.device, WIDTH, HEIGHT);
for(int i = 0; i < CHAIN_SIZE; i++)
{
prepareImage(i);
createDescriptorSet(i);
createCommandBuffer(i);
}
int frameNum = 0;
FILE *reconstructionStream = fopen("reconstruction.bin", "wb");
while(frameNum++ < 1000)
{
int chidx = frameNum & 3;
// Finally, run the recorded command buffer.
runCommandBuffer(chidx);
#ifndef SKIP_FENCE_SYNC
waitFence(chidx);
#endif
uint32_t imageSize = (WIDTH/2) * (HEIGHT/2) * 2;
fwrite(&imageSize, sizeof(imageSize), 1, reconstructionStream);
fwrite(chain[chidx].pReconstructionData, 1, imageSize, reconstructionStream);
//recorder_frame4(r, chidx);
if(frameNum == 1)
enc.EncodeIDR(chidx);
else
enc.EncodeP(chidx);
((UBO*)chain[chidx].uboBuf.mapped)->frameNum = frameNum;
}
fclose(reconstructionStream);
for(int i = 0; i < CHAIN_SIZE; i++)
{
waitFence(i);
vkDestroyFence(dev.device, chain[i].fence, NULL);
chain[i].uboBuf.Destroy();
chain[i].texture2.Destroy();
chain[i].texture1.Destroy();
chain[i].texture0.Destroy();
}
computePipeline.Destroy();
dev.Destroy();
context.Destroy();
}
};
// Entry point.
// Without arguments the H.264 encoder is used and the output is "out.264".
// With at least one argument the HEVC encoder is used and the output is
// "out.265"; argv[1] is parsed with atoi and a non-zero value turns on p010.
int main(int argc, char **argv) {
    ComputeApplication app;
    if(argc > 1)
    {
        const bool p010 = atoi(argv[1]) != 0;
        app.run<VaapiEncoderHEVC>(p010, "out.265");
    }
    else
    {
        app.run<VaapiEncoderH264>(false, "out.264");
    }
    return EXIT_SUCCESS;
}