304 lines
12 KiB
C++
304 lines
12 KiB
C++
/*
 * Copyright (c) 2017 Eric Arnebäck
 * Copyright (c) 2024 mittorn
 *
 * Permission is hereby granted, free of charge, to any person obtaining a
 * copy of this software and associated documentation files (the
 * "Software"), to deal in the Software without restriction, including
 * without limitation the rights to use, copy, modify, merge, publish,
 * distribute, sub license, and/or sell copies of the Software, and to
 * permit persons to whom the Software is furnished to do so, subject to
 * the following conditions:
 *
 * The above copyright notice and this permission notice (including the
 * next paragraph) shall be included in all copies or substantial portions
 * of the Software.
 *
 * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS
 * OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
 * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NON-INFRINGEMENT.
 * IN NO EVENT SHALL PRECISION INSIGHT AND/OR ITS SUPPLIERS BE LIABLE FOR
 * ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT,
 * TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE
 * SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE.
 */
|
|
#include "vulkan_pipeline_utl.h"
|
|
#include "vulkan_utl.h"
|
|
#include "vulkan_texture_utl.h"
|
|
#include "vulkan_contructors.h"
|
|
#include <string.h>
|
|
|
|
#include <math.h>
|
|
#include <sys/mman.h>
|
|
#include <drm_fourcc.h>
|
|
#include <fcntl.h>
|
|
#include <sys/ioctl.h>
|
|
|
|
//#include "vaapi-recorder.h"
|
|
#include "vaapi_encoder_h264.h"
|
|
#include "vaapi_encoder_hevc.h"
|
|
#define CHAIN_SIZE 4
|
|
|
|
struct ComputeApplicationPipeline: BaseVulkanPipeline
|
|
{
|
|
|
|
void Init(VkDevice dev, uint32_t width, uint32_t height)
|
|
{
|
|
|
|
device = dev;
|
|
CreateDescriptorSetLayout(
|
|
BasicBinding(0, VK_DESCRIPTOR_TYPE_STORAGE_IMAGE,1,VK_SHADER_STAGE_COMPUTE_BIT),
|
|
BasicBinding(1, VK_DESCRIPTOR_TYPE_STORAGE_IMAGE,1,VK_SHADER_STAGE_COMPUTE_BIT),
|
|
BasicBinding(2, VK_DESCRIPTOR_TYPE_STORAGE_IMAGE,1,VK_SHADER_STAGE_COMPUTE_BIT),
|
|
BasicBinding(3, VK_DESCRIPTOR_TYPE_UNIFORM_BUFFER,1,VK_SHADER_STAGE_COMPUTE_BIT)
|
|
);
|
|
CreatePool(CHAIN_SIZE,
|
|
BasicPoolSize(VK_DESCRIPTOR_TYPE_STORAGE_IMAGE,CHAIN_SIZE*3),
|
|
BasicPoolSize(VK_DESCRIPTOR_TYPE_UNIFORM_BUFFER, CHAIN_SIZE*1));
|
|
float specData[2] = {(float)width,(float)height};
|
|
VkSpecializationMapEntry specs[2] = {{0, 0, sizeof(float)},1,sizeof(float),sizeof(float)};
|
|
VkSpecializationInfo sinfo = {2, specs, sizeof(specData), specData };
|
|
VkShaderModule shader;
|
|
CreateComputePipeline(ShaderFromFile(shader, "image-decomposite.comp.spv", VK_SHADER_STAGE_COMPUTE_BIT, &sinfo));
|
|
// todo: should not we destroy shader internally?
|
|
vkDestroyShaderModule(device, shader, NULL);
|
|
}
|
|
void UpdateDescriptors(VkDescriptorSet dstSet, VkImageView imageView0, VkImageView imageView1, VkImageView imageView2, const VkDescriptorBufferInfo &buffer)
|
|
{
|
|
WriteDescriptors(
|
|
ImageWrite(dstSet, 0, ImageDescriptor(imageView0, VK_IMAGE_LAYOUT_GENERAL)),
|
|
ImageWrite(dstSet, 1, ImageDescriptor(imageView1, VK_IMAGE_LAYOUT_GENERAL)),
|
|
ImageWrite(dstSet, 2, ImageDescriptor(imageView2, VK_IMAGE_LAYOUT_GENERAL)),
|
|
BufferWrite(dstSet, 3, buffer));
|
|
}
|
|
};
|
|
|
|
// Frame dimensions in pixels. Passed unscaled to the compute pipeline as its
// specialization constants; the encoder and the per-slot images are set up
// with WIDTH/2 x HEIGHT/2.
static const int WIDTH = 1920;
static const int HEIGHT = 1080;

// Divisor used when computing the dispatch group counts.
// NOTE(review): must match the local size declared in the compute shader — confirm.
static const int WORKGROUP_SIZE = 32;
|
|
|
|
struct ComputeApplication {
|
|
// The pixels of the rendered mandelbrot set are in this format:
|
|
struct Pixel {
|
|
float r, g, b, a;
|
|
};
|
|
|
|
VulkanContext context;
|
|
VulkanDevice dev;
|
|
ComputeApplicationPipeline computePipeline;
|
|
|
|
struct UBO{
|
|
float frameNum;
|
|
};
|
|
struct FrameContext
|
|
{
|
|
VkDescriptorSet descriptorSet;
|
|
VulkanBuffer uboBuf;
|
|
VulkanTexture texture0, texture1, texture2;
|
|
VkCommandBuffer commandBuffer;
|
|
VkFence fence;
|
|
bool running = false;
|
|
uint8_t *pReconstructionData;
|
|
|
|
} chain[CHAIN_SIZE];
|
|
|
|
void CreateReconstructionImage(VulkanTexture &image, VkFormat format, unsigned int width, unsigned int height, uint8_t **ppReconstructionData)
|
|
{
|
|
CallWith(Image2dInfo(VK_IMAGE_USAGE_STORAGE_BIT, format, width, height,
|
|
$(tiling) = VK_IMAGE_TILING_LINEAR,
|
|
$(initialLayout) = VK_IMAGE_LAYOUT_UNDEFINED ),
|
|
vkCreateImage(dev.device, &ref,NULL, &image.image));
|
|
VkMemoryRequirements mem_reqs;
|
|
vkGetImageMemoryRequirements(dev.device, image.image, &mem_reqs);
|
|
VkMemoryAllocateInfo info = AllocateInfo(mem_reqs.size);
|
|
dev.GetMemoryType(mem_reqs.memoryTypeBits, VK_MEMORY_PROPERTY_HOST_VISIBLE_BIT, &info.memoryTypeIndex);
|
|
vkAllocateMemory(dev.device, &info, NULL, &image.device_memory);
|
|
vkMapMemory(dev.device, image.device_memory, 0, mem_reqs.size, 0, (void**)ppReconstructionData);
|
|
vkBindImageMemory(dev.device, image.image, image.device_memory, 0);
|
|
CallWith($M(VkImageViewCreateInfo{VK_STRUCTURE_TYPE_IMAGE_VIEW_CREATE_INFO},
|
|
$(image) = image.image, $(viewType) = VK_IMAGE_VIEW_TYPE_2D,
|
|
$(format) = format,
|
|
$(subresourceRange) = SubresourceRange()),
|
|
vkCreateImageView(dev.device, &ref, NULL, &image.view));
|
|
image.owning_device = dev.device;
|
|
}
|
|
|
|
void ImportVaapiImage(VulkanTexture &texture0, VulkanTexture &texture1, unsigned int width, unsigned int height, int fd, uint64_t mod, uint32_t size, uint32_t offset, uint32_t pitch1, uint32_t pitch2, bool p010)
|
|
{
|
|
$Sc layout = SubresourceLayout(pitch1);
|
|
$Sc iinfo{Image2dInfo(
|
|
VK_IMAGE_USAGE_STORAGE_BIT,
|
|
p010?VK_FORMAT_R16_UNORM:VK_FORMAT_R8_UNORM,
|
|
width, height,
|
|
$(tiling) = VK_IMAGE_TILING_DRM_FORMAT_MODIFIER_EXT),
|
|
$Vk<VkExternalMemoryImageCreateInfo>(
|
|
$(handleTypes) = VK_EXTERNAL_MEMORY_HANDLE_TYPE_DMA_BUF_BIT_EXT),
|
|
$Vk<VkImageDrmFormatModifierExplicitCreateInfoEXT>(
|
|
$(drmFormatModifierPlaneCount), $(drmFormatModifier) = mod,
|
|
$(pPlaneLayouts) &= layout)
|
|
};
|
|
|
|
VK_CHECK_RESULT(vkCreateImage(dev.device, &iinfo, NULL, &texture0.image)); // create image.
|
|
$F(iinfo,$(format) = p010?VK_FORMAT_R16G16_UNORM:VK_FORMAT_R8G8_UNORM,
|
|
$(extent) = VkExtent3D{ width/2, height/2, 1 });
|
|
$F(layout,$(offset) = 0, $(rowPitch) = pitch2 );
|
|
VK_CHECK_RESULT(vkCreateImage(dev.device, &iinfo, NULL, &texture1.image)); // create image.
|
|
|
|
VkMemoryRequirements memoryRequirements;
|
|
vkGetImageMemoryRequirements(dev.device, texture0.image, &memoryRequirements);
|
|
|
|
VkMemoryFdPropertiesKHR fdProps = $Vk<VkMemoryFdPropertiesKHR>();
|
|
PFN_vkGetMemoryFdPropertiesKHR vkGetMemoryFdProperties = (PFN_vkGetMemoryFdPropertiesKHR)vkGetInstanceProcAddr(context.instance, "vkGetMemoryFdPropertiesKHR");
|
|
vkGetMemoryFdProperties(dev.device,VK_EXTERNAL_MEMORY_HANDLE_TYPE_DMA_BUF_BIT_EXT, fd, &fdProps);
|
|
$Sc ainfo{
|
|
AllocateInfo(size),
|
|
$Vk<VkImportMemoryFdInfoKHR>(
|
|
$(handleType) = VK_EXTERNAL_MEMORY_HANDLE_TYPE_DMA_BUF_BIT_EXT,
|
|
$(fd) = fd)
|
|
};
|
|
|
|
dev.GetMemoryType(memoryRequirements.memoryTypeBits & fdProps.memoryTypeBits, 0, &ainfo.memoryTypeIndex);//VK_MEMORY_PROPERTY_HOST_COHERENT_BIT | VK_MEMORY_PROPERTY_HOST_VISIBLE_BIT);
|
|
VK_CHECK_RESULT(vkAllocateMemory(dev.device, &ainfo, NULL, &texture0.device_memory)); // allocate memory on device.
|
|
|
|
// Now associate that allocated memory with the buffer. With that, the buffer is backed by actual memory.
|
|
VK_CHECK_RESULT(vkBindImageMemory(dev.device, texture0.image, texture0.device_memory, 0));
|
|
VK_CHECK_RESULT(vkBindImageMemory(dev.device, texture1.image, texture0.device_memory, offset));
|
|
$F(texture0,
|
|
$(width) = width, $(height) = height,
|
|
$(mip_levels), $(layer_count));
|
|
$F(texture1,
|
|
$(width) = width/2, $(height) = height/2,
|
|
$(mip_levels), $(layer_count));
|
|
texture0.CreateImageView(dev.device, p010?VK_FORMAT_R16_UNORM:VK_FORMAT_R8_UNORM);
|
|
texture1.CreateImageView(dev.device, p010?VK_FORMAT_R16G16_UNORM:VK_FORMAT_R8G8_UNORM);
|
|
}
|
|
|
|
void createDescriptorSet(int chidx) {
|
|
chain[chidx].descriptorSet = computePipeline.AllocateSingleDescriptorSet();
|
|
computePipeline.UpdateDescriptors(chain[chidx].descriptorSet, chain[chidx].texture0.view, chain[chidx].texture1.view, chain[chidx].texture2.view, chain[chidx].uboBuf.descriptor);
|
|
}
|
|
|
|
void prepareImage(int chidx)
|
|
{
|
|
VkCommandBuffer commandBuffer = dev.CreateCommandBuffer();
|
|
VulkanTexture::SetImageLayout(commandBuffer,chain[chidx].texture0.image, VK_IMAGE_LAYOUT_UNDEFINED, VK_IMAGE_LAYOUT_GENERAL, { VK_IMAGE_ASPECT_COLOR_BIT, 0, 1, 0, 1 });
|
|
VulkanTexture::SetImageLayout(commandBuffer,chain[chidx].texture1.image, VK_IMAGE_LAYOUT_UNDEFINED, VK_IMAGE_LAYOUT_GENERAL, { VK_IMAGE_ASPECT_COLOR_BIT, 0, 1, 0, 1 });
|
|
VulkanTexture::SetImageLayout(commandBuffer,chain[chidx].texture2.image, VK_IMAGE_LAYOUT_UNDEFINED, VK_IMAGE_LAYOUT_GENERAL, { VK_IMAGE_ASPECT_COLOR_BIT, 0, 1, 0, 1 });
|
|
dev.FlushCommandBuffer(commandBuffer, dev.defautQueue);
|
|
}
|
|
|
|
void createCommandBuffer(int chidx) {
|
|
chain[chidx].commandBuffer = dev.CreateCommandBuffer();
|
|
/*
|
|
We need to bind a pipeline, AND a descriptor set before we dispatch.
|
|
|
|
The validation layer will NOT give warnings if you forget these, so be very careful not to forget them.
|
|
*/
|
|
vkCmdBindPipeline(chain[chidx].commandBuffer, VK_PIPELINE_BIND_POINT_COMPUTE, computePipeline.pipeline);
|
|
vkCmdBindDescriptorSets(chain[chidx].commandBuffer, VK_PIPELINE_BIND_POINT_COMPUTE, computePipeline.pipelineLayout, 0, 1, &chain[chidx].descriptorSet, 0, NULL);
|
|
vkCmdDispatch(chain[chidx].commandBuffer, (uint32_t)ceil(WIDTH/4 / float(WORKGROUP_SIZE)), (uint32_t)ceil(HEIGHT/4 / float(WORKGROUP_SIZE)), 1);
|
|
VK_CHECK_RESULT(vkEndCommandBuffer(chain[chidx].commandBuffer)); // end recording commands.
|
|
$Sc fenceCreateInfo = FenceInfo();
|
|
VK_CHECK_RESULT(vkCreateFence(dev.device, &fenceCreateInfo, NULL, &chain[chidx].fence));
|
|
|
|
}
|
|
void waitFence(int chidx)
|
|
{
|
|
if(chain[chidx].running)
|
|
VK_CHECK_RESULT(vkWaitForFences(dev.device, 1, &chain[chidx].fence, VK_TRUE, 100000000000));
|
|
chain[chidx].running = false;
|
|
}
|
|
|
|
void runCommandBuffer(int chidx) {
|
|
$Sc submitInfo = SubmitInfo(chain[chidx].commandBuffer);
|
|
waitFence(chidx);
|
|
vkResetFences(dev.device, 1, &chain[chidx].fence);
|
|
VK_CHECK_RESULT(vkQueueSubmit(dev.defautQueue, 1, &submitInfo, chain[chidx].fence));
|
|
chain[chidx].running = true;
|
|
}
|
|
template <typename Codec>
|
|
void run(bool p010, const char *filename) {
|
|
#ifndef NDEBUG
|
|
#define ENABLE_VALIDATION_LAYERS 1
|
|
#else
|
|
#define ENABLE_VALIDATION_LAYERS 0
|
|
#endif
|
|
context.Create("streamingengine", "vulkan-playground", ENABLE_VALIDATION_LAYERS);
|
|
dev.Create(context.FindPhysicalDevice(), VK_QUEUE_COMPUTE_BIT);
|
|
dev.CreateDevice(context);
|
|
int drm_fd = drm_fd = open("/dev/dri/renderD128", O_RDWR);
|
|
uint64_t mod;
|
|
uint32_t size, offset, pitch1, pitch2;
|
|
int fd[CHAIN_SIZE];
|
|
uint64_t modifiers[32];
|
|
int count = dev.GetAvailiableModifiersList(modifiers, 32, p010?VK_FORMAT_R16_UNORM:VK_FORMAT_R8_UNORM);
|
|
Codec enc = {};
|
|
|
|
enc.Setup(drm_fd, WIDTH/2, HEIGHT/2, filename, fd, &mod, &size, &offset, &pitch1, &pitch2, modifiers, count, p010);
|
|
for(int i = 0; i < CHAIN_SIZE; i++)
|
|
{
|
|
dev.CreateAndMap(chain[i].uboBuf, sizeof(UBO));
|
|
ImportVaapiImage(chain[i].texture0, chain[i].texture1, WIDTH/2, HEIGHT/2, fd[i], mod, size, offset, pitch1, pitch2, p010);
|
|
CreateReconstructionImage(chain[i].texture2, VK_FORMAT_R4G4B4A4_UNORM_PACK16, WIDTH/2, HEIGHT/2, &chain[i].pReconstructionData);
|
|
}
|
|
|
|
computePipeline.Init(dev.device, WIDTH, HEIGHT);
|
|
for(int i = 0; i < CHAIN_SIZE; i++)
|
|
{
|
|
prepareImage(i);
|
|
createDescriptorSet(i);
|
|
createCommandBuffer(i);
|
|
}
|
|
int frameNum = 0;
|
|
FILE *reconstructionStream = fopen("reconstruction.bin", "wb");
|
|
|
|
while(frameNum++ < 1000)
|
|
{
|
|
int chidx = frameNum & 3;
|
|
// Finally, run the recorded command buffer.
|
|
runCommandBuffer(chidx);
|
|
#ifndef SKIP_FENCE_SYNC
|
|
waitFence(chidx);
|
|
#endif
|
|
uint32_t imageSize = (WIDTH/2) * (HEIGHT/2) * 2;
|
|
fwrite(&imageSize, sizeof(imageSize), 1, reconstructionStream);
|
|
fwrite(chain[chidx].pReconstructionData, 1, imageSize, reconstructionStream);
|
|
//recorder_frame4(r, chidx);
|
|
if(frameNum == 1)
|
|
enc.EncodeIDR(chidx);
|
|
else
|
|
enc.EncodeP(chidx);
|
|
((UBO*)chain[chidx].uboBuf.mapped)->frameNum = frameNum;
|
|
|
|
}
|
|
fclose(reconstructionStream);
|
|
for(int i = 0; i < CHAIN_SIZE; i++)
|
|
{
|
|
waitFence(i);
|
|
vkDestroyFence(dev.device, chain[i].fence, NULL);
|
|
chain[i].uboBuf.Destroy();
|
|
chain[i].texture2.Destroy();
|
|
chain[i].texture1.Destroy();
|
|
chain[i].texture0.Destroy();
|
|
}
|
|
|
|
computePipeline.Destroy();
|
|
dev.Destroy();
|
|
context.Destroy();
|
|
}
|
|
};
|
|
|
|
int main(int argc, char **argv) {
|
|
ComputeApplication app;
|
|
|
|
bool hevc = argc > 1;
|
|
bool p010 = false;
|
|
if(hevc)
|
|
p010 = atoi(argv[1]);
|
|
if(hevc)
|
|
app.run<VaapiEncoderHEVC>(p010, "out.265");
|
|
else
|
|
app.run<VaapiEncoderH264>(false, "out.264");
|
|
|
|
return EXIT_SUCCESS;
|
|
}
|