Use async chain of 4 frames with separate command buffers

This commit is contained in:
mittorn 2024-10-08 04:46:19 +03:00
parent 1fb2179423
commit bd44382e55
3 changed files with 360 additions and 153 deletions

View file

@ -106,6 +106,8 @@ enum NALUType {
#define ALIGN16(x) ((x+15)&~15) #define ALIGN16(x) ((x+15)&~15)
#define CHAIN_SIZE 4
struct vaapi_recorder { struct vaapi_recorder {
int drm_fd, output_fd; int drm_fd, output_fd;
int width, height; int width, height;
@ -152,7 +154,7 @@ struct vaapi_recorder {
VABufferID output_buf; VABufferID output_buf;
VASurfaceID output_sync_surf; VASurfaceID output_sync_surf;
} encoder; } encoder;
VASurfaceID inputFrames[CHAIN_SIZE];
}; };
/* /*
@ -2334,6 +2336,131 @@ err_free:
return NULL; return NULL;
} }
/*
 * Create a recorder whose CHAIN_SIZE input surfaces are allocated by VA-API
 * (P010, 10-bit 4:2:0) and exported to the caller as DRM PRIME file
 * descriptors.
 *
 * drm_fd         DRM device descriptor handed to vaGetDisplayDRM().
 * width, height  size of the surfaces to create.
 * filename       output file; created/truncated with mode 0644.
 * dmabuf_fd      out: array of at least CHAIN_SIZE ints, receives the fd of
 *                object 0 of every exported surface.
 * mod, size, offset, pitch1, pitch2
 *                out: format modifier and size of object 0, offset of
 *                layer 1, and the pitches of layers 0 and 1.  They are taken
 *                from the LAST exported surface.
 *                NOTE(review): this presumes all CHAIN_SIZE surfaces share
 *                one layout - confirm against the driver in use.
 * modifiers, modifierscount
 *                DRM format modifiers the driver may choose from.
 *
 * Returns the new recorder, or NULL on failure.  On failure every descriptor
 * already written to dmabuf_fd is closed and reset to -1.
 */
struct vaapi_recorder *
vaapi_recorder_create5(int drm_fd, int width, int height, const char *filename, int *dmabuf_fd, uint64_t *mod, uint32_t *size, uint32_t *offset, uint32_t *pitch1, uint32_t *pitch2, uint64_t *modifiers, int modifierscount)
{
	struct vaapi_recorder *r;
	VAStatus status;
	int major, minor;
	int flags;
	int exported = 0; /* number of valid entries in dmabuf_fd */
	VASurfaceAttribExternalBuffers va_attrib_extbuf = {0};
	VADRMPRIMESurfaceDescriptor drmSurface = {0};
	VADRMFormatModifierList modList;
	VASurfaceAttrib va_attribs[5];

	r = (vaapi_recorder*)calloc(1, sizeof *r);
	if (r == NULL)
		return NULL;

	r->width = width;
	r->height = height;
	r->drm_fd = drm_fd;

	flags = O_WRONLY | O_CREAT | O_TRUNC | O_CLOEXEC;
	r->output_fd = open(filename, flags, 0644);
	if (r->output_fd < 0)
		goto err_free;

	r->va_dpy = vaGetDisplayDRM(drm_fd);
	if (!r->va_dpy) {
		printf("failed to create VA display\n");
		goto err_fd;
	}

	status = vaInitialize(r->va_dpy, &major, &minor);
	if (status != VA_STATUS_SUCCESS) {
		printf("vaapi: failed to initialize display\n");
		goto err_fd;
	}

	if (setup_vpp(r) < 0) {
		printf("vaapi: failed to initialize VPP pipeline\n");
		goto err_va_dpy;
	}

	if (setup_encoder(r) < 0) {
		goto err_vpp;
	}

	/*
	 * Only format, size and the tiling flag are filled in: the surfaces
	 * are allocated by the driver (MEM_TYPE_VA below), not imported.
	 */
	va_attrib_extbuf.pixel_format = VA_FOURCC_P010;
	va_attrib_extbuf.width = r->width;
	va_attrib_extbuf.height = r->height;
	va_attrib_extbuf.flags = VA_SURFACE_EXTBUF_DESC_ENABLE_TILING;
	va_attrib_extbuf.private_data = NULL;

	modList.modifiers = modifiers;
	modList.num_modifiers = modifierscount;

	va_attribs[0].type = VASurfaceAttribMemoryType;
	va_attribs[0].flags = VA_SURFACE_ATTRIB_SETTABLE;
	va_attribs[0].value.type = VAGenericValueTypeInteger;
	va_attribs[0].value.value.i = VA_SURFACE_ATTRIB_MEM_TYPE_VA;

	va_attribs[1].type = VASurfaceAttribUsageHint;
	va_attribs[1].flags = VA_SURFACE_ATTRIB_SETTABLE;
	va_attribs[1].value.type = VAGenericValueTypeInteger;
	va_attribs[1].value.value.i = VA_SURFACE_ATTRIB_USAGE_HINT_EXPORT | VA_SURFACE_ATTRIB_USAGE_HINT_ENCODER;

	va_attribs[2].type = VASurfaceAttribPixelFormat;
	va_attribs[2].flags = VA_SURFACE_ATTRIB_SETTABLE;
	va_attribs[2].value.type = VAGenericValueTypeInteger;
	va_attribs[2].value.value.i = VA_FOURCC_P010;

	va_attribs[3].type = VASurfaceAttribExternalBufferDescriptor;
	va_attribs[3].flags = VA_SURFACE_ATTRIB_SETTABLE;
	va_attribs[3].value.type = VAGenericValueTypePointer;
	va_attribs[3].value.value.p = &va_attrib_extbuf;

	va_attribs[4].type = VASurfaceAttribDRMFormatModifiers;
	va_attribs[4].flags = VA_SURFACE_ATTRIB_SETTABLE;
	va_attribs[4].value.type = VAGenericValueTypePointer;
	va_attribs[4].value.value.p = &modList;

	status = vaCreateSurfaces(r->va_dpy, VA_RT_FORMAT_YUV420_10,
				  r->width, r->height, r->inputFrames, CHAIN_SIZE,
				  &va_attribs[0], 5);
	printf("%d\n", status);
	if (status != VA_STATUS_SUCCESS) {
		printf("vaapi: failed to create input surfaces\n");
		goto err_vpp;
	}

	for (int i = 0; i < CHAIN_SIZE; i++) {
		status = vaExportSurfaceHandle(r->va_dpy, r->inputFrames[i], VA_SURFACE_ATTRIB_MEM_TYPE_DRM_PRIME_2, VA_EXPORT_SURFACE_WRITE_ONLY | VA_EXPORT_SURFACE_SEPARATE_LAYERS, &drmSurface);
		/* the cast keeps %llx correct where uint64_t is not unsigned long long */
		printf("%d %d %llx\n", status, drmSurface.objects[0].fd, (unsigned long long)drmSurface.objects[0].drm_format_modifier);
		if (status != VA_STATUS_SUCCESS) {
			printf("vaapi: failed to export input surface %d\n", i);
			goto err_export;
		}
		/* hand the descriptor out only once the export is known good */
		dmabuf_fd[i] = drmSurface.objects[0].fd;
		exported = i + 1;
	}

	/* drmSurface now describes the last surface of the chain */
	*mod = drmSurface.objects[0].drm_format_modifier;
	*size = drmSurface.objects[0].size;
	*offset = drmSurface.layers[1].offset[0];
	*pitch1 = drmSurface.layers[0].pitch[0];
	*pitch2 = drmSurface.layers[1].pitch[0];

	r->encoder.output_buf = VA_INVALID_ID;
	setup_output_thread(r);

	return r;

err_export:
	/* do not leave the caller with descriptors of a recorder that never existed */
	for (int i = 0; i < exported; i++) {
		close(dmabuf_fd[i]);
		dmabuf_fd[i] = -1;
	}
	/*
	 * NOTE(review): nothing on this path undoes setup_encoder(), and the
	 * surfaces are left for vaTerminate() to reclaim - confirm that is
	 * sufficient or add the matching teardown calls.
	 */
err_vpp:
	vpp_destroy(r);
err_va_dpy:
	vaTerminate(r->va_dpy);
err_fd:
	close(r->output_fd);
err_free:
	/*
	 * No thread teardown here: this function starts its thread
	 * (setup_output_thread) only as the very last step before returning
	 * successfully, so no failure path has a thread to stop.
	 */
	free(r);
	return NULL;
}
void void
vaapi_recorder_destroy(struct vaapi_recorder *r) vaapi_recorder_destroy(struct vaapi_recorder *r)
@ -2479,6 +2606,11 @@ recorder_frame3(struct vaapi_recorder *r)
encoder_encode(r, r->vpp.output); encoder_encode(r, r->vpp.output);
} }
void
recorder_frame4(struct vaapi_recorder *r, int idx)
{
encoder_encode(r, r->inputFrames[idx]);
}
static void * static void *

View file

@ -38,6 +38,9 @@ vaapi_recorder_create3(int drm_fd, int width, int height, const char *filename,
struct vaapi_recorder * struct vaapi_recorder *
vaapi_recorder_create4(int drm_fd, int width, int height, const char *filename, int *dmabuf_fd, uint64_t *mod, uint32_t *size, uint32_t *offset, uint32_t *pitch1, uint32_t *pitch2, uint64_t *modifiers, int modifierscount); vaapi_recorder_create4(int drm_fd, int width, int height, const char *filename, int *dmabuf_fd, uint64_t *mod, uint32_t *size, uint32_t *offset, uint32_t *pitch1, uint32_t *pitch2, uint64_t *modifiers, int modifierscount);
struct vaapi_recorder *
vaapi_recorder_create5(int drm_fd, int width, int height, const char *filename, int *dmabuf_fd, uint64_t *mod, uint32_t *size, uint32_t *offset, uint32_t *pitch1, uint32_t *pitch2, uint64_t *modifiers, int modifierscount);
void void
vaapi_recorder_destroy(struct vaapi_recorder *r); vaapi_recorder_destroy(struct vaapi_recorder *r);
@ -48,5 +51,7 @@ void
recorder_frame2(struct vaapi_recorder *r); recorder_frame2(struct vaapi_recorder *r);
void void
recorder_frame3(struct vaapi_recorder *r); recorder_frame3(struct vaapi_recorder *r);
void
recorder_frame4(struct vaapi_recorder *r, int idx);
#endif /* _VAAPI_RECORDER_H_ */ #endif /* _VAAPI_RECORDER_H_ */

View file

@ -141,14 +141,13 @@ const bool enableValidationLayers = true;
assert(res == VK_SUCCESS); \ assert(res == VK_SUCCESS); \
} \ } \
} }
#define CHAIN_SIZE 4
/* /*
The application launches a compute shader that renders the mandelbrot set, The application launches a compute shader that renders the mandelbrot set,
by rendering it into a storage buffer. by rendering it into a storage buffer.
The storage buffer is then read from the GPU, and saved as .png. The storage buffer is then read from the GPU, and saved as .png.
*/ */
class ComputeApplication { struct ComputeApplication {
private:
// The pixels of the rendered mandelbrot set are in this format: // The pixels of the rendered mandelbrot set are in this format:
struct Pixel { struct Pixel {
float r, g, b, a; float r, g, b, a;
@ -186,7 +185,6 @@ private:
To allocate such command buffers, we use a command pool. To allocate such command buffers, we use a command pool.
*/ */
VkCommandPool commandPool; VkCommandPool commandPool;
VkCommandBuffer commandBuffer;
/* /*
@ -197,25 +195,40 @@ private:
into descriptor sets, which are basically just collections of descriptors. into descriptor sets, which are basically just collections of descriptors.
*/ */
VkDescriptorPool descriptorPool; VkDescriptorPool descriptorPool;
VkDescriptorSet descriptorSet;
VkDescriptorSetLayout descriptorSetLayout; VkDescriptorSetLayout descriptorSetLayout;
VkCommandBuffer commandBuffer;
// CPU-side layout of the uniform buffer; createUBO() sizes and maps the
// buffer with sizeof(UBO).
struct UBO{
// frame number, stored as a float
float frameNum;
};
// Everything one in-flight frame owns. The application keeps CHAIN_SIZE of
// these in chain[], and the per-frame helpers take an index into that array.
struct FrameContext
{
	// descriptor set allocated for this frame by createDescriptorSet()
	VkDescriptorSet descriptorSet;
	// uniform buffer and its backing memory, created by createUBO()
	VkBuffer ubo;
	VkDeviceMemory uboMemory;
	// first image, bound to descriptor binding 0
	VkImage image0;
	VkDeviceMemory imageMemory0;
	VkImageView imageView0;
	// second image, bound to descriptor binding 1
	VkImage image1;
	// todo: single memory block?
	VkDeviceMemory imageMemory1;
	VkImageView imageView1;
	// command buffer recorded once for this frame by createCommandBuffer()
	VkCommandBuffer commandBuffer;
	// host mapping of uboMemory, set by createUBO()
	UBO *pMappedUBO = NULL;
	// fence passed to vkQueueSubmit() together with commandBuffer
	VkFence fence;
	// True once commandBuffer has been submitted. runCommandBuffer() waits on
	// the fence only when this is set, so it must start out false: otherwise
	// the first submit could wait on a fence that was never submitted.
	bool running = false;
} chain[CHAIN_SIZE];
/* /*
The mandelbrot set will be rendered to this buffer. The mandelbrot set will be rendered to this buffer.
The memory that backs the buffer is bufferMemory. The memory that backs the buffer is bufferMemory.
*/ */
VkBuffer buffer;
VkDeviceMemory bufferMemory;
VkImage image0;
VkDeviceMemory imageMemory0;
VkImageView imageView0; //uint32_t bufferSize; // size of `buffer` in bytes.
VkImage image1;
// todo: single memory block?
VkDeviceMemory imageMemory1;
VkImageView imageView1;
uint32_t bufferSize; // size of `buffer` in bytes.
const char * enabledLayers[16]; const char * enabledLayers[16];
size_t enabledLayersCount = 0; size_t enabledLayersCount = 0;
@ -239,93 +252,6 @@ private:
This variable keeps track of the index of that queue in its family. This variable keeps track of the index of that queue in its family.
*/ */
uint32_t queueFamilyIndex; uint32_t queueFamilyIndex;
struct UBO{
float frameNum;
};
UBO *pMappedBuffer = NULL;
public:
void run() {
// Buffer size of the storage buffer that will contain the rendered mandelbrot set.
bufferSize = sizeof(Pixel) * WIDTH * HEIGHT;
// Initialize vulkan:
createInstance();
findPhysicalDevice();
createDevice();
createBuffer();
vkMapMemory(device, bufferMemory, 0, sizeof(UBO), 0, (void**)&pMappedBuffer);
//createImageExportableDmabuf(image0, imageView0, imageMemory0, prime_fd, WIDTH, HEIGHT, VK_FORMAT_R8_UNORM);
//createImageExportableDmabuf(image1, imageView1, imageMemory1, prime_fd_uv, WIDTH/2, HEIGHT/2, VK_FORMAT_R8G8_UNORM);
int drm_fd = drm_fd = open("/dev/dri/renderD128", O_RDWR);
//auto *r = vaapi_recorder_create2(drm_fd, WIDTH, HEIGHT, "out.264", prime_fd, WIDTH * 4);
//auto *r = vaapi_recorder_create3(drm_fd, WIDTH, HEIGHT, "out.264", prime_fd, WIDTH * 4, prime_fd_uv, WIDTH * 2);
uint64_t mod;
uint32_t size, offset, pitch1, pitch2;
int fd;
uint64_t modifiers[32];
int count = getAvailiableModifiersList(modifiers, 32, VK_FORMAT_R16_UNORM);
auto *r = vaapi_recorder_create4(drm_fd, WIDTH, HEIGHT, "out.264", &fd, &mod, &size, &offset, &pitch1, &pitch2, modifiers, count);
createImageDumbDmabuf2(image0, imageView0, imageMemory0, fd, mod, size, offset, pitch1, pitch2);
createDescriptorSetLayout();
//createDescriptorSet();
createComputePipeline();
createCommandBuffer();
int frameNum = 0;
/*
We create a fence.
*/
VkFenceCreateInfo fenceCreateInfo = {};
fenceCreateInfo.sType = VK_STRUCTURE_TYPE_FENCE_CREATE_INFO;
fenceCreateInfo.flags = 0;
VK_CHECK_RESULT(vkCreateFence(device, &fenceCreateInfo, NULL, &fence));
while(frameNum++ < 1000)
{
// Finally, run the recorded command buffer.
runCommandBuffer();
//usleep(10000);
recorder_frame3(r);
pMappedBuffer->frameNum = frameNum;
//usleep(10000);
}
vkDestroyFence(device, fence, NULL);
vkUnmapMemory(device, bufferMemory);
// The former command rendered a mandelbrot set to a buffer.
// Save that buffer as a png on disk.
//saveRenderedImage();
// Clean up all vulkan resources.
cleanup();
}
void saveRenderedImage() {
void* mappedMemory = NULL;
#if 1
// Map the buffer memory, so that we can read from it on the CPU.
vkMapMemory(device, imageMemory0, 0, VK_WHOLE_SIZE, 0, &mappedMemory);
//Pixel* pmappedMemory = (Pixel *)mappedMemory;
FILE *f = fopen("out.bin","wb");
fwrite(mappedMemory, 4, WIDTH * HEIGHT, f);
fclose(f);
// Done reading, so unmap.
vkUnmapMemory(device, imageMemory0);
#else
static char mem[WIDTH * HEIGHT*4];
memcpy(mem, gDrm.mapped_buffer, WIDTH * HEIGHT * 4);
// Now we save the acquired color data to a .png.
// unsigned error = lodepng::encode("mandelbrot.png", image, WIDTH, HEIGHT);
//if (error) printf("encoder error %d: %s", error, lodepng_error_text(error));
FILE *f = fopen("out.bin","wb");
fwrite(mem, 4, WIDTH * HEIGHT, f);
fclose(f);
#endif
}
static VKAPI_ATTR VkBool32 VKAPI_CALL debugReportCallbackFn( static VKAPI_ATTR VkBool32 VKAPI_CALL debugReportCallbackFn(
VkDebugReportFlagsEXT flags, VkDebugReportFlagsEXT flags,
@ -612,7 +538,7 @@ public:
return -1; return -1;
} }
void createBuffer() { void createUBO(int chidx) {
/* /*
We will now create a buffer. We will render the mandelbrot set into this buffer We will now create a buffer. We will render the mandelbrot set into this buffer
in a computer shade later. in a computer shade later.
@ -620,11 +546,11 @@ public:
VkBufferCreateInfo bufferCreateInfo = {}; VkBufferCreateInfo bufferCreateInfo = {};
bufferCreateInfo.sType = VK_STRUCTURE_TYPE_BUFFER_CREATE_INFO; bufferCreateInfo.sType = VK_STRUCTURE_TYPE_BUFFER_CREATE_INFO;
bufferCreateInfo.size = bufferSize; // buffer size in bytes. bufferCreateInfo.size = sizeof(UBO); // buffer size in bytes.
bufferCreateInfo.usage = VK_BUFFER_USAGE_UNIFORM_BUFFER_BIT; // buffer is used as a storage buffer. bufferCreateInfo.usage = VK_BUFFER_USAGE_UNIFORM_BUFFER_BIT; // buffer is used as a storage buffer.
bufferCreateInfo.sharingMode = VK_SHARING_MODE_EXCLUSIVE; // buffer is exclusive to a single queue family at a time. bufferCreateInfo.sharingMode = VK_SHARING_MODE_EXCLUSIVE; // buffer is exclusive to a single queue family at a time.
VK_CHECK_RESULT(vkCreateBuffer(device, &bufferCreateInfo, NULL, &buffer)); // create buffer. VK_CHECK_RESULT(vkCreateBuffer(device, &bufferCreateInfo, NULL, &chain[chidx].ubo)); // create buffer.
/* /*
But the buffer doesn't allocate memory for itself, so we must do that manually. But the buffer doesn't allocate memory for itself, so we must do that manually.
@ -634,7 +560,7 @@ public:
First, we find the memory requirements for the buffer. First, we find the memory requirements for the buffer.
*/ */
VkMemoryRequirements memoryRequirements; VkMemoryRequirements memoryRequirements;
vkGetBufferMemoryRequirements(device, buffer, &memoryRequirements); vkGetBufferMemoryRequirements(device, chain[chidx].ubo, &memoryRequirements);
/* /*
Now use obtained memory requirements info to allocate the memory for the buffer. Now use obtained memory requirements info to allocate the memory for the buffer.
@ -655,10 +581,11 @@ public:
allocateInfo.memoryTypeIndex = findMemoryType( allocateInfo.memoryTypeIndex = findMemoryType(
memoryRequirements.memoryTypeBits, VK_MEMORY_PROPERTY_HOST_COHERENT_BIT | VK_MEMORY_PROPERTY_HOST_VISIBLE_BIT); memoryRequirements.memoryTypeBits, VK_MEMORY_PROPERTY_HOST_COHERENT_BIT | VK_MEMORY_PROPERTY_HOST_VISIBLE_BIT);
VK_CHECK_RESULT(vkAllocateMemory(device, &allocateInfo, NULL, &bufferMemory)); // allocate memory on device. VK_CHECK_RESULT(vkAllocateMemory(device, &allocateInfo, NULL, &chain[chidx].uboMemory)); // allocate memory on device.
// Now associate that allocated memory with the buffer. With that, the buffer is backed by actual memory. // Now associate that allocated memory with the buffer. With that, the buffer is backed by actual memory.
VK_CHECK_RESULT(vkBindBufferMemory(device, buffer, bufferMemory, 0)); VK_CHECK_RESULT(vkBindBufferMemory(device, chain[chidx].ubo, chain[chidx].uboMemory, 0));
vkMapMemory(device, chain[chidx].uboMemory, 0, sizeof(UBO), 0, (void**)&chain[chidx].pMappedUBO);
} }
int getAvailiableModifiersList(uint64_t *modifiers2, size_t len, VkFormat fmt) int getAvailiableModifiersList(uint64_t *modifiers2, size_t len, VkFormat fmt)
{ {
@ -770,7 +697,7 @@ public:
} }
// create and import dmabuf // create and import dmabuf
void createImageDumbDmabuf2(VkImage &image, VkImageView &imageView, VkDeviceMemory &imageMemory, int fd, uint64_t mod, uint32_t size, uint32_t offset, uint32_t pitch1, uint32_t pitch2) { void createImageDumbDmabuf2(VkImage &image, VkImageView &imageView, VkDeviceMemory &imageMemory, VkImage &image1, VkImageView &imageView1, VkDeviceMemory &imageMemory1, int fd, uint64_t mod, uint32_t size, uint32_t offset, uint32_t pitch1, uint32_t pitch2) {
/* /*
We will now create a buffer. We will render the mandelbrot set into this buffer We will now create a buffer. We will render the mandelbrot set into this buffer
in a computer shade later. in a computer shade later.
@ -871,7 +798,7 @@ public:
view.image = image1; view.image = image1;
VK_CHECK_RESULT(vkCreateImageView(device, &view, nullptr, &imageView1)); VK_CHECK_RESULT(vkCreateImageView(device, &view, nullptr, &imageView1));
} }
#if 0
// create and import dmabuf as opaque fd, allows any tiling // create and import dmabuf as opaque fd, allows any tiling
void createImageDumbOpaque(VkImage &image, VkImageView &imageView, VkDeviceMemory &imageMemory) { void createImageDumbOpaque(VkImage &image, VkImageView &imageView, VkDeviceMemory &imageMemory) {
/* /*
@ -1051,7 +978,7 @@ public:
printf("imageModifier %llx\n", imageModifiers.drmFormatModifier); printf("imageModifier %llx\n", imageModifiers.drmFormatModifier);
// todo: get subresource plane info (vkGetImageSubresourceLayout) // todo: get subresource plane info (vkGetImageSubresourceLayout)
} }
#endif
void createDescriptorSetLayout() { void createDescriptorSetLayout() {
/* /*
Here we specify a descriptor set layout. This allows us to bind our descriptors to Here we specify a descriptor set layout. This allows us to bind our descriptors to
@ -1090,8 +1017,8 @@ public:
// Create the descriptor set layout. // Create the descriptor set layout.
VK_CHECK_RESULT(vkCreateDescriptorSetLayout(device, &descriptorSetLayoutCreateInfo, NULL, &descriptorSetLayout)); VK_CHECK_RESULT(vkCreateDescriptorSetLayout(device, &descriptorSetLayoutCreateInfo, NULL, &descriptorSetLayout));
} }
void createDescriptorPool()
void createDescriptorSet() { {
/* /*
So we will allocate a descriptor set here. So we will allocate a descriptor set here.
But we need to first create a descriptor pool to do that. But we need to first create a descriptor pool to do that.
@ -1102,18 +1029,20 @@ public:
*/ */
VkDescriptorPoolSize descriptorPoolSize[2] = {}; VkDescriptorPoolSize descriptorPoolSize[2] = {};
descriptorPoolSize[0].type = VK_DESCRIPTOR_TYPE_STORAGE_IMAGE; descriptorPoolSize[0].type = VK_DESCRIPTOR_TYPE_STORAGE_IMAGE;
descriptorPoolSize[0].descriptorCount = 2; descriptorPoolSize[0].descriptorCount = 2*CHAIN_SIZE;
descriptorPoolSize[1].type = VK_DESCRIPTOR_TYPE_UNIFORM_BUFFER; descriptorPoolSize[1].type = VK_DESCRIPTOR_TYPE_UNIFORM_BUFFER;
descriptorPoolSize[1].descriptorCount = 1; descriptorPoolSize[1].descriptorCount = 1*CHAIN_SIZE;
VkDescriptorPoolCreateInfo descriptorPoolCreateInfo = {}; VkDescriptorPoolCreateInfo descriptorPoolCreateInfo = {};
descriptorPoolCreateInfo.sType = VK_STRUCTURE_TYPE_DESCRIPTOR_POOL_CREATE_INFO; descriptorPoolCreateInfo.sType = VK_STRUCTURE_TYPE_DESCRIPTOR_POOL_CREATE_INFO;
descriptorPoolCreateInfo.maxSets = 1; // we only need to allocate one descriptor set from the pool. descriptorPoolCreateInfo.maxSets = CHAIN_SIZE; // we only need to allocate one descriptor set from the pool.
descriptorPoolCreateInfo.poolSizeCount = 2; descriptorPoolCreateInfo.poolSizeCount = 2;
descriptorPoolCreateInfo.pPoolSizes = descriptorPoolSize; descriptorPoolCreateInfo.pPoolSizes = descriptorPoolSize;
// create descriptor pool. // create descriptor pool.
VK_CHECK_RESULT(vkCreateDescriptorPool(device, &descriptorPoolCreateInfo, NULL, &descriptorPool)); VK_CHECK_RESULT(vkCreateDescriptorPool(device, &descriptorPoolCreateInfo, NULL, &descriptorPool));
}
void createDescriptorSet(int chidx) {
/* /*
With the pool allocated, we can now allocate the descriptor set. With the pool allocated, we can now allocate the descriptor set.
*/ */
@ -1124,7 +1053,7 @@ public:
descriptorSetAllocateInfo.pSetLayouts = &descriptorSetLayout; descriptorSetAllocateInfo.pSetLayouts = &descriptorSetLayout;
// allocate descriptor set. // allocate descriptor set.
VK_CHECK_RESULT(vkAllocateDescriptorSets(device, &descriptorSetAllocateInfo, &descriptorSet)); VK_CHECK_RESULT(vkAllocateDescriptorSets(device, &descriptorSetAllocateInfo, &chain[chidx].descriptorSet));
/* /*
Next, we need to connect our actual storage buffer with the descrptor. Next, we need to connect our actual storage buffer with the descrptor.
@ -1134,29 +1063,29 @@ public:
// Specify the buffer to bind to the descriptor. // Specify the buffer to bind to the descriptor.
VkDescriptorImageInfo descriptorImageInfo[2] = {}; VkDescriptorImageInfo descriptorImageInfo[2] = {};
descriptorImageInfo[0].imageView = imageView0; descriptorImageInfo[0].imageView = chain[chidx].imageView0;
descriptorImageInfo[0].imageLayout = VK_IMAGE_LAYOUT_GENERAL; descriptorImageInfo[0].imageLayout = VK_IMAGE_LAYOUT_GENERAL;
descriptorImageInfo[1].imageView = imageView1; descriptorImageInfo[1].imageView = chain[chidx].imageView1;
descriptorImageInfo[1].imageLayout = VK_IMAGE_LAYOUT_GENERAL; descriptorImageInfo[1].imageLayout = VK_IMAGE_LAYOUT_GENERAL;
VkWriteDescriptorSet writeDescriptorSet[2] = {}; VkWriteDescriptorSet writeDescriptorSet[2] = {};
writeDescriptorSet[0].sType = VK_STRUCTURE_TYPE_WRITE_DESCRIPTOR_SET; writeDescriptorSet[0].sType = VK_STRUCTURE_TYPE_WRITE_DESCRIPTOR_SET;
writeDescriptorSet[0].dstSet = descriptorSet; // write to this descriptor set. writeDescriptorSet[0].dstSet = chain[chidx].descriptorSet; // write to this descriptor set.
writeDescriptorSet[0].dstBinding = 0; // write to the first, and only binding. writeDescriptorSet[0].dstBinding = 0; // write to the first, and only binding.
writeDescriptorSet[0].descriptorCount = 1; // update a single descriptor. writeDescriptorSet[0].descriptorCount = 1; // update a single descriptor.
writeDescriptorSet[0].descriptorType = VK_DESCRIPTOR_TYPE_STORAGE_IMAGE; // storage buffer. writeDescriptorSet[0].descriptorType = VK_DESCRIPTOR_TYPE_STORAGE_IMAGE; // storage buffer.
writeDescriptorSet[0].pImageInfo = &descriptorImageInfo[0]; writeDescriptorSet[0].pImageInfo = &descriptorImageInfo[0];
writeDescriptorSet[1].sType = VK_STRUCTURE_TYPE_WRITE_DESCRIPTOR_SET; writeDescriptorSet[1].sType = VK_STRUCTURE_TYPE_WRITE_DESCRIPTOR_SET;
writeDescriptorSet[1].dstSet = descriptorSet; // write to this descriptor set. writeDescriptorSet[1].dstSet = chain[chidx].descriptorSet; // write to this descriptor set.
writeDescriptorSet[1].dstBinding = 1; // write to the first, and only binding. writeDescriptorSet[1].dstBinding = 1; // write to the first, and only binding.
writeDescriptorSet[1].descriptorCount = 1; // update a single descriptor. writeDescriptorSet[1].descriptorCount = 1; // update a single descriptor.
writeDescriptorSet[1].descriptorType = VK_DESCRIPTOR_TYPE_STORAGE_IMAGE; // storage buffer. writeDescriptorSet[1].descriptorType = VK_DESCRIPTOR_TYPE_STORAGE_IMAGE; // storage buffer.
writeDescriptorSet[1].pImageInfo = &descriptorImageInfo[1]; writeDescriptorSet[1].pImageInfo = &descriptorImageInfo[1];
VkDescriptorBufferInfo descriptorBufferInfo = {}; VkDescriptorBufferInfo descriptorBufferInfo = {};
descriptorBufferInfo.buffer = buffer; descriptorBufferInfo.buffer = chain[chidx].ubo;
descriptorBufferInfo.offset = 0; descriptorBufferInfo.offset = 0;
descriptorBufferInfo.range = bufferSize; descriptorBufferInfo.range = sizeof(UBO);
// perform the update of the descriptor set. // perform the update of the descriptor set.
vkUpdateDescriptorSets(device, 2, writeDescriptorSet, 0, NULL); vkUpdateDescriptorSets(device, 2, writeDescriptorSet, 0, NULL);
writeDescriptorSet[0].dstBinding = 2; writeDescriptorSet[0].dstBinding = 2;
@ -1252,8 +1181,8 @@ public:
1, &pipelineCreateInfo, 1, &pipelineCreateInfo,
NULL, &pipeline)); NULL, &pipeline));
} }
void createCommandPool()
void createCommandBuffer() { {
/* /*
We are getting closer to the end. In order to send commands to the device(GPU), We are getting closer to the end. In order to send commands to the device(GPU),
we must first record commands into a command buffer. we must first record commands into a command buffer.
@ -1261,12 +1190,11 @@ public:
*/ */
VkCommandPoolCreateInfo commandPoolCreateInfo = {}; VkCommandPoolCreateInfo commandPoolCreateInfo = {};
commandPoolCreateInfo.sType = VK_STRUCTURE_TYPE_COMMAND_POOL_CREATE_INFO; commandPoolCreateInfo.sType = VK_STRUCTURE_TYPE_COMMAND_POOL_CREATE_INFO;
commandPoolCreateInfo.flags = 0; commandPoolCreateInfo.flags = VK_COMMAND_POOL_CREATE_RESET_COMMAND_BUFFER_BIT;
// the queue family of this command pool. All command buffers allocated from this command pool, // the queue family of this command pool. All command buffers allocated from this command pool,
// must be submitted to queues of this family ONLY. // must be submitted to queues of this family ONLY.
commandPoolCreateInfo.queueFamilyIndex = queueFamilyIndex; commandPoolCreateInfo.queueFamilyIndex = queueFamilyIndex;
VK_CHECK_RESULT(vkCreateCommandPool(device, &commandPoolCreateInfo, NULL, &commandPool)); VK_CHECK_RESULT(vkCreateCommandPool(device, &commandPoolCreateInfo, NULL, &commandPool));
/* /*
Now allocate a command buffer from the command pool. Now allocate a command buffer from the command pool.
*/ */
@ -1280,48 +1208,94 @@ public:
commandBufferAllocateInfo.commandBufferCount = 1; // allocate a single command buffer. commandBufferAllocateInfo.commandBufferCount = 1; // allocate a single command buffer.
VK_CHECK_RESULT(vkAllocateCommandBuffers(device, &commandBufferAllocateInfo, &commandBuffer)); // allocate command buffer. VK_CHECK_RESULT(vkAllocateCommandBuffers(device, &commandBufferAllocateInfo, &commandBuffer)); // allocate command buffer.
/* }
Now we shall start recording commands into the newly allocated command buffer. void prepareImage(int chidx)
*/ {
VkCommandBufferBeginInfo beginInfo = {}; VkCommandBufferBeginInfo beginInfo = {};
beginInfo.sType = VK_STRUCTURE_TYPE_COMMAND_BUFFER_BEGIN_INFO; beginInfo.sType = VK_STRUCTURE_TYPE_COMMAND_BUFFER_BEGIN_INFO;
beginInfo.flags = 0; // the buffer is only submitted and used once in this application. beginInfo.flags = 0; // the buffer is only submitted and used once in this application.
VK_CHECK_RESULT(vkBeginCommandBuffer(commandBuffer, &beginInfo)); // start recording commands. VK_CHECK_RESULT(vkBeginCommandBuffer(commandBuffer, &beginInfo)); // start recording commands.
/*
We need to bind a pipeline, AND a descriptor set before we dispatch.
The validation layer will NOT give warnings if you forget these, so be very careful not to forget them.
*/
vkCmdBindPipeline(commandBuffer, VK_PIPELINE_BIND_POINT_COMPUTE, pipeline); vkCmdBindPipeline(commandBuffer, VK_PIPELINE_BIND_POINT_COMPUTE, pipeline);
VkImageMemoryBarrier imageMemoryBarrier = {VK_STRUCTURE_TYPE_IMAGE_MEMORY_BARRIER}; VkImageMemoryBarrier imageMemoryBarrier = {VK_STRUCTURE_TYPE_IMAGE_MEMORY_BARRIER};
imageMemoryBarrier.oldLayout = VK_IMAGE_LAYOUT_UNDEFINED; imageMemoryBarrier.oldLayout = VK_IMAGE_LAYOUT_UNDEFINED;
imageMemoryBarrier.newLayout = VK_IMAGE_LAYOUT_GENERAL; imageMemoryBarrier.newLayout = VK_IMAGE_LAYOUT_GENERAL;
imageMemoryBarrier.image = image0; imageMemoryBarrier.image = chain[chidx].image0;
imageMemoryBarrier.subresourceRange = { VK_IMAGE_ASPECT_COLOR_BIT, 0, 1, 0, 1 }; imageMemoryBarrier.subresourceRange = { VK_IMAGE_ASPECT_COLOR_BIT, 0, 1, 0, 1 };
// imageMemoryBarrier.srcAccessMask = VK_ACCESS_SHADER_WRITE_BIT; // imageMemoryBarrier.srcAccessMask = VK_ACCESS_SHADER_WRITE_BIT;
// imageMemoryBarrier.dstAccessMask = VK_ACCESS_MEMORY_READ_BIT; // imageMemoryBarrier.dstAccessMask = VK_ACCESS_MEMORY_READ_BIT;
vkCmdPipelineBarrier(commandBuffer, VK_PIPELINE_STAGE_TOP_OF_PIPE_BIT, VK_PIPELINE_STAGE_COMPUTE_SHADER_BIT, vkCmdPipelineBarrier(commandBuffer, VK_PIPELINE_STAGE_TOP_OF_PIPE_BIT, VK_PIPELINE_STAGE_COMPUTE_SHADER_BIT,
0, 0, nullptr, 0, nullptr, 1, &imageMemoryBarrier); 0, 0, nullptr, 0, nullptr, 1, &imageMemoryBarrier);
imageMemoryBarrier.image = image1; imageMemoryBarrier.image = chain[chidx].image1;
vkCmdPipelineBarrier(commandBuffer, VK_PIPELINE_STAGE_TOP_OF_PIPE_BIT, VK_PIPELINE_STAGE_COMPUTE_SHADER_BIT, vkCmdPipelineBarrier(commandBuffer, VK_PIPELINE_STAGE_TOP_OF_PIPE_BIT, VK_PIPELINE_STAGE_COMPUTE_SHADER_BIT,
0, 0, nullptr, 0, nullptr, 1, &imageMemoryBarrier); 0, 0, nullptr, 0, nullptr, 1, &imageMemoryBarrier);
VK_CHECK_RESULT(vkEndCommandBuffer(commandBuffer)); // end recording commands.
VkFence fence;
VkFenceCreateInfo fenceCreateInfo = {};
fenceCreateInfo.sType = VK_STRUCTURE_TYPE_FENCE_CREATE_INFO;
fenceCreateInfo.flags = 0;
VK_CHECK_RESULT(vkCreateFence(device, &fenceCreateInfo, NULL, &fence));
createDescriptorSet(); VkSubmitInfo submitInfo = {};
vkCmdBindDescriptorSets(commandBuffer, VK_PIPELINE_BIND_POINT_COMPUTE, pipelineLayout, 0, 1, &descriptorSet, 0, NULL); submitInfo.sType = VK_STRUCTURE_TYPE_SUBMIT_INFO;
submitInfo.commandBufferCount = 1; // submit a single command buffer
submitInfo.pCommandBuffers = &commandBuffer; // the command buffer to submit.
VK_CHECK_RESULT(vkQueueSubmit(queue, 1, &submitInfo, fence));
VK_CHECK_RESULT(vkWaitForFences(device, 1, &fence, VK_TRUE, 100000000000));
vkDestroyFence(device, fence, NULL);
vkResetCommandBuffer(commandBuffer, 0);
}
void createCommandBuffer(int chidx) {
/*
Now allocate a command buffer from the command pool.
*/
VkCommandBufferAllocateInfo commandBufferAllocateInfo = {};
commandBufferAllocateInfo.sType = VK_STRUCTURE_TYPE_COMMAND_BUFFER_ALLOCATE_INFO;
commandBufferAllocateInfo.commandPool = commandPool; // specify the command pool to allocate from.
// if the command buffer is primary, it can be directly submitted to queues.
// A secondary buffer has to be called from some primary command buffer, and cannot be directly
// submitted to a queue. To keep things simple, we use a primary command buffer.
commandBufferAllocateInfo.level = VK_COMMAND_BUFFER_LEVEL_PRIMARY;
commandBufferAllocateInfo.commandBufferCount = 1; // allocate a single command buffer.
VK_CHECK_RESULT(vkAllocateCommandBuffers(device, &commandBufferAllocateInfo, &chain[chidx].commandBuffer)); // allocate command buffer.
VkCommandBufferBeginInfo beginInfo = {};
beginInfo.sType = VK_STRUCTURE_TYPE_COMMAND_BUFFER_BEGIN_INFO;
beginInfo.flags = 0; // the buffer is only submitted and used once in this application.
VK_CHECK_RESULT(vkBeginCommandBuffer(chain[chidx].commandBuffer, &beginInfo)); // start recording commands.
/*
We need to bind a pipeline, AND a descriptor set before we dispatch.
The validation layer will NOT give warnings if you forget these, so be very careful not to forget them.
*/
vkCmdBindPipeline(chain[chidx].commandBuffer, VK_PIPELINE_BIND_POINT_COMPUTE, pipeline);
/*
Now we shall start recording commands into the newly allocated command buffer.
*/
vkCmdBindDescriptorSets(chain[chidx].commandBuffer, VK_PIPELINE_BIND_POINT_COMPUTE, pipelineLayout, 0, 1, &chain[chidx].descriptorSet, 0, NULL);
/* /*
Calling vkCmdDispatch basically starts the compute pipeline, and executes the compute shader. Calling vkCmdDispatch basically starts the compute pipeline, and executes the compute shader.
The number of workgroups is specified in the arguments. The number of workgroups is specified in the arguments.
If you are already familiar with compute shaders from OpenGL, this should be nothing new to you. If you are already familiar with compute shaders from OpenGL, this should be nothing new to you.
*/ */
vkCmdDispatch(commandBuffer, (uint32_t)ceil(WIDTH/2 / float(WORKGROUP_SIZE)), (uint32_t)ceil(HEIGHT/2 / float(WORKGROUP_SIZE)), 1); vkCmdDispatch(chain[chidx].commandBuffer, (uint32_t)ceil(WIDTH/2 / float(WORKGROUP_SIZE)), (uint32_t)ceil(HEIGHT/2 / float(WORKGROUP_SIZE)), 1);
VK_CHECK_RESULT(vkEndCommandBuffer(chain[chidx].commandBuffer)); // end recording commands.
/*
We create a fence.
*/
VkFenceCreateInfo fenceCreateInfo = {};
fenceCreateInfo.sType = VK_STRUCTURE_TYPE_FENCE_CREATE_INFO;
fenceCreateInfo.flags = 0;
VK_CHECK_RESULT(vkCreateFence(device, &fenceCreateInfo, NULL, &chain[chidx].fence));
VK_CHECK_RESULT(vkEndCommandBuffer(commandBuffer)); // end recording commands.
} }
VkFence fence;
void runCommandBuffer() {
void runCommandBuffer(int chidx) {
/* /*
Now we shall finally submit the recorded command buffer to a queue. Now we shall finally submit the recorded command buffer to a queue.
*/ */
@ -1329,18 +1303,22 @@ public:
VkSubmitInfo submitInfo = {}; VkSubmitInfo submitInfo = {};
submitInfo.sType = VK_STRUCTURE_TYPE_SUBMIT_INFO; submitInfo.sType = VK_STRUCTURE_TYPE_SUBMIT_INFO;
submitInfo.commandBufferCount = 1; // submit a single command buffer submitInfo.commandBufferCount = 1; // submit a single command buffer
submitInfo.pCommandBuffers = &commandBuffer; // the command buffer to submit. submitInfo.pCommandBuffers = &chain[chidx].commandBuffer; // the command buffer to submit.
// static bool b; // static bool b;
//if(b) //if(b)
//VK_CHECK_RESULT(vkWaitForFences(device, 1, &fence, VK_TRUE, 100000000000)); //
//b = 1; //b = 1;
vkResetFences(device, 1, &fence); if(chain[chidx].running)
VK_CHECK_RESULT(vkWaitForFences(device, 1, &chain[chidx].fence, VK_TRUE, 100000000000));
vkResetFences(device, 1, &chain[chidx].fence);
/* /*
We submit the command buffer on the queue, at the same time giving a fence. We submit the command buffer on the queue, at the same time giving a fence.
*/ */
VK_CHECK_RESULT(vkQueueSubmit(queue, 1, &submitInfo, fence)); VK_CHECK_RESULT(vkQueueSubmit(queue, 1, &submitInfo, chain[chidx].fence));
chain[chidx].running = true;
/* /*
The command will not have finished executing until the fence is signalled. The command will not have finished executing until the fence is signalled.
So we wait here. So we wait here.
@ -1348,9 +1326,101 @@ public:
and we will not be sure that the command has finished executing unless we wait for the fence. and we will not be sure that the command has finished executing unless we wait for the fence.
Hence, we use a fence here. Hence, we use a fence here.
*/ */
VK_CHECK_RESULT(vkWaitForFences(device, 1, &fence, VK_TRUE, 100000000000)); //VK_CHECK_RESULT(vkWaitForFences(device, 1, &fence, VK_TRUE, 100000000000));
} }
/*
Entry point of the demo.
Initializes Vulkan, opens the DRM render node and creates the VA-API recorder,
builds CHAIN_SIZE independent chain slots (UBO, images, descriptor set,
command buffer), then submits and records 1000 frames, cycling through the
slots. Terminates the process when done (exit code 0) or when the render node
or the recorder cannot be set up (exit code 1).
*/
void run() {
    // Initialize vulkan:
    createInstance();
    findPhysicalDevice();
    createDevice();

    // The original read `int drm_fd = drm_fd = open(...)`, assigning the
    // variable inside its own initializer; a plain initialization is intended.
    int drm_fd = open("/dev/dri/renderD128", O_RDWR);
    if (drm_fd < 0) {
        printf("failed to open /dev/dri/renderD128\n");
        exit(1);
    }

    uint64_t mod;
    uint32_t size, offset, pitch1, pitch2;
    int fd[CHAIN_SIZE];
    uint64_t modifiers[32];
    int count = getAvailiableModifiersList(modifiers, 32, VK_FORMAT_R16_UNORM);

    // vaapi_recorder_create5 can return NULL; in that case fd[], mod, size,
    // offset and the pitches must not be used, so stop here.
    auto *r = vaapi_recorder_create5(drm_fd, WIDTH, HEIGHT, "out.264", fd, &mod, &size, &offset, &pitch1, &pitch2, modifiers, count);
    if (r == NULL) {
        printf("failed to create vaapi recorder\n");
        exit(1);
    }

    // Per-slot resources that must exist before the shared pipeline objects.
    for (int i = 0; i < CHAIN_SIZE; i++) {
        createUBO(i);
        createImageDumbDmabuf2(chain[i].image0, chain[i].imageView0, chain[i].imageMemory0,
                               chain[i].image1, chain[i].imageView1, chain[i].imageMemory1,
                               fd[i], mod, size, offset, pitch1, pitch2);
    }

    // Objects shared by all chain slots.
    createDescriptorSetLayout();
    createDescriptorPool();
    createComputePipeline();
    createCommandPool();

    // Per-slot objects that depend on the shared ones above.
    for (int i = 0; i < CHAIN_SIZE; i++) {
        prepareImage(i);
        createDescriptorSet(i);
        createCommandBuffer(i);
    }

    int frameNum = 0;
    while (frameNum++ < 1000) {
        // Cycle through the chain slots. Derived from CHAIN_SIZE instead of the
        // former hard-coded `& 3` mask, so the index always stays inside the
        // arrays sized by CHAIN_SIZE (identical result for CHAIN_SIZE == 4).
        int chidx = frameNum % CHAIN_SIZE;

        // Finally, run the recorded command buffer.
        runCommandBuffer(chidx);
        recorder_frame4(r, chidx);

        // NOTE(review): this write happens after the slot's command buffer has
        // been submitted, so it presumably only takes effect the next time the
        // slot is used - confirm this is intended and cannot race with the GPU.
        chain[chidx].pMappedUBO->frameNum = frameNum;
    }

    // Wait for every slot that still has a submission in flight.
    for (int i = 0; i < CHAIN_SIZE; i++) {
        if (chain[i].running)
            VK_CHECK_RESULT(vkWaitForFences(device, 1, &chain[i].fence, VK_TRUE, 100000000000));
    }

    // The process exits here, so the cleanup() call below is never reached.
    exit(0);

    // Clean up all vulkan resources.
    cleanup();
}
#if 0
// Debug helper, currently compiled out by the surrounding "#if 0".
// Dumps WIDTH * HEIGHT 4-byte items to the file "out.bin".
// The active "#if 1" branch reads them from the mapped imageMemory0;
// the "#else" branch reads them from gDrm.mapped_buffer instead.
// NOTE(review): neither the vkMapMemory nor the fopen result is checked.
void saveRenderedImage() {
void* mappedMemory = NULL;
#if 1
// Map the image memory, so that we can read from it on the CPU.
vkMapMemory(device, imageMemory0, 0, VK_WHOLE_SIZE, 0, &mappedMemory);
//Pixel* pmappedMemory = (Pixel *)mappedMemory;
FILE *f = fopen("out.bin","wb");
// Write the mapped data as WIDTH * HEIGHT items of 4 bytes each.
fwrite(mappedMemory, 4, WIDTH * HEIGHT, f);
fclose(f);
// Done reading, so unmap.
vkUnmapMemory(device, imageMemory0);
#else
// Copy the data into a static staging array before writing it out.
static char mem[WIDTH * HEIGHT*4];
memcpy(mem, gDrm.mapped_buffer, WIDTH * HEIGHT * 4);
// Former PNG export, kept for reference; the raw dump below replaces it.
// unsigned error = lodepng::encode("mandelbrot.png", image, WIDTH, HEIGHT);
//if (error) printf("encoder error %d: %s", error, lodepng_error_text(error));
FILE *f = fopen("out.bin","wb");
fwrite(mem, 4, WIDTH * HEIGHT, f);
fclose(f);
#endif
}
#endif
void cleanup() { void cleanup() {
/* /*
Clean up all Vulkan Resources. Clean up all Vulkan Resources.
@ -1366,11 +1436,11 @@ public:
func(instance, debugReportCallback, NULL); func(instance, debugReportCallback, NULL);
} }
vkFreeMemory(device, bufferMemory, NULL); //vkFreeMemory(device, bufferMemory, NULL);
vkDestroyBuffer(device, buffer, NULL); //vkDestroyBuffer(device, buffer, NULL);
vkFreeMemory(device, imageMemory0, NULL); //vkFreeMemory(device, imageMemory0, NULL);
vkDestroyImageView(device, imageView0, NULL); //vkDestroyImageView(device, imageView0, NULL);
vkDestroyImage(device, image0, NULL); //vkDestroyImage(device, image0, NULL);
vkDestroyShaderModule(device, computeShaderModule, NULL); vkDestroyShaderModule(device, computeShaderModule, NULL);
vkDestroyDescriptorPool(device, descriptorPool, NULL); vkDestroyDescriptorPool(device, descriptorPool, NULL);
vkDestroyDescriptorSetLayout(device, descriptorSetLayout, NULL); vkDestroyDescriptorSetLayout(device, descriptorSetLayout, NULL);