Use async chain of 4 frames with separate command buffers
This commit is contained in:
parent
1fb2179423
commit
bd44382e55
3 changed files with 360 additions and 153 deletions
|
@ -106,6 +106,8 @@ enum NALUType {
|
|||
|
||||
#define ALIGN16(x) ((x+15)&~15)
|
||||
|
||||
#define CHAIN_SIZE 4
|
||||
|
||||
struct vaapi_recorder {
|
||||
int drm_fd, output_fd;
|
||||
int width, height;
|
||||
|
@ -152,7 +154,7 @@ struct vaapi_recorder {
|
|||
VABufferID output_buf;
|
||||
VASurfaceID output_sync_surf;
|
||||
} encoder;
|
||||
|
||||
VASurfaceID inputFrames[CHAIN_SIZE];
|
||||
};
|
||||
|
||||
/*
|
||||
|
@ -2334,6 +2336,131 @@ err_free:
|
|||
return NULL;
|
||||
}
|
||||
|
||||
struct vaapi_recorder *
|
||||
vaapi_recorder_create5(int drm_fd, int width, int height, const char *filename, int *dmabuf_fd, uint64_t *mod, uint32_t *size, uint32_t *offset, uint32_t *pitch1, uint32_t *pitch2, uint64_t *modifiers, int modifierscount)
|
||||
{
|
||||
struct vaapi_recorder *r;
|
||||
VAStatus status;
|
||||
int major, minor;
|
||||
int flags;
|
||||
VASurfaceAttribExternalBuffers va_attrib_extbuf = {0};
|
||||
VADRMPRIMESurfaceDescriptor drmSurface = {0};
|
||||
|
||||
r = (vaapi_recorder*)calloc(sizeof *r,1);
|
||||
if (r == NULL)
|
||||
return NULL;
|
||||
|
||||
r->width = width;
|
||||
r->height = height;
|
||||
r->drm_fd = drm_fd;
|
||||
|
||||
flags = O_WRONLY | O_CREAT | O_TRUNC | O_CLOEXEC;
|
||||
r->output_fd = open(filename, flags, 0644);
|
||||
if (r->output_fd < 0)
|
||||
goto err_thread;
|
||||
|
||||
r->va_dpy = vaGetDisplayDRM(drm_fd);
|
||||
if (!r->va_dpy) {
|
||||
printf("failed to create VA display\n");
|
||||
goto err_fd;
|
||||
}
|
||||
|
||||
status = vaInitialize(r->va_dpy, &major, &minor);
|
||||
if (status != VA_STATUS_SUCCESS) {
|
||||
printf("vaapi: failed to initialize display\n");
|
||||
goto err_fd;
|
||||
}
|
||||
|
||||
if (setup_vpp(r) < 0) {
|
||||
printf("vaapi: failed to initialize VPP pipeline\n");
|
||||
goto err_va_dpy;
|
||||
}
|
||||
|
||||
if (setup_encoder(r) < 0) {
|
||||
goto err_vpp;
|
||||
}
|
||||
//create_surface_from_fd(r, dmabuf_fd, dmabuf_stride, &gInputRGBA);
|
||||
VASurfaceAttrib va_attribs[5];
|
||||
|
||||
|
||||
|
||||
|
||||
//unsigned long buffer_fd[2] = {dmabuf_fd, dmabuf_fd_uv};
|
||||
|
||||
va_attrib_extbuf.pixel_format = VA_FOURCC_P010;
|
||||
va_attrib_extbuf.width = r->width;
|
||||
va_attrib_extbuf.height = r->height;
|
||||
//va_attrib_extbuf.data_size = r->height * stride;
|
||||
//va_attrib_extbuf.num_planes = 1;
|
||||
//va_attrib_extbuf.pitches[0] = stride;
|
||||
//va_attrib_extbuf.offsets[0] = 0;
|
||||
//va_attrib_extbuf.buffers = &buffer_fd;
|
||||
//va_attrib_extbuf.num_buffers = 1;
|
||||
va_attrib_extbuf.flags = VA_SURFACE_EXTBUF_DESC_ENABLE_TILING;
|
||||
va_attrib_extbuf.private_data = NULL;
|
||||
VADRMFormatModifierList modList;
|
||||
modList.modifiers = modifiers;
|
||||
modList.num_modifiers = modifierscount;
|
||||
|
||||
va_attribs[0].type = VASurfaceAttribMemoryType;
|
||||
va_attribs[0].flags = VA_SURFACE_ATTRIB_SETTABLE;
|
||||
va_attribs[0].value.type = VAGenericValueTypeInteger;
|
||||
va_attribs[0].value.value.i = VA_SURFACE_ATTRIB_MEM_TYPE_VA;
|
||||
va_attribs[1].type = VASurfaceAttribUsageHint;
|
||||
va_attribs[1].flags = VA_SURFACE_ATTRIB_SETTABLE;
|
||||
va_attribs[1].value.type = VAGenericValueTypeInteger;
|
||||
va_attribs[1].value.value.i = VA_SURFACE_ATTRIB_USAGE_HINT_EXPORT | VA_SURFACE_ATTRIB_USAGE_HINT_ENCODER;
|
||||
va_attribs[2].type = VASurfaceAttribPixelFormat;
|
||||
va_attribs[2].flags = VA_SURFACE_ATTRIB_SETTABLE;
|
||||
va_attribs[2].value.type = VAGenericValueTypeInteger;
|
||||
va_attribs[2].value.value.i = VA_FOURCC_P010;
|
||||
va_attribs[3].type = VASurfaceAttribExternalBufferDescriptor;
|
||||
va_attribs[3].flags = VA_SURFACE_ATTRIB_SETTABLE;
|
||||
va_attribs[3].value.type = VAGenericValueTypePointer;
|
||||
va_attribs[3].value.value.p = &va_attrib_extbuf;
|
||||
va_attribs[4].type = VASurfaceAttribDRMFormatModifiers;
|
||||
va_attribs[4].flags = VA_SURFACE_ATTRIB_SETTABLE;
|
||||
va_attribs[4].value.type = VAGenericValueTypePointer;
|
||||
va_attribs[4].value.value.p = &modList;
|
||||
|
||||
status = vaCreateSurfaces(r->va_dpy, VA_RT_FORMAT_YUV420_10,
|
||||
r->width, r->height, r->inputFrames, CHAIN_SIZE,
|
||||
&va_attribs[0], 5);
|
||||
printf("%d\n", status);
|
||||
for(int i = 0; i < CHAIN_SIZE; i++)
|
||||
{
|
||||
status = vaExportSurfaceHandle(r->va_dpy, r->inputFrames[i], VA_SURFACE_ATTRIB_MEM_TYPE_DRM_PRIME_2, VA_EXPORT_SURFACE_WRITE_ONLY | VA_EXPORT_SURFACE_SEPARATE_LAYERS, &drmSurface );
|
||||
printf("%d %d %llx\n", status, drmSurface.objects[0].fd, drmSurface.objects[0].drm_format_modifier);
|
||||
dmabuf_fd[i] = drmSurface.objects[0].fd;
|
||||
if(status != VA_STATUS_SUCCESS)
|
||||
exit(1);
|
||||
}
|
||||
*mod = drmSurface.objects[0].drm_format_modifier;
|
||||
*size = drmSurface.objects[0].size;
|
||||
*offset = drmSurface.layers[1].offset[0];
|
||||
*pitch1 = drmSurface.layers[0].pitch[0];
|
||||
*pitch2 = drmSurface.layers[1].pitch[0];
|
||||
|
||||
|
||||
r->encoder.output_buf = VA_INVALID_ID;
|
||||
setup_output_thread(r);
|
||||
|
||||
return r;
|
||||
|
||||
err_vpp:
|
||||
vpp_destroy(r);
|
||||
err_va_dpy:
|
||||
vaTerminate(r->va_dpy);
|
||||
err_fd:
|
||||
close(r->output_fd);
|
||||
err_thread:
|
||||
destroy_worker_thread(r);
|
||||
err_free:
|
||||
free(r);
|
||||
|
||||
return NULL;
|
||||
}
|
||||
|
||||
|
||||
void
|
||||
vaapi_recorder_destroy(struct vaapi_recorder *r)
|
||||
|
@ -2479,6 +2606,11 @@ recorder_frame3(struct vaapi_recorder *r)
|
|||
encoder_encode(r, r->vpp.output);
|
||||
}
|
||||
|
||||
void
|
||||
recorder_frame4(struct vaapi_recorder *r, int idx)
|
||||
{
|
||||
encoder_encode(r, r->inputFrames[idx]);
|
||||
}
|
||||
|
||||
|
||||
static void *
|
||||
|
|
|
@ -38,6 +38,9 @@ vaapi_recorder_create3(int drm_fd, int width, int height, const char *filename,
|
|||
struct vaapi_recorder *
|
||||
vaapi_recorder_create4(int drm_fd, int width, int height, const char *filename, int *dmabuf_fd, uint64_t *mod, uint32_t *size, uint32_t *offset, uint32_t *pitch1, uint32_t *pitch2, uint64_t *modifiers, int modifierscount);
|
||||
|
||||
struct vaapi_recorder *
|
||||
vaapi_recorder_create5(int drm_fd, int width, int height, const char *filename, int *dmabuf_fd, uint64_t *mod, uint32_t *size, uint32_t *offset, uint32_t *pitch1, uint32_t *pitch2, uint64_t *modifiers, int modifierscount);
|
||||
|
||||
|
||||
void
|
||||
vaapi_recorder_destroy(struct vaapi_recorder *r);
|
||||
|
@ -48,5 +51,7 @@ void
|
|||
recorder_frame2(struct vaapi_recorder *r);
|
||||
void
|
||||
recorder_frame3(struct vaapi_recorder *r);
|
||||
void
|
||||
recorder_frame4(struct vaapi_recorder *r, int idx);
|
||||
|
||||
#endif /* _VAAPI_RECORDER_H_ */
|
||||
|
|
374
vkcompute.cpp
374
vkcompute.cpp
|
@ -141,14 +141,13 @@ const bool enableValidationLayers = true;
|
|||
assert(res == VK_SUCCESS); \
|
||||
} \
|
||||
}
|
||||
|
||||
#define CHAIN_SIZE 4
|
||||
/*
|
||||
The application launches a compute shader that renders the mandelbrot set,
|
||||
by rendering it into a storage buffer.
|
||||
The storage buffer is then read from the GPU, and saved as .png.
|
||||
*/
|
||||
class ComputeApplication {
|
||||
private:
|
||||
struct ComputeApplication {
|
||||
// The pixels of the rendered mandelbrot set are in this format:
|
||||
struct Pixel {
|
||||
float r, g, b, a;
|
||||
|
@ -186,7 +185,6 @@ private:
|
|||
To allocate such command buffers, we use a command pool.
|
||||
*/
|
||||
VkCommandPool commandPool;
|
||||
VkCommandBuffer commandBuffer;
|
||||
|
||||
/*
|
||||
|
||||
|
@ -197,25 +195,40 @@ private:
|
|||
into descriptor sets, which are basically just collections of descriptors.
|
||||
*/
|
||||
VkDescriptorPool descriptorPool;
|
||||
VkDescriptorSet descriptorSet;
|
||||
VkDescriptorSetLayout descriptorSetLayout;
|
||||
VkCommandBuffer commandBuffer;
|
||||
|
||||
struct UBO{
|
||||
float frameNum;
|
||||
};
|
||||
struct FrameContext
|
||||
{
|
||||
VkDescriptorSet descriptorSet;
|
||||
VkBuffer ubo;
|
||||
VkDeviceMemory uboMemory;
|
||||
VkImage image0;
|
||||
VkDeviceMemory imageMemory0;
|
||||
VkImageView imageView0;
|
||||
VkImage image1;
|
||||
// todo: single memory block?
|
||||
VkDeviceMemory imageMemory1;
|
||||
VkImageView imageView1;
|
||||
VkCommandBuffer commandBuffer;
|
||||
UBO *pMappedUBO = NULL;
|
||||
VkFence fence;
|
||||
bool running;
|
||||
|
||||
} chain[CHAIN_SIZE];
|
||||
|
||||
/*
|
||||
The mandelbrot set will be rendered to this buffer.
|
||||
|
||||
The memory that backs the buffer is bufferMemory.
|
||||
*/
|
||||
VkBuffer buffer;
|
||||
VkDeviceMemory bufferMemory;
|
||||
|
||||
VkImage image0;
|
||||
VkDeviceMemory imageMemory0;
|
||||
VkImageView imageView0;
|
||||
VkImage image1;
|
||||
// todo: single memory block?
|
||||
VkDeviceMemory imageMemory1;
|
||||
VkImageView imageView1;
|
||||
uint32_t bufferSize; // size of `buffer` in bytes.
|
||||
|
||||
|
||||
//uint32_t bufferSize; // size of `buffer` in bytes.
|
||||
|
||||
const char * enabledLayers[16];
|
||||
size_t enabledLayersCount = 0;
|
||||
|
@ -239,93 +252,6 @@ private:
|
|||
This variable keeps track of the index of that queue in its family.
|
||||
*/
|
||||
uint32_t queueFamilyIndex;
|
||||
struct UBO{
|
||||
float frameNum;
|
||||
};
|
||||
UBO *pMappedBuffer = NULL;
|
||||
|
||||
public:
|
||||
void run() {
|
||||
// Buffer size of the storage buffer that will contain the rendered mandelbrot set.
|
||||
bufferSize = sizeof(Pixel) * WIDTH * HEIGHT;
|
||||
|
||||
|
||||
// Initialize vulkan:
|
||||
createInstance();
|
||||
findPhysicalDevice();
|
||||
createDevice();
|
||||
createBuffer();
|
||||
vkMapMemory(device, bufferMemory, 0, sizeof(UBO), 0, (void**)&pMappedBuffer);
|
||||
//createImageExportableDmabuf(image0, imageView0, imageMemory0, prime_fd, WIDTH, HEIGHT, VK_FORMAT_R8_UNORM);
|
||||
//createImageExportableDmabuf(image1, imageView1, imageMemory1, prime_fd_uv, WIDTH/2, HEIGHT/2, VK_FORMAT_R8G8_UNORM);
|
||||
int drm_fd = drm_fd = open("/dev/dri/renderD128", O_RDWR);
|
||||
//auto *r = vaapi_recorder_create2(drm_fd, WIDTH, HEIGHT, "out.264", prime_fd, WIDTH * 4);
|
||||
//auto *r = vaapi_recorder_create3(drm_fd, WIDTH, HEIGHT, "out.264", prime_fd, WIDTH * 4, prime_fd_uv, WIDTH * 2);
|
||||
uint64_t mod;
|
||||
uint32_t size, offset, pitch1, pitch2;
|
||||
int fd;
|
||||
uint64_t modifiers[32];
|
||||
int count = getAvailiableModifiersList(modifiers, 32, VK_FORMAT_R16_UNORM);
|
||||
auto *r = vaapi_recorder_create4(drm_fd, WIDTH, HEIGHT, "out.264", &fd, &mod, &size, &offset, &pitch1, &pitch2, modifiers, count);
|
||||
createImageDumbDmabuf2(image0, imageView0, imageMemory0, fd, mod, size, offset, pitch1, pitch2);
|
||||
createDescriptorSetLayout();
|
||||
//createDescriptorSet();
|
||||
createComputePipeline();
|
||||
createCommandBuffer();
|
||||
int frameNum = 0;
|
||||
|
||||
/*
|
||||
We create a fence.
|
||||
*/
|
||||
VkFenceCreateInfo fenceCreateInfo = {};
|
||||
fenceCreateInfo.sType = VK_STRUCTURE_TYPE_FENCE_CREATE_INFO;
|
||||
fenceCreateInfo.flags = 0;
|
||||
VK_CHECK_RESULT(vkCreateFence(device, &fenceCreateInfo, NULL, &fence));
|
||||
|
||||
while(frameNum++ < 1000)
|
||||
{
|
||||
// Finally, run the recorded command buffer.
|
||||
runCommandBuffer();
|
||||
//usleep(10000);
|
||||
recorder_frame3(r);
|
||||
pMappedBuffer->frameNum = frameNum;
|
||||
//usleep(10000);
|
||||
}
|
||||
|
||||
vkDestroyFence(device, fence, NULL);
|
||||
vkUnmapMemory(device, bufferMemory);
|
||||
|
||||
// The former command rendered a mandelbrot set to a buffer.
|
||||
// Save that buffer as a png on disk.
|
||||
//saveRenderedImage();
|
||||
|
||||
// Clean up all vulkan resources.
|
||||
cleanup();
|
||||
}
|
||||
|
||||
void saveRenderedImage() {
|
||||
void* mappedMemory = NULL;
|
||||
#if 1
|
||||
// Map the buffer memory, so that we can read from it on the CPU.
|
||||
vkMapMemory(device, imageMemory0, 0, VK_WHOLE_SIZE, 0, &mappedMemory);
|
||||
//Pixel* pmappedMemory = (Pixel *)mappedMemory;
|
||||
FILE *f = fopen("out.bin","wb");
|
||||
fwrite(mappedMemory, 4, WIDTH * HEIGHT, f);
|
||||
fclose(f);
|
||||
|
||||
// Done reading, so unmap.
|
||||
vkUnmapMemory(device, imageMemory0);
|
||||
#else
|
||||
static char mem[WIDTH * HEIGHT*4];
|
||||
memcpy(mem, gDrm.mapped_buffer, WIDTH * HEIGHT * 4);
|
||||
// Now we save the acquired color data to a .png.
|
||||
// unsigned error = lodepng::encode("mandelbrot.png", image, WIDTH, HEIGHT);
|
||||
//if (error) printf("encoder error %d: %s", error, lodepng_error_text(error));
|
||||
FILE *f = fopen("out.bin","wb");
|
||||
fwrite(mem, 4, WIDTH * HEIGHT, f);
|
||||
fclose(f);
|
||||
#endif
|
||||
}
|
||||
|
||||
static VKAPI_ATTR VkBool32 VKAPI_CALL debugReportCallbackFn(
|
||||
VkDebugReportFlagsEXT flags,
|
||||
|
@ -612,7 +538,7 @@ public:
|
|||
return -1;
|
||||
}
|
||||
|
||||
void createBuffer() {
|
||||
void createUBO(int chidx) {
|
||||
/*
|
||||
We will now create a buffer. We will render the mandelbrot set into this buffer
|
||||
in a compute shader later.
|
||||
|
@ -620,11 +546,11 @@ public:
|
|||
|
||||
VkBufferCreateInfo bufferCreateInfo = {};
|
||||
bufferCreateInfo.sType = VK_STRUCTURE_TYPE_BUFFER_CREATE_INFO;
|
||||
bufferCreateInfo.size = bufferSize; // buffer size in bytes.
|
||||
bufferCreateInfo.size = sizeof(UBO); // buffer size in bytes.
|
||||
bufferCreateInfo.usage = VK_BUFFER_USAGE_UNIFORM_BUFFER_BIT; // buffer is used as a storage buffer.
|
||||
bufferCreateInfo.sharingMode = VK_SHARING_MODE_EXCLUSIVE; // buffer is exclusive to a single queue family at a time.
|
||||
|
||||
VK_CHECK_RESULT(vkCreateBuffer(device, &bufferCreateInfo, NULL, &buffer)); // create buffer.
|
||||
VK_CHECK_RESULT(vkCreateBuffer(device, &bufferCreateInfo, NULL, &chain[chidx].ubo)); // create buffer.
|
||||
|
||||
/*
|
||||
But the buffer doesn't allocate memory for itself, so we must do that manually.
|
||||
|
@ -634,7 +560,7 @@ public:
|
|||
First, we find the memory requirements for the buffer.
|
||||
*/
|
||||
VkMemoryRequirements memoryRequirements;
|
||||
vkGetBufferMemoryRequirements(device, buffer, &memoryRequirements);
|
||||
vkGetBufferMemoryRequirements(device, chain[chidx].ubo, &memoryRequirements);
|
||||
|
||||
/*
|
||||
Now use obtained memory requirements info to allocate the memory for the buffer.
|
||||
|
@ -655,10 +581,11 @@ public:
|
|||
allocateInfo.memoryTypeIndex = findMemoryType(
|
||||
memoryRequirements.memoryTypeBits, VK_MEMORY_PROPERTY_HOST_COHERENT_BIT | VK_MEMORY_PROPERTY_HOST_VISIBLE_BIT);
|
||||
|
||||
VK_CHECK_RESULT(vkAllocateMemory(device, &allocateInfo, NULL, &bufferMemory)); // allocate memory on device.
|
||||
VK_CHECK_RESULT(vkAllocateMemory(device, &allocateInfo, NULL, &chain[chidx].uboMemory)); // allocate memory on device.
|
||||
|
||||
// Now associate that allocated memory with the buffer. With that, the buffer is backed by actual memory.
|
||||
VK_CHECK_RESULT(vkBindBufferMemory(device, buffer, bufferMemory, 0));
|
||||
VK_CHECK_RESULT(vkBindBufferMemory(device, chain[chidx].ubo, chain[chidx].uboMemory, 0));
|
||||
vkMapMemory(device, chain[chidx].uboMemory, 0, sizeof(UBO), 0, (void**)&chain[chidx].pMappedUBO);
|
||||
}
|
||||
int getAvailiableModifiersList(uint64_t *modifiers2, size_t len, VkFormat fmt)
|
||||
{
|
||||
|
@ -770,7 +697,7 @@ public:
|
|||
}
|
||||
|
||||
// create and import dmabuf
|
||||
void createImageDumbDmabuf2(VkImage &image, VkImageView &imageView, VkDeviceMemory &imageMemory, int fd, uint64_t mod, uint32_t size, uint32_t offset, uint32_t pitch1, uint32_t pitch2) {
|
||||
void createImageDumbDmabuf2(VkImage &image, VkImageView &imageView, VkDeviceMemory &imageMemory, VkImage &image1, VkImageView &imageView1, VkDeviceMemory &imageMemory1, int fd, uint64_t mod, uint32_t size, uint32_t offset, uint32_t pitch1, uint32_t pitch2) {
|
||||
/*
|
||||
We will now create a buffer. We will render the mandelbrot set into this buffer
|
||||
in a compute shader later.
|
||||
|
@ -871,7 +798,7 @@ public:
|
|||
view.image = image1;
|
||||
VK_CHECK_RESULT(vkCreateImageView(device, &view, nullptr, &imageView1));
|
||||
}
|
||||
|
||||
#if 0
|
||||
// create and import dmabuf as opaque fd, allows any tiling
|
||||
void createImageDumbOpaque(VkImage &image, VkImageView &imageView, VkDeviceMemory &imageMemory) {
|
||||
/*
|
||||
|
@ -1051,7 +978,7 @@ public:
|
|||
printf("imageModifier %llx\n", imageModifiers.drmFormatModifier);
|
||||
// todo: get subresource plane info (vkGetImageSubresourceLayout)
|
||||
}
|
||||
|
||||
#endif
|
||||
void createDescriptorSetLayout() {
|
||||
/*
|
||||
Here we specify a descriptor set layout. This allows us to bind our descriptors to
|
||||
|
@ -1090,8 +1017,8 @@ public:
|
|||
// Create the descriptor set layout.
|
||||
VK_CHECK_RESULT(vkCreateDescriptorSetLayout(device, &descriptorSetLayoutCreateInfo, NULL, &descriptorSetLayout));
|
||||
}
|
||||
|
||||
void createDescriptorSet() {
|
||||
void createDescriptorPool()
|
||||
{
|
||||
/*
|
||||
So we will allocate a descriptor set here.
|
||||
But we need to first create a descriptor pool to do that.
|
||||
|
@ -1102,18 +1029,20 @@ public:
|
|||
*/
|
||||
VkDescriptorPoolSize descriptorPoolSize[2] = {};
|
||||
descriptorPoolSize[0].type = VK_DESCRIPTOR_TYPE_STORAGE_IMAGE;
|
||||
descriptorPoolSize[0].descriptorCount = 2;
|
||||
descriptorPoolSize[0].descriptorCount = 2*CHAIN_SIZE;
|
||||
descriptorPoolSize[1].type = VK_DESCRIPTOR_TYPE_UNIFORM_BUFFER;
|
||||
descriptorPoolSize[1].descriptorCount = 1;
|
||||
descriptorPoolSize[1].descriptorCount = 1*CHAIN_SIZE;
|
||||
VkDescriptorPoolCreateInfo descriptorPoolCreateInfo = {};
|
||||
descriptorPoolCreateInfo.sType = VK_STRUCTURE_TYPE_DESCRIPTOR_POOL_CREATE_INFO;
|
||||
descriptorPoolCreateInfo.maxSets = 1; // we only need to allocate one descriptor set from the pool.
|
||||
descriptorPoolCreateInfo.maxSets = CHAIN_SIZE; // we only need to allocate one descriptor set from the pool.
|
||||
descriptorPoolCreateInfo.poolSizeCount = 2;
|
||||
descriptorPoolCreateInfo.pPoolSizes = descriptorPoolSize;
|
||||
|
||||
// create descriptor pool.
|
||||
VK_CHECK_RESULT(vkCreateDescriptorPool(device, &descriptorPoolCreateInfo, NULL, &descriptorPool));
|
||||
}
|
||||
|
||||
void createDescriptorSet(int chidx) {
|
||||
/*
|
||||
With the pool allocated, we can now allocate the descriptor set.
|
||||
*/
|
||||
|
@ -1124,7 +1053,7 @@ public:
|
|||
descriptorSetAllocateInfo.pSetLayouts = &descriptorSetLayout;
|
||||
|
||||
// allocate descriptor set.
|
||||
VK_CHECK_RESULT(vkAllocateDescriptorSets(device, &descriptorSetAllocateInfo, &descriptorSet));
|
||||
VK_CHECK_RESULT(vkAllocateDescriptorSets(device, &descriptorSetAllocateInfo, &chain[chidx].descriptorSet));
|
||||
|
||||
/*
|
||||
Next, we need to connect our actual storage buffer with the descrptor.
|
||||
|
@ -1134,29 +1063,29 @@ public:
|
|||
// Specify the buffer to bind to the descriptor.
|
||||
|
||||
VkDescriptorImageInfo descriptorImageInfo[2] = {};
|
||||
descriptorImageInfo[0].imageView = imageView0;
|
||||
descriptorImageInfo[0].imageView = chain[chidx].imageView0;
|
||||
descriptorImageInfo[0].imageLayout = VK_IMAGE_LAYOUT_GENERAL;
|
||||
descriptorImageInfo[1].imageView = imageView1;
|
||||
descriptorImageInfo[1].imageView = chain[chidx].imageView1;
|
||||
descriptorImageInfo[1].imageLayout = VK_IMAGE_LAYOUT_GENERAL;
|
||||
|
||||
VkWriteDescriptorSet writeDescriptorSet[2] = {};
|
||||
writeDescriptorSet[0].sType = VK_STRUCTURE_TYPE_WRITE_DESCRIPTOR_SET;
|
||||
writeDescriptorSet[0].dstSet = descriptorSet; // write to this descriptor set.
|
||||
writeDescriptorSet[0].dstSet = chain[chidx].descriptorSet; // write to this descriptor set.
|
||||
writeDescriptorSet[0].dstBinding = 0; // write to the first, and only binding.
|
||||
writeDescriptorSet[0].descriptorCount = 1; // update a single descriptor.
|
||||
writeDescriptorSet[0].descriptorType = VK_DESCRIPTOR_TYPE_STORAGE_IMAGE; // storage buffer.
|
||||
writeDescriptorSet[0].pImageInfo = &descriptorImageInfo[0];
|
||||
writeDescriptorSet[1].sType = VK_STRUCTURE_TYPE_WRITE_DESCRIPTOR_SET;
|
||||
writeDescriptorSet[1].dstSet = descriptorSet; // write to this descriptor set.
|
||||
writeDescriptorSet[1].dstSet = chain[chidx].descriptorSet; // write to this descriptor set.
|
||||
writeDescriptorSet[1].dstBinding = 1; // write to the first, and only binding.
|
||||
writeDescriptorSet[1].descriptorCount = 1; // update a single descriptor.
|
||||
writeDescriptorSet[1].descriptorType = VK_DESCRIPTOR_TYPE_STORAGE_IMAGE; // storage buffer.
|
||||
writeDescriptorSet[1].pImageInfo = &descriptorImageInfo[1];
|
||||
|
||||
VkDescriptorBufferInfo descriptorBufferInfo = {};
|
||||
descriptorBufferInfo.buffer = buffer;
|
||||
descriptorBufferInfo.buffer = chain[chidx].ubo;
|
||||
descriptorBufferInfo.offset = 0;
|
||||
descriptorBufferInfo.range = bufferSize;
|
||||
descriptorBufferInfo.range = sizeof(UBO);
|
||||
// perform the update of the descriptor set.
|
||||
vkUpdateDescriptorSets(device, 2, writeDescriptorSet, 0, NULL);
|
||||
writeDescriptorSet[0].dstBinding = 2;
|
||||
|
@ -1252,8 +1181,8 @@ public:
|
|||
1, &pipelineCreateInfo,
|
||||
NULL, &pipeline));
|
||||
}
|
||||
|
||||
void createCommandBuffer() {
|
||||
void createCommandPool()
|
||||
{
|
||||
/*
|
||||
We are getting closer to the end. In order to send commands to the device(GPU),
|
||||
we must first record commands into a command buffer.
|
||||
|
@ -1261,12 +1190,11 @@ public:
|
|||
*/
|
||||
VkCommandPoolCreateInfo commandPoolCreateInfo = {};
|
||||
commandPoolCreateInfo.sType = VK_STRUCTURE_TYPE_COMMAND_POOL_CREATE_INFO;
|
||||
commandPoolCreateInfo.flags = 0;
|
||||
commandPoolCreateInfo.flags = VK_COMMAND_POOL_CREATE_RESET_COMMAND_BUFFER_BIT;
|
||||
// the queue family of this command pool. All command buffers allocated from this command pool,
|
||||
// must be submitted to queues of this family ONLY.
|
||||
commandPoolCreateInfo.queueFamilyIndex = queueFamilyIndex;
|
||||
VK_CHECK_RESULT(vkCreateCommandPool(device, &commandPoolCreateInfo, NULL, &commandPool));
|
||||
|
||||
/*
|
||||
Now allocate a command buffer from the command pool.
|
||||
*/
|
||||
|
@ -1280,48 +1208,94 @@ public:
|
|||
commandBufferAllocateInfo.commandBufferCount = 1; // allocate a single command buffer.
|
||||
VK_CHECK_RESULT(vkAllocateCommandBuffers(device, &commandBufferAllocateInfo, &commandBuffer)); // allocate command buffer.
|
||||
|
||||
/*
|
||||
Now we shall start recording commands into the newly allocated command buffer.
|
||||
*/
|
||||
}
|
||||
void prepareImage(int chidx)
|
||||
{
|
||||
VkCommandBufferBeginInfo beginInfo = {};
|
||||
beginInfo.sType = VK_STRUCTURE_TYPE_COMMAND_BUFFER_BEGIN_INFO;
|
||||
beginInfo.flags = 0; // the buffer is only submitted and used once in this application.
|
||||
VK_CHECK_RESULT(vkBeginCommandBuffer(commandBuffer, &beginInfo)); // start recording commands.
|
||||
|
||||
/*
|
||||
We need to bind a pipeline, AND a descriptor set before we dispatch.
|
||||
|
||||
The validation layer will NOT give warnings if you forget these, so be very careful not to forget them.
|
||||
*/
|
||||
vkCmdBindPipeline(commandBuffer, VK_PIPELINE_BIND_POINT_COMPUTE, pipeline);
|
||||
VkImageMemoryBarrier imageMemoryBarrier = {VK_STRUCTURE_TYPE_IMAGE_MEMORY_BARRIER};
|
||||
imageMemoryBarrier.oldLayout = VK_IMAGE_LAYOUT_UNDEFINED;
|
||||
imageMemoryBarrier.newLayout = VK_IMAGE_LAYOUT_GENERAL;
|
||||
imageMemoryBarrier.image = image0;
|
||||
imageMemoryBarrier.image = chain[chidx].image0;
|
||||
imageMemoryBarrier.subresourceRange = { VK_IMAGE_ASPECT_COLOR_BIT, 0, 1, 0, 1 };
|
||||
// imageMemoryBarrier.srcAccessMask = VK_ACCESS_SHADER_WRITE_BIT;
|
||||
// imageMemoryBarrier.dstAccessMask = VK_ACCESS_MEMORY_READ_BIT;
|
||||
vkCmdPipelineBarrier(commandBuffer, VK_PIPELINE_STAGE_TOP_OF_PIPE_BIT, VK_PIPELINE_STAGE_COMPUTE_SHADER_BIT,
|
||||
0, 0, nullptr, 0, nullptr, 1, &imageMemoryBarrier);
|
||||
imageMemoryBarrier.image = image1;
|
||||
imageMemoryBarrier.image = chain[chidx].image1;
|
||||
vkCmdPipelineBarrier(commandBuffer, VK_PIPELINE_STAGE_TOP_OF_PIPE_BIT, VK_PIPELINE_STAGE_COMPUTE_SHADER_BIT,
|
||||
0, 0, nullptr, 0, nullptr, 1, &imageMemoryBarrier);
|
||||
VK_CHECK_RESULT(vkEndCommandBuffer(commandBuffer)); // end recording commands.
|
||||
VkFence fence;
|
||||
VkFenceCreateInfo fenceCreateInfo = {};
|
||||
fenceCreateInfo.sType = VK_STRUCTURE_TYPE_FENCE_CREATE_INFO;
|
||||
fenceCreateInfo.flags = 0;
|
||||
VK_CHECK_RESULT(vkCreateFence(device, &fenceCreateInfo, NULL, &fence));
|
||||
|
||||
createDescriptorSet();
|
||||
vkCmdBindDescriptorSets(commandBuffer, VK_PIPELINE_BIND_POINT_COMPUTE, pipelineLayout, 0, 1, &descriptorSet, 0, NULL);
|
||||
VkSubmitInfo submitInfo = {};
|
||||
submitInfo.sType = VK_STRUCTURE_TYPE_SUBMIT_INFO;
|
||||
submitInfo.commandBufferCount = 1; // submit a single command buffer
|
||||
submitInfo.pCommandBuffers = &commandBuffer; // the command buffer to submit.
|
||||
VK_CHECK_RESULT(vkQueueSubmit(queue, 1, &submitInfo, fence));
|
||||
VK_CHECK_RESULT(vkWaitForFences(device, 1, &fence, VK_TRUE, 100000000000));
|
||||
vkDestroyFence(device, fence, NULL);
|
||||
vkResetCommandBuffer(commandBuffer, 0);
|
||||
}
|
||||
|
||||
void createCommandBuffer(int chidx) {
|
||||
|
||||
/*
|
||||
Now allocate a command buffer from the command pool.
|
||||
*/
|
||||
VkCommandBufferAllocateInfo commandBufferAllocateInfo = {};
|
||||
commandBufferAllocateInfo.sType = VK_STRUCTURE_TYPE_COMMAND_BUFFER_ALLOCATE_INFO;
|
||||
commandBufferAllocateInfo.commandPool = commandPool; // specify the command pool to allocate from.
|
||||
// if the command buffer is primary, it can be directly submitted to queues.
|
||||
// A secondary buffer has to be called from some primary command buffer, and cannot be directly
|
||||
// submitted to a queue. To keep things simple, we use a primary command buffer.
|
||||
commandBufferAllocateInfo.level = VK_COMMAND_BUFFER_LEVEL_PRIMARY;
|
||||
commandBufferAllocateInfo.commandBufferCount = 1; // allocate a single command buffer.
|
||||
VK_CHECK_RESULT(vkAllocateCommandBuffers(device, &commandBufferAllocateInfo, &chain[chidx].commandBuffer)); // allocate command buffer.
|
||||
VkCommandBufferBeginInfo beginInfo = {};
|
||||
beginInfo.sType = VK_STRUCTURE_TYPE_COMMAND_BUFFER_BEGIN_INFO;
|
||||
beginInfo.flags = 0; // the buffer is only submitted and used once in this application.
|
||||
VK_CHECK_RESULT(vkBeginCommandBuffer(chain[chidx].commandBuffer, &beginInfo)); // start recording commands.
|
||||
/*
|
||||
We need to bind a pipeline, AND a descriptor set before we dispatch.
|
||||
|
||||
The validation layer will NOT give warnings if you forget these, so be very careful not to forget them.
|
||||
*/
|
||||
vkCmdBindPipeline(chain[chidx].commandBuffer, VK_PIPELINE_BIND_POINT_COMPUTE, pipeline);
|
||||
/*
|
||||
Now we shall start recording commands into the newly allocated command buffer.
|
||||
*/
|
||||
vkCmdBindDescriptorSets(chain[chidx].commandBuffer, VK_PIPELINE_BIND_POINT_COMPUTE, pipelineLayout, 0, 1, &chain[chidx].descriptorSet, 0, NULL);
|
||||
|
||||
/*
|
||||
Calling vkCmdDispatch basically starts the compute pipeline, and executes the compute shader.
|
||||
The number of workgroups is specified in the arguments.
|
||||
If you are already familiar with compute shaders from OpenGL, this should be nothing new to you.
|
||||
*/
|
||||
vkCmdDispatch(commandBuffer, (uint32_t)ceil(WIDTH/2 / float(WORKGROUP_SIZE)), (uint32_t)ceil(HEIGHT/2 / float(WORKGROUP_SIZE)), 1);
|
||||
vkCmdDispatch(chain[chidx].commandBuffer, (uint32_t)ceil(WIDTH/2 / float(WORKGROUP_SIZE)), (uint32_t)ceil(HEIGHT/2 / float(WORKGROUP_SIZE)), 1);
|
||||
|
||||
VK_CHECK_RESULT(vkEndCommandBuffer(chain[chidx].commandBuffer)); // end recording commands.
|
||||
/*
|
||||
We create a fence.
|
||||
*/
|
||||
VkFenceCreateInfo fenceCreateInfo = {};
|
||||
fenceCreateInfo.sType = VK_STRUCTURE_TYPE_FENCE_CREATE_INFO;
|
||||
fenceCreateInfo.flags = 0;
|
||||
VK_CHECK_RESULT(vkCreateFence(device, &fenceCreateInfo, NULL, &chain[chidx].fence));
|
||||
|
||||
VK_CHECK_RESULT(vkEndCommandBuffer(commandBuffer)); // end recording commands.
|
||||
}
|
||||
VkFence fence;
|
||||
|
||||
void runCommandBuffer() {
|
||||
|
||||
void runCommandBuffer(int chidx) {
|
||||
/*
|
||||
Now we shall finally submit the recorded command buffer to a queue.
|
||||
*/
|
||||
|
@ -1329,18 +1303,22 @@ public:
|
|||
VkSubmitInfo submitInfo = {};
|
||||
submitInfo.sType = VK_STRUCTURE_TYPE_SUBMIT_INFO;
|
||||
submitInfo.commandBufferCount = 1; // submit a single command buffer
|
||||
submitInfo.pCommandBuffers = &commandBuffer; // the command buffer to submit.
|
||||
submitInfo.pCommandBuffers = &chain[chidx].commandBuffer; // the command buffer to submit.
|
||||
|
||||
// static bool b;
|
||||
//if(b)
|
||||
//VK_CHECK_RESULT(vkWaitForFences(device, 1, &fence, VK_TRUE, 100000000000));
|
||||
//
|
||||
//b = 1;
|
||||
vkResetFences(device, 1, &fence);
|
||||
if(chain[chidx].running)
|
||||
VK_CHECK_RESULT(vkWaitForFences(device, 1, &chain[chidx].fence, VK_TRUE, 100000000000));
|
||||
|
||||
vkResetFences(device, 1, &chain[chidx].fence);
|
||||
|
||||
/*
|
||||
We submit the command buffer on the queue, at the same time giving a fence.
|
||||
*/
|
||||
VK_CHECK_RESULT(vkQueueSubmit(queue, 1, &submitInfo, fence));
|
||||
VK_CHECK_RESULT(vkQueueSubmit(queue, 1, &submitInfo, chain[chidx].fence));
|
||||
chain[chidx].running = true;
|
||||
/*
|
||||
The command will not have finished executing until the fence is signalled.
|
||||
So we wait here.
|
||||
|
@ -1348,9 +1326,101 @@ public:
|
|||
and we will not be sure that the command has finished executing unless we wait for the fence.
|
||||
Hence, we use a fence here.
|
||||
*/
|
||||
VK_CHECK_RESULT(vkWaitForFences(device, 1, &fence, VK_TRUE, 100000000000));
|
||||
//VK_CHECK_RESULT(vkWaitForFences(device, 1, &fence, VK_TRUE, 100000000000));
|
||||
}
|
||||
void run() {
|
||||
// Buffer size of the storage buffer that will contain the rendered mandelbrot set.
|
||||
//bufferSize = sizeof(Pixel) * WIDTH * HEIGHT;
|
||||
|
||||
|
||||
// Initialize vulkan:
|
||||
createInstance();
|
||||
findPhysicalDevice();
|
||||
createDevice();
|
||||
|
||||
//createImageExportableDmabuf(image0, imageView0, imageMemory0, prime_fd, WIDTH, HEIGHT, VK_FORMAT_R8_UNORM);
|
||||
//createImageExportableDmabuf(image1, imageView1, imageMemory1, prime_fd_uv, WIDTH/2, HEIGHT/2, VK_FORMAT_R8G8_UNORM);
|
||||
int drm_fd = drm_fd = open("/dev/dri/renderD128", O_RDWR);
|
||||
//auto *r = vaapi_recorder_create2(drm_fd, WIDTH, HEIGHT, "out.264", prime_fd, WIDTH * 4);
|
||||
//auto *r = vaapi_recorder_create3(drm_fd, WIDTH, HEIGHT, "out.264", prime_fd, WIDTH * 4, prime_fd_uv, WIDTH * 2);
|
||||
uint64_t mod;
|
||||
uint32_t size, offset, pitch1, pitch2;
|
||||
int fd[CHAIN_SIZE];
|
||||
uint64_t modifiers[32];
|
||||
int count = getAvailiableModifiersList(modifiers, 32, VK_FORMAT_R16_UNORM);
|
||||
auto *r = vaapi_recorder_create5(drm_fd, WIDTH, HEIGHT, "out.264", fd, &mod, &size, &offset, &pitch1, &pitch2, modifiers, count);
|
||||
for(int i = 0; i < CHAIN_SIZE; i++)
|
||||
{
|
||||
createUBO(i);
|
||||
createImageDumbDmabuf2(chain[i].image0, chain[i].imageView0, chain[i].imageMemory0, chain[i].image1, chain[i].imageView1, chain[i].imageMemory1,
|
||||
fd[i], mod, size, offset, pitch1, pitch2);
|
||||
}
|
||||
|
||||
createDescriptorSetLayout();
|
||||
createDescriptorPool();
|
||||
createComputePipeline();
|
||||
createCommandPool();
|
||||
for(int i = 0; i < CHAIN_SIZE; i++)
|
||||
{
|
||||
prepareImage(i);
|
||||
createDescriptorSet(i);
|
||||
createCommandBuffer(i);
|
||||
}
|
||||
int frameNum = 0;
|
||||
|
||||
while(frameNum++ < 1000)
|
||||
{
|
||||
int chidx = frameNum & 3;
|
||||
// Finally, run the recorded command buffer.
|
||||
runCommandBuffer(chidx);
|
||||
//usleep(10000);
|
||||
recorder_frame4(r, chidx);
|
||||
chain[chidx].pMappedUBO->frameNum = frameNum;
|
||||
//usleep(10000);
|
||||
|
||||
}
|
||||
for(int i = 0; i < CHAIN_SIZE; i++)
|
||||
{
|
||||
if(chain[i].running)
|
||||
VK_CHECK_RESULT(vkWaitForFences(device, 1, &chain[i].fence, VK_TRUE, 100000000000));
|
||||
}
|
||||
exit(0);
|
||||
|
||||
//vkDestroyFence(device, fence, NULL);
|
||||
//vkUnmapMemory(device, bufferMemory);
|
||||
|
||||
// The former command rendered a mandelbrot set to a buffer.
|
||||
// Save that buffer as a png on disk.
|
||||
//saveRenderedImage();
|
||||
|
||||
// Clean up all vulkan resources.
|
||||
cleanup();
|
||||
}
|
||||
#if 0
|
||||
void saveRenderedImage() {
|
||||
void* mappedMemory = NULL;
|
||||
#if 1
|
||||
// Map the buffer memory, so that we can read from it on the CPU.
|
||||
vkMapMemory(device, imageMemory0, 0, VK_WHOLE_SIZE, 0, &mappedMemory);
|
||||
//Pixel* pmappedMemory = (Pixel *)mappedMemory;
|
||||
FILE *f = fopen("out.bin","wb");
|
||||
fwrite(mappedMemory, 4, WIDTH * HEIGHT, f);
|
||||
fclose(f);
|
||||
|
||||
// Done reading, so unmap.
|
||||
vkUnmapMemory(device, imageMemory0);
|
||||
#else
|
||||
static char mem[WIDTH * HEIGHT*4];
|
||||
memcpy(mem, gDrm.mapped_buffer, WIDTH * HEIGHT * 4);
|
||||
// Now we save the acquired color data to a .png.
|
||||
// unsigned error = lodepng::encode("mandelbrot.png", image, WIDTH, HEIGHT);
|
||||
//if (error) printf("encoder error %d: %s", error, lodepng_error_text(error));
|
||||
FILE *f = fopen("out.bin","wb");
|
||||
fwrite(mem, 4, WIDTH * HEIGHT, f);
|
||||
fclose(f);
|
||||
#endif
|
||||
}
|
||||
#endif
|
||||
void cleanup() {
|
||||
/*
|
||||
Clean up all Vulkan Resources.
|
||||
|
@ -1366,11 +1436,11 @@ public:
|
|||
func(instance, debugReportCallback, NULL);
|
||||
}
|
||||
|
||||
vkFreeMemory(device, bufferMemory, NULL);
|
||||
vkDestroyBuffer(device, buffer, NULL);
|
||||
vkFreeMemory(device, imageMemory0, NULL);
|
||||
vkDestroyImageView(device, imageView0, NULL);
|
||||
vkDestroyImage(device, image0, NULL);
|
||||
//vkFreeMemory(device, bufferMemory, NULL);
|
||||
//vkDestroyBuffer(device, buffer, NULL);
|
||||
//vkFreeMemory(device, imageMemory0, NULL);
|
||||
//vkDestroyImageView(device, imageView0, NULL);
|
||||
//vkDestroyImage(device, image0, NULL);
|
||||
vkDestroyShaderModule(device, computeShaderModule, NULL);
|
||||
vkDestroyDescriptorPool(device, descriptorPool, NULL);
|
||||
vkDestroyDescriptorSetLayout(device, descriptorSetLayout, NULL);
|
||||
|
|
Loading…
Reference in a new issue