// vulkan-playground/vkcompute.cpp
//
// 1481 lines
// 60 KiB
// C++
//
/*
* Copyright (c) 2017 Eric Arnebäck
* Copyright (c) 2024 mittorn
*
* Permission is hereby granted, free of charge, to any person obtaining a
* copy of this software and associated documentation files (the
* "Software"), to deal in the Software without restriction, including
* without limitation the rights to use, copy, modify, merge, publish,
* distribute, sub license, and/or sell copies of the Software, and to
* permit persons to whom the Software is furnished to do so, subject to
* the following conditions:
*
* The above copyright notice and this permission notice (including the
* next paragraph) shall be included in all copies or substantial portions
* of the Software.
*
* THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS
* OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
* MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NON-INFRINGEMENT.
* IN NO EVENT SHALL PRECISION INSIGHT AND/OR ITS SUPPLIERS BE LIABLE FOR
* ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT,
* TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE
* SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE.
*/
#include <vulkan/vulkan.h>
#include <string.h>
#include <assert.h>
#include <stdio.h>
#include <math.h>
#include <sys/mman.h>
#include <drm_fourcc.h>
#include <fcntl.h>
#include <sys/ioctl.h>
#include <stdlib.h>
#include <unistd.h>
//#include "vaapi-recorder.h"
#include "vaapi_encoder_h264.h"
#include "vaapi_encoder_hevc.h"
struct DrmHelper
{
char *mapped_buffer;
size_t mapped_size;
int drm_fd = -1;
//unsigned int handle;
int buffer_fd = -1;
int pitch;
void Destroy()
{
if(mapped_buffer && mapped_size)
{
munmap(mapped_buffer, mapped_size);
mapped_buffer = NULL;
mapped_size = 0;
}
if(buffer_fd != -1)
close(buffer_fd);
buffer_fd = -1;
if(drm_fd != -1)
{
/* if(handle)
{
drm_mode_destroy_dumb req = {};
req.handle = handle;
ioctl(drm_fd, DRM_IOCTL_MODE_DESTROY_DUMB, &req);
}
handle = 0;
*/
close(drm_fd);
drm_fd = -1;
}
}
int Allocate(unsigned int width, unsigned int height)
{
unsigned int handle;
Destroy();
// todo: autodetect drm devices, allow device selection
drm_fd = open("/dev/dri/card0", O_RDWR);
drm_mode_create_dumb buffer = { 0 };
buffer.width = width;
buffer.height = height;
buffer.handle = 0;
buffer.bpp = 32; //Bits per pixel
buffer.flags = 0;
int ret = ioctl(drm_fd, DRM_IOCTL_MODE_CREATE_DUMB, &buffer);
pitch = buffer.pitch;
drm_prime_handle prime = {};
prime.handle = buffer.handle;
if(ret >= 0)
{
handle = buffer.handle;
prime.flags = DRM_RDWR;
ret = ioctl(drm_fd, DRM_IOCTL_PRIME_HANDLE_TO_FD, &prime);
mapped_size = width * height * 4;
if(ret >= 0)
{
mapped_buffer = (char*)mmap(NULL, mapped_size , PROT_WRITE, MAP_SHARED, prime.fd, 0);
buffer_fd = prime.fd;
}
else
printf("DRM_IOCTL_PRIME_HANDLE_TO_FD failed\n");
}
else
printf("DRM_IOCTL_MODE_CREATE_DUMB failed\n");
if((void*)mapped_buffer == MAP_FAILED || !mapped_buffer)
{
printf("DRI3: not availiable\n");
mapped_buffer = NULL;
Destroy();
}
// else
{
drm_mode_destroy_dumb req = {};
req.handle = handle;
ioctl(drm_fd, DRM_IOCTL_MODE_DESTROY_DUMB, &req);
}
return buffer_fd;
}
};
// Process-wide DRM helper; createImageDumbDmabuf() allocates its dumb buffer through it.
DrmHelper gDrm;
const int WIDTH = 1920; // Width, in pixels, of the images created by this file.
const int HEIGHT = 1080; // Height, in pixels, of the images created by this file.
const int WORKGROUP_SIZE = 32; // Workgroup size in compute shader.
/// TODO: why it even should depend on NDEBUG???
// Validation layers are requested only in builds where NDEBUG is not defined.
#ifdef NDEBUG
const bool enableValidationLayers = false;
#else
const bool enableValidationLayers = true;
#endif
// Used for validating return values of Vulkan API calls.
// On any result other than VK_SUCCESS it prints the value with file and line, then asserts.
// NOTE: with NDEBUG defined assert() expands to nothing, so execution continues after the message.
// The expansion is a bare { ... } block, not do { ... } while (0).
#define VK_CHECK_RESULT(f) \
{ \
VkResult res = (f); \
if (res != VK_SUCCESS) \
{ \
printf("Fatal : VkResult is %d in %s at line %d\n", res, __FILE__, __LINE__); \
assert(res == VK_SUCCESS); \
} \
}
// Number of FrameContext slots in ComputeApplication::chain.
#define CHAIN_SIZE 4
/*
The application launches a compute shader that renders the mandelbrot set,
by rendering it into a storage buffer.
The storage buffer is then read from the GPU, and saved as .png.
*/
struct ComputeApplication {
// Four-float RGBA pixel.
// NOTE(review): nothing in the visible part of this file uses Pixel — confirm it is still needed.
struct Pixel {
float r, g, b, a;
};
/*
In order to use Vulkan, you must create an instance.
*/
VkInstance instance;
// Handle of the debug-report callback registered by createInstance().
VkDebugReportCallbackEXT debugReportCallback;
/*
The physical device is some device on the system that supports usage of Vulkan.
Often, it is simply a graphics card that supports Vulkan.
*/
VkPhysicalDevice physicalDevice;
/*
Then we have the logical device VkDevice, which basically allows
us to interact with the physical device.
*/
VkDevice device;
/*
The pipeline specifies the pipeline that all graphics and compute commands pass through in Vulkan.
We will be creating a simple compute pipeline in this application.
*/
VkPipeline pipeline;
VkPipelineLayout pipelineLayout;
VkShaderModule computeShaderModule;
/*
The command buffer is used to record commands that will be submitted to a queue.
To allocate such command buffers, we use a command pool.
*/
VkCommandPool commandPool;
/*
Descriptors represent resources in shaders. They allow us to use things like
uniform buffers, storage buffers and images in GLSL.
A single descriptor represents a single resource, and several descriptors are organized
into descriptor sets, which are basically just collections of descriptors.
*/
VkDescriptorPool descriptorPool;
VkDescriptorSetLayout descriptorSetLayout;
// NOTE(review): each FrameContext below also has its own commandBuffer — confirm which one is used.
VkCommandBuffer commandBuffer;
// Layout of the uniform buffer that createUBO() allocates and maps for each chain entry.
struct UBO{
float frameNum;
};
// Per-frame resources; CHAIN_SIZE of these are kept in `chain`.
struct FrameContext
{
VkDescriptorSet descriptorSet;
// Uniform buffer, its backing memory and (further down) its persistent mapping; see createUBO().
VkBuffer ubo;
VkDeviceMemory uboMemory;
// Two image / memory / view triples.
// NOTE(review): presumably the two images produced by createImageDumbDmabuf2 — confirm against the caller.
VkImage image0;
VkDeviceMemory imageMemory0;
VkImageView imageView0;
VkImage image1;
// todo: single memory block?
VkDeviceMemory imageMemory1;
VkImageView imageView1;
VkCommandBuffer commandBuffer;
UBO *pMappedUBO = NULL;
VkFence fence;
bool running = false;
} chain[CHAIN_SIZE];
/*
The mandelbrot set will be rendered to this buffer.
The memory that backs the buffer is bufferMemory.
NOTE(review): no `buffer` or `bufferMemory` member is declared in the visible part of
this struct, so this comment looks stale — confirm and remove.
*/
//uint32_t bufferSize; // size of `buffer` in bytes.
// Layer names passed to both vkCreateInstance and vkCreateDevice; filled by createInstance().
const char * enabledLayers[16];
size_t enabledLayersCount = 0;
/*
In order to execute commands on a device(GPU), the commands must be submitted
to a queue. The commands are stored in a command buffer, and this command buffer
is given to the queue.
There will be different kinds of queues on the device. Not all queues support
graphics operations, for instance. For this application, we at least want a queue
that supports compute operations.
*/
VkQueue queue; // a queue supporting compute operations.
/*
Queues that have the same capabilities (for instance, they all support graphics and compute operations)
are grouped into queue families.
When submitting a command buffer, you must specify which queue in the family you are submitting to.
This variable holds the index of the queue family chosen by getComputeQueueFamilyIndex().
*/
uint32_t queueFamilyIndex;
// Debug-report callback that createInstance() registers.
// Prints the layer prefix and the message text, then returns VK_FALSE.
static VKAPI_ATTR VkBool32 VKAPI_CALL debugReportCallbackFn(
    VkDebugReportFlagsEXT flags,
    VkDebugReportObjectTypeEXT objectType,
    uint64_t object,
    size_t location,
    int32_t messageCode,
    const char* pLayerPrefix,
    const char* pMessage,
    void* pUserData) {
    // Only the prefix and the message are reported; the other arguments are unused.
    (void)flags;
    (void)objectType;
    (void)object;
    (void)location;
    (void)messageCode;
    (void)pUserData;

    printf("Debug Report: %s: %s\n", pLayerPrefix, pMessage);
    //_exit(1);
    return VK_FALSE;
}
void createInstance() {
const char * enabledExtensions[16];
uint32_t enabledExtensionsCount = 0;
/*
By enabling validation layers, Vulkan will emit warnings if the API
is used incorrectly. We shall enable the layer VK_LAYER_LUNARG_standard_validation,
which is basically a collection of several useful validation layers.
*/
if (enableValidationLayers) {
/*
We get all supported layers with vkEnumerateInstanceLayerProperties.
*/
uint32_t layerCount;
vkEnumerateInstanceLayerProperties(&layerCount, NULL);
VkLayerProperties layerProperties[layerCount];
vkEnumerateInstanceLayerProperties(&layerCount, layerProperties);
/*
And then we simply check if VK_LAYER_LUNARG_standard_validation is among the supported layers.
*/
bool foundLayer = false;
for (VkLayerProperties prop : layerProperties) {
if (strcmp("VK_LAYER_KHRONOS_validation", prop.layerName) == 0) {
foundLayer = true;
break;
}
}
if (!foundLayer) {
printf("Layer VK_LAYER_LUNARG_standard_validation not supported\n");
}
else
enabledLayers[enabledLayersCount++] = "VK_LAYER_KHRONOS_validation"; // Alright, we can use this layer.
/*
We need to enable an extension named VK_EXT_DEBUG_REPORT_EXTENSION_NAME,
in order to be able to print the warnings emitted by the validation layer.
So again, we just check if the extension is among the supported extensions.
*/
uint32_t extensionCount;
vkEnumerateInstanceExtensionProperties(NULL, &extensionCount, NULL);
VkExtensionProperties extensionProperties[extensionCount];
vkEnumerateInstanceExtensionProperties(NULL, &extensionCount, extensionProperties);
bool foundExtension = false;
for (VkExtensionProperties prop : extensionProperties) {
if (strcmp(VK_EXT_DEBUG_REPORT_EXTENSION_NAME, prop.extensionName) == 0) {
foundExtension = true;
break;
}
}
if (!foundExtension) {
printf("Extension VK_EXT_DEBUG_REPORT_EXTENSION_NAME not supported\n");
}
else enabledExtensions[enabledExtensionsCount++] = VK_EXT_DEBUG_REPORT_EXTENSION_NAME;
}
/*
Next, we actually create the instance.
*/
/*
Contains application info. This is actually not that important.
The only real important field is apiVersion.
*/
VkApplicationInfo applicationInfo = {};
applicationInfo.sType = VK_STRUCTURE_TYPE_APPLICATION_INFO;
applicationInfo.pApplicationName = "vkComputeQueueTest";
applicationInfo.applicationVersion = 0;
applicationInfo.pEngineName = "streamingengine";
applicationInfo.engineVersion = 0;
applicationInfo.apiVersion = VK_API_VERSION_1_1;;
VkInstanceCreateInfo createInfo = {};
createInfo.sType = VK_STRUCTURE_TYPE_INSTANCE_CREATE_INFO;
createInfo.flags = 0;
createInfo.pApplicationInfo = &applicationInfo;
// Give our desired layers and extensions to vulkan.
createInfo.enabledLayerCount = enabledLayersCount;
createInfo.ppEnabledLayerNames = enabledLayers;
createInfo.enabledExtensionCount = enabledExtensionsCount;
createInfo.ppEnabledExtensionNames = enabledExtensions;
/*
Actually create the instance.
Having created the instance, we can actually start using vulkan.
*/
VK_CHECK_RESULT(vkCreateInstance(
&createInfo,
NULL,
&instance));
/*
Register a callback function for the extension VK_EXT_DEBUG_REPORT_EXTENSION_NAME, so that warnings emitted from the validation
layer are actually printed.
*/
if (enableValidationLayers) {
VkDebugReportCallbackCreateInfoEXT createInfo = {};
createInfo.sType = VK_STRUCTURE_TYPE_DEBUG_REPORT_CALLBACK_CREATE_INFO_EXT;
createInfo.flags = VK_DEBUG_REPORT_ERROR_BIT_EXT | VK_DEBUG_REPORT_WARNING_BIT_EXT | VK_DEBUG_REPORT_PERFORMANCE_WARNING_BIT_EXT;
createInfo.pfnCallback = &debugReportCallbackFn;
// We have to explicitly load this function.
auto vkCreateDebugReportCallbackEXT = (PFN_vkCreateDebugReportCallbackEXT)vkGetInstanceProcAddr(instance, "vkCreateDebugReportCallbackEXT");
if (vkCreateDebugReportCallbackEXT == nullptr) {
printf("Could not load vkCreateDebugReportCallbackEXT\n");
return;
}
// Create and register callback.
VK_CHECK_RESULT(vkCreateDebugReportCallbackEXT(instance, &createInfo, NULL, &debugReportCallback));
}
}
void findPhysicalDevice() {
/*
In this function, we find a physical device that can be used with Vulkan.
*/
/*
So, first we will list all physical devices on the system with vkEnumeratePhysicalDevices .
*/
uint32_t deviceCount;
vkEnumeratePhysicalDevices(instance, &deviceCount, NULL);
if (deviceCount == 0) {
printf("could not find a device with vulkan support\n");
return;
}
VkPhysicalDevice devices[deviceCount];
vkEnumeratePhysicalDevices(instance, &deviceCount, devices);
/*
Next, we choose a device that can be used for our purposes.
With VkPhysicalDeviceFeatures(), we can retrieve a fine-grained list of physical features supported by the device.
However, in this demo, we are simply launching a simple compute shader, and there are no
special physical features demanded for this task.
With VkPhysicalDeviceProperties(), we can obtain a list of physical device properties. Most importantly,
we obtain a list of physical device limitations. For this application, we launch a compute shader,
and the maximum size of the workgroups and total number of compute shader invocations is limited by the physical device,
and we should ensure that the limitations named maxComputeWorkGroupCount, maxComputeWorkGroupInvocations and
maxComputeWorkGroupSize are not exceeded by our application. Moreover, we are using a storage buffer in the compute shader,
and we should ensure that it is not larger than the device can handle, by checking the limitation maxStorageBufferRange.
However, in our application, the workgroup size and total number of shader invocations is relatively small, and the storage buffer is
not that large, and thus a vast majority of devices will be able to handle it. This can be verified by looking at some devices at_
http://vulkan.gpuinfo.org/
Therefore, to keep things simple and clean, we will not perform any such checks here, and just pick the first physical
device in the list. But in a real and serious application, those limitations should certainly be taken into account.
*/
for (VkPhysicalDevice device : devices) {
if (true) { // As above stated, we do no feature checks, so just accept.
physicalDevice = device;
break;
}
}
}
// Returns the index of a queue family that supports compute operations.
uint32_t getComputeQueueFamilyIndex() {
uint32_t queueFamilyCount;
vkGetPhysicalDeviceQueueFamilyProperties(physicalDevice, &queueFamilyCount, NULL);
// Retrieve all queue families.
VkQueueFamilyProperties queueFamilies[queueFamilyCount];
vkGetPhysicalDeviceQueueFamilyProperties(physicalDevice, &queueFamilyCount, queueFamilies);
// Now find a family that supports compute.
uint32_t i = 0;
for (; i < queueFamilyCount; ++i) {
VkQueueFamilyProperties props = queueFamilies[i];
if (props.queueCount > 0 && (props.queueFlags & VK_QUEUE_COMPUTE_BIT)) {
// found a queue with compute. We're done!
break;
}
}
if (i == queueFamilyCount) {
printf("could not find a queue family that supports operations\n");
return -1;
}
return i;
}
void createDevice() {
/*
We create the logical device in this function.
*/
/*
When creating the device, we also specify what queues it has.
*/
VkDeviceQueueCreateInfo queueCreateInfo = {};
queueCreateInfo.sType = VK_STRUCTURE_TYPE_DEVICE_QUEUE_CREATE_INFO;
queueFamilyIndex = getComputeQueueFamilyIndex(); // find queue family with compute capability.
queueCreateInfo.queueFamilyIndex = queueFamilyIndex;
queueCreateInfo.queueCount = 1; // create one queue in this family. We don't need more.
float queuePriorities = 1.0; // we only have one queue, so this is not that imporant.
queueCreateInfo.pQueuePriorities = &queuePriorities;
/*
Now we create the logical device. The logical device allows us to interact with the physical
device.
*/
VkDeviceCreateInfo deviceCreateInfo = {};
// Specify any desired device features here. We do not need any for this application, though.
VkPhysicalDeviceFeatures deviceFeatures = {};
const char *deviceExtensions[16];
uint32_t deviceExtensionsCount = 0;
deviceExtensions[deviceExtensionsCount++] = VK_KHR_EXTERNAL_MEMORY_EXTENSION_NAME;
//deviceExtensions[deviceExtensionsCount++] = VK_KHR_EXTERNAL_MEMORY_CAPABILITIES_EXTENSION_NAME;
deviceExtensions[deviceExtensionsCount++] = VK_KHR_EXTERNAL_MEMORY_FD_EXTENSION_NAME;
deviceExtensions[deviceExtensionsCount++] = VK_EXT_EXTERNAL_MEMORY_DMA_BUF_EXTENSION_NAME;
deviceExtensions[deviceExtensionsCount++] = VK_EXT_IMAGE_DRM_FORMAT_MODIFIER_EXTENSION_NAME;
deviceExtensions[deviceExtensionsCount++] = VK_KHR_IMAGE_FORMAT_LIST_EXTENSION_NAME;
deviceCreateInfo.sType = VK_STRUCTURE_TYPE_DEVICE_CREATE_INFO;
deviceCreateInfo.enabledLayerCount = enabledLayersCount; // need to specify validation layers here as well.
deviceCreateInfo.ppEnabledLayerNames = enabledLayers;
deviceCreateInfo.pQueueCreateInfos = &queueCreateInfo; // when creating the logical device, we also specify what queues it has.
deviceCreateInfo.queueCreateInfoCount = 1;
deviceCreateInfo.enabledExtensionCount = deviceExtensionsCount;
deviceCreateInfo.ppEnabledExtensionNames = deviceExtensions;
deviceCreateInfo.pEnabledFeatures = &deviceFeatures;
VK_CHECK_RESULT(vkCreateDevice(physicalDevice, &deviceCreateInfo, NULL, &device)); // create logical device.
// Get a handle to the only member of the queue family.
vkGetDeviceQueue(device, queueFamilyIndex, 0, &queue);
}
// find memory type with desired properties.
/*
Returns the index of the first memory type of physicalDevice that
  - is allowed by the bit mask memoryTypeBits (bit i stands for type i), and
  - has every flag in `properties` set.
Returns (uint32_t)-1 when no type matches.
*/
uint32_t findMemoryType(uint32_t memoryTypeBits, VkMemoryPropertyFlags properties) {
    VkPhysicalDeviceMemoryProperties memoryProperties;
    vkGetPhysicalDeviceMemoryProperties(physicalDevice, &memoryProperties);
    for (uint32_t i = 0; i < memoryProperties.memoryTypeCount; ++i) {
        // Unsigned shift: a signed `1 << 31` would be undefined behaviour.
        const uint32_t typeBit = 1u << i;
        if ((memoryTypeBits & typeBit) &&
            ((memoryProperties.memoryTypes[i].propertyFlags & properties) == properties))
            return i;
    }
    return (uint32_t)-1;
}
void createUBO(int chidx) {
/*
We will now create a buffer. We will render the mandelbrot set into this buffer
in a computer shade later.
*/
VkBufferCreateInfo bufferCreateInfo = {};
bufferCreateInfo.sType = VK_STRUCTURE_TYPE_BUFFER_CREATE_INFO;
bufferCreateInfo.size = sizeof(UBO); // buffer size in bytes.
bufferCreateInfo.usage = VK_BUFFER_USAGE_UNIFORM_BUFFER_BIT; // buffer is used as a storage buffer.
bufferCreateInfo.sharingMode = VK_SHARING_MODE_EXCLUSIVE; // buffer is exclusive to a single queue family at a time.
VK_CHECK_RESULT(vkCreateBuffer(device, &bufferCreateInfo, NULL, &chain[chidx].ubo)); // create buffer.
/*
But the buffer doesn't allocate memory for itself, so we must do that manually.
*/
/*
First, we find the memory requirements for the buffer.
*/
VkMemoryRequirements memoryRequirements;
vkGetBufferMemoryRequirements(device, chain[chidx].ubo, &memoryRequirements);
/*
Now use obtained memory requirements info to allocate the memory for the buffer.
*/
VkMemoryAllocateInfo allocateInfo = {};
allocateInfo.sType = VK_STRUCTURE_TYPE_MEMORY_ALLOCATE_INFO;
allocateInfo.allocationSize = memoryRequirements.size; // specify required memory.
/*
There are several types of memory that can be allocated, and we must choose a memory type that:
1) Satisfies the memory requirements(memoryRequirements.memoryTypeBits).
2) Satifies our own usage requirements. We want to be able to read the buffer memory from the GPU to the CPU
with vkMapMemory, so we set VK_MEMORY_PROPERTY_HOST_VISIBLE_BIT.
Also, by setting VK_MEMORY_PROPERTY_HOST_COHERENT_BIT, memory written by the device(GPU) will be easily
visible to the host(CPU), without having to call any extra flushing commands. So mainly for convenience, we set
this flag.
*/
allocateInfo.memoryTypeIndex = findMemoryType(
memoryRequirements.memoryTypeBits, VK_MEMORY_PROPERTY_HOST_COHERENT_BIT | VK_MEMORY_PROPERTY_HOST_VISIBLE_BIT);
VK_CHECK_RESULT(vkAllocateMemory(device, &allocateInfo, NULL, &chain[chidx].uboMemory)); // allocate memory on device.
// Now associate that allocated memory with the buffer. With that, the buffer is backed by actual memory.
VK_CHECK_RESULT(vkBindBufferMemory(device, chain[chidx].ubo, chain[chidx].uboMemory, 0));
vkMapMemory(device, chain[chidx].uboMemory, 0, sizeof(UBO), 0, (void**)&chain[chidx].pMappedUBO);
}
int getAvailiableModifiersList(uint64_t *modifiers2, size_t len, VkFormat fmt)
{
VkDrmFormatModifierPropertiesEXT modifiers[len];
VkDrmFormatModifierPropertiesListEXT formatList = {VK_STRUCTURE_TYPE_DRM_FORMAT_MODIFIER_PROPERTIES_LIST_EXT};
VkFormatProperties2 prop = {VK_STRUCTURE_TYPE_FORMAT_PROPERTIES_2};
prop.pNext = &formatList;
formatList.drmFormatModifierCount = len;
formatList.pDrmFormatModifierProperties = modifiers;
int count = 0;
vkGetPhysicalDeviceFormatProperties2(physicalDevice, fmt, &prop);
for(int i = 0; i < formatList.drmFormatModifierCount; i++)
{
modifiers2[count++] = modifiers[i].drmFormatModifier;
printf("mod %llx %d %d\n", modifiers[i].drmFormatModifier, modifiers[i].drmFormatModifierPlaneCount, (int)modifiers[i].drmFormatModifierTilingFeatures);
}
return count;
}
// create and import dmabuf
/*
Allocates a DRM dumb buffer through gDrm and wraps it in a Vulkan image:
  1. creates a WIDTH x HEIGHT R8G8B8A8 storage image with the linear DRM
     modifier and the row pitch reported for the dumb buffer,
  2. imports the buffer's dma-buf fd as the image's dedicated memory,
  3. binds the memory and creates a 2D colour view.
Outputs go to image, imageView and imageMemory.
If the dumb buffer, the extension entry point or the fd duplicate is
unavailable, the failure is reported through VK_CHECK_RESULT, the outputs
are set to VK_NULL_HANDLE and the function returns.
*/
void createImageDumbDmabuf(VkImage &image, VkImageView &imageView, VkDeviceMemory &imageMemory) {
    image = VK_NULL_HANDLE;
    imageView = VK_NULL_HANDLE;
    imageMemory = VK_NULL_HANDLE;

    // Extension entry point, loaded explicitly.
    PFN_vkGetMemoryFdPropertiesKHR vkGetMemoryFdProperties = (PFN_vkGetMemoryFdPropertiesKHR)vkGetInstanceProcAddr(instance, "vkGetMemoryFdPropertiesKHR");
    if (vkGetMemoryFdProperties == nullptr) {
        printf("Could not load vkGetMemoryFdPropertiesKHR\n");
        VK_CHECK_RESULT(VK_ERROR_EXTENSION_NOT_PRESENT);
        return;
    }

    const int drmfd = gDrm.Allocate(WIDTH, HEIGHT);
    if (drmfd < 0) {
        printf("createImageDumbDmabuf: could not allocate a DRM dumb buffer\n");
        VK_CHECK_RESULT(VK_ERROR_INITIALIZATION_FAILED);
        return;
    }

    // Single linear plane that starts at offset 0 and uses the dumb buffer's pitch.
    VkSubresourceLayout layout = {};
    layout.rowPitch = gDrm.pitch;

    VkImageDrmFormatModifierExplicitCreateInfoEXT drmModInfo = {VK_STRUCTURE_TYPE_IMAGE_DRM_FORMAT_MODIFIER_EXPLICIT_CREATE_INFO_EXT};
    drmModInfo.drmFormatModifierPlaneCount = 1;
    drmModInfo.drmFormatModifier = DRM_FORMAT_MOD_LINEAR;
    drmModInfo.pPlaneLayouts = &layout;

    // external image stuff
    VkExternalMemoryImageCreateInfo extInfo = {VK_STRUCTURE_TYPE_EXTERNAL_MEMORY_IMAGE_CREATE_INFO};
    extInfo.handleTypes = VK_EXTERNAL_MEMORY_HANDLE_TYPE_DMA_BUF_BIT_EXT;
    extInfo.pNext = &drmModInfo;

    VkImageCreateInfo imageCreateInfo = {VK_STRUCTURE_TYPE_IMAGE_CREATE_INFO};
    imageCreateInfo.pNext = &extInfo;
    imageCreateInfo.imageType = VK_IMAGE_TYPE_2D;
    imageCreateInfo.format = VK_FORMAT_R8G8B8A8_UNORM;
    imageCreateInfo.extent = { WIDTH, HEIGHT, 1 };
    imageCreateInfo.mipLevels = 1;
    imageCreateInfo.arrayLayers = 1;
    imageCreateInfo.samples = VK_SAMPLE_COUNT_1_BIT;
    imageCreateInfo.tiling = VK_IMAGE_TILING_DRM_FORMAT_MODIFIER_EXT;//VK_IMAGE_TILING_LINEAR;
    // todo: do we need SAMPLED?
    imageCreateInfo.usage = VK_IMAGE_USAGE_STORAGE_BIT;
    imageCreateInfo.sharingMode = VK_SHARING_MODE_EXCLUSIVE;
    imageCreateInfo.initialLayout = VK_IMAGE_LAYOUT_UNDEFINED; //PREINITIALIZED;
    VK_CHECK_RESULT(vkCreateImage(device, &imageCreateInfo, NULL, &image)); // create image.

    // The image does not allocate memory for itself: collect what it and the fd allow.
    VkMemoryRequirements memoryRequirements;
    vkGetImageMemoryRequirements(device, image, &memoryRequirements);
    VkMemoryFdPropertiesKHR fdProps = {VK_STRUCTURE_TYPE_MEMORY_FD_PROPERTIES_KHR};
    VK_CHECK_RESULT(vkGetMemoryFdProperties(device, VK_EXTERNAL_MEMORY_HANDLE_TYPE_DMA_BUF_BIT_EXT, drmfd, &fdProps));

    /*
    A successful import hands the file descriptor over to the Vulkan
    implementation. gDrm still owns and later closes buffer_fd, so a duplicate
    is imported; otherwise the same descriptor would be closed twice.
    */
    const int importFd = dup(drmfd);
    if (importFd < 0) {
        printf("createImageDumbDmabuf: dup() of the dma-buf fd failed\n");
        VK_CHECK_RESULT(VK_ERROR_OUT_OF_HOST_MEMORY);
        return;
    }

    VkMemoryDedicatedAllocateInfo dedicatedAllocInfo = {VK_STRUCTURE_TYPE_MEMORY_DEDICATED_ALLOCATE_INFO};
    dedicatedAllocInfo.image = image;
    VkImportMemoryFdInfoKHR importInfo = {VK_STRUCTURE_TYPE_IMPORT_MEMORY_FD_INFO_KHR};
    importInfo.pNext = &dedicatedAllocInfo;
    importInfo.handleType = VK_EXTERNAL_MEMORY_HANDLE_TYPE_DMA_BUF_BIT_EXT;
    importInfo.fd = importFd;

    VkMemoryAllocateInfo allocateInfo = {VK_STRUCTURE_TYPE_MEMORY_ALLOCATE_INFO};
    allocateInfo.pNext = &importInfo;
    allocateInfo.allocationSize = memoryRequirements.size; // specify required memory.
    // The type must be acceptable to both the image and the imported fd; no property flags are required.
    allocateInfo.memoryTypeIndex = findMemoryType(
        memoryRequirements.memoryTypeBits & fdProps.memoryTypeBits, 0);//VK_MEMORY_PROPERTY_HOST_COHERENT_BIT | VK_MEMORY_PROPERTY_HOST_VISIBLE_BIT);

    const VkResult allocResult = vkAllocateMemory(device, &allocateInfo, NULL, &imageMemory);
    if (allocResult != VK_SUCCESS)
        close(importFd); // the import did not happen, so the duplicate is still ours.
    VK_CHECK_RESULT(allocResult);

    // Now associate that allocated memory with the image.
    VK_CHECK_RESULT(vkBindImageMemory(device, image, imageMemory, 0));

    VkImageViewCreateInfo view = {VK_STRUCTURE_TYPE_IMAGE_VIEW_CREATE_INFO};
    view.image = image;
    view.viewType = VK_IMAGE_VIEW_TYPE_2D;
    view.format = VK_FORMAT_R8G8B8A8_UNORM;
    view.subresourceRange = { VK_IMAGE_ASPECT_COLOR_BIT, 0, 1, 0, 1 };
    VK_CHECK_RESULT(vkCreateImageView(device, &view, nullptr, &imageView));
}
// create and import dmabuf
/*
Imports one dma-buf as the shared memory of two storage images:
  - `image`  : WIDTH x HEIGHT, one channel (R16 when p010 is set, else R8),
               row pitch pitch1, bound at offset 0;
  - `image1` : WIDTH/2 x HEIGHT/2, two channels (R16G16 or R8G8),
               row pitch pitch2, bound at `offset`.
Both use DRM modifier `mod`. `size` is the allocation size for the import.
A 2D colour view is created for each image; the single VkDeviceMemory goes to
imageMemory.
NOTE(review): imageMemory1 is never written here — confirm callers do not expect it.
NOTE(review): a successful import hands `fd` to the Vulkan implementation;
whether the caller also closes it is not visible from here — confirm.
NOTE(review): presumably these are the Y and interleaved UV planes of an
NV12/P010 surface — confirm against the caller.
*/
void createImageDumbDmabuf2(VkImage &image, VkImageView &imageView, VkDeviceMemory &imageMemory, VkImage &image1, VkImageView &imageView1, VkDeviceMemory &imageMemory1, int fd, uint64_t mod, uint32_t size, uint32_t offset, uint32_t pitch1, uint32_t pitch2, bool p010) {
    (void)imageMemory1;

    // Extension entry point, loaded explicitly and checked before anything is created.
    PFN_vkGetMemoryFdPropertiesKHR vkGetMemoryFdProperties = (PFN_vkGetMemoryFdPropertiesKHR)vkGetInstanceProcAddr(instance, "vkGetMemoryFdPropertiesKHR");
    if (vkGetMemoryFdProperties == nullptr) {
        printf("Could not load vkGetMemoryFdPropertiesKHR\n");
        VK_CHECK_RESULT(VK_ERROR_EXTENSION_NOT_PRESENT);
        return;
    }

    const VkFormat fullResFormat = p010?VK_FORMAT_R16_UNORM:VK_FORMAT_R8_UNORM;
    const VkFormat halfResFormat = p010?VK_FORMAT_R16G16_UNORM:VK_FORMAT_R8G8_UNORM;

    // Plane layout shared by both create calls; only rowPitch changes in between.
    VkSubresourceLayout layout = {};
    layout.rowPitch = pitch1;

    VkImageDrmFormatModifierExplicitCreateInfoEXT drmModInfo = {VK_STRUCTURE_TYPE_IMAGE_DRM_FORMAT_MODIFIER_EXPLICIT_CREATE_INFO_EXT};
    drmModInfo.drmFormatModifierPlaneCount = 1;
    drmModInfo.drmFormatModifier = mod;
    drmModInfo.pPlaneLayouts = &layout;

    // external image stuff
    VkExternalMemoryImageCreateInfo extInfo = {VK_STRUCTURE_TYPE_EXTERNAL_MEMORY_IMAGE_CREATE_INFO};
    extInfo.handleTypes = VK_EXTERNAL_MEMORY_HANDLE_TYPE_DMA_BUF_BIT_EXT;
    extInfo.pNext = &drmModInfo;

    VkImageCreateInfo imageCreateInfo = {VK_STRUCTURE_TYPE_IMAGE_CREATE_INFO};
    imageCreateInfo.pNext = &extInfo;
    imageCreateInfo.imageType = VK_IMAGE_TYPE_2D;
    imageCreateInfo.format = fullResFormat;
    imageCreateInfo.extent = { WIDTH, HEIGHT, 1 };
    imageCreateInfo.mipLevels = 1;
    imageCreateInfo.arrayLayers = 1;
    imageCreateInfo.samples = VK_SAMPLE_COUNT_1_BIT;
    imageCreateInfo.tiling = VK_IMAGE_TILING_DRM_FORMAT_MODIFIER_EXT;//VK_IMAGE_TILING_LINEAR;
    // todo: do we need SAMPLED?
    imageCreateInfo.usage = VK_IMAGE_USAGE_STORAGE_BIT;
    imageCreateInfo.sharingMode = VK_SHARING_MODE_EXCLUSIVE;
    imageCreateInfo.initialLayout = VK_IMAGE_LAYOUT_UNDEFINED; //PREINITIALIZED;
    VK_CHECK_RESULT(vkCreateImage(device, &imageCreateInfo, NULL, &image)); // create image.

    // Second image: same create info, half the extent, two channels, its own pitch.
    imageCreateInfo.format = halfResFormat;
    imageCreateInfo.extent = { WIDTH/2, HEIGHT/2, 1 };
    layout.offset = 0;//2088960;
    layout.rowPitch = pitch2;
    VK_CHECK_RESULT(vkCreateImage(device, &imageCreateInfo, NULL, &image1)); // create image.

    // Memory requirements are taken from the first image only.
    VkMemoryRequirements memoryRequirements;
    vkGetImageMemoryRequirements(device, image, &memoryRequirements);
    VkMemoryFdPropertiesKHR fdProps = {VK_STRUCTURE_TYPE_MEMORY_FD_PROPERTIES_KHR};
    VK_CHECK_RESULT(vkGetMemoryFdProperties(device, VK_EXTERNAL_MEMORY_HANDLE_TYPE_DMA_BUF_BIT_EXT, fd, &fdProps));

    // No dedicated-allocation info is chained: the allocation backs two images.
    VkImportMemoryFdInfoKHR importInfo = {VK_STRUCTURE_TYPE_IMPORT_MEMORY_FD_INFO_KHR};
    importInfo.handleType = VK_EXTERNAL_MEMORY_HANDLE_TYPE_DMA_BUF_BIT_EXT;
    importInfo.fd = fd;

    VkMemoryAllocateInfo allocateInfo = {VK_STRUCTURE_TYPE_MEMORY_ALLOCATE_INFO};
    allocateInfo.pNext = &importInfo;
    allocateInfo.allocationSize = size;//memoryRequirements.size; // specify required memory.
    // The type must be acceptable to both the image and the imported fd; no property flags are required.
    allocateInfo.memoryTypeIndex = findMemoryType(
        memoryRequirements.memoryTypeBits & fdProps.memoryTypeBits, 0);//VK_MEMORY_PROPERTY_HOST_COHERENT_BIT | VK_MEMORY_PROPERTY_HOST_VISIBLE_BIT);
    VK_CHECK_RESULT(vkAllocateMemory(device, &allocateInfo, NULL, &imageMemory)); // allocate memory on device.

    // Both images share the one allocation; the second starts at `offset`.
    VK_CHECK_RESULT(vkBindImageMemory(device, image, imageMemory, 0));
    VK_CHECK_RESULT(vkBindImageMemory(device, image1, imageMemory, offset));

    VkImageViewCreateInfo view = {VK_STRUCTURE_TYPE_IMAGE_VIEW_CREATE_INFO};
    view.viewType = VK_IMAGE_VIEW_TYPE_2D;
    view.subresourceRange = { VK_IMAGE_ASPECT_COLOR_BIT, 0, 1, 0, 1 };

    view.image = image;
    view.format = fullResFormat;
    VK_CHECK_RESULT(vkCreateImageView(device, &view, nullptr, &imageView));

    view.image = image1;
    view.format = halfResFormat;
    VK_CHECK_RESULT(vkCreateImageView(device, &view, nullptr, &imageView1));
}
#if 0
// create and import dmabuf as opaque fd, allows any tiling
// NOTE(review): this function follows an `#if 0` with no matching #endif in between,
// so it appears to be compiled out — confirm before relying on it.
// Creates a WIDTH x HEIGHT R8G8B8A8 storage image with optimal tiling and imports the
// fd returned by gDrm.Allocate() as its dedicated memory, using the OPAQUE_FD handle type.
void createImageDumbOpaque(VkImage &image, VkImageView &imageView, VkDeviceMemory &imageMemory) {
/*
Describe the image: 2D, one mip level, one layer, storage usage.
*/
VkImageCreateInfo imageCreateInfo = {VK_STRUCTURE_TYPE_IMAGE_CREATE_INFO};
imageCreateInfo.imageType = VK_IMAGE_TYPE_2D;
imageCreateInfo.format = VK_FORMAT_R8G8B8A8_UNORM;
imageCreateInfo.extent = { WIDTH, HEIGHT, 1 };
imageCreateInfo.mipLevels = 1;
imageCreateInfo.arrayLayers = 1;
imageCreateInfo.samples = VK_SAMPLE_COUNT_1_BIT;
imageCreateInfo.tiling = VK_IMAGE_TILING_OPTIMAL;
// todo: do we need SAMPLED?
imageCreateInfo.usage = VK_IMAGE_USAGE_STORAGE_BIT;
imageCreateInfo.sharingMode = VK_SHARING_MODE_EXCLUSIVE;
// external image stuff
imageCreateInfo.initialLayout = VK_IMAGE_LAYOUT_UNDEFINED; //PREINITIALIZED;
VkExternalMemoryImageCreateInfo extInfo = {VK_STRUCTURE_TYPE_EXTERNAL_MEMORY_IMAGE_CREATE_INFO};
extInfo.handleTypes = VK_EXTERNAL_MEMORY_HANDLE_TYPE_OPAQUE_FD_BIT;
imageCreateInfo.pNext = &extInfo;
// Allocate the DRM dumb buffer; the return value is not checked for -1 here.
int drmfd = gDrm.Allocate(WIDTH, HEIGHT);
VK_CHECK_RESULT(vkCreateImage(device, &imageCreateInfo, NULL, &image)); // create image.
/*
The image doesn't allocate memory for itself, so we must do that manually.
*/
/*
First, we find the memory requirements for the image.
*/
VkMemoryRequirements memoryRequirements;
vkGetImageMemoryRequirements(device, image, &memoryRequirements);
// Import chain: allocate info -> import-fd info -> dedicated-allocation info for `image`.
VkImportMemoryFdInfoKHR importInfo = {VK_STRUCTURE_TYPE_IMPORT_MEMORY_FD_INFO_KHR};
importInfo.handleType =VK_EXTERNAL_MEMORY_HANDLE_TYPE_OPAQUE_FD_BIT;
importInfo.fd = drmfd;
VkMemoryDedicatedAllocateInfo dedicatedAllocInfo = {VK_STRUCTURE_TYPE_MEMORY_DEDICATED_ALLOCATE_INFO};
dedicatedAllocInfo.image = image;
importInfo.pNext = &dedicatedAllocInfo;
//VkMemoryFdPropertiesKHR fdProps = {VK_STRUCTURE_TYPE_MEMORY_FD_PROPERTIES_KHR};
//PFN_vkGetMemoryFdPropertiesKHR vkGetMemoryFdProperties = (PFN_vkGetMemoryFdPropertiesKHR)vkGetInstanceProcAddr(instance, "vkGetMemoryFdPropertiesKHR");
//vkGetMemoryFdProperties(device,VK_EXTERNAL_MEMORY_HANDLE_TYPE_OPAQUE_FD_BIT, drmfd, &fdProps);
/*
Now use the obtained memory requirements to fill in the allocation.
*/
VkMemoryAllocateInfo allocateInfo = {VK_STRUCTURE_TYPE_MEMORY_ALLOCATE_INFO};
allocateInfo.allocationSize = memoryRequirements.size; // specify required memory.
allocateInfo.pNext = &importInfo;
/*
Choose a memory type that satisfies memoryRequirements.memoryTypeBits.
No property flags are required (the second argument is 0); the fd's own
memory-type mask is not consulted because that query is commented out above.
*/
allocateInfo.memoryTypeIndex = findMemoryType(
memoryRequirements.memoryTypeBits /*& fdProps.memoryTypeBits*/, 0);//VK_MEMORY_PROPERTY_HOST_COHERENT_BIT | VK_MEMORY_PROPERTY_HOST_VISIBLE_BIT);
VK_CHECK_RESULT(vkAllocateMemory(device, &allocateInfo, NULL, &imageMemory)); // allocate memory on device.
// Now associate that allocated memory with the image. With that, the image is backed by actual memory.
VK_CHECK_RESULT(vkBindImageMemory(device, image, imageMemory, 0));
// 2D colour view covering the single mip level and array layer.
VkImageViewCreateInfo view = {VK_STRUCTURE_TYPE_IMAGE_VIEW_CREATE_INFO};
view.image = image;
view.viewType = VK_IMAGE_VIEW_TYPE_2D;
view.format = VK_FORMAT_R8G8B8A8_UNORM;
view.subresourceRange = { VK_IMAGE_ASPECT_COLOR_BIT, 0, 1, 0, 1 };
VK_CHECK_RESULT(vkCreateImageView(device, &view, nullptr, &imageView));
}
int prime_fd, prime_fd_uv;
// (note for the opaque-fd variant above) create and import dmabuf as opaque fd, allows any tiling
// create an image on exportable memory and export that memory as a dmabuf fd
/*
Creates a storage image with DRM-format-modifier tiling plus a second R8G8_UNORM
image (image1 / imageView1, declared outside this function), binds both to one
exportable allocation and exports that allocation as a dmabuf fd.

image, imageView, imageMemory - receive the handles of the first image, its view and the shared allocation.
fd                            - receives the exported dmabuf file descriptor.
width, height, format         - extent and format of the first image.

NOTE(review): the extent of the second image (WIDTH/2 x HEIGHT/2), the allocation
size (3133440) and the bind offset of the second image (2088960) are hard-coded in
the original code and do not follow width/height/format - confirm before reuse.
*/
void createImageExportableDmabuf(VkImage &image, VkImageView &imageView, VkDeviceMemory &imageMemory, int &fd, int width, int height, VkFormat format) {
    VkImageCreateInfo imageCreateInfo = {VK_STRUCTURE_TYPE_IMAGE_CREATE_INFO};
    imageCreateInfo.imageType = VK_IMAGE_TYPE_2D;
    imageCreateInfo.format = format;
    imageCreateInfo.extent = { (unsigned int)width, (unsigned int)height, 1 };
    imageCreateInfo.mipLevels = 1;
    imageCreateInfo.arrayLayers = 1;
    imageCreateInfo.samples = VK_SAMPLE_COUNT_1_BIT;
    imageCreateInfo.tiling = VK_IMAGE_TILING_DRM_FORMAT_MODIFIER_EXT;
    // todo: do we need SAMPLED?
    imageCreateInfo.usage = VK_IMAGE_USAGE_STORAGE_BIT;
    imageCreateInfo.sharingMode = VK_SHARING_MODE_EXCLUSIVE;
    imageCreateInfo.initialLayout = VK_IMAGE_LAYOUT_UNDEFINED;

    // Candidate modifiers the driver may choose from for this format.
    uint64_t modifiers2[32];
    VkImageDrmFormatModifierListCreateInfoEXT modifierList = {VK_STRUCTURE_TYPE_IMAGE_DRM_FORMAT_MODIFIER_LIST_CREATE_INFO_EXT};
    modifierList.drmFormatModifierCount = getAvailiableModifiersList(modifiers2, 32, format);
    modifierList.pDrmFormatModifiers = modifiers2;

    // external image stuff: the image memory will be exported as a dmabuf.
    VkExternalMemoryImageCreateInfo extInfo = {VK_STRUCTURE_TYPE_EXTERNAL_MEMORY_IMAGE_CREATE_INFO};
    extInfo.handleTypes = VK_EXTERNAL_MEMORY_HANDLE_TYPE_DMA_BUF_BIT_EXT;
    // DRM_FORMAT_MODIFIER tiling requires a modifier list (or an explicit modifier)
    // in the pNext chain; previously the list was filled in but never chained.
    extInfo.pNext = &modifierList;
    imageCreateInfo.pNext = &extInfo;
    VK_CHECK_RESULT(vkCreateImage(device, &imageCreateInfo, NULL, &image)); // create image.

    // Second image: same create info, but R8G8 at half the global resolution.
    imageCreateInfo.format = VK_FORMAT_R8G8_UNORM;
    imageCreateInfo.extent = { WIDTH/2, HEIGHT/2, 1 };
    // The candidate modifiers depend on the format, so query them again.
    modifierList.drmFormatModifierCount = getAvailiableModifiersList(modifiers2, 32, VK_FORMAT_R8G8_UNORM);
    VK_CHECK_RESULT(vkCreateImage(device, &imageCreateInfo, NULL, &image1)); // create image.

    /*
    The images do not allocate memory for themselves, so memory is provided manually.
    Only the memory type bits of the first image are used below.
    */
    VkMemoryRequirements memoryRequirements;
    vkGetImageMemoryRequirements(device, image, &memoryRequirements);

    // Mark the allocation as exportable through a dmabuf fd.
    // No dedicated-allocation info is chained: two images are bound to this allocation.
    VkExportMemoryAllocateInfo exportInfo = {VK_STRUCTURE_TYPE_EXPORT_MEMORY_ALLOCATE_INFO};
    exportInfo.handleTypes = VK_EXTERNAL_MEMORY_HANDLE_TYPE_DMA_BUF_BIT_EXT;

    VkMemoryAllocateInfo allocateInfo = {VK_STRUCTURE_TYPE_MEMORY_ALLOCATE_INFO};
    // NOTE(review): hard-coded size instead of memoryRequirements.size - see header comment.
    allocateInfo.allocationSize = 3133440;
    allocateInfo.pNext = &exportInfo;
    // Host-visible, host-coherent memory is requested for this allocation.
    allocateInfo.memoryTypeIndex = findMemoryType(
        memoryRequirements.memoryTypeBits, VK_MEMORY_PROPERTY_HOST_COHERENT_BIT | VK_MEMORY_PROPERTY_HOST_VISIBLE_BIT);
    VK_CHECK_RESULT(vkAllocateMemory(device, &allocateInfo, NULL, &imageMemory)); // allocate memory on device.

    // Bind both images into the single allocation.
    VK_CHECK_RESULT(vkBindImageMemory(device, image, imageMemory, 0));
    // NOTE(review): hard-coded offset of the second image - see header comment.
    VK_CHECK_RESULT(vkBindImageMemory(device, image1, imageMemory, 2088960));

    // 2D color views over the single mip level / array layer of each image.
    VkImageViewCreateInfo view = {VK_STRUCTURE_TYPE_IMAGE_VIEW_CREATE_INFO};
    view.image = image;
    view.viewType = VK_IMAGE_VIEW_TYPE_2D;
    view.format = format;
    view.subresourceRange = { VK_IMAGE_ASPECT_COLOR_BIT, 0, 1, 0, 1 };
    VK_CHECK_RESULT(vkCreateImageView(device, &view, nullptr, &imageView));
    view.format = VK_FORMAT_R8G8_UNORM;
    view.image = image1;
    VK_CHECK_RESULT(vkCreateImageView(device, &view, nullptr, &imageView1));

    // Export the allocation as a dmabuf fd; the result is now checked instead of ignored.
    VkMemoryGetFdInfoKHR getFdInfo = { VK_STRUCTURE_TYPE_MEMORY_GET_FD_INFO_KHR};
    getFdInfo.handleType = VK_EXTERNAL_MEMORY_HANDLE_TYPE_DMA_BUF_BIT_EXT;
    getFdInfo.memory = imageMemory;
    PFN_vkGetMemoryFdKHR pfnvkGetMemoryFdKHR = (PFN_vkGetMemoryFdKHR)vkGetInstanceProcAddr(instance, "vkGetMemoryFdKHR");
    VK_CHECK_RESULT(pfnvkGetMemoryFdKHR(device, &getFdInfo, &fd));

    // Report which modifier the driver actually picked for the first image.
    VkImageDrmFormatModifierPropertiesEXT imageModifiers = {VK_STRUCTURE_TYPE_IMAGE_DRM_FORMAT_MODIFIER_PROPERTIES_EXT};
    PFN_vkGetImageDrmFormatModifierPropertiesEXT pfnvkGetImageDrmFormatModifierPropertiesEXT =(PFN_vkGetImageDrmFormatModifierPropertiesEXT)vkGetInstanceProcAddr(instance, "vkGetImageDrmFormatModifierPropertiesEXT");
    VK_CHECK_RESULT(pfnvkGetImageDrmFormatModifierPropertiesEXT(device, image, &imageModifiers));
    // uint64_t is not necessarily unsigned long long, so cast to match %llx.
    printf("imageModifier %llx\n", (unsigned long long)imageModifiers.drmFormatModifier);
    // todo: get subresource plane info (vkGetImageSubresourceLayout)
}
#endif
void createDescriptorSetLayout() {
/*
Here we specify a descriptor set layout. This allows us to bind our descriptors to
resources in the shader.
*/
/*
Here we specify a binding of type VK_DESCRIPTOR_TYPE_STORAGE_BUFFER to the binding point
0. This binds to
layout(std140, binding = 0) buffer buf
in the compute shader.
*/
VkDescriptorSetLayoutBinding descriptorSetLayoutBinding[3] = {};
descriptorSetLayoutBinding[0].binding = 0; // binding = 0
descriptorSetLayoutBinding[0].descriptorType = VK_DESCRIPTOR_TYPE_STORAGE_IMAGE;
descriptorSetLayoutBinding[0].descriptorCount = 1;
descriptorSetLayoutBinding[0].stageFlags = VK_SHADER_STAGE_COMPUTE_BIT;
descriptorSetLayoutBinding[1].binding = 1; // binding = 0
descriptorSetLayoutBinding[1].descriptorType = VK_DESCRIPTOR_TYPE_STORAGE_IMAGE;
descriptorSetLayoutBinding[1].descriptorCount = 1;
descriptorSetLayoutBinding[1].stageFlags = VK_SHADER_STAGE_COMPUTE_BIT;
descriptorSetLayoutBinding[2].binding = 2; // binding = 1
descriptorSetLayoutBinding[2].descriptorType = VK_DESCRIPTOR_TYPE_UNIFORM_BUFFER;
descriptorSetLayoutBinding[2].descriptorCount = 1;
descriptorSetLayoutBinding[2].stageFlags = VK_SHADER_STAGE_COMPUTE_BIT;
VkDescriptorSetLayoutCreateInfo descriptorSetLayoutCreateInfo = {};
descriptorSetLayoutCreateInfo.sType = VK_STRUCTURE_TYPE_DESCRIPTOR_SET_LAYOUT_CREATE_INFO;
descriptorSetLayoutCreateInfo.bindingCount = 3; // only a single binding in this descriptor set layout.
descriptorSetLayoutCreateInfo.pBindings = descriptorSetLayoutBinding;
// Create the descriptor set layout.
VK_CHECK_RESULT(vkCreateDescriptorSetLayout(device, &descriptorSetLayoutCreateInfo, NULL, &descriptorSetLayout));
}
void createDescriptorPool()
{
/*
So we will allocate a descriptor set here.
But we need to first create a descriptor pool to do that.
*/
/*
Our descriptor pool can only allocate a single storage buffer.
*/
VkDescriptorPoolSize descriptorPoolSize[2] = {};
descriptorPoolSize[0].type = VK_DESCRIPTOR_TYPE_STORAGE_IMAGE;
descriptorPoolSize[0].descriptorCount = 2*CHAIN_SIZE;
descriptorPoolSize[1].type = VK_DESCRIPTOR_TYPE_UNIFORM_BUFFER;
descriptorPoolSize[1].descriptorCount = 1*CHAIN_SIZE;
VkDescriptorPoolCreateInfo descriptorPoolCreateInfo = {};
descriptorPoolCreateInfo.sType = VK_STRUCTURE_TYPE_DESCRIPTOR_POOL_CREATE_INFO;
descriptorPoolCreateInfo.maxSets = CHAIN_SIZE; // we only need to allocate one descriptor set from the pool.
descriptorPoolCreateInfo.poolSizeCount = 2;
descriptorPoolCreateInfo.pPoolSizes = descriptorPoolSize;
// create descriptor pool.
VK_CHECK_RESULT(vkCreateDescriptorPool(device, &descriptorPoolCreateInfo, NULL, &descriptorPool));
}
void createDescriptorSet(int chidx) {
/*
With the pool allocated, we can now allocate the descriptor set.
*/
VkDescriptorSetAllocateInfo descriptorSetAllocateInfo = {};
descriptorSetAllocateInfo.sType = VK_STRUCTURE_TYPE_DESCRIPTOR_SET_ALLOCATE_INFO;
descriptorSetAllocateInfo.descriptorPool = descriptorPool; // pool to allocate from.
descriptorSetAllocateInfo.descriptorSetCount = 1; // allocate a single descriptor set.
descriptorSetAllocateInfo.pSetLayouts = &descriptorSetLayout;
// allocate descriptor set.
VK_CHECK_RESULT(vkAllocateDescriptorSets(device, &descriptorSetAllocateInfo, &chain[chidx].descriptorSet));
/*
Next, we need to connect our actual storage buffer with the descrptor.
We use vkUpdateDescriptorSets() to update the descriptor set.
*/
// Specify the buffer to bind to the descriptor.
VkDescriptorImageInfo descriptorImageInfo[2] = {};
descriptorImageInfo[0].imageView = chain[chidx].imageView0;
descriptorImageInfo[0].imageLayout = VK_IMAGE_LAYOUT_GENERAL;
descriptorImageInfo[1].imageView = chain[chidx].imageView1;
descriptorImageInfo[1].imageLayout = VK_IMAGE_LAYOUT_GENERAL;
VkWriteDescriptorSet writeDescriptorSet[2] = {};
writeDescriptorSet[0].sType = VK_STRUCTURE_TYPE_WRITE_DESCRIPTOR_SET;
writeDescriptorSet[0].dstSet = chain[chidx].descriptorSet; // write to this descriptor set.
writeDescriptorSet[0].dstBinding = 0; // write to the first, and only binding.
writeDescriptorSet[0].descriptorCount = 1; // update a single descriptor.
writeDescriptorSet[0].descriptorType = VK_DESCRIPTOR_TYPE_STORAGE_IMAGE; // storage buffer.
writeDescriptorSet[0].pImageInfo = &descriptorImageInfo[0];
writeDescriptorSet[1].sType = VK_STRUCTURE_TYPE_WRITE_DESCRIPTOR_SET;
writeDescriptorSet[1].dstSet = chain[chidx].descriptorSet; // write to this descriptor set.
writeDescriptorSet[1].dstBinding = 1; // write to the first, and only binding.
writeDescriptorSet[1].descriptorCount = 1; // update a single descriptor.
writeDescriptorSet[1].descriptorType = VK_DESCRIPTOR_TYPE_STORAGE_IMAGE; // storage buffer.
writeDescriptorSet[1].pImageInfo = &descriptorImageInfo[1];
VkDescriptorBufferInfo descriptorBufferInfo = {};
descriptorBufferInfo.buffer = chain[chidx].ubo;
descriptorBufferInfo.offset = 0;
descriptorBufferInfo.range = sizeof(UBO);
// perform the update of the descriptor set.
vkUpdateDescriptorSets(device, 2, writeDescriptorSet, 0, NULL);
writeDescriptorSet[0].dstBinding = 2;
writeDescriptorSet[0].descriptorType = VK_DESCRIPTOR_TYPE_UNIFORM_BUFFER;
writeDescriptorSet[0].pBufferInfo = &descriptorBufferInfo;
writeDescriptorSet[0].pImageInfo = NULL;
vkUpdateDescriptorSets(device, 1, writeDescriptorSet, 0, NULL);
}
// Read file into array of bytes, and cast to uint32_t*, then return.
// The data has been padded, so that it fits into an array uint32_t.
/*
Reads a whole binary file into a malloc()-style buffer whose size is rounded up to a
multiple of 4 bytes; the padding bytes are zero.

length   - receives the padded size in bytes (0 on failure).
filename - path of the file to read.
Returns the buffer (to be released with free() by the caller), or NULL when the file
cannot be opened, measured, read completely, or the buffer cannot be allocated.
*/
uint32_t* readFile(uint32_t& length, const char* filename) {
    length = 0;

    FILE* fp = fopen(filename, "rb");
    if (fp == NULL) {
        printf("Could not find or open file: %s\n", filename);
        return NULL; // previously execution continued and used the NULL stream
    }

    // get file size.
    long filesize = -1;
    if (fseek(fp, 0, SEEK_END) == 0)
        filesize = ftell(fp);
    // The padded size must also fit into the uint32_t length output.
    if (filesize < 0 || (unsigned long long)filesize > 0xFFFFFFFCull || fseek(fp, 0, SEEK_SET) != 0) {
        printf("Could not determine size of file: %s\n", filename);
        fclose(fp);
        return NULL;
    }

    // Round up to the next multiple of 4 using integer arithmetic.
    long filesizepadded = (filesize + 3) / 4 * 4;

    // calloc zero-fills, which provides the padding; never request 0 bytes so that
    // a NULL result always means failure.
    char *str = (char*)calloc(filesizepadded > 0 ? (size_t)filesizepadded : 4, 1);
    if (str == NULL) {
        printf("Could not allocate memory for file: %s\n", filename);
        fclose(fp);
        return NULL;
    }

    // read file contents; a short read is treated as an error.
    size_t got = fread(str, 1, (size_t)filesize, fp);
    fclose(fp);
    if (got != (size_t)filesize) {
        printf("Could not read file: %s\n", filename);
        free(str);
        return NULL;
    }

    length = (uint32_t)filesizepadded;
    return (uint32_t *)str;
}
/*
Loads the SPIR-V compute shader from "image.spv", wraps it in a shader module and
creates the pipeline layout (one descriptor set: descriptorSetLayout) and the compute
pipeline with entry point "main".
Results are stored in computeShaderModule, pipelineLayout and pipeline.
Terminates the program when the shader file could not be loaded.
*/
void createComputePipeline() {
    /*
    Create a shader module. A shader module basically just encapsulates some shader code.
    */
    uint32_t filelength;
    // image.spv is compiled SPIR-V, e.g. produced with: glslangValidator -V <shader>.comp
    uint32_t* code = readFile(filelength, "image.spv");
    if (code == NULL) {
        // A shader module cannot be created from a null code pointer.
        printf("Could not load shader code from image.spv\n");
        exit(EXIT_FAILURE);
    }
    VkShaderModuleCreateInfo createInfo = {};
    createInfo.sType = VK_STRUCTURE_TYPE_SHADER_MODULE_CREATE_INFO;
    createInfo.pCode = code;
    createInfo.codeSize = filelength; // size in bytes
    VK_CHECK_RESULT(vkCreateShaderModule(device, &createInfo, NULL, &computeShaderModule));
    free(code); // the buffer is released right after the module has been created

    /*
    A compute pipeline consists of a single stage with a compute shader.
    So first we specify the compute shader stage, and its entry point (main).
    */
    VkPipelineShaderStageCreateInfo shaderStageCreateInfo = {};
    shaderStageCreateInfo.sType = VK_STRUCTURE_TYPE_PIPELINE_SHADER_STAGE_CREATE_INFO;
    shaderStageCreateInfo.stage = VK_SHADER_STAGE_COMPUTE_BIT;
    shaderStageCreateInfo.module = computeShaderModule;
    shaderStageCreateInfo.pName = "main";

    /*
    The pipeline layout allows the pipeline to access descriptor sets.
    So we just specify the descriptor set layout we created earlier.
    */
    VkPipelineLayoutCreateInfo pipelineLayoutCreateInfo = {};
    pipelineLayoutCreateInfo.sType = VK_STRUCTURE_TYPE_PIPELINE_LAYOUT_CREATE_INFO;
    pipelineLayoutCreateInfo.setLayoutCount = 1;
    pipelineLayoutCreateInfo.pSetLayouts = &descriptorSetLayout;
    VK_CHECK_RESULT(vkCreatePipelineLayout(device, &pipelineLayoutCreateInfo, NULL, &pipelineLayout));

    VkComputePipelineCreateInfo pipelineCreateInfo = {};
    pipelineCreateInfo.sType = VK_STRUCTURE_TYPE_COMPUTE_PIPELINE_CREATE_INFO;
    pipelineCreateInfo.stage = shaderStageCreateInfo;
    pipelineCreateInfo.layout = pipelineLayout;

    /*
    Now, we finally create the compute pipeline.
    */
    VK_CHECK_RESULT(vkCreateComputePipelines(
        device, VK_NULL_HANDLE,
        1, &pipelineCreateInfo,
        NULL, &pipeline));
}
void createCommandPool()
{
/*
We are getting closer to the end. In order to send commands to the device(GPU),
we must first record commands into a command buffer.
To allocate a command buffer, we must first create a command pool. So let us do that.
*/
VkCommandPoolCreateInfo commandPoolCreateInfo = {};
commandPoolCreateInfo.sType = VK_STRUCTURE_TYPE_COMMAND_POOL_CREATE_INFO;
commandPoolCreateInfo.flags = VK_COMMAND_POOL_CREATE_RESET_COMMAND_BUFFER_BIT;
// the queue family of this command pool. All command buffers allocated from this command pool,
// must be submitted to queues of this family ONLY.
commandPoolCreateInfo.queueFamilyIndex = queueFamilyIndex;
VK_CHECK_RESULT(vkCreateCommandPool(device, &commandPoolCreateInfo, NULL, &commandPool));
/*
Now allocate a command buffer from the command pool.
*/
VkCommandBufferAllocateInfo commandBufferAllocateInfo = {};
commandBufferAllocateInfo.sType = VK_STRUCTURE_TYPE_COMMAND_BUFFER_ALLOCATE_INFO;
commandBufferAllocateInfo.commandPool = commandPool; // specify the command pool to allocate from.
// if the command buffer is primary, it can be directly submitted to queues.
// A secondary buffer has to be called from some primary command buffer, and cannot be directly
// submitted to a queue. To keep things simple, we use a primary command buffer.
commandBufferAllocateInfo.level = VK_COMMAND_BUFFER_LEVEL_PRIMARY;
commandBufferAllocateInfo.commandBufferCount = 1; // allocate a single command buffer.
VK_CHECK_RESULT(vkAllocateCommandBuffers(device, &commandBufferAllocateInfo, &commandBuffer)); // allocate command buffer.
}
void prepareImage(int chidx)
{
VkCommandBufferBeginInfo beginInfo = {};
beginInfo.sType = VK_STRUCTURE_TYPE_COMMAND_BUFFER_BEGIN_INFO;
beginInfo.flags = 0; // the buffer is only submitted and used once in this application.
VK_CHECK_RESULT(vkBeginCommandBuffer(commandBuffer, &beginInfo)); // start recording commands.
vkCmdBindPipeline(commandBuffer, VK_PIPELINE_BIND_POINT_COMPUTE, pipeline);
VkImageMemoryBarrier imageMemoryBarrier = {VK_STRUCTURE_TYPE_IMAGE_MEMORY_BARRIER};
imageMemoryBarrier.oldLayout = VK_IMAGE_LAYOUT_UNDEFINED;
imageMemoryBarrier.newLayout = VK_IMAGE_LAYOUT_GENERAL;
imageMemoryBarrier.image = chain[chidx].image0;
imageMemoryBarrier.subresourceRange = { VK_IMAGE_ASPECT_COLOR_BIT, 0, 1, 0, 1 };
// imageMemoryBarrier.srcAccessMask = VK_ACCESS_SHADER_WRITE_BIT;
// imageMemoryBarrier.dstAccessMask = VK_ACCESS_MEMORY_READ_BIT;
vkCmdPipelineBarrier(commandBuffer, VK_PIPELINE_STAGE_TOP_OF_PIPE_BIT, VK_PIPELINE_STAGE_COMPUTE_SHADER_BIT,
0, 0, nullptr, 0, nullptr, 1, &imageMemoryBarrier);
imageMemoryBarrier.image = chain[chidx].image1;
vkCmdPipelineBarrier(commandBuffer, VK_PIPELINE_STAGE_TOP_OF_PIPE_BIT, VK_PIPELINE_STAGE_COMPUTE_SHADER_BIT,
0, 0, nullptr, 0, nullptr, 1, &imageMemoryBarrier);
VK_CHECK_RESULT(vkEndCommandBuffer(commandBuffer)); // end recording commands.
VkFence fence;
VkFenceCreateInfo fenceCreateInfo = {};
fenceCreateInfo.sType = VK_STRUCTURE_TYPE_FENCE_CREATE_INFO;
fenceCreateInfo.flags = 0;
VK_CHECK_RESULT(vkCreateFence(device, &fenceCreateInfo, NULL, &fence));
VkSubmitInfo submitInfo = {};
submitInfo.sType = VK_STRUCTURE_TYPE_SUBMIT_INFO;
submitInfo.commandBufferCount = 1; // submit a single command buffer
submitInfo.pCommandBuffers = &commandBuffer; // the command buffer to submit.
VK_CHECK_RESULT(vkQueueSubmit(queue, 1, &submitInfo, fence));
VK_CHECK_RESULT(vkWaitForFences(device, 1, &fence, VK_TRUE, 100000000000));
vkDestroyFence(device, fence, NULL);
vkResetCommandBuffer(commandBuffer, 0);
}
void createCommandBuffer(int chidx) {
/*
Now allocate a command buffer from the command pool.
*/
VkCommandBufferAllocateInfo commandBufferAllocateInfo = {};
commandBufferAllocateInfo.sType = VK_STRUCTURE_TYPE_COMMAND_BUFFER_ALLOCATE_INFO;
commandBufferAllocateInfo.commandPool = commandPool; // specify the command pool to allocate from.
// if the command buffer is primary, it can be directly submitted to queues.
// A secondary buffer has to be called from some primary command buffer, and cannot be directly
// submitted to a queue. To keep things simple, we use a primary command buffer.
commandBufferAllocateInfo.level = VK_COMMAND_BUFFER_LEVEL_PRIMARY;
commandBufferAllocateInfo.commandBufferCount = 1; // allocate a single command buffer.
VK_CHECK_RESULT(vkAllocateCommandBuffers(device, &commandBufferAllocateInfo, &chain[chidx].commandBuffer)); // allocate command buffer.
VkCommandBufferBeginInfo beginInfo = {};
beginInfo.sType = VK_STRUCTURE_TYPE_COMMAND_BUFFER_BEGIN_INFO;
beginInfo.flags = 0; // the buffer is only submitted and used once in this application.
VK_CHECK_RESULT(vkBeginCommandBuffer(chain[chidx].commandBuffer, &beginInfo)); // start recording commands.
/*
We need to bind a pipeline, AND a descriptor set before we dispatch.
The validation layer will NOT give warnings if you forget these, so be very careful not to forget them.
*/
vkCmdBindPipeline(chain[chidx].commandBuffer, VK_PIPELINE_BIND_POINT_COMPUTE, pipeline);
/*
Now we shall start recording commands into the newly allocated command buffer.
*/
vkCmdBindDescriptorSets(chain[chidx].commandBuffer, VK_PIPELINE_BIND_POINT_COMPUTE, pipelineLayout, 0, 1, &chain[chidx].descriptorSet, 0, NULL);
/*
Calling vkCmdDispatch basically starts the compute pipeline, and executes the compute shader.
The number of workgroups is specified in the arguments.
If you are already familiar with compute shaders from OpenGL, this should be nothing new to you.
*/
vkCmdDispatch(chain[chidx].commandBuffer, (uint32_t)ceil(WIDTH/2 / float(WORKGROUP_SIZE)), (uint32_t)ceil(HEIGHT/2 / float(WORKGROUP_SIZE)), 1);
VK_CHECK_RESULT(vkEndCommandBuffer(chain[chidx].commandBuffer)); // end recording commands.
/*
We create a fence.
*/
VkFenceCreateInfo fenceCreateInfo = {};
fenceCreateInfo.sType = VK_STRUCTURE_TYPE_FENCE_CREATE_INFO;
fenceCreateInfo.flags = 0;
VK_CHECK_RESULT(vkCreateFence(device, &fenceCreateInfo, NULL, &chain[chidx].fence));
}
/*
Waits for the fence of chain entry chidx if that entry is marked as running,
then clears the running flag.
*/
void waitFence(int chidx)
{
    auto &entry = chain[chidx];
    if (entry.running) {
        VK_CHECK_RESULT(vkWaitForFences(device, 1, &entry.fence, VK_TRUE, 100000000000));
    }
    entry.running = false;
}
void runCommandBuffer(int chidx) {
/*
Now we shall finally submit the recorded command buffer to a queue.
*/
VkSubmitInfo submitInfo = {};
submitInfo.sType = VK_STRUCTURE_TYPE_SUBMIT_INFO;
submitInfo.commandBufferCount = 1; // submit a single command buffer
submitInfo.pCommandBuffers = &chain[chidx].commandBuffer; // the command buffer to submit.
waitFence(chidx);
vkResetFences(device, 1, &chain[chidx].fence);
/*
We submit the command buffer on the queue, at the same time giving a fence.
*/
VK_CHECK_RESULT(vkQueueSubmit(queue, 1, &submitInfo, chain[chidx].fence));
chain[chidx].running = true;
/*
The command will not have finished executing until the fence is signalled.
So we wait here.
We will directly after this read our buffer from the GPU,
and we will not be sure that the command has finished executing unless we wait for the fence.
Hence, we use a fence here.
*/
//VK_CHECK_RESULT(vkWaitForFences(device, 1, &fence, VK_TRUE, 100000000000));
}
template <typename Codec>
void run(bool p010, const char *filename) {
// Buffer size of the storage buffer that will contain the rendered mandelbrot set.
//bufferSize = sizeof(Pixel) * WIDTH * HEIGHT;
// Initialize vulkan:
createInstance();
findPhysicalDevice();
createDevice();
//createImageExportableDmabuf(image0, imageView0, imageMemory0, prime_fd, WIDTH, HEIGHT, VK_FORMAT_R8_UNORM);
//createImageExportableDmabuf(image1, imageView1, imageMemory1, prime_fd_uv, WIDTH/2, HEIGHT/2, VK_FORMAT_R8G8_UNORM);
int drm_fd = drm_fd = open("/dev/dri/renderD128", O_RDWR);
//auto *r = vaapi_recorder_create2(drm_fd, WIDTH, HEIGHT, "out.264", prime_fd, WIDTH * 4);
//auto *r = vaapi_recorder_create3(drm_fd, WIDTH, HEIGHT, "out.264", prime_fd, WIDTH * 4, prime_fd_uv, WIDTH * 2);
uint64_t mod;
uint32_t size, offset, pitch1, pitch2;
int fd[CHAIN_SIZE];
uint64_t modifiers[32];
int count = getAvailiableModifiersList(modifiers, 32, p010?VK_FORMAT_R16_UNORM:VK_FORMAT_R8_UNORM);
//auto *r = vaapi_recorder_create5(drm_fd, WIDTH, HEIGHT, "out.264", fd, &mod, &size, &offset, &pitch1, &pitch2, modifiers, count);
Codec enc = {};
enc.Setup(drm_fd, WIDTH, HEIGHT, filename, fd, &mod, &size, &offset, &pitch1, &pitch2, modifiers, count, p010);
for(int i = 0; i < CHAIN_SIZE; i++)
{
createUBO(i);
createImageDumbDmabuf2(chain[i].image0, chain[i].imageView0, chain[i].imageMemory0, chain[i].image1, chain[i].imageView1, chain[i].imageMemory1,
fd[i], mod, size, offset, pitch1, pitch2, p010);
}
createDescriptorSetLayout();
createDescriptorPool();
createComputePipeline();
createCommandPool();
for(int i = 0; i < CHAIN_SIZE; i++)
{
prepareImage(i);
createDescriptorSet(i);
createCommandBuffer(i);
}
int frameNum = 0;
while(frameNum++ < 1000)
{
int chidx = frameNum & 3;
// Finally, run the recorded command buffer.
runCommandBuffer(chidx);
#ifndef SKIP_FENCE_SYNC
waitFence(chidx);
#endif
//recorder_frame4(r, chidx);
if(frameNum == 1)
enc.EncodeIDR(chidx);
else
enc.EncodeP(chidx);
chain[chidx].pMappedUBO->frameNum = frameNum;
}
for(int i = 0; i < CHAIN_SIZE; i++)
{
waitFence(i);
vkDestroyFence(device, chain[i].fence, NULL);
vkUnmapMemory(device, chain[i].uboMemory);
vkFreeMemory(device, chain[i].uboMemory, NULL);
vkDestroyBuffer(device, chain[i].ubo, NULL);
vkFreeMemory(device, chain[i].imageMemory0, NULL);
//vkFreeMemory(device, chain[i].imageMemory1, NULL);
vkDestroyImageView(device, chain[i].imageView0, NULL);
vkDestroyImage(device, chain[i].image0, NULL);
vkDestroyImageView(device, chain[i].imageView1, NULL);
vkDestroyImage(device, chain[i].image1, NULL);
}
// The former command rendered a mandelbrot set to a buffer.
// Save that buffer as a png on disk.
//saveRenderedImage();
// Clean up all vulkan resources.
cleanup();
}
#if 0
/*
Dumps the rendered image as raw bytes (WIDTH * HEIGHT elements of 4 bytes) to "out.bin".
This function sits in a disabled (#if 0) region.
The active branch maps imageMemory0 and writes the mapped memory; the alternative
branch copies from gDrm.mapped_buffer instead.
*/
void saveRenderedImage() {
void* mappedMemory = NULL;
#if 1
// Map the image memory, so that we can read from it on the CPU.
vkMapMemory(device, imageMemory0, 0, VK_WHOLE_SIZE, 0, &mappedMemory);
//Pixel* pmappedMemory = (Pixel *)mappedMemory;
FILE *f = fopen("out.bin","wb");
fwrite(mappedMemory, 4, WIDTH * HEIGHT, f);
fclose(f);
// Done reading, so unmap.
vkUnmapMemory(device, imageMemory0);
#else
// Copy out of the CPU mapping of the DRM buffer into a static staging array first.
static char mem[WIDTH * HEIGHT*4];
memcpy(mem, gDrm.mapped_buffer, WIDTH * HEIGHT * 4);
// PNG encoding is disabled; the raw bytes are written instead.
// unsigned error = lodepng::encode("mandelbrot.png", image, WIDTH, HEIGHT);
//if (error) printf("encoder error %d: %s", error, lodepng_error_text(error));
FILE *f = fopen("out.bin","wb");
fwrite(mem, 4, WIDTH * HEIGHT, f);
fclose(f);
#endif
}
#endif
/*
Destroys the Vulkan objects that are shared by all chain entries: the debug report
callback (when validation layers are enabled), shader module, descriptor pool and
layout, pipeline layout, pipeline, command pool, and finally the device and instance.
*/
void cleanup() {
    if (enableValidationLayers) {
        // destroy callback.
        auto func = (PFN_vkDestroyDebugReportCallbackEXT)vkGetInstanceProcAddr(instance, "vkDestroyDebugReportCallbackEXT");
        if (func != nullptr) {
            func(instance, debugReportCallback, NULL);
        } else {
            // Only the callback destruction is skipped; returning here would leave
            // every other resource below undestroyed.
            printf("Could not load vkDestroyDebugReportCallbackEXT\n");
        }
    }

    vkDestroyShaderModule(device, computeShaderModule, NULL);
    vkDestroyDescriptorPool(device, descriptorPool, NULL);
    vkDestroyDescriptorSetLayout(device, descriptorSetLayout, NULL);
    vkDestroyPipelineLayout(device, pipelineLayout, NULL);
    vkDestroyPipeline(device, pipeline, NULL);
    vkDestroyCommandPool(device, commandPool, NULL);
    // The device and the instance go last.
    vkDestroyDevice(device, NULL);
    vkDestroyInstance(instance, NULL);
}
};
/*
Entry point. Without arguments the demo encodes H.264 to "out.264".
With an argument it encodes HEVC to "out.265"; the first argument is parsed as an
integer and a non-zero value enables p010.
*/
int main(int argc, char **argv) {
    ComputeApplication app;
    if (argc > 1) {
        const bool p010 = atoi(argv[1]);
        app.run<VaapiEncoderHEVC>(p010, "out.265");
    } else {
        app.run<VaapiEncoderH264>(false, "out.264");
    }
    return EXIT_SUCCESS;
}