1481 lines
60 KiB
C++
1481 lines
60 KiB
C++
/*
|
|
* Copyright (c) 2017 Eric Arnebäck
|
|
* Copyright (c) 2024 mittorn
|
|
*
|
|
* Permission is hereby granted, free of charge, to any person obtaining a
|
|
* copy of this software and associated documentation files (the
|
|
* "Software"), to deal in the Software without restriction, including
|
|
* without limitation the rights to use, copy, modify, merge, publish,
|
|
* distribute, sub license, and/or sell copies of the Software, and to
|
|
* permit persons to whom the Software is furnished to do so, subject to
|
|
* the following conditions:
|
|
*
|
|
* The above copyright notice and this permission notice (including the
|
|
* next paragraph) shall be included in all copies or substantial portions
|
|
* of the Software.
|
|
*
|
|
* THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS
|
|
* OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
|
|
* MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NON-INFRINGEMENT.
|
|
* IN NO EVENT SHALL PRECISION INSIGHT AND/OR ITS SUPPLIERS BE LIABLE FOR
|
|
* ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT,
|
|
* TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE
|
|
* SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE.
|
|
*/
|
|
|
|
#include <vulkan/vulkan.h>
|
|
|
|
#include <string.h>
|
|
#include <assert.h>
|
|
#include <stdio.h>
|
|
#include <math.h>
|
|
#include <sys/mman.h>
|
|
#include <drm_fourcc.h>
|
|
#include <fcntl.h>
|
|
#include <sys/ioctl.h>
|
|
#include <stdlib.h>
|
|
#include <unistd.h>
|
|
//#include "vaapi-recorder.h"
|
|
#include "vaapi_encoder_h264.h"
|
|
#include "vaapi_encoder_hevc.h"
|
|
struct DrmHelper
|
|
{
|
|
char *mapped_buffer;
|
|
size_t mapped_size;
|
|
int drm_fd = -1;
|
|
//unsigned int handle;
|
|
int buffer_fd = -1;
|
|
int pitch;
|
|
void Destroy()
|
|
{
|
|
if(mapped_buffer && mapped_size)
|
|
{
|
|
munmap(mapped_buffer, mapped_size);
|
|
mapped_buffer = NULL;
|
|
mapped_size = 0;
|
|
}
|
|
if(buffer_fd != -1)
|
|
close(buffer_fd);
|
|
buffer_fd = -1;
|
|
if(drm_fd != -1)
|
|
{
|
|
/* if(handle)
|
|
{
|
|
drm_mode_destroy_dumb req = {};
|
|
req.handle = handle;
|
|
ioctl(drm_fd, DRM_IOCTL_MODE_DESTROY_DUMB, &req);
|
|
}
|
|
handle = 0;
|
|
*/
|
|
close(drm_fd);
|
|
drm_fd = -1;
|
|
}
|
|
}
|
|
int Allocate(unsigned int width, unsigned int height)
|
|
{
|
|
unsigned int handle;
|
|
Destroy();
|
|
// todo: autodetect drm devices, allow device selection
|
|
drm_fd = open("/dev/dri/card0", O_RDWR);
|
|
drm_mode_create_dumb buffer = { 0 };
|
|
buffer.width = width;
|
|
buffer.height = height;
|
|
buffer.handle = 0;
|
|
buffer.bpp = 32; //Bits per pixel
|
|
buffer.flags = 0;
|
|
int ret = ioctl(drm_fd, DRM_IOCTL_MODE_CREATE_DUMB, &buffer);
|
|
pitch = buffer.pitch;
|
|
drm_prime_handle prime = {};
|
|
prime.handle = buffer.handle;
|
|
if(ret >= 0)
|
|
{
|
|
handle = buffer.handle;
|
|
prime.flags = DRM_RDWR;
|
|
ret = ioctl(drm_fd, DRM_IOCTL_PRIME_HANDLE_TO_FD, &prime);
|
|
mapped_size = width * height * 4;
|
|
if(ret >= 0)
|
|
{
|
|
mapped_buffer = (char*)mmap(NULL, mapped_size , PROT_WRITE, MAP_SHARED, prime.fd, 0);
|
|
buffer_fd = prime.fd;
|
|
}
|
|
else
|
|
printf("DRM_IOCTL_PRIME_HANDLE_TO_FD failed\n");
|
|
}
|
|
else
|
|
printf("DRM_IOCTL_MODE_CREATE_DUMB failed\n");
|
|
if((void*)mapped_buffer == MAP_FAILED || !mapped_buffer)
|
|
{
|
|
printf("DRI3: not availiable\n");
|
|
mapped_buffer = NULL;
|
|
Destroy();
|
|
}
|
|
// else
|
|
{
|
|
drm_mode_destroy_dumb req = {};
|
|
req.handle = handle;
|
|
ioctl(drm_fd, DRM_IOCTL_MODE_DESTROY_DUMB, &req);
|
|
}
|
|
return buffer_fd;
|
|
}
|
|
};
|
|
|
|
// Process-wide DRM helper instance.
DrmHelper gDrm;

// Dimensions of the rendered mandelbrot set, in pixels.
constexpr int WIDTH = 1920;
constexpr int HEIGHT = 1080;
// Workgroup size in the compute shader.
constexpr int WORKGROUP_SIZE = 32;

/// TODO: why it even should depend on NDEBUG???
// Validation layers are on unless the build defines NDEBUG.
#ifndef NDEBUG
const bool enableValidationLayers = true;
#else
const bool enableValidationLayers = false;
#endif
|
|
|
|
// Validates the return value of a Vulkan API call: evaluates `f` exactly once
// and, when the result is not VK_SUCCESS, prints the numeric result with the
// source location and then asserts. With NDEBUG defined the assert is compiled
// out and only the message remains.
#define VK_CHECK_RESULT(f)                                                                \
{                                                                                         \
    VkResult res = (f);                                                                   \
    if (VK_SUCCESS != res) {                                                              \
        printf("Fatal : VkResult is %d in %s at line %d\n", res, __FILE__, __LINE__);    \
        assert(res == VK_SUCCESS);                                                        \
    }                                                                                     \
}

// Number of FrameContext entries in ComputeApplication::chain.
#define CHAIN_SIZE 4
|
|
/*
|
|
The application launches a compute shader that renders the mandelbrot set,
|
|
by rendering it into a storage buffer.
|
|
The storage buffer is then read from the GPU, and saved as .png.
|
|
*/
|
|
struct ComputeApplication {
|
|
// One pixel of the rendered mandelbrot set: four float channels, in this order.
struct Pixel {
    float r;
    float g;
    float b;
    float a;
};
|
|
|
|
/*
|
|
In order to use Vulkan, you must create an instance.
|
|
*/
|
|
VkInstance instance;
|
|
|
|
VkDebugReportCallbackEXT debugReportCallback;
|
|
/*
|
|
The physical device is some device on the system that supports usage of Vulkan.
|
|
Often, it is simply a graphics card that supports Vulkan.
|
|
*/
|
|
VkPhysicalDevice physicalDevice;
|
|
/*
|
|
Then we have the logical device VkDevice, which basically allows
|
|
us to interact with the physical device.
|
|
*/
|
|
VkDevice device;
|
|
|
|
/*
|
|
The pipeline specifies the pipeline that all graphics and compute commands pass through in Vulkan.
|
|
|
|
We will be creating a simple compute pipeline in this application.
|
|
*/
|
|
VkPipeline pipeline;
|
|
VkPipelineLayout pipelineLayout;
|
|
VkShaderModule computeShaderModule;
|
|
|
|
/*
|
|
The command buffer is used to record commands, that will be submitted to a queue.
|
|
|
|
To allocate such command buffers, we use a command pool.
|
|
*/
|
|
VkCommandPool commandPool;
|
|
|
|
/*
|
|
|
|
Descriptors represent resources in shaders. They allow us to use things like
|
|
uniform buffers, storage buffers and images in GLSL.
|
|
|
|
A single descriptor represents a single resource, and several descriptors are organized
|
|
into descriptor sets, which are basically just collections of descriptors.
|
|
*/
|
|
VkDescriptorPool descriptorPool;
|
|
VkDescriptorSetLayout descriptorSetLayout;
|
|
VkCommandBuffer commandBuffer;
|
|
|
|
struct UBO{
|
|
float frameNum;
|
|
};
|
|
struct FrameContext
|
|
{
|
|
VkDescriptorSet descriptorSet;
|
|
VkBuffer ubo;
|
|
VkDeviceMemory uboMemory;
|
|
VkImage image0;
|
|
VkDeviceMemory imageMemory0;
|
|
VkImageView imageView0;
|
|
VkImage image1;
|
|
// todo: single memory block?
|
|
VkDeviceMemory imageMemory1;
|
|
VkImageView imageView1;
|
|
VkCommandBuffer commandBuffer;
|
|
UBO *pMappedUBO = NULL;
|
|
VkFence fence;
|
|
bool running = false;
|
|
|
|
} chain[CHAIN_SIZE];
|
|
|
|
/*
|
|
The mandelbrot set will be rendered to this buffer.
|
|
|
|
The memory that backs the buffer is bufferMemory.
|
|
*/
|
|
|
|
|
|
|
|
//uint32_t bufferSize; // size of `buffer` in bytes.
|
|
|
|
const char * enabledLayers[16];
|
|
size_t enabledLayersCount = 0;
|
|
|
|
/*
|
|
In order to execute commands on a device(GPU), the commands must be submitted
|
|
to a queue. The commands are stored in a command buffer, and this command buffer
|
|
is given to the queue.
|
|
|
|
There will be different kinds of queues on the device. Not all queues support
|
|
graphics operations, for instance. For this application, we at least want a queue
|
|
that supports compute operations.
|
|
*/
|
|
VkQueue queue; // a queue supporting compute operations.
|
|
|
|
/*
|
|
Groups of queues that have the same capabilities (for instance, they all support graphics and compute operations),
|
|
are grouped into queue families.
|
|
|
|
When submitting a command buffer, you must specify to which queue in the family you are submitting to.
|
|
This variable keeps track of the index of that queue in its family.
|
|
*/
|
|
uint32_t queueFamilyIndex;
|
|
|
|
// Debug-report callback registered in createInstance(). Prints the layer
// prefix and the message text; all other arguments are ignored. Always
// returns VK_FALSE.
static VKAPI_ATTR VkBool32 VKAPI_CALL debugReportCallbackFn(
    VkDebugReportFlagsEXT       flags,
    VkDebugReportObjectTypeEXT  objectType,
    uint64_t                    object,
    size_t                      location,
    int32_t                     messageCode,
    const char*                 pLayerPrefix,
    const char*                 pMessage,
    void*                       pUserData)
{
    (void)flags;
    (void)objectType;
    (void)object;
    (void)location;
    (void)messageCode;
    (void)pUserData;

    printf("Debug Report: %s: %s\n", pLayerPrefix, pMessage);
    return VK_FALSE;
}
|
|
|
|
void createInstance() {
|
|
const char * enabledExtensions[16];
|
|
uint32_t enabledExtensionsCount = 0;
|
|
|
|
/*
|
|
By enabling validation layers, Vulkan will emit warnings if the API
|
|
is used incorrectly. We shall enable the layer VK_LAYER_LUNARG_standard_validation,
|
|
which is basically a collection of several useful validation layers.
|
|
*/
|
|
if (enableValidationLayers) {
|
|
/*
|
|
We get all supported layers with vkEnumerateInstanceLayerProperties.
|
|
*/
|
|
uint32_t layerCount;
|
|
vkEnumerateInstanceLayerProperties(&layerCount, NULL);
|
|
|
|
VkLayerProperties layerProperties[layerCount];
|
|
vkEnumerateInstanceLayerProperties(&layerCount, layerProperties);
|
|
|
|
/*
|
|
And then we simply check if VK_LAYER_LUNARG_standard_validation is among the supported layers.
|
|
*/
|
|
bool foundLayer = false;
|
|
for (VkLayerProperties prop : layerProperties) {
|
|
|
|
if (strcmp("VK_LAYER_KHRONOS_validation", prop.layerName) == 0) {
|
|
foundLayer = true;
|
|
break;
|
|
}
|
|
|
|
}
|
|
|
|
if (!foundLayer) {
|
|
printf("Layer VK_LAYER_LUNARG_standard_validation not supported\n");
|
|
}
|
|
else
|
|
enabledLayers[enabledLayersCount++] = "VK_LAYER_KHRONOS_validation"; // Alright, we can use this layer.
|
|
|
|
/*
|
|
We need to enable an extension named VK_EXT_DEBUG_REPORT_EXTENSION_NAME,
|
|
in order to be able to print the warnings emitted by the validation layer.
|
|
|
|
So again, we just check if the extension is among the supported extensions.
|
|
*/
|
|
|
|
uint32_t extensionCount;
|
|
|
|
vkEnumerateInstanceExtensionProperties(NULL, &extensionCount, NULL);
|
|
VkExtensionProperties extensionProperties[extensionCount];
|
|
vkEnumerateInstanceExtensionProperties(NULL, &extensionCount, extensionProperties);
|
|
|
|
bool foundExtension = false;
|
|
for (VkExtensionProperties prop : extensionProperties) {
|
|
if (strcmp(VK_EXT_DEBUG_REPORT_EXTENSION_NAME, prop.extensionName) == 0) {
|
|
foundExtension = true;
|
|
break;
|
|
}
|
|
|
|
}
|
|
|
|
if (!foundExtension) {
|
|
printf("Extension VK_EXT_DEBUG_REPORT_EXTENSION_NAME not supported\n");
|
|
}
|
|
else enabledExtensions[enabledExtensionsCount++] = VK_EXT_DEBUG_REPORT_EXTENSION_NAME;
|
|
}
|
|
|
|
/*
|
|
Next, we actually create the instance.
|
|
|
|
*/
|
|
|
|
/*
|
|
Contains application info. This is actually not that important.
|
|
The only real important field is apiVersion.
|
|
*/
|
|
VkApplicationInfo applicationInfo = {};
|
|
applicationInfo.sType = VK_STRUCTURE_TYPE_APPLICATION_INFO;
|
|
applicationInfo.pApplicationName = "vkComputeQueueTest";
|
|
applicationInfo.applicationVersion = 0;
|
|
applicationInfo.pEngineName = "streamingengine";
|
|
applicationInfo.engineVersion = 0;
|
|
applicationInfo.apiVersion = VK_API_VERSION_1_1;;
|
|
|
|
VkInstanceCreateInfo createInfo = {};
|
|
createInfo.sType = VK_STRUCTURE_TYPE_INSTANCE_CREATE_INFO;
|
|
createInfo.flags = 0;
|
|
createInfo.pApplicationInfo = &applicationInfo;
|
|
|
|
// Give our desired layers and extensions to vulkan.
|
|
createInfo.enabledLayerCount = enabledLayersCount;
|
|
createInfo.ppEnabledLayerNames = enabledLayers;
|
|
createInfo.enabledExtensionCount = enabledExtensionsCount;
|
|
createInfo.ppEnabledExtensionNames = enabledExtensions;
|
|
|
|
/*
|
|
Actually create the instance.
|
|
Having created the instance, we can actually start using vulkan.
|
|
*/
|
|
VK_CHECK_RESULT(vkCreateInstance(
|
|
&createInfo,
|
|
NULL,
|
|
&instance));
|
|
|
|
/*
|
|
Register a callback function for the extension VK_EXT_DEBUG_REPORT_EXTENSION_NAME, so that warnings emitted from the validation
|
|
layer are actually printed.
|
|
*/
|
|
if (enableValidationLayers) {
|
|
VkDebugReportCallbackCreateInfoEXT createInfo = {};
|
|
createInfo.sType = VK_STRUCTURE_TYPE_DEBUG_REPORT_CALLBACK_CREATE_INFO_EXT;
|
|
createInfo.flags = VK_DEBUG_REPORT_ERROR_BIT_EXT | VK_DEBUG_REPORT_WARNING_BIT_EXT | VK_DEBUG_REPORT_PERFORMANCE_WARNING_BIT_EXT;
|
|
createInfo.pfnCallback = &debugReportCallbackFn;
|
|
|
|
// We have to explicitly load this function.
|
|
auto vkCreateDebugReportCallbackEXT = (PFN_vkCreateDebugReportCallbackEXT)vkGetInstanceProcAddr(instance, "vkCreateDebugReportCallbackEXT");
|
|
if (vkCreateDebugReportCallbackEXT == nullptr) {
|
|
printf("Could not load vkCreateDebugReportCallbackEXT\n");
|
|
return;
|
|
}
|
|
|
|
// Create and register callback.
|
|
VK_CHECK_RESULT(vkCreateDebugReportCallbackEXT(instance, &createInfo, NULL, &debugReportCallback));
|
|
}
|
|
|
|
}
|
|
|
|
void findPhysicalDevice() {
|
|
/*
|
|
In this function, we find a physical device that can be used with Vulkan.
|
|
*/
|
|
|
|
/*
|
|
So, first we will list all physical devices on the system with vkEnumeratePhysicalDevices .
|
|
*/
|
|
uint32_t deviceCount;
|
|
vkEnumeratePhysicalDevices(instance, &deviceCount, NULL);
|
|
if (deviceCount == 0) {
|
|
printf("could not find a device with vulkan support\n");
|
|
return;
|
|
}
|
|
|
|
VkPhysicalDevice devices[deviceCount];
|
|
vkEnumeratePhysicalDevices(instance, &deviceCount, devices);
|
|
|
|
/*
|
|
Next, we choose a device that can be used for our purposes.
|
|
|
|
With VkPhysicalDeviceFeatures(), we can retrieve a fine-grained list of physical features supported by the device.
|
|
However, in this demo, we are simply launching a simple compute shader, and there are no
|
|
special physical features demanded for this task.
|
|
|
|
With VkPhysicalDeviceProperties(), we can obtain a list of physical device properties. Most importantly,
|
|
we obtain a list of physical device limitations. For this application, we launch a compute shader,
|
|
and the maximum size of the workgroups and total number of compute shader invocations is limited by the physical device,
|
|
and we should ensure that the limitations named maxComputeWorkGroupCount, maxComputeWorkGroupInvocations and
|
|
maxComputeWorkGroupSize are not exceeded by our application. Moreover, we are using a storage buffer in the compute shader,
|
|
and we should ensure that it is not larger than the device can handle, by checking the limitation maxStorageBufferRange.
|
|
|
|
However, in our application, the workgroup size and total number of shader invocations is relatively small, and the storage buffer is
|
|
not that large, and thus a vast majority of devices will be able to handle it. This can be verified by looking at some devices at_
|
|
http://vulkan.gpuinfo.org/
|
|
|
|
Therefore, to keep things simple and clean, we will not perform any such checks here, and just pick the first physical
|
|
device in the list. But in a real and serious application, those limitations should certainly be taken into account.
|
|
|
|
*/
|
|
for (VkPhysicalDevice device : devices) {
|
|
if (true) { // As above stated, we do no feature checks, so just accept.
|
|
physicalDevice = device;
|
|
break;
|
|
}
|
|
}
|
|
}
|
|
|
|
// Returns the index of a queue family that supports compute operations.
|
|
uint32_t getComputeQueueFamilyIndex() {
|
|
uint32_t queueFamilyCount;
|
|
|
|
vkGetPhysicalDeviceQueueFamilyProperties(physicalDevice, &queueFamilyCount, NULL);
|
|
|
|
// Retrieve all queue families.
|
|
VkQueueFamilyProperties queueFamilies[queueFamilyCount];
|
|
vkGetPhysicalDeviceQueueFamilyProperties(physicalDevice, &queueFamilyCount, queueFamilies);
|
|
|
|
// Now find a family that supports compute.
|
|
uint32_t i = 0;
|
|
for (; i < queueFamilyCount; ++i) {
|
|
VkQueueFamilyProperties props = queueFamilies[i];
|
|
|
|
if (props.queueCount > 0 && (props.queueFlags & VK_QUEUE_COMPUTE_BIT)) {
|
|
// found a queue with compute. We're done!
|
|
break;
|
|
}
|
|
}
|
|
|
|
if (i == queueFamilyCount) {
|
|
printf("could not find a queue family that supports operations\n");
|
|
return -1;
|
|
}
|
|
|
|
return i;
|
|
}
|
|
|
|
void createDevice() {
|
|
/*
|
|
We create the logical device in this function.
|
|
*/
|
|
|
|
/*
|
|
When creating the device, we also specify what queues it has.
|
|
*/
|
|
VkDeviceQueueCreateInfo queueCreateInfo = {};
|
|
queueCreateInfo.sType = VK_STRUCTURE_TYPE_DEVICE_QUEUE_CREATE_INFO;
|
|
queueFamilyIndex = getComputeQueueFamilyIndex(); // find queue family with compute capability.
|
|
queueCreateInfo.queueFamilyIndex = queueFamilyIndex;
|
|
queueCreateInfo.queueCount = 1; // create one queue in this family. We don't need more.
|
|
float queuePriorities = 1.0; // we only have one queue, so this is not that imporant.
|
|
queueCreateInfo.pQueuePriorities = &queuePriorities;
|
|
|
|
/*
|
|
Now we create the logical device. The logical device allows us to interact with the physical
|
|
device.
|
|
*/
|
|
VkDeviceCreateInfo deviceCreateInfo = {};
|
|
|
|
// Specify any desired device features here. We do not need any for this application, though.
|
|
VkPhysicalDeviceFeatures deviceFeatures = {};
|
|
const char *deviceExtensions[16];
|
|
uint32_t deviceExtensionsCount = 0;
|
|
deviceExtensions[deviceExtensionsCount++] = VK_KHR_EXTERNAL_MEMORY_EXTENSION_NAME;
|
|
//deviceExtensions[deviceExtensionsCount++] = VK_KHR_EXTERNAL_MEMORY_CAPABILITIES_EXTENSION_NAME;
|
|
deviceExtensions[deviceExtensionsCount++] = VK_KHR_EXTERNAL_MEMORY_FD_EXTENSION_NAME;
|
|
deviceExtensions[deviceExtensionsCount++] = VK_EXT_EXTERNAL_MEMORY_DMA_BUF_EXTENSION_NAME;
|
|
deviceExtensions[deviceExtensionsCount++] = VK_EXT_IMAGE_DRM_FORMAT_MODIFIER_EXTENSION_NAME;
|
|
deviceExtensions[deviceExtensionsCount++] = VK_KHR_IMAGE_FORMAT_LIST_EXTENSION_NAME;
|
|
|
|
deviceCreateInfo.sType = VK_STRUCTURE_TYPE_DEVICE_CREATE_INFO;
|
|
deviceCreateInfo.enabledLayerCount = enabledLayersCount; // need to specify validation layers here as well.
|
|
deviceCreateInfo.ppEnabledLayerNames = enabledLayers;
|
|
deviceCreateInfo.pQueueCreateInfos = &queueCreateInfo; // when creating the logical device, we also specify what queues it has.
|
|
deviceCreateInfo.queueCreateInfoCount = 1;
|
|
deviceCreateInfo.enabledExtensionCount = deviceExtensionsCount;
|
|
deviceCreateInfo.ppEnabledExtensionNames = deviceExtensions;
|
|
deviceCreateInfo.pEnabledFeatures = &deviceFeatures;
|
|
|
|
VK_CHECK_RESULT(vkCreateDevice(physicalDevice, &deviceCreateInfo, NULL, &device)); // create logical device.
|
|
|
|
// Get a handle to the only member of the queue family.
|
|
vkGetDeviceQueue(device, queueFamilyIndex, 0, &queue);
|
|
}
|
|
|
|
// find memory type with desired properties.
|
|
uint32_t findMemoryType(uint32_t memoryTypeBits, VkMemoryPropertyFlags properties) {
|
|
VkPhysicalDeviceMemoryProperties memoryProperties;
|
|
|
|
vkGetPhysicalDeviceMemoryProperties(physicalDevice, &memoryProperties);
|
|
|
|
/*
|
|
How does this search work?
|
|
See the documentation of VkPhysicalDeviceMemoryProperties for a detailed description.
|
|
*/
|
|
for (uint32_t i = 0; i < memoryProperties.memoryTypeCount; ++i) {
|
|
if ((memoryTypeBits & (1 << i)) &&
|
|
((memoryProperties.memoryTypes[i].propertyFlags & properties) == properties))
|
|
return i;
|
|
}
|
|
return -1;
|
|
}
|
|
|
|
void createUBO(int chidx) {
|
|
/*
|
|
We will now create a buffer. We will render the mandelbrot set into this buffer
|
|
in a computer shade later.
|
|
*/
|
|
|
|
VkBufferCreateInfo bufferCreateInfo = {};
|
|
bufferCreateInfo.sType = VK_STRUCTURE_TYPE_BUFFER_CREATE_INFO;
|
|
bufferCreateInfo.size = sizeof(UBO); // buffer size in bytes.
|
|
bufferCreateInfo.usage = VK_BUFFER_USAGE_UNIFORM_BUFFER_BIT; // buffer is used as a storage buffer.
|
|
bufferCreateInfo.sharingMode = VK_SHARING_MODE_EXCLUSIVE; // buffer is exclusive to a single queue family at a time.
|
|
|
|
VK_CHECK_RESULT(vkCreateBuffer(device, &bufferCreateInfo, NULL, &chain[chidx].ubo)); // create buffer.
|
|
|
|
/*
|
|
But the buffer doesn't allocate memory for itself, so we must do that manually.
|
|
*/
|
|
|
|
/*
|
|
First, we find the memory requirements for the buffer.
|
|
*/
|
|
VkMemoryRequirements memoryRequirements;
|
|
vkGetBufferMemoryRequirements(device, chain[chidx].ubo, &memoryRequirements);
|
|
|
|
/*
|
|
Now use obtained memory requirements info to allocate the memory for the buffer.
|
|
*/
|
|
VkMemoryAllocateInfo allocateInfo = {};
|
|
allocateInfo.sType = VK_STRUCTURE_TYPE_MEMORY_ALLOCATE_INFO;
|
|
allocateInfo.allocationSize = memoryRequirements.size; // specify required memory.
|
|
/*
|
|
There are several types of memory that can be allocated, and we must choose a memory type that:
|
|
|
|
1) Satisfies the memory requirements(memoryRequirements.memoryTypeBits).
|
|
2) Satifies our own usage requirements. We want to be able to read the buffer memory from the GPU to the CPU
|
|
with vkMapMemory, so we set VK_MEMORY_PROPERTY_HOST_VISIBLE_BIT.
|
|
Also, by setting VK_MEMORY_PROPERTY_HOST_COHERENT_BIT, memory written by the device(GPU) will be easily
|
|
visible to the host(CPU), without having to call any extra flushing commands. So mainly for convenience, we set
|
|
this flag.
|
|
*/
|
|
allocateInfo.memoryTypeIndex = findMemoryType(
|
|
memoryRequirements.memoryTypeBits, VK_MEMORY_PROPERTY_HOST_COHERENT_BIT | VK_MEMORY_PROPERTY_HOST_VISIBLE_BIT);
|
|
|
|
VK_CHECK_RESULT(vkAllocateMemory(device, &allocateInfo, NULL, &chain[chidx].uboMemory)); // allocate memory on device.
|
|
|
|
// Now associate that allocated memory with the buffer. With that, the buffer is backed by actual memory.
|
|
VK_CHECK_RESULT(vkBindBufferMemory(device, chain[chidx].ubo, chain[chidx].uboMemory, 0));
|
|
vkMapMemory(device, chain[chidx].uboMemory, 0, sizeof(UBO), 0, (void**)&chain[chidx].pMappedUBO);
|
|
}
|
|
int getAvailiableModifiersList(uint64_t *modifiers2, size_t len, VkFormat fmt)
|
|
{
|
|
VkDrmFormatModifierPropertiesEXT modifiers[len];
|
|
VkDrmFormatModifierPropertiesListEXT formatList = {VK_STRUCTURE_TYPE_DRM_FORMAT_MODIFIER_PROPERTIES_LIST_EXT};
|
|
VkFormatProperties2 prop = {VK_STRUCTURE_TYPE_FORMAT_PROPERTIES_2};
|
|
prop.pNext = &formatList;
|
|
formatList.drmFormatModifierCount = len;
|
|
formatList.pDrmFormatModifierProperties = modifiers;
|
|
int count = 0;
|
|
vkGetPhysicalDeviceFormatProperties2(physicalDevice, fmt, &prop);
|
|
for(int i = 0; i < formatList.drmFormatModifierCount; i++)
|
|
{
|
|
modifiers2[count++] = modifiers[i].drmFormatModifier;
|
|
printf("mod %llx %d %d\n", modifiers[i].drmFormatModifier, modifiers[i].drmFormatModifierPlaneCount, (int)modifiers[i].drmFormatModifierTilingFeatures);
|
|
}
|
|
return count;
|
|
}
|
|
|
|
// create and import dmabuf
|
|
void createImageDumbDmabuf(VkImage &image, VkImageView &imageView, VkDeviceMemory &imageMemory) {
|
|
/*
|
|
We will now create a buffer. We will render the mandelbrot set into this buffer
|
|
in a computer shade later.
|
|
*/
|
|
VkImageCreateInfo imageCreateInfo = {VK_STRUCTURE_TYPE_IMAGE_CREATE_INFO};
|
|
imageCreateInfo.imageType = VK_IMAGE_TYPE_2D;
|
|
imageCreateInfo.format = VK_FORMAT_R8G8B8A8_UNORM;
|
|
imageCreateInfo.extent = { WIDTH, HEIGHT, 1 };
|
|
imageCreateInfo.mipLevels = 1;
|
|
imageCreateInfo.arrayLayers = 1;
|
|
imageCreateInfo.samples = VK_SAMPLE_COUNT_1_BIT;
|
|
imageCreateInfo.tiling = VK_IMAGE_TILING_DRM_FORMAT_MODIFIER_EXT;//VK_IMAGE_TILING_LINEAR;
|
|
// todo: do we need SAMPLED?
|
|
imageCreateInfo.usage = VK_IMAGE_USAGE_STORAGE_BIT;
|
|
imageCreateInfo.sharingMode = VK_SHARING_MODE_EXCLUSIVE;
|
|
|
|
// external image stuff
|
|
imageCreateInfo.initialLayout = VK_IMAGE_LAYOUT_UNDEFINED; //PREINITIALIZED;
|
|
VkExternalMemoryImageCreateInfo extInfo = {VK_STRUCTURE_TYPE_EXTERNAL_MEMORY_IMAGE_CREATE_INFO};
|
|
extInfo.handleTypes = VK_EXTERNAL_MEMORY_HANDLE_TYPE_DMA_BUF_BIT_EXT;
|
|
imageCreateInfo.pNext = &extInfo;
|
|
VkImageDrmFormatModifierExplicitCreateInfoEXT drmModInfo = {VK_STRUCTURE_TYPE_IMAGE_DRM_FORMAT_MODIFIER_EXPLICIT_CREATE_INFO_EXT};
|
|
int drmfd = gDrm.Allocate(WIDTH, HEIGHT);
|
|
VkSubresourceLayout layout;
|
|
layout.arrayPitch = 0;
|
|
layout.depthPitch = 0;
|
|
layout.offset = 0;
|
|
layout.size = 0;
|
|
layout.rowPitch = gDrm.pitch;
|
|
//layout.
|
|
drmModInfo.drmFormatModifierPlaneCount = 1;
|
|
drmModInfo.drmFormatModifier = DRM_FORMAT_MOD_LINEAR;
|
|
drmModInfo.pPlaneLayouts = &layout;
|
|
extInfo.pNext = &drmModInfo;
|
|
|
|
|
|
VK_CHECK_RESULT(vkCreateImage(device, &imageCreateInfo, NULL, &image)); // create image.
|
|
|
|
/*
|
|
But the buffer doesn't allocate memory for itself, so we must do that manually.
|
|
*/
|
|
|
|
/*
|
|
First, we find the memory requirements for the buffer.
|
|
*/
|
|
VkMemoryRequirements memoryRequirements;
|
|
vkGetImageMemoryRequirements(device, image, &memoryRequirements);
|
|
|
|
VkImportMemoryFdInfoKHR importInfo = {VK_STRUCTURE_TYPE_IMPORT_MEMORY_FD_INFO_KHR};
|
|
importInfo.handleType =VK_EXTERNAL_MEMORY_HANDLE_TYPE_DMA_BUF_BIT_EXT;
|
|
importInfo.fd = drmfd;
|
|
VkMemoryDedicatedAllocateInfo dedicatedAllocInfo = {VK_STRUCTURE_TYPE_MEMORY_DEDICATED_ALLOCATE_INFO};
|
|
dedicatedAllocInfo.image = image;
|
|
importInfo.pNext = &dedicatedAllocInfo;
|
|
VkMemoryFdPropertiesKHR fdProps = {VK_STRUCTURE_TYPE_MEMORY_FD_PROPERTIES_KHR};
|
|
PFN_vkGetMemoryFdPropertiesKHR vkGetMemoryFdProperties = (PFN_vkGetMemoryFdPropertiesKHR)vkGetInstanceProcAddr(instance, "vkGetMemoryFdPropertiesKHR");
|
|
vkGetMemoryFdProperties(device,VK_EXTERNAL_MEMORY_HANDLE_TYPE_DMA_BUF_BIT_EXT, drmfd, &fdProps);
|
|
|
|
/*
|
|
Now use obtained memory requirements info to allocate the memory for the buffer.
|
|
*/
|
|
VkMemoryAllocateInfo allocateInfo = {VK_STRUCTURE_TYPE_MEMORY_ALLOCATE_INFO};
|
|
allocateInfo.allocationSize = memoryRequirements.size; // specify required memory.
|
|
allocateInfo.pNext = &importInfo;
|
|
/*
|
|
There are several types of memory that can be allocated, and we must choose a memory type that:
|
|
|
|
1) Satisfies the memory requirements(memoryRequirements.memoryTypeBits).
|
|
2) Satifies our own usage requirements. We want to be able to read the buffer memory from the GPU to the CPU
|
|
with vkMapMemory, so we set VK_MEMORY_PROPERTY_HOST_VISIBLE_BIT.
|
|
Also, by setting VK_MEMORY_PROPERTY_HOST_COHERENT_BIT, memory written by the device(GPU) will be easily
|
|
visible to the host(CPU), without having to call any extra flushing commands. So mainly for convenience, we set
|
|
this flag.
|
|
*/
|
|
allocateInfo.memoryTypeIndex = findMemoryType(
|
|
memoryRequirements.memoryTypeBits & fdProps.memoryTypeBits, 0);//VK_MEMORY_PROPERTY_HOST_COHERENT_BIT | VK_MEMORY_PROPERTY_HOST_VISIBLE_BIT);
|
|
|
|
VK_CHECK_RESULT(vkAllocateMemory(device, &allocateInfo, NULL, &imageMemory)); // allocate memory on device.
|
|
|
|
// Now associate that allocated memory with the buffer. With that, the buffer is backed by actual memory.
|
|
VK_CHECK_RESULT(vkBindImageMemory(device, image, imageMemory, 0));
|
|
VkImageViewCreateInfo view = {VK_STRUCTURE_TYPE_IMAGE_VIEW_CREATE_INFO};
|
|
view.image = image;
|
|
view.viewType = VK_IMAGE_VIEW_TYPE_2D;
|
|
view.format = VK_FORMAT_R8G8B8A8_UNORM;
|
|
view.subresourceRange = { VK_IMAGE_ASPECT_COLOR_BIT, 0, 1, 0, 1 };
|
|
VK_CHECK_RESULT(vkCreateImageView(device, &view, nullptr, &imageView));
|
|
}
|
|
|
|
// create and import dmabuf
|
|
void createImageDumbDmabuf2(VkImage &image, VkImageView &imageView, VkDeviceMemory &imageMemory, VkImage &image1, VkImageView &imageView1, VkDeviceMemory &imageMemory1, int fd, uint64_t mod, uint32_t size, uint32_t offset, uint32_t pitch1, uint32_t pitch2, bool p010) {
|
|
/*
|
|
We will now create a buffer. We will render the mandelbrot set into this buffer
|
|
in a computer shade later.
|
|
*/
|
|
VkImageCreateInfo imageCreateInfo = {VK_STRUCTURE_TYPE_IMAGE_CREATE_INFO};
|
|
imageCreateInfo.imageType = VK_IMAGE_TYPE_2D;
|
|
imageCreateInfo.format = p010?VK_FORMAT_R16_UNORM:VK_FORMAT_R8_UNORM;
|
|
imageCreateInfo.extent = { WIDTH, HEIGHT, 1 };
|
|
imageCreateInfo.mipLevels = 1;
|
|
imageCreateInfo.arrayLayers = 1;
|
|
imageCreateInfo.samples = VK_SAMPLE_COUNT_1_BIT;
|
|
imageCreateInfo.tiling = VK_IMAGE_TILING_DRM_FORMAT_MODIFIER_EXT;//VK_IMAGE_TILING_LINEAR;
|
|
// todo: do we need SAMPLED?
|
|
imageCreateInfo.usage = VK_IMAGE_USAGE_STORAGE_BIT;
|
|
imageCreateInfo.sharingMode = VK_SHARING_MODE_EXCLUSIVE;
|
|
|
|
// external image stuff
|
|
imageCreateInfo.initialLayout = VK_IMAGE_LAYOUT_UNDEFINED; //PREINITIALIZED;
|
|
VkExternalMemoryImageCreateInfo extInfo = {VK_STRUCTURE_TYPE_EXTERNAL_MEMORY_IMAGE_CREATE_INFO};
|
|
extInfo.handleTypes = VK_EXTERNAL_MEMORY_HANDLE_TYPE_DMA_BUF_BIT_EXT;
|
|
imageCreateInfo.pNext = &extInfo;
|
|
VkImageDrmFormatModifierExplicitCreateInfoEXT drmModInfo = {VK_STRUCTURE_TYPE_IMAGE_DRM_FORMAT_MODIFIER_EXPLICIT_CREATE_INFO_EXT};
|
|
VkSubresourceLayout layout;
|
|
layout.arrayPitch = 0;
|
|
layout.depthPitch = 0;
|
|
layout.offset = 0;
|
|
layout.size = 0;
|
|
layout.rowPitch = pitch1;
|
|
//layout.
|
|
drmModInfo.drmFormatModifierPlaneCount = 1;
|
|
drmModInfo.drmFormatModifier = mod;
|
|
drmModInfo.pPlaneLayouts = &layout;
|
|
extInfo.pNext = &drmModInfo;
|
|
|
|
|
|
VK_CHECK_RESULT(vkCreateImage(device, &imageCreateInfo, NULL, &image)); // create image.
|
|
imageCreateInfo.format = p010?VK_FORMAT_R16G16_UNORM:VK_FORMAT_R8G8_UNORM;
|
|
imageCreateInfo.extent = { WIDTH/2, HEIGHT/2, 1 };
|
|
layout.offset = 0;//2088960;
|
|
layout.rowPitch = pitch2;
|
|
VK_CHECK_RESULT(vkCreateImage(device, &imageCreateInfo, NULL, &image1)); // create image.
|
|
|
|
|
|
/*
|
|
But the buffer doesn't allocate memory for itself, so we must do that manually.
|
|
*/
|
|
|
|
/*
|
|
First, we find the memory requirements for the buffer.
|
|
*/
|
|
VkMemoryRequirements memoryRequirements;
|
|
vkGetImageMemoryRequirements(device, image, &memoryRequirements);
|
|
|
|
VkImportMemoryFdInfoKHR importInfo = {VK_STRUCTURE_TYPE_IMPORT_MEMORY_FD_INFO_KHR};
|
|
importInfo.handleType =VK_EXTERNAL_MEMORY_HANDLE_TYPE_DMA_BUF_BIT_EXT;
|
|
importInfo.fd = fd;
|
|
VkMemoryDedicatedAllocateInfo dedicatedAllocInfo = {VK_STRUCTURE_TYPE_MEMORY_DEDICATED_ALLOCATE_INFO};
|
|
dedicatedAllocInfo.image = image;
|
|
//importInfo.pNext = &dedicatedAllocInfo;
|
|
VkMemoryDedicatedAllocateInfo dedicatedAllocInfo2 = {VK_STRUCTURE_TYPE_MEMORY_DEDICATED_ALLOCATE_INFO};
|
|
dedicatedAllocInfo2.image = image1;
|
|
//dedicatedAllocInfo.pNext = &dedicatedAllocInfo2;
|
|
VkMemoryFdPropertiesKHR fdProps = {VK_STRUCTURE_TYPE_MEMORY_FD_PROPERTIES_KHR};
|
|
PFN_vkGetMemoryFdPropertiesKHR vkGetMemoryFdProperties = (PFN_vkGetMemoryFdPropertiesKHR)vkGetInstanceProcAddr(instance, "vkGetMemoryFdPropertiesKHR");
|
|
vkGetMemoryFdProperties(device,VK_EXTERNAL_MEMORY_HANDLE_TYPE_DMA_BUF_BIT_EXT, fd, &fdProps);
|
|
|
|
/*
|
|
Now use obtained memory requirements info to allocate the memory for the buffer.
|
|
*/
|
|
VkMemoryAllocateInfo allocateInfo = {VK_STRUCTURE_TYPE_MEMORY_ALLOCATE_INFO};
|
|
allocateInfo.allocationSize = size;//memoryRequirements.size; // specify required memory.
|
|
allocateInfo.pNext = &importInfo;
|
|
/*
|
|
There are several types of memory that can be allocated, and we must choose a memory type that:
|
|
|
|
1) Satisfies the memory requirements(memoryRequirements.memoryTypeBits).
|
|
2) Satifies our own usage requirements. We want to be able to read the buffer memory from the GPU to the CPU
|
|
with vkMapMemory, so we set VK_MEMORY_PROPERTY_HOST_VISIBLE_BIT.
|
|
Also, by setting VK_MEMORY_PROPERTY_HOST_COHERENT_BIT, memory written by the device(GPU) will be easily
|
|
visible to the host(CPU), without having to call any extra flushing commands. So mainly for convenience, we set
|
|
this flag.
|
|
*/
|
|
allocateInfo.memoryTypeIndex = findMemoryType(
|
|
memoryRequirements.memoryTypeBits & fdProps.memoryTypeBits, 0);//VK_MEMORY_PROPERTY_HOST_COHERENT_BIT | VK_MEMORY_PROPERTY_HOST_VISIBLE_BIT);
|
|
|
|
VK_CHECK_RESULT(vkAllocateMemory(device, &allocateInfo, NULL, &imageMemory)); // allocate memory on device.
|
|
|
|
// Now associate that allocated memory with the buffer. With that, the buffer is backed by actual memory.
|
|
VK_CHECK_RESULT(vkBindImageMemory(device, image, imageMemory, 0));
|
|
VK_CHECK_RESULT(vkBindImageMemory(device, image1, imageMemory, offset));
|
|
VkImageViewCreateInfo view = {VK_STRUCTURE_TYPE_IMAGE_VIEW_CREATE_INFO};
|
|
view.image = image;
|
|
view.viewType = VK_IMAGE_VIEW_TYPE_2D;
|
|
view.format = p010?VK_FORMAT_R16_UNORM:VK_FORMAT_R8_UNORM;
|
|
view.subresourceRange = { VK_IMAGE_ASPECT_COLOR_BIT, 0, 1, 0, 1 };
|
|
VK_CHECK_RESULT(vkCreateImageView(device, &view, nullptr, &imageView));
|
|
view.format = p010?VK_FORMAT_R16G16_UNORM:VK_FORMAT_R8G8_UNORM;
|
|
view.image = image1;
|
|
VK_CHECK_RESULT(vkCreateImageView(device, &view, nullptr, &imageView1));
|
|
}
|
|
#if 0
|
|
// create and import dmabuf as opaque fd, allows any tiling
|
|
void createImageDumbOpaque(VkImage &image, VkImageView &imageView, VkDeviceMemory &imageMemory) {
|
|
/*
|
|
We will now create a buffer. We will render the mandelbrot set into this buffer
|
|
in a computer shade later.
|
|
*/
|
|
VkImageCreateInfo imageCreateInfo = {VK_STRUCTURE_TYPE_IMAGE_CREATE_INFO};
|
|
imageCreateInfo.imageType = VK_IMAGE_TYPE_2D;
|
|
imageCreateInfo.format = VK_FORMAT_R8G8B8A8_UNORM;
|
|
imageCreateInfo.extent = { WIDTH, HEIGHT, 1 };
|
|
imageCreateInfo.mipLevels = 1;
|
|
imageCreateInfo.arrayLayers = 1;
|
|
imageCreateInfo.samples = VK_SAMPLE_COUNT_1_BIT;
|
|
imageCreateInfo.tiling = VK_IMAGE_TILING_OPTIMAL;
|
|
// todo: do we need SAMPLED?
|
|
imageCreateInfo.usage = VK_IMAGE_USAGE_STORAGE_BIT;
|
|
imageCreateInfo.sharingMode = VK_SHARING_MODE_EXCLUSIVE;
|
|
|
|
// external image stuff
|
|
imageCreateInfo.initialLayout = VK_IMAGE_LAYOUT_UNDEFINED; //PREINITIALIZED;
|
|
VkExternalMemoryImageCreateInfo extInfo = {VK_STRUCTURE_TYPE_EXTERNAL_MEMORY_IMAGE_CREATE_INFO};
|
|
extInfo.handleTypes = VK_EXTERNAL_MEMORY_HANDLE_TYPE_OPAQUE_FD_BIT;
|
|
imageCreateInfo.pNext = &extInfo;
|
|
int drmfd = gDrm.Allocate(WIDTH, HEIGHT);
|
|
|
|
VK_CHECK_RESULT(vkCreateImage(device, &imageCreateInfo, NULL, &image)); // create image.
|
|
|
|
/*
|
|
But the buffer doesn't allocate memory for itself, so we must do that manually.
|
|
*/
|
|
|
|
/*
|
|
First, we find the memory requirements for the buffer.
|
|
*/
|
|
VkMemoryRequirements memoryRequirements;
|
|
vkGetImageMemoryRequirements(device, image, &memoryRequirements);
|
|
|
|
VkImportMemoryFdInfoKHR importInfo = {VK_STRUCTURE_TYPE_IMPORT_MEMORY_FD_INFO_KHR};
|
|
importInfo.handleType =VK_EXTERNAL_MEMORY_HANDLE_TYPE_OPAQUE_FD_BIT;
|
|
importInfo.fd = drmfd;
|
|
VkMemoryDedicatedAllocateInfo dedicatedAllocInfo = {VK_STRUCTURE_TYPE_MEMORY_DEDICATED_ALLOCATE_INFO};
|
|
dedicatedAllocInfo.image = image;
|
|
importInfo.pNext = &dedicatedAllocInfo;
|
|
//VkMemoryFdPropertiesKHR fdProps = {VK_STRUCTURE_TYPE_MEMORY_FD_PROPERTIES_KHR};
|
|
//PFN_vkGetMemoryFdPropertiesKHR vkGetMemoryFdProperties = (PFN_vkGetMemoryFdPropertiesKHR)vkGetInstanceProcAddr(instance, "vkGetMemoryFdPropertiesKHR");
|
|
//vkGetMemoryFdProperties(device,VK_EXTERNAL_MEMORY_HANDLE_TYPE_OPAQUE_FD_BIT, drmfd, &fdProps);
|
|
|
|
/*
|
|
Now use obtained memory requirements info to allocate the memory for the buffer.
|
|
*/
|
|
VkMemoryAllocateInfo allocateInfo = {VK_STRUCTURE_TYPE_MEMORY_ALLOCATE_INFO};
|
|
allocateInfo.allocationSize = memoryRequirements.size; // specify required memory.
|
|
allocateInfo.pNext = &importInfo;
|
|
/*
|
|
There are several types of memory that can be allocated, and we must choose a memory type that:
|
|
|
|
1) Satisfies the memory requirements(memoryRequirements.memoryTypeBits).
|
|
2) Satifies our own usage requirements. We want to be able to read the buffer memory from the GPU to the CPU
|
|
with vkMapMemory, so we set VK_MEMORY_PROPERTY_HOST_VISIBLE_BIT.
|
|
Also, by setting VK_MEMORY_PROPERTY_HOST_COHERENT_BIT, memory written by the device(GPU) will be easily
|
|
visible to the host(CPU), without having to call any extra flushing commands. So mainly for convenience, we set
|
|
this flag.
|
|
*/
|
|
allocateInfo.memoryTypeIndex = findMemoryType(
|
|
memoryRequirements.memoryTypeBits /*& fdProps.memoryTypeBits*/, 0);//VK_MEMORY_PROPERTY_HOST_COHERENT_BIT | VK_MEMORY_PROPERTY_HOST_VISIBLE_BIT);
|
|
|
|
VK_CHECK_RESULT(vkAllocateMemory(device, &allocateInfo, NULL, &imageMemory)); // allocate memory on device.
|
|
|
|
// Now associate that allocated memory with the buffer. With that, the buffer is backed by actual memory.
|
|
VK_CHECK_RESULT(vkBindImageMemory(device, image, imageMemory, 0));
|
|
VkImageViewCreateInfo view = {VK_STRUCTURE_TYPE_IMAGE_VIEW_CREATE_INFO};
|
|
view.image = image;
|
|
view.viewType = VK_IMAGE_VIEW_TYPE_2D;
|
|
view.format = VK_FORMAT_R8G8B8A8_UNORM;
|
|
view.subresourceRange = { VK_IMAGE_ASPECT_COLOR_BIT, 0, 1, 0, 1 };
|
|
VK_CHECK_RESULT(vkCreateImageView(device, &view, nullptr, &imageView));
|
|
}
|
|
int prime_fd, prime_fd_uv;
|
|
|
|
// create and import dmabuf as opaque fd, allows any tiling
|
|
// create and import dmabuf
|
|
void createImageExportableDmabuf(VkImage &image, VkImageView &imageView, VkDeviceMemory &imageMemory, int &fd, int width, int height, VkFormat format) {
|
|
/*
|
|
We will now create a buffer. We will render the mandelbrot set into this buffer
|
|
in a computer shade later.
|
|
*/
|
|
VkImageCreateInfo imageCreateInfo = {VK_STRUCTURE_TYPE_IMAGE_CREATE_INFO};
|
|
imageCreateInfo.imageType = VK_IMAGE_TYPE_2D;
|
|
imageCreateInfo.format = format;
|
|
imageCreateInfo.extent = { (unsigned int)width, (unsigned int)height, 1 };
|
|
imageCreateInfo.mipLevels = 1;
|
|
imageCreateInfo.arrayLayers = 1;
|
|
imageCreateInfo.samples = VK_SAMPLE_COUNT_1_BIT;
|
|
imageCreateInfo.tiling = VK_IMAGE_TILING_DRM_FORMAT_MODIFIER_EXT;//VK_IMAGE_TILING_LINEAR;
|
|
// todo: do we need SAMPLED?
|
|
imageCreateInfo.usage = VK_IMAGE_USAGE_STORAGE_BIT;
|
|
imageCreateInfo.sharingMode = VK_SHARING_MODE_EXCLUSIVE;
|
|
|
|
// external image stuff
|
|
imageCreateInfo.initialLayout = VK_IMAGE_LAYOUT_UNDEFINED; //PREINITIALIZED;
|
|
VkExternalMemoryImageCreateInfo extInfo = {VK_STRUCTURE_TYPE_EXTERNAL_MEMORY_IMAGE_CREATE_INFO};
|
|
extInfo.handleTypes = VK_EXTERNAL_MEMORY_HANDLE_TYPE_DMA_BUF_BIT_EXT;
|
|
imageCreateInfo.pNext = &extInfo;
|
|
|
|
VkImageDrmFormatModifierListCreateInfoEXT modifierList = {VK_STRUCTURE_TYPE_IMAGE_DRM_FORMAT_MODIFIER_LIST_CREATE_INFO_EXT};
|
|
uint64_t modifiers2[32];
|
|
modifierList.drmFormatModifierCount = getAvailiableModifiersList(modifiers2, 32, format);
|
|
modifierList.pDrmFormatModifiers = modifiers2;
|
|
|
|
|
|
VK_CHECK_RESULT(vkCreateImage(device, &imageCreateInfo, NULL, &image)); // create image.
|
|
|
|
imageCreateInfo.format = VK_FORMAT_R8G8_UNORM;
|
|
imageCreateInfo.extent = { WIDTH/2, HEIGHT/2, 1 };
|
|
VK_CHECK_RESULT(vkCreateImage(device, &imageCreateInfo, NULL, &image1)); // create image.
|
|
|
|
/*
|
|
But the buffer doesn't allocate memory for itself, so we must do that manually.
|
|
*/
|
|
|
|
/*
|
|
First, we find the memory requirements for the buffer.
|
|
*/
|
|
VkMemoryRequirements memoryRequirements;
|
|
vkGetImageMemoryRequirements(device, image, &memoryRequirements);
|
|
|
|
VkExportMemoryAllocateInfo exportInfo = {VK_STRUCTURE_TYPE_EXPORT_MEMORY_ALLOCATE_INFO};
|
|
exportInfo.handleTypes = VK_EXTERNAL_MEMORY_HANDLE_TYPE_DMA_BUF_BIT_EXT;
|
|
|
|
VkMemoryDedicatedAllocateInfo dedicatedAllocInfo = {VK_STRUCTURE_TYPE_MEMORY_DEDICATED_ALLOCATE_INFO};
|
|
dedicatedAllocInfo.image = image;
|
|
dedicatedAllocInfo.pNext = &exportInfo;
|
|
|
|
/*
|
|
Now use obtained memory requirements info to allocate the memory for the buffer.
|
|
*/
|
|
VkMemoryAllocateInfo allocateInfo = {VK_STRUCTURE_TYPE_MEMORY_ALLOCATE_INFO};
|
|
allocateInfo.allocationSize = 3133440;//memoryRequirements.size; // specify required memory.
|
|
// dedicated info unneeded?
|
|
allocateInfo.pNext = &exportInfo;//&dedicatedAllocInfo;
|
|
/*
|
|
There are several types of memory that can be allocated, and we must choose a memory type that:
|
|
|
|
1) Satisfies the memory requirements(memoryRequirements.memoryTypeBits).
|
|
2) Satifies our own usage requirements. We want to be able to read the buffer memory from the GPU to the CPU
|
|
with vkMapMemory, so we set VK_MEMORY_PROPERTY_HOST_VISIBLE_BIT.
|
|
Also, by setting VK_MEMORY_PROPERTY_HOST_COHERENT_BIT, memory written by the device(GPU) will be easily
|
|
visible to the host(CPU), without having to call any extra flushing commands. So mainly for convenience, we set
|
|
this flag.
|
|
*/
|
|
allocateInfo.memoryTypeIndex = findMemoryType(
|
|
memoryRequirements.memoryTypeBits, VK_MEMORY_PROPERTY_HOST_COHERENT_BIT | VK_MEMORY_PROPERTY_HOST_VISIBLE_BIT); // VK_MEMORY_PROPERTY_DEVICE_LOCAL_BIT);//
|
|
|
|
VK_CHECK_RESULT(vkAllocateMemory(device, &allocateInfo, NULL, &imageMemory)); // allocate memory on device.
|
|
|
|
// Now associate that allocated memory with the buffer. With that, the buffer is backed by actual memory.
|
|
VK_CHECK_RESULT(vkBindImageMemory(device, image, imageMemory, 0));
|
|
VK_CHECK_RESULT(vkBindImageMemory(device, image1, imageMemory, 2088960));
|
|
VkImageViewCreateInfo view = {VK_STRUCTURE_TYPE_IMAGE_VIEW_CREATE_INFO};
|
|
view.image = image;
|
|
view.viewType = VK_IMAGE_VIEW_TYPE_2D;
|
|
view.format = format;
|
|
view.subresourceRange = { VK_IMAGE_ASPECT_COLOR_BIT, 0, 1, 0, 1 };
|
|
VK_CHECK_RESULT(vkCreateImageView(device, &view, nullptr, &imageView));
|
|
view.format = VK_FORMAT_R8G8_UNORM;
|
|
view.image = image1;
|
|
VK_CHECK_RESULT(vkCreateImageView(device, &view, nullptr, &imageView1));
|
|
|
|
VkMemoryGetFdInfoKHR getFdInfo = { VK_STRUCTURE_TYPE_MEMORY_GET_FD_INFO_KHR};
|
|
getFdInfo.handleType = VK_EXTERNAL_MEMORY_HANDLE_TYPE_DMA_BUF_BIT_EXT;
|
|
getFdInfo.memory = imageMemory;
|
|
PFN_vkGetMemoryFdKHR pfnvkGetMemoryFdKHR = (PFN_vkGetMemoryFdKHR)vkGetInstanceProcAddr(instance, "vkGetMemoryFdKHR");
|
|
pfnvkGetMemoryFdKHR(device, &getFdInfo, &fd);
|
|
VkImageDrmFormatModifierPropertiesEXT imageModifiers = {VK_STRUCTURE_TYPE_IMAGE_DRM_FORMAT_MODIFIER_PROPERTIES_EXT};
|
|
PFN_vkGetImageDrmFormatModifierPropertiesEXT pfnvkGetImageDrmFormatModifierPropertiesEXT =(PFN_vkGetImageDrmFormatModifierPropertiesEXT)vkGetInstanceProcAddr(instance, "vkGetImageDrmFormatModifierPropertiesEXT");
|
|
pfnvkGetImageDrmFormatModifierPropertiesEXT(device, image, &imageModifiers);
|
|
printf("imageModifier %llx\n", imageModifiers.drmFormatModifier);
|
|
// todo: get subresource plane info (vkGetImageSubresourceLayout)
|
|
}
|
|
#endif
|
|
void createDescriptorSetLayout() {
|
|
/*
|
|
Here we specify a descriptor set layout. This allows us to bind our descriptors to
|
|
resources in the shader.
|
|
|
|
*/
|
|
|
|
/*
|
|
Here we specify a binding of type VK_DESCRIPTOR_TYPE_STORAGE_BUFFER to the binding point
|
|
0. This binds to
|
|
|
|
layout(std140, binding = 0) buffer buf
|
|
|
|
in the compute shader.
|
|
*/
|
|
VkDescriptorSetLayoutBinding descriptorSetLayoutBinding[3] = {};
|
|
descriptorSetLayoutBinding[0].binding = 0; // binding = 0
|
|
descriptorSetLayoutBinding[0].descriptorType = VK_DESCRIPTOR_TYPE_STORAGE_IMAGE;
|
|
descriptorSetLayoutBinding[0].descriptorCount = 1;
|
|
descriptorSetLayoutBinding[0].stageFlags = VK_SHADER_STAGE_COMPUTE_BIT;
|
|
descriptorSetLayoutBinding[1].binding = 1; // binding = 0
|
|
descriptorSetLayoutBinding[1].descriptorType = VK_DESCRIPTOR_TYPE_STORAGE_IMAGE;
|
|
descriptorSetLayoutBinding[1].descriptorCount = 1;
|
|
descriptorSetLayoutBinding[1].stageFlags = VK_SHADER_STAGE_COMPUTE_BIT;
|
|
descriptorSetLayoutBinding[2].binding = 2; // binding = 1
|
|
descriptorSetLayoutBinding[2].descriptorType = VK_DESCRIPTOR_TYPE_UNIFORM_BUFFER;
|
|
descriptorSetLayoutBinding[2].descriptorCount = 1;
|
|
descriptorSetLayoutBinding[2].stageFlags = VK_SHADER_STAGE_COMPUTE_BIT;
|
|
|
|
|
|
VkDescriptorSetLayoutCreateInfo descriptorSetLayoutCreateInfo = {};
|
|
descriptorSetLayoutCreateInfo.sType = VK_STRUCTURE_TYPE_DESCRIPTOR_SET_LAYOUT_CREATE_INFO;
|
|
descriptorSetLayoutCreateInfo.bindingCount = 3; // only a single binding in this descriptor set layout.
|
|
descriptorSetLayoutCreateInfo.pBindings = descriptorSetLayoutBinding;
|
|
|
|
// Create the descriptor set layout.
|
|
VK_CHECK_RESULT(vkCreateDescriptorSetLayout(device, &descriptorSetLayoutCreateInfo, NULL, &descriptorSetLayout));
|
|
}
|
|
void createDescriptorPool()
|
|
{
|
|
/*
|
|
So we will allocate a descriptor set here.
|
|
But we need to first create a descriptor pool to do that.
|
|
*/
|
|
|
|
/*
|
|
Our descriptor pool can only allocate a single storage buffer.
|
|
*/
|
|
VkDescriptorPoolSize descriptorPoolSize[2] = {};
|
|
descriptorPoolSize[0].type = VK_DESCRIPTOR_TYPE_STORAGE_IMAGE;
|
|
descriptorPoolSize[0].descriptorCount = 2*CHAIN_SIZE;
|
|
descriptorPoolSize[1].type = VK_DESCRIPTOR_TYPE_UNIFORM_BUFFER;
|
|
descriptorPoolSize[1].descriptorCount = 1*CHAIN_SIZE;
|
|
VkDescriptorPoolCreateInfo descriptorPoolCreateInfo = {};
|
|
descriptorPoolCreateInfo.sType = VK_STRUCTURE_TYPE_DESCRIPTOR_POOL_CREATE_INFO;
|
|
descriptorPoolCreateInfo.maxSets = CHAIN_SIZE; // we only need to allocate one descriptor set from the pool.
|
|
descriptorPoolCreateInfo.poolSizeCount = 2;
|
|
descriptorPoolCreateInfo.pPoolSizes = descriptorPoolSize;
|
|
|
|
// create descriptor pool.
|
|
VK_CHECK_RESULT(vkCreateDescriptorPool(device, &descriptorPoolCreateInfo, NULL, &descriptorPool));
|
|
}
|
|
|
|
void createDescriptorSet(int chidx) {
|
|
/*
|
|
With the pool allocated, we can now allocate the descriptor set.
|
|
*/
|
|
VkDescriptorSetAllocateInfo descriptorSetAllocateInfo = {};
|
|
descriptorSetAllocateInfo.sType = VK_STRUCTURE_TYPE_DESCRIPTOR_SET_ALLOCATE_INFO;
|
|
descriptorSetAllocateInfo.descriptorPool = descriptorPool; // pool to allocate from.
|
|
descriptorSetAllocateInfo.descriptorSetCount = 1; // allocate a single descriptor set.
|
|
descriptorSetAllocateInfo.pSetLayouts = &descriptorSetLayout;
|
|
|
|
// allocate descriptor set.
|
|
VK_CHECK_RESULT(vkAllocateDescriptorSets(device, &descriptorSetAllocateInfo, &chain[chidx].descriptorSet));
|
|
|
|
/*
|
|
Next, we need to connect our actual storage buffer with the descrptor.
|
|
We use vkUpdateDescriptorSets() to update the descriptor set.
|
|
*/
|
|
|
|
// Specify the buffer to bind to the descriptor.
|
|
|
|
VkDescriptorImageInfo descriptorImageInfo[2] = {};
|
|
descriptorImageInfo[0].imageView = chain[chidx].imageView0;
|
|
descriptorImageInfo[0].imageLayout = VK_IMAGE_LAYOUT_GENERAL;
|
|
descriptorImageInfo[1].imageView = chain[chidx].imageView1;
|
|
descriptorImageInfo[1].imageLayout = VK_IMAGE_LAYOUT_GENERAL;
|
|
|
|
VkWriteDescriptorSet writeDescriptorSet[2] = {};
|
|
writeDescriptorSet[0].sType = VK_STRUCTURE_TYPE_WRITE_DESCRIPTOR_SET;
|
|
writeDescriptorSet[0].dstSet = chain[chidx].descriptorSet; // write to this descriptor set.
|
|
writeDescriptorSet[0].dstBinding = 0; // write to the first, and only binding.
|
|
writeDescriptorSet[0].descriptorCount = 1; // update a single descriptor.
|
|
writeDescriptorSet[0].descriptorType = VK_DESCRIPTOR_TYPE_STORAGE_IMAGE; // storage buffer.
|
|
writeDescriptorSet[0].pImageInfo = &descriptorImageInfo[0];
|
|
writeDescriptorSet[1].sType = VK_STRUCTURE_TYPE_WRITE_DESCRIPTOR_SET;
|
|
writeDescriptorSet[1].dstSet = chain[chidx].descriptorSet; // write to this descriptor set.
|
|
writeDescriptorSet[1].dstBinding = 1; // write to the first, and only binding.
|
|
writeDescriptorSet[1].descriptorCount = 1; // update a single descriptor.
|
|
writeDescriptorSet[1].descriptorType = VK_DESCRIPTOR_TYPE_STORAGE_IMAGE; // storage buffer.
|
|
writeDescriptorSet[1].pImageInfo = &descriptorImageInfo[1];
|
|
|
|
VkDescriptorBufferInfo descriptorBufferInfo = {};
|
|
descriptorBufferInfo.buffer = chain[chidx].ubo;
|
|
descriptorBufferInfo.offset = 0;
|
|
descriptorBufferInfo.range = sizeof(UBO);
|
|
// perform the update of the descriptor set.
|
|
vkUpdateDescriptorSets(device, 2, writeDescriptorSet, 0, NULL);
|
|
writeDescriptorSet[0].dstBinding = 2;
|
|
writeDescriptorSet[0].descriptorType = VK_DESCRIPTOR_TYPE_UNIFORM_BUFFER;
|
|
writeDescriptorSet[0].pBufferInfo = &descriptorBufferInfo;
|
|
writeDescriptorSet[0].pImageInfo = NULL;
|
|
vkUpdateDescriptorSets(device, 1, writeDescriptorSet, 0, NULL);
|
|
}
|
|
|
|
// Read a whole file into a freshly allocated buffer and return it as
// uint32_t*. The buffer is zero-padded up to the next multiple of four bytes
// so it can be consumed as an array of 32-bit words.
//
//   length   : receives the padded size in bytes (0 on failure).
//   filename : path of the file to read.
//
// Returns the buffer (release it with free()), or NULL if the file cannot be
// opened, sized, or read completely, or if memory cannot be allocated.
uint32_t* readFile(uint32_t& length, const char* filename) {
    length = 0;

    FILE* fp = fopen(filename, "rb");
    if (fp == NULL) {
        printf("Could not find or open file: %s\n", filename);
        return NULL;
    }

    // Determine the file size by seeking to the end.
    long filesize = -1;
    if (fseek(fp, 0, SEEK_END) == 0) {
        filesize = ftell(fp);
    }
    if (filesize < 0 || fseek(fp, 0, SEEK_SET) != 0) {
        printf("Could not determine size of file: %s\n", filename);
        fclose(fp);
        return NULL;
    }

    // Round up to a multiple of 4 using integer arithmetic.
    const size_t dataSize = (size_t)filesize;
    const size_t paddedSize = (dataSize + 3) / 4 * 4;
    if ((size_t)(uint32_t)paddedSize != paddedSize) {
        // The padded size would not fit in the uint32_t out-parameter.
        printf("File is too large: %s\n", filename);
        fclose(fp);
        return NULL;
    }

    // calloc zero-fills, which provides the padding bytes. At least four
    // bytes are requested so an empty file still yields a non-NULL buffer.
    char *str = (char*)calloc(paddedSize > 0 ? paddedSize : 4, 1);
    if (str == NULL) {
        printf("Out of memory while reading file: %s\n", filename);
        fclose(fp);
        return NULL;
    }

    const size_t bytesRead = fread(str, 1, dataSize, fp);
    fclose(fp);
    if (bytesRead != dataSize) {
        printf("Could not read file: %s\n", filename);
        free(str);
        return NULL;
    }

    length = (uint32_t)paddedSize;
    return (uint32_t *)str;
}
|
|
|
|
void createComputePipeline() {
|
|
/*
|
|
We create a compute pipeline here.
|
|
*/
|
|
|
|
/*
|
|
Create a shader module. A shader module basically just encapsulates some shader code.
|
|
*/
|
|
uint32_t filelength;
|
|
// the code in comp.spv was created by running the command:
|
|
// glslangValidator.exe -V shader.comp
|
|
uint32_t* code = readFile(filelength, "image.spv");
|
|
VkShaderModuleCreateInfo createInfo = {};
|
|
createInfo.sType = VK_STRUCTURE_TYPE_SHADER_MODULE_CREATE_INFO;
|
|
createInfo.pCode = code;
|
|
createInfo.codeSize = filelength;
|
|
|
|
VK_CHECK_RESULT(vkCreateShaderModule(device, &createInfo, NULL, &computeShaderModule));
|
|
free(code);
|
|
|
|
/*
|
|
Now let us actually create the compute pipeline.
|
|
A compute pipeline is very simple compared to a graphics pipeline.
|
|
It only consists of a single stage with a compute shader.
|
|
|
|
So first we specify the compute shader stage, and it's entry point(main).
|
|
*/
|
|
VkPipelineShaderStageCreateInfo shaderStageCreateInfo = {};
|
|
shaderStageCreateInfo.sType = VK_STRUCTURE_TYPE_PIPELINE_SHADER_STAGE_CREATE_INFO;
|
|
shaderStageCreateInfo.stage = VK_SHADER_STAGE_COMPUTE_BIT;
|
|
shaderStageCreateInfo.module = computeShaderModule;
|
|
shaderStageCreateInfo.pName = "main";
|
|
|
|
/*
|
|
The pipeline layout allows the pipeline to access descriptor sets.
|
|
So we just specify the descriptor set layout we created earlier.
|
|
*/
|
|
VkPipelineLayoutCreateInfo pipelineLayoutCreateInfo = {};
|
|
pipelineLayoutCreateInfo.sType = VK_STRUCTURE_TYPE_PIPELINE_LAYOUT_CREATE_INFO;
|
|
pipelineLayoutCreateInfo.setLayoutCount = 1;
|
|
pipelineLayoutCreateInfo.pSetLayouts = &descriptorSetLayout;
|
|
VK_CHECK_RESULT(vkCreatePipelineLayout(device, &pipelineLayoutCreateInfo, NULL, &pipelineLayout));
|
|
|
|
VkComputePipelineCreateInfo pipelineCreateInfo = {};
|
|
pipelineCreateInfo.sType = VK_STRUCTURE_TYPE_COMPUTE_PIPELINE_CREATE_INFO;
|
|
pipelineCreateInfo.stage = shaderStageCreateInfo;
|
|
pipelineCreateInfo.layout = pipelineLayout;
|
|
|
|
/*
|
|
Now, we finally create the compute pipeline.
|
|
*/
|
|
VK_CHECK_RESULT(vkCreateComputePipelines(
|
|
device, VK_NULL_HANDLE,
|
|
1, &pipelineCreateInfo,
|
|
NULL, &pipeline));
|
|
}
|
|
void createCommandPool()
|
|
{
|
|
/*
|
|
We are getting closer to the end. In order to send commands to the device(GPU),
|
|
we must first record commands into a command buffer.
|
|
To allocate a command buffer, we must first create a command pool. So let us do that.
|
|
*/
|
|
VkCommandPoolCreateInfo commandPoolCreateInfo = {};
|
|
commandPoolCreateInfo.sType = VK_STRUCTURE_TYPE_COMMAND_POOL_CREATE_INFO;
|
|
commandPoolCreateInfo.flags = VK_COMMAND_POOL_CREATE_RESET_COMMAND_BUFFER_BIT;
|
|
// the queue family of this command pool. All command buffers allocated from this command pool,
|
|
// must be submitted to queues of this family ONLY.
|
|
commandPoolCreateInfo.queueFamilyIndex = queueFamilyIndex;
|
|
VK_CHECK_RESULT(vkCreateCommandPool(device, &commandPoolCreateInfo, NULL, &commandPool));
|
|
/*
|
|
Now allocate a command buffer from the command pool.
|
|
*/
|
|
VkCommandBufferAllocateInfo commandBufferAllocateInfo = {};
|
|
commandBufferAllocateInfo.sType = VK_STRUCTURE_TYPE_COMMAND_BUFFER_ALLOCATE_INFO;
|
|
commandBufferAllocateInfo.commandPool = commandPool; // specify the command pool to allocate from.
|
|
// if the command buffer is primary, it can be directly submitted to queues.
|
|
// A secondary buffer has to be called from some primary command buffer, and cannot be directly
|
|
// submitted to a queue. To keep things simple, we use a primary command buffer.
|
|
commandBufferAllocateInfo.level = VK_COMMAND_BUFFER_LEVEL_PRIMARY;
|
|
commandBufferAllocateInfo.commandBufferCount = 1; // allocate a single command buffer.
|
|
VK_CHECK_RESULT(vkAllocateCommandBuffers(device, &commandBufferAllocateInfo, &commandBuffer)); // allocate command buffer.
|
|
|
|
}
|
|
void prepareImage(int chidx)
|
|
{
|
|
VkCommandBufferBeginInfo beginInfo = {};
|
|
beginInfo.sType = VK_STRUCTURE_TYPE_COMMAND_BUFFER_BEGIN_INFO;
|
|
beginInfo.flags = 0; // the buffer is only submitted and used once in this application.
|
|
VK_CHECK_RESULT(vkBeginCommandBuffer(commandBuffer, &beginInfo)); // start recording commands.
|
|
|
|
|
|
vkCmdBindPipeline(commandBuffer, VK_PIPELINE_BIND_POINT_COMPUTE, pipeline);
|
|
VkImageMemoryBarrier imageMemoryBarrier = {VK_STRUCTURE_TYPE_IMAGE_MEMORY_BARRIER};
|
|
imageMemoryBarrier.oldLayout = VK_IMAGE_LAYOUT_UNDEFINED;
|
|
imageMemoryBarrier.newLayout = VK_IMAGE_LAYOUT_GENERAL;
|
|
imageMemoryBarrier.image = chain[chidx].image0;
|
|
imageMemoryBarrier.subresourceRange = { VK_IMAGE_ASPECT_COLOR_BIT, 0, 1, 0, 1 };
|
|
// imageMemoryBarrier.srcAccessMask = VK_ACCESS_SHADER_WRITE_BIT;
|
|
// imageMemoryBarrier.dstAccessMask = VK_ACCESS_MEMORY_READ_BIT;
|
|
vkCmdPipelineBarrier(commandBuffer, VK_PIPELINE_STAGE_TOP_OF_PIPE_BIT, VK_PIPELINE_STAGE_COMPUTE_SHADER_BIT,
|
|
0, 0, nullptr, 0, nullptr, 1, &imageMemoryBarrier);
|
|
imageMemoryBarrier.image = chain[chidx].image1;
|
|
vkCmdPipelineBarrier(commandBuffer, VK_PIPELINE_STAGE_TOP_OF_PIPE_BIT, VK_PIPELINE_STAGE_COMPUTE_SHADER_BIT,
|
|
0, 0, nullptr, 0, nullptr, 1, &imageMemoryBarrier);
|
|
VK_CHECK_RESULT(vkEndCommandBuffer(commandBuffer)); // end recording commands.
|
|
VkFence fence;
|
|
VkFenceCreateInfo fenceCreateInfo = {};
|
|
fenceCreateInfo.sType = VK_STRUCTURE_TYPE_FENCE_CREATE_INFO;
|
|
fenceCreateInfo.flags = 0;
|
|
VK_CHECK_RESULT(vkCreateFence(device, &fenceCreateInfo, NULL, &fence));
|
|
|
|
VkSubmitInfo submitInfo = {};
|
|
submitInfo.sType = VK_STRUCTURE_TYPE_SUBMIT_INFO;
|
|
submitInfo.commandBufferCount = 1; // submit a single command buffer
|
|
submitInfo.pCommandBuffers = &commandBuffer; // the command buffer to submit.
|
|
VK_CHECK_RESULT(vkQueueSubmit(queue, 1, &submitInfo, fence));
|
|
VK_CHECK_RESULT(vkWaitForFences(device, 1, &fence, VK_TRUE, 100000000000));
|
|
vkDestroyFence(device, fence, NULL);
|
|
vkResetCommandBuffer(commandBuffer, 0);
|
|
}
|
|
|
|
void createCommandBuffer(int chidx) {
|
|
|
|
/*
|
|
Now allocate a command buffer from the command pool.
|
|
*/
|
|
VkCommandBufferAllocateInfo commandBufferAllocateInfo = {};
|
|
commandBufferAllocateInfo.sType = VK_STRUCTURE_TYPE_COMMAND_BUFFER_ALLOCATE_INFO;
|
|
commandBufferAllocateInfo.commandPool = commandPool; // specify the command pool to allocate from.
|
|
// if the command buffer is primary, it can be directly submitted to queues.
|
|
// A secondary buffer has to be called from some primary command buffer, and cannot be directly
|
|
// submitted to a queue. To keep things simple, we use a primary command buffer.
|
|
commandBufferAllocateInfo.level = VK_COMMAND_BUFFER_LEVEL_PRIMARY;
|
|
commandBufferAllocateInfo.commandBufferCount = 1; // allocate a single command buffer.
|
|
VK_CHECK_RESULT(vkAllocateCommandBuffers(device, &commandBufferAllocateInfo, &chain[chidx].commandBuffer)); // allocate command buffer.
|
|
VkCommandBufferBeginInfo beginInfo = {};
|
|
beginInfo.sType = VK_STRUCTURE_TYPE_COMMAND_BUFFER_BEGIN_INFO;
|
|
beginInfo.flags = 0; // the buffer is only submitted and used once in this application.
|
|
VK_CHECK_RESULT(vkBeginCommandBuffer(chain[chidx].commandBuffer, &beginInfo)); // start recording commands.
|
|
/*
|
|
We need to bind a pipeline, AND a descriptor set before we dispatch.
|
|
|
|
The validation layer will NOT give warnings if you forget these, so be very careful not to forget them.
|
|
*/
|
|
vkCmdBindPipeline(chain[chidx].commandBuffer, VK_PIPELINE_BIND_POINT_COMPUTE, pipeline);
|
|
/*
|
|
Now we shall start recording commands into the newly allocated command buffer.
|
|
*/
|
|
vkCmdBindDescriptorSets(chain[chidx].commandBuffer, VK_PIPELINE_BIND_POINT_COMPUTE, pipelineLayout, 0, 1, &chain[chidx].descriptorSet, 0, NULL);
|
|
|
|
/*
|
|
Calling vkCmdDispatch basically starts the compute pipeline, and executes the compute shader.
|
|
The number of workgroups is specified in the arguments.
|
|
If you are already familiar with compute shaders from OpenGL, this should be nothing new to you.
|
|
*/
|
|
vkCmdDispatch(chain[chidx].commandBuffer, (uint32_t)ceil(WIDTH/2 / float(WORKGROUP_SIZE)), (uint32_t)ceil(HEIGHT/2 / float(WORKGROUP_SIZE)), 1);
|
|
|
|
VK_CHECK_RESULT(vkEndCommandBuffer(chain[chidx].commandBuffer)); // end recording commands.
|
|
/*
|
|
We create a fence.
|
|
*/
|
|
VkFenceCreateInfo fenceCreateInfo = {};
|
|
fenceCreateInfo.sType = VK_STRUCTURE_TYPE_FENCE_CREATE_INFO;
|
|
fenceCreateInfo.flags = 0;
|
|
VK_CHECK_RESULT(vkCreateFence(device, &fenceCreateInfo, NULL, &chain[chidx].fence));
|
|
|
|
}
|
|
void waitFence(int chidx)
|
|
{
|
|
if(chain[chidx].running)
|
|
VK_CHECK_RESULT(vkWaitForFences(device, 1, &chain[chidx].fence, VK_TRUE, 100000000000));
|
|
chain[chidx].running = false;
|
|
}
|
|
|
|
// Submit the pre-recorded command buffer of chain entry `chidx`.
// Any previous submission of the same entry is waited for first, the entry's
// fence is reset, and the buffer is submitted with that fence. The call
// returns without waiting for completion; use waitFence(chidx) for that.
void runCommandBuffer(int chidx) {
    VkSubmitInfo submitInfo = {};
    submitInfo.sType = VK_STRUCTURE_TYPE_SUBMIT_INFO;
    submitInfo.commandBufferCount = 1; // submit a single command buffer
    submitInfo.pCommandBuffers = &chain[chidx].commandBuffer; // the command buffer to submit.

    // The fence must not be in use by an earlier submission when it is reset.
    waitFence(chidx);

    // vkResetFences can fail; check it like the other Vulkan calls so a
    // failure is not silently followed by a submit on a bad fence.
    VK_CHECK_RESULT(vkResetFences(device, 1, &chain[chidx].fence));

    // Submit on the queue; the fence is signalled when execution finishes.
    VK_CHECK_RESULT(vkQueueSubmit(queue, 1, &submitInfo, chain[chidx].fence));
    chain[chidx].running = true;
    //VK_CHECK_RESULT(vkWaitForFences(device, 1, &fence, VK_TRUE, 100000000000));
}
|
|
template <typename Codec>
|
|
void run(bool p010, const char *filename) {
|
|
// Buffer size of the storage buffer that will contain the rendered mandelbrot set.
|
|
//bufferSize = sizeof(Pixel) * WIDTH * HEIGHT;
|
|
|
|
|
|
// Initialize vulkan:
|
|
createInstance();
|
|
findPhysicalDevice();
|
|
createDevice();
|
|
|
|
//createImageExportableDmabuf(image0, imageView0, imageMemory0, prime_fd, WIDTH, HEIGHT, VK_FORMAT_R8_UNORM);
|
|
//createImageExportableDmabuf(image1, imageView1, imageMemory1, prime_fd_uv, WIDTH/2, HEIGHT/2, VK_FORMAT_R8G8_UNORM);
|
|
int drm_fd = drm_fd = open("/dev/dri/renderD128", O_RDWR);
|
|
//auto *r = vaapi_recorder_create2(drm_fd, WIDTH, HEIGHT, "out.264", prime_fd, WIDTH * 4);
|
|
//auto *r = vaapi_recorder_create3(drm_fd, WIDTH, HEIGHT, "out.264", prime_fd, WIDTH * 4, prime_fd_uv, WIDTH * 2);
|
|
uint64_t mod;
|
|
uint32_t size, offset, pitch1, pitch2;
|
|
int fd[CHAIN_SIZE];
|
|
uint64_t modifiers[32];
|
|
int count = getAvailiableModifiersList(modifiers, 32, p010?VK_FORMAT_R16_UNORM:VK_FORMAT_R8_UNORM);
|
|
//auto *r = vaapi_recorder_create5(drm_fd, WIDTH, HEIGHT, "out.264", fd, &mod, &size, &offset, &pitch1, &pitch2, modifiers, count);
|
|
Codec enc = {};
|
|
|
|
enc.Setup(drm_fd, WIDTH, HEIGHT, filename, fd, &mod, &size, &offset, &pitch1, &pitch2, modifiers, count, p010);
|
|
for(int i = 0; i < CHAIN_SIZE; i++)
|
|
{
|
|
createUBO(i);
|
|
createImageDumbDmabuf2(chain[i].image0, chain[i].imageView0, chain[i].imageMemory0, chain[i].image1, chain[i].imageView1, chain[i].imageMemory1,
|
|
fd[i], mod, size, offset, pitch1, pitch2, p010);
|
|
}
|
|
|
|
createDescriptorSetLayout();
|
|
createDescriptorPool();
|
|
createComputePipeline();
|
|
createCommandPool();
|
|
for(int i = 0; i < CHAIN_SIZE; i++)
|
|
{
|
|
prepareImage(i);
|
|
createDescriptorSet(i);
|
|
createCommandBuffer(i);
|
|
}
|
|
int frameNum = 0;
|
|
|
|
while(frameNum++ < 1000)
|
|
{
|
|
int chidx = frameNum & 3;
|
|
// Finally, run the recorded command buffer.
|
|
runCommandBuffer(chidx);
|
|
#ifndef SKIP_FENCE_SYNC
|
|
waitFence(chidx);
|
|
#endif
|
|
//recorder_frame4(r, chidx);
|
|
if(frameNum == 1)
|
|
enc.EncodeIDR(chidx);
|
|
else
|
|
enc.EncodeP(chidx);
|
|
chain[chidx].pMappedUBO->frameNum = frameNum;
|
|
|
|
}
|
|
for(int i = 0; i < CHAIN_SIZE; i++)
|
|
{
|
|
waitFence(i);
|
|
vkDestroyFence(device, chain[i].fence, NULL);
|
|
vkUnmapMemory(device, chain[i].uboMemory);
|
|
vkFreeMemory(device, chain[i].uboMemory, NULL);
|
|
vkDestroyBuffer(device, chain[i].ubo, NULL);
|
|
vkFreeMemory(device, chain[i].imageMemory0, NULL);
|
|
//vkFreeMemory(device, chain[i].imageMemory1, NULL);
|
|
vkDestroyImageView(device, chain[i].imageView0, NULL);
|
|
vkDestroyImage(device, chain[i].image0, NULL);
|
|
vkDestroyImageView(device, chain[i].imageView1, NULL);
|
|
vkDestroyImage(device, chain[i].image1, NULL);
|
|
}
|
|
// The former command rendered a mandelbrot set to a buffer.
|
|
// Save that buffer as a png on disk.
|
|
//saveRenderedImage();
|
|
|
|
// Clean up all vulkan resources.
|
|
cleanup();
|
|
}
|
|
#if 0
|
|
void saveRenderedImage() {
|
|
void* mappedMemory = NULL;
|
|
#if 1
|
|
// Map the buffer memory, so that we can read from it on the CPU.
|
|
vkMapMemory(device, imageMemory0, 0, VK_WHOLE_SIZE, 0, &mappedMemory);
|
|
//Pixel* pmappedMemory = (Pixel *)mappedMemory;
|
|
FILE *f = fopen("out.bin","wb");
|
|
fwrite(mappedMemory, 4, WIDTH * HEIGHT, f);
|
|
fclose(f);
|
|
|
|
// Done reading, so unmap.
|
|
vkUnmapMemory(device, imageMemory0);
|
|
#else
|
|
static char mem[WIDTH * HEIGHT*4];
|
|
memcpy(mem, gDrm.mapped_buffer, WIDTH * HEIGHT * 4);
|
|
// Now we save the acquired color data to a .png.
|
|
// unsigned error = lodepng::encode("mandelbrot.png", image, WIDTH, HEIGHT);
|
|
//if (error) printf("encoder error %d: %s", error, lodepng_error_text(error));
|
|
FILE *f = fopen("out.bin","wb");
|
|
fwrite(mem, 4, WIDTH * HEIGHT, f);
|
|
fclose(f);
|
|
#endif
|
|
}
|
|
#endif
|
|
void cleanup() {
|
|
/*
|
|
Clean up all Vulkan Resources.
|
|
*/
|
|
|
|
if (enableValidationLayers) {
|
|
// destroy callback.
|
|
auto func = (PFN_vkDestroyDebugReportCallbackEXT)vkGetInstanceProcAddr(instance, "vkDestroyDebugReportCallbackEXT");
|
|
if (func == nullptr) {
|
|
printf("Could not load vkDestroyDebugReportCallbackEXT\n");
|
|
return;
|
|
}
|
|
func(instance, debugReportCallback, NULL);
|
|
}
|
|
|
|
//vkFreeMemory(device, bufferMemory, NULL);
|
|
//vkDestroyBuffer(device, buffer, NULL);
|
|
//vkFreeMemory(device, imageMemory0, NULL);
|
|
//vkDestroyImageView(device, imageView0, NULL);
|
|
//vkDestroyImage(device, image0, NULL);
|
|
vkDestroyShaderModule(device, computeShaderModule, NULL);
|
|
vkDestroyDescriptorPool(device, descriptorPool, NULL);
|
|
vkDestroyDescriptorSetLayout(device, descriptorSetLayout, NULL);
|
|
vkDestroyPipelineLayout(device, pipelineLayout, NULL);
|
|
vkDestroyPipeline(device, pipeline, NULL);
|
|
vkDestroyCommandPool(device, commandPool, NULL);
|
|
vkDestroyDevice(device, NULL);
|
|
vkDestroyInstance(instance, NULL);
|
|
}
|
|
};
|
|
|
|
int main(int argc, char **argv) {
|
|
ComputeApplication app;
|
|
|
|
bool hevc = argc > 1;
|
|
bool p010 = false;
|
|
if(hevc)
|
|
p010 = atoi(argv[1]);
|
|
if(hevc)
|
|
app.run<VaapiEncoderHEVC>(p010, "out.265");
|
|
else
|
|
app.run<VaapiEncoderH264>(false, "out.264");
|
|
|
|
return EXIT_SUCCESS;
|
|
}
|