Demo from https://github.com/Erkaman/vulkan_minimal_compute/, remove STL and fix validation layer
This commit is contained in:
parent
e5f3981b3b
commit
a1d19963d7
2 changed files with 863 additions and 0 deletions
56
comp.comp
Normal file
56
comp.comp
Normal file
|
@ -0,0 +1,56 @@
|
|||
#version 450
#extension GL_ARB_separate_shader_objects : enable

// Output image size in pixels, and the edge length of one square workgroup.
#define WIDTH 3200
#define HEIGHT 2400
#define WORKGROUP_SIZE 32
layout (local_size_x = WORKGROUP_SIZE, local_size_y = WORKGROUP_SIZE, local_size_z = 1 ) in;

// One RGBA element of the output image.
struct Pixel{
    vec4 value;
};

// Storage buffer that receives the image; indexed as WIDTH * row + column.
layout(std140, binding = 0) buffer buf
{
    Pixel imageData[];
};

// Computes the colour of one image element per invocation and stores it in imageData.
void main() {
    uvec2 texel = gl_GlobalInvocationID.xy;

    // The dispatch is rounded up to whole workgroups, so some invocations fall
    // outside the image. They have nothing to do.
    if (texel.x >= WIDTH || texel.y >= HEIGHT)
        return;

    // Normalised image coordinates.
    vec2 uv = vec2(float(texel.x) / float(WIDTH), float(texel.y) / float(HEIGHT));

    // Mandelbrot iteration: z <- z^2 + c, starting from z = 0, for at most M steps.
    const int M = 128;
    vec2 c = vec2(-.445, 0.0) + (uv - 0.5)*(2.0+ 1.7*0.2 );
    vec2 z = vec2(0.0);
    float n = 0.0; // number of iterations completed before the escape test fired
    for (int i = 0; i < M; i++)
    {
        z = vec2(z.x*z.x - z.y*z.y, 2.*z.x*z.y) + c;
        // NOTE(review): this compares |z|^2 against 2; the usual bailout is |z| > 2,
        // i.e. |z|^2 > 4. Kept as-is so the rendered image does not change - confirm intent.
        if (dot(z, z) > 2) break;
        n += 1.0;
    }

    // Map the iteration count to a colour with a simple cosine palette:
    // http://iquilezles.org/www/articles/palettes/palettes.htm
    float t = float(n) / float(M);
    vec3 d = vec3(0.3, 0.3 ,0.5);
    vec3 e = vec3(-0.2, -0.3 ,-0.5);
    vec3 f = vec3(2.1, 2.0, 3.0);
    vec3 g = vec3(0.0, 0.1, 0.0);
    vec4 color = vec4( d + e*cos( 6.28318*(f*t+g) ) ,1.0);

    // Store the result in the storage buffer.
    imageData[WIDTH * texel.y + texel.x].value = color;
}
|
807
vkcompute.cpp
Normal file
807
vkcompute.cpp
Normal file
|
@ -0,0 +1,807 @@
|
|||
/*
|
||||
* Copyright (c) 2017 Eric Arnebäck
|
||||
* Copyright (c) 2024 mittorn
|
||||
*
|
||||
* Permission is hereby granted, free of charge, to any person obtaining a
|
||||
* copy of this software and associated documentation files (the
|
||||
* "Software"), to deal in the Software without restriction, including
|
||||
* without limitation the rights to use, copy, modify, merge, publish,
|
||||
* distribute, sub license, and/or sell copies of the Software, and to
|
||||
* permit persons to whom the Software is furnished to do so, subject to
|
||||
* the following conditions:
|
||||
*
|
||||
* The above copyright notice and this permission notice (including the
|
||||
* next paragraph) shall be included in all copies or substantial portions
|
||||
* of the Software.
|
||||
*
|
||||
* THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS
|
||||
* OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
|
||||
* MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NON-INFRINGEMENT.
|
||||
* IN NO EVENT SHALL PRECISION INSIGHT AND/OR ITS SUPPLIERS BE LIABLE FOR
|
||||
* ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT,
|
||||
* TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE
|
||||
* SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE.
|
||||
*/
|
||||
|
||||
#include <vulkan/vulkan.h>
|
||||
|
||||
#include <string.h>
|
||||
#include <assert.h>
|
||||
#include <stdio.h>
|
||||
#include <math.h>
|
||||
|
||||
//#include "lodepng.h" //Used for png encoding.
|
||||
|
||||
// Size, in pixels, of the rendered mandelbrot set.
constexpr int WIDTH = 3200;
constexpr int HEIGHT = 2400;
// Workgroup size in the compute shader.
constexpr int WORKGROUP_SIZE = 32;
|
||||
|
||||
/// TODO: why it even should depend on NDEBUG???
// Validation layers are requested only when NDEBUG is not defined.
#ifndef NDEBUG
const bool enableValidationLayers = true;
#else
const bool enableValidationLayers = false;
#endif
|
||||
|
||||
// Used for validating return values of Vulkan API calls.
// Evaluates `f` exactly once. If the result is not VK_SUCCESS, prints the result code
// together with the file and line, then trips an assert (a no-op when NDEBUG is defined).
// The do { } while (0) wrapper makes `VK_CHECK_RESULT(x);` a single statement, so it is
// safe in an un-braced if/else.
#define VK_CHECK_RESULT(f) \
    do { \
        const VkResult vkCheckResult_ = (f); \
        if (vkCheckResult_ != VK_SUCCESS) \
        { \
            printf("Fatal : VkResult is %d in %s at line %d\n", vkCheckResult_, __FILE__, __LINE__); \
            assert(vkCheckResult_ == VK_SUCCESS); \
        } \
    } while (0)
|
||||
|
||||
/*
|
||||
The application launches a compute shader that renders the mandelbrot set,
|
||||
by rendering it into a storage buffer.
|
||||
The storage buffer is then read from the GPU, and saved as .png.
|
||||
*/
|
||||
class ComputeApplication {
|
||||
private:
|
||||
// The pixels of the rendered mandelbrot set are in this format:
|
||||
struct Pixel {
|
||||
float r, g, b, a;
|
||||
};
|
||||
|
||||
/*
|
||||
In order to use Vulkan, you must create an instance.
|
||||
*/
|
||||
VkInstance instance;
|
||||
|
||||
VkDebugReportCallbackEXT debugReportCallback;
|
||||
/*
|
||||
The physical device is some device on the system that supports usage of Vulkan.
|
||||
Often, it is simply a graphics card that supports Vulkan.
|
||||
*/
|
||||
VkPhysicalDevice physicalDevice;
|
||||
/*
|
||||
Then we have the logical device VkDevice, which basically allows
|
||||
us to interact with the physical device.
|
||||
*/
|
||||
VkDevice device;
|
||||
|
||||
/*
|
||||
The pipeline specifies the pipeline that all graphics and compute commands pass through in Vulkan.
|
||||
|
||||
We will be creating a simple compute pipeline in this application.
|
||||
*/
|
||||
VkPipeline pipeline;
|
||||
VkPipelineLayout pipelineLayout;
|
||||
VkShaderModule computeShaderModule;
|
||||
|
||||
/*
|
||||
The command buffer is used to record commands, that will be submitted to a queue.
|
||||
|
||||
To allocate such command buffers, we use a command pool.
|
||||
*/
|
||||
VkCommandPool commandPool;
|
||||
VkCommandBuffer commandBuffer;
|
||||
|
||||
/*
|
||||
|
||||
Descriptors represent resources in shaders. They allow us to use things like
|
||||
uniform buffers, storage buffers and images in GLSL.
|
||||
|
||||
A single descriptor represents a single resource, and several descriptors are organized
|
||||
into descriptor sets, which are basically just collections of descriptors.
|
||||
*/
|
||||
VkDescriptorPool descriptorPool;
|
||||
VkDescriptorSet descriptorSet;
|
||||
VkDescriptorSetLayout descriptorSetLayout;
|
||||
|
||||
/*
|
||||
The mandelbrot set will be rendered to this buffer.
|
||||
|
||||
The memory that backs the buffer is bufferMemory.
|
||||
*/
|
||||
VkBuffer buffer;
|
||||
VkDeviceMemory bufferMemory;
|
||||
|
||||
uint32_t bufferSize; // size of `buffer` in bytes.
|
||||
|
||||
const char * enabledLayers[16];
|
||||
size_t enabledLayersCount = 0;
|
||||
|
||||
/*
|
||||
In order to execute commands on a device(GPU), the commands must be submitted
|
||||
to a queue. The commands are stored in a command buffer, and this command buffer
|
||||
is given to the queue.
|
||||
|
||||
There will be different kinds of queues on the device. Not all queues support
|
||||
graphics operations, for instance. For this application, we at least want a queue
|
||||
that supports compute operations.
|
||||
*/
|
||||
VkQueue queue; // a queue supporting compute operations.
|
||||
|
||||
/*
|
||||
Groups of queues that have the same capabilities (for instance, they all support graphics and compute operations),
|
||||
are grouped into queue families.
|
||||
|
||||
When submitting a command buffer, you must specify to which queue in the family you are submitting to.
|
||||
This variable keeps track of the index of the queue family that our queue belongs to.
|
||||
*/
|
||||
uint32_t queueFamilyIndex;
|
||||
|
||||
public:
|
||||
void run() {
|
||||
// Buffer size of the storage buffer that will contain the rendered mandelbrot set.
|
||||
bufferSize = sizeof(Pixel) * WIDTH * HEIGHT;
|
||||
|
||||
// Initialize vulkan:
|
||||
createInstance();
|
||||
findPhysicalDevice();
|
||||
createDevice();
|
||||
createBuffer();
|
||||
createDescriptorSetLayout();
|
||||
createDescriptorSet();
|
||||
createComputePipeline();
|
||||
createCommandBuffer();
|
||||
|
||||
// Finally, run the recorded command buffer.
|
||||
runCommandBuffer();
|
||||
|
||||
// The former command rendered a mandelbrot set to a buffer.
|
||||
// Save that buffer as a png on disk.
|
||||
saveRenderedImage();
|
||||
|
||||
// Clean up all vulkan resources.
|
||||
cleanup();
|
||||
}
|
||||
|
||||
void saveRenderedImage() {
|
||||
void* mappedMemory = NULL;
|
||||
// Map the buffer memory, so that we can read from it on the CPU.
|
||||
vkMapMemory(device, bufferMemory, 0, bufferSize, 0, &mappedMemory);
|
||||
Pixel* pmappedMemory = (Pixel *)mappedMemory;
|
||||
|
||||
// Done reading, so unmap.
|
||||
vkUnmapMemory(device, bufferMemory);
|
||||
|
||||
// Now we save the acquired color data to a .png.
|
||||
// unsigned error = lodepng::encode("mandelbrot.png", image, WIDTH, HEIGHT);
|
||||
//if (error) printf("encoder error %d: %s", error, lodepng_error_text(error));
|
||||
}
|
||||
|
||||
// Debug-report callback: prints the layer prefix and message of every report it receives.
// Always returns VK_FALSE. The remaining arguments are not used.
static VKAPI_ATTR VkBool32 VKAPI_CALL debugReportCallbackFn(
    VkDebugReportFlagsEXT       flags,
    VkDebugReportObjectTypeEXT  objectType,
    uint64_t                    object,
    size_t                      location,
    int32_t                     messageCode,
    const char*                 pLayerPrefix,
    const char*                 pMessage,
    void*                       pUserData)
{
    (void)flags;
    (void)objectType;
    (void)object;
    (void)location;
    (void)messageCode;
    (void)pUserData;

    printf("Debug Report: %s: %s\n", pLayerPrefix, pMessage);
    return VK_FALSE;
}
|
||||
|
||||
void createInstance() {
|
||||
const char * enabledExtensions[16];
|
||||
uint32_t enabledExtensionsCount = 0;
|
||||
|
||||
/*
|
||||
By enabling validation layers, Vulkan will emit warnings if the API
|
||||
is used incorrectly. We shall enable the layer VK_LAYER_LUNARG_standard_validation,
|
||||
which is basically a collection of several useful validation layers.
|
||||
*/
|
||||
if (enableValidationLayers) {
|
||||
/*
|
||||
We get all supported layers with vkEnumerateInstanceLayerProperties.
|
||||
*/
|
||||
uint32_t layerCount;
|
||||
vkEnumerateInstanceLayerProperties(&layerCount, NULL);
|
||||
|
||||
VkLayerProperties layerProperties[layerCount];
|
||||
vkEnumerateInstanceLayerProperties(&layerCount, layerProperties);
|
||||
|
||||
/*
|
||||
And then we simply check if VK_LAYER_LUNARG_standard_validation is among the supported layers.
|
||||
*/
|
||||
bool foundLayer = false;
|
||||
for (VkLayerProperties prop : layerProperties) {
|
||||
|
||||
if (strcmp("VK_LAYER_KHRONOS_validation", prop.layerName) == 0) {
|
||||
foundLayer = true;
|
||||
break;
|
||||
}
|
||||
|
||||
}
|
||||
|
||||
if (!foundLayer) {
|
||||
printf("Layer VK_LAYER_LUNARG_standard_validation not supported\n");
|
||||
}
|
||||
else
|
||||
enabledLayers[enabledLayersCount++] = "VK_LAYER_KHRONOS_validation"; // Alright, we can use this layer.
|
||||
|
||||
/*
|
||||
We need to enable an extension named VK_EXT_DEBUG_REPORT_EXTENSION_NAME,
|
||||
in order to be able to print the warnings emitted by the validation layer.
|
||||
|
||||
So again, we just check if the extension is among the supported extensions.
|
||||
*/
|
||||
|
||||
uint32_t extensionCount;
|
||||
|
||||
vkEnumerateInstanceExtensionProperties(NULL, &extensionCount, NULL);
|
||||
VkExtensionProperties extensionProperties[extensionCount];
|
||||
vkEnumerateInstanceExtensionProperties(NULL, &extensionCount, extensionProperties);
|
||||
|
||||
bool foundExtension = false;
|
||||
for (VkExtensionProperties prop : extensionProperties) {
|
||||
if (strcmp(VK_EXT_DEBUG_REPORT_EXTENSION_NAME, prop.extensionName) == 0) {
|
||||
foundExtension = true;
|
||||
break;
|
||||
}
|
||||
|
||||
}
|
||||
|
||||
if (!foundExtension) {
|
||||
printf("Extension VK_EXT_DEBUG_REPORT_EXTENSION_NAME not supported\n");
|
||||
}
|
||||
else enabledExtensions[enabledExtensionsCount++] = VK_EXT_DEBUG_REPORT_EXTENSION_NAME;
|
||||
}
|
||||
|
||||
/*
|
||||
Next, we actually create the instance.
|
||||
|
||||
*/
|
||||
|
||||
/*
|
||||
Contains application info. This is actually not that important.
|
||||
The only real important field is apiVersion.
|
||||
*/
|
||||
VkApplicationInfo applicationInfo = {};
|
||||
applicationInfo.sType = VK_STRUCTURE_TYPE_APPLICATION_INFO;
|
||||
applicationInfo.pApplicationName = "vkComputeQueueTest";
|
||||
applicationInfo.applicationVersion = 0;
|
||||
applicationInfo.pEngineName = "streamingengine";
|
||||
applicationInfo.engineVersion = 0;
|
||||
applicationInfo.apiVersion = VK_API_VERSION_1_0;;
|
||||
|
||||
VkInstanceCreateInfo createInfo = {};
|
||||
createInfo.sType = VK_STRUCTURE_TYPE_INSTANCE_CREATE_INFO;
|
||||
createInfo.flags = 0;
|
||||
createInfo.pApplicationInfo = &applicationInfo;
|
||||
|
||||
// Give our desired layers and extensions to vulkan.
|
||||
createInfo.enabledLayerCount = enabledLayersCount;
|
||||
createInfo.ppEnabledLayerNames = enabledLayers;
|
||||
createInfo.enabledExtensionCount = enabledExtensionsCount;
|
||||
createInfo.ppEnabledExtensionNames = enabledExtensions;
|
||||
|
||||
/*
|
||||
Actually create the instance.
|
||||
Having created the instance, we can actually start using vulkan.
|
||||
*/
|
||||
VK_CHECK_RESULT(vkCreateInstance(
|
||||
&createInfo,
|
||||
NULL,
|
||||
&instance));
|
||||
|
||||
/*
|
||||
Register a callback function for the extension VK_EXT_DEBUG_REPORT_EXTENSION_NAME, so that warnings emitted from the validation
|
||||
layer are actually printed.
|
||||
*/
|
||||
if (enableValidationLayers) {
|
||||
VkDebugReportCallbackCreateInfoEXT createInfo = {};
|
||||
createInfo.sType = VK_STRUCTURE_TYPE_DEBUG_REPORT_CALLBACK_CREATE_INFO_EXT;
|
||||
createInfo.flags = VK_DEBUG_REPORT_ERROR_BIT_EXT | VK_DEBUG_REPORT_WARNING_BIT_EXT | VK_DEBUG_REPORT_PERFORMANCE_WARNING_BIT_EXT;
|
||||
createInfo.pfnCallback = &debugReportCallbackFn;
|
||||
|
||||
// We have to explicitly load this function.
|
||||
auto vkCreateDebugReportCallbackEXT = (PFN_vkCreateDebugReportCallbackEXT)vkGetInstanceProcAddr(instance, "vkCreateDebugReportCallbackEXT");
|
||||
if (vkCreateDebugReportCallbackEXT == nullptr) {
|
||||
printf("Could not load vkCreateDebugReportCallbackEXT\n");
|
||||
return;
|
||||
}
|
||||
|
||||
// Create and register callback.
|
||||
VK_CHECK_RESULT(vkCreateDebugReportCallbackEXT(instance, &createInfo, NULL, &debugReportCallback));
|
||||
}
|
||||
|
||||
}
|
||||
|
||||
void findPhysicalDevice() {
|
||||
/*
|
||||
In this function, we find a physical device that can be used with Vulkan.
|
||||
*/
|
||||
|
||||
/*
|
||||
So, first we will list all physical devices on the system with vkEnumeratePhysicalDevices .
|
||||
*/
|
||||
uint32_t deviceCount;
|
||||
vkEnumeratePhysicalDevices(instance, &deviceCount, NULL);
|
||||
if (deviceCount == 0) {
|
||||
printf("could not find a device with vulkan support\n");
|
||||
return;
|
||||
}
|
||||
|
||||
VkPhysicalDevice devices[deviceCount];
|
||||
vkEnumeratePhysicalDevices(instance, &deviceCount, devices);
|
||||
|
||||
/*
|
||||
Next, we choose a device that can be used for our purposes.
|
||||
|
||||
With VkPhysicalDeviceFeatures(), we can retrieve a fine-grained list of physical features supported by the device.
|
||||
However, in this demo, we are simply launching a simple compute shader, and there are no
|
||||
special physical features demanded for this task.
|
||||
|
||||
With VkPhysicalDeviceProperties(), we can obtain a list of physical device properties. Most importantly,
|
||||
we obtain a list of physical device limitations. For this application, we launch a compute shader,
|
||||
and the maximum size of the workgroups and total number of compute shader invocations is limited by the physical device,
|
||||
and we should ensure that the limitations named maxComputeWorkGroupCount, maxComputeWorkGroupInvocations and
|
||||
maxComputeWorkGroupSize are not exceeded by our application. Moreover, we are using a storage buffer in the compute shader,
|
||||
and we should ensure that it is not larger than the device can handle, by checking the limitation maxStorageBufferRange.
|
||||
|
||||
However, in our application, the workgroup size and total number of shader invocations is relatively small, and the storage buffer is
|
||||
not that large, and thus a vast majority of devices will be able to handle it. This can be verified by looking at some devices at_
|
||||
http://vulkan.gpuinfo.org/
|
||||
|
||||
Therefore, to keep things simple and clean, we will not perform any such checks here, and just pick the first physical
|
||||
device in the list. But in a real and serious application, those limitations should certainly be taken into account.
|
||||
|
||||
*/
|
||||
for (VkPhysicalDevice device : devices) {
|
||||
if (true) { // As above stated, we do no feature checks, so just accept.
|
||||
physicalDevice = device;
|
||||
break;
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
// Returns the index of a queue family that supports compute operations.
|
||||
uint32_t getComputeQueueFamilyIndex() {
|
||||
uint32_t queueFamilyCount;
|
||||
|
||||
vkGetPhysicalDeviceQueueFamilyProperties(physicalDevice, &queueFamilyCount, NULL);
|
||||
|
||||
// Retrieve all queue families.
|
||||
VkQueueFamilyProperties queueFamilies[queueFamilyCount];
|
||||
vkGetPhysicalDeviceQueueFamilyProperties(physicalDevice, &queueFamilyCount, queueFamilies);
|
||||
|
||||
// Now find a family that supports compute.
|
||||
uint32_t i = 0;
|
||||
for (; i < queueFamilyCount; ++i) {
|
||||
VkQueueFamilyProperties props = queueFamilies[i];
|
||||
|
||||
if (props.queueCount > 0 && (props.queueFlags & VK_QUEUE_COMPUTE_BIT)) {
|
||||
// found a queue with compute. We're done!
|
||||
break;
|
||||
}
|
||||
}
|
||||
|
||||
if (i == queueFamilyCount) {
|
||||
printf("could not find a queue family that supports operations\n");
|
||||
return -1;
|
||||
}
|
||||
|
||||
return i;
|
||||
}
|
||||
|
||||
void createDevice() {
|
||||
/*
|
||||
We create the logical device in this function.
|
||||
*/
|
||||
|
||||
/*
|
||||
When creating the device, we also specify what queues it has.
|
||||
*/
|
||||
VkDeviceQueueCreateInfo queueCreateInfo = {};
|
||||
queueCreateInfo.sType = VK_STRUCTURE_TYPE_DEVICE_QUEUE_CREATE_INFO;
|
||||
queueFamilyIndex = getComputeQueueFamilyIndex(); // find queue family with compute capability.
|
||||
queueCreateInfo.queueFamilyIndex = queueFamilyIndex;
|
||||
queueCreateInfo.queueCount = 1; // create one queue in this family. We don't need more.
|
||||
float queuePriorities = 1.0; // we only have one queue, so this is not that imporant.
|
||||
queueCreateInfo.pQueuePriorities = &queuePriorities;
|
||||
|
||||
/*
|
||||
Now we create the logical device. The logical device allows us to interact with the physical
|
||||
device.
|
||||
*/
|
||||
VkDeviceCreateInfo deviceCreateInfo = {};
|
||||
|
||||
// Specify any desired device features here. We do not need any for this application, though.
|
||||
VkPhysicalDeviceFeatures deviceFeatures = {};
|
||||
|
||||
deviceCreateInfo.sType = VK_STRUCTURE_TYPE_DEVICE_CREATE_INFO;
|
||||
deviceCreateInfo.enabledLayerCount = enabledLayersCount; // need to specify validation layers here as well.
|
||||
deviceCreateInfo.ppEnabledLayerNames = enabledLayers;
|
||||
deviceCreateInfo.pQueueCreateInfos = &queueCreateInfo; // when creating the logical device, we also specify what queues it has.
|
||||
deviceCreateInfo.queueCreateInfoCount = 1;
|
||||
deviceCreateInfo.pEnabledFeatures = &deviceFeatures;
|
||||
|
||||
VK_CHECK_RESULT(vkCreateDevice(physicalDevice, &deviceCreateInfo, NULL, &device)); // create logical device.
|
||||
|
||||
// Get a handle to the only member of the queue family.
|
||||
vkGetDeviceQueue(device, queueFamilyIndex, 0, &queue);
|
||||
}
|
||||
|
||||
// find memory type with desired properties.
|
||||
uint32_t findMemoryType(uint32_t memoryTypeBits, VkMemoryPropertyFlags properties) {
    // Returns the index of the first memory type of physicalDevice that is allowed by
    // memoryTypeBits (bit i set means type i is acceptable) and has every flag in
    // `properties`. Returns (uint32_t)-1 if no such type exists.
    VkPhysicalDeviceMemoryProperties memoryProperties;

    vkGetPhysicalDeviceMemoryProperties(physicalDevice, &memoryProperties);

    /*
    How does this search work?
    See the documentation of VkPhysicalDeviceMemoryProperties for a detailed description.
    */
    for (uint32_t i = 0; i < memoryProperties.memoryTypeCount; ++i) {
        // The mask is built from an unsigned 1 so that testing bit 31 does not overflow
        // a signed int.
        const bool allowed = (memoryTypeBits & (1u << i)) != 0;
        const bool suitable = (memoryProperties.memoryTypes[i].propertyFlags & properties) == properties;
        if (allowed && suitable)
            return i;
    }
    return static_cast<uint32_t>(-1);
}
|
||||
|
||||
void createBuffer() {
|
||||
/*
|
||||
We will now create a buffer. We will render the mandelbrot set into this buffer
|
||||
in a computer shade later.
|
||||
*/
|
||||
|
||||
VkBufferCreateInfo bufferCreateInfo = {};
|
||||
bufferCreateInfo.sType = VK_STRUCTURE_TYPE_BUFFER_CREATE_INFO;
|
||||
bufferCreateInfo.size = bufferSize; // buffer size in bytes.
|
||||
bufferCreateInfo.usage = VK_BUFFER_USAGE_STORAGE_BUFFER_BIT; // buffer is used as a storage buffer.
|
||||
bufferCreateInfo.sharingMode = VK_SHARING_MODE_EXCLUSIVE; // buffer is exclusive to a single queue family at a time.
|
||||
|
||||
VK_CHECK_RESULT(vkCreateBuffer(device, &bufferCreateInfo, NULL, &buffer)); // create buffer.
|
||||
|
||||
/*
|
||||
But the buffer doesn't allocate memory for itself, so we must do that manually.
|
||||
*/
|
||||
|
||||
/*
|
||||
First, we find the memory requirements for the buffer.
|
||||
*/
|
||||
VkMemoryRequirements memoryRequirements;
|
||||
vkGetBufferMemoryRequirements(device, buffer, &memoryRequirements);
|
||||
|
||||
/*
|
||||
Now use obtained memory requirements info to allocate the memory for the buffer.
|
||||
*/
|
||||
VkMemoryAllocateInfo allocateInfo = {};
|
||||
allocateInfo.sType = VK_STRUCTURE_TYPE_MEMORY_ALLOCATE_INFO;
|
||||
allocateInfo.allocationSize = memoryRequirements.size; // specify required memory.
|
||||
/*
|
||||
There are several types of memory that can be allocated, and we must choose a memory type that:
|
||||
|
||||
1) Satisfies the memory requirements(memoryRequirements.memoryTypeBits).
|
||||
2) Satifies our own usage requirements. We want to be able to read the buffer memory from the GPU to the CPU
|
||||
with vkMapMemory, so we set VK_MEMORY_PROPERTY_HOST_VISIBLE_BIT.
|
||||
Also, by setting VK_MEMORY_PROPERTY_HOST_COHERENT_BIT, memory written by the device(GPU) will be easily
|
||||
visible to the host(CPU), without having to call any extra flushing commands. So mainly for convenience, we set
|
||||
this flag.
|
||||
*/
|
||||
allocateInfo.memoryTypeIndex = findMemoryType(
|
||||
memoryRequirements.memoryTypeBits, VK_MEMORY_PROPERTY_HOST_COHERENT_BIT | VK_MEMORY_PROPERTY_HOST_VISIBLE_BIT);
|
||||
|
||||
VK_CHECK_RESULT(vkAllocateMemory(device, &allocateInfo, NULL, &bufferMemory)); // allocate memory on device.
|
||||
|
||||
// Now associate that allocated memory with the buffer. With that, the buffer is backed by actual memory.
|
||||
VK_CHECK_RESULT(vkBindBufferMemory(device, buffer, bufferMemory, 0));
|
||||
}
|
||||
|
||||
void createDescriptorSetLayout() {
|
||||
/*
|
||||
Here we specify a descriptor set layout. This allows us to bind our descriptors to
|
||||
resources in the shader.
|
||||
|
||||
*/
|
||||
|
||||
/*
|
||||
Here we specify a binding of type VK_DESCRIPTOR_TYPE_STORAGE_BUFFER to the binding point
|
||||
0. This binds to
|
||||
|
||||
layout(std140, binding = 0) buffer buf
|
||||
|
||||
in the compute shader.
|
||||
*/
|
||||
VkDescriptorSetLayoutBinding descriptorSetLayoutBinding = {};
|
||||
descriptorSetLayoutBinding.binding = 0; // binding = 0
|
||||
descriptorSetLayoutBinding.descriptorType = VK_DESCRIPTOR_TYPE_STORAGE_BUFFER;
|
||||
descriptorSetLayoutBinding.descriptorCount = 1;
|
||||
descriptorSetLayoutBinding.stageFlags = VK_SHADER_STAGE_COMPUTE_BIT;
|
||||
|
||||
VkDescriptorSetLayoutCreateInfo descriptorSetLayoutCreateInfo = {};
|
||||
descriptorSetLayoutCreateInfo.sType = VK_STRUCTURE_TYPE_DESCRIPTOR_SET_LAYOUT_CREATE_INFO;
|
||||
descriptorSetLayoutCreateInfo.bindingCount = 1; // only a single binding in this descriptor set layout.
|
||||
descriptorSetLayoutCreateInfo.pBindings = &descriptorSetLayoutBinding;
|
||||
|
||||
// Create the descriptor set layout.
|
||||
VK_CHECK_RESULT(vkCreateDescriptorSetLayout(device, &descriptorSetLayoutCreateInfo, NULL, &descriptorSetLayout));
|
||||
}
|
||||
|
||||
void createDescriptorSet() {
|
||||
/*
|
||||
So we will allocate a descriptor set here.
|
||||
But we need to first create a descriptor pool to do that.
|
||||
*/
|
||||
|
||||
/*
|
||||
Our descriptor pool can only allocate a single storage buffer.
|
||||
*/
|
||||
VkDescriptorPoolSize descriptorPoolSize = {};
|
||||
descriptorPoolSize.type = VK_DESCRIPTOR_TYPE_STORAGE_BUFFER;
|
||||
descriptorPoolSize.descriptorCount = 1;
|
||||
|
||||
VkDescriptorPoolCreateInfo descriptorPoolCreateInfo = {};
|
||||
descriptorPoolCreateInfo.sType = VK_STRUCTURE_TYPE_DESCRIPTOR_POOL_CREATE_INFO;
|
||||
descriptorPoolCreateInfo.maxSets = 1; // we only need to allocate one descriptor set from the pool.
|
||||
descriptorPoolCreateInfo.poolSizeCount = 1;
|
||||
descriptorPoolCreateInfo.pPoolSizes = &descriptorPoolSize;
|
||||
|
||||
// create descriptor pool.
|
||||
VK_CHECK_RESULT(vkCreateDescriptorPool(device, &descriptorPoolCreateInfo, NULL, &descriptorPool));
|
||||
|
||||
/*
|
||||
With the pool allocated, we can now allocate the descriptor set.
|
||||
*/
|
||||
VkDescriptorSetAllocateInfo descriptorSetAllocateInfo = {};
|
||||
descriptorSetAllocateInfo.sType = VK_STRUCTURE_TYPE_DESCRIPTOR_SET_ALLOCATE_INFO;
|
||||
descriptorSetAllocateInfo.descriptorPool = descriptorPool; // pool to allocate from.
|
||||
descriptorSetAllocateInfo.descriptorSetCount = 1; // allocate a single descriptor set.
|
||||
descriptorSetAllocateInfo.pSetLayouts = &descriptorSetLayout;
|
||||
|
||||
// allocate descriptor set.
|
||||
VK_CHECK_RESULT(vkAllocateDescriptorSets(device, &descriptorSetAllocateInfo, &descriptorSet));
|
||||
|
||||
/*
|
||||
Next, we need to connect our actual storage buffer with the descrptor.
|
||||
We use vkUpdateDescriptorSets() to update the descriptor set.
|
||||
*/
|
||||
|
||||
// Specify the buffer to bind to the descriptor.
|
||||
VkDescriptorBufferInfo descriptorBufferInfo = {};
|
||||
descriptorBufferInfo.buffer = buffer;
|
||||
descriptorBufferInfo.offset = 0;
|
||||
descriptorBufferInfo.range = bufferSize;
|
||||
|
||||
VkWriteDescriptorSet writeDescriptorSet = {};
|
||||
writeDescriptorSet.sType = VK_STRUCTURE_TYPE_WRITE_DESCRIPTOR_SET;
|
||||
writeDescriptorSet.dstSet = descriptorSet; // write to this descriptor set.
|
||||
writeDescriptorSet.dstBinding = 0; // write to the first, and only binding.
|
||||
writeDescriptorSet.descriptorCount = 1; // update a single descriptor.
|
||||
writeDescriptorSet.descriptorType = VK_DESCRIPTOR_TYPE_STORAGE_BUFFER; // storage buffer.
|
||||
writeDescriptorSet.pBufferInfo = &descriptorBufferInfo;
|
||||
|
||||
// perform the update of the descriptor set.
|
||||
vkUpdateDescriptorSets(device, 1, &writeDescriptorSet, 0, NULL);
|
||||
}
|
||||
|
||||
// Read file into array of bytes, and cast to uint32_t*, then return.
|
||||
// The data has been padded, so that it fits into an array uint32_t.
|
||||
uint32_t* readFile(uint32_t& length, const char* filename) {
    // Reads the whole file `filename` into a newly allocated array of uint32_t words.
    //
    // The byte size is rounded up to a multiple of 4 and the extra bytes are zero.
    // On success `length` receives the padded size in bytes and the array is returned;
    // the caller releases it with delete[].
    // On failure (file cannot be opened, sized or fully read) a message is printed,
    // `length` is set to 0 and NULL is returned.
    length = 0;

    FILE* fp = fopen(filename, "rb");
    if (fp == NULL) {
        printf("Could not find or open file: %s\n", filename);
        return NULL;
    }

    // Get the file size by seeking to the end, then rewind.
    long filesize = -1;
    if (fseek(fp, 0, SEEK_END) == 0)
        filesize = ftell(fp);
    if (filesize < 0 || fseek(fp, 0, SEEK_SET) != 0) {
        printf("Could not determine size of file: %s\n", filename);
        fclose(fp);
        return NULL;
    }

    const size_t byteCount = static_cast<size_t>(filesize);
    const size_t wordCount = (byteCount + 3) / 4; // round up to whole 32-bit words

    // Allocated as uint32_t so that it matches the returned pointer type (and therefore
    // the caller's delete[]). The trailing () zero-initializes, which provides the padding.
    uint32_t* words = new uint32_t[wordCount]();

    const size_t bytesRead = fread(words, 1, byteCount, fp);
    fclose(fp);
    if (bytesRead != byteCount) {
        printf("Could not read file: %s\n", filename);
        delete[] words;
        return NULL;
    }

    length = static_cast<uint32_t>(wordCount * 4);
    return words;
}
|
||||
|
||||
void createComputePipeline() {
|
||||
/*
|
||||
We create a compute pipeline here.
|
||||
*/
|
||||
|
||||
/*
|
||||
Create a shader module. A shader module basically just encapsulates some shader code.
|
||||
*/
|
||||
uint32_t filelength;
|
||||
// the code in comp.spv was created by running the command:
|
||||
// glslangValidator.exe -V shader.comp
|
||||
uint32_t* code = readFile(filelength, "comp.spv");
|
||||
VkShaderModuleCreateInfo createInfo = {};
|
||||
createInfo.sType = VK_STRUCTURE_TYPE_SHADER_MODULE_CREATE_INFO;
|
||||
createInfo.pCode = code;
|
||||
createInfo.codeSize = filelength;
|
||||
|
||||
VK_CHECK_RESULT(vkCreateShaderModule(device, &createInfo, NULL, &computeShaderModule));
|
||||
delete[] code;
|
||||
|
||||
/*
|
||||
Now let us actually create the compute pipeline.
|
||||
A compute pipeline is very simple compared to a graphics pipeline.
|
||||
It only consists of a single stage with a compute shader.
|
||||
|
||||
So first we specify the compute shader stage, and it's entry point(main).
|
||||
*/
|
||||
VkPipelineShaderStageCreateInfo shaderStageCreateInfo = {};
|
||||
shaderStageCreateInfo.sType = VK_STRUCTURE_TYPE_PIPELINE_SHADER_STAGE_CREATE_INFO;
|
||||
shaderStageCreateInfo.stage = VK_SHADER_STAGE_COMPUTE_BIT;
|
||||
shaderStageCreateInfo.module = computeShaderModule;
|
||||
shaderStageCreateInfo.pName = "main";
|
||||
|
||||
/*
|
||||
The pipeline layout allows the pipeline to access descriptor sets.
|
||||
So we just specify the descriptor set layout we created earlier.
|
||||
*/
|
||||
VkPipelineLayoutCreateInfo pipelineLayoutCreateInfo = {};
|
||||
pipelineLayoutCreateInfo.sType = VK_STRUCTURE_TYPE_PIPELINE_LAYOUT_CREATE_INFO;
|
||||
pipelineLayoutCreateInfo.setLayoutCount = 1;
|
||||
pipelineLayoutCreateInfo.pSetLayouts = &descriptorSetLayout;
|
||||
VK_CHECK_RESULT(vkCreatePipelineLayout(device, &pipelineLayoutCreateInfo, NULL, &pipelineLayout));
|
||||
|
||||
VkComputePipelineCreateInfo pipelineCreateInfo = {};
|
||||
pipelineCreateInfo.sType = VK_STRUCTURE_TYPE_COMPUTE_PIPELINE_CREATE_INFO;
|
||||
pipelineCreateInfo.stage = shaderStageCreateInfo;
|
||||
pipelineCreateInfo.layout = pipelineLayout;
|
||||
|
||||
/*
|
||||
Now, we finally create the compute pipeline.
|
||||
*/
|
||||
VK_CHECK_RESULT(vkCreateComputePipelines(
|
||||
device, VK_NULL_HANDLE,
|
||||
1, &pipelineCreateInfo,
|
||||
NULL, &pipeline));
|
||||
}
|
||||
|
||||
void createCommandBuffer() {
|
||||
/*
|
||||
We are getting closer to the end. In order to send commands to the device(GPU),
|
||||
we must first record commands into a command buffer.
|
||||
To allocate a command buffer, we must first create a command pool. So let us do that.
|
||||
*/
|
||||
VkCommandPoolCreateInfo commandPoolCreateInfo = {};
|
||||
commandPoolCreateInfo.sType = VK_STRUCTURE_TYPE_COMMAND_POOL_CREATE_INFO;
|
||||
commandPoolCreateInfo.flags = 0;
|
||||
// the queue family of this command pool. All command buffers allocated from this command pool,
|
||||
// must be submitted to queues of this family ONLY.
|
||||
commandPoolCreateInfo.queueFamilyIndex = queueFamilyIndex;
|
||||
VK_CHECK_RESULT(vkCreateCommandPool(device, &commandPoolCreateInfo, NULL, &commandPool));
|
||||
|
||||
/*
|
||||
Now allocate a command buffer from the command pool.
|
||||
*/
|
||||
VkCommandBufferAllocateInfo commandBufferAllocateInfo = {};
|
||||
commandBufferAllocateInfo.sType = VK_STRUCTURE_TYPE_COMMAND_BUFFER_ALLOCATE_INFO;
|
||||
commandBufferAllocateInfo.commandPool = commandPool; // specify the command pool to allocate from.
|
||||
// if the command buffer is primary, it can be directly submitted to queues.
|
||||
// A secondary buffer has to be called from some primary command buffer, and cannot be directly
|
||||
// submitted to a queue. To keep things simple, we use a primary command buffer.
|
||||
commandBufferAllocateInfo.level = VK_COMMAND_BUFFER_LEVEL_PRIMARY;
|
||||
commandBufferAllocateInfo.commandBufferCount = 1; // allocate a single command buffer.
|
||||
VK_CHECK_RESULT(vkAllocateCommandBuffers(device, &commandBufferAllocateInfo, &commandBuffer)); // allocate command buffer.
|
||||
|
||||
/*
|
||||
Now we shall start recording commands into the newly allocated command buffer.
|
||||
*/
|
||||
VkCommandBufferBeginInfo beginInfo = {};
|
||||
beginInfo.sType = VK_STRUCTURE_TYPE_COMMAND_BUFFER_BEGIN_INFO;
|
||||
beginInfo.flags = VK_COMMAND_BUFFER_USAGE_ONE_TIME_SUBMIT_BIT; // the buffer is only submitted and used once in this application.
|
||||
VK_CHECK_RESULT(vkBeginCommandBuffer(commandBuffer, &beginInfo)); // start recording commands.
|
||||
|
||||
/*
|
||||
We need to bind a pipeline, AND a descriptor set before we dispatch.
|
||||
|
||||
The validation layer will NOT give warnings if you forget these, so be very careful not to forget them.
|
||||
*/
|
||||
vkCmdBindPipeline(commandBuffer, VK_PIPELINE_BIND_POINT_COMPUTE, pipeline);
|
||||
vkCmdBindDescriptorSets(commandBuffer, VK_PIPELINE_BIND_POINT_COMPUTE, pipelineLayout, 0, 1, &descriptorSet, 0, NULL);
|
||||
|
||||
/*
|
||||
Calling vkCmdDispatch basically starts the compute pipeline, and executes the compute shader.
|
||||
The number of workgroups is specified in the arguments.
|
||||
If you are already familiar with compute shaders from OpenGL, this should be nothing new to you.
|
||||
*/
|
||||
vkCmdDispatch(commandBuffer, (uint32_t)ceil(WIDTH / float(WORKGROUP_SIZE)), (uint32_t)ceil(HEIGHT / float(WORKGROUP_SIZE)), 1);
|
||||
|
||||
VK_CHECK_RESULT(vkEndCommandBuffer(commandBuffer)); // end recording commands.
|
||||
}
|
||||
|
||||
void runCommandBuffer() {
|
||||
/*
|
||||
Now we shall finally submit the recorded command buffer to a queue.
|
||||
*/
|
||||
|
||||
VkSubmitInfo submitInfo = {};
|
||||
submitInfo.sType = VK_STRUCTURE_TYPE_SUBMIT_INFO;
|
||||
submitInfo.commandBufferCount = 1; // submit a single command buffer
|
||||
submitInfo.pCommandBuffers = &commandBuffer; // the command buffer to submit.
|
||||
|
||||
/*
|
||||
We create a fence.
|
||||
*/
|
||||
VkFence fence;
|
||||
VkFenceCreateInfo fenceCreateInfo = {};
|
||||
fenceCreateInfo.sType = VK_STRUCTURE_TYPE_FENCE_CREATE_INFO;
|
||||
fenceCreateInfo.flags = 0;
|
||||
VK_CHECK_RESULT(vkCreateFence(device, &fenceCreateInfo, NULL, &fence));
|
||||
|
||||
/*
|
||||
We submit the command buffer on the queue, at the same time giving a fence.
|
||||
*/
|
||||
VK_CHECK_RESULT(vkQueueSubmit(queue, 1, &submitInfo, fence));
|
||||
/*
|
||||
The command will not have finished executing until the fence is signalled.
|
||||
So we wait here.
|
||||
We will directly after this read our buffer from the GPU,
|
||||
and we will not be sure that the command has finished executing unless we wait for the fence.
|
||||
Hence, we use a fence here.
|
||||
*/
|
||||
VK_CHECK_RESULT(vkWaitForFences(device, 1, &fence, VK_TRUE, 100000000000));
|
||||
|
||||
vkDestroyFence(device, fence, NULL);
|
||||
}
|
||||
|
||||
void cleanup() {
|
||||
/*
|
||||
Clean up all Vulkan Resources.
|
||||
*/
|
||||
|
||||
if (enableValidationLayers) {
|
||||
// destroy callback.
|
||||
auto func = (PFN_vkDestroyDebugReportCallbackEXT)vkGetInstanceProcAddr(instance, "vkDestroyDebugReportCallbackEXT");
|
||||
if (func == nullptr) {
|
||||
printf("Could not load vkDestroyDebugReportCallbackEXT\n");
|
||||
return;
|
||||
}
|
||||
func(instance, debugReportCallback, NULL);
|
||||
}
|
||||
|
||||
vkFreeMemory(device, bufferMemory, NULL);
|
||||
vkDestroyBuffer(device, buffer, NULL);
|
||||
vkDestroyShaderModule(device, computeShaderModule, NULL);
|
||||
vkDestroyDescriptorPool(device, descriptorPool, NULL);
|
||||
vkDestroyDescriptorSetLayout(device, descriptorSetLayout, NULL);
|
||||
vkDestroyPipelineLayout(device, pipelineLayout, NULL);
|
||||
vkDestroyPipeline(device, pipeline, NULL);
|
||||
vkDestroyCommandPool(device, commandPool, NULL);
|
||||
vkDestroyDevice(device, NULL);
|
||||
vkDestroyInstance(instance, NULL);
|
||||
}
|
||||
};
|
||||
|
||||
int main() {
|
||||
ComputeApplication app;
|
||||
|
||||
app.run();
|
||||
|
||||
return EXIT_SUCCESS;
|
||||
}
|
Loading…
Add table
Add a link
Reference in a new issue