P010/NV12 formats

mittorn 2024-10-29 17:16:46 +03:00
parent ba7d9abe25
commit 13ff80a9d0

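Parameterize the software decoding path on the pixel format: CreateSoftwareImage now takes the VkFormat explicitly; my_get_format, DecoderThread and SetupFFMpeg are templated on a p010 flag; and the per-frame copy is factored into ConvertFrame<pixel>, which interleaves ffmpeg's planar chroma into the image's second plane and left-shifts 10-bit samples by 6 to fill the 16-bit planes (VK_FORMAT_G16_B16R16_2PLANE_420_UNORM is used in place of the 10X6 packed format). The sampler conversion's chroma offsets also change from cosited-even to midpoint.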

@@ -57,10 +57,10 @@ struct DecoderImage
 };
-void CreateSoftwareImage(VkInstance inst, VulkanDevice &dev, DecoderImage &image, const VkSamplerYcbcrConversionInfo &ycbcr_info)
+void CreateSoftwareImage(VkInstance inst, VulkanDevice &dev, DecoderImage &image, const VkSamplerYcbcrConversionInfo &ycbcr_info, VkFormat format)
 {
-	CallWith(Image2dInfo(VK_IMAGE_USAGE_SAMPLED_BIT, VK_FORMAT_G8_B8R8_2PLANE_420_UNORM, 1920, 1080,
+	CallWith(Image2dInfo(VK_IMAGE_USAGE_SAMPLED_BIT, format, 1920, 1080,
 		$(flags) = VK_IMAGE_CREATE_DISJOINT_BIT,
 		$(tiling) = VK_IMAGE_TILING_LINEAR,
 		$(initialLayout) = VK_IMAGE_LAYOUT_PREINITIALIZED ),
@@ -98,7 +98,7 @@ void CreateSoftwareImage(VkInstance inst, VulkanDevice &dev, DecoderImage &image
 	CallWith($M(VkImageViewCreateInfo{VK_STRUCTURE_TYPE_IMAGE_VIEW_CREATE_INFO, &ycbcr_info},
 		$(image) = image.image, $(viewType) = VK_IMAGE_VIEW_TYPE_2D,
-		$(format) = VK_FORMAT_G8_B8R8_2PLANE_420_UNORM,
+		$(format) = format,
 		$(subresourceRange) = SubresourceRange()),
 		vkCreateImageView(dev.device, &ref, NULL, &image.image_view));
 	vkMapMemory(dev.device, image.image_memory, 0, image_size, 0, (void**)&image.pMappedData);
@@ -139,12 +139,39 @@ static int WaitActiveFrame()
 	pthread_mutex_unlock(&gFF.mutex);
 	return ret;
 }
+template <bool p010>
 static enum AVPixelFormat my_get_format(struct AVCodecContext *s, const enum AVPixelFormat * fmt)
 {
-	return AV_PIX_FMT_YUV420P;
+	return p010?AV_PIX_FMT_YUV420P10LE:AV_PIX_FMT_YUV420P;
 }
+template<typename pixel>
+void ConvertFrame(AVFrame *frame, const DecoderImage &img)
+{
+	constexpr uint8_t pad = sizeof(pixel) > 1?6:0;
+	for(int i = 0; i < frame->height; i++)
+	{
+		pixel *data = (pixel*)(img.pMappedData + img.stride0 * i);
+		pixel *src = (pixel*)(frame->data[0] + frame->linesize[0]*i);
+		for(int j = 0; j < frame->linesize[0]/sizeof(pixel); j++)
+			data[j] = src[j] << pad;
+	}
+	// ffmpeg cannot output 2 planes
+	for(int i = 0; i < frame->height / 2; i++)
+	{
+		pixel *data = (pixel*)(img.pMappedData + img.memory_offset_plane1 + img.stride1*i);
+		pixel *src1 = (pixel*)(frame->data[1] + frame->linesize[1]*i);
+		pixel *src2 = (pixel*)(frame->data[2] + frame->linesize[2]*i);
+		for(int j = 0; j < frame->linesize[1]/sizeof(pixel); j++)
+		{
+			data[j*2] = src1[j] << pad;
+			data[j*2+1] = src2[j] << pad;
+		}
+	}
+}
+template <bool p010>
 static void *DecoderThread(void*)
 {
 	AVFormatContext *input_ctx = NULL;
@@ -156,7 +183,7 @@ static void *DecoderThread(void*)
 	avformat_find_stream_info(input_ctx, NULL);
 	video = av_find_best_stream(input_ctx, AVMEDIA_TYPE_VIDEO, -1, -1, PointerWrap(&decoder), 0);
 	decoder_ctx = avcodec_alloc_context3(decoder);
-	decoder_ctx->get_format = my_get_format;
+	decoder_ctx->get_format = my_get_format<p010>;
 	avcodec_open2(decoder_ctx, decoder, NULL);
 	packet = av_packet_alloc();
 	AVFrame *frame = av_frame_alloc();
@@ -174,19 +201,10 @@ static void *DecoderThread(void*)
 		if(res < 0)
 			break;
 		idx++;
-		DecoderImage &img = gFF.images[idx & 3];
-		// todo: wait fence?
-		for(int i = 0; i < frame->height; i++)
-			memcpy(img.pMappedData + img.memory_offset_plane0 + img.stride0 * i, frame->data[0] + frame->linesize[0]*i, frame->linesize[0]);
-		// ffmpeg cannot NV12????
-		for(int i = 0; i < frame->height / 2; i++)
-			for(int j = 0; j < frame->linesize[1]; j++)
-			{
-				*(img.pMappedData + img.memory_offset_plane1 + img.stride1*i +j * 2) = *(frame->data[1] + frame->linesize[1]* i + j);
-				*(img.pMappedData + img.memory_offset_plane1 + img.stride1*i +j * 2+1) = *(frame->data[2] + frame->linesize[2]* i + j);
-			}
-		//memcpy(img.pMappedData + img.memory_offset_plane1 , frame->data[1], frame->linesize[1] * frame->height/2);
-		//assert(!frame->data[2]);
+		if constexpr(p010)
+			ConvertFrame<uint16_t>(frame,gFF.images[idx & 3]);
+		else
+			ConvertFrame<uint8_t>(frame,gFF.images[idx & 3]);
 		pthread_mutex_lock(&gFF.mutex);
 		gFF.decode_index = idx;
 		pthread_cond_signal(&gFF.cond);
@@ -200,18 +218,22 @@ static void *DecoderThread(void*)
 	}
 	return NULL;
 }
+template <bool p010>
 static void SetupFFMpeg(VkInstance inst, VulkanDevice &dev)
 {
 	PFN_vkCreateSamplerYcbcrConversion pvkCreateSamplerYcbcrConversion = (PFN_vkCreateSamplerYcbcrConversion)vkGetInstanceProcAddr(inst, "vkCreateSamplerYcbcrConversionKHR");
+	VkFormat format = VK_FORMAT_G8_B8R8_2PLANE_420_UNORM;
+	// intel does not support the 10 bit format, which causes validation errors (though it still works); the 16 bit layout is very similar, so use it instead
+	if constexpr(p010)
+		format = VK_FORMAT_G16_B16R16_2PLANE_420_UNORM; //VK_FORMAT_G10X6_B10X6R10X6_2PLANE_420_UNORM_3PACK16
 	CallWith($M(VkSamplerYcbcrConversionCreateInfo{VK_STRUCTURE_TYPE_SAMPLER_YCBCR_CONVERSION_CREATE_INFO},
-		$(format) = VK_FORMAT_G8_B8R8_2PLANE_420_UNORM,
+		$(format) = format,
 		$(ycbcrModel) = VK_SAMPLER_YCBCR_MODEL_CONVERSION_YCBCR_IDENTITY,
 		$(ycbcrRange) = VK_SAMPLER_YCBCR_RANGE_ITU_FULL,
-		$(xChromaOffset) = VK_CHROMA_LOCATION_COSITED_EVEN, // zero
-		$(yChromaOffset) = VK_CHROMA_LOCATION_COSITED_EVEN,
+		$(xChromaOffset) = VK_CHROMA_LOCATION_MIDPOINT,
+		$(yChromaOffset) = VK_CHROMA_LOCATION_MIDPOINT,
 		$(chromaFilter) = VK_FILTER_NEAREST), // zero
 		pvkCreateSamplerYcbcrConversion(dev.device, &ref, NULL, &gFF.ycbcr_sampler_conversion));
@@ -229,10 +251,10 @@ static void SetupFFMpeg(VkInstance inst, VulkanDevice &dev)
 		vkCreateSampler(dev.device, &ref, NULL, &gFF.ycbcr_sampler);
 	);
 	for(int i = 0; i < MAX_DECODER_FRAMES; i++)
-		CreateSoftwareImage(inst, dev, gFF.images[i], ycbcr_info);
+		CreateSoftwareImage(inst, dev, gFF.images[i], ycbcr_info, format);
 	pthread_mutex_init(&gFF.mutex, NULL);
 	pthread_cond_init(&gFF.cond, NULL);
-	pthread_create(&gFF.thread, NULL, &DecoderThread, NULL);
+	pthread_create(&gFF.thread, NULL, &DecoderThread<p010>, NULL);
 }
@@ -494,7 +516,7 @@ struct GraphicsApplication
 		CreateSwapchain(width,height, !compute);
 		printf("%d swapchain images\n", numSwapchainImages);
 		dev.CreateAndMap(uboBuf, sizeof(UBO));
-		SetupFFMpeg(context.instance, dev);
+		SetupFFMpeg<false>(context.instance, dev);
 		for(int i = 0; i < RECONSTRUCTION_TARGET_FRAMES; i++)
 		{
 			VkSemaphoreCreateInfo semaphoreCreateInfo = {VK_STRUCTURE_TYPE_SEMAPHORE_CREATE_INFO};
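
For reference, a minimal standalone sketch of why ConvertFrame shifts 10-bit samples left by 6 (illustrative only, not part of the commit; pack10to16 and the SetupFFMpeg<true> call site are hypothetical): AV_PIX_FMT_YUV420P10LE keeps its 10 significant bits in the low end of each 16-bit word, while the G16_B16R16 UNORM planes treat all 16 bits as the sample, so MSB-aligning the value (P010-style) makes normalized sampling come out approximately right.

#include <cstdint>
#include <cstdio>

// Same per-sample shift as ConvertFrame<uint16_t>: the low 6 bits stay zero,
// so 1023 (the 10-bit maximum) maps to 0xFFC0, close to full-scale UNORM.
static inline uint16_t pack10to16(uint16_t sample10)
{
	return (uint16_t)(sample10 << 6);
}

int main()
{
	printf("0x%04x\n", (unsigned)pack10to16(1023)); // prints 0xffc0
	// For a 10-bit stream the decoder would take the p010 path, e.g.
	// SetupFFMpeg<true>(context.instance, dev); // hypothetical call site
	return 0;
}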