New refactored HEVC encoder implementation

This commit is contained in:
mittorn 2024-10-12 00:19:00 +03:00
parent aec6bc23a6
commit e110e8b3d0
4 changed files with 1288 additions and 4 deletions

102
bitstream.h Normal file
View file

@ -0,0 +1,102 @@
#ifndef BITSTREAM_H
#define BITSTREAM_H
#include <stdlib.h>
/**
 * Fixed-capacity, MSB-first bit writer used to assemble packed headers.
 *
 * Bits are accumulated right-aligned in the current 32-bit word of `buffer`.
 * As soon as a word is complete it is rewritten in big-endian byte order, so
 * once End() has been called the first (bit_offset + 7) / 8 bytes of `buffer`
 * hold the bits in the order they were written.
 */
struct BaseBitstream
{
	constexpr static int max_size_in_dword = 4096;
	unsigned int buffer[max_size_in_dword] = {0};
	int bit_offset = 0; // total number of bits written so far

	/**
	 * Returns `val` laid out so that its most significant byte is stored first
	 * in memory: a byte swap on little-endian hosts, the identity on
	 * big-endian hosts.
	 */
	static unsigned int Swap32(unsigned int val)
	{
		const unsigned char *pval = (const unsigned char *)&val;
		// Widen each byte to unsigned before shifting so the top byte never
		// lands in the sign bit of a promoted int.
		return (((unsigned int)pval[0] << 24) |
			((unsigned int)pval[1] << 16) |
			((unsigned int)pval[2] << 8) |
			((unsigned int)pval[3] << 0));
	}

	/**
	 * Flushes the partially filled last word: left-aligns its bits and puts
	 * it in byte-stream order. Does nothing when the stream ends on a word
	 * boundary. Call once, after the last Put*() call.
	 */
	inline void End()
	{
		const int pos = (bit_offset >> 5);
		const int used = (bit_offset & 0x1f);
		if (used) {
			// used is 1..31 here, so the shift count is 1..31.
			buffer[pos] = Swap32(buffer[pos] << (32 - used));
		}
	}

	/**
	 * Appends the low `size_in_bits` bits of `val`, most significant first.
	 *
	 * @param val           value to write; bits above `size_in_bits` are ignored
	 * @param size_in_bits  number of bits, 0..32 (0 or negative writes nothing)
	 *
	 * Calls abort() when the write completes the last word of `buffer` or when
	 * `size_in_bits` exceeds 32.
	 */
	inline void PutUI(unsigned int val, int size_in_bits)
	{
		if (size_in_bits <= 0)
			return;
		if (size_in_bits > 32)
			abort();

		const int pos = (bit_offset >> 5);
		const int used = (bit_offset & 0x1f);
		const int bit_left = 32 - used;

		// Drop stray high bits so they cannot overwrite bits written earlier.
		if (size_in_bits < 32)
			val &= (1u << size_in_bits) - 1u;

		bit_offset += size_in_bits;
		if (bit_left > size_in_bits) {
			buffer[pos] = (buffer[pos] << size_in_bits) | val;
			return;
		}

		// The write completes the current word; `spill` bits go to the next.
		const int spill = size_in_bits - bit_left; // 0..31
		// When the word is still empty (bit_left == 32) there is nothing to
		// keep, and shifting a 32-bit value by 32 would be undefined.
		const unsigned int head = (bit_left == 32) ? 0u : (buffer[pos] << bit_left);
		buffer[pos] = Swap32(head | (val >> spill));
		if (pos + 1 == max_size_in_dword) {
			abort();
		}
		buffer[pos + 1] = val & ((1u << spill) - 1u);
	}

	/**
	 * Appends `val` as an unsigned Exp-Golomb code: N zero bits followed by
	 * the (N + 1)-bit binary representation of val + 1.
	 */
	inline void PutUE(unsigned int val)
	{
		if (val == 0xFFFFFFFFu) {
			// val + 1 needs 33 bits: 32 zeros, a one, then 32 zeros.
			PutUI(0, 32);
			PutUI(1, 1);
			PutUI(0, 32);
			return;
		}
		const unsigned int code = val + 1;
		int size_in_bits = 0;
		// Unsigned loop variable: a signed one never reaches zero once the
		// top bit of the code is set.
		for (unsigned int tmp = code; tmp; tmp >>= 1)
			size_in_bits++;
		PutUI(0, size_in_bits - 1); /* leading zero */
		PutUI(code, size_in_bits);
	}

	/**
	 * Appends `val` as a signed Exp-Golomb code: positive values map to odd
	 * code numbers (2 * val - 1), zero and negative values to even ones
	 * (-2 * val).
	 */
	inline void PutSE(int val)
	{
		unsigned int new_val;
		// Computed in unsigned arithmetic to avoid signed overflow.
		if (val <= 0)
			new_val = 2u * (0u - (unsigned int)val);
		else
			new_val = 2u * (unsigned int)val - 1u;
		PutUE(new_val);
	}

	/**
	 * Pads the stream up to the next byte boundary with copies of `bit`
	 * (any non-zero value pads with ones). Does nothing when already aligned.
	 */
	inline void ByteAligning(int bit)
	{
		const int used = (bit_offset & 0x7);
		if (!used)
			return;
		const int bit_left = 8 - used;
		const int new_val = bit ? ((1 << bit_left) - 1) : 0;
		PutUI(new_val, bit_left);
	}

	/** Appends a single one bit followed by zero bits up to the next byte boundary. */
	inline void RBSPTrailingBits()
	{
		PutUI(1, 1);
		ByteAligning(0);
	}
};
#endif // BITSTREAM_H

320
vaapi_encoder.h Normal file
View file

@ -0,0 +1,320 @@
#ifndef VAAPI_ENCODER_H
#define VAAPI_ENCODER_H
#include <cassert>
#include <cstdio>
#include <unistd.h>
#include <va/va.h>
#include <va/va_drm.h>
#include <va/va_drmcommon.h>
#include <pthread.h>
#include <fcntl.h>
#include "bitstream.h"
#define CHAIN_SIZE 4
struct VaapiEncoder
{
VADisplay dpy = nullptr;
VAContextID ctx;
VASurfaceID inputFrames[CHAIN_SIZE];
int width, height;
int frame_count;
int destroying;
VASurfaceID reference_picture[3];
pthread_t output_thread = 0;
pthread_mutex_t mutex;
pthread_cond_t output_cond;
VABufferID output_buf;
int output_fd = -1;
VAConfigAttrib attrib[VAConfigAttribTypeMax];
VAConfigID cfg;
inline void Destroy()
{
if(output_fd >= 0)
close(output_fd);
output_fd = -1;
if(output_thread)
{
pthread_mutex_lock(&mutex);
destroying = 1;
pthread_cond_signal(&output_cond);
pthread_mutex_unlock(&mutex);
pthread_join(output_thread, NULL);
pthread_mutex_destroy(&mutex);
pthread_cond_destroy(&output_cond);
}
if(dpy)
{
if(ctx != VA_INVALID_ID)
{
vaDestroyContext(dpy, ctx);
vaDestroyConfig(dpy, cfg);
cfg = VA_INVALID_ID;
ctx = VA_INVALID_ID;
vaDestroySurfaces(dpy, reference_picture, 3);
vaDestroySurfaces(dpy, inputFrames, CHAIN_SIZE);
if(output_buf != VA_INVALID_ID)
vaDestroyBuffer(dpy, output_buf);
output_buf = VA_INVALID_ID;
}
vaTerminate(dpy);
dpy = nullptr;
}
}
bool SetupVA(VAProfile profile, int drm_fd, int width, int height, const char *filename, int *dmabuf_fd, uint64_t *mod, uint32_t *size, uint32_t *offset, uint32_t *pitch1, uint32_t *pitch2, uint64_t *modifiers, int modifierscount)
{
VASurfaceAttrib va_attribs[5];
VASurfaceAttribExternalBuffers va_attrib_extbuf = {0};
VADRMPRIMESurfaceDescriptor drmSurface = {0};
VAStatus status;
int major, minor;
this->width = width;
this->height = height;
frame_count = 0;
destroying = 0;
dpy = vaGetDisplayDRM(drm_fd);
output_fd = open(filename, O_WRONLY | O_CREAT | O_TRUNC | O_CLOEXEC, 0644);
output_buf = VA_INVALID_ID;
status = vaInitialize(dpy, &major, &minor);
if(status != VA_STATUS_SUCCESS)
{
return false;
}
va_attrib_extbuf.pixel_format = VA_FOURCC_P010;
va_attrib_extbuf.width = width;
va_attrib_extbuf.height = height;
va_attrib_extbuf.flags = VA_SURFACE_EXTBUF_DESC_ENABLE_TILING;
va_attrib_extbuf.private_data = NULL;
VADRMFormatModifierList modList;
modList.modifiers = modifiers;
modList.num_modifiers = modifierscount;
va_attribs[0].type = VASurfaceAttribMemoryType;
va_attribs[0].flags = VA_SURFACE_ATTRIB_SETTABLE;
va_attribs[0].value.type = VAGenericValueTypeInteger;
va_attribs[0].value.value.i = VA_SURFACE_ATTRIB_MEM_TYPE_VA;
va_attribs[1].type = VASurfaceAttribUsageHint;
va_attribs[1].flags = VA_SURFACE_ATTRIB_SETTABLE;
va_attribs[1].value.type = VAGenericValueTypeInteger;
va_attribs[1].value.value.i = VA_SURFACE_ATTRIB_USAGE_HINT_EXPORT | VA_SURFACE_ATTRIB_USAGE_HINT_ENCODER;
va_attribs[2].type = VASurfaceAttribPixelFormat;
va_attribs[2].flags = VA_SURFACE_ATTRIB_SETTABLE;
va_attribs[2].value.type = VAGenericValueTypeInteger;
va_attribs[2].value.value.i = VA_FOURCC_P010;
va_attribs[3].type = VASurfaceAttribExternalBufferDescriptor;
va_attribs[3].flags = VA_SURFACE_ATTRIB_SETTABLE;
va_attribs[3].value.type = VAGenericValueTypePointer;
va_attribs[3].value.value.p = &va_attrib_extbuf;
va_attribs[4].type = VASurfaceAttribDRMFormatModifiers;
va_attribs[4].flags = VA_SURFACE_ATTRIB_SETTABLE;
va_attribs[4].value.type = VAGenericValueTypePointer;
va_attribs[4].value.value.p = &modList;
status = vaCreateSurfaces(dpy, VA_RT_FORMAT_YUV420_10,
width, height, inputFrames, CHAIN_SIZE,
&va_attribs[0], 5);
for(int i = 0; i < CHAIN_SIZE; i++)
{
status = vaExportSurfaceHandle(dpy, inputFrames[i], VA_SURFACE_ATTRIB_MEM_TYPE_DRM_PRIME_2, VA_EXPORT_SURFACE_WRITE_ONLY | VA_EXPORT_SURFACE_SEPARATE_LAYERS, &drmSurface );
//printf("%d %d %llx\n", status, drmSurface.objects[0].fd, drmSurface.objects[0].drm_format_modifier);
dmabuf_fd[i] = drmSurface.objects[0].fd;
if(status != VA_STATUS_SUCCESS)
{
return false;
}
}
*mod = drmSurface.objects[0].drm_format_modifier;
*size = drmSurface.objects[0].size;
*offset = drmSurface.layers[1].offset[0];
*pitch1 = drmSurface.layers[0].pitch[0];
*pitch2 = drmSurface.layers[1].pitch[0];
pthread_mutex_init(&mutex, NULL);
pthread_cond_init(&output_cond, NULL);
pthread_create(&output_thread, NULL, &OutputThread, this);
for (int i = 0; i < VAConfigAttribTypeMax; i++)
attrib[i].type = (VAConfigAttribType)i;
vaGetConfigAttributes(dpy, profile, VAEntrypointEncSlice, attrib, VAConfigAttribTypeMax);
return true;
}
void PushOutput(VABufferID buf)
{
pthread_mutex_lock(&mutex);
/* The mutex is never released while encoding, so this point should
* never be reached if input.valid is true. */
assert(output_buf == VA_INVALID_ID);
output_buf = buf;
pthread_cond_signal(&output_cond);
pthread_mutex_unlock(&mutex);
}
static void *OutputThread(void *data)
{
VaapiEncoder *r = (VaapiEncoder*)data;
pthread_mutex_lock(&r->mutex);
while (!r->destroying) {
if (r->output_buf == VA_INVALID_ID)
pthread_cond_wait(&r->output_cond, &r->mutex);
/* If the thread is awaken by destroy_worker_thread(),
* there might not be valid input */
if (r->output_buf == VA_INVALID_ID)
continue;
vaSyncBuffer(r->dpy, r->output_buf, UINT64_MAX);
r->WriteOutput(r->output_buf);
vaDestroyBuffer(r->dpy, r->output_buf);
r->output_buf = VA_INVALID_ID;
}
pthread_mutex_unlock(&r->mutex);
return NULL;
}
bool WriteOutput(VABufferID buf)
{
VACodedBufferSegment *segment;
VAStatus status;
int count = 0;
status = vaMapBuffer(dpy, buf, (void **) &segment);
if (status != VA_STATUS_SUCCESS)
return false;
do
{
if (segment->status & VA_CODED_BUF_STATUS_SLICE_OVERFLOW_MASK) {
vaUnmapBuffer(dpy, buf);
// todo: warning or mark for restart in lq?
return false;
}
assert(segment->size);
count += write(output_fd, segment->buf, segment->size);
segment = (VACodedBufferSegment *)segment->next;
}
while(segment);
vaUnmapBuffer(dpy, buf);
if (count < 0)
return false;
return true;
}
bool CreatePackedBuffer(VABufferID &par, VABufferID &dat, VAEncPackedHeaderType type, const BaseBitstream &buf )
{
VAEncPackedHeaderParameterBuffer packed_header;
VAStatus status;
packed_header.type = type;
packed_header.bit_length = buf.bit_offset;
packed_header.has_emulation_bytes = 0;
status = vaCreateBuffer(dpy, ctx,
VAEncPackedHeaderParameterBufferType,
sizeof packed_header, 1, &packed_header,
&par);
if (status != VA_STATUS_SUCCESS)
return false;
status = vaCreateBuffer(dpy, ctx,
VAEncPackedHeaderDataBufferType,
(buf.bit_offset + 7) / 8, 1, (void*)buf.buffer, &dat);
if (status != VA_STATUS_SUCCESS) {
vaDestroyBuffer(dpy, par);
return false;
}
return true;
}
bool CreateContext(VAProfile profile, uint32_t format, uint32_t fourcc, uint32_t rc)
{
VASurfaceAttrib attrs[2] = { {VASurfaceAttribMemoryType, VA_SURFACE_ATTRIB_SETTABLE,{VAGenericValueTypeInteger, 0},},
{VASurfaceAttribPixelFormat,VA_SURFACE_ATTRIB_SETTABLE,{VAGenericValueTypeInteger, 0},} };
VAConfigAttrib cfg_attrib[2];
VAStatus status;
attrs[0].value.value.i = VA_SURFACE_ATTRIB_MEM_TYPE_VA;
attrs[1].value.value.i = fourcc;
status = vaCreateSurfaces(dpy, format,
width, height,
reference_picture, 3,
attrs, 2);
if(status != VA_STATUS_SUCCESS)
return false;
cfg_attrib[0].type = VAConfigAttribRTFormat;
cfg_attrib[0].value = format;
cfg_attrib[1].type = VAConfigAttribRateControl;
cfg_attrib[1].value = rc;
status = vaCreateConfig(dpy, profile,
VAEntrypointEncSlice, cfg_attrib, 2,
&cfg);
if(status != VA_STATUS_SUCCESS)
return false;
status = vaCreateContext(dpy, cfg, width, height, VA_PROGRESSIVE, NULL, 0, &ctx);
if(status != VA_STATUS_SUCCESS)
{
vaDestroyConfig(dpy, cfg);
cfg = VA_INVALID_ID;
return false;
}
return true;
}
VABufferID CreateOutputBuf(int output_size)
{
VABufferID output_buf;
VAStatus status;
status = vaCreateBuffer(dpy, ctx,
VAEncCodedBufferType, output_size,
1, NULL, &output_buf);
if (status == VA_STATUS_SUCCESS)
return output_buf;
abort();
return VA_INVALID_ID;
}
template <typename T>
VABufferID CreateMiscParameterBuffer(VAEncMiscParameterType tp, const T &data)
{
struct d{
VAEncMiscParameterType type;
union{
T data;
uint32_t pad[1];
} d;
} buf;
buf.type = tp;
buf.d.data = data;
assert(sizeof(buf) == sizeof(VAEncMiscParameterBuffer) + sizeof(T));
VABufferID buffer;
if( vaCreateBuffer(dpy, ctx, VAEncMiscParameterBufferType, sizeof(buf), 1, &buf, &buffer) == VA_STATUS_SUCCESS)
return buffer;
abort();
return VA_INVALID_ID;
}
template <typename T>
VABufferID CreateParamererBuffer(VABufferType tp, const T &data)
{
VABufferID buffer;
if( vaCreateBuffer(dpy, ctx, tp, sizeof(data), 1, (void*)&data, &buffer) == VA_STATUS_SUCCESS)
return buffer;
abort();
return VA_INVALID_ID;
}
};
#endif // VAAPI_ENCODER_H

855
vaapi_encoder_hevc.h Normal file
View file

@ -0,0 +1,855 @@
#ifndef VAAPI_ENCODER_HEVC_H
#define VAAPI_ENCODER_HEVC_H
#include "vaapi_encoder.h"
#include <cstring>
#include <va/va_enc_hevc.h>
#define USE_P010 1

/* nal_ref_idc priority levels. */
#define NAL_REF_IDC_NONE 0
#define NAL_REF_IDC_LOW 1
#define NAL_REF_IDC_MEDIUM 2
#define NAL_REF_IDC_HIGH 3

/* HEVC slice_type values. */
enum {
	SLICE_B = 0,
	SLICE_P = 1,
	SLICE_I = 2,
};

/* Slice-type predicates. */
#define IS_I_SLICE(type) ((type) == SLICE_I)
#define IS_P_SLICE(type) ((type) == SLICE_P)
#define IS_B_SLICE(type) ((type) == SLICE_B)

/* Entropy coder selection. */
#define ENTROPY_MODE_CAVLC 0
#define ENTROPY_MODE_CABAC 1

/* general_profile_idc values. */
#define PROFILE_IDC_MAIN 1
#define PROFILE_IDC_MAIN10 2
/* HEVC nal_unit_type values. */
enum NALUType {
	/* Trailing and leading pictures */
	NALU_TRAIL_N = 0x00, // slice segment of a non-TSA, non-STSA trailing picture
	NALU_TRAIL_R = 0x01, // slice segment of a non-TSA, non-STSA trailing picture
	NALU_TSA_N = 0x02, // slice segment of a TSA picture
	NALU_TSA_R = 0x03, // slice segment of a TSA picture
	NALU_STSA_N = 0x04, // slice segment of an STSA picture
	NALU_STSA_R = 0x05, // slice segment of an STSA picture
	NALU_RADL_N = 0x06, // slice segment of a RADL picture
	NALU_RADL_R = 0x07, // slice segment of a RADL picture
	NALU_RASL_N = 0x08, // slice segment of a RASL picture
	NALU_RASL_R = 0x09, // slice segment of a RASL picture
	/* 0x0a..0x0f - reserved */

	/* Random access pictures */
	NALU_BLA_W_LP = 0x10, // slice segment of a BLA picture
	NALU_BLA_W_DLP = 0x11, // slice segment of a BLA picture
	NALU_BLA_N_LP = 0x12, // slice segment of a BLA picture
	NALU_IDR_W_DLP = 0x13, // slice segment of an IDR picture
	NALU_IDR_N_LP = 0x14, // slice segment of an IDR picture
	NALU_CRA = 0x15, // slice segment of a CRA picture
	/* 0x16..0x1f - reserved */

	/* Non-VCL units */
	NALU_VPS = 0x20, // video parameter set
	NALU_SPS = 0x21, // sequence parameter set
	NALU_PPS = 0x22, // picture parameter set
	NALU_AUD = 0x23, // access unit delimiter
	NALU_EOS = 0x24, // end of sequence
	NALU_EOB = 0x25, // end of bitstream
	NALU_FD = 0x26, // filler data
	NALU_PREFIX_SEI = 0x27, // supplemental enhancement information (prefix)
	NALU_SUFFIX_SEI = 0x28, // supplemental enhancement information (suffix)
	/* 0x29..0x2f - reserved */
	/* 0x30..0x3f - unspecified */

	// Must stay the last enumerator; raise it if more NAL unit types are added.
	MAX_HEVC_NAL_TYPE = 0x3f,
};
/**
 * Bit writer extended with the pieces every HEVC NAL unit built in this file
 * shares: start code, two-byte NAL header and the profile/tier/level block.
 */
struct BaseBitstreamHEVC : BaseBitstream
{
	/**
	 * Writes the start code: four bytes (00 00 00 01) in front of VPS, SPS,
	 * PPS and AUD units, three bytes (00 00 01) in front of everything else.
	 */
	inline void NalStartCodePrefix(int nal_unit_type)
	{
		switch (nal_unit_type) {
		case NALU_VPS:
		case NALU_SPS:
		case NALU_PPS:
		case NALU_AUD:
			PutUI(0x00000001, 32);
			break;
		default:
			PutUI(0x000001, 24);
			break;
		}
	}

	/** Writes the 16-bit NAL unit header for `nal_unit_type`. */
	inline void NalHeader(int nal_unit_type)
	{
		PutUI(0, 1);             // forbidden_zero_bit
		PutUI(nal_unit_type, 6); // nal_unit_type
		PutUI(0, 6);             // nuh_layer_id
		PutUI(1, 3);             // nuh_temporal_id_plus1
	}

	/**
	 * Writes the general profile/tier/level fields taken from `seq`; no
	 * sub-layer information is written.
	 */
	void ProfileTier(const VAEncSequenceParameterBufferHEVC *seq)
	{
		PutUI(0, 2);                        // general_profile_space
		PutUI(seq->general_tier_flag, 1);   // general_tier_flag
		PutUI(seq->general_profile_idc, 5); // general_profile_idc

		// general_profile_compatibility_flag[0..31]: only the bit that matches
		// the profile itself is set.
		// todo: configurable flags?
		for (uint32_t flag = 0; flag < 32; ++flag)
			PutUI(flag == seq->general_profile_idc, 1);

		PutUI(1, 1); // general_progressive_source_flag
		PutUI(0, 1); // general_interlaced_source_flag
		PutUI(1, 1); // general_non_packed_constraint_flag
		PutUI(1, 1); // general_frame_only_constraint_flag

		// 44 reserved zero bits, written as 16 + 16 + 12.
		PutUI(0, 16);
		PutUI(0, 16);
		PutUI(0, 12);

		PutUI(seq->general_level_idc, 8); // general_level_idc
	}
};
/* Value written for vps/sps_temporal_id_nesting_flag. */
#define TEMPORAL_ID_NESTING 1
/* Number of bits used for pic_order_cnt_lsb (log2_max_pic_order_cnt_lsb). */
#define POC_BITS 16
/* Rounds x up to the next multiple of 16. The argument is parenthesised so
 * that expressions with low-precedence operators (a | b, c ? d : e) expand
 * correctly. */
#define ALIGN16(x) (((x) + 15) & ~15)
/**
 * Packed video parameter set NAL unit.
 *
 * The constructor writes the complete unit (start code, header, payload and
 * trailing bits) and finalises the bitstream, so the object can be passed on
 * as a packed header right away. Only the profile/tier/level fields are taken
 * from `seq`; everything else is fixed.
 */
struct PackedVPSHEVC : BaseBitstreamHEVC
{
	PackedVPSHEVC(const VAEncSequenceParameterBufferHEVC *seq): BaseBitstreamHEVC()
	{
		NalStartCodePrefix(NALU_VPS);
		NalHeader(NALU_VPS);

		PutUI(0, 4);                   // vps_video_parameter_set_id
		PutUI(3, 2);                   // vps_base_layer_internal_flag, vps_base_layer_available_flag
		PutUI(0, 6);                   // vps_max_layers_minus1
		PutUI(0, 3);                   // vps_max_sub_layers_minus1
		PutUI(TEMPORAL_ID_NESTING, 1); // vps_temporal_id_nesting_flag
		PutUI(0xFFFF, 16);             // vps_reserved_0xffff_16bits

		ProfileTier(seq);

		// A single sub-layer, so the ordering info is written exactly once.
		PutUI(0, 1); // vps_sub_layer_ordering_info_present_flag
		// todo: check this. At least, breaks some hevc parsers when set to 0
		PutUE(1);    // vps_max_dec_pic_buffering_minus1[0]
		PutUE(0);    // vps_max_num_reorder_pics[0]
		PutUE(0);    // vps_max_latency_increase_plus1[0]

		PutUI(0, 6); // vps_max_layer_id
		PutUE(0);    // vps_num_layer_sets_minus1
		PutUI(0, 1); // vps_timing_info_present_flag (no timing/HRD info follows)

		// todo: bitstream restrictions?
		PutUI(0, 1); // vps_extension_flag

		RBSPTrailingBits();
		End();
	}
};
struct PackedSPSHEVC : BaseBitstreamHEVC
{
PackedSPSHEVC(const VAEncSequenceParameterBufferHEVC *sps, int width, int height): BaseBitstreamHEVC()
{
NalStartCodePrefix(NALU_SPS);
NalHeader(NALU_SPS);
uint32_t i = 0;
PutUI(0, 4); // sps.sps_video_parameter_set_id
PutUI(0, 3); // sps.sps_max_sub_layers_minus1
PutUI(TEMPORAL_ID_NESTING, 1); // sps.sps_temporal_id_nesting_flag
ProfileTier(sps);
PutUE(0); // sps.sps_seq_parameter_set_id
PutUE(sps->seq_fields.bits.chroma_format_idc); // sps.chroma_format_idc
// 4:2:0
// todo: check if we can use 4:4:4/4:2:2
if (sps->seq_fields.bits.chroma_format_idc == 3) {
PutUI(sps->seq_fields.bits.separate_colour_plane_flag, 1);
}
PutUE(ALIGN16(width) ); //sps.pic_width_in_luma_samples
PutUE(ALIGN16(height)); //sps.pic_height_in_luma_samples
bool conformance_window_flag = ALIGN16(width) != width || ALIGN16(height) != height;
PutUI(1, 1); // sps.conformance_window_flag
if (conformance_window_flag) { // sps.
PutUE(0); // sps.conf_win_left_offset
PutUE((ALIGN16(width) - width) >> 1); // sps.conf_win_right_offset
PutUE(0); // sps.conf_win_top_offset
PutUE((ALIGN16(height) - height) >> 1); // sps.conf_win_bottom_offset
}
PutUE(sps->seq_fields.bits.bit_depth_luma_minus8); //sps.bit_depth_luma_minus8
PutUE(sps->seq_fields.bits.bit_depth_chroma_minus8); //sps.bit_depth_chroma_minus8
PutUE(POC_BITS - 4); // sps.log2_max_pic_order_cnt_lsb_minus4
PutUI(0 , 1); //sps.sps_sub_layer_ordering_info_present_flag
//for (i = (sps.sps_sub_layer_ordering_info_present_flag ? 0 : sps.sps_max_sub_layers_minus1); i <= sps.sps_max_sub_layers_minus1; i++) {
// NOTE: In teddi and mv_encoder, the setting is max_dec_pic_buffering.
// here just follow the spec 7.3.2.2
// todo: check. At least, breaks some hevc parsers when set to 0
PutUE(0);//sps.sps_max_dec_pic_buffering_minus1[i]
PutUE(0);//sps.sps_max_num_reorder_pics[i]
PutUE(0); //sps.sps_max_latency_increase_plus1[i]
//}
PutUE(sps->log2_min_luma_coding_block_size_minus3); // sps.log2_min_luma_coding_block_size_minus3
PutUE(sps->log2_diff_max_min_luma_coding_block_size);//sps.log2_diff_max_min_luma_coding_block_size
PutUE(sps->log2_min_transform_block_size_minus2);//sps.log2_min_luma_transform_block_size_minus2
PutUE(sps->log2_diff_max_min_transform_block_size);//sps.log2_diff_max_min_luma_transform_block_size
PutUE(sps->max_transform_hierarchy_depth_inter);//sps.max_transform_hierarchy_depth_inter
PutUE(sps->max_transform_hierarchy_depth_inter);//sps.max_transform_hierarchy_depth_intra
assert(!sps->seq_fields.bits.scaling_list_enabled_flag);
// scaling_list_enabled_flag is set as 0 in fill_sps_header() for now
PutUI(0, 1); // sps.scaling_list_enabled_flag
/*if (sps.scaling_list_enabled_flag) {
PutUI(sps.sps_scaling_list_data_present_flag, 1);
if (sps.sps_scaling_list_data_present_flag) {
//scaling_list_data();
}
}*/
PutUI(sps->seq_fields.bits.amp_enabled_flag, 1); // sps.amp_enabled_flag
PutUI(sps->seq_fields.bits.sample_adaptive_offset_enabled_flag, 1); // sps.sample_adaptive_offset_enabled_flag
// pcm_enabled_flag is set as 0 in fill_sps_header() for now
PutUI(sps->seq_fields.bits.pcm_enabled_flag, 1);//sps.pcm_enabled_flag
assert(!sps->seq_fields.bits.pcm_enabled_flag);
if (sps->seq_fields.bits.pcm_enabled_flag) {
PutUI(sps->pcm_sample_bit_depth_luma_minus1, 4);
PutUI(sps->pcm_sample_bit_depth_chroma_minus1, 4);
PutUE(sps->log2_min_pcm_luma_coding_block_size_minus3);
PutUE(sps->log2_max_pcm_luma_coding_block_size_minus3 - sps->log2_min_pcm_luma_coding_block_size_minus3); //log2_diff_max_min_pcm_luma_coding_block_size
PutUI(sps->seq_fields.bits.pcm_loop_filter_disabled_flag, 1);
}
PutUE(1);// sps.num_short_term_ref_pic_sets
PutUE(1); // num_negative_pics
PutUE(0); // num_positive_pics
PutUE(0); //delta_poc_s0_minus1[i]
PutUI(1, 1);//used_by_curr_pic_s0_flag[i]
/*for (i = 0; i < sps.num_short_term_ref_pic_sets; i++) {
pack_short_term_ref_pic_setp(bs, &sps.strp[i], i == 0);
}*/
// long_term_ref_pics_present_flag is set as 0 in fill_sps_header() for now
PutUI(0, 1); // sps.long_term_ref_pics_present_flag
/*if (sps.long_term_ref_pics_present_flag) {
PutUE(sps.num_long_term_ref_pics_sps);
for (i = 0; i < sps.num_long_term_ref_pics_sps; i++) {
PutUE(sps.lt_ref_pic_poc_lsb_sps[i]);
PutUI(sps.used_by_curr_pic_lt_sps_flag[i], 1);
}
}*/
PutUI(sps->seq_fields.bits.sps_temporal_mvp_enabled_flag, 1); // sps.sps_temporal_mvp_enabled_flag
PutUI(sps->seq_fields.bits.strong_intra_smoothing_enabled_flag, 1); //sps.strong_intra_smoothing_enabled_flag
assert(!sps->vui_parameters_present_flag);
PutUI(sps->vui_parameters_present_flag, 1); // sps.vui_parameters_present_flag
PutUI(0, 1); // sps.sps_extension_present_flag
RBSPTrailingBits();
End();
}
};
/* Value written for cabac_init_present_flag in the PPS. */
#define PPS_CABAC_INIT_PRESENT_FLAG 1

/**
 * Packed picture parameter set NAL unit.
 *
 * The constructor writes the complete unit (start code, header, payload and
 * trailing bits) and finalises the bitstream. Tool flags, QP settings and the
 * tile layout come from `pic`; the remaining fields are fixed. Scaling list
 * data is not supported (asserted).
 */
struct PackedPPSHEVC : BaseBitstreamHEVC
{
	PackedPPSHEVC(const VAEncPictureParameterBufferHEVC *pic): BaseBitstreamHEVC()
	{
		// Deblocking control syntax is never emitted by this encoder.
		const bool deblocking_filter_control_present_flag = false;

		NalStartCodePrefix(NALU_PPS);
		NalHeader(NALU_PPS);

		PutUE(0); // pps_pic_parameter_set_id
		PutUE(0); // pps_seq_parameter_set_id
		PutUI(pic->pic_fields.bits.dependent_slice_segments_enabled_flag, 1); // dependent_slice_segments_enabled_flag // TODO: !!!
		PutUI(0, 1); // output_flag_present_flag
		PutUI(0, 3); // num_extra_slice_header_bits
		PutUI(pic->pic_fields.bits.sign_data_hiding_enabled_flag, 1); // sign_data_hiding_enabled_flag
		PutUI(PPS_CABAC_INIT_PRESENT_FLAG, 1);                        // cabac_init_present_flag
		PutUE(pic->num_ref_idx_l0_default_active_minus1);             // num_ref_idx_l0_default_active_minus1
		PutUE(pic->num_ref_idx_l1_default_active_minus1);             // num_ref_idx_l1_default_active_minus1
		PutSE(pic->pic_init_qp - 26);                                 // init_qp_minus26
		PutUI(pic->pic_fields.bits.constrained_intra_pred_flag, 1);   // constrained_intra_pred_flag
		PutUI(pic->pic_fields.bits.transform_skip_enabled_flag, 1);   // transform_skip_enabled_flag

		PutUI(pic->pic_fields.bits.cu_qp_delta_enabled_flag, 1);      // cu_qp_delta_enabled_flag
		if (pic->pic_fields.bits.cu_qp_delta_enabled_flag)
			PutUE(pic->diff_cu_qp_delta_depth);                       // diff_cu_qp_delta_depth

		PutSE(pic->pps_cb_qp_offset); // pps_cb_qp_offset
		PutSE(pic->pps_cr_qp_offset); // pps_cr_qp_offset
		PutUI(0, 1);                  // pps_slice_chroma_qp_offsets_present_flag
		PutUI(pic->pic_fields.bits.weighted_pred_flag, 1);               // weighted_pred_flag
		PutUI(pic->pic_fields.bits.weighted_bipred_flag, 1);             // weighted_bipred_flag
		PutUI(pic->pic_fields.bits.transquant_bypass_enabled_flag, 1);   // transquant_bypass_enabled_flag
		PutUI(pic->pic_fields.bits.tiles_enabled_flag, 1);               // tiles_enabled_flag
		PutUI(pic->pic_fields.bits.entropy_coding_sync_enabled_flag, 1); // entropy_coding_sync_enabled_flag

		if (pic->pic_fields.bits.tiles_enabled_flag) {
			// Tile sizes are always listed explicitly.
			const bool uniform_spacing_flag = false;
			PutUE(pic->num_tile_columns_minus1);
			PutUE(pic->num_tile_rows_minus1);
			PutUI(uniform_spacing_flag, 1); // uniform_spacing_flag
			if (!uniform_spacing_flag) {
				// The last column/row size is implied, hence "< minus1".
				for (uint32_t col = 0; col < pic->num_tile_columns_minus1; ++col)
					PutUE(pic->column_width_minus1[col]);
				for (uint32_t row = 0; row < pic->num_tile_rows_minus1; ++row)
					PutUE(pic->row_height_minus1[row]);
			}
			PutUI(pic->pic_fields.bits.loop_filter_across_tiles_enabled_flag, 1);
		}

		PutUI(pic->pic_fields.bits.pps_loop_filter_across_slices_enabled_flag, 1); // pps_loop_filter_across_slices_enabled_flag
		PutUI(deblocking_filter_control_present_flag, 1);                          // deblocking_filter_control_present_flag
		if (deblocking_filter_control_present_flag) {
			const bool deblocking_filter_override_enabled_flag = false;
			const bool pps_deblocking_filter_disabled_flag = true;
			const int beta_offset_div2 = 0;
			const int tc_offset_div2 = 0;
			PutUI(deblocking_filter_override_enabled_flag, 1);
			PutUI(pps_deblocking_filter_disabled_flag, 1);
			if (!pps_deblocking_filter_disabled_flag) {
				PutSE(beta_offset_div2);
				PutSE(tc_offset_div2);
			}
		}

		// Scaling list data is not written, so the flag has to be 0.
		assert(!pic->pic_fields.bits.scaling_list_data_present_flag);
		PutUI(pic->pic_fields.bits.scaling_list_data_present_flag, 1); // pps_scaling_list_data_present_flag

		PutUI(0, 1); // lists_modification_present_flag
		PutUE(0);    // log2_parallel_merge_level_minus2
		PutUI(0, 1); // slice_segment_header_extension_present_flag
		PutUI(0, 1); // pps_extension_present_flag

		RBSPTrailingBits();
		End();
	}
};
struct PackedSliceHEVC : BaseBitstreamHEVC
{
PackedSliceHEVC(int framenum, const VAEncSequenceParameterBufferHEVC *sps, const VAEncSliceParameterBufferHEVC *slice, const VAEncPictureParameterBufferHEVC *pic): BaseBitstreamHEVC()
{
uint8_t nal_unit_type = NALU_TRAIL_R;
//int gop_ref_distance = ip_period;
int i = 0;
bool is_idr = framenum == 0;
int slice_type = is_idr?SLICE_I : SLICE_P;
int short_term_ref_pic_set_sps_flag = 1; // !is_idr;
int slice_qp_delta = slice->slice_qp_delta;
int pic_order_cnt_lsb = framenum;
if (pic_order_cnt_lsb == 0)
nal_unit_type = NALU_IDR_W_DLP;
NalStartCodePrefix(nal_unit_type);
NalHeader(nal_unit_type);
PutUI(1, 1);// first_slice_segment_in_pic_flag
// idr
if (nal_unit_type >= 16 && nal_unit_type <= 23)
PutUI(1, 1); //no_output_of_prior_pics_flag
PutUE(0);//slice_pic_parameter_set_id
/*if (!slice_header->first_slice_segment_in_pic_flag) {
if (slice_header->dependent_slice_segment_flag) {
PutUI(slice_header->dependent_slice_segment_flag, 1);
}
PutUI(slice_header->slice_segment_address,
(uint8_t)(ceil(log(slice_header->picture_height_in_ctus * slice_header->picture_width_in_ctus) / log(2.0))));
}*/
// !slice_header->dependent_slice_segment_flag
if (!slice->slice_fields.bits.dependent_slice_segment_flag) {
/*for (i = 0; i < pps->num_extra_slice_header_bits; i++) {
PutUI(slice_header->slice_reserved_undetermined_flag[i], 1);
}*/
PutUE(slice_type);
/*if (pps->output_flag_present_flag) {
PutUI(slice_header->pic_output_flag, 1);
}*/
if(sps->seq_fields.bits.separate_colour_plane_flag)
PutUI(slice->slice_fields.bits.colour_plane_id, 2);
if (!(nal_unit_type == NALU_IDR_W_DLP || nal_unit_type == NALU_IDR_N_LP)) {
// slice_header->pic_order_cnt_lsb
PutUI(pic_order_cnt_lsb, POC_BITS );//(sps->log2_max_pic_order_cnt_lsb_minus4 + 4)
PutUI(1, 1); // short_term_ref_pic_set_sps_flag
// assume we are only pushing I-slices on IDR frames, SPS only references (n-1)th frame for now
// this should be restored when p-slice references something different or using CRA frames
#if 0
if (!short_term_ref_pic_set_sps_flag) {
// refer to Teddi
if (sps->num_short_term_ref_pic_sets > 0)
PutUI(0, 1); // inter_ref_pic_set_prediction_flag, always 0 for now
PutUE(slice_header->strp.num_negative_pics);
PutUE(slice_header->strp.num_positive_pics);
// below chunks of codes (majorly two big 'for' blocks) are refering both
// Teddi and mv_encoder, they look kind of ugly, however, keep them as these
// since it will be pretty easy to update if change/update in Teddi side.
// According to Teddi, these are CModel Implementation.
int prev = 0;
int frame_cnt_in_gop = slice_header->pic_order_cnt_lsb / 2;
// this is the first big 'for' block
for (i = 0; i < slice_header->strp.num_negative_pics; i++) {
// Low Delay B case
if (1 == gop_ref_distance) {
PutUE(0 /*delta_poc_s0_minus1*/);
} else {
// For Non-BPyramid GOP i.e B0 type
if (num_active_ref_p > 1) {
// DeltaPOC Equals NumB
int DeltaPoc = -(int)(gop_ref_distance);
PutUE(prev - DeltaPoc - 1 /*delta_poc_s0_minus1*/);
} else {
// the big 'if' wraps here is -
// if (!slice_header->short_term_ref_pic_set_sps_flag)
// From the Teddi logic, the short_term_ref_pic_set_sps_flag only can be '0'
// either for B-Prymid or first several frames in a GOP in multi-ref cases
// when there are not enough backward refs.
// So though there are really some codes under this 'else'in Teddi, don't
// want to introduce them in MEA to avoid confusion, and put an assert
// here to guard that there is new case we need handle in the future.
assert(0);
}
}
PutUI(1 /*used_by_curr_pic_s0_flag*/, 1);
}
prev = 0;
// this is the second big 'for' block
for (i = 0; i < slice_header->strp.num_positive_pics; i++) {
// Non-BPyramid GOP
if (num_active_ref_p > 1) {
// MultiRef Case
if (frame_cnt_in_gop < gop_ref_distance) {
int DeltaPoc = (int)(gop_ref_distance - frame_cnt_in_gop);
PutUE(DeltaPoc - prev - 1 /*delta_poc_s1_minus1*/);
} else if (frame_cnt_in_gop > gop_ref_distance) {
int DeltaPoc = (int)(gop_ref_distance * slice_header->strp.num_negative_pics - frame_cnt_in_gop);
PutUE(DeltaPoc - prev - 1 /*delta_poc_s1_minus1*/);
}
} else {
// the big 'if' wraps here is -
// if (!slice_header->short_term_ref_pic_set_sps_flag)
// From the Teddi logic, the short_term_ref_pic_set_sps_flag only can be '0'
// either for B-Prymid or first several frames in a GOP in multi-ref cases
// when there are not enough backward refs.
// So though there are really some codes under this 'else'in Teddi, don't
// want to introduce them in MEA to avoid confusion, and put an assert
// here to guard that there is new case we need handle in the future.
assert(0);
}
PutUI(1 /*used_by_curr_pic_s1_flag*/, 1);
}
} else if (sps->num_short_term_ref_pic_sets > 1)
PutUI(slice_header->short_term_ref_pic_set_idx,
(uint8_t)(ceil(log(sps->num_short_term_ref_pic_sets) / log(2.0))));
#endif
// no long term refs
#if 0
if (sps->long_term_ref_pics_present_flag) {
if (sps->num_long_term_ref_pics_sps > 0)
PutUE(slice_header->num_long_term_sps);
PutUE(slice_header->num_long_term_pics);
}
#endif
if (sps->seq_fields.bits.sps_temporal_mvp_enabled_flag)
PutUI(slice->slice_fields.bits.slice_temporal_mvp_enabled_flag, 1);
}
if (sps->seq_fields.bits.sample_adaptive_offset_enabled_flag ) { // sample_adaptive_offset_enabled_flag
PutUI(slice->slice_fields.bits.slice_sao_luma_flag, 1);// slice_sao_luma_flag
PutUI(slice->slice_fields.bits.slice_sao_chroma_flag, 1);//slice_sao_chroma_flag
}
if (slice_type != SLICE_I) {
PutUI(slice->slice_fields.bits.num_ref_idx_active_override_flag, 1); //num_ref_idx_active_override_flag
if (slice->slice_fields.bits.num_ref_idx_active_override_flag) {
PutUE(slice->num_ref_idx_l0_active_minus1);
//if (slice->slice_type == SLICE_B)
//PutUE(slice->num_ref_idx_l1_active_minus1);
}
#if 0
if (pps->lists_modification_present_flag && slice_header->num_poc_total_cur > 1) {
/* ref_pic_list_modification */
PutUI(slice_header->ref_pic_list_modification_flag_l0, 1);
if (slice_header->ref_pic_list_modification_flag_l0) {
for (i = 0; i <= slice_header->num_ref_idx_l0_active_minus1; i++) {
PutUI(slice_header->list_entry_l0[i],
(uint8_t)(ceil(log(slice_header->num_poc_total_cur) / log(2.0))));
}
}
PutUI(slice_header->ref_pic_list_modification_flag_l1, 1);
if (slice_header->ref_pic_list_modification_flag_l1) {
for (i = 0; i <= slice_header->num_ref_idx_l1_active_minus1; i++) {
PutUI(slice_header->list_entry_l1[i],
(uint8_t)(ceil(log(slice_header->num_poc_total_cur) / log(2.0))));
}
}
}
#endif
/*if (slice_header->slice_type == SLICE_B) {
PutUI(slice_header->mvd_l1_zero_flag, 1);
}*/
if (PPS_CABAC_INIT_PRESENT_FLAG) {
PutUI(slice->slice_fields.bits.cabac_init_flag, 1); //slice_header->cabac_init_present_flag
}
if (slice->slice_fields.bits.slice_temporal_mvp_enabled_flag) {
int collocated_from_l0_flag = 1;
if (slice->slice_type == SLICE_B) {
collocated_from_l0_flag = slice->slice_fields.bits.collocated_from_l0_flag;
PutUI(collocated_from_l0_flag , 1);
}
if (((collocated_from_l0_flag && (slice->num_ref_idx_l0_active_minus1 > 0)) ||
(!collocated_from_l0_flag && (slice->num_ref_idx_l1_active_minus1 > 0)))) {
PutUE(pic->num_ref_idx_l0_default_active_minus1); // collocated_ref_idx
}
}
PutUE(5 - slice->max_num_merge_cand);//slice_header->five_minus_max_num_merge_cand
}
PutSE(slice_qp_delta);
/*if (pps->chroma_qp_offset_list_enabled_flag) {
PutSE(slice_header->slice_qp_delta_cb);
PutSE(slice_header->slice_qp_delta_cr);
}
if (pps->deblocking_filter_override_enabled_flag) {
PutUI(slice_header->deblocking_filter_override_flag, 1);
}
if (slice_header->deblocking_filter_override_flag) {
PutUI(slice_header->disable_deblocking_filter_flag, 1);
if (!slice_header->disable_deblocking_filter_flag) {
PutSE(slice_header->beta_offset_div2);
PutSE(slice_header->tc_offset_div2);
}
}*/
if (pic->pic_fields.bits.pps_loop_filter_across_slices_enabled_flag &&
(slice->slice_fields.bits.slice_sao_luma_flag || slice->slice_fields.bits.slice_sao_chroma_flag ||
!slice->slice_fields.bits.slice_deblocking_filter_disabled_flag)) {
PutUI(slice->slice_fields.bits.slice_loop_filter_across_slices_enabled_flag, 1);
}
}
if ((pic->pic_fields.bits.tiles_enabled_flag) || (pic->pic_fields.bits.entropy_coding_sync_enabled_flag)) {
int num_entry_point_offsets = 0, offset_len_minus1 = 0;
PutUE(num_entry_point_offsets);
if (num_entry_point_offsets > 0) {
PutUE(offset_len_minus1);
}
}
/*if (pps->slice_segment_header_extension_present_flag) {
int slice_header_extension_length = 0;
PutUE(slice_header_extension_length);
}*/
RBSPTrailingBits();
End();
}
};
// HEVC encoder front-end built on VaapiEncoder.
//
// Holds one sequence, one picture and one slice parameter struct. They are
// filled with fixed settings by InitParameters() and re-submitted, with
// per-frame fields patched, by EncodeIDR() (IDR frame plus all headers) and
// EncodeP() (P frame referencing only the previous frame).
//
// NOTE(review): dpy, ctx, width, height, frame_count, inputFrames,
// reference_picture and the SetupVA / CreateContext / CreateParamererBuffer /
// CreateMiscParameterBuffer / CreatePackedBuffer / CreateOutputBuf /
// PushOutput helpers are not declared in this struct; presumably they are
// inherited from VaapiEncoder -- confirm against the base class.
struct VaapiEncoderHEVC: VaapiEncoder
{
    VAEncSequenceParameterBufferHEVC seq;
    VAEncPictureParameterBufferHEVC pic;
    VAEncSliceParameterBufferHEVC slice;

    // Fill seq / pic / slice with the fixed encoder configuration.
    // Only the fields assigned below are touched; everything else keeps
    // whatever value the object was initialised with (the caller is expected
    // to value-initialise the encoder).
    // Reads the `width` and `height` members, so they must be set beforehand.
    void InitParameters()
    {
        // sps
        // seems to work
        seq.seq_fields.bits.sps_temporal_mvp_enabled_flag = 1;
        // todo: check if we can use 4:4:4/4:2:2
        seq.seq_fields.bits.chroma_format_idc = 1; // 4:2:0
        seq.pic_width_in_luma_samples = ALIGN16(width);
        seq.pic_height_in_luma_samples = ALIGN16(height);
        // todo: tunable block size
        seq.log2_diff_max_min_luma_coding_block_size = 2;
        seq.log2_diff_max_min_transform_block_size = 3;
        seq.log2_min_transform_block_size_minus2 = 0;
        seq.max_transform_hierarchy_depth_inter = 2;
        seq.max_transform_hierarchy_depth_intra = 2;
        seq.seq_fields.bits.amp_enabled_flag = 1;
        // SAO stays off: broken on intel? or broken bitstream?
        seq.seq_fields.bits.sample_adaptive_offset_enabled_flag = 0;
        seq.intra_idr_period = 32767;
        seq.intra_period = 32767;
        seq.ip_period = 1;
        if (USE_P010)
        {
            // 10-bit samples
            seq.seq_fields.bits.bit_depth_chroma_minus8 = 2;
            seq.seq_fields.bits.bit_depth_luma_minus8 = 2;
        }
        seq.bits_per_second = 150 * 1024 * 1024; //(long long)r->width * r->height * 12 * 90 / 50;

        // vps
        // (none?)

        // profile
        seq.general_level_idc = 120;
        seq.general_profile_idc = USE_P010 ? 2 : 1;

        // pps
        pic.pic_fields.bits.dependent_slice_segments_enabled_flag = 1; // seems to work with both values
        pic.pic_fields.bits.transform_skip_enabled_flag = 1;

        // pic
        pic.collocated_ref_pic_index = 0; // 255;
        pic.pic_init_qp = 26;
        pic.nal_unit_type = NALU_IDR_W_DLP;
        pic.pic_fields.bits.idr_pic_flag = 1;
        pic.pic_fields.bits.coding_type = 1;
        pic.pic_fields.bits.reference_pic_flag = 1;
        // seems to work
        pic.pic_fields.bits.pps_loop_filter_across_slices_enabled_flag = 1;
        pic.pic_fields.bits.cu_qp_delta_enabled_flag = 1; // CBR
        if (pic.pic_fields.bits.cu_qp_delta_enabled_flag)
            pic.diff_cu_qp_delta_depth = 2;
        // Start with every reference slot marked unused.
        for (int i = 0; i < 15; i++)
        {
            pic.reference_frames[i].picture_id = VA_INVALID_SURFACE;
            pic.reference_frames[i].flags = VA_PICTURE_HEVC_INVALID;
            pic.reference_frames[i].pic_order_cnt = 0;
        }

        // slice
        // num_ref_idx_active_override_flag is left untouched (0 when the
        // object was value-initialised).
        slice.slice_qp_delta = 0;
        // A single slice covers the whole picture.
        int lcu_size = 32; // todo: block size settings?
        int picture_width_in_ctus = (width + lcu_size - 1) / lcu_size;
        int picture_height_in_ctus = (height + lcu_size - 1) / lcu_size;
        slice.num_ctu_in_slice = picture_width_in_ctus * picture_height_in_ctus;
        slice.max_num_merge_cand = 5;
        // collocated_from_l0_flag and the slice SAO flags are deliberately not
        // set here; the SAO ones were marked "broken on intel???".

        // memset fills every byte of both lists (including any fields not
        // named in the loop below) with 0xFF; the loop then spells out the
        // three fields that matter.
        memset((void*)slice.ref_pic_list0, -1, sizeof(slice.ref_pic_list0));
        memset((void*)slice.ref_pic_list1, -1, sizeof(slice.ref_pic_list1));
        for (int i = 0; i < 15; i++)
        {
            slice.ref_pic_list0[i].flags = -1; // VA_PICTURE_HEVC_INVALID;
            slice.ref_pic_list0[i].picture_id = VA_INVALID_SURFACE;
            slice.ref_pic_list0[i].pic_order_cnt = -1;
            slice.ref_pic_list1[i].flags = -1; // VA_PICTURE_HEVC_INVALID;
            slice.ref_pic_list1[i].picture_id = VA_INVALID_SURFACE;
            slice.ref_pic_list1[i].pic_order_cnt = -1;
        }
    }

    // Set up VA and the encode context, then initialise the parameter structs.
    // All arguments are forwarded unchanged to SetupVA().
    // Returns true on success; on any failure VaapiEncoder::Destroy() is
    // called and false is returned.
    // NOTE(review): the profile / RT format / fourcc here are hard-coded to
    // the 10-bit variants while InitParameters() consults USE_P010 -- confirm
    // that the two are meant to stay in sync.
    bool Setup(int drm_fd, int width, int height, const char *filename, int *dmabuf_fd, uint64_t *mod, uint32_t *size, uint32_t *offset, uint32_t *pitch1, uint32_t *pitch2, uint64_t *modifiers, int modifierscount)
    {
        VAProfile profile = VAProfileHEVCMain10;
        // Short-circuit keeps the original order: CreateContext() only runs
        // when SetupVA() succeeded.
        if (!SetupVA(profile, drm_fd, width, height, filename, dmabuf_fd, mod, size, offset, pitch1, pitch2, modifiers, modifierscount) ||
            !CreateContext(profile, VA_RT_FORMAT_YUV420_10, VA_FOURCC_P010, VA_RC_CBR))
        {
            VaapiEncoder::Destroy();
            return false;
        }
        InitParameters();
        return true;
    }

    // Encode inputFrames[idx] as an IDR frame.
    // Submits the sequence parameters, packed VPS/SPS/PPS headers, frame-rate,
    // HRD and rate-control misc parameters, the picture parameters and one
    // I slice. Aborts the process if vaEndPicture() fails.
    // Increments frame_count.
    inline void EncodeIDR(int idx)
    {
        VAStatus status = vaBeginPicture(dpy, ctx, inputFrames[idx]);
        VABufferID seqb = CreateParamererBuffer(VAEncSequenceParameterBufferType, seq);

        // An I slice has no references: reset slot 0, which EncodeP() fills.
        slice.slice_type = SLICE_I;
        slice.ref_pic_list0[0].pic_order_cnt = -1;
        slice.ref_pic_list0[0].picture_id = VA_INVALID_SURFACE;
        slice.ref_pic_list0[0].flags = -1;
        VABufferID sliceb = CreateParamererBuffer(VAEncSliceParameterBufferType, slice);
        VABufferID output = CreateOutputBuf(width * height);

        // Reconstructed pictures alternate between the two reference surfaces.
        pic.decoded_curr_pic.picture_id = reference_picture[frame_count % 2];
        pic.decoded_curr_pic.pic_order_cnt = frame_count;
        pic.reference_frames[0].picture_id = VA_INVALID_SURFACE;
        pic.reference_frames[0].flags = VA_PICTURE_HEVC_INVALID;
        pic.reference_frames[0].pic_order_cnt = 0;
        pic.coded_buf = output;
        pic.pic_fields.bits.idr_pic_flag = 1;
        pic.nal_unit_type = NALU_IDR_W_DLP;
        pic.pic_fields.bits.coding_type = 1;
        VABufferID picb = CreateParamererBuffer(VAEncPictureParameterBufferType, pic);

        VABufferID fpsb = CreateMiscParameterBuffer(VAEncMiscParameterTypeFrameRate, VAEncMiscParameterFrameRate{.framerate = 90});
        VABufferID hrdb = CreateMiscParameterBuffer(VAEncMiscParameterTypeHRD, VAEncMiscParameterHRD{});
        VABufferID rcb = CreateMiscParameterBuffer(VAEncMiscParameterTypeRateControl, VAEncMiscParameterRateControl{
            // Same bitrate as the sequence parameters (set in InitParameters()).
            .bits_per_second = seq.bits_per_second,
            .target_percentage = 66,
            .window_size = 1000,
            .initial_qp = 25
        });

        VABufferID ppps[2];
        VABufferID pvps[2];
        VABufferID psps[2];
        CreatePackedBuffer(pvps[0], pvps[1], VAEncPackedHeaderSequence, PackedVPSHEVC(&seq));
        CreatePackedBuffer(psps[0], psps[1], VAEncPackedHeaderSequence, PackedSPSHEVC(&seq, width, height));
        CreatePackedBuffer(ppps[0], ppps[1], VAEncPackedHeaderPicture, PackedPPSHEVC(&pic));
        VABufferID pslice[2];
        CreatePackedBuffer(pslice[0], pslice[1], VAEncPackedHeaderSlice,
            PackedSliceHEVC(frame_count, &seq, &slice, &pic));

        VABufferID buffers[] = {seqb, pvps[0], pvps[1], psps[0], psps[1], fpsb, hrdb, rcb, ppps[0], ppps[1], picb, pslice[0], pslice[1], sliceb};
        vaRenderPicture(dpy, ctx, buffers, sizeof(buffers) / sizeof(buffers[0]));
        status = vaEndPicture(dpy, ctx);
        if (status != VA_STATUS_SUCCESS)
            abort();

        // The coded buffer is not synced or written here; it is handed to
        // PushOutput() and is not among the buffers destroyed below.
        PushOutput(output);
        for (VABufferID buffer : buffers)
            vaDestroyBuffer(dpy, buffer);
        frame_count++;
    }

    // Encode inputFrames[idx] as a P frame predicted from the previous frame
    // (POC frame_count - 1). Only the picture parameters, a packed slice
    // header and the slice parameters are submitted.
    // Must not be the first frame: it relies on a reference written by an
    // earlier EncodeIDR()/EncodeP() call.
    // Aborts the process if vaEndPicture() fails, matching EncodeIDR().
    // Increments frame_count.
    inline void EncodeP(int idx)
    {
        VAStatus status = vaBeginPicture(dpy, ctx, inputFrames[idx]);
        // todo: chain slice/output buffers, patch POC in slice buffers???

        // The previous frame's reconstruction lives in the "other" surface.
        // (frame_count + 1) % 2 has the same parity as (frame_count - 1) % 2
        // but can never yield a negative index.
        const auto prev_ref = reference_picture[(frame_count + 1) % 2];

        slice.slice_type = SLICE_P;
        slice.ref_pic_list0[0].pic_order_cnt = frame_count - 1;
        slice.ref_pic_list0[0].picture_id = prev_ref;
        slice.ref_pic_list0[0].flags = 0; // VA_PICTURE_HEVC_RPS_ST_CURR_BEFORE;
        VABufferID sliceb = CreateParamererBuffer(VAEncSliceParameterBufferType, slice);
        VABufferID output = CreateOutputBuf(width * height);

        pic.decoded_curr_pic.picture_id = reference_picture[frame_count % 2];
        pic.decoded_curr_pic.pic_order_cnt = frame_count;
        pic.reference_frames[0].picture_id = prev_ref;
        pic.reference_frames[0].flags = 0; // VA_PICTURE_HEVC_RPS_ST_CURR_BEFORE;
        pic.reference_frames[0].pic_order_cnt = frame_count - 1;
        pic.coded_buf = output;
        pic.pic_fields.bits.idr_pic_flag = 0;
        pic.nal_unit_type = NALU_TRAIL_R;
        pic.pic_fields.bits.coding_type = 2;
        VABufferID picb = CreateParamererBuffer(VAEncPictureParameterBufferType, pic);

        VABufferID pslice[2];
        CreatePackedBuffer(pslice[0], pslice[1], VAEncPackedHeaderSlice,
            PackedSliceHEVC(frame_count, &seq, &slice, &pic));

        VABufferID buffers[] = {picb, pslice[0], pslice[1], sliceb};
        vaRenderPicture(dpy, ctx, buffers, sizeof(buffers) / sizeof(buffers[0]));
        status = vaEndPicture(dpy, ctx);
        // Previously this status was stored but never examined, so a failed
        // submission still pushed an output buffer with no encoded frame.
        if (status != VA_STATUS_SUCCESS)
            abort();

        // As in EncodeIDR(): no sync here, the coded buffer goes to PushOutput().
        PushOutput(output);
        for (VABufferID buffer : buffers)
            vaDestroyBuffer(dpy, buffer);
        frame_count++;
    }
};
#endif // VAAPI_ENCODER_HEVC_H

View file

@ -35,7 +35,8 @@
#include <sys/ioctl.h>
#include <stdlib.h>
#include <unistd.h>
#include "vaapi-recorder.h"
//#include "vaapi-recorder.h"
#include "vaapi_encoder_hevc.h"
struct DrmHelper
{
@ -216,7 +217,7 @@ struct ComputeApplication {
VkCommandBuffer commandBuffer;
UBO *pMappedUBO = NULL;
VkFence fence;
bool running;
bool running = false;
} chain[CHAIN_SIZE];
@ -1347,7 +1348,9 @@ struct ComputeApplication {
int fd[CHAIN_SIZE];
uint64_t modifiers[32];
int count = getAvailiableModifiersList(modifiers, 32, VK_FORMAT_R16_UNORM);
auto *r = vaapi_recorder_create5(drm_fd, WIDTH, HEIGHT, "out.264", fd, &mod, &size, &offset, &pitch1, &pitch2, modifiers, count);
//auto *r = vaapi_recorder_create5(drm_fd, WIDTH, HEIGHT, "out.264", fd, &mod, &size, &offset, &pitch1, &pitch2, modifiers, count);
VaapiEncoderHEVC enc = {};
enc.Setup(drm_fd, WIDTH, HEIGHT, "out.265", fd, &mod, &size, &offset, &pitch1, &pitch2, modifiers, count);
for(int i = 0; i < CHAIN_SIZE; i++)
{
createUBO(i);
@ -1375,7 +1378,11 @@ struct ComputeApplication {
#ifndef SKIP_FENCE_SYNC
waitFence(chidx);
#endif
recorder_frame4(r, chidx);
//recorder_frame4(r, chidx);
if(frameNum == 1)
enc.EncodeIDR(chidx);
else
enc.EncodeP(chidx);
chain[chidx].pMappedUBO->frameNum = frameNum;
}