#ifndef VAAPI_ENCODER_HEVC_H #define VAAPI_ENCODER_HEVC_H #include "vaapi_encoder.h" #include #include #define NAL_REF_IDC_NONE 0 #define NAL_REF_IDC_LOW 1 #define NAL_REF_IDC_MEDIUM 2 #define NAL_REF_IDC_HIGH 3 // SLICE TYPE HEVC ENUM enum { SLICE_B = 0, SLICE_P = 1, SLICE_I = 2, }; #define HEVC_IS_I_SLICE(type) (SLICE_I == (type)) #define HEVC_IS_P_SLICE(type) (SLICE_P == (type)) #define HEVC_IS_B_SLICE(type) (SLICE_B == (type)) #define ENTROPY_MODE_CAVLC 0 #define ENTROPY_MODE_CABAC 1 #define HEVC_PROFILE_IDC_MAIN 1 #define HEVC_PROFILE_IDC_MAIN10 2 enum NALUType { NALU_TRAIL_N = 0x00, // Coded slice segment of a non-TSA, non-STSA trailing picture - slice_segment_layer_rbsp, VLC NALU_TRAIL_R = 0x01, // Coded slice segment of a non-TSA, non-STSA trailing picture - slice_segment_layer_rbsp, VLC NALU_TSA_N = 0x02, // Coded slice segment of a TSA picture - slice_segment_layer_rbsp, VLC NALU_TSA_R = 0x03, // Coded slice segment of a TSA picture - slice_segment_layer_rbsp, VLC NALU_STSA_N = 0x04, // Coded slice of an STSA picture - slice_layer_rbsp, VLC NALU_STSA_R = 0x05, // Coded slice of an STSA picture - slice_layer_rbsp, VLC NALU_RADL_N = 0x06, // Coded slice of an RADL picture - slice_layer_rbsp, VLC NALU_RADL_R = 0x07, // Coded slice of an RADL picture - slice_layer_rbsp, VLC NALU_RASL_N = 0x08, // Coded slice of an RASL picture - slice_layer_rbsp, VLC NALU_RASL_R = 0x09, // Coded slice of an RASL picture - slice_layer_rbsp, VLC /* 0x0a..0x0f - Reserved */ NALU_BLA_W_LP = 0x10, // Coded slice segment of an BLA picture - slice_segment_layer_rbsp, VLC NALU_BLA_W_DLP = 0x11, // Coded slice segment of an BLA picture - slice_segment_layer_rbsp, VLC NALU_BLA_N_LP = 0x12, // Coded slice segment of an BLA picture - slice_segment_layer_rbsp, VLC NALU_IDR_W_DLP = 0x13, // Coded slice segment of an IDR picture - slice_segment_layer_rbsp, VLC NALU_IDR_N_LP = 0x14, // Coded slice segment of an IDR picture - slice_segment_layer_rbsp, VLC NALU_CRA = 0x15, // Coded slice segment of an CRA picture - slice_segment_layer_rbsp, VLC /* 0x16..0x1f - Reserved */ NALU_VPS = 0x20, // Video parameter set - video_parameter_set_rbsp, non-VLC NALU_SPS = 0x21, // Sequence parameter set - seq_parameter_set_rbsp, non-VLC NALU_PPS = 0x22, // Picture parameter set - pic_parameter_set_rbsp, non-VLC NALU_AUD = 0x23, // Access unit delimiter - access_unit_delimiter_rbsp, non-VLC NALU_EOS = 0x24, // End of sequence - end_of_seq_rbsp, non-VLC NALU_EOB = 0x25, // End of bitsteam - end_of_bitsteam_rbsp, non-VLC NALU_FD = 0x26, // Filler data - filler_data_rbsp, non-VLC NALU_PREFIX_SEI = 0x27, // Supplemental enhancement information (SEI) - sei_rbsp, non_VLC NALU_SUFFIX_SEI = 0x28, // Supplemental enhancement information (SEI) - sei_rbsp, non_VLC /* 0x29..0x2f - Reserved */ /* 0x30..0x3f - Unspecified */ //this should be the last element of this enum //chagne this value if NAL unit type increased MAX_HEVC_NAL_TYPE = 0x3f, }; struct BaseBitstreamHEVC : BaseBitstream<16> { inline void NalStartCodePrefix(int nal_unit_type) { if (nal_unit_type == NALU_VPS || nal_unit_type == NALU_SPS || nal_unit_type == NALU_PPS || nal_unit_type == NALU_AUD) PutUI(0x00000001, 32); else PutUI(0x000001, 24); } inline void NalHeader(int nal_unit_type) { PutUI(0, 1); /* forbidden_zero_bit: 0 */ PutUI(nal_unit_type, 6); PutUI(0, 6); PutUI(1, 3); } void ProfileTier(const VAEncSequenceParameterBufferHEVC *seq) { uint32_t i = 0; PutUI(0, 2); // general_profile_space PutUI(seq->general_tier_flag, 1); // general_tier_flag PutUI(seq->general_profile_idc, 5); // general_profile_idc // real_hevc_profile //for (i = 0; i < 32; i++) //PutUI(protier_param.general_profile_compatibility_flag[i], 1); //PutUI(1 << 30, 32); // ptps->general_profile_compatibility_flag[ptps->general_profile_idc] = 1; // todo: configurable flags? for (i = 0; i < 32; i++) PutUI(i == seq->general_profile_idc, 1); PutUI(1, 1); //general_progressive_source_flag PutUI(0, 1); // general_interlaced_source_flag PutUI(1, 1); // general_non_packed_constraint_flag PutUI(1, 1); // general_frame_only_constraint_flag // reserved? PutUI(0, 16); PutUI(0, 16); PutUI(0, 12); PutUI(seq->general_level_idc, 8); // general_level_idc //ptps->general_level_idc = 30; //ptps->general_level_idc = ptps->general_level_idc * 4; } }; #define TEMPORAL_ID_NESTING 1 #define POC_BITS 16 #define ALIGN16(x) ((x+15)&~15) struct PackedVPSHEVC : BaseBitstreamHEVC { PackedVPSHEVC(const VAEncSequenceParameterBufferHEVC *seq): BaseBitstreamHEVC() { NalStartCodePrefix(NALU_VPS); NalHeader(NALU_VPS); //uint32_t i = 0; PutUI(0, 4); // vps.vps_video_parameter_set_id PutUI(3, 2); //vps_reserved_three_2bits //vps_base_layer_internal_flag:1 //vps_base_layer_available_flag:1 PutUI(0, 6); //vps_reserved_zero_6bits // vps_max_layers_minus1:0 PutUI(0, 3); // vps_max_sub_layers_minus1 PutUI(TEMPORAL_ID_NESTING, 1); // vps_temporal_id_nesting_flag PutUI(0xFFFF, 16); //vps_reserved_0xffff_16bits ProfileTier(seq); PutUI(0, 1); // vps.vps_sub_layer_ordering_info_present_flag // for (i = (vps.vps_sub_layer_ordering_info_present_flag ? 0 : vps.vps_max_sub_layers_minus1); i <= vps.vps_max_sub_layers_minus1; i++) { // NOTE: In teddi and mv_encoder, the setting is max_dec_pic_buffering. // here just follow the spec 7.3.2.1 // todo: check this. At least, breaks some hevc parsers when set to 0 PutUE(1); //vps.vps_max_dec_pic_buffering_minus1[i] PutUE(0);//vps.vps_max_num_reorder_pics[i] PutUE(0);//vps.vps_max_latency_increase_plus1[i] //} //*/ PutUI(0, 6); // vps.vps_max_nuh_reserved_zero_layer_id PutUE(0); // vps.vps_num_op_sets_minus1 PutUI(0, 1); // vps.vps_num_op_sets_minus1 /*& if (vps.vps_timing_info_present_flag) { PutUE(vps.vps_num_units_in_tick); PutUE(vps.vps_time_scale); PutUE(vps.vps_poc_proportional_to_timing_flag); if (vps.vps_poc_proportional_to_timing_flag) { PutUE(vps.vps_num_ticks_poc_diff_one_minus1); } PutUE(vps.vps_num_hrd_parameters); for (i = 0; i < vps.vps_num_hrd_parameters; i++) { PutUE(vps.hrd_layer_set_idx[i]); if (i > 0) { PutUI(vps.cprms_present_flag[i], 1); } } } */ // todo: bitstream restrictions? // no extension flag PutUI(0, 1); RBSPTrailingBits(); End(); } }; struct PackedSPSHEVC : BaseBitstreamHEVC { PackedSPSHEVC(const VAEncSequenceParameterBufferHEVC *sps, int width, int height): BaseBitstreamHEVC() { NalStartCodePrefix(NALU_SPS); NalHeader(NALU_SPS); //uint32_t i = 0; PutUI(0, 4); // sps.sps_video_parameter_set_id PutUI(0, 3); // sps.sps_max_sub_layers_minus1 PutUI(TEMPORAL_ID_NESTING, 1); // sps.sps_temporal_id_nesting_flag ProfileTier(sps); PutUE(0); // sps.sps_seq_parameter_set_id PutUE(sps->seq_fields.bits.chroma_format_idc); // sps.chroma_format_idc // 4:2:0 // todo: check if we can use 4:4:4/4:2:2 if (sps->seq_fields.bits.chroma_format_idc == 3) { PutUI(sps->seq_fields.bits.separate_colour_plane_flag, 1); } PutUE(ALIGN16(width) ); //sps.pic_width_in_luma_samples PutUE(ALIGN16(height)); //sps.pic_height_in_luma_samples bool conformance_window_flag = ALIGN16(width) != width || ALIGN16(height) != height; PutUI(1, 1); // sps.conformance_window_flag if (conformance_window_flag) { // sps. PutUE(0); // sps.conf_win_left_offset PutUE((ALIGN16(width) - width) >> 1); // sps.conf_win_right_offset PutUE(0); // sps.conf_win_top_offset PutUE((ALIGN16(height) - height) >> 1); // sps.conf_win_bottom_offset } PutUE(sps->seq_fields.bits.bit_depth_luma_minus8); //sps.bit_depth_luma_minus8 PutUE(sps->seq_fields.bits.bit_depth_chroma_minus8); //sps.bit_depth_chroma_minus8 PutUE(POC_BITS - 4); // sps.log2_max_pic_order_cnt_lsb_minus4 PutUI(0 , 1); //sps.sps_sub_layer_ordering_info_present_flag //for (i = (sps.sps_sub_layer_ordering_info_present_flag ? 0 : sps.sps_max_sub_layers_minus1); i <= sps.sps_max_sub_layers_minus1; i++) { // NOTE: In teddi and mv_encoder, the setting is max_dec_pic_buffering. // here just follow the spec 7.3.2.2 // todo: check. At least, breaks some hevc parsers when set to 0 PutUE(0);//sps.sps_max_dec_pic_buffering_minus1[i] PutUE(0);//sps.sps_max_num_reorder_pics[i] PutUE(0); //sps.sps_max_latency_increase_plus1[i] //} PutUE(sps->log2_min_luma_coding_block_size_minus3); // sps.log2_min_luma_coding_block_size_minus3 PutUE(sps->log2_diff_max_min_luma_coding_block_size);//sps.log2_diff_max_min_luma_coding_block_size PutUE(sps->log2_min_transform_block_size_minus2);//sps.log2_min_luma_transform_block_size_minus2 PutUE(sps->log2_diff_max_min_transform_block_size);//sps.log2_diff_max_min_luma_transform_block_size PutUE(sps->max_transform_hierarchy_depth_inter);//sps.max_transform_hierarchy_depth_inter PutUE(sps->max_transform_hierarchy_depth_inter);//sps.max_transform_hierarchy_depth_intra assert(!sps->seq_fields.bits.scaling_list_enabled_flag); // scaling_list_enabled_flag is set as 0 in fill_sps_header() for now PutUI(0, 1); // sps.scaling_list_enabled_flag /*if (sps.scaling_list_enabled_flag) { PutUI(sps.sps_scaling_list_data_present_flag, 1); if (sps.sps_scaling_list_data_present_flag) { //scaling_list_data(); } }*/ PutUI(sps->seq_fields.bits.amp_enabled_flag, 1); // sps.amp_enabled_flag PutUI(sps->seq_fields.bits.sample_adaptive_offset_enabled_flag, 1); // sps.sample_adaptive_offset_enabled_flag // pcm_enabled_flag is set as 0 in fill_sps_header() for now PutUI(sps->seq_fields.bits.pcm_enabled_flag, 1);//sps.pcm_enabled_flag assert(!sps->seq_fields.bits.pcm_enabled_flag); if (sps->seq_fields.bits.pcm_enabled_flag) { PutUI(sps->pcm_sample_bit_depth_luma_minus1, 4); PutUI(sps->pcm_sample_bit_depth_chroma_minus1, 4); PutUE(sps->log2_min_pcm_luma_coding_block_size_minus3); PutUE(sps->log2_max_pcm_luma_coding_block_size_minus3 - sps->log2_min_pcm_luma_coding_block_size_minus3); //log2_diff_max_min_pcm_luma_coding_block_size PutUI(sps->seq_fields.bits.pcm_loop_filter_disabled_flag, 1); } PutUE(1);// sps.num_short_term_ref_pic_sets PutUE(1); // num_negative_pics PutUE(0); // num_positive_pics PutUE(0); //delta_poc_s0_minus1[i] PutUI(1, 1);//used_by_curr_pic_s0_flag[i] /*for (i = 0; i < sps.num_short_term_ref_pic_sets; i++) { pack_short_term_ref_pic_setp(bs, &sps.strp[i], i == 0); }*/ // long_term_ref_pics_present_flag is set as 0 in fill_sps_header() for now PutUI(0, 1); // sps.long_term_ref_pics_present_flag /*if (sps.long_term_ref_pics_present_flag) { PutUE(sps.num_long_term_ref_pics_sps); for (i = 0; i < sps.num_long_term_ref_pics_sps; i++) { PutUE(sps.lt_ref_pic_poc_lsb_sps[i]); PutUI(sps.used_by_curr_pic_lt_sps_flag[i], 1); } }*/ PutUI(sps->seq_fields.bits.sps_temporal_mvp_enabled_flag, 1); // sps.sps_temporal_mvp_enabled_flag PutUI(sps->seq_fields.bits.strong_intra_smoothing_enabled_flag, 1); //sps.strong_intra_smoothing_enabled_flag assert(!sps->vui_parameters_present_flag); PutUI(sps->vui_parameters_present_flag, 1); // sps.vui_parameters_present_flag PutUI(0, 1); // sps.sps_extension_present_flag RBSPTrailingBits(); End(); } }; #define PPS_CABAC_INIT_PRESENT_FLAG 1 struct PackedPPSHEVC : BaseBitstreamHEVC { PackedPPSHEVC(const VAEncPictureParameterBufferHEVC *pic): BaseBitstreamHEVC() { NalStartCodePrefix(NALU_PPS); NalHeader(NALU_PPS); uint32_t i = 0; bool deblocking_filter_control_present_flag = false; PutUE(0); // pps.pps_pic_parameter_set_id PutUE(0); // pps.pps_seq_parameter_set_id PutUI(pic->pic_fields.bits.dependent_slice_segments_enabled_flag, 1); // pps.dependent_slice_segments_enabled_flag // TODO: !!! PutUI(0, 1); //pps.output_flag_present_flag PutUI(0, 3); // pps.num_extra_slice_header_bits PutUI(pic->pic_fields.bits.sign_data_hiding_enabled_flag, 1); //pps.sign_data_hiding_enabled_flag PutUI(PPS_CABAC_INIT_PRESENT_FLAG, 1); // pps.cabac_init_present_flag PutUE(pic->num_ref_idx_l0_default_active_minus1); //pps.num_ref_idx_l0_default_active_minus1 PutUE(pic->num_ref_idx_l1_default_active_minus1); //pps.num_ref_idx_l1_default_active_minus1 PutSE(pic->pic_init_qp - 26); //pps.init_qp_minus26 PutUI(pic->pic_fields.bits.constrained_intra_pred_flag, 1); //pps.constrained_intra_pred_flag PutUI(pic->pic_fields.bits.transform_skip_enabled_flag, 1); //pps.transform_skip_enabled_flag PutUI(pic->pic_fields.bits.cu_qp_delta_enabled_flag, 1); //pps.cu_qp_delta_enabled_flag if (pic->pic_fields.bits.cu_qp_delta_enabled_flag) { PutUE(pic->diff_cu_qp_delta_depth);//diff_cu_qp_delta_depth } PutSE(pic->pps_cb_qp_offset);//pps.pps_cb_qp_offset PutSE(pic->pps_cr_qp_offset);//pps.pps_cr_qp_offset) PutUI(0, 1);//pps.pps_slice_chroma_qp_offsets_present_flag PutUI(pic->pic_fields.bits.weighted_pred_flag, 1);//pps.weighted_pred_flag PutUI(pic->pic_fields.bits.weighted_bipred_flag, 1); //pps.weighted_bipred_flag PutUI(pic->pic_fields.bits.transquant_bypass_enabled_flag, 1);//pps.transquant_bypass_enabled_flag PutUI(pic->pic_fields.bits.tiles_enabled_flag, 1);//pps.tiles_enabled_flag PutUI(pic->pic_fields.bits.entropy_coding_sync_enabled_flag, 1);//pps.entropy_coding_sync_enabled_flag if (pic->pic_fields.bits.tiles_enabled_flag) { bool uniform_spacing_flag = false; PutUE(pic->num_tile_columns_minus1); PutUE(pic->num_tile_rows_minus1); PutUI(uniform_spacing_flag, 1);//uniform_spacing_flag if (!uniform_spacing_flag) { for (i = 0; i < pic->num_tile_columns_minus1; i++) { PutUE(pic->column_width_minus1[i]); } for (i = 0; i < pic->num_tile_rows_minus1; i++) { PutUE(pic->row_height_minus1[i]); } } PutUI(pic->pic_fields.bits.loop_filter_across_tiles_enabled_flag, 1); } PutUI(pic->pic_fields.bits.pps_loop_filter_across_slices_enabled_flag, 1); // pps.pps_loop_filter_across_slices_enabled_flag PutUI(deblocking_filter_control_present_flag, 1);//pps.deblocking_filter_control_present_flag if (deblocking_filter_control_present_flag) { bool deblocking_filter_override_enabled_flag = false; bool pps_deblocking_filter_disabled_flag = true; PutUI(deblocking_filter_override_enabled_flag, 1); PutUI(pps_deblocking_filter_disabled_flag, 1); int beta_offset_div2 = 0; int tc_offset_div2 = 0; if (!pps_deblocking_filter_disabled_flag) { PutSE(beta_offset_div2); PutSE(tc_offset_div2); } } // pps_scaling_list_data_present_flag is set as 0 in fill_pps_header() for now assert(!pic->pic_fields.bits.scaling_list_data_present_flag); PutUI(pic->pic_fields.bits.scaling_list_data_present_flag, 1);//pps.pps_scaling_list_data_present_flag /*if (pps.pps_scaling_list_data_present_flag) { //scaling_list_data(); }*/ PutUI(0, 1);//pps.lists_modification_present_flag PutUE(0); //pps.log2_parallel_merge_level_minus2 PutUI(0, 1);//pps.slice_segment_header_extension_present_flag PutUI(0, 1); //pps.pps_extension_present_flag /*if (pps.pps_extension_present_flag) { PutUI(pps.pps_range_extension_flag, 1); PutUI(pps.pps_multilayer_extension_flag, 1); PutUI(pps.pps_3d_extension_flag, 1); PutUI(pps.pps_extension_5bits, 1); } if (pps.pps_range_extension_flag) { if (pps.transform_skip_enabled_flag) PutUE(pps.log2_max_transform_skip_block_size_minus2); PutUI(pps.cross_component_prediction_enabled_flag, 1); PutUI(pps.chroma_qp_offset_list_enabled_flag, 1); if (pps.chroma_qp_offset_list_enabled_flag) { PutUE(pps.diff_cu_chroma_qp_offset_depth); PutUE(pps.chroma_qp_offset_list_len_minus1); for (i = 0; i <= pps.chroma_qp_offset_list_len_minus1; i++) { PutUE(pps.cb_qp_offset_list[i]); PutUE(pps.cr_qp_offset_list[i]); } } PutUE(pps.log2_sao_offset_scale_luma); PutUE(pps.log2_sao_offset_scale_chroma); } */ RBSPTrailingBits(); End(); } }; template struct PackedSliceHEVC : BaseBitstreamHEVC { inline PackedSliceHEVC(int framenum, const VAEncSequenceParameterBufferHEVC *sps, const VAEncSliceParameterBufferHEVC *slice, const VAEncPictureParameterBufferHEVC *pic): BaseBitstreamHEVC() { uint8_t nal_unit_type = NALU_TRAIL_R; //int gop_ref_distance = ip_period; //int i = 0; //int short_term_ref_pic_set_sps_flag = 1; // !is_idr; int slice_qp_delta = slice->slice_qp_delta; int pic_order_cnt_lsb = framenum; if constexpr (slice_type == SLICE_I) nal_unit_type = NALU_IDR_W_DLP; NalStartCodePrefix(nal_unit_type); NalHeader(nal_unit_type); PutUI(1, 1);// first_slice_segment_in_pic_flag // idr if (nal_unit_type >= 16 && nal_unit_type <= 23) PutUI(1, 1); //no_output_of_prior_pics_flag PutUE(0);//slice_pic_parameter_set_id /*if (!slice_header->first_slice_segment_in_pic_flag) { if (slice_header->dependent_slice_segment_flag) { PutUI(slice_header->dependent_slice_segment_flag, 1); } PutUI(slice_header->slice_segment_address, (uint8_t)(ceil(log(slice_header->picture_height_in_ctus * slice_header->picture_width_in_ctus) / log(2.0)))); }*/ // !slice_header->dependent_slice_segment_flag if (!slice->slice_fields.bits.dependent_slice_segment_flag) { /*for (i = 0; i < pps->num_extra_slice_header_bits; i++) { PutUI(slice_header->slice_reserved_undetermined_flag[i], 1); }*/ PutUE(slice_type); /*if (pps->output_flag_present_flag) { PutUI(slice_header->pic_output_flag, 1); }*/ if(sps->seq_fields.bits.separate_colour_plane_flag) PutUI(slice->slice_fields.bits.colour_plane_id, 2); if (!(nal_unit_type == NALU_IDR_W_DLP || nal_unit_type == NALU_IDR_N_LP)) { // slice_header->pic_order_cnt_lsb PutUI(pic_order_cnt_lsb, POC_BITS );//(sps->log2_max_pic_order_cnt_lsb_minus4 + 4) PutUI(1, 1); // short_term_ref_pic_set_sps_flag // assume we are only pushing I-slices on IDR frames, SPS only references (n-1)th frame for now // this should be restored when p-slice references something different or using CRA frames #if 0 if (!short_term_ref_pic_set_sps_flag) { // refer to Teddi if (sps->num_short_term_ref_pic_sets > 0) PutUI(0, 1); // inter_ref_pic_set_prediction_flag, always 0 for now PutUE(slice_header->strp.num_negative_pics); PutUE(slice_header->strp.num_positive_pics); // below chunks of codes (majorly two big 'for' blocks) are refering both // Teddi and mv_encoder, they look kind of ugly, however, keep them as these // since it will be pretty easy to update if change/update in Teddi side. // According to Teddi, these are CModel Implementation. int prev = 0; int frame_cnt_in_gop = slice_header->pic_order_cnt_lsb / 2; // this is the first big 'for' block for (i = 0; i < slice_header->strp.num_negative_pics; i++) { // Low Delay B case if (1 == gop_ref_distance) { PutUE(0 /*delta_poc_s0_minus1*/); } else { // For Non-BPyramid GOP i.e B0 type if (num_active_ref_p > 1) { // DeltaPOC Equals NumB int DeltaPoc = -(int)(gop_ref_distance); PutUE(prev - DeltaPoc - 1 /*delta_poc_s0_minus1*/); } else { // the big 'if' wraps here is - // if (!slice_header->short_term_ref_pic_set_sps_flag) // From the Teddi logic, the short_term_ref_pic_set_sps_flag only can be '0' // either for B-Prymid or first several frames in a GOP in multi-ref cases // when there are not enough backward refs. // So though there are really some codes under this 'else'in Teddi, don't // want to introduce them in MEA to avoid confusion, and put an assert // here to guard that there is new case we need handle in the future. assert(0); } } PutUI(1 /*used_by_curr_pic_s0_flag*/, 1); } prev = 0; // this is the second big 'for' block for (i = 0; i < slice_header->strp.num_positive_pics; i++) { // Non-BPyramid GOP if (num_active_ref_p > 1) { // MultiRef Case if (frame_cnt_in_gop < gop_ref_distance) { int DeltaPoc = (int)(gop_ref_distance - frame_cnt_in_gop); PutUE(DeltaPoc - prev - 1 /*delta_poc_s1_minus1*/); } else if (frame_cnt_in_gop > gop_ref_distance) { int DeltaPoc = (int)(gop_ref_distance * slice_header->strp.num_negative_pics - frame_cnt_in_gop); PutUE(DeltaPoc - prev - 1 /*delta_poc_s1_minus1*/); } } else { // the big 'if' wraps here is - // if (!slice_header->short_term_ref_pic_set_sps_flag) // From the Teddi logic, the short_term_ref_pic_set_sps_flag only can be '0' // either for B-Prymid or first several frames in a GOP in multi-ref cases // when there are not enough backward refs. // So though there are really some codes under this 'else'in Teddi, don't // want to introduce them in MEA to avoid confusion, and put an assert // here to guard that there is new case we need handle in the future. assert(0); } PutUI(1 /*used_by_curr_pic_s1_flag*/, 1); } } else if (sps->num_short_term_ref_pic_sets > 1) PutUI(slice_header->short_term_ref_pic_set_idx, (uint8_t)(ceil(log(sps->num_short_term_ref_pic_sets) / log(2.0)))); #endif // no long term refs #if 0 if (sps->long_term_ref_pics_present_flag) { if (sps->num_long_term_ref_pics_sps > 0) PutUE(slice_header->num_long_term_sps); PutUE(slice_header->num_long_term_pics); } #endif if (sps->seq_fields.bits.sps_temporal_mvp_enabled_flag) PutUI(slice->slice_fields.bits.slice_temporal_mvp_enabled_flag, 1); } if (sps->seq_fields.bits.sample_adaptive_offset_enabled_flag ) { // sample_adaptive_offset_enabled_flag PutUI(slice->slice_fields.bits.slice_sao_luma_flag, 1);// slice_sao_luma_flag PutUI(slice->slice_fields.bits.slice_sao_chroma_flag, 1);//slice_sao_chroma_flag } if constexpr(slice_type != SLICE_I) { PutUI(slice->slice_fields.bits.num_ref_idx_active_override_flag, 1); //num_ref_idx_active_override_flag if (slice->slice_fields.bits.num_ref_idx_active_override_flag) { PutUE(slice->num_ref_idx_l0_active_minus1); //if (slice->slice_type == SLICE_B) //PutUE(slice->num_ref_idx_l1_active_minus1); } #if 0 if (pps->lists_modification_present_flag && slice_header->num_poc_total_cur > 1) { /* ref_pic_list_modification */ PutUI(slice_header->ref_pic_list_modification_flag_l0, 1); if (slice_header->ref_pic_list_modification_flag_l0) { for (i = 0; i <= slice_header->num_ref_idx_l0_active_minus1; i++) { PutUI(slice_header->list_entry_l0[i], (uint8_t)(ceil(log(slice_header->num_poc_total_cur) / log(2.0)))); } } PutUI(slice_header->ref_pic_list_modification_flag_l1, 1); if (slice_header->ref_pic_list_modification_flag_l1) { for (i = 0; i <= slice_header->num_ref_idx_l1_active_minus1; i++) { PutUI(slice_header->list_entry_l1[i], (uint8_t)(ceil(log(slice_header->num_poc_total_cur) / log(2.0)))); } } } #endif /*if (slice_header->slice_type == SLICE_B) { PutUI(slice_header->mvd_l1_zero_flag, 1); }*/ if (PPS_CABAC_INIT_PRESENT_FLAG) { PutUI(slice->slice_fields.bits.cabac_init_flag, 1); //slice_header->cabac_init_present_flag } if (slice->slice_fields.bits.slice_temporal_mvp_enabled_flag) { int collocated_from_l0_flag = 1; if (slice->slice_type == SLICE_B) { collocated_from_l0_flag = slice->slice_fields.bits.collocated_from_l0_flag; PutUI(collocated_from_l0_flag , 1); } if (((collocated_from_l0_flag && (slice->num_ref_idx_l0_active_minus1 > 0)) || (!collocated_from_l0_flag && (slice->num_ref_idx_l1_active_minus1 > 0)))) { PutUE(pic->num_ref_idx_l0_default_active_minus1); // collocated_ref_idx } } PutUE(5 - slice->max_num_merge_cand);//slice_header->five_minus_max_num_merge_cand } PutSE(slice_qp_delta); /*if (pps->chroma_qp_offset_list_enabled_flag) { PutSE(slice_header->slice_qp_delta_cb); PutSE(slice_header->slice_qp_delta_cr); } if (pps->deblocking_filter_override_enabled_flag) { PutUI(slice_header->deblocking_filter_override_flag, 1); } if (slice_header->deblocking_filter_override_flag) { PutUI(slice_header->disable_deblocking_filter_flag, 1); if (!slice_header->disable_deblocking_filter_flag) { PutSE(slice_header->beta_offset_div2); PutSE(slice_header->tc_offset_div2); } }*/ if (pic->pic_fields.bits.pps_loop_filter_across_slices_enabled_flag && (slice->slice_fields.bits.slice_sao_luma_flag || slice->slice_fields.bits.slice_sao_chroma_flag || !slice->slice_fields.bits.slice_deblocking_filter_disabled_flag)) { PutUI(slice->slice_fields.bits.slice_loop_filter_across_slices_enabled_flag, 1); } } if ((pic->pic_fields.bits.tiles_enabled_flag) || (pic->pic_fields.bits.entropy_coding_sync_enabled_flag)) { int num_entry_point_offsets = 0, offset_len_minus1 = 0; PutUE(num_entry_point_offsets); if (num_entry_point_offsets > 0) { PutUE(offset_len_minus1); } } /*if (pps->slice_segment_header_extension_present_flag) { int slice_header_extension_length = 0; PutUE(slice_header_extension_length); }*/ RBSPTrailingBits(); End(); } }; struct VaapiEncoderHEVC: VaapiEncoder { VAEncSequenceParameterBufferHEVC seq; VAEncPictureParameterBufferHEVC pic; VAEncSliceParameterBufferHEVC slice; void InitParameters(bool p010) { //int width_in_mbs, height_in_mbs; //int frame_cropping_flag = 0; //int frame_crop_bottom_offset = 0; //width_in_mbs = (width + 15) / 16; //height_in_mbs = (height + 15) / 16; // sps // seems work seq.seq_fields.bits.sps_temporal_mvp_enabled_flag = 1; // todo: check if we can use 4:4:4/4:2:2 seq.seq_fields.bits.chroma_format_idc = 1; // 4:2:0 seq.pic_width_in_luma_samples = ALIGN16(width); seq.pic_height_in_luma_samples = ALIGN16(height); // todo: tunable block size seq.log2_diff_max_min_luma_coding_block_size = 2; seq.log2_diff_max_min_transform_block_size = 3; seq.log2_min_transform_block_size_minus2 = 0; seq.max_transform_hierarchy_depth_inter = 2; seq.max_transform_hierarchy_depth_intra = 2; seq.seq_fields.bits.amp_enabled_flag = 1; // broken on intel? or broken bistream? seq.seq_fields.bits.sample_adaptive_offset_enabled_flag = 0; seq.intra_idr_period = 32767; seq.intra_period = 32767; seq.ip_period = 1; if(p010) { seq.seq_fields.bits.bit_depth_chroma_minus8 = 2; seq.seq_fields.bits.bit_depth_luma_minus8 = 2; } seq.bits_per_second = 15*1024*1024;//(long long)r->width * r->height * 12 * 90 / 50; // vps // (none?) // profile seq.general_level_idc = 120; seq.general_profile_idc = p010? HEVC_PROFILE_IDC_MAIN10: HEVC_PROFILE_IDC_MAIN; // pps pic.pic_fields.bits.dependent_slice_segments_enabled_flag = 1; // seens work both pic.pic_fields.bits.transform_skip_enabled_flag = 1; // pic pic.collocated_ref_pic_index = 0;//255; pic.pic_init_qp = 5; pic.nal_unit_type = NALU_IDR_W_DLP; pic.pic_fields.bits.idr_pic_flag = 1; pic.pic_fields.bits.coding_type = 1; pic.pic_fields.bits.reference_pic_flag = 1; // seems work pic.pic_fields.bits.pps_loop_filter_across_slices_enabled_flag = 1; pic.pic_fields.bits.cu_qp_delta_enabled_flag = 1; // CBR if(pic.pic_fields.bits.cu_qp_delta_enabled_flag) pic.diff_cu_qp_delta_depth = 2; for(int i = 0; i < 15; i++) { pic.reference_frames[i].picture_id = VA_INVALID_SURFACE; pic.reference_frames[i].flags = VA_PICTURE_HEVC_INVALID; pic.reference_frames[i].pic_order_cnt = 0; } // slice //slice.slice_fields.bits.num_ref_idx_active_override_flag = 0; slice.slice_qp_delta = 0; int lcu_size = 32; // todo: block size settings? int picture_width_in_ctus = (width + lcu_size - 1) / lcu_size; int picture_height_in_ctus = (height + lcu_size - 1) / lcu_size; slice.num_ctu_in_slice = picture_width_in_ctus * picture_height_in_ctus; slice.max_num_merge_cand = 5; // seems works //slice.slice_fields.bits.collocated_from_l0_flag = 1; // broken on intel??? //slice.slice_fields.bits.slice_sao_chroma_flag = 1; //slice.slice_fields.bits.slice_sao_luma_flag = 1; memset((void*)slice.ref_pic_list0, -1, sizeof(slice.ref_pic_list0)); memset((void*)slice.ref_pic_list1, -1, sizeof(slice.ref_pic_list1)); for(int i = 0; i < 15; i++) { slice.ref_pic_list0[i].flags = -1;//VA_PICTURE_HEVC_INVALID; slice.ref_pic_list0[i].picture_id = VA_INVALID_SURFACE; slice.ref_pic_list0[i].pic_order_cnt = -1; slice.ref_pic_list1[i].flags = -1;//VA_PICTURE_HEVC_INVALID; slice.ref_pic_list1[i].picture_id = VA_INVALID_SURFACE; slice.ref_pic_list1[i].pic_order_cnt = -1; } } bool Setup(int drm_fd, int width, int height, int *dmabuf_fd, uint64_t *mod, uint32_t *size, uint32_t *offset, uint32_t *pitch1, uint32_t *pitch2, uint64_t *modifiers, int modifierscount, bool p010) { VAProfile profile = p010?VAProfileHEVCMain10:VAProfileHEVCMain; uint32_t format = p010?VA_RT_FORMAT_YUV420_10:VA_RT_FORMAT_YUV420; uint32_t fourcc = p010?VA_FOURCC_P010:VA_FOURCC_NV12; if(!SetupVA(profile, format, fourcc, drm_fd, width, height, dmabuf_fd, mod, size, offset, pitch1, pitch2, modifiers, modifierscount)) { VaapiEncoder::Destroy(); return false; } if(!CreateContext(profile, format, fourcc, VA_RC_CBR)) { VaapiEncoder::Destroy(); return false; } InitParameters(p010); return true; } template inline void EncodeIDR(int idx, Writer &w, typename Writer::ID id) { VAStatus status = vaBeginPicture(dpy, ctx, inputFrames[idx]); VABufferID seqb = CreateParamererBuffer(VAEncSequenceParameterBufferType, seq); slice.slice_type = SLICE_I; slice.ref_pic_list0[0].pic_order_cnt = -1; slice.ref_pic_list0[0].picture_id = VA_INVALID_SURFACE; slice.ref_pic_list0[0].flags = -1; VABufferID sliceb = CreateParamererBuffer(VAEncSliceParameterBufferType, slice); VABufferID output = CreateOutputBuf(width * height); pic.decoded_curr_pic.picture_id = reference_picture[frame_count % 2]; pic.decoded_curr_pic.pic_order_cnt = frame_count; pic.reference_frames[0].picture_id = VA_INVALID_SURFACE; pic.reference_frames[0].flags = VA_PICTURE_HEVC_INVALID; pic.reference_frames[0].pic_order_cnt = 0; pic.coded_buf = output; pic.pic_fields.bits.idr_pic_flag = 1; pic.nal_unit_type = NALU_IDR_W_DLP; pic.pic_fields.bits.coding_type = 1; VABufferID picb = CreateParamererBuffer(VAEncPictureParameterBufferType, pic); VABufferID fpsb = CreateMiscParameterBuffer(VAEncMiscParameterTypeFrameRate,VAEncMiscParameterFrameRate{.framerate = 90} ); VABufferID hrdb = CreateMiscParameterBuffer(VAEncMiscParameterTypeHRD, VAEncMiscParameterHRD{}); VABufferID rcb = CreateMiscParameterBuffer(VAEncMiscParameterTypeRateControl, VAEncMiscParameterRateControl{ .bits_per_second = 15*1024*1024, .target_percentage = 66, .window_size = 1000, .initial_qp = 1, .max_qp = 1 }); VABufferID ppps[2]; VABufferID pvps[2]; VABufferID psps[2]; CreatePackedBuffer(pvps[0], pvps[1], VAEncPackedHeaderSequence, PackedVPSHEVC(&seq)); CreatePackedBuffer(psps[0], psps[1], VAEncPackedHeaderSequence, PackedSPSHEVC(&seq, width, height)); CreatePackedBuffer(ppps[0], ppps[1], VAEncPackedHeaderPicture, PackedPPSHEVC(&pic)); VABufferID pslice[2]; CreatePackedBuffer(pslice[0], pslice[1], VAEncPackedHeaderSlice, PackedSliceHEVC(frame_count, &seq, &slice, &pic)); VABufferID buffers[] = {seqb, pvps[0],pvps[1],psps[0], psps[1], fpsb, hrdb, rcb, ppps[0], ppps[1], picb, pslice[0], pslice[1], sliceb }; vaRenderPicture(dpy, ctx, buffers, sizeof(buffers) / sizeof(buffers[0]) ); status = vaEndPicture(dpy, ctx); if(status != VA_STATUS_SUCCESS) abort(); w.WriteOutput(output, id); for(int i = 0; i < sizeof(buffers) / sizeof(buffers[0]); i++) vaDestroyBuffer(dpy, buffers[i]); frame_count++; } template inline void EncodeP(int idx, Writer &w, typename Writer::ID id) { VAStatus status = vaBeginPicture(dpy, ctx, inputFrames[idx]); // todo: chain slice/output buffers, patch POC in slice buffers??? slice.slice_type = SLICE_P; slice.ref_pic_list0[0].pic_order_cnt = frame_count - 1; slice.ref_pic_list0[0].picture_id = reference_picture[(frame_count - 1)% 2]; slice.ref_pic_list0[0].flags = 0;//VA_PICTURE_HEVC_RPS_ST_CURR_BEFORE; VABufferID sliceb = CreateParamererBuffer(VAEncSliceParameterBufferType, slice); VABufferID output = CreateOutputBuf(width * height); pic.decoded_curr_pic.picture_id = reference_picture[frame_count % 2]; pic.decoded_curr_pic.pic_order_cnt = frame_count; pic.reference_frames[0].picture_id = reference_picture[(frame_count + 1) % 2]; pic.reference_frames[0].flags = 0;//VA_PICTURE_HEVC_RPS_ST_CURR_BEFORE; pic.reference_frames[0].pic_order_cnt = frame_count - 1; pic.coded_buf = output; pic.pic_fields.bits.idr_pic_flag = 0; pic.nal_unit_type = NALU_TRAIL_R; pic.pic_fields.bits.coding_type = 2; VABufferID picb = CreateParamererBuffer(VAEncPictureParameterBufferType, pic); VABufferID pslice[2]; CreatePackedBuffer(pslice[0], pslice[1], VAEncPackedHeaderSlice, PackedSliceHEVC(frame_count, &seq, &slice, &pic)); VABufferID buffers[] = {picb, pslice[0], pslice[1], sliceb }; vaRenderPicture(dpy, ctx, buffers, sizeof(buffers) / sizeof(buffers[0]) ); status = vaEndPicture(dpy, ctx); if(status != VA_STATUS_SUCCESS) abort(); w.WriteOutput(output, id); for(int i = 0; i < sizeof(buffers) / sizeof(buffers[0]); i++) vaDestroyBuffer(dpy, buffers[i]); frame_count++; } }; #endif // VAAPI_ENCODER_HEVC_H