/* * Copyright (c) 2012 Intel Corporation. All Rights Reserved. * Copyright © 2013 Intel Corporation * * Permission is hereby granted, free of charge, to any person obtaining * a copy of this software and associated documentation files (the * "Software"), to deal in the Software without restriction, including * without limitation the rights to use, copy, modify, merge, publish, * distribute, sublicense, and/or sell copies of the Software, and to * permit persons to whom the Software is furnished to do so, subject to * the following conditions: * * The above copyright notice and this permission notice (including the * next paragraph) shall be included in all copies or substantial * portions of the Software. * * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, * EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND * NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS * BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN * ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN * CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE * SOFTWARE. */ #include #include #include #include #include #include #include #include #include #include #include //#include "va_enc_hevc.h" #include #include #include #include #include #define NAL_REF_IDC_NONE 0 #define NAL_REF_IDC_LOW 1 #define NAL_REF_IDC_MEDIUM 2 #define NAL_REF_IDC_HIGH 3 // SLICE TYPE HEVC ENUM enum { SLICE_B = 0, SLICE_P = 1, SLICE_I = 2, }; #define IS_I_SLICE(type) (SLICE_I == (type)) #define IS_P_SLICE(type) (SLICE_P == (type)) #define IS_B_SLICE(type) (SLICE_B == (type)) #define ENTROPY_MODE_CAVLC 0 #define ENTROPY_MODE_CABAC 1 #define PROFILE_IDC_MAIN 1 #define PROFILE_IDC_MAIN10 2 enum NALUType { NALU_TRAIL_N = 0x00, // Coded slice segment of a non-TSA, non-STSA trailing picture - slice_segment_layer_rbsp, VLC NALU_TRAIL_R = 0x01, // Coded slice segment of a non-TSA, non-STSA trailing picture - slice_segment_layer_rbsp, VLC NALU_TSA_N = 0x02, // Coded slice segment of a TSA picture - slice_segment_layer_rbsp, VLC NALU_TSA_R = 0x03, // Coded slice segment of a TSA picture - slice_segment_layer_rbsp, VLC NALU_STSA_N = 0x04, // Coded slice of an STSA picture - slice_layer_rbsp, VLC NALU_STSA_R = 0x05, // Coded slice of an STSA picture - slice_layer_rbsp, VLC NALU_RADL_N = 0x06, // Coded slice of an RADL picture - slice_layer_rbsp, VLC NALU_RADL_R = 0x07, // Coded slice of an RADL picture - slice_layer_rbsp, VLC NALU_RASL_N = 0x08, // Coded slice of an RASL picture - slice_layer_rbsp, VLC NALU_RASL_R = 0x09, // Coded slice of an RASL picture - slice_layer_rbsp, VLC /* 0x0a..0x0f - Reserved */ NALU_BLA_W_LP = 0x10, // Coded slice segment of an BLA picture - slice_segment_layer_rbsp, VLC NALU_BLA_W_DLP = 0x11, // Coded slice segment of an BLA picture - slice_segment_layer_rbsp, VLC NALU_BLA_N_LP = 0x12, // Coded slice segment of an BLA picture - slice_segment_layer_rbsp, VLC NALU_IDR_W_DLP = 0x13, // Coded slice segment of an IDR picture - slice_segment_layer_rbsp, VLC NALU_IDR_N_LP = 0x14, // Coded slice segment of an IDR picture - slice_segment_layer_rbsp, VLC NALU_CRA = 0x15, // Coded slice segment of an CRA picture - slice_segment_layer_rbsp, VLC /* 0x16..0x1f - Reserved */ NALU_VPS = 0x20, // Video parameter set - video_parameter_set_rbsp, non-VLC NALU_SPS = 0x21, // Sequence parameter set - seq_parameter_set_rbsp, non-VLC NALU_PPS = 0x22, // Picture parameter set - pic_parameter_set_rbsp, non-VLC NALU_AUD = 0x23, // Access unit delimiter - access_unit_delimiter_rbsp, non-VLC NALU_EOS = 0x24, // End of sequence - end_of_seq_rbsp, non-VLC NALU_EOB = 0x25, // End of bitsteam - end_of_bitsteam_rbsp, non-VLC NALU_FD = 0x26, // Filler data - filler_data_rbsp, non-VLC NALU_PREFIX_SEI = 0x27, // Supplemental enhancement information (SEI) - sei_rbsp, non_VLC NALU_SUFFIX_SEI = 0x28, // Supplemental enhancement information (SEI) - sei_rbsp, non_VLC /* 0x29..0x2f - Reserved */ /* 0x30..0x3f - Unspecified */ //this should be the last element of this enum //chagne this value if NAL unit type increased MAX_HEVC_NAL_TYPE = 0x3f, }; #define ALIGN16(x) ((x+15)&~15) #define CHAIN_SIZE 4 struct vaapi_recorder { int drm_fd, output_fd; int width, height; int frame_count; int error; int destroying; pthread_t worker_thread; pthread_mutex_t mutex; pthread_cond_t input_cond; struct { int valid; int prime_fd, stride; } input; VADisplay va_dpy; /* video post processing is used for colorspace conversion */ struct { VAConfigID cfg; VAContextID ctx; VABufferID pipeline_buf; VASurfaceID output; } vpp; struct { VAConfigID cfg; VAContextID ctx; VASurfaceID reference_picture[3]; int intra_period; int output_size; struct { VAEncSequenceParameterBufferHEVC seq; VAEncPictureParameterBufferHEVC pic; VAEncSliceParameterBufferHEVC slice; } param; VAConfigAttrib attrib[VAConfigAttribTypeMax]; pthread_t output_thread; pthread_mutex_t mutex; pthread_cond_t output_cond; VABufferID output_buf; VASurfaceID output_sync_surf; } encoder; VASurfaceID inputFrames[CHAIN_SIZE]; }; /* seq {general_profile_idc = 1 '\001', general_level_idc = 150 '\226', general_tier_flag = 0 '\000', intra_period = 32767, intra_idr_period = 32767, ip_period = 1, bits_per_second = 0, pic_width_in_luma_samples = 3200, pic_height_in_luma_samples = 2200, seq_fields = {bits = { chroma_format_idc = 1, separate_colour_plane_flag = 0, bit_depth_luma_minus8 = 0, bit_depth_chroma_minus8 = 0, scaling_list_enabled_flag = 0, strong_intra_smoothing_enabled_flag = 0, amp_enabled_flag = 1, sample_adaptive_offset_enabled_flag = 1, pcm_enabled_flag = 0, pcm_loop_filter_disabled_flag = 0, sps_temporal_mvp_enabled_flag = 0, low_delay_seq = 0, hierachical_flag = 0, reserved_bits = 0}, value = 6145}, log2_min_luma_coding_block_size_minus3 = 0 '\000', log2_diff_max_min_luma_coding_block_size = 3 '\003', log2_min_transform_block_size_minus2 = 0 '\000', log2_diff_max_min_transform_block_size = 3 '\003', max_transform_hierarchy_depth_inter = 0 '\000', max_transform_hierarchy_depth_intra = 0 '\000', pcm_sample_bit_depth_luma_minus1 = 0, pcm_sample_bit_depth_chroma_minus1 = 0, log2_min_pcm_luma_coding_block_size_minus3 = 0, log2_max_pcm_luma_coding_block_size_minus3 = 0, vui_parameters_present_flag = 0 '\000', vui_fields = {bits = {aspect_ratio_info_present_flag = 0, neutral_chroma_indication_flag = 0, field_seq_flag = 0, vui_timing_info_present_flag = 0, bitstream_restriction_flag = 0, tiles_fixed_structure_flag = 0, motion_vectors_over_pic_boundaries_flag = 0, restricted_ref_pic_lists_flag = 0, log2_max_mv_length_horizontal = 0, log2_max_mv_length_vertical = 0}, value = 0}, aspect_ratio_idc = 0 '\000', sar_width = 0, sar_height = 0, vui_num_units_in_tick = 0, vui_time_scale = 0, min_spatial_segmentation_idc = 0, max_bytes_per_pic_denom = 0 '\000', max_bits_per_min_cu_denom = 0 '\000', scc_fields = {bits = {palette_mode_enabled_flag = 0, reserved = 0}, value = 0}, va_reserved = {0, 0, 0, 0, 0, 0, 0}} pic {decoded_curr_pic = {picture_id = 5, pic_order_cnt = 0, flags = 0, va_reserved = {0, 0, 0, 0}}, reference_frames = {{picture_id = 4294967295, pic_order_cnt = 0, flags = 1, va_reserved = {0, 0, 0, 0}} }, coded_buf = 9, collocated_ref_pic_index = 255 '\377', last_picture = 0 '\000', pic_init_qp = 25 '\031', diff_cu_qp_delta_depth = 0 '\000', pps_cb_qp_offset = 0 '\000', pps_cr_qp_offset = 0 '\000', num_tile_columns_minus1 = 0 '\000', num_tile_rows_minus1 = 0 '\000', column_width_minus1 = '\000' , row_height_minus1 = '\000' , log2_parallel_merge_level_minus2 = 0 '\000', ctu_max_bitsize_allowed = 0 '\000', num_ref_idx_l0_default_active_minus1 = 0 '\000', num_ref_idx_l1_default_active_minus1 = 0 '\000', slice_pic_parameter_set_id = 0 '\000', nal_unit_type = 19 '\023', pic_fields = {bits = {idr_pic_flag = 1, coding_type = 1, reference_pic_flag = 1, dependent_slice_segments_enabled_flag = 0, sign_data_hiding_enabled_flag = 0, constrained_intra_pred_flag = 0, transform_skip_enabled_flag = 0, cu_qp_delta_enabled_flag = 0, weighted_pred_flag = 0, weighted_bipred_flag = 0, transquant_bypass_enabled_flag = 0, tiles_enabled_flag = 0, entropy_coding_sync_enabled_flag = 0, loop_filter_across_tiles_enabled_flag = 0, pps_loop_filter_across_slices_enabled_flag = 1, scaling_list_data_present_flag = 0, screen_content_flag = 0, enable_gpu_weighted_prediction = 0, no_output_of_prior_pics_flag = 0, reserved = 0}, value = 65555}, hierarchical_level_plus1 = 0 '\000', va_byte_reserved = 0 '\000', scc_fields = {bits = { pps_curr_pic_ref_enabled_flag = 0, reserved = 0}, value = 0}, va_reserved = {0 }} slice {slice_segment_address = 0, num_ctu_in_slice = 1750, slice_type = 2 '\002', slice_pic_parameter_set_id = 0 '\000', num_ref_idx_l0_active_minus1 = 0 '\000', num_ref_idx_l1_active_minus1 = 0 '\000', ref_pic_list0 = {{picture_id = 4294967295, pic_order_cnt = 0, flags = 1, va_reserved = {0, 0, 0, 0}} }, ref_pic_list1 = {{picture_id = 4294967295, pic_order_cnt = 0, flags = 1, va_reserved = {0, 0, 0, 0}} }, luma_log2_weight_denom = 0 '\000', delta_chroma_log2_weight_denom = 0 '\000', delta_luma_weight_l0 = '\000' , luma_offset_l0 = '\000' , delta_chroma_weight_l0 = {"\000" }, chroma_offset_l0 = {"\000" }, delta_luma_weight_l1 = '\000' , luma_offset_l1 = '\000' , delta_chroma_weight_l1 = {"\000" }, chroma_offset_l1 = {"\000" }, max_num_merge_cand = 5 '\005', slice_qp_delta = 0 '\000', slice_cb_qp_offset = 0 '\000', slice_cr_qp_offset = 0 '\000', slice_beta_offset_div2 = 0 '\000', slice_tc_offset_div2 = 0 '\000', slice_fields = {bits = {last_slice_of_pic_flag = 1, dependent_slice_segment_flag = 0, colour_plane_id = 0, slice_temporal_mvp_enabled_flag = 0, slice_sao_luma_flag = 1, slice_sao_chroma_flag = 1, num_ref_idx_active_override_flag = 0, mvd_l1_zero_flag = 0, cabac_init_flag = 0, slice_deblocking_filter_disabled_flag = 0, slice_loop_filter_across_slices_enabled_flag = 0, collocated_from_l0_flag = 0}, value = 97}, pred_weight_table_bit_offset = 0, pred_weight_table_bit_length = 0, va_reserved = {0, 0, 0, 0, 0, 0}} pic {decoded_curr_pic = {picture_id = 1, pic_order_cnt = 1, flags = 0, va_reserved = {0, 0, 0, 0}}, reference_frames = {{picture_id = 5, pic_order_cnt = 0, flags = 16, va_reserved = {0, 0, 0, 0}}, {picture_id = 4294967295, pic_order_cnt = 0, flags = 1, va_reserved = {0, 0, 0, 0}} }, coded_buf = 3, collocated_ref_pic_index = 255 '\377', last_picture = 0 '\000', pic_init_qp = 25 '\031', diff_cu_qp_delta_depth = 0 '\000', pps_cb_qp_offset = 0 '\000', pps_cr_qp_offset = 0 '\000', num_tile_columns_minus1 = 0 '\000', num_tile_rows_minus1 = 0 '\000', column_width_minus1 = '\000' , row_height_minus1 = '\000' , log2_parallel_merge_level_minus2 = 0 '\000', ctu_max_bitsize_allowed = 0 '\000', num_ref_idx_l0_default_active_minus1 = 0 '\000', num_ref_idx_l1_default_active_minus1 = 0 '\000', slice_pic_parameter_set_id = 0 '\000', nal_unit_type = 1 '\001', pic_fields = {bits = { idr_pic_flag = 0, coding_type = 2, reference_pic_flag = 1, dependent_slice_segments_enabled_flag = 0, sign_data_hiding_enabled_flag = 0, constrained_intra_pred_flag = 0, transform_skip_enabled_flag = 0, cu_qp_delta_enabled_flag = 0, weighted_pred_flag = 0, weighted_bipred_flag = 0, transquant_bypass_enabled_flag = 0, tiles_enabled_flag = 0, entropy_coding_sync_enabled_flag = 0, loop_filter_across_tiles_enabled_flag = 0, pps_loop_filter_across_slices_enabled_flag = 1, scaling_list_data_present_flag = 0, screen_content_flag = 0, enable_gpu_weighted_prediction = 0, no_output_of_prior_pics_flag = 0, reserved = 0}, value = 65556}, hierarchical_level_plus1 = 0 '\000', va_byte_reserved = 0 '\000', scc_fields = {bits = {pps_curr_pic_ref_enabled_flag = 0, reserved = 0}, value = 0}, va_reserved = {0 }} slice {slice_segment_address = 0, num_ctu_in_slice = 1750, slice_type = 1 '\001', slice_pic_parameter_set_id = 0 '\000', num_ref_idx_l0_active_minus1 = 0 '\000', num_ref_idx_l1_active_minus1 = 0 '\000', ref_pic_list0 = {{picture_id = 5, pic_order_cnt = 0, flags = 16, va_reserved = {0, 0, 0, 0}}, {picture_id = 4294967295, pic_order_cnt = 0, flags = 1, va_reserved = {0, 0, 0, 0}} }, ref_pic_list1 = {{picture_id = 4294967295, pic_order_cnt = 0, flags = 1, va_reserved = {0, 0, 0, 0}} }, luma_log2_weight_denom = 0 '\000', delta_chroma_log2_weight_denom = 0 '\000', delta_luma_weight_l0 = '\000' , luma_offset_l0 = '\000' , delta_chroma_weight_l0 = {"\000" }, chroma_offset_l0 = {"\000" }, delta_luma_weight_l1 = '\000' , luma_offset_l1 = '\000' , delta_chroma_weight_l1 = {"\000" }, chroma_offset_l1 = {"\000" }, max_num_merge_cand = 5 '\005', slice_qp_delta = 0 '\000', slice_cb_qp_offset = 0 '\000', slice_cr_qp_offset = 0 '\000', slice_beta_offset_div2 = 0 '\000', slice_tc_offset_div2 = 0 '\000', slice_fields = {bits = { last_slice_of_pic_flag = 1, dependent_slice_segment_flag = 0, colour_plane_id = 0, slice_temporal_mvp_enabled_flag = 0, slice_sao_luma_flag = 1, slice_sao_chroma_flag = 1, num_ref_idx_active_override_flag = 0, mvd_l1_zero_flag = 0, cabac_init_flag = 0, slice_deblocking_filter_disabled_flag = 0, slice_loop_filter_across_slices_enabled_flag = 0, collocated_from_l0_flag = 1}, value = 8289}, pred_weight_table_bit_offset = 0, pred_weight_table_bit_length = 0, va_reserved = {0, 0, 0, 0, 0, 0}} VPS vps_video_parameter_set_id:0 vps_base_layer_internal_flag:1 vps_base_layer_available_flag:1 vps_max_layers_minus1:0 vps_max_sub_layers_minus1:0 vps_temporal_id_nesting_flag:1 profile_tier_level( 1, vps_max_sub_layers_minus1 ): general_profile_space:0 general_tier_flag:0 general_profile_idc:1 general_profile_compatibility_flag: 0 1 1 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 general_progressive_source_flag:1 general_interlaced_source_flag:0 general_non_packed_constraint_flag:1 general_frame_only_constraint_flag:1 general_level_idc:150 vps_sub_layer_ordering_info_present_flag:0 vps_max_layer_id:0 vps_num_layer_sets_minus1:0 vps_timing_info_present_flag:0 SPS: sps_video_parameter_set_id:0 sps_max_sub_layers_minus1:0 sps_temporal_id_nesting_flag:1 profile_tier_level( 1, vps_max_sub_layers_minus1 ): general_profile_space:0 general_tier_flag:0 general_profile_idc:1 general_profile_compatibility_flag: 0 1 1 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 general_progressive_source_flag:1 general_interlaced_source_flag:0 general_non_packed_constraint_flag:1 general_frame_only_constraint_flag:1 general_level_idc:150 sps_seq_parameter_set_id:0 chroma_format_idc:1 pic_width_in_luma_samples:3200 pic_height_in_luma_samples:2208 conformance_window_flag:1 !! Found NAL at offset 75 (0x004B), size 7 (0x0007) forbidden_zero_bit: 0 nal->nal_unit_type: 34 nal->nuh_layer_id: 0 nal->nuh_temporal_id_plus1: 1 PPS: pps_pic_parameter_set_id:0 pps_seq_parameter_set_id:0 dependent_slice_segments_enabled_flag:1 output_flag_present_flag:0 num_extra_slice_header_bits:0 sign_data_hiding_enabled_flag:0 cabac_init_present_flag:1 num_ref_idx_l0_default_active_minus1:0 num_ref_idx_l1_default_active_minus1:0 init_qp_minus26:0 constrained_intra_pred_flag:0 transform_skip_enabled_flag:1 cu_qp_delta_enabled_flag:0 pps_cb_qp_offset:0 pps_cr_qp_offset:0 pps_slice_chroma_qp_offsets_present_flag:0 weighted_pred_flag:0 weighted_bipred_flag:0 transquant_bypass_enabled_flag:0 tiles_enabled_flag:0 entropy_coding_sync_enabled_flag:0 */ static void * worker_thread_function(void *); /* bitstream code used for writing the packed headers */ #define BITSTREAM_ALLOCATE_STEPPING 4096 struct bitstream { unsigned int *buffer; int bit_offset; int max_size_in_dword; }; static unsigned int va_swap32(unsigned int val) { unsigned char *pval = (unsigned char *)&val; return ((pval[0] << 24) | (pval[1] << 16) | (pval[2] << 8) | (pval[3] << 0)); } static void bitstream_start(struct bitstream *bs) { bs->max_size_in_dword = BITSTREAM_ALLOCATE_STEPPING; bs->buffer = (unsigned int*)calloc(bs->max_size_in_dword * sizeof(unsigned int), 1); bs->bit_offset = 0; } static void bitstream_end(struct bitstream *bs) { int pos = (bs->bit_offset >> 5); int bit_offset = (bs->bit_offset & 0x1f); int bit_left = 32 - bit_offset; if (bit_offset) { bs->buffer[pos] = va_swap32((bs->buffer[pos] << bit_left)); } } static void bitstream_put_ui(struct bitstream *bs, unsigned int val, int size_in_bits) { int pos = (bs->bit_offset >> 5); int bit_offset = (bs->bit_offset & 0x1f); int bit_left = 32 - bit_offset; if (!size_in_bits) return; bs->bit_offset += size_in_bits; if (bit_left > size_in_bits) { bs->buffer[pos] = (bs->buffer[pos] << size_in_bits | val); } else { size_in_bits -= bit_left; bs->buffer[pos] = (bs->buffer[pos] << bit_left) | (val >> size_in_bits); bs->buffer[pos] = va_swap32(bs->buffer[pos]); if (pos + 1 == bs->max_size_in_dword) { bs->max_size_in_dword += BITSTREAM_ALLOCATE_STEPPING; bs->buffer = (unsigned int*)realloc(bs->buffer, bs->max_size_in_dword * sizeof(unsigned int)); assert(bs->buffer); } bs->buffer[pos + 1] = val; } } static void bitstream_put_ue(struct bitstream *bs, unsigned int val) { int size_in_bits = 0; int tmp_val = ++val; while (tmp_val) { tmp_val >>= 1; size_in_bits++; } bitstream_put_ui(bs, 0, size_in_bits - 1); /* leading zero */ bitstream_put_ui(bs, val, size_in_bits); } static void bitstream_put_se(struct bitstream *bs, int val) { unsigned int new_val; if (val <= 0) new_val = -2 * val; else new_val = 2 * val - 1; bitstream_put_ue(bs, new_val); } static void bitstream_byte_aligning(struct bitstream *bs, int bit) { int bit_offset = (bs->bit_offset & 0x7); int bit_left = 8 - bit_offset; int new_val; if (!bit_offset) return; if (bit) new_val = (1 << bit_left) - 1; else new_val = 0; bitstream_put_ui(bs, new_val, bit_left); } #define USE_P010 1 #define PRINT_FLAG(f,name) if(f & name)printf(" %s", #name ) #define PRINT_INT_ATTR(name) if(r->encoder.attrib[name].value != VA_ATTRIB_NOT_SUPPORTED)printf("Supported %s: %d\n", #name, (int)r->encoder.attrib[name].value) #define PRINT_BIT_FIELD(name) if(val.bits.name)printf(" %s=%d", #name, val.bits.name) static VAStatus encoder_create_config(struct vaapi_recorder *r) { VAConfigAttrib attrib[2]; VAStatus status; VAProfile profile = USE_P010?VAProfileHEVCMain10:VAProfileHEVCMain; for (int i = 0; i < VAConfigAttribTypeMax; i++) r->encoder.attrib[i].type = (VAConfigAttribType)i; vaGetConfigAttributes(r->va_dpy, profile, VAEntrypointEncSlice, r->encoder.attrib, VAConfigAttribTypeMax); uint32_t flags= r->encoder.attrib[VAConfigAttribRTFormat].value; printf("Supported RT formats: %d", flags); PRINT_FLAG(flags, VA_RT_FORMAT_YUV420); PRINT_FLAG(flags, VA_RT_FORMAT_YUV422); PRINT_FLAG(flags, VA_RT_FORMAT_YUV444); PRINT_FLAG(flags, VA_RT_FORMAT_YUV411); PRINT_FLAG(flags, VA_RT_FORMAT_YUV400); PRINT_FLAG(flags, VA_RT_FORMAT_YUV420_10); PRINT_FLAG(flags, VA_RT_FORMAT_YUV422_10); PRINT_FLAG(flags, VA_RT_FORMAT_YUV444_10); PRINT_FLAG(flags, VA_RT_FORMAT_YUV420_12); PRINT_FLAG(flags, VA_RT_FORMAT_YUV422_12); PRINT_FLAG(flags, VA_RT_FORMAT_YUV444_12); PRINT_FLAG(flags, VA_RT_FORMAT_RGB16); PRINT_FLAG(flags, VA_RT_FORMAT_RGB32); PRINT_FLAG(flags, VA_RT_FORMAT_RGBP); PRINT_FLAG(flags, VA_RT_FORMAT_RGB32_10); PRINT_FLAG(flags, VA_RT_FORMAT_PROTECTED); printf("\n"); flags= r->encoder.attrib[VAConfigAttribRateControl].value; printf("Supported rate control: %d", flags); PRINT_FLAG(flags, VA_RC_NONE); PRINT_FLAG(flags, VA_RC_CBR); PRINT_FLAG(flags, VA_RC_VBR); PRINT_FLAG(flags, VA_RC_VCM); PRINT_FLAG(flags, VA_RC_CQP); PRINT_FLAG(flags, VA_RC_VBR_CONSTRAINED); PRINT_FLAG(flags, VA_RC_ICQ); PRINT_FLAG(flags, VA_RC_MB); PRINT_FLAG(flags, VA_RC_CFS); PRINT_FLAG(flags, VA_RC_PARALLEL); PRINT_FLAG(flags, VA_RC_QVBR); PRINT_FLAG(flags, VA_RC_AVBR); PRINT_FLAG(flags, VA_RC_TCBRC); printf("\n"); flags = r->encoder.attrib[VAConfigAttribEncPackedHeaders].value; printf("Supported packed headers: %d", flags); PRINT_FLAG(flags, VA_ENC_PACKED_HEADER_SEQUENCE); PRINT_FLAG(flags, VA_ENC_PACKED_HEADER_PICTURE); PRINT_FLAG(flags, VA_ENC_PACKED_HEADER_SLICE); PRINT_FLAG(flags, VA_ENC_PACKED_HEADER_MISC); PRINT_FLAG(flags, VA_ENC_PACKED_HEADER_RAW_DATA); printf("\n"); PRINT_INT_ATTR(VAConfigAttribEncMaxSlices); flags = r->encoder.attrib[VAConfigAttribEncSliceStructure].value; printf("Supported slice structure: %d", flags); PRINT_FLAG(flags, VA_ENC_SLICE_STRUCTURE_POWER_OF_TWO_ROWS); PRINT_FLAG(flags, VA_ENC_SLICE_STRUCTURE_ARBITRARY_MACROBLOCKS); PRINT_FLAG(flags, VA_ENC_SLICE_STRUCTURE_EQUAL_ROWS); PRINT_FLAG(flags, VA_ENC_SLICE_STRUCTURE_MAX_SLICE_SIZE); PRINT_FLAG(flags, VA_ENC_SLICE_STRUCTURE_EQUAL_MULTI_ROWS); printf("\n"); PRINT_INT_ATTR(VAConfigAttribEncMacroblockInfo); PRINT_INT_ATTR(VAConfigAttribMaxPictureWidth); PRINT_INT_ATTR(VAConfigAttribMaxPictureHeight); PRINT_INT_ATTR(VAConfigAttribEncSkipFrame); //PRINT_INT_ATTR(VAConfigAttribEncROI); if(r->encoder.attrib[VAConfigAttribEncROI].value != VA_ATTRIB_NOT_SUPPORTED) { VAConfigAttribValEncROI val; val.value = r->encoder.attrib[VAConfigAttribEncROI].value; printf("Supported ROI attrs: %d", val.value); PRINT_BIT_FIELD(num_roi_regions); PRINT_BIT_FIELD(roi_rc_priority_support); PRINT_BIT_FIELD(roi_rc_qp_delta_support); printf("\n"); } PRINT_INT_ATTR(VAConfigAttribEncRateControlExt); PRINT_INT_ATTR(VAConfigAttribContextPriority); PRINT_INT_ATTR(VAConfigAttribEncDirtyRect); PRINT_INT_ATTR(VAConfigAttribEncParallelRateControl); PRINT_INT_ATTR(VAConfigAttribEncDynamicScaling); PRINT_INT_ATTR(VAConfigAttribFrameSizeToleranceSupport); PRINT_INT_ATTR(VAConfigAttribStats); PRINT_INT_ATTR(VAConfigAttribEncTileSupport); PRINT_INT_ATTR(VAConfigAttribCustomRoundingControl); PRINT_INT_ATTR(VAConfigAttribQPBlockSize); PRINT_INT_ATTR(VAConfigAttribEncHEVCFeatures); PRINT_INT_ATTR(VAConfigAttribEncHEVCBlockSizes); if(r->encoder.attrib[VAConfigAttribEncHEVCFeatures].value != VA_ATTRIB_NOT_SUPPORTED) { VAConfigAttribValEncHEVCFeatures val; val.value = r->encoder.attrib[VAConfigAttribEncHEVCFeatures].value; printf("Supported HEVC features: %d", val.value); PRINT_BIT_FIELD(separate_colour_planes); PRINT_BIT_FIELD(scaling_lists); PRINT_BIT_FIELD(amp); PRINT_BIT_FIELD(sao); PRINT_BIT_FIELD(pcm); PRINT_BIT_FIELD(temporal_mvp); PRINT_BIT_FIELD(strong_intra_smoothing); PRINT_BIT_FIELD(dependent_slices); PRINT_BIT_FIELD(sign_data_hiding); PRINT_BIT_FIELD(constrained_intra_pred); PRINT_BIT_FIELD(transform_skip); PRINT_BIT_FIELD(cu_qp_delta); PRINT_BIT_FIELD(weighted_prediction); PRINT_BIT_FIELD(transquant_bypass); PRINT_BIT_FIELD(deblocking_filter_disable); printf("\n"); } if(r->encoder.attrib[VAConfigAttribEncHEVCBlockSizes].value != VA_ATTRIB_NOT_SUPPORTED) { VAConfigAttribValEncHEVCBlockSizes val; val.value = r->encoder.attrib[VAConfigAttribEncHEVCBlockSizes].value; printf("Supported HEVC block sizes: %d", val.value); PRINT_BIT_FIELD(log2_max_coding_tree_block_size_minus3); PRINT_BIT_FIELD(log2_min_coding_tree_block_size_minus3); PRINT_BIT_FIELD(log2_min_luma_coding_block_size_minus3); PRINT_BIT_FIELD(log2_max_luma_transform_block_size_minus2); PRINT_BIT_FIELD(log2_min_luma_transform_block_size_minus2); PRINT_BIT_FIELD(max_max_transform_hierarchy_depth_inter); PRINT_BIT_FIELD(min_max_transform_hierarchy_depth_inter); PRINT_BIT_FIELD(max_max_transform_hierarchy_depth_intra); PRINT_BIT_FIELD(min_max_transform_hierarchy_depth_intra); PRINT_BIT_FIELD(log2_max_pcm_coding_block_size_minus3); PRINT_BIT_FIELD(log2_min_pcm_coding_block_size_minus3); printf("\n"); } flags = r->encoder.attrib[VAConfigAttribEncQuantization].value; printf("Supported enc quantization: %d", flags); PRINT_FLAG(flags, VA_ENC_QUANTIZATION_NONE); PRINT_FLAG(flags, VA_ENC_QUANTIZATION_TRELLIS_SUPPORTED); printf("\n"); flags = r->encoder.attrib[VAConfigAttribEncIntraRefresh].value; printf("Supported intra refresh: %d", flags); PRINT_FLAG(flags,VA_ENC_INTRA_REFRESH_ROLLING_COLUMN); PRINT_FLAG(flags,VA_ENC_INTRA_REFRESH_ROLLING_ROW); PRINT_FLAG(flags,VA_ENC_INTRA_REFRESH_ADAPTIVE); PRINT_FLAG(flags,VA_ENC_INTRA_REFRESH_CYCLIC); PRINT_FLAG(flags,VA_ENC_INTRA_REFRESH_P_FRAME); PRINT_FLAG(flags,VA_ENC_INTRA_REFRESH_B_FRAME); PRINT_FLAG(flags,VA_ENC_INTRA_REFRESH_MULTI_REF); printf("\n"); /* FIXME: should check if VAEntrypointEncSlice is supported */ /* FIXME: should check if specified attributes are supported */ attrib[0].type = VAConfigAttribRTFormat; attrib[0].value = USE_P010?VA_RT_FORMAT_YUV420_10:VA_RT_FORMAT_YUV420; attrib[1].type = VAConfigAttribRateControl; attrib[1].value = VA_RC_CBR; status = vaCreateConfig(r->va_dpy, profile, VAEntrypointEncSlice, attrib, 2, &r->encoder.cfg); if (status != VA_STATUS_SUCCESS) return status; VASurfaceID ctx_surfaces[4]; memcpy(ctx_surfaces, r->encoder.reference_picture, sizeof(VASurfaceID) * 3); ctx_surfaces[3] = r->vpp.output; status = vaCreateContext(r->va_dpy, r->encoder.cfg, r->width, r->height, VA_PROGRESSIVE, ctx_surfaces, 4, &r->encoder.ctx); if (status != VA_STATUS_SUCCESS) { vaDestroyConfig(r->va_dpy, r->encoder.cfg); return status; } return VA_STATUS_SUCCESS; } static void encoder_destroy_config(struct vaapi_recorder *r) { vaDestroyContext(r->va_dpy, r->encoder.ctx); vaDestroyConfig(r->va_dpy, r->encoder.cfg); } static void encoder_init_parameters(struct vaapi_recorder *r) { int width_in_mbs, height_in_mbs; int frame_cropping_flag = 0; int frame_crop_bottom_offset = 0; width_in_mbs = (r->width + 15) / 16; height_in_mbs = (r->height + 15) / 16; // sps // seems work r->encoder.param.seq.seq_fields.bits.sps_temporal_mvp_enabled_flag = 1; // todo: check if we can use 4:4:4/4:2:2 r->encoder.param.seq.seq_fields.bits.chroma_format_idc = 1; // 4:2:0 r->encoder.param.seq.pic_width_in_luma_samples = ALIGN16(r->width); r->encoder.param.seq.pic_height_in_luma_samples = ALIGN16(r->height); // todo: tunable block size r->encoder.param.seq.log2_diff_max_min_luma_coding_block_size = 2; r->encoder.param.seq.log2_diff_max_min_transform_block_size = 3; r->encoder.param.seq.log2_min_transform_block_size_minus2 = 0; r->encoder.param.seq.max_transform_hierarchy_depth_inter = 2; r->encoder.param.seq.max_transform_hierarchy_depth_intra = 2; r->encoder.param.seq.seq_fields.bits.amp_enabled_flag = 1; // broken on intel? or broken bistream? r->encoder.param.seq.seq_fields.bits.sample_adaptive_offset_enabled_flag = 0; r->encoder.param.seq.intra_idr_period = 32767; r->encoder.param.seq.intra_period = 32767; r->encoder.param.seq.ip_period = 1; if(USE_P010) { r->encoder.param.seq.seq_fields.bits.bit_depth_chroma_minus8 = 2; r->encoder.param.seq.seq_fields.bits.bit_depth_luma_minus8 = 2; } r->encoder.param.seq.bits_per_second = 150*1024*1024;//(long long)r->width * r->height * 12 * 90 / 50; // vps // (none?) // profile r->encoder.param.seq.general_level_idc = 120; r->encoder.param.seq.general_profile_idc = USE_P010? 2: 1; // pps r->encoder.param.pic.pic_fields.bits.dependent_slice_segments_enabled_flag = 1; // seens work both r->encoder.param.pic.pic_fields.bits.transform_skip_enabled_flag = 1; // pic r->encoder.param.pic.collocated_ref_pic_index = 0;//255; r->encoder.param.pic.pic_init_qp = 26; r->encoder.param.pic.nal_unit_type = NALU_IDR_W_DLP; r->encoder.param.pic.pic_fields.bits.idr_pic_flag = 1; r->encoder.param.pic.pic_fields.bits.coding_type = 1; r->encoder.param.pic.pic_fields.bits.reference_pic_flag = 1; // seems work r->encoder.param.pic.pic_fields.bits.pps_loop_filter_across_slices_enabled_flag = 1; r->encoder.param.pic.pic_fields.bits.cu_qp_delta_enabled_flag = 1; // CBR if(r->encoder.param.pic.pic_fields.bits.cu_qp_delta_enabled_flag) r->encoder.param.pic.diff_cu_qp_delta_depth = 2; for(int i = 0; i < 15; i++) { r->encoder.param.pic.reference_frames[i].picture_id = VA_INVALID_SURFACE; r->encoder.param.pic.reference_frames[i].flags = VA_PICTURE_HEVC_INVALID; r->encoder.param.pic.reference_frames[i].pic_order_cnt = 0; } // slice //r->encoder.param.slice.slice_fields.bits.num_ref_idx_active_override_flag = 0; r->encoder.param.slice.slice_qp_delta = 0; int lcu_size = 32; // todo: block size settings? int picture_width_in_ctus = (r->width + lcu_size - 1) / lcu_size; int picture_height_in_ctus = (r->height + lcu_size - 1) / lcu_size; r->encoder.param.slice.num_ctu_in_slice = picture_width_in_ctus * picture_height_in_ctus; r->encoder.param.slice.max_num_merge_cand = 5; // seems works //r->encoder.param.slice.slice_fields.bits.collocated_from_l0_flag = 1; // broken on intel??? //r->encoder.param.slice.slice_fields.bits.slice_sao_chroma_flag = 1; //r->encoder.param.slice.slice_fields.bits.slice_sao_luma_flag = 1; memset((void*)r->encoder.param.slice.ref_pic_list0, -1, sizeof(r->encoder.param.slice.ref_pic_list0)); memset((void*)r->encoder.param.slice.ref_pic_list1, -1, sizeof(r->encoder.param.slice.ref_pic_list1)); for(int i = 0; i < 15; i++) { r->encoder.param.slice.ref_pic_list0[i].flags = -1;//VA_PICTURE_HEVC_INVALID; r->encoder.param.slice.ref_pic_list0[i].picture_id = VA_INVALID_SURFACE; r->encoder.param.slice.ref_pic_list0[i].pic_order_cnt = -1; r->encoder.param.slice.ref_pic_list1[i].flags = -1;//VA_PICTURE_HEVC_INVALID; r->encoder.param.slice.ref_pic_list1[i].picture_id = VA_INVALID_SURFACE; r->encoder.param.slice.ref_pic_list1[i].pic_order_cnt = -1; } } static VABufferID encoder_update_seq_parameters(struct vaapi_recorder *r) { VABufferID seq_buf; VAStatus status; status = vaCreateBuffer(r->va_dpy, r->encoder.ctx, VAEncSequenceParameterBufferType, sizeof(r->encoder.param.seq), 1, &r->encoder.param.seq, &seq_buf); if (status == VA_STATUS_SUCCESS) return seq_buf; else { printf("%s: %d\n", __PRETTY_FUNCTION__, status); return VA_INVALID_ID; } } static VABufferID encoder_update_pic_parameters(struct vaapi_recorder *r, VABufferID output_buf) { VAEncPictureParameterBufferHEVC *pic = &r->encoder.param.pic; VAStatus status; VABufferID pic_param_buf; VASurfaceID curr_pic, pic0; curr_pic = r->encoder.reference_picture[r->frame_count % 2]; pic0 = r->encoder.reference_picture[(r->frame_count + 1) % 2]; pic->decoded_curr_pic.picture_id = curr_pic; pic->decoded_curr_pic.pic_order_cnt = r->frame_count; if(r->frame_count) { pic->reference_frames[0].picture_id = pic0; pic->reference_frames[0].flags = 0;//VA_PICTURE_HEVC_RPS_ST_CURR_BEFORE; pic->reference_frames[0].pic_order_cnt = r->frame_count - 1; } else { pic->reference_frames[0].picture_id = VA_INVALID_SURFACE; pic->reference_frames[0].flags = VA_PICTURE_HEVC_INVALID; pic->reference_frames[0].pic_order_cnt = 0; } pic->coded_buf = output_buf; pic->pic_fields.bits.idr_pic_flag = (r->frame_count == 0); pic->nal_unit_type = (r->frame_count == 0)? NALU_IDR_W_DLP : NALU_TRAIL_R; pic->pic_fields.bits.reference_pic_flag = 1; pic->pic_fields.bits.coding_type = (r->frame_count == 0)? 1:2; status = vaCreateBuffer(r->va_dpy, r->encoder.ctx, VAEncPictureParameterBufferType, sizeof(VAEncPictureParameterBufferHEVC), 1, pic, &pic_param_buf); if (status == VA_STATUS_SUCCESS) return pic_param_buf; else { printf("%s: %d\n", __PRETTY_FUNCTION__, status); return VA_INVALID_ID; } } static VABufferID encoder_update_slice_parameter(struct vaapi_recorder *r, int slice_type) { VABufferID slice_param_buf; VAStatus status; r->encoder.param.slice.slice_type = r->frame_count == 0? SLICE_I: SLICE_P; if(r->frame_count) { r->encoder.param.slice.ref_pic_list0[0].pic_order_cnt = r->frame_count - 1; r->encoder.param.slice.ref_pic_list0[0].picture_id = r->encoder.reference_picture[(r->frame_count - 1)% 2]; r->encoder.param.slice.ref_pic_list0[0].flags = 0;//VA_PICTURE_HEVC_RPS_ST_CURR_BEFORE; } else { r->encoder.param.slice.ref_pic_list0[0].pic_order_cnt = -1; r->encoder.param.slice.ref_pic_list0[0].picture_id = VA_INVALID_SURFACE; r->encoder.param.slice.ref_pic_list0[0].flags = -1;//VA_PICTURE_HEVC_INVALID; } status = vaCreateBuffer(r->va_dpy, r->encoder.ctx, VAEncSliceParameterBufferType, sizeof(r->encoder.param.slice), 1, &r->encoder.param.slice, &slice_param_buf); if (status == VA_STATUS_SUCCESS) return slice_param_buf; else { printf("%s: %d\n", __PRETTY_FUNCTION__, status); return VA_INVALID_ID; } } static VABufferID encoder_update_misc_rate_parameter(struct vaapi_recorder *r) { VAEncMiscParameterBuffer *misc_param; VAEncMiscParameterHRD *hrd; VAEncMiscParameterRateControl *rc; VABufferID buffer; VAStatus status; int total_size = sizeof(VAEncMiscParameterBuffer) + sizeof(VAEncMiscParameterRateControl); status = vaCreateBuffer(r->va_dpy, r->encoder.ctx, VAEncMiscParameterBufferType, total_size, 1, NULL, &buffer); if (status != VA_STATUS_SUCCESS) return VA_INVALID_ID; status = vaMapBuffer(r->va_dpy, buffer, (void **) &misc_param); if (status != VA_STATUS_SUCCESS) { vaDestroyBuffer(r->va_dpy, buffer); return VA_INVALID_ID; } misc_param->type = VAEncMiscParameterTypeRateControl; rc = (VAEncMiscParameterRateControl *)misc_param->data; rc->initial_qp = 25; rc->max_qp = rc->max_qp = 0; rc->basic_unit_size = 0; rc->window_size = 1000; rc->target_percentage = 66; rc->bits_per_second = 150*1024*1024; vaUnmapBuffer(r->va_dpy, buffer); return buffer; } static VABufferID encoder_update_misc_framerate_parameter(struct vaapi_recorder *r) { VAEncMiscParameterBuffer *misc_param; VAEncMiscParameterHRD *hrd; VAEncMiscParameterFrameRate *fps; VABufferID buffer; VAStatus status; int total_size = sizeof(VAEncMiscParameterBuffer) + sizeof(VAEncMiscParameterRateControl); status = vaCreateBuffer(r->va_dpy, r->encoder.ctx, VAEncMiscParameterBufferType, total_size, 1, NULL, &buffer); if (status != VA_STATUS_SUCCESS) return VA_INVALID_ID; status = vaMapBuffer(r->va_dpy, buffer, (void **) &misc_param); if (status != VA_STATUS_SUCCESS) { vaDestroyBuffer(r->va_dpy, buffer); return VA_INVALID_ID; } misc_param->type = VAEncMiscParameterTypeFrameRate; fps = (VAEncMiscParameterFrameRate *)misc_param->data; fps->framerate = 90; vaUnmapBuffer(r->va_dpy, buffer); return buffer; } static VABufferID encoder_update_misc_hrd_parameter(struct vaapi_recorder *r) { VAEncMiscParameterBuffer *misc_param; VAEncMiscParameterHRD *hrd; VAEncMiscParameterRateControl *rc; VABufferID buffer; VAStatus status; int total_size = sizeof(VAEncMiscParameterBuffer) + sizeof(VAEncMiscParameterHRD); status = vaCreateBuffer(r->va_dpy, r->encoder.ctx, VAEncMiscParameterBufferType, total_size, 1, NULL, &buffer); if (status != VA_STATUS_SUCCESS) return VA_INVALID_ID; status = vaMapBuffer(r->va_dpy, buffer, (void **) &misc_param); if (status != VA_STATUS_SUCCESS) { vaDestroyBuffer(r->va_dpy, buffer); return VA_INVALID_ID; } misc_param->type = VAEncMiscParameterTypeHRD; hrd = (VAEncMiscParameterHRD *) misc_param->data; hrd->initial_buffer_fullness = 0; hrd->buffer_size = 0; vaUnmapBuffer(r->va_dpy, buffer); return buffer; } static int setup_encoder(struct vaapi_recorder *r) { VAStatus status; VASurfaceAttrib attrs[2] = { {VASurfaceAttribMemoryType, VA_SURFACE_ATTRIB_SETTABLE,{VAGenericValueTypeInteger, 0},}, {VASurfaceAttribPixelFormat,VA_SURFACE_ATTRIB_SETTABLE,{VAGenericValueTypeInteger, 0},} }; attrs[0].value.value.i = VA_SURFACE_ATTRIB_MEM_TYPE_VA; attrs[1].value.value.i = USE_P010?VA_FOURCC_P010:VA_FOURCC_NV12; status = vaCreateSurfaces(r->va_dpy, USE_P010?VA_RT_FORMAT_YUV420_10:VA_RT_FORMAT_YUV420, r->width, r->height, r->encoder.reference_picture, 3, attrs, 2); status = encoder_create_config(r); if (status != VA_STATUS_SUCCESS) { return -1; } if (status != VA_STATUS_SUCCESS) { encoder_destroy_config(r); return -1; } r->encoder.output_size = r->width * r->height; r->encoder.intra_period = 32767; encoder_init_parameters(r); return 0; } static void encoder_destroy(struct vaapi_recorder *r) { vaDestroySurfaces(r->va_dpy, r->encoder.reference_picture, 3); encoder_destroy_config(r); } static void nal_start_code_prefix(bitstream *bs, int nal_unit_type) { if (nal_unit_type == NALU_VPS || nal_unit_type == NALU_SPS || nal_unit_type == NALU_PPS || nal_unit_type == NALU_AUD) bitstream_put_ui(bs, 0x00000001, 32); else bitstream_put_ui(bs, 0x000001, 24); } static void nal_header(bitstream *bs, int nal_unit_type) { bitstream_put_ui(bs, 0, 1); /* forbidden_zero_bit: 0 */ bitstream_put_ui(bs, nal_unit_type, 6); bitstream_put_ui(bs, 0, 6); bitstream_put_ui(bs, 1, 3); } static void rbsp_trailing_bits(struct bitstream *bs) { bitstream_put_ui(bs, 1, 1); bitstream_byte_aligning(bs, 0); } #define TEMPORAL_ID_NESTING 1 #define POC_BITS 16 static void protier_rbsp(bitstream *bs, VAEncSequenceParameterBufferHEVC *seq) { uint32_t i = 0; bitstream_put_ui(bs, 0, 2); // general_profile_space bitstream_put_ui(bs, seq->general_tier_flag, 1); // general_tier_flag bitstream_put_ui(bs, seq->general_profile_idc, 5); // general_profile_idc // real_hevc_profile //for (i = 0; i < 32; i++) //bitstream_put_ui(bs, protier_param.general_profile_compatibility_flag[i], 1); //bitstream_put_ui(bs, 1 << 30, 32); // ptps->general_profile_compatibility_flag[ptps->general_profile_idc] = 1; // todo: configurable flags? for (i = 0; i < 32; i++) bitstream_put_ui(bs, i == seq->general_profile_idc, 1); bitstream_put_ui(bs, 1, 1); //general_progressive_source_flag bitstream_put_ui(bs, 0, 1); // general_interlaced_source_flag bitstream_put_ui(bs, 1, 1); // general_non_packed_constraint_flag bitstream_put_ui(bs, 1, 1); // general_frame_only_constraint_flag // reserved? bitstream_put_ui(bs, 0, 16); bitstream_put_ui(bs, 0, 16); bitstream_put_ui(bs, 0, 12); bitstream_put_ui(bs, seq->general_level_idc, 8); // general_level_idc //ptps->general_level_idc = 30; //ptps->general_level_idc = ptps->general_level_idc * 4; } static void vps_rbsp(bitstream *bs, VAEncSequenceParameterBufferHEVC *seq) { uint32_t i = 0; bitstream_put_ui(bs, 0, 4); // vps.vps_video_parameter_set_id bitstream_put_ui(bs, 3, 2); //vps_reserved_three_2bits //vps_base_layer_internal_flag:1 //vps_base_layer_available_flag:1 bitstream_put_ui(bs, 0, 6); //vps_reserved_zero_6bits // vps_max_layers_minus1:0 bitstream_put_ui(bs, 0, 3); // vps_max_sub_layers_minus1 bitstream_put_ui(bs, TEMPORAL_ID_NESTING, 1); // vps_temporal_id_nesting_flag bitstream_put_ui(bs, 0xFFFF, 16); //vps_reserved_0xffff_16bits protier_rbsp(bs, seq); bitstream_put_ui(bs, 0, 1); // vps.vps_sub_layer_ordering_info_present_flag // for (i = (vps.vps_sub_layer_ordering_info_present_flag ? 0 : vps.vps_max_sub_layers_minus1); i <= vps.vps_max_sub_layers_minus1; i++) { // NOTE: In teddi and mv_encoder, the setting is max_dec_pic_buffering. // here just follow the spec 7.3.2.1 // todo: check this. At least, breaks some hevc parsers when set to 0 bitstream_put_ue(bs, 1); //vps.vps_max_dec_pic_buffering_minus1[i] bitstream_put_ue(bs, 0);//vps.vps_max_num_reorder_pics[i] bitstream_put_ue(bs, 0);//vps.vps_max_latency_increase_plus1[i] //} //*/ bitstream_put_ui(bs, 0, 6); // vps.vps_max_nuh_reserved_zero_layer_id bitstream_put_ue(bs, 0); // vps.vps_num_op_sets_minus1 bitstream_put_ui(bs, 0, 1); // vps.vps_num_op_sets_minus1 /*& if (vps.vps_timing_info_present_flag) { bitstream_put_ue(bs, vps.vps_num_units_in_tick); bitstream_put_ue(bs, vps.vps_time_scale); bitstream_put_ue(bs, vps.vps_poc_proportional_to_timing_flag); if (vps.vps_poc_proportional_to_timing_flag) { bitstream_put_ue(bs, vps.vps_num_ticks_poc_diff_one_minus1); } bitstream_put_ue(bs, vps.vps_num_hrd_parameters); for (i = 0; i < vps.vps_num_hrd_parameters; i++) { bitstream_put_ue(bs, vps.hrd_layer_set_idx[i]); if (i > 0) { bitstream_put_ui(bs, vps.cprms_present_flag[i], 1); } } } */ // todo: bitstream restrictions? // no extension flag bitstream_put_ui(bs, 0, 1); } static void sps_rbsp(bitstream *bs, VAEncSequenceParameterBufferHEVC *sps, int width, int height) { uint32_t i = 0; bitstream_put_ui(bs, 0, 4); // sps.sps_video_parameter_set_id bitstream_put_ui(bs, 0, 3); // sps.sps_max_sub_layers_minus1 bitstream_put_ui(bs, TEMPORAL_ID_NESTING, 1); // sps.sps_temporal_id_nesting_flag protier_rbsp(bs, sps); bitstream_put_ue(bs, 0); // sps.sps_seq_parameter_set_id bitstream_put_ue(bs, sps->seq_fields.bits.chroma_format_idc); // sps.chroma_format_idc // 4:2:0 // todo: check if we can use 4:4:4/4:2:2 if (sps->seq_fields.bits.chroma_format_idc == 3) { bitstream_put_ui(bs, sps->seq_fields.bits.separate_colour_plane_flag, 1); } bitstream_put_ue(bs, ALIGN16(width) ); //sps.pic_width_in_luma_samples bitstream_put_ue(bs, ALIGN16(height)); //sps.pic_height_in_luma_samples bool conformance_window_flag = ALIGN16(width) != width || ALIGN16(height) != height; bitstream_put_ui(bs, 1, 1); // sps.conformance_window_flag if (conformance_window_flag) { // sps. bitstream_put_ue(bs, 0); // sps.conf_win_left_offset bitstream_put_ue(bs, (ALIGN16(width) - width) >> 1); // sps.conf_win_right_offset bitstream_put_ue(bs, 0); // sps.conf_win_top_offset bitstream_put_ue(bs, (ALIGN16(height) - height) >> 1); // sps.conf_win_bottom_offset } bitstream_put_ue(bs, sps->seq_fields.bits.bit_depth_luma_minus8); //sps.bit_depth_luma_minus8 bitstream_put_ue(bs, sps->seq_fields.bits.bit_depth_chroma_minus8); //sps.bit_depth_chroma_minus8 bitstream_put_ue(bs, POC_BITS - 4); // sps.log2_max_pic_order_cnt_lsb_minus4 bitstream_put_ui(bs, 0 , 1); //sps.sps_sub_layer_ordering_info_present_flag //for (i = (sps.sps_sub_layer_ordering_info_present_flag ? 0 : sps.sps_max_sub_layers_minus1); i <= sps.sps_max_sub_layers_minus1; i++) { // NOTE: In teddi and mv_encoder, the setting is max_dec_pic_buffering. // here just follow the spec 7.3.2.2 // todo: check. At least, breaks some hevc parsers when set to 0 bitstream_put_ue(bs, 0);//sps.sps_max_dec_pic_buffering_minus1[i] bitstream_put_ue(bs, 0);//sps.sps_max_num_reorder_pics[i] bitstream_put_ue(bs, 0); //sps.sps_max_latency_increase_plus1[i] //} bitstream_put_ue(bs, sps->log2_min_luma_coding_block_size_minus3); // sps.log2_min_luma_coding_block_size_minus3 bitstream_put_ue(bs, sps->log2_diff_max_min_luma_coding_block_size);//sps.log2_diff_max_min_luma_coding_block_size bitstream_put_ue(bs, sps->log2_min_transform_block_size_minus2);//sps.log2_min_luma_transform_block_size_minus2 bitstream_put_ue(bs, sps->log2_diff_max_min_transform_block_size);//sps.log2_diff_max_min_luma_transform_block_size bitstream_put_ue(bs, sps->max_transform_hierarchy_depth_inter);//sps.max_transform_hierarchy_depth_inter bitstream_put_ue(bs, sps->max_transform_hierarchy_depth_inter);//sps.max_transform_hierarchy_depth_intra assert(!sps->seq_fields.bits.scaling_list_enabled_flag); // scaling_list_enabled_flag is set as 0 in fill_sps_header() for now bitstream_put_ui(bs, 0, 1); // sps.scaling_list_enabled_flag /*if (sps.scaling_list_enabled_flag) { bitstream_put_ui(bs, sps.sps_scaling_list_data_present_flag, 1); if (sps.sps_scaling_list_data_present_flag) { //scaling_list_data(); } }*/ bitstream_put_ui(bs, sps->seq_fields.bits.amp_enabled_flag, 1); // sps.amp_enabled_flag bitstream_put_ui(bs, sps->seq_fields.bits.sample_adaptive_offset_enabled_flag, 1); // sps.sample_adaptive_offset_enabled_flag // pcm_enabled_flag is set as 0 in fill_sps_header() for now bitstream_put_ui(bs, sps->seq_fields.bits.pcm_enabled_flag, 1);//sps.pcm_enabled_flag assert(!sps->seq_fields.bits.pcm_enabled_flag); if (sps->seq_fields.bits.pcm_enabled_flag) { bitstream_put_ui(bs, sps->pcm_sample_bit_depth_luma_minus1, 4); bitstream_put_ui(bs, sps->pcm_sample_bit_depth_chroma_minus1, 4); bitstream_put_ue(bs, sps->log2_min_pcm_luma_coding_block_size_minus3); bitstream_put_ue(bs, sps->log2_max_pcm_luma_coding_block_size_minus3 - sps->log2_min_pcm_luma_coding_block_size_minus3); //log2_diff_max_min_pcm_luma_coding_block_size bitstream_put_ui(bs, sps->seq_fields.bits.pcm_loop_filter_disabled_flag, 1); } bitstream_put_ue(bs, 1);// sps.num_short_term_ref_pic_sets bitstream_put_ue(bs, 1); // num_negative_pics bitstream_put_ue(bs, 0); // num_positive_pics bitstream_put_ue(bs, 0); //delta_poc_s0_minus1[i] bitstream_put_ui(bs, 1, 1);//used_by_curr_pic_s0_flag[i] /*for (i = 0; i < sps.num_short_term_ref_pic_sets; i++) { pack_short_term_ref_pic_setp(bs, &sps.strp[i], i == 0); }*/ // long_term_ref_pics_present_flag is set as 0 in fill_sps_header() for now bitstream_put_ui(bs, 0, 1); // sps.long_term_ref_pics_present_flag /*if (sps.long_term_ref_pics_present_flag) { bitstream_put_ue(bs, sps.num_long_term_ref_pics_sps); for (i = 0; i < sps.num_long_term_ref_pics_sps; i++) { bitstream_put_ue(bs, sps.lt_ref_pic_poc_lsb_sps[i]); bitstream_put_ui(bs, sps.used_by_curr_pic_lt_sps_flag[i], 1); } }*/ bitstream_put_ui(bs, sps->seq_fields.bits.sps_temporal_mvp_enabled_flag, 1); // sps.sps_temporal_mvp_enabled_flag bitstream_put_ui(bs, sps->seq_fields.bits.strong_intra_smoothing_enabled_flag, 1); //sps.strong_intra_smoothing_enabled_flag assert(!sps->vui_parameters_present_flag); bitstream_put_ui(bs, sps->vui_parameters_present_flag, 1); // sps.vui_parameters_present_flag bitstream_put_ui(bs, 0, 1); // sps.sps_extension_present_flag } #define PPS_CABAC_INIT_PRESENT_FLAG 1 static void pps_rbsp(bitstream *bs, VAEncPictureParameterBufferHEVC *pic) { uint32_t i = 0; bool deblocking_filter_control_present_flag = false; bitstream_put_ue(bs, 0); // pps.pps_pic_parameter_set_id bitstream_put_ue(bs, 0); // pps.pps_seq_parameter_set_id bitstream_put_ui(bs, pic->pic_fields.bits.dependent_slice_segments_enabled_flag, 1); // pps.dependent_slice_segments_enabled_flag // TODO: !!! bitstream_put_ui(bs, 0, 1); //pps.output_flag_present_flag bitstream_put_ui(bs, 0, 3); // pps.num_extra_slice_header_bits bitstream_put_ui(bs, pic->pic_fields.bits.sign_data_hiding_enabled_flag, 1); //pps.sign_data_hiding_enabled_flag bitstream_put_ui(bs, PPS_CABAC_INIT_PRESENT_FLAG, 1); // pps.cabac_init_present_flag bitstream_put_ue(bs, pic->num_ref_idx_l0_default_active_minus1); //pps.num_ref_idx_l0_default_active_minus1 bitstream_put_ue(bs, pic->num_ref_idx_l1_default_active_minus1); //pps.num_ref_idx_l1_default_active_minus1 bitstream_put_se(bs, pic->pic_init_qp - 26); //pps.init_qp_minus26 bitstream_put_ui(bs, pic->pic_fields.bits.constrained_intra_pred_flag, 1); //pps.constrained_intra_pred_flag bitstream_put_ui(bs, pic->pic_fields.bits.transform_skip_enabled_flag, 1); //pps.transform_skip_enabled_flag bitstream_put_ui(bs, pic->pic_fields.bits.cu_qp_delta_enabled_flag, 1); //pps.cu_qp_delta_enabled_flag if (pic->pic_fields.bits.cu_qp_delta_enabled_flag) { bitstream_put_ue(bs, pic->diff_cu_qp_delta_depth);//diff_cu_qp_delta_depth } bitstream_put_se(bs, pic->pps_cb_qp_offset);//pps.pps_cb_qp_offset bitstream_put_se(bs, pic->pps_cr_qp_offset);//pps.pps_cr_qp_offset) bitstream_put_ui(bs, 0, 1);//pps.pps_slice_chroma_qp_offsets_present_flag bitstream_put_ui(bs, pic->pic_fields.bits.weighted_pred_flag, 1);//pps.weighted_pred_flag bitstream_put_ui(bs, pic->pic_fields.bits.weighted_bipred_flag, 1); //pps.weighted_bipred_flag bitstream_put_ui(bs, pic->pic_fields.bits.transquant_bypass_enabled_flag, 1);//pps.transquant_bypass_enabled_flag bitstream_put_ui(bs, pic->pic_fields.bits.tiles_enabled_flag, 1);//pps.tiles_enabled_flag bitstream_put_ui(bs, pic->pic_fields.bits.entropy_coding_sync_enabled_flag, 1);//pps.entropy_coding_sync_enabled_flag if (pic->pic_fields.bits.tiles_enabled_flag) { bool uniform_spacing_flag = false; bitstream_put_ue(bs, pic->num_tile_columns_minus1); bitstream_put_ue(bs, pic->num_tile_rows_minus1); bitstream_put_ui(bs, uniform_spacing_flag, 1);//uniform_spacing_flag if (!uniform_spacing_flag) { for (i = 0; i < pic->num_tile_columns_minus1; i++) { bitstream_put_ue(bs, pic->column_width_minus1[i]); } for (i = 0; i < pic->num_tile_rows_minus1; i++) { bitstream_put_ue(bs, pic->row_height_minus1[i]); } } bitstream_put_ui(bs, pic->pic_fields.bits.loop_filter_across_tiles_enabled_flag, 1); } bitstream_put_ui(bs, pic->pic_fields.bits.pps_loop_filter_across_slices_enabled_flag, 1); // pps.pps_loop_filter_across_slices_enabled_flag bitstream_put_ui(bs, deblocking_filter_control_present_flag, 1);//pps.deblocking_filter_control_present_flag if (deblocking_filter_control_present_flag) { bool deblocking_filter_override_enabled_flag = false; bool pps_deblocking_filter_disabled_flag = true; bitstream_put_ui(bs, deblocking_filter_override_enabled_flag, 1); bitstream_put_ui(bs, pps_deblocking_filter_disabled_flag, 1); int beta_offset_div2 = 0; int tc_offset_div2 = 0; if (!pps_deblocking_filter_disabled_flag) { bitstream_put_se(bs, beta_offset_div2); bitstream_put_se(bs, tc_offset_div2); } } // pps_scaling_list_data_present_flag is set as 0 in fill_pps_header() for now assert(!pic->pic_fields.bits.scaling_list_data_present_flag); bitstream_put_ui(bs, pic->pic_fields.bits.scaling_list_data_present_flag, 1);//pps.pps_scaling_list_data_present_flag /*if (pps.pps_scaling_list_data_present_flag) { //scaling_list_data(); }*/ bitstream_put_ui(bs, 0, 1);//pps.lists_modification_present_flag bitstream_put_ue(bs, 0); //pps.log2_parallel_merge_level_minus2 bitstream_put_ui(bs, 0, 1);//pps.slice_segment_header_extension_present_flag bitstream_put_ui(bs,0, 1); //pps.pps_extension_present_flag /*if (pps.pps_extension_present_flag) { bitstream_put_ui(bs, pps.pps_range_extension_flag, 1); bitstream_put_ui(bs, pps.pps_multilayer_extension_flag, 1); bitstream_put_ui(bs, pps.pps_3d_extension_flag, 1); bitstream_put_ui(bs, pps.pps_extension_5bits, 1); } if (pps.pps_range_extension_flag) { if (pps.transform_skip_enabled_flag) bitstream_put_ue(bs, pps.log2_max_transform_skip_block_size_minus2); bitstream_put_ui(bs, pps.cross_component_prediction_enabled_flag, 1); bitstream_put_ui(bs, pps.chroma_qp_offset_list_enabled_flag, 1); if (pps.chroma_qp_offset_list_enabled_flag) { bitstream_put_ue(bs, pps.diff_cu_chroma_qp_offset_depth); bitstream_put_ue(bs, pps.chroma_qp_offset_list_len_minus1); for (i = 0; i <= pps.chroma_qp_offset_list_len_minus1; i++) { bitstream_put_ue(bs, pps.cb_qp_offset_list[i]); bitstream_put_ue(bs, pps.cr_qp_offset_list[i]); } } bitstream_put_ue(bs, pps.log2_sao_offset_scale_luma); bitstream_put_ue(bs, pps.log2_sao_offset_scale_chroma); } */ } static void sliceHeader_rbsp( bitstream *bs, int framenum, VAEncSequenceParameterBufferHEVC *sps, VAEncSliceParameterBufferHEVC *slice, VAEncPictureParameterBufferHEVC *pic) { uint8_t nal_unit_type = NALU_TRAIL_R; //int gop_ref_distance = ip_period; int i = 0; bool is_idr = framenum == 0; int slice_type = is_idr?SLICE_I : SLICE_P; int short_term_ref_pic_set_sps_flag = 1; // !is_idr; int slice_qp_delta = slice->slice_qp_delta; int pic_order_cnt_lsb = framenum; bitstream_put_ui(bs, 1, 1);// first_slice_segment_in_pic_flag if (pic_order_cnt_lsb == 0) nal_unit_type = NALU_IDR_W_DLP; // idr if (nal_unit_type >= 16 && nal_unit_type <= 23) bitstream_put_ui(bs, 1, 1); //no_output_of_prior_pics_flag bitstream_put_ue(bs, 0);//slice_pic_parameter_set_id /*if (!slice_header->first_slice_segment_in_pic_flag) { if (slice_header->dependent_slice_segment_flag) { bitstream_put_ui(bs, slice_header->dependent_slice_segment_flag, 1); } bitstream_put_ui(bs, slice_header->slice_segment_address, (uint8_t)(ceil(log(slice_header->picture_height_in_ctus * slice_header->picture_width_in_ctus) / log(2.0)))); }*/ // !slice_header->dependent_slice_segment_flag if (!slice->slice_fields.bits.dependent_slice_segment_flag) { /*for (i = 0; i < pps->num_extra_slice_header_bits; i++) { bitstream_put_ui(bs, slice_header->slice_reserved_undetermined_flag[i], 1); }*/ bitstream_put_ue(bs, slice_type); /*if (pps->output_flag_present_flag) { bitstream_put_ui(bs, slice_header->pic_output_flag, 1); }*/ if(sps->seq_fields.bits.separate_colour_plane_flag) bitstream_put_ui(bs, slice->slice_fields.bits.colour_plane_id, 2); if (!(nal_unit_type == NALU_IDR_W_DLP || nal_unit_type == NALU_IDR_N_LP)) { // slice_header->pic_order_cnt_lsb bitstream_put_ui(bs, pic_order_cnt_lsb, POC_BITS );//(sps->log2_max_pic_order_cnt_lsb_minus4 + 4) bitstream_put_ui(bs, 1, 1); // short_term_ref_pic_set_sps_flag // assume we are only pushing I-slices on IDR frames, SPS only references (n-1)th frame for now // this should be restored when p-slice references something different or using CRA frames #if 0 if (!short_term_ref_pic_set_sps_flag) { // refer to Teddi if (sps->num_short_term_ref_pic_sets > 0) bitstream_put_ui(bs, 0, 1); // inter_ref_pic_set_prediction_flag, always 0 for now bitstream_put_ue(bs, slice_header->strp.num_negative_pics); bitstream_put_ue(bs, slice_header->strp.num_positive_pics); // below chunks of codes (majorly two big 'for' blocks) are refering both // Teddi and mv_encoder, they look kind of ugly, however, keep them as these // since it will be pretty easy to update if change/update in Teddi side. // According to Teddi, these are CModel Implementation. int prev = 0; int frame_cnt_in_gop = slice_header->pic_order_cnt_lsb / 2; // this is the first big 'for' block for (i = 0; i < slice_header->strp.num_negative_pics; i++) { // Low Delay B case if (1 == gop_ref_distance) { bitstream_put_ue(bs, 0 /*delta_poc_s0_minus1*/); } else { // For Non-BPyramid GOP i.e B0 type if (num_active_ref_p > 1) { // DeltaPOC Equals NumB int DeltaPoc = -(int)(gop_ref_distance); bitstream_put_ue(bs, prev - DeltaPoc - 1 /*delta_poc_s0_minus1*/); } else { // the big 'if' wraps here is - // if (!slice_header->short_term_ref_pic_set_sps_flag) // From the Teddi logic, the short_term_ref_pic_set_sps_flag only can be '0' // either for B-Prymid or first several frames in a GOP in multi-ref cases // when there are not enough backward refs. // So though there are really some codes under this 'else'in Teddi, don't // want to introduce them in MEA to avoid confusion, and put an assert // here to guard that there is new case we need handle in the future. assert(0); } } bitstream_put_ui(bs, 1 /*used_by_curr_pic_s0_flag*/, 1); } prev = 0; // this is the second big 'for' block for (i = 0; i < slice_header->strp.num_positive_pics; i++) { // Non-BPyramid GOP if (num_active_ref_p > 1) { // MultiRef Case if (frame_cnt_in_gop < gop_ref_distance) { int DeltaPoc = (int)(gop_ref_distance - frame_cnt_in_gop); bitstream_put_ue(bs, DeltaPoc - prev - 1 /*delta_poc_s1_minus1*/); } else if (frame_cnt_in_gop > gop_ref_distance) { int DeltaPoc = (int)(gop_ref_distance * slice_header->strp.num_negative_pics - frame_cnt_in_gop); bitstream_put_ue(bs, DeltaPoc - prev - 1 /*delta_poc_s1_minus1*/); } } else { // the big 'if' wraps here is - // if (!slice_header->short_term_ref_pic_set_sps_flag) // From the Teddi logic, the short_term_ref_pic_set_sps_flag only can be '0' // either for B-Prymid or first several frames in a GOP in multi-ref cases // when there are not enough backward refs. // So though there are really some codes under this 'else'in Teddi, don't // want to introduce them in MEA to avoid confusion, and put an assert // here to guard that there is new case we need handle in the future. assert(0); } bitstream_put_ui(bs, 1 /*used_by_curr_pic_s1_flag*/, 1); } } else if (sps->num_short_term_ref_pic_sets > 1) bitstream_put_ui(bs, slice_header->short_term_ref_pic_set_idx, (uint8_t)(ceil(log(sps->num_short_term_ref_pic_sets) / log(2.0)))); #endif // no long term refs #if 0 if (sps->long_term_ref_pics_present_flag) { if (sps->num_long_term_ref_pics_sps > 0) bitstream_put_ue(bs, slice_header->num_long_term_sps); bitstream_put_ue(bs, slice_header->num_long_term_pics); } #endif if (sps->seq_fields.bits.sps_temporal_mvp_enabled_flag) bitstream_put_ui(bs, slice->slice_fields.bits.slice_temporal_mvp_enabled_flag, 1); } if (sps->seq_fields.bits.sample_adaptive_offset_enabled_flag ) { // sample_adaptive_offset_enabled_flag bitstream_put_ui(bs, slice->slice_fields.bits.slice_sao_luma_flag, 1);// slice_sao_luma_flag bitstream_put_ui(bs, slice->slice_fields.bits.slice_sao_chroma_flag, 1);//slice_sao_chroma_flag } if (slice_type != SLICE_I) { bitstream_put_ui(bs, slice->slice_fields.bits.num_ref_idx_active_override_flag, 1); //num_ref_idx_active_override_flag if (slice->slice_fields.bits.num_ref_idx_active_override_flag) { bitstream_put_ue(bs, slice->num_ref_idx_l0_active_minus1); //if (slice->slice_type == SLICE_B) //bitstream_put_ue(bs, slice->num_ref_idx_l1_active_minus1); } #if 0 if (pps->lists_modification_present_flag && slice_header->num_poc_total_cur > 1) { /* ref_pic_list_modification */ bitstream_put_ui(bs, slice_header->ref_pic_list_modification_flag_l0, 1); if (slice_header->ref_pic_list_modification_flag_l0) { for (i = 0; i <= slice_header->num_ref_idx_l0_active_minus1; i++) { bitstream_put_ui(bs, slice_header->list_entry_l0[i], (uint8_t)(ceil(log(slice_header->num_poc_total_cur) / log(2.0)))); } } bitstream_put_ui(bs, slice_header->ref_pic_list_modification_flag_l1, 1); if (slice_header->ref_pic_list_modification_flag_l1) { for (i = 0; i <= slice_header->num_ref_idx_l1_active_minus1; i++) { bitstream_put_ui(bs, slice_header->list_entry_l1[i], (uint8_t)(ceil(log(slice_header->num_poc_total_cur) / log(2.0)))); } } } #endif /*if (slice_header->slice_type == SLICE_B) { bitstream_put_ui(bs, slice_header->mvd_l1_zero_flag, 1); }*/ if (PPS_CABAC_INIT_PRESENT_FLAG) { bitstream_put_ui(bs, slice->slice_fields.bits.cabac_init_flag, 1); //slice_header->cabac_init_present_flag } if (slice->slice_fields.bits.slice_temporal_mvp_enabled_flag) { int collocated_from_l0_flag = 1; if (slice->slice_type == SLICE_B) { collocated_from_l0_flag = slice->slice_fields.bits.collocated_from_l0_flag; bitstream_put_ui(bs, collocated_from_l0_flag , 1); } if (((collocated_from_l0_flag && (slice->num_ref_idx_l0_active_minus1 > 0)) || (!collocated_from_l0_flag && (slice->num_ref_idx_l1_active_minus1 > 0)))) { bitstream_put_ue(bs, pic->num_ref_idx_l0_default_active_minus1); // collocated_ref_idx } } bitstream_put_ue(bs, 5 - slice->max_num_merge_cand);//slice_header->five_minus_max_num_merge_cand } bitstream_put_se(bs, slice_qp_delta); /*if (pps->chroma_qp_offset_list_enabled_flag) { bitstream_put_se(bs, slice_header->slice_qp_delta_cb); bitstream_put_se(bs, slice_header->slice_qp_delta_cr); } if (pps->deblocking_filter_override_enabled_flag) { bitstream_put_ui(bs, slice_header->deblocking_filter_override_flag, 1); } if (slice_header->deblocking_filter_override_flag) { bitstream_put_ui(bs, slice_header->disable_deblocking_filter_flag, 1); if (!slice_header->disable_deblocking_filter_flag) { bitstream_put_se(bs, slice_header->beta_offset_div2); bitstream_put_se(bs, slice_header->tc_offset_div2); } }*/ if (pic->pic_fields.bits.pps_loop_filter_across_slices_enabled_flag && (slice->slice_fields.bits.slice_sao_luma_flag || slice->slice_fields.bits.slice_sao_chroma_flag || !slice->slice_fields.bits.slice_deblocking_filter_disabled_flag)) { bitstream_put_ui(bs, slice->slice_fields.bits.slice_loop_filter_across_slices_enabled_flag, 1); } } if ((pic->pic_fields.bits.tiles_enabled_flag) || (pic->pic_fields.bits.entropy_coding_sync_enabled_flag)) { int num_entry_point_offsets = 0, offset_len_minus1 = 0; bitstream_put_ue(bs, num_entry_point_offsets); if (num_entry_point_offsets > 0) { bitstream_put_ue(bs, offset_len_minus1); } } /*if (pps->slice_segment_header_extension_present_flag) { int slice_header_extension_length = 0; bitstream_put_ue(bs, slice_header_extension_length); }*/ } void bitstream_dump(const char *name, bitstream *bs) { #if 0 FILE *f = fopen(name,"wb"); fwrite(bs->buffer, 1, bs->bit_offset / 8,f); fclose(f); #endif } static int build_packed_pic_buffer(struct vaapi_recorder *r, void **header_buffer) { struct bitstream bs; bitstream_start(&bs); nal_start_code_prefix(&bs, NALU_PPS); nal_header(&bs, NALU_PPS); pps_rbsp(&bs,&r->encoder.param.pic); rbsp_trailing_bits(&bs); bitstream_end(&bs); bitstream_dump("pic.bin",&bs); *header_buffer = bs.buffer; return bs.bit_offset; } static int build_packed_video_buffer(struct vaapi_recorder *r, void **header_buffer) { struct bitstream bs; bitstream_start(&bs); nal_start_code_prefix(&bs, NALU_VPS); nal_header(&bs, NALU_VPS); vps_rbsp(&bs, &r->encoder.param.seq); rbsp_trailing_bits(&bs); bitstream_end(&bs); bitstream_dump("vid.bin",&bs); *header_buffer = bs.buffer; return bs.bit_offset; } static int build_packed_seq_buffer(struct vaapi_recorder *r, void **header_buffer) { struct bitstream bs; bitstream_start(&bs); nal_start_code_prefix(&bs, NALU_SPS); nal_header(&bs, NALU_SPS); sps_rbsp(&bs, &r->encoder.param.seq, r->width, r->height); rbsp_trailing_bits(&bs); bitstream_end(&bs); bitstream_dump("seq.bin",&bs); *header_buffer = bs.buffer; return bs.bit_offset; } static int build_packed_slice_buffer(struct vaapi_recorder *r, void **header_buffer) { struct bitstream bs; int is_idr = r->frame_count == 0; // !!pic_param.pic_fields.bits.idr_pic_flag; int naluType = is_idr ? NALU_IDR_W_DLP : NALU_TRAIL_R; bitstream_start(&bs); nal_start_code_prefix(&bs, naluType); nal_header(&bs, naluType); sliceHeader_rbsp(&bs, r->frame_count, &r->encoder.param.seq, &r->encoder.param.slice, &r->encoder.param.pic); rbsp_trailing_bits(&bs); bitstream_end(&bs); bitstream_dump("slice.bin",&bs); *header_buffer = bs.buffer; return bs.bit_offset; } static int create_packed_header_buffers(struct vaapi_recorder *r, VABufferID *buffers, VAEncPackedHeaderType type, void *data, int bit_length) { VAEncPackedHeaderParameterBuffer packed_header; VAStatus status; packed_header.type = type; packed_header.bit_length = bit_length; packed_header.has_emulation_bytes = 0; status = vaCreateBuffer(r->va_dpy, r->encoder.ctx, VAEncPackedHeaderParameterBufferType, sizeof packed_header, 1, &packed_header, &buffers[0]); if (status != VA_STATUS_SUCCESS) { printf("%s: %d\n", __PRETTY_FUNCTION__, status); return 0; } status = vaCreateBuffer(r->va_dpy, r->encoder.ctx, VAEncPackedHeaderDataBufferType, (bit_length + 7) / 8, 1, data, &buffers[1]); if (status != VA_STATUS_SUCCESS) { printf("%s: %d\n", __PRETTY_FUNCTION__, status); vaDestroyBuffer(r->va_dpy, buffers[0]); return 0; } return 2; } static int encoder_prepare_headers(struct vaapi_recorder *r, VABufferID *buffers) { VABufferID *p; int bit_length; void *data; p = buffers; bit_length = build_packed_video_buffer(r, &data); p += create_packed_header_buffers(r, p, VAEncPackedHeaderSequence, data, bit_length); free(data); bit_length = build_packed_seq_buffer(r, &data); p += create_packed_header_buffers(r, p, VAEncPackedHeaderSequence, data, bit_length); free(data); return p - buffers; } static VAStatus encoder_render_picture(struct vaapi_recorder *r, VASurfaceID input, VABufferID *buffers, int count) { } static VABufferID encoder_create_output_buffer(struct vaapi_recorder *r) { VABufferID output_buf; VAStatus status; status = vaCreateBuffer(r->va_dpy, r->encoder.ctx, VAEncCodedBufferType, r->encoder.output_size, 1, NULL, &output_buf); if (status == VA_STATUS_SUCCESS) return output_buf; else return VA_INVALID_ID; } enum output_write_status { OUTPUT_WRITE_SUCCESS, OUTPUT_WRITE_OVERFLOW, OUTPUT_WRITE_FATAL }; static enum output_write_status encoder_write_output(struct vaapi_recorder *r, VABufferID output_buf) { VACodedBufferSegment *segment; VAStatus status; int count = 0; status = vaMapBuffer(r->va_dpy, output_buf, (void **) &segment); if (status != VA_STATUS_SUCCESS) return OUTPUT_WRITE_FATAL; do { if (segment->status & VA_CODED_BUF_STATUS_SLICE_OVERFLOW_MASK) { r->encoder.output_size *= 2; vaUnmapBuffer(r->va_dpy, output_buf); return OUTPUT_WRITE_OVERFLOW; } count += write(r->output_fd, segment->buf, segment->size); segment = (VACodedBufferSegment *)segment->next; } while(segment); vaUnmapBuffer(r->va_dpy, output_buf); if (count < 0) return OUTPUT_WRITE_FATAL; return OUTPUT_WRITE_SUCCESS; } static int push_output_buffer(struct vaapi_recorder *r, VABufferID buf, VASurfaceID surf); static void encoder_encode(struct vaapi_recorder *r, VASurfaceID input) { VABufferID output_buf = VA_INVALID_ID; VABufferID buffers[13]; int count = 0; int i, slice_type; enum output_write_status ret; if ((r->frame_count % r->encoder.intra_period) == 0) slice_type = SLICE_I; else slice_type = SLICE_P; if (r->frame_count == 0) buffers[count++] = encoder_update_seq_parameters(r); VABufferID slice_buf = encoder_update_slice_parameter(r, slice_type); //buffers[count++] = slice_buf; for (i = 0; i < count; i++) if (buffers[i] == VA_INVALID_ID) goto bail; VAStatus status; status = vaBeginPicture(r->va_dpy, r->encoder.ctx, input); if (status != VA_STATUS_SUCCESS) goto bail; //status = vaRenderPicture(r->va_dpy, r->encoder.ctx, &buffers[0], 1); if (status != VA_STATUS_SUCCESS) goto bail; //status = vaRenderPicture(r->va_dpy, r->encoder.ctx, &buffers[2], 1); do { output_buf = encoder_create_output_buffer(r); if (output_buf == VA_INVALID_ID) goto bail; VABufferID pic_buf = encoder_update_pic_parameters(r, output_buf); if (count && buffers[count - 1] == VA_INVALID_ID) goto bail; if (r->frame_count == 0) count += encoder_prepare_headers(r, buffers + count); // todo: this might be required in every frame //if(r->frame_count == 0) { void *data; uint32_t bit_length; if(r->frame_count == 0) { buffers[count++] = encoder_update_misc_framerate_parameter(r); buffers[count++] = encoder_update_misc_rate_parameter(r); buffers[count++] = encoder_update_misc_hrd_parameter(r); bit_length = build_packed_pic_buffer(r, &data); count += create_packed_header_buffers(r, buffers + count, VAEncPackedHeaderPicture, data, bit_length); free(data); } buffers[count++]= pic_buf; bit_length = build_packed_slice_buffer(r, &data ); count += create_packed_header_buffers(r, buffers + count, VAEncPackedHeaderSlice, data, bit_length); free(data); } buffers[count++] = slice_buf; status = vaRenderPicture(r->va_dpy, r->encoder.ctx, buffers, count); /*for(int i = 0; i < count; i++) { status = vaRenderPicture(r->va_dpy, r->encoder.ctx, &buffers[i], 1); printf("%d %d\n", i, status); }*/ /*VAStatus er = encoder_render_picture(r, input, buffers, count); if(er != VA_STATUS_SUCCESS) { printf("render error %d\n", er); goto bail; }*/ status = vaEndPicture(r->va_dpy, r->encoder.ctx); if (status != VA_STATUS_SUCCESS) { printf("end %d\n", status); goto bail; } //status = vaSyncSurface(r->va_dpy, input); //ret = encoder_write_output(r, output_buf); //vaDestroyBuffer(r->va_dpy, output_buf); push_output_buffer(r, output_buf, input); output_buf = VA_INVALID_ID; vaDestroyBuffer(r->va_dpy, buffers[--count]); if(ret == OUTPUT_WRITE_OVERFLOW) exit(1); } while (ret == OUTPUT_WRITE_OVERFLOW); if (ret == OUTPUT_WRITE_FATAL) { r->error = errno; exit(1); } for (i = 0; i < count; i++) vaDestroyBuffer(r->va_dpy, buffers[i]); r->frame_count++; return; bail: printf("buffer errors?\n"); for (i = 0; i < count; i++) vaDestroyBuffer(r->va_dpy, buffers[i]); if (output_buf != VA_INVALID_ID) vaDestroyBuffer(r->va_dpy, output_buf); } static int setup_vpp(struct vaapi_recorder *r) { VAStatus status; VASurfaceAttrib attrs[2] = { {VASurfaceAttribMemoryType, VA_SURFACE_ATTRIB_SETTABLE,{VAGenericValueTypeInteger, 0},}, {VASurfaceAttribPixelFormat,VA_SURFACE_ATTRIB_SETTABLE,{VAGenericValueTypeInteger, 0},} }; attrs[0].value.value.i = VA_SURFACE_ATTRIB_MEM_TYPE_VA; attrs[1].value.value.i = USE_P010?VA_FOURCC_P010:VA_FOURCC_NV12; status = vaCreateConfig(r->va_dpy, VAProfileNone, VAEntrypointVideoProc, NULL, 0, &r->vpp.cfg); if (status != VA_STATUS_SUCCESS) { printf("vaapi: failed to create VPP config\n"); return -1; } status = vaCreateContext(r->va_dpy, r->vpp.cfg, r->width, r->height, 0, NULL, 0, &r->vpp.ctx); if (status != VA_STATUS_SUCCESS) { printf("vaapi: failed to create VPP context\n"); goto err_cfg; } status = vaCreateBuffer(r->va_dpy, r->vpp.ctx, VAProcPipelineParameterBufferType, sizeof(VAProcPipelineParameterBuffer), 1, NULL, &r->vpp.pipeline_buf); if (status != VA_STATUS_SUCCESS) { printf("vaapi: failed to create VPP pipeline buffer\n"); goto err_ctx; } status = vaCreateSurfaces(r->va_dpy, USE_P010?VA_RT_FORMAT_YUV420_10:VA_RT_FORMAT_YUV420, r->width, r->height, &r->vpp.output, 1, attrs, 2); if (status != VA_STATUS_SUCCESS) { printf("vaapi: failed to create YUV surface\n"); goto err_buf; } return 0; err_buf: vaDestroyBuffer(r->va_dpy, r->vpp.pipeline_buf); err_ctx: vaDestroyConfig(r->va_dpy, r->vpp.ctx); err_cfg: vaDestroyConfig(r->va_dpy, r->vpp.cfg); return -1; } static void vpp_destroy(struct vaapi_recorder *r) { vaDestroySurfaces(r->va_dpy, &r->vpp.output, 1); vaDestroyBuffer(r->va_dpy, r->vpp.pipeline_buf); vaDestroyConfig(r->va_dpy, r->vpp.ctx); vaDestroyConfig(r->va_dpy, r->vpp.cfg); } static int setup_worker_thread(struct vaapi_recorder *r) { pthread_mutex_init(&r->mutex, NULL); pthread_cond_init(&r->input_cond, NULL); pthread_create(&r->worker_thread, NULL, worker_thread_function, r); return 1; } static void * output_thread_function(void *data); static int setup_output_thread(struct vaapi_recorder *r) { pthread_mutex_init(&r->encoder.mutex, NULL); pthread_cond_init(&r->encoder.output_cond, NULL); pthread_create(&r->encoder.output_thread, NULL, output_thread_function, r); return 1; } static void destroy_worker_thread(struct vaapi_recorder *r) { pthread_mutex_lock(&r->mutex); /* Make sure the worker thread finishes */ r->destroying = 1; pthread_cond_signal(&r->input_cond); pthread_mutex_unlock(&r->mutex); pthread_join(r->worker_thread, NULL); pthread_mutex_destroy(&r->mutex); pthread_cond_destroy(&r->input_cond); } struct vaapi_recorder * vaapi_recorder_create(int drm_fd, int width, int height, const char *filename) { struct vaapi_recorder *r; VAStatus status; int major, minor; int flags; r = (vaapi_recorder*)calloc(sizeof *r,1); if (r == NULL) return NULL; r->width = width; r->height = height; r->drm_fd = drm_fd; if (setup_worker_thread(r) < 0) goto err_free; flags = O_WRONLY | O_CREAT | O_TRUNC | O_CLOEXEC; r->output_fd = open(filename, flags, 0644); if (r->output_fd < 0) goto err_thread; r->va_dpy = vaGetDisplayDRM(drm_fd); if (!r->va_dpy) { printf("failed to create VA display\n"); goto err_fd; } status = vaInitialize(r->va_dpy, &major, &minor); if (status != VA_STATUS_SUCCESS) { printf("vaapi: failed to initialize display\n"); goto err_fd; } if (setup_vpp(r) < 0) { printf("vaapi: failed to initialize VPP pipeline\n"); goto err_va_dpy; } if (setup_encoder(r) < 0) { goto err_vpp; } return r; err_vpp: vpp_destroy(r); err_va_dpy: vaTerminate(r->va_dpy); err_fd: close(r->output_fd); err_thread: destroy_worker_thread(r); err_free: free(r); return NULL; } static VASurfaceID gInputRGBA; static VAStatus create_surface_from_fd(struct vaapi_recorder *r, int prime_fd, int stride, VASurfaceID *surface); struct vaapi_recorder * vaapi_recorder_create2(int drm_fd, int width, int height, const char *filename, int dmabuf_fd, int dmabuf_stride) { struct vaapi_recorder *r; VAStatus status; int major, minor; int flags; r = (vaapi_recorder*)calloc(sizeof *r,1); if (r == NULL) return NULL; r->width = width; r->height = height; r->drm_fd = drm_fd; flags = O_WRONLY | O_CREAT | O_TRUNC | O_CLOEXEC; r->output_fd = open(filename, flags, 0644); if (r->output_fd < 0) goto err_thread; r->va_dpy = vaGetDisplayDRM(drm_fd); if (!r->va_dpy) { printf("failed to create VA display\n"); goto err_fd; } status = vaInitialize(r->va_dpy, &major, &minor); if (status != VA_STATUS_SUCCESS) { printf("vaapi: failed to initialize display\n"); goto err_fd; } if (setup_vpp(r) < 0) { printf("vaapi: failed to initialize VPP pipeline\n"); goto err_va_dpy; } if (setup_encoder(r) < 0) { goto err_vpp; } create_surface_from_fd(r, dmabuf_fd, dmabuf_stride, &gInputRGBA); r->encoder.output_buf = VA_INVALID_ID; setup_output_thread(r); return r; err_vpp: vpp_destroy(r); err_va_dpy: vaTerminate(r->va_dpy); err_fd: close(r->output_fd); err_thread: destroy_worker_thread(r); err_free: free(r); return NULL; } #include struct vaapi_recorder * vaapi_recorder_create3(int drm_fd, int width, int height, const char *filename, int dmabuf_fd, int dmabuf_stride, int dmabuf_fd_uv, int dmabuf_stride_uv) { struct vaapi_recorder *r; VAStatus status; int major, minor; int flags; VADRMPRIMESurfaceDescriptor drmSurface = {0}; r = (vaapi_recorder*)calloc(sizeof *r,1); if (r == NULL) return NULL; r->width = width; r->height = height; r->drm_fd = drm_fd; flags = O_WRONLY | O_CREAT | O_TRUNC | O_CLOEXEC; r->output_fd = open(filename, flags, 0644); if (r->output_fd < 0) goto err_thread; r->va_dpy = vaGetDisplayDRM(drm_fd); if (!r->va_dpy) { printf("failed to create VA display\n"); goto err_fd; } status = vaInitialize(r->va_dpy, &major, &minor); if (status != VA_STATUS_SUCCESS) { printf("vaapi: failed to initialize display\n"); goto err_fd; } if (setup_vpp(r) < 0) { printf("vaapi: failed to initialize VPP pipeline\n"); goto err_va_dpy; } if (setup_encoder(r) < 0) { goto err_vpp; } //create_surface_from_fd(r, dmabuf_fd, dmabuf_stride, &gInputRGBA); VASurfaceAttrib va_attribs[4]; //VASurfaceAttribExternalBuffers va_attrib_extbuf; /* * * {fourcc = 842094158, width = 1920, height = 1080, num_objects = 1, objects = {{fd = 15, size = 3133440, drm_format_modifier = 72057594037927938}, {fd = 0, size = 0, drm_format_modifier = 0}, {fd = 0, size = 0, drm_format_modifier = 0}, {fd = 0, size = 0, drm_format_modifier = 0}}, num_layers = 2, layers = {{drm_format = 538982482, num_planes = 1, object_index = {0, 0, 0, 0}, offset = {0, 0, 0, 0}, pitch = {1920, 0, 0, 0}}, {drm_format = 943215175, num_planes = 1, object_index = {0, 0, 0, 0}, offset = { 2088960, 0, 0, 0}, pitch = {1920, 0, 0, 0}}, {drm_format = 0, num_planes = 0, object_index = {0, 0, 0, 0}, offset = {0, 0, 0, 0}, pitch = {0, 0, 0, 0}}, {drm_format = 0, num_planes = 0, object_index = {0, 0, 0, 0}, offset = {0, 0, 0, 0}, pitch = {0, 0, 0, 0}}}} * {fourcc = 842094158, width = 1920, height = 1080, num_objects = 1, objects = {{fd = 13, size = 3133440, drm_format_modifier = 72057594037927938}, {fd = 0, size = 0, drm_format_modifier = 0}, {fd = 0, size = 0, drm_format_modifier = 0}, {fd = 0, size = 0, drm_format_modifier = 0}}, num_layers = 2, layers = {{drm_format = 538982482, num_planes = 1, object_index = {0, 0, 0, 0}, offset = {0, 0, 0, 0}, pitch = {1920, 0, 0, 0}}, {drm_format = 943215175, num_planes = 1, object_index = {0, 0, 0, 0}, offset = { 2088960, 0, 0, 0}, pitch = {1920, 0, 0, 0}}, {drm_format = 0, num_planes = 0, object_index = {0, 0, 0, 0}, offset = {0, 0, 0, 0}, pitch = {0, 0, 0, 0}}, {drm_format = 0, num_planes = 0, object_index = {0, 0, 0, 0}, offset = {0, 0, 0, 0}, pitch = {0, 0, 0, 0}}}} */ drmSurface.fourcc = VA_FOURCC_NV12; drmSurface.width = width; drmSurface.height = height; drmSurface.num_objects = 1; drmSurface.objects[0].fd = dmabuf_fd; drmSurface.objects[0].drm_format_modifier = 72057594037927938;//0x20000002096bb03; drmSurface.objects[0].size = 3133440; //drmSurface.objects[1].fd = dmabuf_fd_uv; //drmSurface.objects[1].drm_format_modifier = 0;//0x20000002096bb03; //drmSurface.objects[1].size = 1920 * height/2; drmSurface.num_layers = 2; drmSurface.layers[0].drm_format = DRM_FORMAT_R8; drmSurface.layers[0].num_planes = 1; drmSurface.layers[0].object_index[0] = 0; drmSurface.layers[0].offset[0] = 0; drmSurface.layers[0].pitch[0] = 1920; drmSurface.layers[1].drm_format = DRM_FORMAT_GR88; drmSurface.layers[1].num_planes = 1; drmSurface.layers[1].object_index[0] = 0; drmSurface.layers[1].offset[0] = 2088960; drmSurface.layers[1].pitch[0] = 1920; /*unsigned long buffer_fd[2] = {dmabuf_fd, dmabuf_fd_uv}; va_attrib_extbuf.pixel_format = VA_FOURCC_NV12; va_attrib_extbuf.width = r->width; va_attrib_extbuf.height = r->height; va_attrib_extbuf.data_size = r->height * stride; va_attrib_extbuf.num_planes = 1; va_attrib_extbuf.pitches[0] = stride; va_attrib_extbuf.offsets[0] = 0; va_attrib_extbuf.buffers = &buffer_fd; va_attrib_extbuf.num_buffers = 1; va_attrib_extbuf.flags = 0; va_attrib_extbuf.private_data = NULL;*/ va_attribs[0].type = VASurfaceAttribMemoryType; va_attribs[0].flags = VA_SURFACE_ATTRIB_SETTABLE; va_attribs[0].value.type = VAGenericValueTypeInteger; va_attribs[0].value.value.i = VA_SURFACE_ATTRIB_MEM_TYPE_DRM_PRIME_2; va_attribs[1].type = VASurfaceAttribExternalBufferDescriptor; va_attribs[1].flags = VA_SURFACE_ATTRIB_SETTABLE; va_attribs[1].value.type = VAGenericValueTypePointer; va_attribs[1].value.value.p = &drmSurface; va_attribs[2].type = VASurfaceAttribUsageHint; va_attribs[2].flags = VA_SURFACE_ATTRIB_SETTABLE; va_attribs[2].value.type = VAGenericValueTypeInteger; va_attribs[2].value.value.i = VA_SURFACE_ATTRIB_USAGE_HINT_ENCODER; va_attribs[3].type = VASurfaceAttribPixelFormat; va_attribs[3].flags = VA_SURFACE_ATTRIB_SETTABLE; va_attribs[3].value.type = VAGenericValueTypeInteger; va_attribs[3].value.value.i = VA_FOURCC_NV12; status = vaCreateSurfaces(r->va_dpy, VA_RT_FORMAT_YUV420, r->width, r->height, &r->vpp.output, 1, va_attribs, 2); printf("%d\n", status); if(status != VA_STATUS_SUCCESS) exit(1); r->encoder.output_buf = VA_INVALID_ID; setup_output_thread(r); return r; err_vpp: vpp_destroy(r); err_va_dpy: vaTerminate(r->va_dpy); err_fd: close(r->output_fd); err_thread: destroy_worker_thread(r); err_free: free(r); return NULL; } struct vaapi_recorder * vaapi_recorder_create4(int drm_fd, int width, int height, const char *filename, int *dmabuf_fd, uint64_t *mod, uint32_t *size, uint32_t *offset, uint32_t *pitch1, uint32_t *pitch2, uint64_t *modifiers, int modifierscount) { struct vaapi_recorder *r; VAStatus status; int major, minor; int flags; VASurfaceAttribExternalBuffers va_attrib_extbuf = {0}; VADRMPRIMESurfaceDescriptor drmSurface = {0}; r = (vaapi_recorder*)calloc(sizeof *r,1); if (r == NULL) return NULL; r->width = width; r->height = height; r->drm_fd = drm_fd; flags = O_WRONLY | O_CREAT | O_TRUNC | O_CLOEXEC; r->output_fd = open(filename, flags, 0644); if (r->output_fd < 0) goto err_thread; r->va_dpy = vaGetDisplayDRM(drm_fd); if (!r->va_dpy) { printf("failed to create VA display\n"); goto err_fd; } status = vaInitialize(r->va_dpy, &major, &minor); if (status != VA_STATUS_SUCCESS) { printf("vaapi: failed to initialize display\n"); goto err_fd; } if (setup_vpp(r) < 0) { printf("vaapi: failed to initialize VPP pipeline\n"); goto err_va_dpy; } if (setup_encoder(r) < 0) { goto err_vpp; } //create_surface_from_fd(r, dmabuf_fd, dmabuf_stride, &gInputRGBA); VASurfaceAttrib va_attribs[5]; //unsigned long buffer_fd[2] = {dmabuf_fd, dmabuf_fd_uv}; va_attrib_extbuf.pixel_format = VA_FOURCC_P010; va_attrib_extbuf.width = r->width; va_attrib_extbuf.height = r->height; //va_attrib_extbuf.data_size = r->height * stride; //va_attrib_extbuf.num_planes = 1; //va_attrib_extbuf.pitches[0] = stride; //va_attrib_extbuf.offsets[0] = 0; //va_attrib_extbuf.buffers = &buffer_fd; //va_attrib_extbuf.num_buffers = 1; va_attrib_extbuf.flags = VA_SURFACE_EXTBUF_DESC_ENABLE_TILING; va_attrib_extbuf.private_data = NULL; VADRMFormatModifierList modList; modList.modifiers = modifiers; modList.num_modifiers = modifierscount; va_attribs[0].type = VASurfaceAttribMemoryType; va_attribs[0].flags = VA_SURFACE_ATTRIB_SETTABLE; va_attribs[0].value.type = VAGenericValueTypeInteger; va_attribs[0].value.value.i = VA_SURFACE_ATTRIB_MEM_TYPE_VA; va_attribs[1].type = VASurfaceAttribUsageHint; va_attribs[1].flags = VA_SURFACE_ATTRIB_SETTABLE; va_attribs[1].value.type = VAGenericValueTypeInteger; va_attribs[1].value.value.i = VA_SURFACE_ATTRIB_USAGE_HINT_EXPORT | VA_SURFACE_ATTRIB_USAGE_HINT_ENCODER; va_attribs[2].type = VASurfaceAttribPixelFormat; va_attribs[2].flags = VA_SURFACE_ATTRIB_SETTABLE; va_attribs[2].value.type = VAGenericValueTypeInteger; va_attribs[2].value.value.i = VA_FOURCC_P010; va_attribs[3].type = VASurfaceAttribExternalBufferDescriptor; va_attribs[3].flags = VA_SURFACE_ATTRIB_SETTABLE; va_attribs[3].value.type = VAGenericValueTypePointer; va_attribs[3].value.value.p = &va_attrib_extbuf; va_attribs[4].type = VASurfaceAttribDRMFormatModifiers; va_attribs[4].flags = VA_SURFACE_ATTRIB_SETTABLE; va_attribs[4].value.type = VAGenericValueTypePointer; va_attribs[4].value.value.p = &modList; status = vaCreateSurfaces(r->va_dpy, VA_RT_FORMAT_YUV420_10, r->width, r->height, &r->vpp.output, 1, &va_attribs[0], 5); printf("%d\n", status); status = vaExportSurfaceHandle(r->va_dpy, r->vpp.output, VA_SURFACE_ATTRIB_MEM_TYPE_DRM_PRIME_2, VA_EXPORT_SURFACE_WRITE_ONLY | VA_EXPORT_SURFACE_SEPARATE_LAYERS, &drmSurface ); printf("%d %d %llx\n", status, drmSurface.objects[0].fd, drmSurface.objects[0].drm_format_modifier); *dmabuf_fd = drmSurface.objects[0].fd; *mod = drmSurface.objects[0].drm_format_modifier; *size = drmSurface.objects[0].size; *offset = drmSurface.layers[1].offset[0]; *pitch1 = drmSurface.layers[0].pitch[0]; *pitch2 = drmSurface.layers[1].pitch[0]; if(status != VA_STATUS_SUCCESS) exit(1); r->encoder.output_buf = VA_INVALID_ID; setup_output_thread(r); return r; err_vpp: vpp_destroy(r); err_va_dpy: vaTerminate(r->va_dpy); err_fd: close(r->output_fd); err_thread: destroy_worker_thread(r); err_free: free(r); return NULL; } struct vaapi_recorder * vaapi_recorder_create5(int drm_fd, int width, int height, const char *filename, int *dmabuf_fd, uint64_t *mod, uint32_t *size, uint32_t *offset, uint32_t *pitch1, uint32_t *pitch2, uint64_t *modifiers, int modifierscount) { struct vaapi_recorder *r; VAStatus status; int major, minor; int flags; VASurfaceAttribExternalBuffers va_attrib_extbuf = {0}; VADRMPRIMESurfaceDescriptor drmSurface = {0}; r = (vaapi_recorder*)calloc(sizeof *r,1); if (r == NULL) return NULL; r->width = width; r->height = height; r->drm_fd = drm_fd; flags = O_WRONLY | O_CREAT | O_TRUNC | O_CLOEXEC; r->output_fd = open(filename, flags, 0644); if (r->output_fd < 0) goto err_thread; r->va_dpy = vaGetDisplayDRM(drm_fd); if (!r->va_dpy) { printf("failed to create VA display\n"); goto err_fd; } status = vaInitialize(r->va_dpy, &major, &minor); if (status != VA_STATUS_SUCCESS) { printf("vaapi: failed to initialize display\n"); goto err_fd; } if (setup_vpp(r) < 0) { printf("vaapi: failed to initialize VPP pipeline\n"); goto err_va_dpy; } if (setup_encoder(r) < 0) { goto err_vpp; } //create_surface_from_fd(r, dmabuf_fd, dmabuf_stride, &gInputRGBA); VASurfaceAttrib va_attribs[5]; //unsigned long buffer_fd[2] = {dmabuf_fd, dmabuf_fd_uv}; va_attrib_extbuf.pixel_format = VA_FOURCC_P010; va_attrib_extbuf.width = r->width; va_attrib_extbuf.height = r->height; //va_attrib_extbuf.data_size = r->height * stride; //va_attrib_extbuf.num_planes = 1; //va_attrib_extbuf.pitches[0] = stride; //va_attrib_extbuf.offsets[0] = 0; //va_attrib_extbuf.buffers = &buffer_fd; //va_attrib_extbuf.num_buffers = 1; va_attrib_extbuf.flags = VA_SURFACE_EXTBUF_DESC_ENABLE_TILING; va_attrib_extbuf.private_data = NULL; VADRMFormatModifierList modList; modList.modifiers = modifiers; modList.num_modifiers = modifierscount; va_attribs[0].type = VASurfaceAttribMemoryType; va_attribs[0].flags = VA_SURFACE_ATTRIB_SETTABLE; va_attribs[0].value.type = VAGenericValueTypeInteger; va_attribs[0].value.value.i = VA_SURFACE_ATTRIB_MEM_TYPE_VA; va_attribs[1].type = VASurfaceAttribUsageHint; va_attribs[1].flags = VA_SURFACE_ATTRIB_SETTABLE; va_attribs[1].value.type = VAGenericValueTypeInteger; va_attribs[1].value.value.i = VA_SURFACE_ATTRIB_USAGE_HINT_EXPORT | VA_SURFACE_ATTRIB_USAGE_HINT_ENCODER; va_attribs[2].type = VASurfaceAttribPixelFormat; va_attribs[2].flags = VA_SURFACE_ATTRIB_SETTABLE; va_attribs[2].value.type = VAGenericValueTypeInteger; va_attribs[2].value.value.i = VA_FOURCC_P010; va_attribs[3].type = VASurfaceAttribExternalBufferDescriptor; va_attribs[3].flags = VA_SURFACE_ATTRIB_SETTABLE; va_attribs[3].value.type = VAGenericValueTypePointer; va_attribs[3].value.value.p = &va_attrib_extbuf; va_attribs[4].type = VASurfaceAttribDRMFormatModifiers; va_attribs[4].flags = VA_SURFACE_ATTRIB_SETTABLE; va_attribs[4].value.type = VAGenericValueTypePointer; va_attribs[4].value.value.p = &modList; status = vaCreateSurfaces(r->va_dpy, VA_RT_FORMAT_YUV420_10, r->width, r->height, r->inputFrames, CHAIN_SIZE, &va_attribs[0], 5); printf("%d\n", status); for(int i = 0; i < CHAIN_SIZE; i++) { status = vaExportSurfaceHandle(r->va_dpy, r->inputFrames[i], VA_SURFACE_ATTRIB_MEM_TYPE_DRM_PRIME_2, VA_EXPORT_SURFACE_WRITE_ONLY | VA_EXPORT_SURFACE_SEPARATE_LAYERS, &drmSurface ); printf("%d %d %llx\n", status, drmSurface.objects[0].fd, drmSurface.objects[0].drm_format_modifier); dmabuf_fd[i] = drmSurface.objects[0].fd; if(status != VA_STATUS_SUCCESS) exit(1); } *mod = drmSurface.objects[0].drm_format_modifier; *size = drmSurface.objects[0].size; *offset = drmSurface.layers[1].offset[0]; *pitch1 = drmSurface.layers[0].pitch[0]; *pitch2 = drmSurface.layers[1].pitch[0]; r->encoder.output_buf = VA_INVALID_ID; setup_output_thread(r); return r; err_vpp: vpp_destroy(r); err_va_dpy: vaTerminate(r->va_dpy); err_fd: close(r->output_fd); err_thread: destroy_worker_thread(r); err_free: free(r); return NULL; } void vaapi_recorder_destroy(struct vaapi_recorder *r) { destroy_worker_thread(r); encoder_destroy(r); vpp_destroy(r); vaTerminate(r->va_dpy); close(r->output_fd); close(r->drm_fd); free(r); } static VAStatus create_surface_from_fd(struct vaapi_recorder *r, int prime_fd, int stride, VASurfaceID *surface) { VASurfaceAttrib va_attribs[2]; VASurfaceAttribExternalBuffers va_attrib_extbuf; VAStatus status; unsigned long buffer_fd = prime_fd; va_attrib_extbuf.pixel_format = VA_FOURCC_BGRX; va_attrib_extbuf.width = r->width; va_attrib_extbuf.height = r->height; va_attrib_extbuf.data_size = r->height * stride; va_attrib_extbuf.num_planes = 1; va_attrib_extbuf.pitches[0] = stride; va_attrib_extbuf.offsets[0] = 0; va_attrib_extbuf.buffers = &buffer_fd; va_attrib_extbuf.num_buffers = 1; va_attrib_extbuf.flags = 0; va_attrib_extbuf.private_data = NULL; va_attribs[0].type = VASurfaceAttribMemoryType; va_attribs[0].flags = VA_SURFACE_ATTRIB_SETTABLE; va_attribs[0].value.type = VAGenericValueTypeInteger; va_attribs[0].value.value.i = VA_SURFACE_ATTRIB_MEM_TYPE_DRM_PRIME; va_attribs[1].type = VASurfaceAttribExternalBufferDescriptor; va_attribs[1].flags = VA_SURFACE_ATTRIB_SETTABLE; va_attribs[1].value.type = VAGenericValueTypePointer; va_attribs[1].value.value.p = &va_attrib_extbuf; status = vaCreateSurfaces(r->va_dpy, VA_RT_FORMAT_RGB32, r->width, r->height, surface, 1, va_attribs, 2); return status; } static VAStatus convert_rgb_to_yuv(struct vaapi_recorder *r, VASurfaceID rgb_surface) { VAProcPipelineParameterBuffer *pipeline_param; VAStatus status; status = vaMapBuffer(r->va_dpy, r->vpp.pipeline_buf, (void **) &pipeline_param); if (status != VA_STATUS_SUCCESS) return status; memset(pipeline_param, 0, sizeof *pipeline_param); pipeline_param->surface = rgb_surface; pipeline_param->surface_color_standard = VAProcColorStandardNone; pipeline_param->output_background_color = 0xff000000; pipeline_param->output_color_standard = VAProcColorStandardNone; status = vaUnmapBuffer(r->va_dpy, r->vpp.pipeline_buf); if (status != VA_STATUS_SUCCESS) return status; status = vaBeginPicture(r->va_dpy, r->vpp.ctx, r->vpp.output); if (status != VA_STATUS_SUCCESS) return status; status = vaRenderPicture(r->va_dpy, r->vpp.ctx, &r->vpp.pipeline_buf, 1); if (status != VA_STATUS_SUCCESS) return status; status = vaEndPicture(r->va_dpy, r->vpp.ctx); if (status != VA_STATUS_SUCCESS) return status; return status; } static void recorder_frame(struct vaapi_recorder *r) { VASurfaceID rgb_surface; VAStatus status; status = create_surface_from_fd(r, r->input.prime_fd, r->input.stride, &rgb_surface); if (status != VA_STATUS_SUCCESS) { printf("[libva recorder] " "failed to create surface from bo\n"); return; } close(r->input.prime_fd); status = convert_rgb_to_yuv(r, rgb_surface); if (status != VA_STATUS_SUCCESS) { printf("[libva recorder] " "color space conversion failed\n"); return; } encoder_encode(r, r->vpp.output); vaDestroySurfaces(r->va_dpy, &rgb_surface, 1); } void recorder_frame2(struct vaapi_recorder *r) { VAStatus status; status = convert_rgb_to_yuv(r, gInputRGBA); if (status != VA_STATUS_SUCCESS) { printf("[libva recorder] " "color space conversion failed\n"); return; } encoder_encode(r, r->vpp.output); } void recorder_frame3(struct vaapi_recorder *r) { encoder_encode(r, r->vpp.output); } void recorder_frame4(struct vaapi_recorder *r, int idx) { encoder_encode(r, r->inputFrames[idx]); } static void * worker_thread_function(void *data) { struct vaapi_recorder *r = (vaapi_recorder*)data; pthread_mutex_lock(&r->mutex); while (!r->destroying) { if (!r->input.valid) pthread_cond_wait(&r->input_cond, &r->mutex); /* If the thread is awaken by destroy_worker_thread(), * there might not be valid input */ if (!r->input.valid) continue; recorder_frame(r); r->input.valid = 0; } pthread_mutex_unlock(&r->mutex); return NULL; } static void * output_thread_function(void *data) { struct vaapi_recorder *r = (vaapi_recorder*)data; pthread_mutex_lock(&r->encoder.mutex); while (!r->destroying) { if (r->encoder.output_buf == VA_INVALID_ID) pthread_cond_wait(&r->encoder.output_cond, &r->encoder.mutex); /* If the thread is awaken by destroy_worker_thread(), * there might not be valid input */ if (r->encoder.output_buf == VA_INVALID_ID) continue; //output_frame(r); //vaSyncSurface(r->va_dpy, r->encoder.output_sync_surf); vaSyncBuffer(r->va_dpy, r->encoder.output_buf, UINT64_MAX); encoder_write_output(r, r->encoder.output_buf); vaDestroyBuffer(r->va_dpy, r->encoder.output_buf); r->encoder.output_buf = VA_INVALID_ID; } pthread_mutex_unlock(&r->encoder.mutex); return NULL; } static int push_output_buffer(struct vaapi_recorder *r, VABufferID buf, VASurfaceID surf) { int ret = 0; pthread_mutex_lock(&r->encoder.mutex); if (r->error) { errno = r->error; ret = -1; goto unlock; } /* The mutex is never released while encoding, so this point should * never be reached if input.valid is true. */ assert(r->encoder.output_buf == VA_INVALID_ID); r->encoder.output_buf = buf; r->encoder.output_sync_surf = surf; pthread_cond_signal(&r->encoder.output_cond); unlock: pthread_mutex_unlock(&r->encoder.mutex); return ret; } int vaapi_recorder_frame(struct vaapi_recorder *r, int prime_fd, int stride) { int ret = 0; pthread_mutex_lock(&r->mutex); if (r->error) { errno = r->error; ret = -1; goto unlock; } /* The mutex is never released while encoding, so this point should * never be reached if input.valid is true. */ assert(!r->input.valid); r->input.prime_fd = prime_fd; r->input.stride = stride; r->input.valid = 1; pthread_cond_signal(&r->input_cond); unlock: pthread_mutex_unlock(&r->mutex); return ret; }