vulkan-playground/vaapi-recorder-hevc.cpp

2724 lines
92 KiB
C++

/*
* Copyright (c) 2012 Intel Corporation. All Rights Reserved.
* Copyright © 2013 Intel Corporation
*
* Permission is hereby granted, free of charge, to any person obtaining
* a copy of this software and associated documentation files (the
* "Software"), to deal in the Software without restriction, including
* without limitation the rights to use, copy, modify, merge, publish,
* distribute, sublicense, and/or sell copies of the Software, and to
* permit persons to whom the Software is furnished to do so, subject to
* the following conditions:
*
* The above copyright notice and this permission notice (including the
* next paragraph) shall be included in all copies or substantial
* portions of the Software.
*
* THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND,
* EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
* MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND
* NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS
* BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN
* ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN
* CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
* SOFTWARE.
*/
#include <stdlib.h>
#include <stdint.h>
#include <string.h>
#include <unistd.h>
#include <assert.h>
#include <errno.h>
#include <stdio.h>
#include <sys/types.h>
#include <sys/stat.h>
#include <fcntl.h>
#include <pthread.h>
//#include "va_enc_hevc.h"
#include <va/va.h>
#include <va/va_enc_hevc.h>
#include <va/va_drm.h>
#include <va/va_drmcommon.h>
#include <va/va_vpp.h>
/*
 * Reference-priority levels (0..3). None of these four names is referenced
 * in the encoder setup code that follows them in this file.
 * NOTE(review): the names look like H.264 nal_ref_idc levels - confirm they
 * are still needed for the HEVC path.
 */
#define NAL_REF_IDC_NONE 0
#define NAL_REF_IDC_LOW 1
#define NAL_REF_IDC_MEDIUM 2
#define NAL_REF_IDC_HIGH 3
// HEVC slice types; encoder_update_slice_parameter() stores SLICE_I or
// SLICE_P into the slice parameter buffer's slice_type field.
enum {
SLICE_B = 0,
SLICE_P = 1,
SLICE_I = 2,
};
/* Predicates testing a slice type value against the enum above. */
#define IS_I_SLICE(type) (SLICE_I == (type))
#define IS_P_SLICE(type) (SLICE_P == (type))
#define IS_B_SLICE(type) (SLICE_B == (type))
/* Entropy coder selectors (0 = CAVLC, 1 = CABAC). */
#define ENTROPY_MODE_CAVLC 0
#define ENTROPY_MODE_CABAC 1
/*
 * general_profile_idc values for HEVC Main and Main10.
 * encoder_init_parameters() writes the same numbers (1 / 2) as literals.
 */
#define PROFILE_IDC_MAIN 1
#define PROFILE_IDC_MAIN10 2
/*
 * HEVC nal_unit_type values. Each entry's comment gives the payload syntax
 * structure and whether the unit is VCL (video coding layer) or non-VCL.
 * encoder_update_pic_parameters() uses NALU_IDR_W_DLP for the first frame
 * and NALU_TRAIL_R for every later frame.
 */
enum NALUType {
NALU_TRAIL_N = 0x00, // Coded slice segment of a non-TSA, non-STSA trailing picture - slice_segment_layer_rbsp, VCL
NALU_TRAIL_R = 0x01, // Coded slice segment of a non-TSA, non-STSA trailing picture - slice_segment_layer_rbsp, VCL
NALU_TSA_N = 0x02, // Coded slice segment of a TSA picture - slice_segment_layer_rbsp, VCL
NALU_TSA_R = 0x03, // Coded slice segment of a TSA picture - slice_segment_layer_rbsp, VCL
NALU_STSA_N = 0x04, // Coded slice of an STSA picture - slice_layer_rbsp, VCL
NALU_STSA_R = 0x05, // Coded slice of an STSA picture - slice_layer_rbsp, VCL
NALU_RADL_N = 0x06, // Coded slice of an RADL picture - slice_layer_rbsp, VCL
NALU_RADL_R = 0x07, // Coded slice of an RADL picture - slice_layer_rbsp, VCL
NALU_RASL_N = 0x08, // Coded slice of an RASL picture - slice_layer_rbsp, VCL
NALU_RASL_R = 0x09, // Coded slice of an RASL picture - slice_layer_rbsp, VCL
/* 0x0a..0x0f - Reserved */
NALU_BLA_W_LP = 0x10, // Coded slice segment of a BLA picture - slice_segment_layer_rbsp, VCL
NALU_BLA_W_DLP = 0x11, // Coded slice segment of a BLA picture - slice_segment_layer_rbsp, VCL
NALU_BLA_N_LP = 0x12, // Coded slice segment of a BLA picture - slice_segment_layer_rbsp, VCL
NALU_IDR_W_DLP = 0x13, // Coded slice segment of an IDR picture - slice_segment_layer_rbsp, VCL
NALU_IDR_N_LP = 0x14, // Coded slice segment of an IDR picture - slice_segment_layer_rbsp, VCL
NALU_CRA = 0x15, // Coded slice segment of a CRA picture - slice_segment_layer_rbsp, VCL
/* 0x16..0x1f - Reserved */
NALU_VPS = 0x20, // Video parameter set - video_parameter_set_rbsp, non-VCL
NALU_SPS = 0x21, // Sequence parameter set - seq_parameter_set_rbsp, non-VCL
NALU_PPS = 0x22, // Picture parameter set - pic_parameter_set_rbsp, non-VCL
NALU_AUD = 0x23, // Access unit delimiter - access_unit_delimiter_rbsp, non-VCL
NALU_EOS = 0x24, // End of sequence - end_of_seq_rbsp, non-VCL
NALU_EOB = 0x25, // End of bitstream - end_of_bitstream_rbsp, non-VCL
NALU_FD = 0x26, // Filler data - filler_data_rbsp, non-VCL
NALU_PREFIX_SEI = 0x27, // Supplemental enhancement information (SEI) - sei_rbsp, non-VCL
NALU_SUFFIX_SEI = 0x28, // Supplemental enhancement information (SEI) - sei_rbsp, non-VCL
/* 0x29..0x2f - Reserved */
/* 0x30..0x3f - Unspecified */
// This must remain the last element of the enum;
// change its value if the range of NAL unit types grows.
MAX_HEVC_NAL_TYPE = 0x3f,
};
/*
 * Round x up to the next multiple of 16.
 * The argument is parenthesized so that expressions built from operators with
 * lower precedence than '+' (shifts, bitwise operators, ?:) expand correctly.
 */
#define ALIGN16(x) (((x) + 15) & ~15)
/* Number of entries in vaapi_recorder::inputFrames. */
#define CHAIN_SIZE 4
/*
 * State of one recorder instance: VA display, the video-post-processing
 * (VPP) stage, the HEVC encoder stage and the threading primitives that
 * go with them.
 */
struct vaapi_recorder {
/* NOTE(review): presumably the DRM device fd and the fd the stream is written to - confirm */
int drm_fd, output_fd;
/* Frame size in pixels; the sequence parameters use these rounded up to a multiple of 16. */
int width, height;
/* Index of the frame being encoded: 0 selects the IDR/I path, anything else
 * the P path; it is also used as the picture order count. */
int frame_count;
int error;
int destroying;
/* NOTE(review): presumably runs worker_thread_function - confirm */
pthread_t worker_thread;
pthread_mutex_t mutex;
pthread_cond_t input_cond;
/* Description of a pending input frame. */
struct {
int valid;
int prime_fd, stride;
} input;
VADisplay va_dpy;
/* video post processing is used for colorspace conversion */
struct {
VAConfigID cfg;
VAContextID ctx;
VABufferID pipeline_buf;
/* Passed to vaCreateContext() as the fourth render target of the encoder context. */
VASurfaceID output;
} vpp;
struct {
/* Created by encoder_create_config(), released by encoder_destroy_config(). */
VAConfigID cfg;
VAContextID ctx;
/* All three surfaces are handed to vaCreateContext(); the per-frame
 * parameter code alternates between entries 0 and 1 only. */
VASurfaceID reference_picture[3];
int intra_period;
int output_size;
/* Parameter buffers filled once by encoder_init_parameters() and patched
 * per frame by the encoder_update_*() functions before upload. */
struct {
VAEncSequenceParameterBufferHEVC seq;
VAEncPictureParameterBufferHEVC pic;
VAEncSliceParameterBufferHEVC slice;
} param;
/* Capability table indexed by VAConfigAttribType, filled by
 * vaGetConfigAttributes() in encoder_create_config(). */
VAConfigAttrib attrib[VAConfigAttribTypeMax];
pthread_t output_thread;
pthread_mutex_t mutex;
pthread_cond_t output_cond;
VABufferID output_buf;
VASurfaceID output_sync_surf;
} encoder;
VASurfaceID inputFrames[CHAIN_SIZE];
};
/*
seq
{general_profile_idc = 1 '\001', general_level_idc = 150 '\226', general_tier_flag = 0 '\000', intra_period = 32767, intra_idr_period = 32767,
ip_period = 1, bits_per_second = 0, pic_width_in_luma_samples = 3200, pic_height_in_luma_samples = 2200, seq_fields = {bits = {
chroma_format_idc = 1, separate_colour_plane_flag = 0, bit_depth_luma_minus8 = 0, bit_depth_chroma_minus8 = 0, scaling_list_enabled_flag = 0,
strong_intra_smoothing_enabled_flag = 0, amp_enabled_flag = 1, sample_adaptive_offset_enabled_flag = 1, pcm_enabled_flag = 0,
pcm_loop_filter_disabled_flag = 0, sps_temporal_mvp_enabled_flag = 0, low_delay_seq = 0, hierachical_flag = 0, reserved_bits = 0},
value = 6145}, log2_min_luma_coding_block_size_minus3 = 0 '\000', log2_diff_max_min_luma_coding_block_size = 3 '\003',
log2_min_transform_block_size_minus2 = 0 '\000', log2_diff_max_min_transform_block_size = 3 '\003', max_transform_hierarchy_depth_inter = 0 '\000',
max_transform_hierarchy_depth_intra = 0 '\000', pcm_sample_bit_depth_luma_minus1 = 0, pcm_sample_bit_depth_chroma_minus1 = 0,
log2_min_pcm_luma_coding_block_size_minus3 = 0, log2_max_pcm_luma_coding_block_size_minus3 = 0, vui_parameters_present_flag = 0 '\000',
vui_fields = {bits = {aspect_ratio_info_present_flag = 0, neutral_chroma_indication_flag = 0, field_seq_flag = 0, vui_timing_info_present_flag = 0,
bitstream_restriction_flag = 0, tiles_fixed_structure_flag = 0, motion_vectors_over_pic_boundaries_flag = 0, restricted_ref_pic_lists_flag = 0,
log2_max_mv_length_horizontal = 0, log2_max_mv_length_vertical = 0}, value = 0}, aspect_ratio_idc = 0 '\000', sar_width = 0, sar_height = 0,
vui_num_units_in_tick = 0, vui_time_scale = 0, min_spatial_segmentation_idc = 0, max_bytes_per_pic_denom = 0 '\000',
max_bits_per_min_cu_denom = 0 '\000', scc_fields = {bits = {palette_mode_enabled_flag = 0, reserved = 0}, value = 0}, va_reserved = {0, 0, 0, 0, 0,
0, 0}}
pic
{decoded_curr_pic = {picture_id = 5, pic_order_cnt = 0, flags = 0, va_reserved = {0, 0, 0, 0}}, reference_frames = {{picture_id = 4294967295,
pic_order_cnt = 0, flags = 1, va_reserved = {0, 0, 0, 0}} <repeats 15 times>}, coded_buf = 9, collocated_ref_pic_index = 255 '\377',
last_picture = 0 '\000', pic_init_qp = 25 '\031', diff_cu_qp_delta_depth = 0 '\000', pps_cb_qp_offset = 0 '\000', pps_cr_qp_offset = 0 '\000',
num_tile_columns_minus1 = 0 '\000', num_tile_rows_minus1 = 0 '\000', column_width_minus1 = '\000' <repeats 18 times>,
row_height_minus1 = '\000' <repeats 20 times>, log2_parallel_merge_level_minus2 = 0 '\000', ctu_max_bitsize_allowed = 0 '\000',
num_ref_idx_l0_default_active_minus1 = 0 '\000', num_ref_idx_l1_default_active_minus1 = 0 '\000', slice_pic_parameter_set_id = 0 '\000',
nal_unit_type = 19 '\023', pic_fields = {bits = {idr_pic_flag = 1, coding_type = 1, reference_pic_flag = 1,
dependent_slice_segments_enabled_flag = 0, sign_data_hiding_enabled_flag = 0, constrained_intra_pred_flag = 0, transform_skip_enabled_flag = 0,
cu_qp_delta_enabled_flag = 0, weighted_pred_flag = 0, weighted_bipred_flag = 0, transquant_bypass_enabled_flag = 0, tiles_enabled_flag = 0,
entropy_coding_sync_enabled_flag = 0, loop_filter_across_tiles_enabled_flag = 0, pps_loop_filter_across_slices_enabled_flag = 1,
scaling_list_data_present_flag = 0, screen_content_flag = 0, enable_gpu_weighted_prediction = 0, no_output_of_prior_pics_flag = 0,
reserved = 0}, value = 65555}, hierarchical_level_plus1 = 0 '\000', va_byte_reserved = 0 '\000', scc_fields = {bits = {
pps_curr_pic_ref_enabled_flag = 0, reserved = 0}, value = 0}, va_reserved = {0 <repeats 15 times>}}
slice
{slice_segment_address = 0, num_ctu_in_slice = 1750, slice_type = 2 '\002', slice_pic_parameter_set_id = 0 '\000',
num_ref_idx_l0_active_minus1 = 0 '\000', num_ref_idx_l1_active_minus1 = 0 '\000', ref_pic_list0 = {{picture_id = 4294967295, pic_order_cnt = 0,
flags = 1, va_reserved = {0, 0, 0, 0}} <repeats 15 times>}, ref_pic_list1 = {{picture_id = 4294967295, pic_order_cnt = 0, flags = 1,
va_reserved = {0, 0, 0, 0}} <repeats 15 times>}, luma_log2_weight_denom = 0 '\000', delta_chroma_log2_weight_denom = 0 '\000',
delta_luma_weight_l0 = '\000' <repeats 14 times>, luma_offset_l0 = '\000' <repeats 14 times>, delta_chroma_weight_l0 = {"\000" <repeats 15 times>},
chroma_offset_l0 = {"\000" <repeats 15 times>}, delta_luma_weight_l1 = '\000' <repeats 14 times>, luma_offset_l1 = '\000' <repeats 14 times>,
delta_chroma_weight_l1 = {"\000" <repeats 15 times>}, chroma_offset_l1 = {"\000" <repeats 15 times>}, max_num_merge_cand = 5 '\005',
slice_qp_delta = 0 '\000', slice_cb_qp_offset = 0 '\000', slice_cr_qp_offset = 0 '\000', slice_beta_offset_div2 = 0 '\000',
slice_tc_offset_div2 = 0 '\000', slice_fields = {bits = {last_slice_of_pic_flag = 1, dependent_slice_segment_flag = 0, colour_plane_id = 0,
slice_temporal_mvp_enabled_flag = 0, slice_sao_luma_flag = 1, slice_sao_chroma_flag = 1, num_ref_idx_active_override_flag = 0,
mvd_l1_zero_flag = 0, cabac_init_flag = 0, slice_deblocking_filter_disabled_flag = 0, slice_loop_filter_across_slices_enabled_flag = 0,
collocated_from_l0_flag = 0}, value = 97}, pred_weight_table_bit_offset = 0, pred_weight_table_bit_length = 0, va_reserved = {0, 0, 0, 0, 0, 0}}
pic
{decoded_curr_pic = {picture_id = 1, pic_order_cnt = 1, flags = 0, va_reserved = {0, 0, 0, 0}}, reference_frames = {{picture_id = 5,
pic_order_cnt = 0, flags = 16, va_reserved = {0, 0, 0, 0}}, {picture_id = 4294967295, pic_order_cnt = 0, flags = 1, va_reserved = {0, 0, 0,
0}} <repeats 14 times>}, coded_buf = 3, collocated_ref_pic_index = 255 '\377', last_picture = 0 '\000', pic_init_qp = 25 '\031',
diff_cu_qp_delta_depth = 0 '\000', pps_cb_qp_offset = 0 '\000', pps_cr_qp_offset = 0 '\000', num_tile_columns_minus1 = 0 '\000',
num_tile_rows_minus1 = 0 '\000', column_width_minus1 = '\000' <repeats 18 times>, row_height_minus1 = '\000' <repeats 20 times>,
log2_parallel_merge_level_minus2 = 0 '\000', ctu_max_bitsize_allowed = 0 '\000', num_ref_idx_l0_default_active_minus1 = 0 '\000',
num_ref_idx_l1_default_active_minus1 = 0 '\000', slice_pic_parameter_set_id = 0 '\000', nal_unit_type = 1 '\001', pic_fields = {bits = {
idr_pic_flag = 0, coding_type = 2, reference_pic_flag = 1, dependent_slice_segments_enabled_flag = 0, sign_data_hiding_enabled_flag = 0,
constrained_intra_pred_flag = 0, transform_skip_enabled_flag = 0, cu_qp_delta_enabled_flag = 0, weighted_pred_flag = 0,
weighted_bipred_flag = 0, transquant_bypass_enabled_flag = 0, tiles_enabled_flag = 0, entropy_coding_sync_enabled_flag = 0,
loop_filter_across_tiles_enabled_flag = 0, pps_loop_filter_across_slices_enabled_flag = 1, scaling_list_data_present_flag = 0,
screen_content_flag = 0, enable_gpu_weighted_prediction = 0, no_output_of_prior_pics_flag = 0, reserved = 0}, value = 65556},
hierarchical_level_plus1 = 0 '\000', va_byte_reserved = 0 '\000', scc_fields = {bits = {pps_curr_pic_ref_enabled_flag = 0, reserved = 0},
value = 0}, va_reserved = {0 <repeats 15 times>}}
slice
{slice_segment_address = 0, num_ctu_in_slice = 1750, slice_type = 1 '\001', slice_pic_parameter_set_id = 0 '\000',
num_ref_idx_l0_active_minus1 = 0 '\000', num_ref_idx_l1_active_minus1 = 0 '\000', ref_pic_list0 = {{picture_id = 5, pic_order_cnt = 0, flags = 16,
va_reserved = {0, 0, 0, 0}}, {picture_id = 4294967295, pic_order_cnt = 0, flags = 1, va_reserved = {0, 0, 0, 0}} <repeats 14 times>},
ref_pic_list1 = {{picture_id = 4294967295, pic_order_cnt = 0, flags = 1, va_reserved = {0, 0, 0, 0}} <repeats 15 times>},
luma_log2_weight_denom = 0 '\000', delta_chroma_log2_weight_denom = 0 '\000', delta_luma_weight_l0 = '\000' <repeats 14 times>,
luma_offset_l0 = '\000' <repeats 14 times>, delta_chroma_weight_l0 = {"\000" <repeats 15 times>}, chroma_offset_l0 = {"\000" <repeats 15 times>},
delta_luma_weight_l1 = '\000' <repeats 14 times>, luma_offset_l1 = '\000' <repeats 14 times>, delta_chroma_weight_l1 = {"\000" <repeats 15 times>},
chroma_offset_l1 = {"\000" <repeats 15 times>}, max_num_merge_cand = 5 '\005', slice_qp_delta = 0 '\000', slice_cb_qp_offset = 0 '\000',
slice_cr_qp_offset = 0 '\000', slice_beta_offset_div2 = 0 '\000', slice_tc_offset_div2 = 0 '\000', slice_fields = {bits = {
last_slice_of_pic_flag = 1, dependent_slice_segment_flag = 0, colour_plane_id = 0, slice_temporal_mvp_enabled_flag = 0,
slice_sao_luma_flag = 1, slice_sao_chroma_flag = 1, num_ref_idx_active_override_flag = 0, mvd_l1_zero_flag = 0, cabac_init_flag = 0,
slice_deblocking_filter_disabled_flag = 0, slice_loop_filter_across_slices_enabled_flag = 0, collocated_from_l0_flag = 1}, value = 8289},
pred_weight_table_bit_offset = 0, pred_weight_table_bit_length = 0, va_reserved = {0, 0, 0, 0, 0, 0}}
VPS
vps_video_parameter_set_id:0
vps_base_layer_internal_flag:1
vps_base_layer_available_flag:1
vps_max_layers_minus1:0
vps_max_sub_layers_minus1:0
vps_temporal_id_nesting_flag:1
profile_tier_level( 1, vps_max_sub_layers_minus1 ):
general_profile_space:0
general_tier_flag:0
general_profile_idc:1
general_profile_compatibility_flag: 0 1 1 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0
general_progressive_source_flag:1
general_interlaced_source_flag:0
general_non_packed_constraint_flag:1
general_frame_only_constraint_flag:1
general_level_idc:150
vps_sub_layer_ordering_info_present_flag:0
vps_max_layer_id:0
vps_num_layer_sets_minus1:0
vps_timing_info_present_flag:0
SPS:
sps_video_parameter_set_id:0
sps_max_sub_layers_minus1:0
sps_temporal_id_nesting_flag:1
profile_tier_level( 1, vps_max_sub_layers_minus1 ):
general_profile_space:0
general_tier_flag:0
general_profile_idc:1
general_profile_compatibility_flag: 0 1 1 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0
general_progressive_source_flag:1
general_interlaced_source_flag:0
general_non_packed_constraint_flag:1
general_frame_only_constraint_flag:1
general_level_idc:150
sps_seq_parameter_set_id:0
chroma_format_idc:1
pic_width_in_luma_samples:3200
pic_height_in_luma_samples:2208
conformance_window_flag:1
!! Found NAL at offset 75 (0x004B), size 7 (0x0007)
forbidden_zero_bit: 0
nal->nal_unit_type: 34
nal->nuh_layer_id: 0
nal->nuh_temporal_id_plus1: 1
PPS:
pps_pic_parameter_set_id:0
pps_seq_parameter_set_id:0
dependent_slice_segments_enabled_flag:1
output_flag_present_flag:0
num_extra_slice_header_bits:0
sign_data_hiding_enabled_flag:0
cabac_init_present_flag:1
num_ref_idx_l0_default_active_minus1:0
num_ref_idx_l1_default_active_minus1:0
init_qp_minus26:0
constrained_intra_pred_flag:0
transform_skip_enabled_flag:1
cu_qp_delta_enabled_flag:0
pps_cb_qp_offset:0
pps_cr_qp_offset:0
pps_slice_chroma_qp_offsets_present_flag:0
weighted_pred_flag:0
weighted_bipred_flag:0
transquant_bypass_enabled_flag:0
tiles_enabled_flag:0
entropy_coding_sync_enabled_flag:0
*/
/*
 * Forward declaration of the worker thread entry point. The signature
 * (void * in, void * out) is the one pthread_create() expects for a start
 * routine; the definition is expected further down in this file.
 */
static void *
worker_thread_function(void *);
/* bitstream code used for writing the packed headers */
/* Initial capacity of a bitstream buffer, and the amount it grows by each
 * time it fills up, counted in 32-bit words. */
#define BITSTREAM_ALLOCATE_STEPPING 4096
/*
 * Growable bit writer. Bits are accumulated in the low end of the current
 * word; when a word is complete it is passed through va_swap32() before the
 * writer moves on to the next word.
 */
struct bitstream {
/* Heap storage allocated by bitstream_start() and grown by bitstream_put_ui(). */
unsigned int *buffer;
/* Total number of bits written so far. */
int bit_offset;
/* Current capacity of buffer, in unsigned int elements. */
int max_size_in_dword;
};
/*
 * Repack the four bytes of val, taken in memory order, so that the first
 * byte in memory becomes the most significant byte of the result.
 * On a little-endian host this reverses the byte order; on a big-endian
 * host it returns val unchanged.
 *
 * Each byte is widened to unsigned int before shifting: shifting the
 * int-promoted byte left by 24 would overflow a signed int for bytes >= 0x80.
 */
static unsigned int
va_swap32(unsigned int val)
{
    const unsigned char *bytes = (const unsigned char *)&val;

    return (((unsigned int)bytes[0] << 24) |
            ((unsigned int)bytes[1] << 16) |
            ((unsigned int)bytes[2] << 8) |
            ((unsigned int)bytes[3] << 0));
}
/*
 * Initialise bs with a zero-filled buffer of BITSTREAM_ALLOCATE_STEPPING
 * words and reset the write position to bit 0.
 *
 * The buffer is owned by bs; the caller is responsible for free()ing
 * bs->buffer. Allocation failure trips an assert, matching the handling of
 * realloc() in bitstream_put_ui().
 */
static void
bitstream_start(struct bitstream *bs)
{
    bs->max_size_in_dword = BITSTREAM_ALLOCATE_STEPPING;
    /* calloc(count, size): passing the element count separately lets calloc
     * perform the overflow check on the multiplication itself. */
    bs->buffer = (unsigned int *)calloc(bs->max_size_in_dword, sizeof(unsigned int));
    assert(bs->buffer);
    bs->bit_offset = 0;
}
/*
 * Finish the stream: if the last word is only partly filled, move its bits
 * up to the most significant end and pass the word through va_swap32(), the
 * same treatment bitstream_put_ui() gives to completed words.
 * A stream that ends exactly on a 32-bit boundary needs no work.
 */
static void
bitstream_end(struct bitstream *bs)
{
    const int used_bits = bs->bit_offset & 0x1f;
    unsigned int *last_word;

    if (used_bits == 0)
        return;

    last_word = &bs->buffer[bs->bit_offset >> 5];
    *last_word = va_swap32(*last_word << (32 - used_bits));
}
/*
 * Append the low size_in_bits bits of val to the stream, most significant
 * bit first.
 *
 * bs            bit writer set up by bitstream_start()
 * val           value to write; bits above size_in_bits are expected to be 0
 * size_in_bits  number of bits to append, 0..32 (0 is a no-op)
 *
 * When the write completes the current 32-bit word, that word is passed
 * through va_swap32() and the remaining low bits of val start the next word.
 * The buffer grows by BITSTREAM_ALLOCATE_STEPPING words when the next word
 * would fall outside it; allocation failure trips an assert.
 */
static void
bitstream_put_ui(struct bitstream *bs, unsigned int val, int size_in_bits)
{
    int pos = (bs->bit_offset >> 5);
    int bit_offset = (bs->bit_offset & 0x1f);
    int bit_left = 32 - bit_offset;
    unsigned int carried;

    if (!size_in_bits)
        return;

    bs->bit_offset += size_in_bits;

    if (bit_left > size_in_bits) {
        /* Everything fits in the current word. */
        bs->buffer[pos] = (bs->buffer[pos] << size_in_bits | val);
        return;
    }

    /* The write completes the current word; size_in_bits becomes the number
     * of bits that spill into the next one. */
    size_in_bits -= bit_left;

    /* With an empty word bit_left is 32, and shifting a 32-bit value by 32 is
     * undefined. Nothing of the old word content is kept in that case - it
     * may hold stale bits left behind by the previous spill. */
    carried = (bit_left < 32) ? (bs->buffer[pos] << bit_left) : 0;
    bs->buffer[pos] = va_swap32(carried | (val >> size_in_bits));

    if (pos + 1 == bs->max_size_in_dword) {
        int grown = bs->max_size_in_dword + BITSTREAM_ALLOCATE_STEPPING;
        /* Keep the old pointer until realloc is known to have succeeded. */
        unsigned int *bigger = (unsigned int *)realloc(bs->buffer, grown * sizeof(unsigned int));
        assert(bigger);
        bs->buffer = bigger;
        bs->max_size_in_dword = grown;
    }

    /* The upper, already written bits of val are shifted out again by the
     * following writes or by bitstream_end(). */
    bs->buffer[pos + 1] = val;
}
/*
 * Append val as an unsigned Exp-Golomb code: (n - 1) zero bits followed by
 * the n-bit binary representation of val + 1.
 *
 * val must be below UINT_MAX, because val + 1 has to fit in 32 bits.
 * The bit-length scan uses an unsigned temporary; with a signed one, values
 * of 0x7fffffff and above turn negative and the right shift never reaches 0.
 */
static void
bitstream_put_ue(struct bitstream *bs, unsigned int val)
{
    int size_in_bits = 0;
    unsigned int tmp_val = ++val;

    /* val wrapped to 0: the code word would need 33 bits of payload. */
    assert(val != 0);

    while (tmp_val) {
        tmp_val >>= 1;
        size_in_bits++;
    }

    bitstream_put_ui(bs, 0, size_in_bits - 1); /* leading zero */
    bitstream_put_ui(bs, val, size_in_bits);
}
/*
 * Append val as a signed Exp-Golomb code. Positive values map to the odd
 * code numbers (2 * val - 1), zero and negative values to the even ones
 * (-2 * val); the result is written with bitstream_put_ue().
 */
static void
bitstream_put_se(struct bitstream *bs, int val)
{
    unsigned int code_num = (val > 0) ? (2 * val - 1) : (-2 * val);

    bitstream_put_ue(bs, code_num);
}
/*
 * Pad the stream up to the next byte boundary. The padding bits are all
 * ones when bit is non-zero and all zeros otherwise. Nothing is written
 * when the stream is already byte aligned.
 */
static void
bitstream_byte_aligning(struct bitstream *bs, int bit)
{
    const int misaligned = bs->bit_offset & 0x7;
    int pad_bits;
    int fill;

    if (misaligned == 0)
        return;

    pad_bits = 8 - misaligned;
    fill = bit ? ((1 << pad_bits) - 1) : 0;
    bitstream_put_ui(bs, fill, pad_bits);
}
/* Non-zero selects the 10-bit path (HEVC Main10 profile, YUV420_10 render
 * target format); zero selects 8-bit HEVC Main. */
#define USE_P010 1
/*
 * Capability-dump helpers used by encoder_create_config().
 * Each one expands to a single statement (do { } while (0)), so it can be
 * used as the body of an if/else without capturing the else branch.
 *
 * PRINT_FLAG(f, name)   prints " name" when bit mask `name` is set in f.
 * PRINT_INT_ATTR(name)  prints attribute `name` from r->encoder.attrib when
 *                       it is supported; needs `r` in scope.
 * PRINT_BIT_FIELD(name) prints " name=value" when val.bits.name is non-zero;
 *                       needs a union variable named `val` in scope.
 */
#define PRINT_FLAG(f,name) do { if ((f) & (name)) printf(" %s", #name ); } while (0)
#define PRINT_INT_ATTR(name) do { if (r->encoder.attrib[name].value != VA_ATTRIB_NOT_SUPPORTED) printf("Supported %s: %d\n", #name, (int)r->encoder.attrib[name].value); } while (0)
#define PRINT_BIT_FIELD(name) do { if (val.bits.name) printf(" %s=%d", #name, val.bits.name); } while (0)
/*
 * Query and print the encoder capabilities, then create the VA config and
 * the encode context.
 *
 * The profile is HEVC Main10 or Main depending on USE_P010, the entry point
 * is VAEntrypointEncSlice. The config is created with a 4:2:0 render target
 * format (10-bit when USE_P010 is set) and CBR rate control. The context is
 * created over the three reference surfaces plus r->vpp.output, so those
 * surfaces must already hold valid IDs when this is called.
 *
 * Returns VA_STATUS_SUCCESS, or the status of the first libva call that
 * failed. If context creation fails the config is destroyed again.
 */
static VAStatus
encoder_create_config(struct vaapi_recorder *r)
{
    VAConfigAttrib attrib[2];
    VAStatus status;
    VAProfile profile = USE_P010?VAProfileHEVCMain10:VAProfileHEVCMain;

    /* Ask for every attribute type so the whole table can be dumped. */
    for (int i = 0; i < VAConfigAttribTypeMax; i++)
        r->encoder.attrib[i].type = (VAConfigAttribType)i;
    status = vaGetConfigAttributes(r->va_dpy, profile, VAEntrypointEncSlice, r->encoder.attrib, VAConfigAttribTypeMax);
    /* Without a successful query the attribute values below are not meaningful. */
    if (status != VA_STATUS_SUCCESS)
        return status;

    uint32_t flags= r->encoder.attrib[VAConfigAttribRTFormat].value;
    printf("Supported RT formats: %d", flags);
    PRINT_FLAG(flags, VA_RT_FORMAT_YUV420);
    PRINT_FLAG(flags, VA_RT_FORMAT_YUV422);
    PRINT_FLAG(flags, VA_RT_FORMAT_YUV444);
    PRINT_FLAG(flags, VA_RT_FORMAT_YUV411);
    PRINT_FLAG(flags, VA_RT_FORMAT_YUV400);
    PRINT_FLAG(flags, VA_RT_FORMAT_YUV420_10);
    PRINT_FLAG(flags, VA_RT_FORMAT_YUV422_10);
    PRINT_FLAG(flags, VA_RT_FORMAT_YUV444_10);
    PRINT_FLAG(flags, VA_RT_FORMAT_YUV420_12);
    PRINT_FLAG(flags, VA_RT_FORMAT_YUV422_12);
    PRINT_FLAG(flags, VA_RT_FORMAT_YUV444_12);
    PRINT_FLAG(flags, VA_RT_FORMAT_RGB16);
    PRINT_FLAG(flags, VA_RT_FORMAT_RGB32);
    PRINT_FLAG(flags, VA_RT_FORMAT_RGBP);
    PRINT_FLAG(flags, VA_RT_FORMAT_RGB32_10);
    PRINT_FLAG(flags, VA_RT_FORMAT_PROTECTED);
    printf("\n");

    flags= r->encoder.attrib[VAConfigAttribRateControl].value;
    printf("Supported rate control: %d", flags);
    PRINT_FLAG(flags, VA_RC_NONE);
    PRINT_FLAG(flags, VA_RC_CBR);
    PRINT_FLAG(flags, VA_RC_VBR);
    PRINT_FLAG(flags, VA_RC_VCM);
    PRINT_FLAG(flags, VA_RC_CQP);
    PRINT_FLAG(flags, VA_RC_VBR_CONSTRAINED);
    PRINT_FLAG(flags, VA_RC_ICQ);
    PRINT_FLAG(flags, VA_RC_MB);
    PRINT_FLAG(flags, VA_RC_CFS);
    PRINT_FLAG(flags, VA_RC_PARALLEL);
    PRINT_FLAG(flags, VA_RC_QVBR);
    PRINT_FLAG(flags, VA_RC_AVBR);
    PRINT_FLAG(flags, VA_RC_TCBRC);
    printf("\n");

    flags = r->encoder.attrib[VAConfigAttribEncPackedHeaders].value;
    printf("Supported packed headers: %d", flags);
    PRINT_FLAG(flags, VA_ENC_PACKED_HEADER_SEQUENCE);
    PRINT_FLAG(flags, VA_ENC_PACKED_HEADER_PICTURE);
    PRINT_FLAG(flags, VA_ENC_PACKED_HEADER_SLICE);
    PRINT_FLAG(flags, VA_ENC_PACKED_HEADER_MISC);
    PRINT_FLAG(flags, VA_ENC_PACKED_HEADER_RAW_DATA);
    printf("\n");

    PRINT_INT_ATTR(VAConfigAttribEncMaxSlices);

    flags = r->encoder.attrib[VAConfigAttribEncSliceStructure].value;
    printf("Supported slice structure: %d", flags);
    PRINT_FLAG(flags, VA_ENC_SLICE_STRUCTURE_POWER_OF_TWO_ROWS);
    PRINT_FLAG(flags, VA_ENC_SLICE_STRUCTURE_ARBITRARY_MACROBLOCKS);
    PRINT_FLAG(flags, VA_ENC_SLICE_STRUCTURE_EQUAL_ROWS);
    PRINT_FLAG(flags, VA_ENC_SLICE_STRUCTURE_MAX_SLICE_SIZE);
    PRINT_FLAG(flags, VA_ENC_SLICE_STRUCTURE_EQUAL_MULTI_ROWS);
    printf("\n");

    PRINT_INT_ATTR(VAConfigAttribEncMacroblockInfo);
    PRINT_INT_ATTR(VAConfigAttribMaxPictureWidth);
    PRINT_INT_ATTR(VAConfigAttribMaxPictureHeight);
    PRINT_INT_ATTR(VAConfigAttribEncSkipFrame);
    //PRINT_INT_ATTR(VAConfigAttribEncROI);
    if(r->encoder.attrib[VAConfigAttribEncROI].value != VA_ATTRIB_NOT_SUPPORTED)
    {
        VAConfigAttribValEncROI val;
        val.value = r->encoder.attrib[VAConfigAttribEncROI].value;
        printf("Supported ROI attrs: %d", val.value);
        PRINT_BIT_FIELD(num_roi_regions);
        PRINT_BIT_FIELD(roi_rc_priority_support);
        PRINT_BIT_FIELD(roi_rc_qp_delta_support);
        printf("\n");
    }
    PRINT_INT_ATTR(VAConfigAttribEncRateControlExt);
    PRINT_INT_ATTR(VAConfigAttribContextPriority);
    PRINT_INT_ATTR(VAConfigAttribEncDirtyRect);
    PRINT_INT_ATTR(VAConfigAttribEncParallelRateControl);
    PRINT_INT_ATTR(VAConfigAttribEncDynamicScaling);
    PRINT_INT_ATTR(VAConfigAttribFrameSizeToleranceSupport);
    PRINT_INT_ATTR(VAConfigAttribStats);
    PRINT_INT_ATTR(VAConfigAttribEncTileSupport);
    PRINT_INT_ATTR(VAConfigAttribCustomRoundingControl);
    PRINT_INT_ATTR(VAConfigAttribQPBlockSize);
    PRINT_INT_ATTR(VAConfigAttribEncHEVCFeatures);
    PRINT_INT_ATTR(VAConfigAttribEncHEVCBlockSizes);
    if(r->encoder.attrib[VAConfigAttribEncHEVCFeatures].value != VA_ATTRIB_NOT_SUPPORTED)
    {
        VAConfigAttribValEncHEVCFeatures val;
        val.value = r->encoder.attrib[VAConfigAttribEncHEVCFeatures].value;
        printf("Supported HEVC features: %d", val.value);
        PRINT_BIT_FIELD(separate_colour_planes);
        PRINT_BIT_FIELD(scaling_lists);
        PRINT_BIT_FIELD(amp);
        PRINT_BIT_FIELD(sao);
        PRINT_BIT_FIELD(pcm);
        PRINT_BIT_FIELD(temporal_mvp);
        PRINT_BIT_FIELD(strong_intra_smoothing);
        PRINT_BIT_FIELD(dependent_slices);
        PRINT_BIT_FIELD(sign_data_hiding);
        PRINT_BIT_FIELD(constrained_intra_pred);
        PRINT_BIT_FIELD(transform_skip);
        PRINT_BIT_FIELD(cu_qp_delta);
        PRINT_BIT_FIELD(weighted_prediction);
        PRINT_BIT_FIELD(transquant_bypass);
        PRINT_BIT_FIELD(deblocking_filter_disable);
        printf("\n");
    }
    if(r->encoder.attrib[VAConfigAttribEncHEVCBlockSizes].value != VA_ATTRIB_NOT_SUPPORTED)
    {
        VAConfigAttribValEncHEVCBlockSizes val;
        val.value = r->encoder.attrib[VAConfigAttribEncHEVCBlockSizes].value;
        printf("Supported HEVC block sizes: %d", val.value);
        PRINT_BIT_FIELD(log2_max_coding_tree_block_size_minus3);
        PRINT_BIT_FIELD(log2_min_coding_tree_block_size_minus3);
        PRINT_BIT_FIELD(log2_min_luma_coding_block_size_minus3);
        PRINT_BIT_FIELD(log2_max_luma_transform_block_size_minus2);
        PRINT_BIT_FIELD(log2_min_luma_transform_block_size_minus2);
        PRINT_BIT_FIELD(max_max_transform_hierarchy_depth_inter);
        PRINT_BIT_FIELD(min_max_transform_hierarchy_depth_inter);
        PRINT_BIT_FIELD(max_max_transform_hierarchy_depth_intra);
        PRINT_BIT_FIELD(min_max_transform_hierarchy_depth_intra);
        PRINT_BIT_FIELD(log2_max_pcm_coding_block_size_minus3);
        PRINT_BIT_FIELD(log2_min_pcm_coding_block_size_minus3);
        printf("\n");
    }

    flags = r->encoder.attrib[VAConfigAttribEncQuantization].value;
    printf("Supported enc quantization: %d", flags);
    PRINT_FLAG(flags, VA_ENC_QUANTIZATION_NONE);
    PRINT_FLAG(flags, VA_ENC_QUANTIZATION_TRELLIS_SUPPORTED);
    printf("\n");

    flags = r->encoder.attrib[VAConfigAttribEncIntraRefresh].value;
    printf("Supported intra refresh: %d", flags);
    PRINT_FLAG(flags,VA_ENC_INTRA_REFRESH_ROLLING_COLUMN);
    PRINT_FLAG(flags,VA_ENC_INTRA_REFRESH_ROLLING_ROW);
    PRINT_FLAG(flags,VA_ENC_INTRA_REFRESH_ADAPTIVE);
    PRINT_FLAG(flags,VA_ENC_INTRA_REFRESH_CYCLIC);
    PRINT_FLAG(flags,VA_ENC_INTRA_REFRESH_P_FRAME);
    PRINT_FLAG(flags,VA_ENC_INTRA_REFRESH_B_FRAME);
    PRINT_FLAG(flags,VA_ENC_INTRA_REFRESH_MULTI_REF);
    printf("\n");

    /* FIXME: should check if specified attributes are supported */
    attrib[0].type = VAConfigAttribRTFormat;
    attrib[0].value = USE_P010?VA_RT_FORMAT_YUV420_10:VA_RT_FORMAT_YUV420;
    attrib[1].type = VAConfigAttribRateControl;
    attrib[1].value = VA_RC_CBR;
    status = vaCreateConfig(r->va_dpy, profile,
                            VAEntrypointEncSlice, attrib, 2,
                            &r->encoder.cfg);
    if (status != VA_STATUS_SUCCESS)
        return status;

    /* Render targets of the context: the three reference surfaces followed
     * by the VPP output surface. */
    VASurfaceID ctx_surfaces[4];
    memcpy(ctx_surfaces, r->encoder.reference_picture, sizeof(VASurfaceID) * 3);
    ctx_surfaces[3] = r->vpp.output;
    status = vaCreateContext(r->va_dpy, r->encoder.cfg,
                             r->width, r->height, VA_PROGRESSIVE, ctx_surfaces, 4,
                             &r->encoder.ctx);
    if (status != VA_STATUS_SUCCESS) {
        vaDestroyConfig(r->va_dpy, r->encoder.cfg);
        return status;
    }
    return VA_STATUS_SUCCESS;
}
/*
 * Release what encoder_create_config() created: the encode context first,
 * then the config that context was created from.
 * The status codes returned by libva are not checked.
 */
static void
encoder_destroy_config(struct vaapi_recorder *r)
{
vaDestroyContext(r->va_dpy, r->encoder.ctx);
vaDestroyConfig(r->va_dpy, r->encoder.cfg);
}
static void
encoder_init_parameters(struct vaapi_recorder *r)
{
int width_in_mbs, height_in_mbs;
int frame_cropping_flag = 0;
int frame_crop_bottom_offset = 0;
width_in_mbs = (r->width + 15) / 16;
height_in_mbs = (r->height + 15) / 16;
// sps
// seems work
r->encoder.param.seq.seq_fields.bits.sps_temporal_mvp_enabled_flag = 1;
// todo: check if we can use 4:4:4/4:2:2
r->encoder.param.seq.seq_fields.bits.chroma_format_idc = 1; // 4:2:0
r->encoder.param.seq.pic_width_in_luma_samples = ALIGN16(r->width);
r->encoder.param.seq.pic_height_in_luma_samples = ALIGN16(r->height);
// todo: tunable block size
r->encoder.param.seq.log2_diff_max_min_luma_coding_block_size = 2;
r->encoder.param.seq.log2_diff_max_min_transform_block_size = 3;
r->encoder.param.seq.log2_min_transform_block_size_minus2 = 0;
r->encoder.param.seq.max_transform_hierarchy_depth_inter = 2;
r->encoder.param.seq.max_transform_hierarchy_depth_intra = 2;
r->encoder.param.seq.seq_fields.bits.amp_enabled_flag = 1;
// broken on intel? or broken bistream?
r->encoder.param.seq.seq_fields.bits.sample_adaptive_offset_enabled_flag = 0;
r->encoder.param.seq.intra_idr_period = 32767;
r->encoder.param.seq.intra_period = 32767;
r->encoder.param.seq.ip_period = 1;
if(USE_P010)
{
r->encoder.param.seq.seq_fields.bits.bit_depth_chroma_minus8 = 2;
r->encoder.param.seq.seq_fields.bits.bit_depth_luma_minus8 = 2;
}
r->encoder.param.seq.bits_per_second = 150*1024*1024;//(long long)r->width * r->height * 12 * 90 / 50;
// vps
// (none?)
// profile
r->encoder.param.seq.general_level_idc = 120;
r->encoder.param.seq.general_profile_idc = USE_P010? 2: 1;
// pps
r->encoder.param.pic.pic_fields.bits.dependent_slice_segments_enabled_flag = 1; // seens work both
r->encoder.param.pic.pic_fields.bits.transform_skip_enabled_flag = 1;
// pic
r->encoder.param.pic.collocated_ref_pic_index = 0;//255;
r->encoder.param.pic.pic_init_qp = 26;
r->encoder.param.pic.nal_unit_type = NALU_IDR_W_DLP;
r->encoder.param.pic.pic_fields.bits.idr_pic_flag = 1;
r->encoder.param.pic.pic_fields.bits.coding_type = 1;
r->encoder.param.pic.pic_fields.bits.reference_pic_flag = 1;
// seems work
r->encoder.param.pic.pic_fields.bits.pps_loop_filter_across_slices_enabled_flag = 1;
r->encoder.param.pic.pic_fields.bits.cu_qp_delta_enabled_flag = 1; // CBR
if(r->encoder.param.pic.pic_fields.bits.cu_qp_delta_enabled_flag)
r->encoder.param.pic.diff_cu_qp_delta_depth = 2;
for(int i = 0; i < 15; i++)
{
r->encoder.param.pic.reference_frames[i].picture_id = VA_INVALID_SURFACE;
r->encoder.param.pic.reference_frames[i].flags = VA_PICTURE_HEVC_INVALID;
r->encoder.param.pic.reference_frames[i].pic_order_cnt = 0;
}
// slice
//r->encoder.param.slice.slice_fields.bits.num_ref_idx_active_override_flag = 0;
r->encoder.param.slice.slice_qp_delta = 0;
int lcu_size = 32; // todo: block size settings?
int picture_width_in_ctus = (r->width + lcu_size - 1) / lcu_size;
int picture_height_in_ctus = (r->height + lcu_size - 1) / lcu_size;
r->encoder.param.slice.num_ctu_in_slice = picture_width_in_ctus * picture_height_in_ctus;
r->encoder.param.slice.max_num_merge_cand = 5;
// seems works
//r->encoder.param.slice.slice_fields.bits.collocated_from_l0_flag = 1;
// broken on intel???
//r->encoder.param.slice.slice_fields.bits.slice_sao_chroma_flag = 1;
//r->encoder.param.slice.slice_fields.bits.slice_sao_luma_flag = 1;
memset((void*)r->encoder.param.slice.ref_pic_list0, -1, sizeof(r->encoder.param.slice.ref_pic_list0));
memset((void*)r->encoder.param.slice.ref_pic_list1, -1, sizeof(r->encoder.param.slice.ref_pic_list1));
for(int i = 0; i < 15; i++)
{
r->encoder.param.slice.ref_pic_list0[i].flags = -1;//VA_PICTURE_HEVC_INVALID;
r->encoder.param.slice.ref_pic_list0[i].picture_id = VA_INVALID_SURFACE;
r->encoder.param.slice.ref_pic_list0[i].pic_order_cnt = -1;
r->encoder.param.slice.ref_pic_list1[i].flags = -1;//VA_PICTURE_HEVC_INVALID;
r->encoder.param.slice.ref_pic_list1[i].picture_id = VA_INVALID_SURFACE;
r->encoder.param.slice.ref_pic_list1[i].pic_order_cnt = -1;
}
}
/*
 * Upload the current sequence parameters (r->encoder.param.seq) into a new
 * VA buffer of type VAEncSequenceParameterBufferType.
 *
 * Returns the new buffer ID. On failure the function name and the VA status
 * are printed and VA_INVALID_ID is returned.
 */
static VABufferID
encoder_update_seq_parameters(struct vaapi_recorder *r)
{
    VABufferID buf_id;
    VAStatus rc = vaCreateBuffer(r->va_dpy, r->encoder.ctx,
                                 VAEncSequenceParameterBufferType,
                                 sizeof(r->encoder.param.seq),
                                 1, &r->encoder.param.seq,
                                 &buf_id);

    if (rc != VA_STATUS_SUCCESS) {
        printf("%s: %d\n", __PRETTY_FUNCTION__, rc);
        return VA_INVALID_ID;
    }

    return buf_id;
}
/*
 * Patch the per-frame fields of the picture parameters and upload them into
 * a new VA buffer of type VAEncPictureParameterBufferType.
 *
 * Frame 0 is coded as an IDR picture (coding_type 1) without a reference;
 * every other frame is a TRAIL_R picture (coding_type 2) that references
 * the previous frame. The reconstructed picture alternates between
 * reference_picture[0] and [1], and the frame counter doubles as the
 * picture order count.
 *
 * output_buf  coded buffer that will receive the encoded frame
 *
 * Returns the new buffer ID. On failure the function name and the VA status
 * are printed and VA_INVALID_ID is returned.
 */
static VABufferID
encoder_update_pic_parameters(struct vaapi_recorder *r,
                              VABufferID output_buf)
{
    VAEncPictureParameterBufferHEVC *pic = &r->encoder.param.pic;
    const int is_first_frame = (r->frame_count == 0);
    VASurfaceID recon_surface = r->encoder.reference_picture[r->frame_count % 2];
    VASurfaceID prev_surface = r->encoder.reference_picture[(r->frame_count + 1) % 2];
    VABufferID buf_id;
    VAStatus rc;

    pic->decoded_curr_pic.picture_id = recon_surface;
    pic->decoded_curr_pic.pic_order_cnt = r->frame_count;

    if (is_first_frame) {
        /* Nothing to predict from yet. */
        pic->reference_frames[0].picture_id = VA_INVALID_SURFACE;
        pic->reference_frames[0].flags = VA_PICTURE_HEVC_INVALID;
        pic->reference_frames[0].pic_order_cnt = 0;
    } else {
        pic->reference_frames[0].picture_id = prev_surface;
        pic->reference_frames[0].flags = 0;//VA_PICTURE_HEVC_RPS_ST_CURR_BEFORE;
        pic->reference_frames[0].pic_order_cnt = r->frame_count - 1;
    }

    pic->coded_buf = output_buf;
    pic->nal_unit_type = is_first_frame ? NALU_IDR_W_DLP : NALU_TRAIL_R;
    pic->pic_fields.bits.idr_pic_flag = is_first_frame;
    pic->pic_fields.bits.coding_type = is_first_frame ? 1 : 2;
    pic->pic_fields.bits.reference_pic_flag = 1;

    rc = vaCreateBuffer(r->va_dpy, r->encoder.ctx,
                        VAEncPictureParameterBufferType,
                        sizeof(VAEncPictureParameterBufferHEVC), 1,
                        pic, &buf_id);
    if (rc != VA_STATUS_SUCCESS) {
        printf("%s: %d\n", __PRETTY_FUNCTION__, rc);
        return VA_INVALID_ID;
    }

    return buf_id;
}
/*
 * Update the slice type and the first list-0 reference entry of the slice
 * parameter struct, then upload it as a VAEncSliceParameterBufferType
 * buffer.
 *
 * The slice_type argument is not consulted: the type is derived from
 * frame_count (frame 0 is an I slice, every later frame a P slice that
 * references the previous frame).
 *
 * Returns the new buffer id, or VA_INVALID_ID when vaCreateBuffer() fails
 * (the VAStatus is printed to stdout in that case).
 */
static VABufferID
encoder_update_slice_parameter(struct vaapi_recorder *r, int slice_type)
{
    VABufferID id;
    VAStatus rc;

    if (r->frame_count == 0) {
        r->encoder.param.slice.slice_type = SLICE_I;
        r->encoder.param.slice.ref_pic_list0[0].pic_order_cnt = -1;
        r->encoder.param.slice.ref_pic_list0[0].picture_id = VA_INVALID_SURFACE;
        r->encoder.param.slice.ref_pic_list0[0].flags = -1; /* VA_PICTURE_HEVC_INVALID left disabled */
    } else {
        r->encoder.param.slice.slice_type = SLICE_P;
        r->encoder.param.slice.ref_pic_list0[0].pic_order_cnt = r->frame_count - 1;
        r->encoder.param.slice.ref_pic_list0[0].picture_id =
            r->encoder.reference_picture[(r->frame_count - 1) % 2];
        r->encoder.param.slice.ref_pic_list0[0].flags = 0; /* VA_PICTURE_HEVC_RPS_ST_CURR_BEFORE left disabled */
    }

    rc = vaCreateBuffer(r->va_dpy, r->encoder.ctx,
                        VAEncSliceParameterBufferType,
                        sizeof(r->encoder.param.slice), 1,
                        &r->encoder.param.slice,
                        &id);
    if (rc != VA_STATUS_SUCCESS) {
        printf("%s: %d\n", __PRETTY_FUNCTION__, rc);
        return VA_INVALID_ID;
    }

    return id;
}
/*
 * Create a misc-parameter buffer carrying the rate-control settings.
 *
 * The buffer is allocated without initial data, so the mapped memory is
 * cleared before the individual fields are set; every field that is not
 * assigned below therefore reaches the driver as zero instead of as
 * whatever the allocation happened to contain.
 *
 * Returns the buffer id, or VA_INVALID_ID if the buffer cannot be created
 * or mapped.
 */
static VABufferID
encoder_update_misc_rate_parameter(struct vaapi_recorder *r)
{
    VAEncMiscParameterBuffer *misc_param;
    VAEncMiscParameterRateControl *rc;
    VABufferID buffer;
    VAStatus status;
    int total_size =
        sizeof(VAEncMiscParameterBuffer) +
        sizeof(VAEncMiscParameterRateControl);

    status = vaCreateBuffer(r->va_dpy, r->encoder.ctx,
                            VAEncMiscParameterBufferType, total_size,
                            1, NULL, &buffer);
    if (status != VA_STATUS_SUCCESS)
        return VA_INVALID_ID;

    status = vaMapBuffer(r->va_dpy, buffer, (void **) &misc_param);
    if (status != VA_STATUS_SUCCESS) {
        vaDestroyBuffer(r->va_dpy, buffer);
        return VA_INVALID_ID;
    }

    memset(misc_param, 0, total_size);
    misc_param->type = VAEncMiscParameterTypeRateControl;

    rc = (VAEncMiscParameterRateControl *)misc_param->data;
    rc->initial_qp = 25;
    /* The original assigned max_qp twice and never set min_qp. */
    rc->min_qp = 0;
    rc->max_qp = 0;
    rc->basic_unit_size = 0;
    rc->window_size = 1000;
    rc->target_percentage = 66;
    rc->bits_per_second = 150*1024*1024;

    vaUnmapBuffer(r->va_dpy, buffer);
    return buffer;
}
/*
 * Create a misc-parameter buffer carrying the frame-rate setting (90).
 *
 * The payload is sized for VAEncMiscParameterFrameRate (the original used
 * the size of the rate-control struct) and cleared before use, because the
 * buffer is allocated without initial data and only the framerate field is
 * assigned here.
 *
 * Returns the buffer id, or VA_INVALID_ID if the buffer cannot be created
 * or mapped.
 */
static VABufferID
encoder_update_misc_framerate_parameter(struct vaapi_recorder *r)
{
    VAEncMiscParameterBuffer *misc_param;
    VAEncMiscParameterFrameRate *fps;
    VABufferID buffer;
    VAStatus status;
    int total_size =
        sizeof(VAEncMiscParameterBuffer) +
        sizeof(VAEncMiscParameterFrameRate);

    status = vaCreateBuffer(r->va_dpy, r->encoder.ctx,
                            VAEncMiscParameterBufferType, total_size,
                            1, NULL, &buffer);
    if (status != VA_STATUS_SUCCESS)
        return VA_INVALID_ID;

    status = vaMapBuffer(r->va_dpy, buffer, (void **) &misc_param);
    if (status != VA_STATUS_SUCCESS) {
        vaDestroyBuffer(r->va_dpy, buffer);
        return VA_INVALID_ID;
    }

    memset(misc_param, 0, total_size);
    misc_param->type = VAEncMiscParameterTypeFrameRate;

    fps = (VAEncMiscParameterFrameRate *)misc_param->data;
    fps->framerate = 90;

    vaUnmapBuffer(r->va_dpy, buffer);
    return buffer;
}
/*
 * Create a misc-parameter buffer of type VAEncMiscParameterTypeHRD with
 * both initial_buffer_fullness and buffer_size set to 0.
 *
 * Returns the buffer id, or VA_INVALID_ID if the buffer cannot be created
 * or mapped.
 */
static VABufferID
encoder_update_misc_hrd_parameter(struct vaapi_recorder *r)
{
    VAEncMiscParameterBuffer *header;
    VAEncMiscParameterHRD *payload;
    VABufferID id;
    int bytes = sizeof(VAEncMiscParameterBuffer) +
                sizeof(VAEncMiscParameterHRD);

    if (vaCreateBuffer(r->va_dpy, r->encoder.ctx,
                       VAEncMiscParameterBufferType, bytes,
                       1, NULL, &id) != VA_STATUS_SUCCESS)
        return VA_INVALID_ID;

    if (vaMapBuffer(r->va_dpy, id, (void **) &header) != VA_STATUS_SUCCESS) {
        vaDestroyBuffer(r->va_dpy, id);
        return VA_INVALID_ID;
    }

    header->type = VAEncMiscParameterTypeHRD;
    payload = (VAEncMiscParameterHRD *) header->data;
    payload->initial_buffer_fullness = 0;
    payload->buffer_size = 0;

    vaUnmapBuffer(r->va_dpy, id);
    return id;
}
static int
setup_encoder(struct vaapi_recorder *r)
{
VAStatus status;
VASurfaceAttrib attrs[2] = { {VASurfaceAttribMemoryType, VA_SURFACE_ATTRIB_SETTABLE,{VAGenericValueTypeInteger, 0},},
{VASurfaceAttribPixelFormat,VA_SURFACE_ATTRIB_SETTABLE,{VAGenericValueTypeInteger, 0},} };
attrs[0].value.value.i = VA_SURFACE_ATTRIB_MEM_TYPE_VA;
attrs[1].value.value.i = USE_P010?VA_FOURCC_P010:VA_FOURCC_NV12;
status = vaCreateSurfaces(r->va_dpy, USE_P010?VA_RT_FORMAT_YUV420_10:VA_RT_FORMAT_YUV420,
r->width, r->height,
r->encoder.reference_picture, 3,
attrs, 2);
status = encoder_create_config(r);
if (status != VA_STATUS_SUCCESS) {
return -1;
}
if (status != VA_STATUS_SUCCESS) {
encoder_destroy_config(r);
return -1;
}
r->encoder.output_size = r->width * r->height;
r->encoder.intra_period = 32767;
encoder_init_parameters(r);
return 0;
}
static void
encoder_destroy(struct vaapi_recorder *r)
{
vaDestroySurfaces(r->va_dpy, r->encoder.reference_picture, 3);
encoder_destroy_config(r);
}
/*
 * Write the start code that precedes a NAL unit: the 4-byte form
 * 00 00 00 01 for VPS, SPS, PPS and AUD units, the 3-byte form 00 00 01
 * for every other unit type.
 */
static void nal_start_code_prefix(bitstream *bs, int nal_unit_type)
{
    const bool four_byte_prefix =
        nal_unit_type == NALU_VPS ||
        nal_unit_type == NALU_SPS ||
        nal_unit_type == NALU_PPS ||
        nal_unit_type == NALU_AUD;

    if (!four_byte_prefix) {
        bitstream_put_ui(bs, 0x000001, 24);
        return;
    }

    bitstream_put_ui(bs, 0x00000001, 32);
}
/*
 * Write the 16-bit HEVC NAL unit header: one zero bit, the 6-bit unit
 * type, a 6-bit layer id of 0 and a 3-bit temporal-id-plus-one of 1.
 */
static void nal_header(bitstream *bs, int nal_unit_type)
{
    bitstream_put_ui(bs, 0, 1);             /* forbidden_zero_bit */
    bitstream_put_ui(bs, nal_unit_type, 6); /* nal_unit_type */
    bitstream_put_ui(bs, 0, 6);             /* nuh_layer_id */
    bitstream_put_ui(bs, 1, 3);             /* nuh_temporal_id_plus1 */
}
/*
 * Terminate an RBSP payload: emit the stop bit (1) and then let
 * bitstream_byte_aligning() pad with 0 bits up to the next byte boundary.
 */
static void
rbsp_trailing_bits(struct bitstream *bs)
{
    bitstream_put_ui(bs, 1, 1);     /* rbsp_stop_one_bit */
    bitstream_byte_aligning(bs, 0); /* alignment padding, bit value 0 */
}
#define TEMPORAL_ID_NESTING 1
#define POC_BITS 16
/*
 * Write the general part of profile_tier_level(): profile space, tier,
 * profile idc, the 32 compatibility flags, the four source/constraint
 * flags, 44 zero bits and the level idc. Tier, profile and level come from
 * the sequence parameter struct; everything else is fixed.
 *
 * No sub-layer information is written.
 */
static void protier_rbsp(bitstream *bs, VAEncSequenceParameterBufferHEVC *seq)
{
    bitstream_put_ui(bs, 0, 2);                        /* general_profile_space */
    bitstream_put_ui(bs, seq->general_tier_flag, 1);   /* general_tier_flag */
    bitstream_put_ui(bs, seq->general_profile_idc, 5); /* general_profile_idc */

    /*
     * general_profile_compatibility_flag[0..31]: only the bit whose index
     * equals general_profile_idc is set.
     * todo: configurable flags?
     */
    for (uint32_t flag_idx = 0; flag_idx < 32; flag_idx++)
        bitstream_put_ui(bs, flag_idx == seq->general_profile_idc, 1);

    bitstream_put_ui(bs, 1, 1); /* general_progressive_source_flag */
    bitstream_put_ui(bs, 0, 1); /* general_interlaced_source_flag */
    bitstream_put_ui(bs, 1, 1); /* general_non_packed_constraint_flag */
    bitstream_put_ui(bs, 1, 1); /* general_frame_only_constraint_flag */

    /* 44 reserved zero bits, emitted as 16 + 16 + 12. */
    bitstream_put_ui(bs, 0, 16);
    bitstream_put_ui(bs, 0, 16);
    bitstream_put_ui(bs, 0, 12);

    bitstream_put_ui(bs, seq->general_level_idc, 8);   /* general_level_idc */
}
/*
 * Write the video parameter set payload for a single-layer, single
 * sub-layer stream. Apart from the profile/tier/level block, which is
 * taken from the sequence parameter struct, every value is fixed.
 *
 * No timing information and no extension data are written.
 */
static void vps_rbsp(bitstream *bs, VAEncSequenceParameterBufferHEVC *seq)
{
    bitstream_put_ui(bs, 0, 4);      /* vps_video_parameter_set_id */
    /*
     * Two bits, both 1: vps_reserved_three_2bits, also known as
     * vps_base_layer_internal_flag + vps_base_layer_available_flag.
     */
    bitstream_put_ui(bs, 3, 2);
    /* vps_reserved_zero_6bits, also known as vps_max_layers_minus1 */
    bitstream_put_ui(bs, 0, 6);
    bitstream_put_ui(bs, 0, 3);      /* vps_max_sub_layers_minus1 */
    bitstream_put_ui(bs, TEMPORAL_ID_NESTING, 1); /* vps_temporal_id_nesting_flag */
    bitstream_put_ui(bs, 0xFFFF, 16); /* vps_reserved_0xffff_16bits */

    protier_rbsp(bs, seq);

    bitstream_put_ui(bs, 0, 1);      /* vps_sub_layer_ordering_info_present_flag */
    /*
     * With the flag above at 0 and one sub-layer, exactly one set of
     * ordering info follows.
     * todo: check max_dec_pic_buffering; setting it to 0 breaks at least
     * some HEVC parsers.
     */
    bitstream_put_ue(bs, 1);         /* vps_max_dec_pic_buffering_minus1[0] */
    bitstream_put_ue(bs, 0);         /* vps_max_num_reorder_pics[0] */
    bitstream_put_ue(bs, 0);         /* vps_max_latency_increase_plus1[0] */

    bitstream_put_ui(bs, 0, 6);      /* vps_max_nuh_reserved_zero_layer_id (vps_max_layer_id) */
    bitstream_put_ue(bs, 0);         /* vps_num_op_sets_minus1 (vps_num_layer_sets_minus1) */
    /* vps_timing_info_present_flag = 0, so no timing/HRD fields follow */
    bitstream_put_ui(bs, 0, 1);

    /* todo: bitstream restrictions? */
    bitstream_put_ui(bs, 0, 1);      /* vps_extension_flag: no extension */
}
/*
 * Write the sequence parameter set payload.
 *
 * bs      bitstream being built
 * sps     sequence parameters handed to the driver; the packed header
 *         mirrors them
 * width   visible picture width in luma samples
 * height  visible picture height in luma samples
 *
 * The coded size is the visible size rounded up with ALIGN16(); a
 * conformance window crops it back to width x height when they differ.
 *
 * Fixes relative to the previous version:
 *  - conformance_window_flag was always written as 1, yet the four offsets
 *    were only written for unaligned sizes, which desynchronised the SPS
 *    whenever width and height were already aligned;
 *  - max_transform_hierarchy_depth_intra was written from the _inter field;
 *  - sps_max_dec_pic_buffering_minus1 was 0 although the single short-term
 *    reference picture set below declares one negative picture (and the
 *    VPS already signals 1).
 *
 * Scaling lists, PCM and VUI are not supported; the asserts guard that.
 */
static void sps_rbsp(bitstream *bs, VAEncSequenceParameterBufferHEVC *sps, int width, int height)
{
    const int coded_width = ALIGN16(width);
    const int coded_height = ALIGN16(height);
    const bool conformance_window_flag =
        coded_width != width || coded_height != height;

    bitstream_put_ui(bs, 0, 4); /* sps_video_parameter_set_id */
    bitstream_put_ui(bs, 0, 3); /* sps_max_sub_layers_minus1 */
    bitstream_put_ui(bs, TEMPORAL_ID_NESTING, 1); /* sps_temporal_id_nesting_flag */

    protier_rbsp(bs, sps);

    bitstream_put_ue(bs, 0); /* sps_seq_parameter_set_id */
    bitstream_put_ue(bs, sps->seq_fields.bits.chroma_format_idc); /* chroma_format_idc */
    /* todo: check if we can use 4:4:4/4:2:2 */
    if (sps->seq_fields.bits.chroma_format_idc == 3) {
        bitstream_put_ui(bs, sps->seq_fields.bits.separate_colour_plane_flag, 1);
    }

    bitstream_put_ue(bs, coded_width);  /* pic_width_in_luma_samples */
    bitstream_put_ue(bs, coded_height); /* pic_height_in_luma_samples */

    /* The flag must match whether the offsets below are present. */
    bitstream_put_ui(bs, conformance_window_flag, 1); /* conformance_window_flag */
    if (conformance_window_flag) {
        /*
         * Offsets are halved here, i.e. expressed in units of two luma
         * samples. NOTE(review): that matches 4:2:0 chroma subsampling
         * only - revisit if other chroma formats are enabled.
         */
        bitstream_put_ue(bs, 0); /* conf_win_left_offset */
        bitstream_put_ue(bs, (coded_width - width) >> 1); /* conf_win_right_offset */
        bitstream_put_ue(bs, 0); /* conf_win_top_offset */
        bitstream_put_ue(bs, (coded_height - height) >> 1); /* conf_win_bottom_offset */
    }

    bitstream_put_ue(bs, sps->seq_fields.bits.bit_depth_luma_minus8);   /* bit_depth_luma_minus8 */
    bitstream_put_ue(bs, sps->seq_fields.bits.bit_depth_chroma_minus8); /* bit_depth_chroma_minus8 */
    bitstream_put_ue(bs, POC_BITS - 4); /* log2_max_pic_order_cnt_lsb_minus4 */

    bitstream_put_ui(bs, 0, 1); /* sps_sub_layer_ordering_info_present_flag */
    /*
     * One set of ordering info. The buffering value has to cover the one
     * reference picture declared in the short-term RPS below and agree
     * with the value written in the VPS.
     */
    bitstream_put_ue(bs, 1); /* sps_max_dec_pic_buffering_minus1[0] */
    bitstream_put_ue(bs, 0); /* sps_max_num_reorder_pics[0] */
    bitstream_put_ue(bs, 0); /* sps_max_latency_increase_plus1[0] */

    bitstream_put_ue(bs, sps->log2_min_luma_coding_block_size_minus3);   /* log2_min_luma_coding_block_size_minus3 */
    bitstream_put_ue(bs, sps->log2_diff_max_min_luma_coding_block_size); /* log2_diff_max_min_luma_coding_block_size */
    bitstream_put_ue(bs, sps->log2_min_transform_block_size_minus2);     /* log2_min_luma_transform_block_size_minus2 */
    bitstream_put_ue(bs, sps->log2_diff_max_min_transform_block_size);   /* log2_diff_max_min_luma_transform_block_size */
    bitstream_put_ue(bs, sps->max_transform_hierarchy_depth_inter);      /* max_transform_hierarchy_depth_inter */
    bitstream_put_ue(bs, sps->max_transform_hierarchy_depth_intra);      /* max_transform_hierarchy_depth_intra */

    /* Scaling lists are not supported: the flag is always written as 0. */
    assert(!sps->seq_fields.bits.scaling_list_enabled_flag);
    bitstream_put_ui(bs, 0, 1); /* scaling_list_enabled_flag */

    bitstream_put_ui(bs, sps->seq_fields.bits.amp_enabled_flag, 1); /* amp_enabled_flag */
    bitstream_put_ui(bs, sps->seq_fields.bits.sample_adaptive_offset_enabled_flag, 1); /* sample_adaptive_offset_enabled_flag */

    /* PCM is expected to be disabled; the branch is kept for completeness. */
    bitstream_put_ui(bs, sps->seq_fields.bits.pcm_enabled_flag, 1); /* pcm_enabled_flag */
    assert(!sps->seq_fields.bits.pcm_enabled_flag);
    if (sps->seq_fields.bits.pcm_enabled_flag) {
        bitstream_put_ui(bs, sps->pcm_sample_bit_depth_luma_minus1, 4);
        bitstream_put_ui(bs, sps->pcm_sample_bit_depth_chroma_minus1, 4);
        bitstream_put_ue(bs, sps->log2_min_pcm_luma_coding_block_size_minus3);
        bitstream_put_ue(bs, sps->log2_max_pcm_luma_coding_block_size_minus3 - sps->log2_min_pcm_luma_coding_block_size_minus3); /* log2_diff_max_min_pcm_luma_coding_block_size */
        bitstream_put_ui(bs, sps->seq_fields.bits.pcm_loop_filter_disabled_flag, 1);
    }

    /* A single short-term RPS: one negative picture at POC delta -1. */
    bitstream_put_ue(bs, 1);    /* num_short_term_ref_pic_sets */
    bitstream_put_ue(bs, 1);    /* num_negative_pics */
    bitstream_put_ue(bs, 0);    /* num_positive_pics */
    bitstream_put_ue(bs, 0);    /* delta_poc_s0_minus1[0] */
    bitstream_put_ui(bs, 1, 1); /* used_by_curr_pic_s0_flag[0] */

    bitstream_put_ui(bs, 0, 1); /* long_term_ref_pics_present_flag */

    bitstream_put_ui(bs, sps->seq_fields.bits.sps_temporal_mvp_enabled_flag, 1);       /* sps_temporal_mvp_enabled_flag */
    bitstream_put_ui(bs, sps->seq_fields.bits.strong_intra_smoothing_enabled_flag, 1); /* strong_intra_smoothing_enabled_flag */

    /* VUI is not written, so the flag must be 0. */
    assert(!sps->vui_parameters_present_flag);
    bitstream_put_ui(bs, sps->vui_parameters_present_flag, 1); /* vui_parameters_present_flag */

    bitstream_put_ui(bs, 0, 1); /* sps_extension_present_flag */
}
#define PPS_CABAC_INIT_PRESENT_FLAG 1
/*
 * Write the picture parameter set payload.
 *
 * Most syntax elements are copied from the picture parameter struct that
 * is also handed to the driver. The following are fixed here instead:
 * both parameter set ids (0), output_flag_present_flag (0),
 * num_extra_slice_header_bits (0), cabac_init_present_flag
 * (PPS_CABAC_INIT_PRESENT_FLAG), pps_slice_chroma_qp_offsets_present_flag
 * (0), deblocking_filter_control_present_flag (0),
 * lists_modification_present_flag (0), log2_parallel_merge_level_minus2
 * (0) and both extension flags (0).
 *
 * The field order is significant: it defines the bitstream layout.
 */
static void pps_rbsp(bitstream *bs, VAEncPictureParameterBufferHEVC *pic)
{
uint32_t i = 0;
// Hard-coded off, so the deblocking control block further down never runs.
bool deblocking_filter_control_present_flag = false;
bitstream_put_ue(bs, 0); // pps.pps_pic_parameter_set_id
bitstream_put_ue(bs, 0); // pps.pps_seq_parameter_set_id
bitstream_put_ui(bs, pic->pic_fields.bits.dependent_slice_segments_enabled_flag, 1); // pps.dependent_slice_segments_enabled_flag // TODO: !!!
bitstream_put_ui(bs, 0, 1); //pps.output_flag_present_flag
bitstream_put_ui(bs, 0, 3); // pps.num_extra_slice_header_bits
bitstream_put_ui(bs, pic->pic_fields.bits.sign_data_hiding_enabled_flag, 1); //pps.sign_data_hiding_enabled_flag
bitstream_put_ui(bs, PPS_CABAC_INIT_PRESENT_FLAG, 1); // pps.cabac_init_present_flag
bitstream_put_ue(bs, pic->num_ref_idx_l0_default_active_minus1); //pps.num_ref_idx_l0_default_active_minus1
bitstream_put_ue(bs, pic->num_ref_idx_l1_default_active_minus1); //pps.num_ref_idx_l1_default_active_minus1
bitstream_put_se(bs, pic->pic_init_qp - 26); //pps.init_qp_minus26
bitstream_put_ui(bs, pic->pic_fields.bits.constrained_intra_pred_flag, 1); //pps.constrained_intra_pred_flag
bitstream_put_ui(bs, pic->pic_fields.bits.transform_skip_enabled_flag, 1); //pps.transform_skip_enabled_flag
bitstream_put_ui(bs, pic->pic_fields.bits.cu_qp_delta_enabled_flag, 1); //pps.cu_qp_delta_enabled_flag
if (pic->pic_fields.bits.cu_qp_delta_enabled_flag) {
bitstream_put_ue(bs, pic->diff_cu_qp_delta_depth);//diff_cu_qp_delta_depth
}
bitstream_put_se(bs, pic->pps_cb_qp_offset);//pps.pps_cb_qp_offset
bitstream_put_se(bs, pic->pps_cr_qp_offset);//pps.pps_cr_qp_offset)
bitstream_put_ui(bs, 0, 1);//pps.pps_slice_chroma_qp_offsets_present_flag
bitstream_put_ui(bs, pic->pic_fields.bits.weighted_pred_flag, 1);//pps.weighted_pred_flag
bitstream_put_ui(bs, pic->pic_fields.bits.weighted_bipred_flag, 1); //pps.weighted_bipred_flag
bitstream_put_ui(bs, pic->pic_fields.bits.transquant_bypass_enabled_flag, 1);//pps.transquant_bypass_enabled_flag
bitstream_put_ui(bs, pic->pic_fields.bits.tiles_enabled_flag, 1);//pps.tiles_enabled_flag
bitstream_put_ui(bs, pic->pic_fields.bits.entropy_coding_sync_enabled_flag, 1);//pps.entropy_coding_sync_enabled_flag
// Tile layout: always signalled with explicit (non-uniform) column/row sizes.
if (pic->pic_fields.bits.tiles_enabled_flag) {
bool uniform_spacing_flag = false;
bitstream_put_ue(bs, pic->num_tile_columns_minus1);
bitstream_put_ue(bs, pic->num_tile_rows_minus1);
bitstream_put_ui(bs, uniform_spacing_flag, 1);//uniform_spacing_flag
if (!uniform_spacing_flag) {
// The last column/row size is not written, hence '<' and not '<='.
for (i = 0; i < pic->num_tile_columns_minus1; i++) {
bitstream_put_ue(bs, pic->column_width_minus1[i]);
}
for (i = 0; i < pic->num_tile_rows_minus1; i++) {
bitstream_put_ue(bs, pic->row_height_minus1[i]);
}
}
bitstream_put_ui(bs, pic->pic_fields.bits.loop_filter_across_tiles_enabled_flag, 1);
}
bitstream_put_ui(bs, pic->pic_fields.bits.pps_loop_filter_across_slices_enabled_flag, 1); // pps.pps_loop_filter_across_slices_enabled_flag
bitstream_put_ui(bs, deblocking_filter_control_present_flag, 1);//pps.deblocking_filter_control_present_flag
// Dead while deblocking_filter_control_present_flag stays false (see top).
if (deblocking_filter_control_present_flag) {
bool deblocking_filter_override_enabled_flag = false;
bool pps_deblocking_filter_disabled_flag = true;
bitstream_put_ui(bs, deblocking_filter_override_enabled_flag, 1);
bitstream_put_ui(bs, pps_deblocking_filter_disabled_flag, 1);
int beta_offset_div2 = 0;
int tc_offset_div2 = 0;
if (!pps_deblocking_filter_disabled_flag) {
bitstream_put_se(bs, beta_offset_div2);
bitstream_put_se(bs, tc_offset_div2);
}
}
// pps_scaling_list_data_present_flag is set as 0 in fill_pps_header() for now
assert(!pic->pic_fields.bits.scaling_list_data_present_flag);
bitstream_put_ui(bs, pic->pic_fields.bits.scaling_list_data_present_flag, 1);//pps.pps_scaling_list_data_present_flag
/*if (pps.pps_scaling_list_data_present_flag) {
//scaling_list_data();
}*/
bitstream_put_ui(bs, 0, 1);//pps.lists_modification_present_flag
bitstream_put_ue(bs, 0); //pps.log2_parallel_merge_level_minus2
bitstream_put_ui(bs, 0, 1);//pps.slice_segment_header_extension_present_flag
bitstream_put_ui(bs,0, 1); //pps.pps_extension_present_flag
/*if (pps.pps_extension_present_flag) {
bitstream_put_ui(bs, pps.pps_range_extension_flag, 1);
bitstream_put_ui(bs, pps.pps_multilayer_extension_flag, 1);
bitstream_put_ui(bs, pps.pps_3d_extension_flag, 1);
bitstream_put_ui(bs, pps.pps_extension_5bits, 1);
}
if (pps.pps_range_extension_flag) {
if (pps.transform_skip_enabled_flag)
bitstream_put_ue(bs, pps.log2_max_transform_skip_block_size_minus2);
bitstream_put_ui(bs, pps.cross_component_prediction_enabled_flag, 1);
bitstream_put_ui(bs, pps.chroma_qp_offset_list_enabled_flag, 1);
if (pps.chroma_qp_offset_list_enabled_flag) {
bitstream_put_ue(bs, pps.diff_cu_chroma_qp_offset_depth);
bitstream_put_ue(bs, pps.chroma_qp_offset_list_len_minus1);
for (i = 0; i <= pps.chroma_qp_offset_list_len_minus1; i++) {
bitstream_put_ue(bs, pps.cb_qp_offset_list[i]);
bitstream_put_ue(bs, pps.cr_qp_offset_list[i]);
}
}
bitstream_put_ue(bs, pps.log2_sao_offset_scale_luma);
bitstream_put_ue(bs, pps.log2_sao_offset_scale_chroma);
}
*/
}
/*
 * Write the slice segment header for the single slice of a frame.
 *
 * bs        bitstream being built
 * framenum  frame counter; 0 selects an IDR picture with an I slice, any
 *           other value a TRAIL_R picture with a P slice. It is also
 *           written as the picture order count LSB for non-IDR pictures.
 * sps       sequence parameters (gate several optional fields)
 * slice     slice parameters handed to the driver
 * pic       picture parameters handed to the driver
 *
 * The header always claims to be the first slice segment in the picture
 * and always selects the short-term reference picture set from the SPS.
 * The regions under "#if 0" and the commented-out blocks are syntax
 * elements that the fixed SPS/PPS settings make unnecessary.
 *
 * The field order is significant: it defines the bitstream layout.
 */
static void sliceHeader_rbsp(
bitstream *bs,
int framenum, VAEncSequenceParameterBufferHEVC *sps, VAEncSliceParameterBufferHEVC *slice, VAEncPictureParameterBufferHEVC *pic)
{
uint8_t nal_unit_type = NALU_TRAIL_R;
//int gop_ref_distance = ip_period;
int i = 0;
bool is_idr = framenum == 0;
// Derived locally from framenum; slice->slice_type is only read in the
// collocated_from_l0 branch further down.
int slice_type = is_idr?SLICE_I : SLICE_P;
int short_term_ref_pic_set_sps_flag = 1; // !is_idr;
int slice_qp_delta = slice->slice_qp_delta;
int pic_order_cnt_lsb = framenum;
bitstream_put_ui(bs, 1, 1);// first_slice_segment_in_pic_flag
if (pic_order_cnt_lsb == 0)
nal_unit_type = NALU_IDR_W_DLP;
// idr
// Unit types 16..23 carry no_output_of_prior_pics_flag; it is written as 1.
if (nal_unit_type >= 16 && nal_unit_type <= 23)
bitstream_put_ui(bs, 1, 1); //no_output_of_prior_pics_flag
bitstream_put_ue(bs, 0);//slice_pic_parameter_set_id
/*if (!slice_header->first_slice_segment_in_pic_flag) {
if (slice_header->dependent_slice_segment_flag) {
bitstream_put_ui(bs, slice_header->dependent_slice_segment_flag, 1);
}
bitstream_put_ui(bs, slice_header->slice_segment_address,
(uint8_t)(ceil(log(slice_header->picture_height_in_ctus * slice_header->picture_width_in_ctus) / log(2.0))));
}*/
// !slice_header->dependent_slice_segment_flag
if (!slice->slice_fields.bits.dependent_slice_segment_flag) {
/*for (i = 0; i < pps->num_extra_slice_header_bits; i++) {
bitstream_put_ui(bs, slice_header->slice_reserved_undetermined_flag[i], 1);
}*/
bitstream_put_ue(bs, slice_type);
/*if (pps->output_flag_present_flag) {
bitstream_put_ui(bs, slice_header->pic_output_flag, 1);
}*/
if(sps->seq_fields.bits.separate_colour_plane_flag)
bitstream_put_ui(bs, slice->slice_fields.bits.colour_plane_id, 2);
// POC and reference picture set information is only present for non-IDR pictures.
if (!(nal_unit_type == NALU_IDR_W_DLP || nal_unit_type == NALU_IDR_N_LP)) {
// slice_header->pic_order_cnt_lsb
// NOTE(review): framenum is written without masking to POC_BITS bits;
// whether values >= 2^POC_BITS are truncated depends on
// bitstream_put_ui() - confirm.
bitstream_put_ui(bs, pic_order_cnt_lsb, POC_BITS );//(sps->log2_max_pic_order_cnt_lsb_minus4 + 4)
bitstream_put_ui(bs, 1, 1); // short_term_ref_pic_set_sps_flag
// assume we are only pushing I-slices on IDR frames, SPS only references (n-1)th frame for now
// this should be restored when p-slice references something different or using CRA frames
#if 0
if (!short_term_ref_pic_set_sps_flag) {
// refer to Teddi
if (sps->num_short_term_ref_pic_sets > 0)
bitstream_put_ui(bs, 0, 1); // inter_ref_pic_set_prediction_flag, always 0 for now
bitstream_put_ue(bs, slice_header->strp.num_negative_pics);
bitstream_put_ue(bs, slice_header->strp.num_positive_pics);
// below chunks of codes (majorly two big 'for' blocks) are refering both
// Teddi and mv_encoder, they look kind of ugly, however, keep them as these
// since it will be pretty easy to update if change/update in Teddi side.
// According to Teddi, these are CModel Implementation.
int prev = 0;
int frame_cnt_in_gop = slice_header->pic_order_cnt_lsb / 2;
// this is the first big 'for' block
for (i = 0; i < slice_header->strp.num_negative_pics; i++) {
// Low Delay B case
if (1 == gop_ref_distance) {
bitstream_put_ue(bs, 0 /*delta_poc_s0_minus1*/);
} else {
// For Non-BPyramid GOP i.e B0 type
if (num_active_ref_p > 1) {
// DeltaPOC Equals NumB
int DeltaPoc = -(int)(gop_ref_distance);
bitstream_put_ue(bs, prev - DeltaPoc - 1 /*delta_poc_s0_minus1*/);
} else {
// the big 'if' wraps here is -
// if (!slice_header->short_term_ref_pic_set_sps_flag)
// From the Teddi logic, the short_term_ref_pic_set_sps_flag only can be '0'
// either for B-Prymid or first several frames in a GOP in multi-ref cases
// when there are not enough backward refs.
// So though there are really some codes under this 'else'in Teddi, don't
// want to introduce them in MEA to avoid confusion, and put an assert
// here to guard that there is new case we need handle in the future.
assert(0);
}
}
bitstream_put_ui(bs, 1 /*used_by_curr_pic_s0_flag*/, 1);
}
prev = 0;
// this is the second big 'for' block
for (i = 0; i < slice_header->strp.num_positive_pics; i++) {
// Non-BPyramid GOP
if (num_active_ref_p > 1) {
// MultiRef Case
if (frame_cnt_in_gop < gop_ref_distance) {
int DeltaPoc = (int)(gop_ref_distance - frame_cnt_in_gop);
bitstream_put_ue(bs, DeltaPoc - prev - 1 /*delta_poc_s1_minus1*/);
} else if (frame_cnt_in_gop > gop_ref_distance) {
int DeltaPoc = (int)(gop_ref_distance * slice_header->strp.num_negative_pics - frame_cnt_in_gop);
bitstream_put_ue(bs, DeltaPoc - prev - 1 /*delta_poc_s1_minus1*/);
}
} else {
// the big 'if' wraps here is -
// if (!slice_header->short_term_ref_pic_set_sps_flag)
// From the Teddi logic, the short_term_ref_pic_set_sps_flag only can be '0'
// either for B-Prymid or first several frames in a GOP in multi-ref cases
// when there are not enough backward refs.
// So though there are really some codes under this 'else'in Teddi, don't
// want to introduce them in MEA to avoid confusion, and put an assert
// here to guard that there is new case we need handle in the future.
assert(0);
}
bitstream_put_ui(bs, 1 /*used_by_curr_pic_s1_flag*/, 1);
}
} else if (sps->num_short_term_ref_pic_sets > 1)
bitstream_put_ui(bs, slice_header->short_term_ref_pic_set_idx,
(uint8_t)(ceil(log(sps->num_short_term_ref_pic_sets) / log(2.0))));
#endif
// no long term refs
#if 0
if (sps->long_term_ref_pics_present_flag) {
if (sps->num_long_term_ref_pics_sps > 0)
bitstream_put_ue(bs, slice_header->num_long_term_sps);
bitstream_put_ue(bs, slice_header->num_long_term_pics);
}
#endif
if (sps->seq_fields.bits.sps_temporal_mvp_enabled_flag)
bitstream_put_ui(bs, slice->slice_fields.bits.slice_temporal_mvp_enabled_flag, 1);
}
if (sps->seq_fields.bits.sample_adaptive_offset_enabled_flag ) { // sample_adaptive_offset_enabled_flag
bitstream_put_ui(bs, slice->slice_fields.bits.slice_sao_luma_flag, 1);// slice_sao_luma_flag
bitstream_put_ui(bs, slice->slice_fields.bits.slice_sao_chroma_flag, 1);//slice_sao_chroma_flag
}
// Inter-prediction fields, P slices only (B slices are never produced here).
if (slice_type != SLICE_I) {
bitstream_put_ui(bs, slice->slice_fields.bits.num_ref_idx_active_override_flag, 1); //num_ref_idx_active_override_flag
if (slice->slice_fields.bits.num_ref_idx_active_override_flag) {
bitstream_put_ue(bs, slice->num_ref_idx_l0_active_minus1);
//if (slice->slice_type == SLICE_B)
//bitstream_put_ue(bs, slice->num_ref_idx_l1_active_minus1);
}
#if 0
if (pps->lists_modification_present_flag && slice_header->num_poc_total_cur > 1) {
/* ref_pic_list_modification */
bitstream_put_ui(bs, slice_header->ref_pic_list_modification_flag_l0, 1);
if (slice_header->ref_pic_list_modification_flag_l0) {
for (i = 0; i <= slice_header->num_ref_idx_l0_active_minus1; i++) {
bitstream_put_ui(bs, slice_header->list_entry_l0[i],
(uint8_t)(ceil(log(slice_header->num_poc_total_cur) / log(2.0))));
}
}
bitstream_put_ui(bs, slice_header->ref_pic_list_modification_flag_l1, 1);
if (slice_header->ref_pic_list_modification_flag_l1) {
for (i = 0; i <= slice_header->num_ref_idx_l1_active_minus1; i++) {
bitstream_put_ui(bs, slice_header->list_entry_l1[i],
(uint8_t)(ceil(log(slice_header->num_poc_total_cur) / log(2.0))));
}
}
}
#endif
/*if (slice_header->slice_type == SLICE_B) {
bitstream_put_ui(bs, slice_header->mvd_l1_zero_flag, 1);
}*/
// Compile-time constant; must stay in sync with the value written into the PPS.
if (PPS_CABAC_INIT_PRESENT_FLAG) {
bitstream_put_ui(bs, slice->slice_fields.bits.cabac_init_flag, 1); //slice_header->cabac_init_present_flag
}
if (slice->slice_fields.bits.slice_temporal_mvp_enabled_flag) {
int collocated_from_l0_flag = 1;
if (slice->slice_type == SLICE_B) {
collocated_from_l0_flag = slice->slice_fields.bits.collocated_from_l0_flag;
bitstream_put_ui(bs, collocated_from_l0_flag , 1);
}
if (((collocated_from_l0_flag && (slice->num_ref_idx_l0_active_minus1 > 0)) ||
(!collocated_from_l0_flag && (slice->num_ref_idx_l1_active_minus1 > 0)))) {
// NOTE(review): the value written as collocated_ref_idx is the PPS default
// active reference count, not a collocated index taken from the slice
// parameters - confirm this is intended.
bitstream_put_ue(bs, pic->num_ref_idx_l0_default_active_minus1); // collocated_ref_idx
}
}
bitstream_put_ue(bs, 5 - slice->max_num_merge_cand);//slice_header->five_minus_max_num_merge_cand
}
bitstream_put_se(bs, slice_qp_delta);
/*if (pps->chroma_qp_offset_list_enabled_flag) {
bitstream_put_se(bs, slice_header->slice_qp_delta_cb);
bitstream_put_se(bs, slice_header->slice_qp_delta_cr);
}
if (pps->deblocking_filter_override_enabled_flag) {
bitstream_put_ui(bs, slice_header->deblocking_filter_override_flag, 1);
}
if (slice_header->deblocking_filter_override_flag) {
bitstream_put_ui(bs, slice_header->disable_deblocking_filter_flag, 1);
if (!slice_header->disable_deblocking_filter_flag) {
bitstream_put_se(bs, slice_header->beta_offset_div2);
bitstream_put_se(bs, slice_header->tc_offset_div2);
}
}*/
if (pic->pic_fields.bits.pps_loop_filter_across_slices_enabled_flag &&
(slice->slice_fields.bits.slice_sao_luma_flag || slice->slice_fields.bits.slice_sao_chroma_flag ||
!slice->slice_fields.bits.slice_deblocking_filter_disabled_flag)) {
bitstream_put_ui(bs, slice->slice_fields.bits.slice_loop_filter_across_slices_enabled_flag, 1);
}
}
// Entry points: the count is always written as 0, so no offsets follow.
if ((pic->pic_fields.bits.tiles_enabled_flag) || (pic->pic_fields.bits.entropy_coding_sync_enabled_flag)) {
int num_entry_point_offsets = 0, offset_len_minus1 = 0;
bitstream_put_ue(bs, num_entry_point_offsets);
if (num_entry_point_offsets > 0) {
bitstream_put_ue(bs, offset_len_minus1);
}
}
/*if (pps->slice_segment_header_extension_present_flag) {
int slice_header_extension_length = 0;
bitstream_put_ue(bs, slice_header_extension_length);
}*/
}
/*
 * Debug hook for dumping a packed header to a file.
 *
 * The body is compiled out with "#if 0", so the function currently does
 * nothing. When enabled it writes the first bit_offset / 8 bytes of the
 * bitstream buffer to the file called name; the fopen() result is not
 * checked in that code.
 */
void bitstream_dump(const char *name, bitstream *bs)
{
#if 0
FILE *f = fopen(name,"wb");
fwrite(bs->buffer, 1, bs->bit_offset / 8,f);
fclose(f);
#endif
}
static int
build_packed_pic_buffer(struct vaapi_recorder *r,
void **header_buffer)
{
struct bitstream bs;
bitstream_start(&bs);
nal_start_code_prefix(&bs, NALU_PPS);
nal_header(&bs, NALU_PPS);
pps_rbsp(&bs,&r->encoder.param.pic);
rbsp_trailing_bits(&bs);
bitstream_end(&bs);
bitstream_dump("pic.bin",&bs);
*header_buffer = bs.buffer;
return bs.bit_offset;
}
static int
build_packed_video_buffer(struct vaapi_recorder *r,
void **header_buffer)
{
struct bitstream bs;
bitstream_start(&bs);
nal_start_code_prefix(&bs, NALU_VPS);
nal_header(&bs, NALU_VPS);
vps_rbsp(&bs, &r->encoder.param.seq);
rbsp_trailing_bits(&bs);
bitstream_end(&bs);
bitstream_dump("vid.bin",&bs);
*header_buffer = bs.buffer;
return bs.bit_offset;
}
static int
build_packed_seq_buffer(struct vaapi_recorder *r,
void **header_buffer)
{
struct bitstream bs;
bitstream_start(&bs);
nal_start_code_prefix(&bs, NALU_SPS);
nal_header(&bs, NALU_SPS);
sps_rbsp(&bs, &r->encoder.param.seq, r->width, r->height);
rbsp_trailing_bits(&bs);
bitstream_end(&bs);
bitstream_dump("seq.bin",&bs);
*header_buffer = bs.buffer;
return bs.bit_offset;
}
static int
build_packed_slice_buffer(struct vaapi_recorder *r,
void **header_buffer)
{
struct bitstream bs;
int is_idr = r->frame_count == 0; // !!pic_param.pic_fields.bits.idr_pic_flag;
int naluType = is_idr ? NALU_IDR_W_DLP : NALU_TRAIL_R;
bitstream_start(&bs);
nal_start_code_prefix(&bs, naluType);
nal_header(&bs, naluType);
sliceHeader_rbsp(&bs, r->frame_count, &r->encoder.param.seq, &r->encoder.param.slice, &r->encoder.param.pic);
rbsp_trailing_bits(&bs);
bitstream_end(&bs);
bitstream_dump("slice.bin",&bs);
*header_buffer = bs.buffer;
return bs.bit_offset;
}
static int
create_packed_header_buffers(struct vaapi_recorder *r, VABufferID *buffers,
VAEncPackedHeaderType type,
void *data, int bit_length)
{
VAEncPackedHeaderParameterBuffer packed_header;
VAStatus status;
packed_header.type = type;
packed_header.bit_length = bit_length;
packed_header.has_emulation_bytes = 0;
status = vaCreateBuffer(r->va_dpy, r->encoder.ctx,
VAEncPackedHeaderParameterBufferType,
sizeof packed_header, 1, &packed_header,
&buffers[0]);
if (status != VA_STATUS_SUCCESS)
{
printf("%s: %d\n", __PRETTY_FUNCTION__, status);
return 0;
}
status = vaCreateBuffer(r->va_dpy, r->encoder.ctx,
VAEncPackedHeaderDataBufferType,
(bit_length + 7) / 8, 1, data, &buffers[1]);
if (status != VA_STATUS_SUCCESS) {
printf("%s: %d\n", __PRETTY_FUNCTION__, status);
vaDestroyBuffer(r->va_dpy, buffers[0]);
return 0;
}
return 2;
}
/*
 * Build the packed VPS and SPS headers and wrap each in a pair of VA
 * buffers, appended to buffers in that order. Both are submitted with the
 * VAEncPackedHeaderSequence type.
 *
 * Returns the number of buffer ids written to buffers (each header
 * contributes 2, or 0 if its buffers could not be created).
 */
static int
encoder_prepare_headers(struct vaapi_recorder *r, VABufferID *buffers)
{
    int produced = 0;
    int bits;
    void *payload;

    /* Video parameter set first ... */
    bits = build_packed_video_buffer(r, &payload);
    produced += create_packed_header_buffers(r, buffers + produced,
                                             VAEncPackedHeaderSequence,
                                             payload, bits);
    free(payload);

    /* ... then the sequence parameter set. */
    bits = build_packed_seq_buffer(r, &payload);
    produced += create_packed_header_buffers(r, buffers + produced,
                                             VAEncPackedHeaderSequence,
                                             payload, bits);
    free(payload);

    return produced;
}
/*
 * Placeholder: picture submission is not implemented in this function.
 *
 * The previous body was empty, so control flowed off the end of a
 * function declared to return VAStatus, which is undefined behaviour if
 * the function is ever called. It now reports a definite status.
 *
 * Always returns VA_STATUS_ERROR_UNIMPLEMENTED; none of the arguments are
 * used.
 */
static VAStatus
encoder_render_picture(struct vaapi_recorder *r, VASurfaceID input,
                       VABufferID *buffers, int count)
{
    (void) r;
    (void) input;
    (void) buffers;
    (void) count;

    return VA_STATUS_ERROR_UNIMPLEMENTED;
}
/*
 * Allocate a coded-data buffer of encoder.output_size bytes for the
 * encoder to write into.
 *
 * Returns the buffer id, or VA_INVALID_ID if vaCreateBuffer() fails.
 */
static VABufferID
encoder_create_output_buffer(struct vaapi_recorder *r)
{
    VABufferID coded;
    VAStatus rc = vaCreateBuffer(r->va_dpy, r->encoder.ctx,
                                 VAEncCodedBufferType, r->encoder.output_size,
                                 1, NULL, &coded);

    return (rc == VA_STATUS_SUCCESS) ? coded : VA_INVALID_ID;
}
/* Outcome of writing one coded buffer to the output, as reported by encoder_write_output(). */
enum output_write_status {
/* Every segment was handed to the output. */
OUTPUT_WRITE_SUCCESS,
/* A segment reported slice overflow; the output buffer size has been doubled. */
OUTPUT_WRITE_OVERFLOW,
/* The coded buffer could not be mapped, or writing failed. */
OUTPUT_WRITE_FATAL
};
/*
 * Write len bytes from data to fd, retrying after short writes and EINTR.
 * Returns 0 when everything was written, -1 on error or when write()
 * makes no progress.
 */
static int
encoder_write_all(int fd, const void *data, size_t len)
{
    const uint8_t *cursor = (const uint8_t *) data;

    while (len > 0) {
        ssize_t written = write(fd, cursor, len);

        if (written < 0) {
            if (errno == EINTR)
                continue;
            return -1;
        }
        if (written == 0)
            return -1;

        cursor += written;
        len -= (size_t) written;
    }

    return 0;
}

/*
 * Map the coded buffer and write every segment of it to r->output_fd.
 *
 * Returns:
 *   OUTPUT_WRITE_SUCCESS   all segments were written completely
 *   OUTPUT_WRITE_OVERFLOW  a segment reports slice overflow;
 *                          encoder.output_size has been doubled
 *   OUTPUT_WRITE_FATAL     the buffer could not be mapped or a write failed
 *
 * Each write is now checked individually. The previous version summed the
 * write() return values and only tested the total, so a failed write after
 * earlier successful ones went unnoticed, and short writes were ignored.
 * An empty segment list is tolerated instead of being dereferenced.
 *
 * NOTE(review): as before, segments written ahead of an overflowing one
 * have already reached the output when OUTPUT_WRITE_OVERFLOW is returned.
 */
static enum output_write_status
encoder_write_output(struct vaapi_recorder *r, VABufferID output_buf)
{
    VACodedBufferSegment *segment = NULL;
    enum output_write_status result = OUTPUT_WRITE_SUCCESS;
    VAStatus status;

    status = vaMapBuffer(r->va_dpy, output_buf, (void **) &segment);
    if (status != VA_STATUS_SUCCESS)
        return OUTPUT_WRITE_FATAL;

    while (segment) {
        if (segment->status & VA_CODED_BUF_STATUS_SLICE_OVERFLOW_MASK) {
            r->encoder.output_size *= 2;
            result = OUTPUT_WRITE_OVERFLOW;
            break;
        }

        if (encoder_write_all(r->output_fd, segment->buf, segment->size) < 0) {
            result = OUTPUT_WRITE_FATAL;
            break;
        }

        segment = (VACodedBufferSegment *) segment->next;
    }

    vaUnmapBuffer(r->va_dpy, output_buf);
    return result;
}
/* Forward declaration: hands a coded buffer to the output thread. The
 * definition appears further down in this file, after the thread functions. */
static int
push_output_buffer(struct vaapi_recorder *r, VABufferID buf, VASurfaceID surf);
static void
encoder_encode(struct vaapi_recorder *r, VASurfaceID input)
{
VABufferID output_buf = VA_INVALID_ID;
VABufferID buffers[13];
int count = 0;
int i, slice_type;
enum output_write_status ret;
if ((r->frame_count % r->encoder.intra_period) == 0)
slice_type = SLICE_I;
else
slice_type = SLICE_P;
if (r->frame_count == 0)
buffers[count++] = encoder_update_seq_parameters(r);
VABufferID slice_buf = encoder_update_slice_parameter(r, slice_type);
//buffers[count++] = slice_buf;
for (i = 0; i < count; i++)
if (buffers[i] == VA_INVALID_ID)
goto bail;
VAStatus status;
status = vaBeginPicture(r->va_dpy, r->encoder.ctx, input);
if (status != VA_STATUS_SUCCESS)
goto bail;
//status = vaRenderPicture(r->va_dpy, r->encoder.ctx, &buffers[0], 1);
if (status != VA_STATUS_SUCCESS)
goto bail;
//status = vaRenderPicture(r->va_dpy, r->encoder.ctx, &buffers[2], 1);
do {
output_buf = encoder_create_output_buffer(r);
if (output_buf == VA_INVALID_ID)
goto bail;
VABufferID pic_buf =
encoder_update_pic_parameters(r, output_buf);
if (count && buffers[count - 1] == VA_INVALID_ID)
goto bail;
if (r->frame_count == 0)
count += encoder_prepare_headers(r, buffers + count);
// todo: this might be required in every frame
//if(r->frame_count == 0)
{
void *data;
uint32_t bit_length;
if(r->frame_count == 0)
{
buffers[count++] = encoder_update_misc_framerate_parameter(r);
buffers[count++] = encoder_update_misc_rate_parameter(r);
buffers[count++] = encoder_update_misc_hrd_parameter(r);
bit_length = build_packed_pic_buffer(r, &data);
count += create_packed_header_buffers(r, buffers + count, VAEncPackedHeaderPicture,
data, bit_length);
free(data);
}
buffers[count++]= pic_buf;
bit_length = build_packed_slice_buffer(r, &data );
count += create_packed_header_buffers(r, buffers + count, VAEncPackedHeaderSlice,
data, bit_length);
free(data);
}
buffers[count++] = slice_buf;
status = vaRenderPicture(r->va_dpy, r->encoder.ctx, buffers, count);
/*for(int i = 0; i < count; i++)
{
status = vaRenderPicture(r->va_dpy, r->encoder.ctx, &buffers[i], 1);
printf("%d %d\n", i, status);
}*/
/*VAStatus er = encoder_render_picture(r, input, buffers, count);
if(er != VA_STATUS_SUCCESS)
{
printf("render error %d\n", er);
goto bail;
}*/
status = vaEndPicture(r->va_dpy, r->encoder.ctx);
if (status != VA_STATUS_SUCCESS)
{
printf("end %d\n", status);
goto bail;
}
//status = vaSyncSurface(r->va_dpy, input);
//ret = encoder_write_output(r, output_buf);
//vaDestroyBuffer(r->va_dpy, output_buf);
push_output_buffer(r, output_buf, input);
output_buf = VA_INVALID_ID;
vaDestroyBuffer(r->va_dpy, buffers[--count]);
if(ret == OUTPUT_WRITE_OVERFLOW)
exit(1);
} while (ret == OUTPUT_WRITE_OVERFLOW);
if (ret == OUTPUT_WRITE_FATAL)
{
r->error = errno;
exit(1);
}
for (i = 0; i < count; i++)
vaDestroyBuffer(r->va_dpy, buffers[i]);
r->frame_count++;
return;
bail:
printf("buffer errors?\n");
for (i = 0; i < count; i++)
vaDestroyBuffer(r->va_dpy, buffers[i]);
if (output_buf != VA_INVALID_ID)
vaDestroyBuffer(r->va_dpy, output_buf);
}
static int
setup_vpp(struct vaapi_recorder *r)
{
VAStatus status;
VASurfaceAttrib attrs[2] = { {VASurfaceAttribMemoryType, VA_SURFACE_ATTRIB_SETTABLE,{VAGenericValueTypeInteger, 0},},
{VASurfaceAttribPixelFormat,VA_SURFACE_ATTRIB_SETTABLE,{VAGenericValueTypeInteger, 0},} };
attrs[0].value.value.i = VA_SURFACE_ATTRIB_MEM_TYPE_VA;
attrs[1].value.value.i = USE_P010?VA_FOURCC_P010:VA_FOURCC_NV12;
status = vaCreateConfig(r->va_dpy, VAProfileNone,
VAEntrypointVideoProc, NULL, 0,
&r->vpp.cfg);
if (status != VA_STATUS_SUCCESS) {
printf("vaapi: failed to create VPP config\n");
return -1;
}
status = vaCreateContext(r->va_dpy, r->vpp.cfg, r->width, r->height,
0, NULL, 0, &r->vpp.ctx);
if (status != VA_STATUS_SUCCESS) {
printf("vaapi: failed to create VPP context\n");
goto err_cfg;
}
status = vaCreateBuffer(r->va_dpy, r->vpp.ctx,
VAProcPipelineParameterBufferType,
sizeof(VAProcPipelineParameterBuffer),
1, NULL, &r->vpp.pipeline_buf);
if (status != VA_STATUS_SUCCESS) {
printf("vaapi: failed to create VPP pipeline buffer\n");
goto err_ctx;
}
status = vaCreateSurfaces(r->va_dpy, USE_P010?VA_RT_FORMAT_YUV420_10:VA_RT_FORMAT_YUV420,
r->width, r->height, &r->vpp.output, 1,
attrs, 2);
if (status != VA_STATUS_SUCCESS) {
printf("vaapi: failed to create YUV surface\n");
goto err_buf;
}
return 0;
err_buf:
vaDestroyBuffer(r->va_dpy, r->vpp.pipeline_buf);
err_ctx:
vaDestroyConfig(r->va_dpy, r->vpp.ctx);
err_cfg:
vaDestroyConfig(r->va_dpy, r->vpp.cfg);
return -1;
}
static void
vpp_destroy(struct vaapi_recorder *r)
{
vaDestroySurfaces(r->va_dpy, &r->vpp.output, 1);
vaDestroyBuffer(r->va_dpy, r->vpp.pipeline_buf);
vaDestroyConfig(r->va_dpy, r->vpp.ctx);
vaDestroyConfig(r->va_dpy, r->vpp.cfg);
}
/*
 * Initialise r->mutex and r->input_cond and start the worker thread running
 * worker_thread_function().
 *
 * Returns 1 on success. Returns -1 if any pthread call fails; in that case
 * whatever was initialised has been destroyed again, so the caller must not
 * call destroy_worker_thread().
 */
static int
setup_worker_thread(struct vaapi_recorder *r)
{
	if (pthread_mutex_init(&r->mutex, NULL) != 0)
		return -1;

	if (pthread_cond_init(&r->input_cond, NULL) != 0)
		goto err_mutex;

	if (pthread_create(&r->worker_thread, NULL,
			   worker_thread_function, r) != 0)
		goto err_cond;

	return 1;

err_cond:
	pthread_cond_destroy(&r->input_cond);
err_mutex:
	pthread_mutex_destroy(&r->mutex);
	return -1;
}
/* Forward declaration of the output thread entry point, which is started by
 * setup_output_thread() and defined further down in this file. */
static void *
output_thread_function(void *data);
/*
 * Initialise r->encoder.mutex and r->encoder.output_cond and start the
 * thread running output_thread_function().
 *
 * Returns 1 on success and -1 if any pthread call fails.
 *
 * On failure nothing is torn down: the callers visible in this file ignore
 * the return value and push_output_buffer() still locks r->encoder.mutex
 * afterwards, so the mutex and condition variable are deliberately left
 * initialised.
 */
static int
setup_output_thread(struct vaapi_recorder *r)
{
	int err;

	err = pthread_mutex_init(&r->encoder.mutex, NULL);
	if (err == 0)
		err = pthread_cond_init(&r->encoder.output_cond, NULL);
	if (err == 0)
		err = pthread_create(&r->encoder.output_thread, NULL,
				     output_thread_function, r);

	if (err != 0) {
		printf("vaapi: failed to start output thread: %s\n",
		       strerror(err));
		return -1;
	}

	return 1;
}
/*
 * Stop the worker thread and release its synchronisation objects.
 *
 * Sets r->destroying under r->mutex, signals r->input_cond so a waiting
 * worker wakes up, joins r->worker_thread and finally destroys the mutex
 * and the condition variable.
 *
 * NOTE(review): this joins r->worker_thread unconditionally, so it is only
 * meaningful after setup_worker_thread() succeeded.
 */
static void
destroy_worker_thread(struct vaapi_recorder *r)
{
pthread_mutex_lock(&r->mutex);
/* Make sure the worker thread finishes */
r->destroying = 1;
pthread_cond_signal(&r->input_cond);
pthread_mutex_unlock(&r->mutex);
/* The lock is dropped before joining so the worker can re-acquire it
 * when it returns from pthread_cond_wait(). */
pthread_join(r->worker_thread, NULL);
pthread_mutex_destroy(&r->mutex);
pthread_cond_destroy(&r->input_cond);
}
/*
 * Create a recorder whose frames are submitted with vaapi_recorder_frame()
 * and processed on the worker thread.
 *
 * Starts the worker thread, opens `filename` for writing (created or
 * truncated, mode 0644), opens and initialises a VA display on `drm_fd`,
 * then sets up the VPP pipeline and the encoder.
 *
 * Returns the new recorder, or NULL on failure after undoing every step
 * that had succeeded. A display obtained from vaGetDisplayDRM() is passed
 * to vaTerminate() even when vaInitialize() fails, so that it is not
 * leaked.
 */
struct vaapi_recorder *
vaapi_recorder_create(int drm_fd, int width, int height, const char *filename)
{
	struct vaapi_recorder *r;
	VAStatus status;
	int major, minor;
	int flags;

	r = (struct vaapi_recorder *) calloc(1, sizeof *r);
	if (r == NULL)
		return NULL;

	r->width = width;
	r->height = height;
	r->drm_fd = drm_fd;

	if (setup_worker_thread(r) < 0)
		goto err_free;

	flags = O_WRONLY | O_CREAT | O_TRUNC | O_CLOEXEC;
	r->output_fd = open(filename, flags, 0644);
	if (r->output_fd < 0)
		goto err_thread;

	r->va_dpy = vaGetDisplayDRM(drm_fd);
	if (!r->va_dpy) {
		printf("failed to create VA display\n");
		goto err_fd;
	}

	status = vaInitialize(r->va_dpy, &major, &minor);
	if (status != VA_STATUS_SUCCESS) {
		printf("vaapi: failed to initialize display\n");
		goto err_va_dpy;
	}

	if (setup_vpp(r) < 0) {
		printf("vaapi: failed to initialize VPP pipeline\n");
		goto err_va_dpy;
	}

	if (setup_encoder(r) < 0)
		goto err_vpp;

	return r;

err_vpp:
	vpp_destroy(r);
err_va_dpy:
	vaTerminate(r->va_dpy);
err_fd:
	close(r->output_fd);
err_thread:
	destroy_worker_thread(r);
err_free:
	free(r);
	return NULL;
}
/* RGB input surface shared by vaapi_recorder_create2(), which fills it in
 * through create_surface_from_fd(), and recorder_frame2(), which converts
 * it to YUV for every frame. */
static VASurfaceID gInputRGBA;
/* Forward declaration; the definition appears further down in this file. */
static VAStatus
create_surface_from_fd(struct vaapi_recorder *r, int prime_fd,
int stride, VASurfaceID *surface);
/*
 * Create a recorder that reads every frame from one fixed dmabuf.
 *
 * Opens `filename` for writing, initialises a VA display on `drm_fd`, sets
 * up the VPP pipeline and the encoder, imports `dmabuf_fd` (row pitch
 * `dmabuf_stride`) as the global RGB surface gInputRGBA and starts the
 * output thread. Frames are then encoded with recorder_frame2().
 *
 * Returns the new recorder, or NULL when opening the file, the display, the
 * VPP pipeline or the encoder fails. No worker thread is started by this
 * constructor, so the error paths must not call destroy_worker_thread():
 * it would join a thread that was never created.
 */
struct vaapi_recorder *
vaapi_recorder_create2(int drm_fd, int width, int height, const char *filename, int dmabuf_fd, int dmabuf_stride)
{
	struct vaapi_recorder *r;
	VAStatus status;
	int major, minor;
	int flags;

	r = (struct vaapi_recorder *) calloc(1, sizeof *r);
	if (r == NULL)
		return NULL;

	r->width = width;
	r->height = height;
	r->drm_fd = drm_fd;

	flags = O_WRONLY | O_CREAT | O_TRUNC | O_CLOEXEC;
	r->output_fd = open(filename, flags, 0644);
	if (r->output_fd < 0)
		goto err_free;

	r->va_dpy = vaGetDisplayDRM(drm_fd);
	if (!r->va_dpy) {
		printf("failed to create VA display\n");
		goto err_fd;
	}

	status = vaInitialize(r->va_dpy, &major, &minor);
	if (status != VA_STATUS_SUCCESS) {
		printf("vaapi: failed to initialize display\n");
		/* The display object exists even though initialisation
		 * failed; terminate it so it is not leaked. */
		goto err_va_dpy;
	}

	if (setup_vpp(r) < 0) {
		printf("vaapi: failed to initialize VPP pipeline\n");
		goto err_va_dpy;
	}

	if (setup_encoder(r) < 0)
		goto err_vpp;

	/* A failed import is reported but, as before, does not abort
	 * creation; recorder_frame2() will then fail on every frame. */
	status = create_surface_from_fd(r, dmabuf_fd, dmabuf_stride, &gInputRGBA);
	if (status != VA_STATUS_SUCCESS)
		printf("vaapi: failed to import dmabuf as RGB surface: %d\n",
		       status);

	r->encoder.output_buf = VA_INVALID_ID;
	setup_output_thread(r);
	return r;

err_vpp:
	vpp_destroy(r);
err_va_dpy:
	vaTerminate(r->va_dpy);
err_fd:
	close(r->output_fd);
err_free:
	free(r);
	return NULL;
}
#include <libdrm/drm_fourcc.h>
struct vaapi_recorder *
vaapi_recorder_create3(int drm_fd, int width, int height, const char *filename, int dmabuf_fd, int dmabuf_stride, int dmabuf_fd_uv, int dmabuf_stride_uv)
{
struct vaapi_recorder *r;
VAStatus status;
int major, minor;
int flags;
VADRMPRIMESurfaceDescriptor drmSurface = {0};
r = (vaapi_recorder*)calloc(sizeof *r,1);
if (r == NULL)
return NULL;
r->width = width;
r->height = height;
r->drm_fd = drm_fd;
flags = O_WRONLY | O_CREAT | O_TRUNC | O_CLOEXEC;
r->output_fd = open(filename, flags, 0644);
if (r->output_fd < 0)
goto err_thread;
r->va_dpy = vaGetDisplayDRM(drm_fd);
if (!r->va_dpy) {
printf("failed to create VA display\n");
goto err_fd;
}
status = vaInitialize(r->va_dpy, &major, &minor);
if (status != VA_STATUS_SUCCESS) {
printf("vaapi: failed to initialize display\n");
goto err_fd;
}
if (setup_vpp(r) < 0) {
printf("vaapi: failed to initialize VPP pipeline\n");
goto err_va_dpy;
}
if (setup_encoder(r) < 0) {
goto err_vpp;
}
//create_surface_from_fd(r, dmabuf_fd, dmabuf_stride, &gInputRGBA);
VASurfaceAttrib va_attribs[4];
//VASurfaceAttribExternalBuffers va_attrib_extbuf;
/*
*
* {fourcc = 842094158, width = 1920, height = 1080, num_objects = 1, objects = {{fd = 15, size = 3133440, drm_format_modifier = 72057594037927938}, {fd = 0, size = 0,
drm_format_modifier = 0}, {fd = 0, size = 0, drm_format_modifier = 0}, {fd = 0, size = 0, drm_format_modifier = 0}}, num_layers = 2, layers = {{drm_format = 538982482,
num_planes = 1, object_index = {0, 0, 0, 0}, offset = {0, 0, 0, 0}, pitch = {1920, 0, 0, 0}}, {drm_format = 943215175, num_planes = 1, object_index = {0, 0, 0, 0}, offset = {
2088960, 0, 0, 0}, pitch = {1920, 0, 0, 0}}, {drm_format = 0, num_planes = 0, object_index = {0, 0, 0, 0}, offset = {0, 0, 0, 0}, pitch = {0, 0, 0, 0}}, {drm_format = 0,
num_planes = 0, object_index = {0, 0, 0, 0}, offset = {0, 0, 0, 0}, pitch = {0, 0, 0, 0}}}}
* {fourcc = 842094158, width = 1920, height = 1080, num_objects = 1, objects = {{fd = 13, size = 3133440, drm_format_modifier = 72057594037927938}, {fd = 0, size = 0,
drm_format_modifier = 0}, {fd = 0, size = 0, drm_format_modifier = 0}, {fd = 0, size = 0, drm_format_modifier = 0}}, num_layers = 2, layers = {{drm_format = 538982482,
num_planes = 1, object_index = {0, 0, 0, 0}, offset = {0, 0, 0, 0}, pitch = {1920, 0, 0, 0}}, {drm_format = 943215175, num_planes = 1, object_index = {0, 0, 0, 0}, offset = {
2088960, 0, 0, 0}, pitch = {1920, 0, 0, 0}}, {drm_format = 0, num_planes = 0, object_index = {0, 0, 0, 0}, offset = {0, 0, 0, 0}, pitch = {0, 0, 0, 0}}, {drm_format = 0,
num_planes = 0, object_index = {0, 0, 0, 0}, offset = {0, 0, 0, 0}, pitch = {0, 0, 0, 0}}}}
*/
drmSurface.fourcc = VA_FOURCC_NV12;
drmSurface.width = width;
drmSurface.height = height;
drmSurface.num_objects = 1;
drmSurface.objects[0].fd = dmabuf_fd;
drmSurface.objects[0].drm_format_modifier = 72057594037927938;//0x20000002096bb03;
drmSurface.objects[0].size = 3133440;
//drmSurface.objects[1].fd = dmabuf_fd_uv;
//drmSurface.objects[1].drm_format_modifier = 0;//0x20000002096bb03;
//drmSurface.objects[1].size = 1920 * height/2;
drmSurface.num_layers = 2;
drmSurface.layers[0].drm_format = DRM_FORMAT_R8;
drmSurface.layers[0].num_planes = 1;
drmSurface.layers[0].object_index[0] = 0;
drmSurface.layers[0].offset[0] = 0;
drmSurface.layers[0].pitch[0] = 1920;
drmSurface.layers[1].drm_format = DRM_FORMAT_GR88;
drmSurface.layers[1].num_planes = 1;
drmSurface.layers[1].object_index[0] = 0;
drmSurface.layers[1].offset[0] = 2088960;
drmSurface.layers[1].pitch[0] = 1920;
/*unsigned long buffer_fd[2] = {dmabuf_fd, dmabuf_fd_uv};
va_attrib_extbuf.pixel_format = VA_FOURCC_NV12;
va_attrib_extbuf.width = r->width;
va_attrib_extbuf.height = r->height;
va_attrib_extbuf.data_size = r->height * stride;
va_attrib_extbuf.num_planes = 1;
va_attrib_extbuf.pitches[0] = stride;
va_attrib_extbuf.offsets[0] = 0;
va_attrib_extbuf.buffers = &buffer_fd;
va_attrib_extbuf.num_buffers = 1;
va_attrib_extbuf.flags = 0;
va_attrib_extbuf.private_data = NULL;*/
va_attribs[0].type = VASurfaceAttribMemoryType;
va_attribs[0].flags = VA_SURFACE_ATTRIB_SETTABLE;
va_attribs[0].value.type = VAGenericValueTypeInteger;
va_attribs[0].value.value.i = VA_SURFACE_ATTRIB_MEM_TYPE_DRM_PRIME_2;
va_attribs[1].type = VASurfaceAttribExternalBufferDescriptor;
va_attribs[1].flags = VA_SURFACE_ATTRIB_SETTABLE;
va_attribs[1].value.type = VAGenericValueTypePointer;
va_attribs[1].value.value.p = &drmSurface;
va_attribs[2].type = VASurfaceAttribUsageHint;
va_attribs[2].flags = VA_SURFACE_ATTRIB_SETTABLE;
va_attribs[2].value.type = VAGenericValueTypeInteger;
va_attribs[2].value.value.i = VA_SURFACE_ATTRIB_USAGE_HINT_ENCODER;
va_attribs[3].type = VASurfaceAttribPixelFormat;
va_attribs[3].flags = VA_SURFACE_ATTRIB_SETTABLE;
va_attribs[3].value.type = VAGenericValueTypeInteger;
va_attribs[3].value.value.i = VA_FOURCC_NV12;
status = vaCreateSurfaces(r->va_dpy, VA_RT_FORMAT_YUV420,
r->width, r->height, &r->vpp.output, 1,
va_attribs, 2);
printf("%d\n", status);
if(status != VA_STATUS_SUCCESS)
exit(1);
r->encoder.output_buf = VA_INVALID_ID;
setup_output_thread(r);
return r;
err_vpp:
vpp_destroy(r);
err_va_dpy:
vaTerminate(r->va_dpy);
err_fd:
close(r->output_fd);
err_thread:
destroy_worker_thread(r);
err_free:
free(r);
return NULL;
}
struct vaapi_recorder *
vaapi_recorder_create4(int drm_fd, int width, int height, const char *filename, int *dmabuf_fd, uint64_t *mod, uint32_t *size, uint32_t *offset, uint32_t *pitch1, uint32_t *pitch2, uint64_t *modifiers, int modifierscount)
{
struct vaapi_recorder *r;
VAStatus status;
int major, minor;
int flags;
VASurfaceAttribExternalBuffers va_attrib_extbuf = {0};
VADRMPRIMESurfaceDescriptor drmSurface = {0};
r = (vaapi_recorder*)calloc(sizeof *r,1);
if (r == NULL)
return NULL;
r->width = width;
r->height = height;
r->drm_fd = drm_fd;
flags = O_WRONLY | O_CREAT | O_TRUNC | O_CLOEXEC;
r->output_fd = open(filename, flags, 0644);
if (r->output_fd < 0)
goto err_thread;
r->va_dpy = vaGetDisplayDRM(drm_fd);
if (!r->va_dpy) {
printf("failed to create VA display\n");
goto err_fd;
}
status = vaInitialize(r->va_dpy, &major, &minor);
if (status != VA_STATUS_SUCCESS) {
printf("vaapi: failed to initialize display\n");
goto err_fd;
}
if (setup_vpp(r) < 0) {
printf("vaapi: failed to initialize VPP pipeline\n");
goto err_va_dpy;
}
if (setup_encoder(r) < 0) {
goto err_vpp;
}
//create_surface_from_fd(r, dmabuf_fd, dmabuf_stride, &gInputRGBA);
VASurfaceAttrib va_attribs[5];
//unsigned long buffer_fd[2] = {dmabuf_fd, dmabuf_fd_uv};
va_attrib_extbuf.pixel_format = VA_FOURCC_P010;
va_attrib_extbuf.width = r->width;
va_attrib_extbuf.height = r->height;
//va_attrib_extbuf.data_size = r->height * stride;
//va_attrib_extbuf.num_planes = 1;
//va_attrib_extbuf.pitches[0] = stride;
//va_attrib_extbuf.offsets[0] = 0;
//va_attrib_extbuf.buffers = &buffer_fd;
//va_attrib_extbuf.num_buffers = 1;
va_attrib_extbuf.flags = VA_SURFACE_EXTBUF_DESC_ENABLE_TILING;
va_attrib_extbuf.private_data = NULL;
VADRMFormatModifierList modList;
modList.modifiers = modifiers;
modList.num_modifiers = modifierscount;
va_attribs[0].type = VASurfaceAttribMemoryType;
va_attribs[0].flags = VA_SURFACE_ATTRIB_SETTABLE;
va_attribs[0].value.type = VAGenericValueTypeInteger;
va_attribs[0].value.value.i = VA_SURFACE_ATTRIB_MEM_TYPE_VA;
va_attribs[1].type = VASurfaceAttribUsageHint;
va_attribs[1].flags = VA_SURFACE_ATTRIB_SETTABLE;
va_attribs[1].value.type = VAGenericValueTypeInteger;
va_attribs[1].value.value.i = VA_SURFACE_ATTRIB_USAGE_HINT_EXPORT | VA_SURFACE_ATTRIB_USAGE_HINT_ENCODER;
va_attribs[2].type = VASurfaceAttribPixelFormat;
va_attribs[2].flags = VA_SURFACE_ATTRIB_SETTABLE;
va_attribs[2].value.type = VAGenericValueTypeInteger;
va_attribs[2].value.value.i = VA_FOURCC_P010;
va_attribs[3].type = VASurfaceAttribExternalBufferDescriptor;
va_attribs[3].flags = VA_SURFACE_ATTRIB_SETTABLE;
va_attribs[3].value.type = VAGenericValueTypePointer;
va_attribs[3].value.value.p = &va_attrib_extbuf;
va_attribs[4].type = VASurfaceAttribDRMFormatModifiers;
va_attribs[4].flags = VA_SURFACE_ATTRIB_SETTABLE;
va_attribs[4].value.type = VAGenericValueTypePointer;
va_attribs[4].value.value.p = &modList;
status = vaCreateSurfaces(r->va_dpy, VA_RT_FORMAT_YUV420_10,
r->width, r->height, &r->vpp.output, 1,
&va_attribs[0], 5);
printf("%d\n", status);
status = vaExportSurfaceHandle(r->va_dpy, r->vpp.output, VA_SURFACE_ATTRIB_MEM_TYPE_DRM_PRIME_2, VA_EXPORT_SURFACE_WRITE_ONLY | VA_EXPORT_SURFACE_SEPARATE_LAYERS, &drmSurface );
printf("%d %d %llx\n", status, drmSurface.objects[0].fd, drmSurface.objects[0].drm_format_modifier);
*dmabuf_fd = drmSurface.objects[0].fd;
*mod = drmSurface.objects[0].drm_format_modifier;
*size = drmSurface.objects[0].size;
*offset = drmSurface.layers[1].offset[0];
*pitch1 = drmSurface.layers[0].pitch[0];
*pitch2 = drmSurface.layers[1].pitch[0];
if(status != VA_STATUS_SUCCESS)
exit(1);
r->encoder.output_buf = VA_INVALID_ID;
setup_output_thread(r);
return r;
err_vpp:
vpp_destroy(r);
err_va_dpy:
vaTerminate(r->va_dpy);
err_fd:
close(r->output_fd);
err_thread:
destroy_worker_thread(r);
err_free:
free(r);
return NULL;
}
struct vaapi_recorder *
vaapi_recorder_create5(int drm_fd, int width, int height, const char *filename, int *dmabuf_fd, uint64_t *mod, uint32_t *size, uint32_t *offset, uint32_t *pitch1, uint32_t *pitch2, uint64_t *modifiers, int modifierscount)
{
struct vaapi_recorder *r;
VAStatus status;
int major, minor;
int flags;
VASurfaceAttribExternalBuffers va_attrib_extbuf = {0};
VADRMPRIMESurfaceDescriptor drmSurface = {0};
r = (vaapi_recorder*)calloc(sizeof *r,1);
if (r == NULL)
return NULL;
r->width = width;
r->height = height;
r->drm_fd = drm_fd;
flags = O_WRONLY | O_CREAT | O_TRUNC | O_CLOEXEC;
r->output_fd = open(filename, flags, 0644);
if (r->output_fd < 0)
goto err_thread;
r->va_dpy = vaGetDisplayDRM(drm_fd);
if (!r->va_dpy) {
printf("failed to create VA display\n");
goto err_fd;
}
status = vaInitialize(r->va_dpy, &major, &minor);
if (status != VA_STATUS_SUCCESS) {
printf("vaapi: failed to initialize display\n");
goto err_fd;
}
if (setup_vpp(r) < 0) {
printf("vaapi: failed to initialize VPP pipeline\n");
goto err_va_dpy;
}
if (setup_encoder(r) < 0) {
goto err_vpp;
}
//create_surface_from_fd(r, dmabuf_fd, dmabuf_stride, &gInputRGBA);
VASurfaceAttrib va_attribs[5];
//unsigned long buffer_fd[2] = {dmabuf_fd, dmabuf_fd_uv};
va_attrib_extbuf.pixel_format = VA_FOURCC_P010;
va_attrib_extbuf.width = r->width;
va_attrib_extbuf.height = r->height;
//va_attrib_extbuf.data_size = r->height * stride;
//va_attrib_extbuf.num_planes = 1;
//va_attrib_extbuf.pitches[0] = stride;
//va_attrib_extbuf.offsets[0] = 0;
//va_attrib_extbuf.buffers = &buffer_fd;
//va_attrib_extbuf.num_buffers = 1;
va_attrib_extbuf.flags = VA_SURFACE_EXTBUF_DESC_ENABLE_TILING;
va_attrib_extbuf.private_data = NULL;
VADRMFormatModifierList modList;
modList.modifiers = modifiers;
modList.num_modifiers = modifierscount;
va_attribs[0].type = VASurfaceAttribMemoryType;
va_attribs[0].flags = VA_SURFACE_ATTRIB_SETTABLE;
va_attribs[0].value.type = VAGenericValueTypeInteger;
va_attribs[0].value.value.i = VA_SURFACE_ATTRIB_MEM_TYPE_VA;
va_attribs[1].type = VASurfaceAttribUsageHint;
va_attribs[1].flags = VA_SURFACE_ATTRIB_SETTABLE;
va_attribs[1].value.type = VAGenericValueTypeInteger;
va_attribs[1].value.value.i = VA_SURFACE_ATTRIB_USAGE_HINT_EXPORT | VA_SURFACE_ATTRIB_USAGE_HINT_ENCODER;
va_attribs[2].type = VASurfaceAttribPixelFormat;
va_attribs[2].flags = VA_SURFACE_ATTRIB_SETTABLE;
va_attribs[2].value.type = VAGenericValueTypeInteger;
va_attribs[2].value.value.i = VA_FOURCC_P010;
va_attribs[3].type = VASurfaceAttribExternalBufferDescriptor;
va_attribs[3].flags = VA_SURFACE_ATTRIB_SETTABLE;
va_attribs[3].value.type = VAGenericValueTypePointer;
va_attribs[3].value.value.p = &va_attrib_extbuf;
va_attribs[4].type = VASurfaceAttribDRMFormatModifiers;
va_attribs[4].flags = VA_SURFACE_ATTRIB_SETTABLE;
va_attribs[4].value.type = VAGenericValueTypePointer;
va_attribs[4].value.value.p = &modList;
status = vaCreateSurfaces(r->va_dpy, VA_RT_FORMAT_YUV420_10,
r->width, r->height, r->inputFrames, CHAIN_SIZE,
&va_attribs[0], 5);
printf("%d\n", status);
for(int i = 0; i < CHAIN_SIZE; i++)
{
status = vaExportSurfaceHandle(r->va_dpy, r->inputFrames[i], VA_SURFACE_ATTRIB_MEM_TYPE_DRM_PRIME_2, VA_EXPORT_SURFACE_WRITE_ONLY | VA_EXPORT_SURFACE_SEPARATE_LAYERS, &drmSurface );
printf("%d %d %llx\n", status, drmSurface.objects[0].fd, drmSurface.objects[0].drm_format_modifier);
dmabuf_fd[i] = drmSurface.objects[0].fd;
if(status != VA_STATUS_SUCCESS)
exit(1);
}
*mod = drmSurface.objects[0].drm_format_modifier;
*size = drmSurface.objects[0].size;
*offset = drmSurface.layers[1].offset[0];
*pitch1 = drmSurface.layers[0].pitch[0];
*pitch2 = drmSurface.layers[1].pitch[0];
r->encoder.output_buf = VA_INVALID_ID;
setup_output_thread(r);
return r;
err_vpp:
vpp_destroy(r);
err_va_dpy:
vaTerminate(r->va_dpy);
err_fd:
close(r->output_fd);
err_thread:
destroy_worker_thread(r);
err_free:
free(r);
return NULL;
}
/*
 * Tear down a recorder: stop the worker thread, destroy the encoder and VPP
 * state, terminate the VA display, close the output file and the DRM fd,
 * and free the recorder itself.
 *
 * NOTE(review): destroy_worker_thread() joins r->worker_thread, but only
 * vaapi_recorder_create() starts that thread; vaapi_recorder_create2..5()
 * start the output thread instead, and nothing here wakes or joins it.
 * Confirm which constructors this is meant to be paired with.
 */
void
vaapi_recorder_destroy(struct vaapi_recorder *r)
{
destroy_worker_thread(r);
encoder_destroy(r);
vpp_destroy(r);
vaTerminate(r->va_dpy);
close(r->output_fd);
/* The recorder also closes the DRM fd it was given at creation. */
close(r->drm_fd);
free(r);
}
/*
 * Wrap a single-plane BGRX dmabuf in a VA surface.
 *
 * `prime_fd` is imported as a DRM PRIME buffer of r->width x r->height
 * pixels with row pitch `stride`; the new surface ID is stored in
 * *surface. The file descriptor is not closed here.
 *
 * The external-buffer descriptor is zero-initialised so that the pitch and
 * offset entries of the unused planes are not handed to the driver with
 * indeterminate values.
 *
 * Returns the status of vaCreateSurfaces().
 */
static VAStatus
create_surface_from_fd(struct vaapi_recorder *r, int prime_fd,
		       int stride, VASurfaceID *surface)
{
	VASurfaceAttribExternalBuffers va_attrib_extbuf = {0};
	VASurfaceAttrib va_attribs[2];
	unsigned long buffer_fd = prime_fd;

	va_attrib_extbuf.pixel_format = VA_FOURCC_BGRX;
	va_attrib_extbuf.width = r->width;
	va_attrib_extbuf.height = r->height;
	va_attrib_extbuf.data_size = r->height * stride;
	va_attrib_extbuf.num_planes = 1;
	va_attrib_extbuf.pitches[0] = stride;
	va_attrib_extbuf.offsets[0] = 0;
	va_attrib_extbuf.buffers = &buffer_fd;
	va_attrib_extbuf.num_buffers = 1;
	va_attrib_extbuf.flags = 0;
	va_attrib_extbuf.private_data = NULL;

	va_attribs[0].type = VASurfaceAttribMemoryType;
	va_attribs[0].flags = VA_SURFACE_ATTRIB_SETTABLE;
	va_attribs[0].value.type = VAGenericValueTypeInteger;
	va_attribs[0].value.value.i = VA_SURFACE_ATTRIB_MEM_TYPE_DRM_PRIME;

	va_attribs[1].type = VASurfaceAttribExternalBufferDescriptor;
	va_attribs[1].flags = VA_SURFACE_ATTRIB_SETTABLE;
	va_attribs[1].value.type = VAGenericValueTypePointer;
	va_attribs[1].value.value.p = &va_attrib_extbuf;

	return vaCreateSurfaces(r->va_dpy, VA_RT_FORMAT_RGB32,
				r->width, r->height, surface, 1,
				va_attribs, 2);
}
/*
 * Run the VPP pipeline once, reading `rgb_surface` and rendering into
 * r->vpp.output.
 *
 * The pipeline parameter buffer is mapped, cleared and filled with the
 * source surface, an output background colour of 0xff000000 and no explicit
 * colour standard on either side, then unmapped and submitted.
 *
 * Returns VA_STATUS_SUCCESS, or the status of the first VA call that
 * failed; later steps are skipped after a failure.
 */
static VAStatus
convert_rgb_to_yuv(struct vaapi_recorder *r, VASurfaceID rgb_surface)
{
	VAProcPipelineParameterBuffer *params;
	VAStatus rc;

	rc = vaMapBuffer(r->va_dpy, r->vpp.pipeline_buf, (void **) &params);
	if (rc == VA_STATUS_SUCCESS) {
		memset(params, 0, sizeof *params);
		params->surface = rgb_surface;
		params->surface_color_standard = VAProcColorStandardNone;
		params->output_background_color = 0xff000000;
		params->output_color_standard = VAProcColorStandardNone;

		rc = vaUnmapBuffer(r->va_dpy, r->vpp.pipeline_buf);
	}

	if (rc == VA_STATUS_SUCCESS)
		rc = vaBeginPicture(r->va_dpy, r->vpp.ctx, r->vpp.output);

	if (rc == VA_STATUS_SUCCESS)
		rc = vaRenderPicture(r->va_dpy, r->vpp.ctx,
				     &r->vpp.pipeline_buf, 1);

	if (rc == VA_STATUS_SUCCESS)
		rc = vaEndPicture(r->va_dpy, r->vpp.ctx);

	return rc;
}
/*
 * Process the frame currently described by r->input: import the dmabuf as
 * an RGB surface, convert it to YUV into r->vpp.output and encode that.
 *
 * r->input.prime_fd is closed here whether or not the import succeeds, and
 * the temporary RGB surface is destroyed whether or not the conversion
 * succeeds, so a failing frame leaks neither.
 */
static void
recorder_frame(struct vaapi_recorder *r)
{
	VASurfaceID rgb_surface;
	VAStatus status;

	status = create_surface_from_fd(r, r->input.prime_fd,
					r->input.stride, &rgb_surface);

	/* This function is the only consumer of the fd. */
	close(r->input.prime_fd);

	if (status != VA_STATUS_SUCCESS) {
		printf("[libva recorder] "
		       "failed to create surface from bo\n");
		return;
	}

	status = convert_rgb_to_yuv(r, rgb_surface);
	if (status != VA_STATUS_SUCCESS)
		printf("[libva recorder] "
		       "color space conversion failed\n");
	else
		encoder_encode(r, r->vpp.output);

	vaDestroySurfaces(r->va_dpy, &rgb_surface, 1);
}
/*
 * Encode one frame from the global RGB surface gInputRGBA: convert it to
 * YUV into r->vpp.output and, if that worked, encode the result. A failed
 * conversion is logged and the frame is skipped.
 */
void
recorder_frame2(struct vaapi_recorder *r)
{
	if (convert_rgb_to_yuv(r, gInputRGBA) == VA_STATUS_SUCCESS) {
		encoder_encode(r, r->vpp.output);
		return;
	}

	printf("[libva recorder] color space conversion failed\n");
}
/*
 * Encode one frame straight from r->vpp.output, with no colour conversion
 * step. In this file r->vpp.output is replaced by an imported or exported
 * surface in vaapi_recorder_create3() and vaapi_recorder_create4().
 */
void
recorder_frame3(struct vaapi_recorder *r)
{
encoder_encode(r, r->vpp.output);
}
void
recorder_frame4(struct vaapi_recorder *r, int idx)
{
encoder_encode(r, r->inputFrames[idx]);
}
/*
 * Worker thread entry point; `data` is the recorder.
 *
 * Holds r->mutex for its whole lifetime except while blocked in
 * pthread_cond_wait(). Each time input is marked valid it processes the
 * frame with recorder_frame() and clears the flag. The loop ends once
 * r->destroying is set.
 */
static void *
worker_thread_function(void *data)
{
	struct vaapi_recorder *r = (struct vaapi_recorder *) data;

	pthread_mutex_lock(&r->mutex);

	for (;;) {
		if (r->destroying)
			break;

		if (!r->input.valid) {
			pthread_cond_wait(&r->input_cond, &r->mutex);

			/* A wake-up from destroy_worker_thread() carries no
			 * frame; go back and re-check the exit condition. */
			if (!r->input.valid)
				continue;
		}

		recorder_frame(r);
		r->input.valid = 0;
	}

	pthread_mutex_unlock(&r->mutex);

	return NULL;
}
static void *
output_thread_function(void *data)
{
struct vaapi_recorder *r = (vaapi_recorder*)data;
pthread_mutex_lock(&r->encoder.mutex);
while (!r->destroying) {
if (r->encoder.output_buf == VA_INVALID_ID)
pthread_cond_wait(&r->encoder.output_cond, &r->encoder.mutex);
/* If the thread is awaken by destroy_worker_thread(),
* there might not be valid input */
if (r->encoder.output_buf == VA_INVALID_ID)
continue;
//output_frame(r);
//vaSyncSurface(r->va_dpy, r->encoder.output_sync_surf);
vaSyncBuffer(r->va_dpy, r->encoder.output_buf, UINT64_MAX);
encoder_write_output(r, r->encoder.output_buf);
vaDestroyBuffer(r->va_dpy, r->encoder.output_buf);
r->encoder.output_buf = VA_INVALID_ID;
}
pthread_mutex_unlock(&r->encoder.mutex);
return NULL;
}
/*
 * Queue a coded buffer for the output thread.
 *
 * Under r->encoder.mutex: if r->error is set, errno is set to that value
 * and -1 is returned without queuing anything. Otherwise `buf` and `surf`
 * are stored in r->encoder.output_buf / output_sync_surf, the output thread
 * is signalled and 0 is returned.
 *
 * The output thread keeps the mutex while it handles a buffer, so the slot
 * is expected to be empty whenever the lock is obtained here; the assert
 * enforces that.
 */
static int
push_output_buffer(struct vaapi_recorder *r, VABufferID buf, VASurfaceID surf)
{
	int rc;

	pthread_mutex_lock(&r->encoder.mutex);

	if (r->error) {
		errno = r->error;
		rc = -1;
	} else {
		assert(r->encoder.output_buf == VA_INVALID_ID);

		r->encoder.output_buf = buf;
		r->encoder.output_sync_surf = surf;
		pthread_cond_signal(&r->encoder.output_cond);
		rc = 0;
	}

	pthread_mutex_unlock(&r->encoder.mutex);

	return rc;
}
/*
 * Submit one frame, given as a dmabuf fd and its row stride, to the worker
 * thread.
 *
 * Under r->mutex: if r->error is set, errno is set to that value and -1 is
 * returned without queuing the frame. Otherwise the fd and stride are
 * stored in r->input, the input is marked valid, the worker is signalled
 * and 0 is returned.
 *
 * The worker keeps the mutex while it processes a frame, so the previous
 * input is expected to have been consumed whenever the lock is obtained
 * here; the assert enforces that.
 */
int
vaapi_recorder_frame(struct vaapi_recorder *r, int prime_fd, int stride)
{
	int rc;

	pthread_mutex_lock(&r->mutex);

	if (r->error) {
		errno = r->error;
		rc = -1;
	} else {
		assert(!r->input.valid);

		r->input.prime_fd = prime_fd;
		r->input.stride = stride;
		r->input.valid = 1;
		pthread_cond_signal(&r->input_cond);
		rc = 0;
	}

	pthread_mutex_unlock(&r->mutex);

	return rc;
}