Move encoder output to separate class

This commit is contained in:
mittorn 2024-11-13 21:30:06 +03:00
parent 1622b8bf2d
commit 26cd5b28e8
4 changed files with 183 additions and 133 deletions

View file

@ -20,32 +20,14 @@ struct VaapiEncoder
VASurfaceID inputFrames[CHAIN_SIZE];
int width, height;
int frame_count;
int destroying;
VASurfaceID reference_picture[3];
pthread_t output_thread = 0;
pthread_mutex_t mutex;
pthread_cond_t output_cond;
VABufferID output_buf;
int output_fd = -1;
VAConfigAttrib attrib[VAConfigAttribTypeMax];
VAConfigID cfg;
inline void Destroy()
{
if(output_fd >= 0)
close(output_fd);
output_fd = -1;
if(output_thread)
{
pthread_mutex_lock(&mutex);
destroying = 1;
pthread_cond_signal(&output_cond);
pthread_mutex_unlock(&mutex);
pthread_join(output_thread, NULL);
pthread_mutex_destroy(&mutex);
pthread_cond_destroy(&output_cond);
}
if(dpy)
{
if(ctx != VA_INVALID_ID)
@ -57,15 +39,12 @@ struct VaapiEncoder
ctx = VA_INVALID_ID;
vaDestroySurfaces(dpy, reference_picture, 3);
vaDestroySurfaces(dpy, inputFrames, CHAIN_SIZE);
if(output_buf != VA_INVALID_ID)
vaDestroyBuffer(dpy, output_buf);
output_buf = VA_INVALID_ID;
}
vaTerminate(dpy);
dpy = nullptr;
}
}
bool SetupVA(VAProfile profile, uint32_t format, uint32_t fourcc, int drm_fd, int width, int height, const char *filename, int *dmabuf_fd, uint64_t *mod, uint32_t *size, uint32_t *offset, uint32_t *pitch1, uint32_t *pitch2, uint64_t *modifiers, int modifierscount)
bool SetupVA(VAProfile profile, uint32_t format, uint32_t fourcc, int drm_fd, int width, int height, int *dmabuf_fd, uint64_t *mod, uint32_t *size, uint32_t *offset, uint32_t *pitch1, uint32_t *pitch2, uint64_t *modifiers, int modifierscount)
{
VASurfaceAttrib va_attribs[5];
VASurfaceAttribExternalBuffers va_attrib_extbuf = {0};
@ -76,10 +55,8 @@ struct VaapiEncoder
this->width = width;
this->height = height;
frame_count = 0;
destroying = 0;
dpy = vaGetDisplayDRM(drm_fd);
output_fd = open(filename, O_WRONLY | O_CREAT | O_TRUNC | O_CLOEXEC, 0644);
output_buf = VA_INVALID_ID;
status = vaInitialize(dpy, &major, &minor);
if(status != VA_STATUS_SUCCESS)
{
@ -134,9 +111,7 @@ struct VaapiEncoder
*offset = drmSurface.layers[1].offset[0];
*pitch1 = drmSurface.layers[0].pitch[0];
*pitch2 = drmSurface.layers[1].pitch[0];
pthread_mutex_init(&mutex, NULL);
pthread_cond_init(&output_cond, NULL);
pthread_create(&output_thread, NULL, &OutputThread, this);
for (int i = 0; i < VAConfigAttribTypeMax; i++)
attrib[i].type = (VAConfigAttribType)i;
vaGetConfigAttributes(dpy, profile, VAEntrypointEncSlice, attrib, VAConfigAttribTypeMax);
@ -144,79 +119,6 @@ struct VaapiEncoder
return true;
}
/* Hands one coded buffer to the output thread: stores it in the single
 * output_buf slot under the mutex and signals output_cond so that
 * OutputThread() picks it up. */
void PushOutput(VABufferID buf)
{
pthread_mutex_lock(&mutex);
/* OutputThread() keeps the mutex locked while it syncs and writes a buffer
 * and only releases it inside pthread_cond_wait(), so the slot is expected
 * to be empty whenever this lock is obtained.
 * NOTE(review): nothing here waits for the slot to drain. If this function
 * is entered again before the signalled thread has reacquired the mutex,
 * the assert fires (or, with NDEBUG, the pending buffer is overwritten). */
assert(output_buf == VA_INVALID_ID);
output_buf = buf;
pthread_cond_signal(&output_cond);
pthread_mutex_unlock(&mutex);
}
/* Thread entry point (data is the VaapiEncoder instance). Loops until
 * destroying is set: waits for a buffer in output_buf, syncs it, writes it
 * with WriteOutput(), destroys it and marks the slot empty again.
 * The mutex is held for the whole loop and released only while blocked in
 * pthread_cond_wait(). */
static void *OutputThread(void *data)
{
VaapiEncoder *r = (VaapiEncoder*)data;
pthread_mutex_lock(&r->mutex);
while (!r->destroying) {
if (r->output_buf == VA_INVALID_ID)
pthread_cond_wait(&r->output_cond, &r->mutex);
/* The wait also returns when Destroy() signals shutdown (or spuriously),
 * so there might not be a buffer to process; go back and re-check
 * the destroying flag. */
if (r->output_buf == VA_INVALID_ID)
continue;
/* Block until the coded buffer is ready before reading it. */
vaSyncBuffer(r->dpy, r->output_buf, UINT64_MAX);
r->WriteOutput(r->output_buf);
vaDestroyBuffer(r->dpy, r->output_buf);
r->output_buf = VA_INVALID_ID;
}
pthread_mutex_unlock(&r->mutex);
return NULL;
}
/*
 * Maps the coded buffer `buf`, writes every segment of it to output_fd and
 * unmaps it again.
 *
 * Returns false if the buffer could not be mapped or if any write() failed,
 * true otherwise. Each segment is written completely: short writes are
 * retried until the whole segment is out or write() reports an error.
 * A failed segment does not stop the remaining segments from being written.
 */
bool WriteOutput(VABufferID buf)
{
    VACodedBufferSegment *segment = nullptr;
    bool ok = true;

    if (vaMapBuffer(dpy, buf, (void **) &segment) != VA_STATUS_SUCCESS)
        return false;

    for (; segment; segment = (VACodedBufferSegment *)segment->next)
    {
#if 0
        if (segment->status & VA_CODED_BUF_STATUS_SLICE_OVERFLOW_MASK) {
            vaUnmapBuffer(dpy, buf);
            printf("overflow!\n");
            // todo: warning or mark for restart in lq?
            // some mesa versions seems to cause false-overflow here!
            return false;
        }
#endif
        assert(segment->size);

        /* Errors are tracked in a flag instead of being summed into a byte
         * counter: a -1 added to earlier positive counts would be masked. */
        const char *cursor = (const char *)segment->buf;
        size_t remaining = segment->size;
        while (remaining > 0)
        {
            ssize_t written = write(output_fd, cursor, remaining);
            if (written <= 0)
            {
                ok = false;
                break;
            }
            cursor += written;
            remaining -= (size_t)written;
        }
    }

    vaUnmapBuffer(dpy, buf);
    return ok;
}
template <int maxSize>
bool CreatePackedBuffer(VABufferID &par, VABufferID &dat, VAEncPackedHeaderType type, const BaseBitstream<maxSize> &buf )
{
@ -326,4 +228,129 @@ struct VaapiEncoder
}
};
template <typename OutputHandler>
struct OutputWriter
{
OutputHandler &handler;
VaapiEncoder &enc;
typedef typename OutputHandler::ID ID;
OutputWriter(OutputHandler &handler_, VaapiEncoder &enc_) : handler(handler_), enc(enc_)
{}
bool WriteOutput(VABufferID buf, ID id)
{
VACodedBufferSegment *segment;
VAStatus status;
int count = 0;
handler.BeginFrame(id);
vaSyncBuffer(enc.dpy, buf, UINT64_MAX);
status = vaMapBuffer(enc.dpy, buf, (void **) &segment);
if (status != VA_STATUS_SUCCESS)
return false;
do
{
#if 0
if (segment->status & VA_CODED_BUF_STATUS_SLICE_OVERFLOW_MASK) {
vaUnmapBuffer(dpy, buf);
printf("overflow!\n");
// todo: warning or mark for restart in lq?
// some mesa versions seems to cause false-overflow here!
return false;
}
#endif
assert(segment->size);
count += handler.WriteData(segment->buf, segment->size);
segment = (VACodedBufferSegment *)segment->next;
}
while(segment);
handler.EndFrame();
vaUnmapBuffer(enc.dpy, buf);
if (count < 0)
return false;
return true;
}
};
/*
 * OutputWriter variant that writes frames on a dedicated thread.
 *
 * The producer hands over one coded buffer at a time through a single slot
 * (output_buf / output_id) protected by `mutex`; `output_cond` is signalled
 * whenever the slot changes state or shutdown is requested. Ownership of a
 * buffer passed to WriteOutput() moves to the writer, which destroys it
 * after it has been written.
 *
 * Expected usage: Start(), any number of WriteOutput() calls, Destroy(),
 * with WriteOutput() and Destroy() issued from the same thread.
 */
template <typename OutputHandler>
struct ThreadedOutputWriter : OutputWriter<OutputHandler>
{
    typedef typename OutputHandler::ID ID;
    pthread_t output_thread = 0;
    pthread_mutex_t mutex;
    pthread_cond_t output_cond;
    /* Initialised here so Destroy() is safe even if Start() never ran. */
    VABufferID output_buf = VA_INVALID_ID;
    typename OutputHandler::ID output_id;
    int destroying = 0;

    ThreadedOutputWriter(OutputHandler &handler_, VaapiEncoder &enc_) : OutputWriter<OutputHandler>(handler_, enc_)
    {}

    /*
     * Stops the output thread (after it has written a still-pending frame),
     * releases the synchronisation objects and destroys any buffer left in
     * the slot. Safe to call more than once and without a prior Start().
     */
    void Destroy()
    {
        if(output_thread)
        {
            pthread_mutex_lock(&mutex);
            destroying = 1;
            pthread_cond_broadcast(&output_cond);
            pthread_mutex_unlock(&mutex);
            pthread_join(output_thread, NULL);
            pthread_mutex_destroy(&mutex);
            pthread_cond_destroy(&output_cond);
            /* Prevent a second Destroy() from joining the thread again. */
            output_thread = 0;
        }
        if(output_buf != VA_INVALID_ID)
            vaDestroyBuffer(this->enc.dpy, output_buf);
        output_buf = VA_INVALID_ID;
    }

    /*
     * Creates the synchronisation objects and launches the output thread.
     * If the thread cannot be created the writer stays usable: WriteOutput()
     * then writes synchronously on the caller's thread.
     */
    void Start()
    {
        destroying = 0;
        output_buf = VA_INVALID_ID;
        pthread_mutex_init(&mutex, NULL);
        pthread_cond_init(&output_cond, NULL);
        if (pthread_create(&output_thread, NULL, &OutputThread, this) != 0)
        {
            pthread_cond_destroy(&output_cond);
            pthread_mutex_destroy(&mutex);
            output_thread = 0;
        }
    }

    /*
     * Queues `buf` (tagged with `id`) for the output thread. Blocks while
     * the previous buffer is still occupying the slot.
     */
    void WriteOutput(VABufferID buf, ID id)
    {
        if (!output_thread)
        {
            /* No worker thread: do the same work it would do, inline. */
            OutputWriter<OutputHandler>::WriteOutput(buf, id);
            vaDestroyBuffer(this->enc.dpy, buf);
            return;
        }

        pthread_mutex_lock(&mutex);
        /* The worker holds the mutex while it writes, but after a signal it
         * may not have reacquired the mutex yet when the next frame arrives.
         * Wait for the slot to be emptied instead of assuming it already is,
         * otherwise the pending buffer would be overwritten and leaked. */
        while (output_buf != VA_INVALID_ID)
            pthread_cond_wait(&output_cond, &mutex);
        output_buf = buf;
        output_id = id;
        pthread_cond_broadcast(&output_cond);
        pthread_mutex_unlock(&mutex);
    }

    /*
     * Thread entry point; `data` is the ThreadedOutputWriter instance.
     * Holds the mutex except while blocked in pthread_cond_wait().
     */
    static void *OutputThread(void *data)
    {
        ThreadedOutputWriter *r = (ThreadedOutputWriter*)data;
        pthread_mutex_lock(&r->mutex);
        for (;;)
        {
            /* Sleep until there is a buffer to write or shutdown was
             * requested; the loop also absorbs spurious wake-ups. */
            while (r->output_buf == VA_INVALID_ID && !r->destroying)
                pthread_cond_wait(&r->output_cond, &r->mutex);

            /* Only leave once the slot is empty, so a frame queued right
             * before Destroy() is still written out. */
            if (r->output_buf == VA_INVALID_ID)
                break;

            static_cast<OutputWriter<OutputHandler> *>(r)->WriteOutput(r->output_buf, r->output_id);
            vaDestroyBuffer(r->enc.dpy, r->output_buf);
            r->output_buf = VA_INVALID_ID;
            /* Wake a producer waiting for the slot to become free. */
            pthread_cond_broadcast(&r->output_cond);
        }
        pthread_mutex_unlock(&r->mutex);
        return NULL;
    }
};
#endif // VAAPI_ENCODER_H

View file

@ -362,12 +362,12 @@ struct VaapiEncoderH264: VaapiEncoder
}
}
bool Setup(int drm_fd, int width, int height, const char *filename, int *dmabuf_fd, uint64_t *mod, uint32_t *size, uint32_t *offset, uint32_t *pitch1, uint32_t *pitch2, uint64_t *modifiers, int modifierscount, bool p010)
bool Setup(int drm_fd, int width, int height, int *dmabuf_fd, uint64_t *mod, uint32_t *size, uint32_t *offset, uint32_t *pitch1, uint32_t *pitch2, uint64_t *modifiers, int modifierscount, bool p010)
{
VAProfile profile = VAProfileH264Main;
uint32_t format = VA_RT_FORMAT_YUV420;
uint32_t fourcc = VA_FOURCC_NV12;
if(!SetupVA(profile, format, fourcc, drm_fd, width, height, filename, dmabuf_fd, mod, size, offset, pitch1, pitch2, modifiers, modifierscount))
if(!SetupVA(profile, format, fourcc, drm_fd, width, height, dmabuf_fd, mod, size, offset, pitch1, pitch2, modifiers, modifierscount))
{
VaapiEncoder::Destroy();
return false;
@ -382,7 +382,8 @@ struct VaapiEncoderH264: VaapiEncoder
return true;
}
inline void EncodeIDR(int idx)
template <typename Writer>
inline void EncodeIDR(int idx, Writer &w, typename Writer::ID id)
{
VAStatus status = vaBeginPicture(dpy, ctx, inputFrames[idx]);
@ -426,16 +427,14 @@ struct VaapiEncoderH264: VaapiEncoder
status = vaEndPicture(dpy, ctx);
if(status != VA_STATUS_SUCCESS)
abort();
PushOutput(output);
/*status = vaSyncSurface(dpy, inputFrames[idx]);
status = vaSyncBuffer(dpy, output, 1000000000);
WriteOutput(output);*/
w.WriteOutput(output, id);
for(int i = 0; i < sizeof(buffers) / sizeof(buffers[0]); i++)
vaDestroyBuffer(dpy, buffers[i]);
frame_count++;
}
inline void EncodeP(int idx)
template <typename Writer>
inline void EncodeP(int idx, Writer &w, typename Writer::ID id)
{
VAStatus status = vaBeginPicture(dpy, ctx, inputFrames[idx]);
// todo: chain slice/output buffers, patch POC in slice buffers???
@ -462,10 +461,7 @@ struct VaapiEncoderH264: VaapiEncoder
vaRenderPicture(dpy, ctx, buffers, sizeof(buffers) / sizeof(buffers[0]) );
status = vaEndPicture(dpy, ctx);
PushOutput(output);
/*status = vaSyncSurface(dpy, inputFrames[idx]);
status = vaSyncBuffer(dpy, output, 1000000000);
WriteOutput(output);*/
w.WriteOutput(output, id);
for(int i = 0; i < sizeof(buffers) / sizeof(buffers[0]); i++)
vaDestroyBuffer(dpy, buffers[i]);
frame_count++;

View file

@ -643,7 +643,6 @@ struct PackedSliceHEVC : BaseBitstreamHEVC
}
};
struct VaapiEncoderHEVC: VaapiEncoder
{
VAEncSequenceParameterBufferHEVC seq;
@ -682,7 +681,7 @@ struct VaapiEncoderHEVC: VaapiEncoder
seq.seq_fields.bits.bit_depth_chroma_minus8 = 2;
seq.seq_fields.bits.bit_depth_luma_minus8 = 2;
}
seq.bits_per_second = 150*1024*1024;//(long long)r->width * r->height * 12 * 90 / 50;
seq.bits_per_second = 15*1024*1024;//(long long)r->width * r->height * 12 * 90 / 50;
// vps
// (none?)
@ -740,12 +739,12 @@ struct VaapiEncoderHEVC: VaapiEncoder
slice.ref_pic_list1[i].pic_order_cnt = -1;
}
}
bool Setup(int drm_fd, int width, int height, const char *filename, int *dmabuf_fd, uint64_t *mod, uint32_t *size, uint32_t *offset, uint32_t *pitch1, uint32_t *pitch2, uint64_t *modifiers, int modifierscount, bool p010)
bool Setup(int drm_fd, int width, int height, int *dmabuf_fd, uint64_t *mod, uint32_t *size, uint32_t *offset, uint32_t *pitch1, uint32_t *pitch2, uint64_t *modifiers, int modifierscount, bool p010)
{
VAProfile profile = p010?VAProfileHEVCMain10:VAProfileHEVCMain;
uint32_t format = p010?VA_RT_FORMAT_YUV420_10:VA_RT_FORMAT_YUV420;
uint32_t fourcc = p010?VA_FOURCC_P010:VA_FOURCC_NV12;
if(!SetupVA(profile, format, fourcc, drm_fd, width, height, filename, dmabuf_fd, mod, size, offset, pitch1, pitch2, modifiers, modifierscount))
if(!SetupVA(profile, format, fourcc, drm_fd, width, height, dmabuf_fd, mod, size, offset, pitch1, pitch2, modifiers, modifierscount))
{
VaapiEncoder::Destroy();
return false;
@ -759,7 +758,8 @@ struct VaapiEncoderHEVC: VaapiEncoder
return true;
}
inline void EncodeIDR(int idx)
template <typename Writer>
inline void EncodeIDR(int idx, Writer &w, typename Writer::ID id)
{
VAStatus status = vaBeginPicture(dpy, ctx, inputFrames[idx]);
@ -784,7 +784,7 @@ struct VaapiEncoderHEVC: VaapiEncoder
VABufferID fpsb = CreateMiscParameterBuffer(VAEncMiscParameterTypeFrameRate,VAEncMiscParameterFrameRate{.framerate = 90} );
VABufferID hrdb = CreateMiscParameterBuffer(VAEncMiscParameterTypeHRD, VAEncMiscParameterHRD{});
VABufferID rcb = CreateMiscParameterBuffer(VAEncMiscParameterTypeRateControl, VAEncMiscParameterRateControl{
.bits_per_second = 150*1024*1024,
.bits_per_second = 15*1024*1024,
.target_percentage = 66,
.window_size = 1000,
.initial_qp = 1,
@ -805,15 +805,14 @@ struct VaapiEncoderHEVC: VaapiEncoder
status = vaEndPicture(dpy, ctx);
if(status != VA_STATUS_SUCCESS)
abort();
PushOutput(output);
/*status = vaSyncSurface(dpy, inputFrames[idx]);
status = vaSyncBuffer(dpy, output, 1000000000);
WriteOutput(output);*/
w.WriteOutput(output, id);
for(int i = 0; i < sizeof(buffers) / sizeof(buffers[0]); i++)
vaDestroyBuffer(dpy, buffers[i]);
frame_count++;
}
inline void EncodeP(int idx)
template <typename Writer>
inline void EncodeP(int idx, Writer &w, typename Writer::ID id)
{
VAStatus status = vaBeginPicture(dpy, ctx, inputFrames[idx]);
// todo: chain slice/output buffers, patch POC in slice buffers???
@ -840,10 +839,7 @@ struct VaapiEncoderHEVC: VaapiEncoder
vaRenderPicture(dpy, ctx, buffers, sizeof(buffers) / sizeof(buffers[0]) );
status = vaEndPicture(dpy, ctx);
PushOutput(output);
/*status = vaSyncSurface(dpy, inputFrames[idx]);
status = vaSyncBuffer(dpy, output, 1000000000);
WriteOutput(output);*/
w.WriteOutput(output, id);
for(int i = 0; i < sizeof(buffers) / sizeof(buffers[0]); i++)
vaDestroyBuffer(dpy, buffers[i]);
frame_count++;

View file

@ -102,6 +102,31 @@ struct TextureCompressionPipeline: BaseVulkanPipeline
}
};
/*
 * Output handler that appends encoded data to a plain file descriptor.
 * Frame boundaries carry no meaning for this sink, so BeginFrame() and
 * EndFrame() do nothing.
 */
struct FDOutput
{
    typedef int ID;
    int output_fd = -1;

    /* Opens (creating or truncating) `filename` for writing. On failure
     * output_fd stays negative and later WriteData() calls return -1.
     * A descriptor left open by an earlier Open() is closed first. */
    void Open(const char *filename)
    {
        Destroy();
        output_fd = open(filename, O_WRONLY | O_CREAT | O_TRUNC | O_CLOEXEC, 0644);
    }

    /* Closes the descriptor if it is open; safe to call repeatedly. */
    void Destroy()
    {
        if(output_fd >= 0)
            close(output_fd);
        output_fd = -1;
    }

    void BeginFrame(int id)
    {
    }

    /* Writes all `size` bytes of `data`, retrying after short writes.
     * Returns the number of bytes written, or -1 if write() failed. */
    int WriteData(const void *data, size_t size)
    {
        const char *bytes = (const char *)data;
        size_t done = 0;
        while (done < size)
        {
            ssize_t n = write(output_fd, bytes + done, size - done);
            if (n < 0)
                return -1;
            if (n == 0)
                break; /* no progress possible; report what was written */
            done += (size_t)n;
        }
        return (int)done;
    }

    void EndFrame()
    {
    }
};
const int WIDTH = 1920; // Size of rendered mandelbrot set.
const int HEIGHT = 1080; // Size of rendered mandelbrot set.
@ -414,7 +439,9 @@ struct ComputeApplication {
CallWith((VkBufferCopy{0,0,sizeof(data)}),vkCmdCopyBuffer(b, staging.buffer, compressionConstantBuffer.buffer, 1, &ref));
dev.FlushCommandBuffer(b, dev.defaultQueue);
enc.Setup(drm_fd, WIDTH/2, HEIGHT/2, filename, fd, &mod, &size, &offset, &pitch1, &pitch2, modifiers, count, p010);
enc.Setup(drm_fd, WIDTH/2, HEIGHT/2, fd, &mod, &size, &offset, &pitch1, &pitch2, modifiers, count, p010);
CallWith($Sc(
VkSemaphoreCreateInfo{VK_STRUCTURE_TYPE_SEMAPHORE_CREATE_INFO},
$M(VkSemaphoreTypeCreateInfoKHR{VK_STRUCTURE_TYPE_SEMAPHORE_TYPE_CREATE_INFO_KHR},
@ -447,6 +474,10 @@ struct ComputeApplication {
return NULL;
};
pthread_create(&compress_thread, NULL, compress_thunk, this);
FDOutput out;
out.Open(filename);
ThreadedOutputWriter writer(out, enc);
writer.Start();
while(frameNum++ < 1000)
{
@ -459,9 +490,9 @@ struct ComputeApplication {
//recorder_frame4(r, chidx);
if(frameNum == 1)
enc.EncodeIDR(chidx);
enc.EncodeIDR(chidx, writer, 0);
else
enc.EncodeP(chidx);
enc.EncodeP(chidx, writer, 0);
((UBO*)chain[chidx].uboBuf.mapped)->frameNum = frameNum;