path: root/encode.c
author     Mikhail Burakov <mburakov@mailbox.org>  2023-05-29 12:14:39 +0200
committer  Mikhail Burakov <mburakov@mailbox.org>  2023-05-29 12:16:01 +0200
commit     d869207ed9fc54268d84b0c07c22ec14910c9333 (patch)
tree       87dabc4b439e93cced4f113e7e986103a30ab12a /encode.c
parent     6ac7270839edcb8d839f2e40debbaeadf69063c5 (diff)
Entirely replace older encode implementation with a new one
Diffstat (limited to 'encode.c')
-rw-r--r--   encode.c   943
1 file changed, 727 insertions(+), 216 deletions(-)
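
For orientation, here is a minimal caller-side sketch of the interface this commit introduces. It is an illustration only: the GpuContext, the output fd, and the resolution/colorspace values are assumed to come from the rest of streamer, and the capture/blit step is left as a comment; only the EncodeContext* functions are defined by this diff.

#include "encode.h"
#include "gpu.h"

// Hypothetical driver: encodes one captured frame into fd.
static bool EncodeOneFrame(struct GpuContext* gpu_context, int fd) {
  struct EncodeContext* encode_context =
      EncodeContextCreate(gpu_context, 1920, 1080, kItuRec709, kNarrowRange);
  if (!encode_context) return false;

  // The encoder exposes its single reusable VA input surface as a GpuFrame;
  // the capture side renders into it before every encode call.
  const struct GpuFrame* gpu_frame = EncodeContextGetFrame(encode_context);
  (void)gpu_frame;  // ...blit the captured desktop into gpu_frame here...

  // Encodes the current contents of the input surface and writes one
  // length-prefixed HEVC access unit to fd.
  bool result = EncodeContextEncodeFrame(encode_context, fd);

  EncodeContextDestroy(encode_context);
  return result;
}

In practice the encode context would be created once and reused across frames; the per-call setup above only keeps the sketch self-contained.
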
diff --git a/encode.c b/encode.c
index cbf9294..309e7c4 100644
--- a/encode.c
+++ b/encode.c
@@ -15,247 +15,640 @@
* along with streamer. If not, see <https://www.gnu.org/licenses/>.
*/
-#include "encode.h"
-
#include <assert.h>
-#include <drm_fourcc.h>
#include <errno.h>
-#include <libavcodec/avcodec.h>
-#include <libavutil/hwcontext.h>
-#include <libavutil/hwcontext_vaapi.h>
+#include <fcntl.h>
#include <stdio.h>
#include <stdlib.h>
#include <string.h>
#include <sys/uio.h>
#include <unistd.h>
#include <va/va.h>
+#include <va/va_drm.h>
#include <va/va_drmcommon.h>
+#include "bitstream.h"
+#include "encode.h"
#include "gpu.h"
+#include "hevc.h"
#include "toolbox/utils.h"
struct EncodeContext {
struct GpuContext* gpu_context;
- AVBufferRef* hwdevice_context;
- AVCodecContext* codec_context;
-
- AVFrame* hw_frame;
+ uint32_t width;
+ uint32_t height;
+ enum YuvColorspace colorspace;
+ enum YuvRange range;
+
+ int render_node;
+ VADisplay va_display;
+ VAConfigID va_config_id;
+
+ struct {
+ bool packed_header_sequence;
+ bool packed_header_slice;
+ } codec_quirks;
+
+ VAContextID va_context_id;
+ VASurfaceID input_surface_id;
struct GpuFrame* gpu_frame;
+
+ VASurfaceID recon_surface_ids[2];
+ VABufferID output_buffer_id;
+
+ VAEncSequenceParameterBufferHEVC seq;
+ VAEncPictureParameterBufferHEVC pic;
+ VAEncSliceParameterBufferHEVC slice;
+ size_t frame_counter;
};
-static bool SetHwFramesContext(struct EncodeContext* encode_context, int width,
- int height) {
- encode_context->codec_context->hw_frames_ctx =
- av_hwframe_ctx_alloc(encode_context->hwdevice_context);
- if (!encode_context->codec_context->hw_frames_ctx) {
- LOG("Failed to allocate hwframes context");
- return false;
+static const char* VaErrorString(VAStatus error) {
+ static const char* va_error_strings[] = {
+ "VA_STATUS_SUCCESS",
+ "VA_STATUS_ERROR_OPERATION_FAILED",
+ "VA_STATUS_ERROR_ALLOCATION_FAILED",
+ "VA_STATUS_ERROR_INVALID_DISPLAY",
+ "VA_STATUS_ERROR_INVALID_CONFIG",
+ "VA_STATUS_ERROR_INVALID_CONTEXT",
+ "VA_STATUS_ERROR_INVALID_SURFACE",
+ "VA_STATUS_ERROR_INVALID_BUFFER",
+ "VA_STATUS_ERROR_INVALID_IMAGE",
+ "VA_STATUS_ERROR_INVALID_SUBPICTURE",
+ "VA_STATUS_ERROR_ATTR_NOT_SUPPORTED",
+ "VA_STATUS_ERROR_MAX_NUM_EXCEEDED",
+ "VA_STATUS_ERROR_UNSUPPORTED_PROFILE",
+ "VA_STATUS_ERROR_UNSUPPORTED_ENTRYPOINT",
+ "VA_STATUS_ERROR_UNSUPPORTED_RT_FORMAT",
+ "VA_STATUS_ERROR_UNSUPPORTED_BUFFERTYPE",
+ "VA_STATUS_ERROR_SURFACE_BUSY",
+ "VA_STATUS_ERROR_FLAG_NOT_SUPPORTED",
+ "VA_STATUS_ERROR_INVALID_PARAMETER",
+ "VA_STATUS_ERROR_RESOLUTION_NOT_SUPPORTED",
+ "VA_STATUS_ERROR_UNIMPLEMENTED",
+ "VA_STATUS_ERROR_SURFACE_IN_DISPLAYING",
+ "VA_STATUS_ERROR_INVALID_IMAGE_FORMAT",
+ "VA_STATUS_ERROR_DECODING_ERROR",
+ "VA_STATUS_ERROR_ENCODING_ERROR",
+ "VA_STATUS_ERROR_INVALID_VALUE",
+ "???",
+ "???",
+ "???",
+ "???",
+ "???",
+ "???",
+ "VA_STATUS_ERROR_UNSUPPORTED_FILTER",
+ "VA_STATUS_ERROR_INVALID_FILTER_CHAIN",
+ "VA_STATUS_ERROR_HW_BUSY",
+ "???",
+ "VA_STATUS_ERROR_UNSUPPORTED_MEMORY_TYPE",
+ "VA_STATUS_ERROR_NOT_ENOUGH_BUFFER",
+ "VA_STATUS_ERROR_TIMEDOUT",
+ };
+ return VA_STATUS_SUCCESS <= error && error <= VA_STATUS_ERROR_TIMEDOUT
+ ? va_error_strings[error - VA_STATUS_SUCCESS]
+ : "???";
+}
+
+static void OnVaLogMessage(void* context, const char* message) {
+ (void)context;
+ size_t len = strlen(message);
+ while (len && message[len - 1] == '\n') len--;
+ LOG("%.*s", (int)len, message);
+}
+
+static bool InitializeCodecQuirks(struct EncodeContext* encode_context) {
+ bool result = false;
+ VAProfile dummy_profile;
+ VAEntrypoint dummy_entrypoint;
+ int num_attribs = vaMaxNumConfigAttributes(encode_context->va_display);
+ VAConfigAttrib* attrib_list =
+ malloc((size_t)num_attribs * sizeof(VAConfigAttrib));
+ VAStatus status = vaQueryConfigAttributes(
+ encode_context->va_display, encode_context->va_config_id, &dummy_profile,
+ &dummy_entrypoint, attrib_list, &num_attribs);
+ if (status != VA_STATUS_SUCCESS) {
+ LOG("Failed to query va config attributes (%s)", VaErrorString(status));
+ goto rollback_attrib_list;
}
- AVHWFramesContext* hwframes_context_data =
- (void*)(encode_context->codec_context->hw_frames_ctx->data);
- hwframes_context_data->initial_pool_size = 8;
- hwframes_context_data->format = AV_PIX_FMT_VAAPI;
- hwframes_context_data->sw_format = AV_PIX_FMT_NV12;
- hwframes_context_data->width = width;
- hwframes_context_data->height = height;
- int err = av_hwframe_ctx_init(encode_context->codec_context->hw_frames_ctx);
- if (err < 0) {
- LOG("Failed to init hwframes context (%s)", av_err2str(err));
- av_buffer_unref(&encode_context->codec_context->hw_frames_ctx);
- return false;
+ for (int i = 0; i < num_attribs; i++) {
+ if (attrib_list[i].type == VAConfigAttribEncPackedHeaders) {
+ encode_context->codec_quirks.packed_header_sequence =
+ !!(attrib_list[i].value & VA_ENC_PACKED_HEADER_SEQUENCE);
+ encode_context->codec_quirks.packed_header_slice =
+ !!(attrib_list[i].value & VA_ENC_PACKED_HEADER_SLICE);
+ }
}
- return true;
+ result = true;
+
+rollback_attrib_list:
+ free(attrib_list);
+ return result;
+}
+
+static struct GpuFrame* VaSurfaceToGpuFrame(VADisplay va_display,
+ VASurfaceID va_surface_id,
+ struct GpuContext* gpu_context) {
+ VADRMPRIMESurfaceDescriptor prime;
+ VAStatus status = vaExportSurfaceHandle(
+ va_display, va_surface_id, VA_SURFACE_ATTRIB_MEM_TYPE_DRM_PRIME_2,
+ VA_EXPORT_SURFACE_WRITE_ONLY | VA_EXPORT_SURFACE_COMPOSED_LAYERS, &prime);
+ if (status != VA_STATUS_SUCCESS) {
+ LOG("Failed to export va surface (%s)", VaErrorString(status));
+ return NULL;
+ }
+
+ struct GpuFramePlane planes[] = {{.dmabuf_fd = -1},
+ {.dmabuf_fd = -1},
+ {.dmabuf_fd = -1},
+ {.dmabuf_fd = -1}};
+ static_assert(LENGTH(planes) == LENGTH(prime.layers[0].object_index),
+ "Suspicious VADRMPRIMESurfaceDescriptor structure");
+
+ for (size_t i = 0; i < prime.layers[0].num_planes; i++) {
+ uint32_t object_index = prime.layers[0].object_index[i];
+ planes[i] = (struct GpuFramePlane){
+ .dmabuf_fd = prime.objects[object_index].fd,
+ .pitch = prime.layers[0].pitch[i],
+ .offset = prime.layers[0].offset[i],
+ .modifier = prime.objects[object_index].drm_format_modifier,
+ };
+ }
+
+ struct GpuFrame* gpu_frame =
+ GpuContextCreateFrame(gpu_context, prime.width, prime.height,
+ prime.fourcc, prime.layers[0].num_planes, planes);
+ if (!gpu_frame) {
+ LOG("Failed to create gpu frame");
+ goto release_planes;
+ }
+ return gpu_frame;
+
+release_planes:
+ CloseUniqueFds((int[]){planes[0].dmabuf_fd, planes[1].dmabuf_fd,
+ planes[2].dmabuf_fd, planes[3].dmabuf_fd});
+ return NULL;
}
-static enum AVColorSpace ConvertColorspace(enum YuvColorspace colorspace) {
- switch (colorspace) {
- case kItuRec601:
- // TODO(mburakov): No dedicated definition for BT601?
- return AVCOL_SPC_SMPTE170M;
- case kItuRec709:
- return AVCOL_SPC_BT709;
- default:
- __builtin_unreachable();
+static void InitializeSeqHeader(struct EncodeContext* encode_context,
+ uint16_t pic_width_in_luma_samples,
+ uint16_t pic_height_in_luma_samples) {
+ encode_context->seq = (VAEncSequenceParameterBufferHEVC){
+ .general_profile_idc = 1, // Main profile
+ .general_level_idc = 120, // Level 4
+ .general_tier_flag = 0, // Main tier
+
+ .intra_period = 120, // Where does this one come from?
+ .intra_idr_period = 120, // Each I frame is an IDR frame
+ .ip_period = 1, // No B-frames
+ .bits_per_second = 0, // TODO (investigate)
+
+ .pic_width_in_luma_samples = pic_width_in_luma_samples,
+ .pic_height_in_luma_samples = pic_height_in_luma_samples,
+
+ .seq_fields.bits =
+ {
+ .chroma_format_idc = 1, // 4:2:0
+ .separate_colour_plane_flag = 0, // Table 6-1
+ .bit_depth_luma_minus8 = 0, // 8 bpp luma
+ .bit_depth_chroma_minus8 = 0, // 8 bpp chroma
+ .scaling_list_enabled_flag = 0, // defaulted
+ .strong_intra_smoothing_enabled_flag = 0, // defaulted
+
+ // mburakov: ffmpeg hardcodes these for i965 Skylake driver.
+ .amp_enabled_flag = 1, // TODO (quirks)
+ .sample_adaptive_offset_enabled_flag = 0, // TODO (quirks)
+ .pcm_enabled_flag = 0, // TODO (quirks)
+ .pcm_loop_filter_disabled_flag = 0, // defaulted
+ .sps_temporal_mvp_enabled_flag = 0, // TODO (quirks)
+
+ .low_delay_seq = 1, // No B-frames
+ .hierachical_flag = 0, // defaulted
+ },
+
+ // mburakov: ffmpeg hardcodes these for i965 Skylake driver.
+ .log2_min_luma_coding_block_size_minus3 = 0, // TODO (quirks)
+ .log2_diff_max_min_luma_coding_block_size = 2, // TODO (quirks)
+ .log2_min_transform_block_size_minus2 = 0, // hardcoded
+ .log2_diff_max_min_transform_block_size = 3, // hardcoded
+ .max_transform_hierarchy_depth_inter = 3, // hardcoded
+ .max_transform_hierarchy_depth_intra = 3, // hardcoded
+
+ .pcm_sample_bit_depth_luma_minus1 = 0, // defaulted
+ .pcm_sample_bit_depth_chroma_minus1 = 0, // defaulted
+ .log2_min_pcm_luma_coding_block_size_minus3 = 0, // defaulted
+ .log2_max_pcm_luma_coding_block_size_minus3 = 0, // defaulted
+
+ .vui_parameters_present_flag = 1,
+ .vui_fields.bits =
+ {
+ .aspect_ratio_info_present_flag = 0, // defaulted
+ .neutral_chroma_indication_flag = 0, // defaulted
+ .field_seq_flag = 0, // defaulted
+ .vui_timing_info_present_flag = 1, // hardcoded
+ .bitstream_restriction_flag = 1, // hardcoded
+ .tiles_fixed_structure_flag = 0, // defaulted
+ .motion_vectors_over_pic_boundaries_flag = 1, // hardcoded
+ .restricted_ref_pic_lists_flag = 1, // hardcoded
+ .log2_max_mv_length_horizontal = 15, // hardcoded
+ .log2_max_mv_length_vertical = 15, // hardcoded
+ },
+
+ .vui_num_units_in_tick = 1, // TODO (investigate)
+ .vui_time_scale = 60, // TODO (investigate)
+ .min_spatial_segmentation_idc = 0, // defaulted
+ .max_bytes_per_pic_denom = 0, // hardcoded
+ .max_bits_per_min_cu_denom = 0, // hardcoded
+
+ .scc_fields.bits =
+ {
+ .palette_mode_enabled_flag = 0, // defaulted
+ },
+ };
+}
+
+static void InitializePicHeader(struct EncodeContext* encode_context) {
+ const typeof(encode_context->seq.seq_fields.bits)* seq_bits =
+ &encode_context->seq.seq_fields.bits;
+
+ uint8_t collocated_ref_pic_index =
+ seq_bits->sps_temporal_mvp_enabled_flag ? 0 : 0xff;
+
+ encode_context->pic = (VAEncPictureParameterBufferHEVC){
+ .decoded_curr_pic =
+ {
+ .picture_id = VA_INVALID_ID, // dynamic
+ .flags = VA_PICTURE_HEVC_INVALID, // dynamic
+ },
+
+ // .reference_frames[15],
+
+ .coded_buf = encode_context->output_buffer_id,
+ .collocated_ref_pic_index = collocated_ref_pic_index,
+ .last_picture = 0, // hardcoded
+
+ .pic_init_qp = 30, // Fixed quality
+ .diff_cu_qp_delta_depth = 0, // Fixed quality
+ .pps_cb_qp_offset = 0, // hardcoded
+ .pps_cr_qp_offset = 0, // hardcoded
+
+ .num_tile_columns_minus1 = 0, // No tiles
+ .num_tile_rows_minus1 = 0, // No tiles
+ .column_width_minus1 = {0}, // No tiles
+ .row_height_minus1 = {0}, // No tiles
+
+ .log2_parallel_merge_level_minus2 = 0, // defaulted
+ .ctu_max_bitsize_allowed = 0, // hardcoded
+ .num_ref_idx_l0_default_active_minus1 = 0, // hardcoded
+ .num_ref_idx_l1_default_active_minus1 = 0, // hardcoded
+ .slice_pic_parameter_set_id = 0, // hardcoded
+ .nal_unit_type = 0, // dynamic
+
+ .pic_fields.bits =
+ {
+ .idr_pic_flag = 0, // dynamic
+ .coding_type = 0, // dynamic
+ .reference_pic_flag = 1, // No B-frames
+
+ .dependent_slice_segments_enabled_flag = 0, // defaulted
+ .sign_data_hiding_enabled_flag = 0, // defaulted
+ .constrained_intra_pred_flag = 0, // defaulted
+ .transform_skip_enabled_flag = 0, // TODO (quirks)
+ .cu_qp_delta_enabled_flag = 0, // Fixed quality
+ .weighted_pred_flag = 0, // defaulted
+ .weighted_bipred_flag = 0, // defaulted
+ .transquant_bypass_enabled_flag = 0, // defaulted
+ .tiles_enabled_flag = 0, // No tiles
+ .entropy_coding_sync_enabled_flag = 0, // defaulted
+ .loop_filter_across_tiles_enabled_flag = 0, // No tiles
+
+ .pps_loop_filter_across_slices_enabled_flag = 1, // hardcoded
+ .scaling_list_data_present_flag = 0, // defaulted
+
+ .screen_content_flag = 0, // TODO (investigate)
+ .enable_gpu_weighted_prediction = 0, // hardcoded
+ .no_output_of_prior_pics_flag = 0, // hardcoded
+ },
+
+ .hierarchical_level_plus1 = 0, // defaulted
+ .scc_fields.bits =
+ {
+ .pps_curr_pic_ref_enabled_flag = 0, // defaulted
+ },
+ };
+
+ for (size_t i = 0; i < LENGTH(encode_context->pic.reference_frames); i++) {
+ encode_context->pic.reference_frames[i] = (VAPictureHEVC){
+ .picture_id = VA_INVALID_ID,
+ .flags = VA_PICTURE_HEVC_INVALID,
+ };
}
}
-static enum AVColorRange ConvertRange(enum YuvRange range) {
- switch (range) {
- case kNarrowRange:
- return AVCOL_RANGE_MPEG;
- case kFullRange:
- return AVCOL_RANGE_JPEG;
- default:
- __builtin_unreachable();
+static void InitializeSliceHeader(struct EncodeContext* encode_context,
+ uint32_t num_ctu_in_slice) {
+ const typeof(encode_context->seq.seq_fields.bits)* seq_bits =
+ &encode_context->seq.seq_fields.bits;
+
+ encode_context->slice = (VAEncSliceParameterBufferHEVC){
+ .slice_segment_address = 0, // No slice segments
+ .num_ctu_in_slice = num_ctu_in_slice,
+
+ .slice_type = 0, // dynamic
+ .slice_pic_parameter_set_id =
+ encode_context->pic.slice_pic_parameter_set_id,
+
+ .num_ref_idx_l0_active_minus1 =
+ encode_context->pic.num_ref_idx_l0_default_active_minus1,
+ .num_ref_idx_l1_active_minus1 =
+ encode_context->pic.num_ref_idx_l1_default_active_minus1,
+
+ .luma_log2_weight_denom = 0, // defaulted
+ .delta_chroma_log2_weight_denom = 0, // defaulted
+
+ // .delta_luma_weight_l0[15],
+ // .luma_offset_l0[15],
+ // .delta_chroma_weight_l0[15][2],
+ // .chroma_offset_l0[15][2],
+ // .delta_luma_weight_l1[15],
+ // .luma_offset_l1[15],
+ // .delta_chroma_weight_l1[15][2],
+ // .chroma_offset_l1[15][2],
+
+ .max_num_merge_cand = 5, // defaulted
+ .slice_qp_delta = 0, // Fixed quality
+ .slice_cb_qp_offset = 0, // defaulted
+ .slice_cr_qp_offset = 0, // defaulted
+
+ .slice_beta_offset_div2 = 0, // defaulted
+ .slice_tc_offset_div2 = 0, // defaulted
+
+ .slice_fields.bits =
+ {
+ .last_slice_of_pic_flag = 1, // No slice segments
+ .dependent_slice_segment_flag = 0, // No slice segments
+ .colour_plane_id = 0, // defaulted
+ .slice_temporal_mvp_enabled_flag =
+ seq_bits->sps_temporal_mvp_enabled_flag,
+ .slice_sao_luma_flag =
+ seq_bits->sample_adaptive_offset_enabled_flag,
+ .slice_sao_chroma_flag =
+ seq_bits->sample_adaptive_offset_enabled_flag,
+ .num_ref_idx_active_override_flag = 0, // hardcoded
+ .mvd_l1_zero_flag = 0, // defaulted
+ .cabac_init_flag = 0, // defaulted
+ .slice_deblocking_filter_disabled_flag = 0, // defaulted
+ .slice_loop_filter_across_slices_enabled_flag = 0, // defaulted
+ .collocated_from_l0_flag = 0, // No B-frames
+ },
+
+ .pred_weight_table_bit_offset = 0, // defaulted
+ .pred_weight_table_bit_length = 0, // defaulted
+ };
+
+ for (size_t i = 0; i < LENGTH(encode_context->slice.ref_pic_list0); i++) {
+ encode_context->slice.ref_pic_list0[i] = (VAPictureHEVC){
+ .picture_id = VA_INVALID_ID,
+ .flags = VA_PICTURE_HEVC_INVALID,
+ };
+ }
+
+ for (size_t i = 0; i < LENGTH(encode_context->slice.ref_pic_list1); i++) {
+ encode_context->slice.ref_pic_list1[i] = (VAPictureHEVC){
+ .picture_id = VA_INVALID_ID,
+ .flags = VA_PICTURE_HEVC_INVALID,
+ };
}
}
struct EncodeContext* EncodeContextCreate(struct GpuContext* gpu_context,
uint32_t width, uint32_t height,
- enum YuvColorspace colrospace,
+ enum YuvColorspace colorspace,
enum YuvRange range) {
struct EncodeContext* encode_context = malloc(sizeof(struct EncodeContext));
if (!encode_context) {
- LOG("Failed to allocate encode context (%s)", strerror(errno));
+ LOG("Faield to allocate encode context (%s)", strerror(errno));
return NULL;
}
+
*encode_context = (struct EncodeContext){
.gpu_context = gpu_context,
+ .width = width,
+ .height = height,
+ .colorspace = colorspace,
+ .range = range,
};
- int err = av_hwdevice_ctx_create(&encode_context->hwdevice_context,
- AV_HWDEVICE_TYPE_VAAPI, NULL, NULL, 0);
- if (err < 0) {
- LOG("Failed to create hwdevice context (%s)", av_err2str(err));
+ encode_context->render_node = open("/dev/dri/renderD128", O_RDWR);
+ if (encode_context->render_node == -1) {
+ LOG("Failed to open render node (%s)", strerror(errno));
goto rollback_encode_context;
}
- static const char codec_name[] = "hevc_vaapi";
- const AVCodec* codec = avcodec_find_encoder_by_name(codec_name);
- if (!codec) {
- LOG("Failed to find %s encoder", codec_name);
- goto rollback_hwdevice_context;
- }
- encode_context->codec_context = avcodec_alloc_context3(codec);
- if (!encode_context->codec_context) {
- LOG("Failed to allocate codec context");
- goto rollback_hwdevice_context;
+ encode_context->va_display = vaGetDisplayDRM(encode_context->render_node);
+ if (!encode_context->va_display) {
+ LOG("Failed to get va display (%s)", strerror(errno));
+ goto rollback_render_node;
}
- encode_context->codec_context->time_base = (AVRational){1, 60};
- encode_context->codec_context->width = (int)width;
- encode_context->codec_context->height = (int)height;
- encode_context->codec_context->pix_fmt = AV_PIX_FMT_VAAPI;
- encode_context->codec_context->max_b_frames = 0;
- encode_context->codec_context->refs = 1;
- encode_context->codec_context->global_quality = 28;
- encode_context->codec_context->colorspace = ConvertColorspace(colrospace);
- encode_context->codec_context->color_range = ConvertRange(range);
+ vaSetErrorCallback(encode_context->va_display, OnVaLogMessage, NULL);
+#ifndef NDEBUG
+ vaSetInfoCallback(encode_context->va_display, OnVaLogMessage, NULL);
+#endif // NDEBUG
- if (!SetHwFramesContext(encode_context, (int)width, (int)height)) {
- LOG("Failed to set hwframes context");
- goto rollback_codec_context;
- }
- err = avcodec_open2(encode_context->codec_context, codec, NULL);
- if (err < 0) {
- LOG("Failed to open codec (%s)", av_err2str(err));
- goto rollback_codec_context;
+ int major, minor;
+ VAStatus status = vaInitialize(encode_context->va_display, &major, &minor);
+ if (status != VA_STATUS_SUCCESS) {
+ LOG("Failed to initialize va (%s)", VaErrorString(status));
+ goto rollback_va_display;
}
- return encode_context;
-rollback_codec_context:
- avcodec_free_context(&encode_context->codec_context);
-rollback_hwdevice_context:
- av_buffer_unref(&encode_context->hwdevice_context);
-rollback_encode_context:
- free(encode_context);
- return NULL;
-}
+ LOG("Initialized VA %d.%d", major, minor);
+ // TODO(mburakov): Check entry points?
-static struct GpuFrame* PrimeToGpuFrame(
- struct GpuContext* gpu_context, const VADRMPRIMESurfaceDescriptor* prime) {
- struct GpuFramePlane planes[] = {
- {.dmabuf_fd = -1},
- {.dmabuf_fd = -1},
- {.dmabuf_fd = -1},
- {.dmabuf_fd = -1},
+ VAConfigAttrib attrib_list[] = {
+ {.type = VAConfigAttribRTFormat, .value = VA_RT_FORMAT_YUV420},
};
- static_assert(LENGTH(planes) == LENGTH(prime->layers[0].object_index),
- "Suspicious VADRMPRIMESurfaceDescriptor structure");
-
- for (size_t i = 0; i < prime->layers[0].num_planes; i++) {
- uint32_t object_index = prime->layers[0].object_index[i];
- planes[i] = (struct GpuFramePlane){
- .dmabuf_fd = prime->objects[object_index].fd,
- .pitch = prime->layers[0].pitch[i],
- .offset = prime->layers[0].offset[i],
- .modifier = prime->objects[object_index].drm_format_modifier,
- };
+ status = vaCreateConfig(encode_context->va_display, VAProfileHEVCMain,
+ VAEntrypointEncSlice, attrib_list,
+ LENGTH(attrib_list), &encode_context->va_config_id);
+ if (status != VA_STATUS_SUCCESS) {
+ LOG("Failed to create va config (%s)", VaErrorString(status));
+ goto rollback_va_display;
}
- struct GpuFrame* gpu_frame =
- GpuContextCreateFrame(gpu_context, prime->width, prime->height,
- prime->fourcc, prime->layers[0].num_planes, planes);
- if (!gpu_frame) {
- LOG("Failed to create gpu frame");
- goto release_planes;
+ if (!InitializeCodecQuirks(encode_context)) {
+ LOG("Failed to initialize codec quirks");
+ goto rollback_va_config_id;
}
- return gpu_frame;
-release_planes:
- CloseUniqueFds((int[]){planes[0].dmabuf_fd, planes[1].dmabuf_fd,
- planes[2].dmabuf_fd, planes[3].dmabuf_fd});
- return NULL;
-}
-
-const struct GpuFrame* EncodeContextGetFrame(
- struct EncodeContext* encode_context) {
- AVFrame* hw_frame = av_frame_alloc();
- if (!hw_frame) {
- LOG("Failed to allocate hwframe");
- return NULL;
+ // TODO(mburakov): ffmpeg attempts to deduce this.
+ static const uint32_t min_cb_size = 16;
+ uint32_t width_in_cb = (width + min_cb_size - 1) / min_cb_size;
+ uint32_t height_in_cb = (height + min_cb_size - 1) / min_cb_size;
+
+ // TODO(mburakov): ffmpeg attempts to deduce this.
+ static const uint32_t slice_block_size = 32;
+ uint32_t slice_block_rows =
+ (encode_context->width + slice_block_size - 1) / slice_block_size;
+ uint32_t slice_block_cols =
+ (encode_context->height + slice_block_size - 1) / slice_block_size;
+ uint32_t num_ctu_in_slice = slice_block_rows * slice_block_cols;
+
+ status = vaCreateContext(
+ encode_context->va_display, encode_context->va_config_id,
+ (int)(width_in_cb * min_cb_size), (int)(height_in_cb * min_cb_size),
+ VA_PROGRESSIVE, NULL, 0, &encode_context->va_context_id);
+ if (status != VA_STATUS_SUCCESS) {
+ LOG("Failed to create va context (%s)", VaErrorString(status));
+ goto rollback_va_config_id;
}
- int err = av_hwframe_get_buffer(encode_context->codec_context->hw_frames_ctx,
- hw_frame, 0);
- if (err < 0) {
- LOG("Failed to get hwframe buffer (%s)", av_err2str(err));
- goto rollback_hw_frame;
+ status =
+ vaCreateSurfaces(encode_context->va_display, VA_RT_FORMAT_YUV420, width,
+ height, &encode_context->input_surface_id, 1, NULL, 0);
+ if (status != VA_STATUS_SUCCESS) {
+ LOG("Failed to create va input surface (%s)", VaErrorString(status));
+ goto rollback_va_context_id;
}
- if (!hw_frame->hw_frames_ctx) {
- LOG("Failed to ref hwframe context");
- goto rollback_hw_frame;
+
+ encode_context->gpu_frame = VaSurfaceToGpuFrame(
+ encode_context->va_display, encode_context->input_surface_id,
+ encode_context->gpu_context);
+ if (!encode_context->gpu_frame) {
+ LOG("Failed to convert va surface to gpu frame");
+ goto rollback_input_surface_id;
}
- // mburakov: Roughly based on Sunshine code...
- AVVAAPIDeviceContext* vaapi_device_context =
- ((AVHWDeviceContext*)(void*)encode_context->hwdevice_context->data)
- ->hwctx;
- VASurfaceID surface_id = (VASurfaceID)(uintptr_t)hw_frame->data[3];
- VADRMPRIMESurfaceDescriptor prime;
- VAStatus status = vaExportSurfaceHandle(
- vaapi_device_context->display, surface_id,
- VA_SURFACE_ATTRIB_MEM_TYPE_DRM_PRIME_2,
- VA_EXPORT_SURFACE_WRITE_ONLY | VA_EXPORT_SURFACE_COMPOSED_LAYERS, &prime);
+ status =
+ vaCreateSurfaces(encode_context->va_display, VA_RT_FORMAT_YUV420,
+ width_in_cb * min_cb_size, height_in_cb * min_cb_size,
+ encode_context->recon_surface_ids,
+ LENGTH(encode_context->recon_surface_ids), NULL, 0);
if (status != VA_STATUS_SUCCESS) {
- LOG("Failed to export vaapi surface (%d)", status);
- goto rollback_hw_frame;
+ LOG("Failed to create va recon surfaces (%s)", VaErrorString(status));
+ goto rollback_gpu_frame;
}
- struct GpuFrame* gpu_frame =
- PrimeToGpuFrame(encode_context->gpu_context, &prime);
- if (!gpu_frame) {
- LOG("Failed to create gpu frame");
- goto rollback_hw_frame;
+ unsigned int max_encoded_size =
+ encode_context->width * encode_context->height * 3 / 2;
+ status =
+ vaCreateBuffer(encode_context->va_display, encode_context->va_context_id,
+ VAEncCodedBufferType, max_encoded_size, 1, NULL,
+ &encode_context->output_buffer_id);
+ if (status != VA_STATUS_SUCCESS) {
+ LOG("Failed to create va output buffer (%s)", VaErrorString(status));
+ goto rollback_recon_surface_ids;
}
- encode_context->hw_frame = hw_frame;
- encode_context->gpu_frame = gpu_frame;
- return gpu_frame;
+ InitializeSeqHeader(encode_context, (uint16_t)(width_in_cb * min_cb_size),
+ (uint16_t)(height_in_cb * min_cb_size));
+ InitializePicHeader(encode_context);
+ InitializeSliceHeader(encode_context, num_ctu_in_slice);
+ return encode_context;
-rollback_hw_frame:
- av_frame_free(&hw_frame);
+rollback_recon_surface_ids:
+ vaDestroySurfaces(encode_context->va_display,
+ encode_context->recon_surface_ids,
+ LENGTH(encode_context->recon_surface_ids));
+rollback_gpu_frame:
+ GpuContextDestroyFrame(encode_context->gpu_context,
+ encode_context->gpu_frame);
+rollback_input_surface_id:
+ vaDestroySurfaces(encode_context->va_display,
+ &encode_context->input_surface_id, 1);
+rollback_va_context_id:
+ vaDestroyContext(encode_context->va_display, encode_context->va_context_id);
+rollback_va_config_id:
+ vaDestroyConfig(encode_context->va_display, encode_context->va_config_id);
+rollback_va_display:
+ vaTerminate(encode_context->va_display);
+rollback_render_node:
+ close(encode_context->render_node);
+rollback_encode_context:
+ free(encode_context);
return NULL;
}
-static bool DrainPacket(const struct AVPacket* packet, int fd) {
- uint32_t size = (uint32_t)packet->size;
- struct iovec iov[] = {
- {.iov_base = &size, .iov_len = sizeof(size)},
- {.iov_base = packet->data, .iov_len = (size_t)packet->size},
+const struct GpuFrame* EncodeContextGetFrame(
+ struct EncodeContext* encode_context) {
+ return encode_context->gpu_frame;
+}
+
+static bool UploadBuffer(const struct EncodeContext* encode_context,
+ VABufferType va_buffer_type, unsigned int size,
+ void* data, VABufferID** presult) {
+ VAStatus status =
+ vaCreateBuffer(encode_context->va_display, encode_context->va_context_id,
+ va_buffer_type, size, 1, data, *presult);
+ if (status != VA_STATUS_SUCCESS) {
+ LOG("Failed to create buffer (%s)", VaErrorString(status));
+ return false;
+ }
+ (*presult)++;
+ return true;
+}
+
+static bool UploadPackedBuffer(const struct EncodeContext* encode_context,
+ VAEncPackedHeaderType packed_header_type,
+ unsigned int bit_length, void* data,
+ VABufferID** presult) {
+ VAEncPackedHeaderParameterBuffer packed_header = {
+ .type = packed_header_type,
+ .bit_length = bit_length,
+ .has_emulation_bytes = 1,
+ };
+ return UploadBuffer(encode_context, VAEncPackedHeaderParameterBufferType,
+ sizeof(packed_header), &packed_header, presult) &&
+ UploadBuffer(encode_context, VAEncPackedHeaderDataBufferType,
+ (bit_length + 7) / 8, data, presult);
+}
+
+static void UpdatePicHeader(struct EncodeContext* encode_context, bool idr) {
+ encode_context->pic.decoded_curr_pic = (VAPictureHEVC){
+ .picture_id =
+ encode_context
+ ->recon_surface_ids[encode_context->frame_counter %
+ LENGTH(encode_context->recon_surface_ids)],
+ .pic_order_cnt = (int32_t)(encode_context->frame_counter %
+ encode_context->seq.intra_idr_period),
};
+
+ if (idr) {
+ encode_context->pic.reference_frames[0] = (VAPictureHEVC){
+ .picture_id = VA_INVALID_ID,
+ .flags = VA_PICTURE_HEVC_INVALID,
+ };
+ encode_context->pic.nal_unit_type = IDR_W_RADL;
+ encode_context->pic.pic_fields.bits.idr_pic_flag = 1;
+ encode_context->pic.pic_fields.bits.coding_type = 1;
+ } else {
+ encode_context->pic.reference_frames[0] = (VAPictureHEVC){
+ .picture_id =
+ encode_context
+ ->recon_surface_ids[(encode_context->frame_counter - 1) %
+ LENGTH(encode_context->recon_surface_ids)],
+ .pic_order_cnt = (int32_t)((encode_context->frame_counter - 1) %
+ encode_context->seq.intra_idr_period),
+ };
+ encode_context->pic.nal_unit_type = TRAIL_R;
+ encode_context->pic.pic_fields.bits.idr_pic_flag = 0;
+ encode_context->pic.pic_fields.bits.coding_type = 2;
+ }
+}
+
+static bool DrainBuffers(int fd, struct iovec* iovec, int count) {
for (;;) {
- ssize_t result = writev(fd, iov, LENGTH(iov));
+ ssize_t result = writev(fd, iovec, count);
if (result < 0) {
if (errno == EINTR) continue;
LOG("Failed to write (%s)", strerror(errno));
return false;
}
- for (size_t i = 0; i < LENGTH(iov); i++) {
- size_t delta = MIN((size_t)result, iov[i].iov_len);
- iov[i].iov_base = (uint8_t*)iov[i].iov_base + delta;
- iov[i].iov_len -= delta;
+ for (int i = 0; i < count; i++) {
+ size_t delta = MIN((size_t)result, iovec[i].iov_len);
+ iovec[i].iov_base = (uint8_t*)iovec[i].iov_base + delta;
+ iovec[i].iov_len -= delta;
result -= delta;
}
if (!result) return true;
@@ -264,59 +657,177 @@ static bool DrainPacket(const struct AVPacket* packet, int fd) {
bool EncodeContextEncodeFrame(struct EncodeContext* encode_context, int fd) {
bool result = false;
- if (encode_context->gpu_frame) {
- GpuContextDestroyFrame(encode_context->gpu_context,
- encode_context->gpu_frame);
- encode_context->gpu_frame = NULL;
- }
- AVPacket* packet = av_packet_alloc();
- if (!packet) {
- LOG("Failed to allocate packet (%s)", strerror(errno));
- goto rollback_hw_frame;
- }
-
- int err = avcodec_send_frame(encode_context->codec_context,
- encode_context->hw_frame);
- if (err < 0) {
- LOG("Failed to send frame (%s)", av_err2str(err));
- goto rollback_packet;
- }
-
- err = avcodec_receive_packet(encode_context->codec_context, packet);
- switch (err) {
- case 0:
- break;
- case AVERROR(EAGAIN):
- // TODO(mburakov): This happens only for the very first frame, and
- // effectively introduces an additional latency of 16ms...
- result = true;
- goto rollback_packet;
- default:
- LOG("Failed to receive packet (%s)", av_err2str(err));
- goto rollback_packet;
- }
-
- result = DrainPacket(packet, fd);
- av_packet_unref(packet);
- if (!result) {
- LOG("Failed to drain packet");
- goto rollback_packet;
- }
-
-rollback_packet:
- av_packet_free(&packet);
-rollback_hw_frame:
- av_frame_free(&encode_context->hw_frame);
+ VABufferID buffers[8];
+ VABufferID* buffer_ptr = buffers;
+
+ bool idr =
+ !(encode_context->frame_counter % encode_context->seq.intra_idr_period);
+ if (idr && !UploadBuffer(encode_context, VAEncSequenceParameterBufferType,
+ sizeof(encode_context->seq), &encode_context->seq,
+ &buffer_ptr)) {
+ LOG("Failed to upload sequence parameter buffer");
+ goto rollback_buffers;
+ }
+
+ if (encode_context->codec_quirks.packed_header_sequence && idr) {
+ char buffer[256];
+ struct Bitstream bitstream = {
+ .data = buffer,
+ .size = 0,
+ };
+
+ static const struct MoreVideoParameters mvp = {
+ .vps_max_dec_pic_buffering_minus1 = 1, // No B-frames
+ .vps_max_num_reorder_pics = 0, // No B-frames
+ };
+ uint32_t conf_win_right_offset_luma =
+ encode_context->seq.pic_width_in_luma_samples - encode_context->width;
+ uint32_t conf_win_bottom_offset_luma =
+ encode_context->seq.pic_height_in_luma_samples - encode_context->height;
+ const struct MoreSeqParameters msp = {
+ .conf_win_left_offset = 0,
+ .conf_win_right_offset = conf_win_right_offset_luma / 2,
+ .conf_win_top_offset = 0,
+ .conf_win_bottom_offset = conf_win_bottom_offset_luma / 2,
+ .sps_max_dec_pic_buffering_minus1 = 1, // No B-frames
+ .sps_max_num_reorder_pics = 0, // No B-frames
+ .video_signal_type_present_flag = 1,
+ .video_full_range_flag = encode_context->range == kFullRange,
+ .colour_description_present_flag = 1,
+ .colour_primaries = 2, // Unspecified
+ .transfer_characteristics = 2, // Unspecified
+ .matrix_coeffs =
+ encode_context->colorspace == kItuRec601 ? 6 : 1, // Table E.5
+ };
+
+ PackVideoParameterSetNalUnit(&bitstream, &encode_context->seq, &mvp);
+ PackSeqParameterSetNalUnit(&bitstream, &encode_context->seq, &msp);
+ PackPicParameterSetNalUnit(&bitstream, &encode_context->pic);
+ if (!UploadPackedBuffer(encode_context, VAEncPackedHeaderSequence,
+ (unsigned int)bitstream.size, bitstream.data,
+ &buffer_ptr)) {
+ LOG("Failed to upload packed sequence header");
+ goto rollback_buffers;
+ }
+ }
+
+ UpdatePicHeader(encode_context, idr);
+ if (!UploadBuffer(encode_context, VAEncPictureParameterBufferType,
+ sizeof(encode_context->pic), &encode_context->pic,
+ &buffer_ptr)) {
+ LOG("Failed to upload picture parameter buffer");
+ goto rollback_buffers;
+ }
+
+ encode_context->slice.slice_type = idr ? I : P;
+ encode_context->slice.ref_pic_list0[0] =
+ encode_context->pic.reference_frames[0];
+ if (encode_context->codec_quirks.packed_header_slice) {
+ char buffer[256];
+ struct Bitstream bitstream = {
+ .data = buffer,
+ .size = 0,
+ };
+ static const struct NegativePics negative_pics[] = {
+ {
+ .delta_poc_s0_minus1 = 0,
+ .used_by_curr_pic_s0_flag = true,
+ },
+ };
+ const struct MoreSliceParamerters msp = {
+ .first_slice_segment_in_pic_flag = 1,
+ .num_negative_pics = idr ? 0 : LENGTH(negative_pics),
+ .negative_pics = idr ? NULL : negative_pics,
+ };
+ PackSliceSegmentHeaderNalUnit(&bitstream, &encode_context->seq,
+ &encode_context->pic, &encode_context->slice,
+ &msp);
+ if (!UploadPackedBuffer(encode_context, VAEncPackedHeaderSlice,
+ (unsigned int)bitstream.size, bitstream.data,
+ &buffer_ptr)) {
+ LOG("Failed to upload packed sequence header");
+ goto rollback_buffers;
+ }
+ }
+
+ if (!UploadBuffer(encode_context, VAEncSliceParameterBufferType,
+ sizeof(encode_context->slice), &encode_context->slice,
+ &buffer_ptr)) {
+ LOG("Failed to upload slice parameter buffer");
+ goto rollback_buffers;
+ }
+
+ VAStatus status =
+ vaBeginPicture(encode_context->va_display, encode_context->va_context_id,
+ encode_context->input_surface_id);
+ if (status != VA_STATUS_SUCCESS) {
+ LOG("Failed to begin va picture (%s)", VaErrorString(status));
+ goto rollback_buffers;
+ }
+
+ int num_buffers = (int)(buffer_ptr - buffers);
+ status = vaRenderPicture(encode_context->va_display,
+ encode_context->va_context_id, buffers, num_buffers);
+ if (status != VA_STATUS_SUCCESS) {
+ LOG("Failed to render va picture (%s)", VaErrorString(status));
+ goto rollback_buffers;
+ }
+
+ status =
+ vaEndPicture(encode_context->va_display, encode_context->va_context_id);
+ if (status != VA_STATUS_SUCCESS) {
+ LOG("Failed to end va picture (%s)", VaErrorString(status));
+ goto rollback_buffers;
+ }
+
+ status = vaSyncBuffer(encode_context->va_display,
+ encode_context->output_buffer_id, VA_TIMEOUT_INFINITE);
+ if (status != VA_STATUS_SUCCESS) {
+ LOG("Failed to sync va buffer (%s)", VaErrorString(status));
+ goto rollback_buffers;
+ }
+
+ VACodedBufferSegment* segment;
+ status = vaMapBuffer(encode_context->va_display,
+ encode_context->output_buffer_id, (void**)&segment);
+ if (status != VA_STATUS_SUCCESS) {
+ LOG("Failed to map va buffer (%s)", VaErrorString(status));
+ goto rollback_buffers;
+ }
+ if (segment->next != NULL) {
+ LOG("Next segment non-null!");
+ abort();
+ }
+
+ struct iovec iovec[] = {
+ {.iov_base = &segment->size, .iov_len = sizeof(segment->size)},
+ {.iov_base = segment->buf, .iov_len = segment->size},
+ };
+ if (!DrainBuffers(fd, iovec, LENGTH(iovec))) {
+ LOG("Failed to drain encoded frame");
+ goto rollback_segment;
+ }
+
+ encode_context->frame_counter++;
+ result = true;
+
+rollback_segment:
+ vaUnmapBuffer(encode_context->va_display, encode_context->output_buffer_id);
+rollback_buffers:
+ while (buffer_ptr-- > buffers)
+ vaDestroyBuffer(encode_context->va_display, *buffer_ptr);
return result;
}
void EncodeContextDestroy(struct EncodeContext* encode_context) {
- if (encode_context->gpu_frame) {
- GpuContextDestroyFrame(encode_context->gpu_context,
- encode_context->gpu_frame);
- }
- if (encode_context->hw_frame) av_frame_free(&encode_context->hw_frame);
- avcodec_free_context(&encode_context->codec_context);
- av_buffer_unref(&encode_context->hwdevice_context);
+ vaDestroyBuffer(encode_context->va_display, encode_context->output_buffer_id);
+ GpuContextDestroyFrame(encode_context->gpu_context,
+ encode_context->gpu_frame);
+ vaDestroySurfaces(encode_context->va_display,
+ &encode_context->input_surface_id, 1);
+ vaDestroyContext(encode_context->va_display, encode_context->va_context_id);
+ vaDestroyConfig(encode_context->va_display, encode_context->va_config_id);
+ vaTerminate(encode_context->va_display);
+ close(encode_context->render_node);
free(encode_context);
}
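
The output written by EncodeContextEncodeFrame is length-prefixed: DrainBuffers emits the coded segment size (a 4-byte unsigned integer on the platforms streamer targets) followed by the payload. Below is a minimal sketch of a matching receiver, assuming a connected stream fd and that both ends share the same byte order, since the size field is written as raw memory rather than converted to network order.

#include <errno.h>
#include <stdbool.h>
#include <stdint.h>
#include <stdlib.h>
#include <unistd.h>

// Reads exactly len bytes, retrying on short reads and EINTR.
static bool ReadAll(int fd, void* buffer, size_t len) {
  uint8_t* ptr = buffer;
  while (len) {
    ssize_t result = read(fd, ptr, len);
    if (result <= 0) {
      if (result < 0 && errno == EINTR) continue;
      return false;
    }
    ptr += result;
    len -= (size_t)result;
  }
  return true;
}

// Receives one frame as framed by DrainBuffers. The caller owns the
// returned buffer and must free() it; *size receives the payload length.
static void* ReceiveFrame(int fd, uint32_t* size) {
  if (!ReadAll(fd, size, sizeof(*size))) return NULL;
  void* data = malloc(*size);
  if (!data) return NULL;
  if (!ReadAll(fd, data, *size)) {
    free(data);
    return NULL;
  }
  return data;
}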