From d869207ed9fc54268d84b0c07c22ec14910c9333 Mon Sep 17 00:00:00 2001 From: Mikhail Burakov Date: Mon, 29 May 2023 12:14:39 +0200 Subject: Entirely replace older encode implementation with a new one --- encode.c | 943 ++++++++++++++++++++++++++++++++++++++++++++++++--------------- 1 file changed, 727 insertions(+), 216 deletions(-) (limited to 'encode.c') diff --git a/encode.c b/encode.c index cbf9294..309e7c4 100644 --- a/encode.c +++ b/encode.c @@ -15,247 +15,640 @@ * along with streamer. If not, see . */ -#include "encode.h" - #include -#include #include -#include -#include -#include +#include #include #include #include #include #include #include +#include #include +#include "bitstream.h" +#include "encode.h" #include "gpu.h" +#include "hevc.h" #include "toolbox/utils.h" struct EncodeContext { struct GpuContext* gpu_context; - AVBufferRef* hwdevice_context; - AVCodecContext* codec_context; - - AVFrame* hw_frame; + uint32_t width; + uint32_t height; + enum YuvColorspace colorspace; + enum YuvRange range; + + int render_node; + VADisplay va_display; + VAConfigID va_config_id; + + struct { + bool packed_header_sequence; + bool packed_header_slice; + } codec_quirks; + + VAContextID va_context_id; + VASurfaceID input_surface_id; struct GpuFrame* gpu_frame; + + VASurfaceID recon_surface_ids[2]; + VABufferID output_buffer_id; + + VAEncSequenceParameterBufferHEVC seq; + VAEncPictureParameterBufferHEVC pic; + VAEncSliceParameterBufferHEVC slice; + size_t frame_counter; }; -static bool SetHwFramesContext(struct EncodeContext* encode_context, int width, - int height) { - encode_context->codec_context->hw_frames_ctx = - av_hwframe_ctx_alloc(encode_context->hwdevice_context); - if (!encode_context->codec_context->hw_frames_ctx) { - LOG("Failed to allocate hwframes context"); - return false; +static const char* VaErrorString(VAStatus error) { + static const char* va_error_strings[] = { + "VA_STATUS_SUCCESS", + "VA_STATUS_ERROR_OPERATION_FAILED", + "VA_STATUS_ERROR_ALLOCATION_FAILED", + "VA_STATUS_ERROR_INVALID_DISPLAY", + "VA_STATUS_ERROR_INVALID_CONFIG", + "VA_STATUS_ERROR_INVALID_CONTEXT", + "VA_STATUS_ERROR_INVALID_SURFACE", + "VA_STATUS_ERROR_INVALID_BUFFER", + "VA_STATUS_ERROR_INVALID_IMAGE", + "VA_STATUS_ERROR_INVALID_SUBPICTURE", + "VA_STATUS_ERROR_ATTR_NOT_SUPPORTED", + "VA_STATUS_ERROR_MAX_NUM_EXCEEDED", + "VA_STATUS_ERROR_UNSUPPORTED_PROFILE", + "VA_STATUS_ERROR_UNSUPPORTED_ENTRYPOINT", + "VA_STATUS_ERROR_UNSUPPORTED_RT_FORMAT", + "VA_STATUS_ERROR_UNSUPPORTED_BUFFERTYPE", + "VA_STATUS_ERROR_SURFACE_BUSY", + "VA_STATUS_ERROR_FLAG_NOT_SUPPORTED", + "VA_STATUS_ERROR_INVALID_PARAMETER", + "VA_STATUS_ERROR_RESOLUTION_NOT_SUPPORTED", + "VA_STATUS_ERROR_UNIMPLEMENTED", + "VA_STATUS_ERROR_SURFACE_IN_DISPLAYING", + "VA_STATUS_ERROR_INVALID_IMAGE_FORMAT", + "VA_STATUS_ERROR_DECODING_ERROR", + "VA_STATUS_ERROR_ENCODING_ERROR", + "VA_STATUS_ERROR_INVALID_VALUE", + "???", + "???", + "???", + "???", + "???", + "???", + "VA_STATUS_ERROR_UNSUPPORTED_FILTER", + "VA_STATUS_ERROR_INVALID_FILTER_CHAIN", + "VA_STATUS_ERROR_HW_BUSY", + "???", + "VA_STATUS_ERROR_UNSUPPORTED_MEMORY_TYPE", + "VA_STATUS_ERROR_NOT_ENOUGH_BUFFER", + "VA_STATUS_ERROR_TIMEDOUT", + }; + return VA_STATUS_SUCCESS <= error && error <= VA_STATUS_ERROR_TIMEDOUT + ? 
va_error_strings[error - VA_STATUS_SUCCESS] + : "???"; +} + +static void OnVaLogMessage(void* context, const char* message) { + (void)context; + size_t len = strlen(message); + while (message[len - 1] == '\n') len--; + LOG("%.*s", (int)len, message); +} + +static bool InitializeCodecQuirks(struct EncodeContext* encode_context) { + bool result = false; + VAProfile dummy_profile; + VAEntrypoint dummy_entrypoint; + int num_attribs = vaMaxNumConfigAttributes(encode_context->va_display); + VAConfigAttrib* attrib_list = + malloc((size_t)num_attribs * sizeof(VAConfigAttrib)); + VAStatus status = vaQueryConfigAttributes( + encode_context->va_display, encode_context->va_config_id, &dummy_profile, + &dummy_entrypoint, attrib_list, &num_attribs); + if (status != VA_STATUS_SUCCESS) { + LOG("Failed to query va config attributes (%s)", VaErrorString(status)); + goto rollback_attrib_list; } - AVHWFramesContext* hwframes_context_data = - (void*)(encode_context->codec_context->hw_frames_ctx->data); - hwframes_context_data->initial_pool_size = 8; - hwframes_context_data->format = AV_PIX_FMT_VAAPI; - hwframes_context_data->sw_format = AV_PIX_FMT_NV12; - hwframes_context_data->width = width; - hwframes_context_data->height = height; - int err = av_hwframe_ctx_init(encode_context->codec_context->hw_frames_ctx); - if (err < 0) { - LOG("Failed to init hwframes context (%s)", av_err2str(err)); - av_buffer_unref(&encode_context->codec_context->hw_frames_ctx); - return false; + for (int i = 0; i < num_attribs; i++) { + if (attrib_list[i].type == VAConfigAttribEncPackedHeaders) { + encode_context->codec_quirks.packed_header_sequence = + !!(attrib_list[i].value & VA_ENC_PACKED_HEADER_SEQUENCE); + encode_context->codec_quirks.packed_header_slice = + !!(attrib_list[i].value & VA_ENC_PACKED_HEADER_SLICE); + } } - return true; + result = true; + +rollback_attrib_list: + free(attrib_list); + return result; +} + +static struct GpuFrame* VaSurfaceToGpuFrame(VADisplay va_display, + VASurfaceID va_surface_id, + struct GpuContext* gpu_context) { + VADRMPRIMESurfaceDescriptor prime; + VAStatus status = vaExportSurfaceHandle( + va_display, va_surface_id, VA_SURFACE_ATTRIB_MEM_TYPE_DRM_PRIME_2, + VA_EXPORT_SURFACE_WRITE_ONLY | VA_EXPORT_SURFACE_COMPOSED_LAYERS, &prime); + if (status != VA_STATUS_SUCCESS) { + LOG("Failed to export va surface (%s)", VaErrorString(status)); + return NULL; + } + + struct GpuFramePlane planes[] = {{.dmabuf_fd = -1}, + {.dmabuf_fd = -1}, + {.dmabuf_fd = -1}, + {.dmabuf_fd = -1}}; + static_assert(LENGTH(planes) == LENGTH(prime.layers[0].object_index), + "Suspicious VADRMPRIMESurfaceDescriptor structure"); + + for (size_t i = 0; i < prime.layers[0].num_planes; i++) { + uint32_t object_index = prime.layers[0].object_index[i]; + planes[i] = (struct GpuFramePlane){ + .dmabuf_fd = prime.objects[object_index].fd, + .pitch = prime.layers[0].pitch[i], + .offset = prime.layers[0].offset[i], + .modifier = prime.objects[object_index].drm_format_modifier, + }; + } + + struct GpuFrame* gpu_frame = + GpuContextCreateFrame(gpu_context, prime.width, prime.height, + prime.fourcc, prime.layers[0].num_planes, planes); + if (!gpu_frame) { + LOG("Failed to create gpu frame"); + goto release_planes; + } + return gpu_frame; + +release_planes: + CloseUniqueFds((int[]){planes[0].dmabuf_fd, planes[1].dmabuf_fd, + planes[2].dmabuf_fd, planes[3].dmabuf_fd}); + return NULL; } -static enum AVColorSpace ConvertColorspace(enum YuvColorspace colorspace) { - switch (colorspace) { - case kItuRec601: - // TODO(mburakov): No dedicated 
definition for BT601? - return AVCOL_SPC_SMPTE170M; - case kItuRec709: - return AVCOL_SPC_BT709; - default: - __builtin_unreachable(); +static void InitializeSeqHeader(struct EncodeContext* encode_context, + uint16_t pic_width_in_luma_samples, + uint16_t pic_height_in_luma_samples) { + encode_context->seq = (VAEncSequenceParameterBufferHEVC){ + .general_profile_idc = 1, // Main profile + .general_level_idc = 120, // Level 4 + .general_tier_flag = 0, // Main tier + + .intra_period = 120, // Where this one comes from? + .intra_idr_period = 120, // Each I frame is an IDR frame + .ip_period = 1, // No B-frames + .bits_per_second = 0, // TODO (investigate) + + .pic_width_in_luma_samples = pic_width_in_luma_samples, + .pic_height_in_luma_samples = pic_height_in_luma_samples, + + .seq_fields.bits = + { + .chroma_format_idc = 1, // 4:2:0 + .separate_colour_plane_flag = 0, // Table 6-1 + .bit_depth_luma_minus8 = 0, // 8 bpp luma + .bit_depth_chroma_minus8 = 0, // 8 bpp chroma + .scaling_list_enabled_flag = 0, // defaulted + .strong_intra_smoothing_enabled_flag = 0, // defaulted + + // mburakov: ffmpeg hardcodes these for i965 Skylake driver. + .amp_enabled_flag = 1, // TODO (quirks) + .sample_adaptive_offset_enabled_flag = 0, // TODO (quirks) + .pcm_enabled_flag = 0, // TODO (quirks) + .pcm_loop_filter_disabled_flag = 0, // defaulted + .sps_temporal_mvp_enabled_flag = 0, // TODO (quirks) + + .low_delay_seq = 1, // No B-frames + .hierachical_flag = 0, // defaulted + }, + + // mburakov: ffmpeg hardcodes these for i965 Skylake driver. + .log2_min_luma_coding_block_size_minus3 = 0, // TODO (quirks) + .log2_diff_max_min_luma_coding_block_size = 2, // TODO (quirks) + .log2_min_transform_block_size_minus2 = 0, // hardcoded + .log2_diff_max_min_transform_block_size = 3, // hardcoded + .max_transform_hierarchy_depth_inter = 3, // hardcoded + .max_transform_hierarchy_depth_intra = 3, // hardcoded + + .pcm_sample_bit_depth_luma_minus1 = 0, // defaulted + .pcm_sample_bit_depth_chroma_minus1 = 0, // defaulted + .log2_min_pcm_luma_coding_block_size_minus3 = 0, // defaulted + .log2_max_pcm_luma_coding_block_size_minus3 = 0, // defaulted + + .vui_parameters_present_flag = 1, + .vui_fields.bits = + { + .aspect_ratio_info_present_flag = 0, // defaulted + .neutral_chroma_indication_flag = 0, // defaulted + .field_seq_flag = 0, // defaulted + .vui_timing_info_present_flag = 1, // hardcoded + .bitstream_restriction_flag = 1, // hardcoded + .tiles_fixed_structure_flag = 0, // defaulted + .motion_vectors_over_pic_boundaries_flag = 1, // hardcoded + .restricted_ref_pic_lists_flag = 1, // hardcoded + .log2_max_mv_length_horizontal = 15, // hardcoded + .log2_max_mv_length_vertical = 15, // hardcoded + }, + + .vui_num_units_in_tick = 1, // TODO (investigate) + .vui_time_scale = 60, // TODO (investigate) + .min_spatial_segmentation_idc = 0, // defaulted + .max_bytes_per_pic_denom = 0, // hardcoded + .max_bits_per_min_cu_denom = 0, // hardcoded + + .scc_fields.bits = + { + .palette_mode_enabled_flag = 0, // defaulted + }, + }; +} + +static void InitializePicHeader(struct EncodeContext* encode_context) { + const typeof(encode_context->seq.seq_fields.bits)* seq_bits = + &encode_context->seq.seq_fields.bits; + + uint8_t collocated_ref_pic_index = + seq_bits->sps_temporal_mvp_enabled_flag ? 
0 : 0xff; + + encode_context->pic = (VAEncPictureParameterBufferHEVC){ + .decoded_curr_pic = + { + .picture_id = VA_INVALID_ID, // dynamic + .flags = VA_PICTURE_HEVC_INVALID, // dynamic + }, + + // .reference_frames[15], + + .coded_buf = encode_context->output_buffer_id, + .collocated_ref_pic_index = collocated_ref_pic_index, + .last_picture = 0, // hardcoded + + .pic_init_qp = 30, // Fixed quality + .diff_cu_qp_delta_depth = 0, // Fixed quality + .pps_cb_qp_offset = 0, // hardcoded + .pps_cr_qp_offset = 0, // hardcoded + + .num_tile_columns_minus1 = 0, // No tiles + .num_tile_rows_minus1 = 0, // No tiles + .column_width_minus1 = {0}, // No tiles + .row_height_minus1 = {0}, // No tiles + + .log2_parallel_merge_level_minus2 = 0, // defaulted + .ctu_max_bitsize_allowed = 0, // hardcoded + .num_ref_idx_l0_default_active_minus1 = 0, // hardcoded + .num_ref_idx_l1_default_active_minus1 = 0, // hardcoded + .slice_pic_parameter_set_id = 0, // hardcoded + .nal_unit_type = 0, // dynamic + + .pic_fields.bits = + { + .idr_pic_flag = 0, // dynamic + .coding_type = 0, // dynamic + .reference_pic_flag = 1, // No B-frames + + .dependent_slice_segments_enabled_flag = 0, // defaulted + .sign_data_hiding_enabled_flag = 0, // defaulted + .constrained_intra_pred_flag = 0, // defaulted + .transform_skip_enabled_flag = 0, // TODO (quirks) + .cu_qp_delta_enabled_flag = 0, // Fixed quality + .weighted_pred_flag = 0, // defaulted + .weighted_bipred_flag = 0, // defaulted + .transquant_bypass_enabled_flag = 0, // defaulted + .tiles_enabled_flag = 0, // No tiles + .entropy_coding_sync_enabled_flag = 0, // defaulted + .loop_filter_across_tiles_enabled_flag = 0, // No tiles + + .pps_loop_filter_across_slices_enabled_flag = 1, // hardcoded + .scaling_list_data_present_flag = 0, // defaulted + + .screen_content_flag = 0, // TODO (investigate) + .enable_gpu_weighted_prediction = 0, // hardcoded + .no_output_of_prior_pics_flag = 0, // hardcoded + }, + + .hierarchical_level_plus1 = 0, // defaulted + .scc_fields.bits = + { + .pps_curr_pic_ref_enabled_flag = 0, // defaulted + }, + }; + + for (size_t i = 0; i < LENGTH(encode_context->pic.reference_frames); i++) { + encode_context->pic.reference_frames[i] = (VAPictureHEVC){ + .picture_id = VA_INVALID_ID, + .flags = VA_PICTURE_HEVC_INVALID, + }; } } -static enum AVColorRange ConvertRange(enum YuvRange range) { - switch (range) { - case kNarrowRange: - return AVCOL_RANGE_MPEG; - case kFullRange: - return AVCOL_RANGE_JPEG; - default: - __builtin_unreachable(); +static void InitializeSliceHeader(struct EncodeContext* encode_context, + uint32_t num_ctu_in_slice) { + const typeof(encode_context->seq.seq_fields.bits)* seq_bits = + &encode_context->seq.seq_fields.bits; + + encode_context->slice = (VAEncSliceParameterBufferHEVC){ + .slice_segment_address = 0, // No slice segments + .num_ctu_in_slice = num_ctu_in_slice, + + .slice_type = 0, // dynamic + .slice_pic_parameter_set_id = + encode_context->pic.slice_pic_parameter_set_id, + + .num_ref_idx_l0_active_minus1 = + encode_context->pic.num_ref_idx_l0_default_active_minus1, + .num_ref_idx_l1_active_minus1 = + encode_context->pic.num_ref_idx_l1_default_active_minus1, + + .luma_log2_weight_denom = 0, // defaulted + .delta_chroma_log2_weight_denom = 0, // defaulted + + // .delta_luma_weight_l0[15], + // .luma_offset_l0[15], + // .delta_chroma_weight_l0[15][2], + // .chroma_offset_l0[15][2], + // .delta_luma_weight_l1[15], + // .luma_offset_l1[15], + // .delta_chroma_weight_l1[15][2], + // .chroma_offset_l1[15][2], + + .max_num_merge_cand 
= 5,  // defaulted
+      .slice_qp_delta = 0,  // Fixed quality
+      .slice_cb_qp_offset = 0,  // defaulted
+      .slice_cr_qp_offset = 0,  // defaulted
+
+      .slice_beta_offset_div2 = 0,  // defaulted
+      .slice_tc_offset_div2 = 0,  // defaulted
+
+      .slice_fields.bits =
+          {
+              .last_slice_of_pic_flag = 1,  // No slice segments
+              .dependent_slice_segment_flag = 0,  // No slice segments
+              .colour_plane_id = 0,  // defaulted
+              .slice_temporal_mvp_enabled_flag =
+                  seq_bits->sps_temporal_mvp_enabled_flag,
+              .slice_sao_luma_flag =
+                  seq_bits->sample_adaptive_offset_enabled_flag,
+              .slice_sao_chroma_flag =
+                  seq_bits->sample_adaptive_offset_enabled_flag,
+              .num_ref_idx_active_override_flag = 0,  // hardcoded
+              .mvd_l1_zero_flag = 0,  // defaulted
+              .cabac_init_flag = 0,  // defaulted
+              .slice_deblocking_filter_disabled_flag = 0,  // defaulted
+              .slice_loop_filter_across_slices_enabled_flag = 0,  // defaulted
+              .collocated_from_l0_flag = 0,  // No B-frames
+          },
+
+      .pred_weight_table_bit_offset = 0,  // defaulted
+      .pred_weight_table_bit_length = 0,  // defaulted
+  };
+
+  for (size_t i = 0; i < LENGTH(encode_context->slice.ref_pic_list0); i++) {
+    encode_context->slice.ref_pic_list0[i] = (VAPictureHEVC){
+        .picture_id = VA_INVALID_ID,
+        .flags = VA_PICTURE_HEVC_INVALID,
+    };
+  }
+
+  for (size_t i = 0; i < LENGTH(encode_context->slice.ref_pic_list1); i++) {
+    encode_context->slice.ref_pic_list1[i] = (VAPictureHEVC){
+        .picture_id = VA_INVALID_ID,
+        .flags = VA_PICTURE_HEVC_INVALID,
+    };
+  }
 }
 
 struct EncodeContext* EncodeContextCreate(struct GpuContext* gpu_context,
                                           uint32_t width, uint32_t height,
-                                          enum YuvColorspace colrospace,
+                                          enum YuvColorspace colorspace,
                                           enum YuvRange range) {
   struct EncodeContext* encode_context = malloc(sizeof(struct EncodeContext));
   if (!encode_context) {
     LOG("Failed to allocate encode context (%s)", strerror(errno));
     return NULL;
   }
+
   *encode_context = (struct EncodeContext){
       .gpu_context = gpu_context,
+      .width = width,
+      .height = height,
+      .colorspace = colorspace,
+      .range = range,
   };
 
-  int err = av_hwdevice_ctx_create(&encode_context->hwdevice_context,
-                                   AV_HWDEVICE_TYPE_VAAPI, NULL, NULL, 0);
-  if (err < 0) {
-    LOG("Failed to create hwdevice context (%s)", av_err2str(err));
+  encode_context->render_node = open("/dev/dri/renderD128", O_RDWR);
+  if (encode_context->render_node == -1) {
+    LOG("Failed to open render node (%s)", strerror(errno));
     goto rollback_encode_context;
   }
 
-  static const char codec_name[] = "hevc_vaapi";
-  const AVCodec* codec = avcodec_find_encoder_by_name(codec_name);
-  if (!codec) {
-    LOG("Failed to find %s encoder", codec_name);
-    goto rollback_hwdevice_context;
-  }
-  encode_context->codec_context = avcodec_alloc_context3(codec);
-  if (!encode_context->codec_context) {
-    LOG("Failed to allocate codec context");
-    goto rollback_hwdevice_context;
+  encode_context->va_display = vaGetDisplayDRM(encode_context->render_node);
+  if (!encode_context->va_display) {
+    LOG("Failed to get va display (%s)", strerror(errno));
+    goto rollback_render_node;
  }
 
-  encode_context->codec_context->time_base = (AVRational){1, 60};
-  encode_context->codec_context->width = (int)width;
-  encode_context->codec_context->height = (int)height;
-  encode_context->codec_context->pix_fmt = AV_PIX_FMT_VAAPI;
-  encode_context->codec_context->max_b_frames = 0;
-  encode_context->codec_context->refs = 1;
-  encode_context->codec_context->global_quality = 28;
-  encode_context->codec_context->colorspace = ConvertColorspace(colrospace);
-
encode_context->codec_context->color_range = ConvertRange(range); + vaSetErrorCallback(encode_context->va_display, OnVaLogMessage, NULL); +#ifndef NDEBUG + vaSetInfoCallback(encode_context->va_display, OnVaLogMessage, NULL); +#endif // NDEBUG - if (!SetHwFramesContext(encode_context, (int)width, (int)height)) { - LOG("Failed to set hwframes context"); - goto rollback_codec_context; - } - err = avcodec_open2(encode_context->codec_context, codec, NULL); - if (err < 0) { - LOG("Failed to open codec (%s)", av_err2str(err)); - goto rollback_codec_context; + int major, minor; + VAStatus status = vaInitialize(encode_context->va_display, &major, &minor); + if (status != VA_STATUS_SUCCESS) { + LOG("Failed to initialize va (%s)", VaErrorString(status)); + goto rollback_va_display; } - return encode_context; -rollback_codec_context: - avcodec_free_context(&encode_context->codec_context); -rollback_hwdevice_context: - av_buffer_unref(&encode_context->hwdevice_context); -rollback_encode_context: - free(encode_context); - return NULL; -} + LOG("Initialized VA %d.%d", major, minor); + // TODO(mburakov): Check entry points? -static struct GpuFrame* PrimeToGpuFrame( - struct GpuContext* gpu_context, const VADRMPRIMESurfaceDescriptor* prime) { - struct GpuFramePlane planes[] = { - {.dmabuf_fd = -1}, - {.dmabuf_fd = -1}, - {.dmabuf_fd = -1}, - {.dmabuf_fd = -1}, + VAConfigAttrib attrib_list[] = { + {.type = VAConfigAttribRTFormat, .value = VA_RT_FORMAT_YUV420}, }; - static_assert(LENGTH(planes) == LENGTH(prime->layers[0].object_index), - "Suspicious VADRMPRIMESurfaceDescriptor structure"); - - for (size_t i = 0; i < prime->layers[0].num_planes; i++) { - uint32_t object_index = prime->layers[0].object_index[i]; - planes[i] = (struct GpuFramePlane){ - .dmabuf_fd = prime->objects[object_index].fd, - .pitch = prime->layers[0].pitch[i], - .offset = prime->layers[0].offset[i], - .modifier = prime->objects[object_index].drm_format_modifier, - }; + status = vaCreateConfig(encode_context->va_display, VAProfileHEVCMain, + VAEntrypointEncSlice, attrib_list, + LENGTH(attrib_list), &encode_context->va_config_id); + if (status != VA_STATUS_SUCCESS) { + LOG("Failed to create va config (%s)", VaErrorString(status)); + goto rollback_va_display; } - struct GpuFrame* gpu_frame = - GpuContextCreateFrame(gpu_context, prime->width, prime->height, - prime->fourcc, prime->layers[0].num_planes, planes); - if (!gpu_frame) { - LOG("Failed to create gpu frame"); - goto release_planes; + if (!InitializeCodecQuirks(encode_context)) { + LOG("Failed to initialize codec quirks"); + goto rollback_va_config_id; } - return gpu_frame; -release_planes: - CloseUniqueFds((int[]){planes[0].dmabuf_fd, planes[1].dmabuf_fd, - planes[2].dmabuf_fd, planes[3].dmabuf_fd}); - return NULL; -} - -const struct GpuFrame* EncodeContextGetFrame( - struct EncodeContext* encode_context) { - AVFrame* hw_frame = av_frame_alloc(); - if (!hw_frame) { - LOG("Failed to allocate hwframe"); - return NULL; + // TODO(mburakov): ffmpeg attempts to deduce this. + static const uint32_t min_cb_size = 16; + uint32_t width_in_cb = (width + min_cb_size - 1) / min_cb_size; + uint32_t height_in_cb = (height + min_cb_size - 1) / min_cb_size; + + // TODO(mburakov): ffmpeg attempts to deduce this. 
+ static const uint32_t slice_block_size = 32; + uint32_t slice_block_rows = + (encode_context->width + slice_block_size - 1) / slice_block_size; + uint32_t slice_block_cols = + (encode_context->height + slice_block_size - 1) / slice_block_size; + uint32_t num_ctu_in_slice = slice_block_rows * slice_block_cols; + + status = vaCreateContext( + encode_context->va_display, encode_context->va_config_id, + (int)(width_in_cb * min_cb_size), (int)(height_in_cb * min_cb_size), + VA_PROGRESSIVE, NULL, 0, &encode_context->va_context_id); + if (status != VA_STATUS_SUCCESS) { + LOG("Failed to create va context (%s)", VaErrorString(status)); + goto rollback_va_config_id; } - int err = av_hwframe_get_buffer(encode_context->codec_context->hw_frames_ctx, - hw_frame, 0); - if (err < 0) { - LOG("Failed to get hwframe buffer (%s)", av_err2str(err)); - goto rollback_hw_frame; + status = + vaCreateSurfaces(encode_context->va_display, VA_RT_FORMAT_YUV420, width, + height, &encode_context->input_surface_id, 1, NULL, 0); + if (status != VA_STATUS_SUCCESS) { + LOG("Failed to create va input surface (%s)", VaErrorString(status)); + goto rollback_va_context_id; } - if (!hw_frame->hw_frames_ctx) { - LOG("Failed to ref hwframe context"); - goto rollback_hw_frame; + + encode_context->gpu_frame = VaSurfaceToGpuFrame( + encode_context->va_display, encode_context->input_surface_id, + encode_context->gpu_context); + if (!encode_context->gpu_frame) { + LOG("Failed to convert va surface to gpu frame"); + goto rollback_input_surface_id; } - // mburakov: Roughly based on Sunshine code... - AVVAAPIDeviceContext* vaapi_device_context = - ((AVHWDeviceContext*)(void*)encode_context->hwdevice_context->data) - ->hwctx; - VASurfaceID surface_id = (VASurfaceID)(uintptr_t)hw_frame->data[3]; - VADRMPRIMESurfaceDescriptor prime; - VAStatus status = vaExportSurfaceHandle( - vaapi_device_context->display, surface_id, - VA_SURFACE_ATTRIB_MEM_TYPE_DRM_PRIME_2, - VA_EXPORT_SURFACE_WRITE_ONLY | VA_EXPORT_SURFACE_COMPOSED_LAYERS, &prime); + status = + vaCreateSurfaces(encode_context->va_display, VA_RT_FORMAT_YUV420, + width_in_cb * min_cb_size, height_in_cb * min_cb_size, + encode_context->recon_surface_ids, + LENGTH(encode_context->recon_surface_ids), NULL, 0); if (status != VA_STATUS_SUCCESS) { - LOG("Failed to export vaapi surface (%d)", status); - goto rollback_hw_frame; + LOG("Failed to create va recon surfaces (%s)", VaErrorString(status)); + goto rollback_gpu_frame; } - struct GpuFrame* gpu_frame = - PrimeToGpuFrame(encode_context->gpu_context, &prime); - if (!gpu_frame) { - LOG("Failed to create gpu frame"); - goto rollback_hw_frame; + unsigned int max_encoded_size = + encode_context->width * encode_context->height * 3 / 2; + status = + vaCreateBuffer(encode_context->va_display, encode_context->va_context_id, + VAEncCodedBufferType, max_encoded_size, 1, NULL, + &encode_context->output_buffer_id); + if (status != VA_STATUS_SUCCESS) { + LOG("Failed to create va output buffer (%s)", VaErrorString(status)); + goto rollback_recon_surface_ids; } - encode_context->hw_frame = hw_frame; - encode_context->gpu_frame = gpu_frame; - return gpu_frame; + InitializeSeqHeader(encode_context, (uint16_t)(width_in_cb * min_cb_size), + (uint16_t)(height_in_cb * min_cb_size)); + InitializePicHeader(encode_context); + InitializeSliceHeader(encode_context, num_ctu_in_slice); + return encode_context; -rollback_hw_frame: - av_frame_free(&hw_frame); +rollback_recon_surface_ids: + vaDestroySurfaces(encode_context->va_display, + encode_context->recon_surface_ids, + 
                    LENGTH(encode_context->recon_surface_ids));
+rollback_gpu_frame:
+  GpuContextDestroyFrame(encode_context->gpu_context,
+                         encode_context->gpu_frame);
+rollback_input_surface_id:
+  vaDestroySurfaces(encode_context->va_display,
+                    &encode_context->input_surface_id, 1);
+rollback_va_context_id:
+  vaDestroyContext(encode_context->va_display, encode_context->va_context_id);
+rollback_va_config_id:
+  vaDestroyConfig(encode_context->va_display, encode_context->va_config_id);
+rollback_va_display:
+  vaTerminate(encode_context->va_display);
+rollback_render_node:
+  close(encode_context->render_node);
+rollback_encode_context:
   free(encode_context);
   return NULL;
 }
 
-static bool DrainPacket(const struct AVPacket* packet, int fd) {
-  uint32_t size = (uint32_t)packet->size;
-  struct iovec iov[] = {
-      {.iov_base = &size, .iov_len = sizeof(size)},
-      {.iov_base = packet->data, .iov_len = (size_t)packet->size},
+const struct GpuFrame* EncodeContextGetFrame(
+    struct EncodeContext* encode_context) {
+  return encode_context->gpu_frame;
+}
+
+static bool UploadBuffer(const struct EncodeContext* encode_context,
+                         VABufferType va_buffer_type, unsigned int size,
+                         void* data, VABufferID** presult) {
+  VAStatus status =
+      vaCreateBuffer(encode_context->va_display, encode_context->va_context_id,
+                     va_buffer_type, size, 1, data, *presult);
+  if (status != VA_STATUS_SUCCESS) {
+    LOG("Failed to create buffer (%s)", VaErrorString(status));
+    return false;
+  }
+  (*presult)++;
+  return true;
+}
+
+static bool UploadPackedBuffer(const struct EncodeContext* encode_context,
+                               VAEncPackedHeaderType packed_header_type,
+                               unsigned int bit_length, void* data,
+                               VABufferID** presult) {
+  VAEncPackedHeaderParameterBuffer packed_header = {
+      .type = packed_header_type,
+      .bit_length = bit_length,
+      .has_emulation_bytes = 1,
+  };
+  return UploadBuffer(encode_context, VAEncPackedHeaderParameterBufferType,
+                      sizeof(packed_header), &packed_header, presult) &&
+         UploadBuffer(encode_context, VAEncPackedHeaderDataBufferType,
+                      (bit_length + 7) / 8, data, presult);
+}
+
+static void UpdatePicHeader(struct EncodeContext* encode_context, bool idr) {
+  encode_context->pic.decoded_curr_pic = (VAPictureHEVC){
+      .picture_id =
+          encode_context
+              ->recon_surface_ids[encode_context->frame_counter %
+                                  LENGTH(encode_context->recon_surface_ids)],
+      .pic_order_cnt = (int32_t)(encode_context->frame_counter %
+                                 encode_context->seq.intra_idr_period),
   };
+
+  if (idr) {
+    encode_context->pic.reference_frames[0] = (VAPictureHEVC){
+        .picture_id = VA_INVALID_ID,
+        .flags = VA_PICTURE_HEVC_INVALID,
+    };
+    encode_context->pic.nal_unit_type = IDR_W_RADL;
+    encode_context->pic.pic_fields.bits.idr_pic_flag = 1;
+    encode_context->pic.pic_fields.bits.coding_type = 1;
+  } else {
+    encode_context->pic.reference_frames[0] = (VAPictureHEVC){
+        .picture_id =
+            encode_context
+                ->recon_surface_ids[(encode_context->frame_counter - 1) %
+                                    LENGTH(encode_context->recon_surface_ids)],
+        .pic_order_cnt = (int32_t)((encode_context->frame_counter - 1) %
+                                   encode_context->seq.intra_idr_period),
+    };
+    encode_context->pic.nal_unit_type = TRAIL_R;
+    encode_context->pic.pic_fields.bits.idr_pic_flag = 0;
+    encode_context->pic.pic_fields.bits.coding_type = 2;
+  }
+}
+
+static bool DrainBuffers(int fd, struct iovec* iovec, int count) {
   for (;;) {
-    ssize_t result = writev(fd, iov, LENGTH(iov));
+    ssize_t result = writev(fd, iovec, count);
     if (result < 0) {
       if (errno == EINTR) continue;
       LOG("Failed to write (%s)", strerror(errno));
       return false;
     }
-    for (size_t i = 0; i < LENGTH(iov); i++) {
-      size_t delta = MIN((size_t)result, iov[i].iov_len);
-      iov[i].iov_base = (uint8_t*)iov[i].iov_base + delta;
-      iov[i].iov_len -= delta;
+    for (int i = 0; i < count; i++) {
+      size_t delta = MIN((size_t)result, iovec[i].iov_len);
+      iovec[i].iov_base = (uint8_t*)iovec[i].iov_base + delta;
+      iovec[i].iov_len -= delta;
       result -= delta;
     }
     if (!result) return true;
@@ -264,59 +657,177 @@ static bool DrainPacket(const struct AVPacket* packet, int fd) {
 
 bool EncodeContextEncodeFrame(struct EncodeContext* encode_context, int fd) {
   bool result = false;
-  if (encode_context->gpu_frame) {
-    GpuContextDestroyFrame(encode_context->gpu_context,
-                           encode_context->gpu_frame);
-    encode_context->gpu_frame = NULL;
-  }
-  AVPacket* packet = av_packet_alloc();
-  if (!packet) {
-    LOG("Failed to allocate packet (%s)", strerror(errno));
-    goto rollback_hw_frame;
-  }
-
-  int err = avcodec_send_frame(encode_context->codec_context,
-                               encode_context->hw_frame);
-  if (err < 0) {
-    LOG("Failed to send frame (%s)", av_err2str(err));
-    goto rollback_packet;
-  }
-
-  err = avcodec_receive_packet(encode_context->codec_context, packet);
-  switch (err) {
-    case 0:
-      break;
-    case AVERROR(EAGAIN):
-      // TODO(mburakov): This happens only for the very first frame, and
-      // effectively introduces an additional latency of 16ms...
-      result = true;
-      goto rollback_packet;
-    default:
-      LOG("Failed to receive packet (%s)", av_err2str(err));
-      goto rollback_packet;
-  }
-
-  result = DrainPacket(packet, fd);
-  av_packet_unref(packet);
-  if (!result) {
-    LOG("Failed to drain packet");
-    goto rollback_packet;
-  }
-
-rollback_packet:
-  av_packet_free(&packet);
-rollback_hw_frame:
-  av_frame_free(&encode_context->hw_frame);
+  VABufferID buffers[8];
+  VABufferID* buffer_ptr = buffers;
+
+  bool idr =
+      !(encode_context->frame_counter % encode_context->seq.intra_idr_period);
+  if (idr && !UploadBuffer(encode_context, VAEncSequenceParameterBufferType,
+                           sizeof(encode_context->seq), &encode_context->seq,
+                           &buffer_ptr)) {
+    LOG("Failed to upload sequence parameter buffer");
+    goto rollback_buffers;
+  }
+
+  if (encode_context->codec_quirks.packed_header_sequence && idr) {
+    char buffer[256];
+    struct Bitstream bitstream = {
+        .data = buffer,
+        .size = 0,
+    };
+
+    static const struct MoreVideoParameters mvp = {
+        .vps_max_dec_pic_buffering_minus1 = 1,  // No B-frames
+        .vps_max_num_reorder_pics = 0,  // No B-frames
+    };
+    uint32_t conf_win_right_offset_luma =
+        encode_context->seq.pic_width_in_luma_samples - encode_context->width;
+    uint32_t conf_win_bottom_offset_luma =
+        encode_context->seq.pic_height_in_luma_samples - encode_context->height;
+    const struct MoreSeqParameters msp = {
+        .conf_win_left_offset = 0,
+        .conf_win_right_offset = conf_win_right_offset_luma / 2,
+        .conf_win_top_offset = 0,
+        .conf_win_bottom_offset = conf_win_bottom_offset_luma / 2,
+        .sps_max_dec_pic_buffering_minus1 = 1,  // No B-frames
+        .sps_max_num_reorder_pics = 0,  // No B-frames
+        .video_signal_type_present_flag = 1,
+        .video_full_range_flag = encode_context->range == kFullRange,
+        .colour_description_present_flag = 1,
+        .colour_primaries = 2,  // Unspecified
+        .transfer_characteristics = 2,  // Unspecified
+        .matrix_coeffs =
+            encode_context->colorspace == kItuRec601 ? 6 : 1,  // Table E.5
+    };
+
+    PackVideoParameterSetNalUnit(&bitstream, &encode_context->seq, &mvp);
+    PackSeqParameterSetNalUnit(&bitstream, &encode_context->seq, &msp);
+    PackPicParameterSetNalUnit(&bitstream, &encode_context->pic);
+    if (!UploadPackedBuffer(encode_context, VAEncPackedHeaderSequence,
+                            (unsigned int)bitstream.size, bitstream.data,
+                            &buffer_ptr)) {
+      LOG("Failed to upload packed sequence header");
+      goto rollback_buffers;
+    }
+  }
+
+  UpdatePicHeader(encode_context, idr);
+  if (!UploadBuffer(encode_context, VAEncPictureParameterBufferType,
+                    sizeof(encode_context->pic), &encode_context->pic,
+                    &buffer_ptr)) {
+    LOG("Failed to upload picture parameter buffer");
+    goto rollback_buffers;
+  }
+
+  encode_context->slice.slice_type = idr ? I : P;
+  encode_context->slice.ref_pic_list0[0] =
+      encode_context->pic.reference_frames[0];
+  if (encode_context->codec_quirks.packed_header_slice) {
+    char buffer[256];
+    struct Bitstream bitstream = {
+        .data = buffer,
+        .size = 0,
+    };
+    static const struct NegativePics negative_pics[] = {
+        {
+            .delta_poc_s0_minus1 = 0,
+            .used_by_curr_pic_s0_flag = true,
+        },
+    };
+    const struct MoreSliceParamerters msp = {
+        .first_slice_segment_in_pic_flag = 1,
+        .num_negative_pics = idr ? 0 : LENGTH(negative_pics),
+        .negative_pics = idr ? NULL : negative_pics,
+    };
+    PackSliceSegmentHeaderNalUnit(&bitstream, &encode_context->seq,
+                                  &encode_context->pic, &encode_context->slice,
+                                  &msp);
+    if (!UploadPackedBuffer(encode_context, VAEncPackedHeaderSlice,
+                            (unsigned int)bitstream.size, bitstream.data,
+                            &buffer_ptr)) {
+      LOG("Failed to upload packed slice header");
+      goto rollback_buffers;
+    }
+  }
+
+  if (!UploadBuffer(encode_context, VAEncSliceParameterBufferType,
+                    sizeof(encode_context->slice), &encode_context->slice,
+                    &buffer_ptr)) {
+    LOG("Failed to upload slice parameter buffer");
+    goto rollback_buffers;
+  }
+
+  VAStatus status =
+      vaBeginPicture(encode_context->va_display, encode_context->va_context_id,
+                     encode_context->input_surface_id);
+  if (status != VA_STATUS_SUCCESS) {
+    LOG("Failed to begin va picture (%s)", VaErrorString(status));
+    goto rollback_buffers;
+  }
+
+  int num_buffers = (int)(buffer_ptr - buffers);
+  status = vaRenderPicture(encode_context->va_display,
+                           encode_context->va_context_id, buffers, num_buffers);
+  if (status != VA_STATUS_SUCCESS) {
+    LOG("Failed to render va picture (%s)", VaErrorString(status));
+    goto rollback_buffers;
+  }
+
+  status =
+      vaEndPicture(encode_context->va_display, encode_context->va_context_id);
+  if (status != VA_STATUS_SUCCESS) {
+    LOG("Failed to end va picture (%s)", VaErrorString(status));
+    goto rollback_buffers;
+  }
+
+  status = vaSyncBuffer(encode_context->va_display,
+                        encode_context->output_buffer_id, VA_TIMEOUT_INFINITE);
+  if (status != VA_STATUS_SUCCESS) {
+    LOG("Failed to sync va buffer (%s)", VaErrorString(status));
+    goto rollback_buffers;
+  }
+
+  VACodedBufferSegment* segment;
+  status = vaMapBuffer(encode_context->va_display,
+                       encode_context->output_buffer_id, (void**)&segment);
+  if (status != VA_STATUS_SUCCESS) {
+    LOG("Failed to map va buffer (%s)", VaErrorString(status));
+    goto rollback_buffers;
+  }
+  if (segment->next != NULL) {
+    LOG("Next segment non-null!");
+    abort();
+  }
+
+  struct iovec iovec[] = {
+      {.iov_base = &segment->size, .iov_len = sizeof(segment->size)},
+      {.iov_base = segment->buf, .iov_len = segment->size},
+  };
+  if (!DrainBuffers(fd, iovec, LENGTH(iovec))) {
+    LOG("Failed to drain encoded frame");
+    goto rollback_segment;
+  }
+
+  encode_context->frame_counter++;
+  result = true;
+
+rollback_segment:
+  vaUnmapBuffer(encode_context->va_display, encode_context->output_buffer_id);
+rollback_buffers:
+  while (buffer_ptr-- > buffers)
+    vaDestroyBuffer(encode_context->va_display, *buffer_ptr);
   return result;
 }
 
 void EncodeContextDestroy(struct EncodeContext* encode_context) {
-  if (encode_context->gpu_frame) {
-    GpuContextDestroyFrame(encode_context->gpu_context,
-                           encode_context->gpu_frame);
-  }
-  if (encode_context->hw_frame) av_frame_free(&encode_context->hw_frame);
-  avcodec_free_context(&encode_context->codec_context);
-  av_buffer_unref(&encode_context->hwdevice_context);
+  vaDestroyBuffer(encode_context->va_display, encode_context->output_buffer_id);
+  GpuContextDestroyFrame(encode_context->gpu_context,
+                         encode_context->gpu_frame);
+  vaDestroySurfaces(encode_context->va_display,
+                    &encode_context->input_surface_id, 1);
+  vaDestroyContext(encode_context->va_display, encode_context->va_context_id);
+  vaDestroyConfig(encode_context->va_display, encode_context->va_config_id);
+  vaTerminate(encode_context->va_display);
+  close(encode_context->render_node);
   free(encode_context);
 }
--
cgit v1.2.3