/*
 * Copyright (C) 2023 Mikhail Burakov. This file is part of streamer.
 *
 * streamer is free software: you can redistribute it and/or modify
 * it under the terms of the GNU General Public License as published by
 * the Free Software Foundation, either version 3 of the License, or
 * (at your option) any later version.
 *
 * streamer is distributed in the hope that it will be useful,
 * but WITHOUT ANY WARRANTY; without even the implied warranty of
 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
 * GNU General Public License for more details.
 *
 * You should have received a copy of the GNU General Public License
 * along with streamer. If not, see <https://www.gnu.org/licenses/>.
 */

#include <assert.h>
#include <errno.h>
#include <fcntl.h>
#include <stdbool.h>
#include <stdint.h>
#include <stdlib.h>
#include <string.h>
#include <sys/uio.h>
#include <unistd.h>
#include <va/va.h>
#include <va/va_drm.h>
#include <va/va_drmcommon.h>

#include "bitstream.h"
#include "encode.h"
#include "gpu.h"
#include "hevc.h"
#include "toolbox/utils.h"

struct EncodeContext {
  struct GpuContext* gpu_context;
  uint32_t width;
  uint32_t height;

  VAEncSequenceParameterBufferHEVC seq;
  VAEncPictureParameterBufferHEVC pic;
  VAEncMiscParameterRateControl rc;
  VAEncMiscParameterFrameRate fr;

  int render_node;
  VADisplay va_display;
  VAConfigID va_config_id;
  VAContextID va_context_id;
  VASurfaceID input_surface_id;
  struct GpuFrame* gpu_frame;
  VASurfaceID recon_surface_ids[2];
  VABufferID output_buffer_id;
  size_t frame_counter;
};

static const char* VaErrorString(VAStatus error) {
  static const char* va_error_strings[] = {
      "VA_STATUS_SUCCESS",
      "VA_STATUS_ERROR_OPERATION_FAILED",
      "VA_STATUS_ERROR_ALLOCATION_FAILED",
      "VA_STATUS_ERROR_INVALID_DISPLAY",
      "VA_STATUS_ERROR_INVALID_CONFIG",
      "VA_STATUS_ERROR_INVALID_CONTEXT",
      "VA_STATUS_ERROR_INVALID_SURFACE",
      "VA_STATUS_ERROR_INVALID_BUFFER",
      "VA_STATUS_ERROR_INVALID_IMAGE",
      "VA_STATUS_ERROR_INVALID_SUBPICTURE",
      "VA_STATUS_ERROR_ATTR_NOT_SUPPORTED",
      "VA_STATUS_ERROR_MAX_NUM_EXCEEDED",
      "VA_STATUS_ERROR_UNSUPPORTED_PROFILE",
      "VA_STATUS_ERROR_UNSUPPORTED_ENTRYPOINT",
      "VA_STATUS_ERROR_UNSUPPORTED_RT_FORMAT",
      "VA_STATUS_ERROR_UNSUPPORTED_BUFFERTYPE",
      "VA_STATUS_ERROR_SURFACE_BUSY",
      "VA_STATUS_ERROR_FLAG_NOT_SUPPORTED",
      "VA_STATUS_ERROR_INVALID_PARAMETER",
      "VA_STATUS_ERROR_RESOLUTION_NOT_SUPPORTED",
      "VA_STATUS_ERROR_UNIMPLEMENTED",
      "VA_STATUS_ERROR_SURFACE_IN_DISPLAYING",
      "VA_STATUS_ERROR_INVALID_IMAGE_FORMAT",
      "VA_STATUS_ERROR_DECODING_ERROR",
      "VA_STATUS_ERROR_ENCODING_ERROR",
      "VA_STATUS_ERROR_INVALID_VALUE",
      "???",
      "???",
      "???",
      "???",
      "???",
      "???",
      "VA_STATUS_ERROR_UNSUPPORTED_FILTER",
      "VA_STATUS_ERROR_INVALID_FILTER_CHAIN",
      "VA_STATUS_ERROR_HW_BUSY",
      "???",
      "VA_STATUS_ERROR_UNSUPPORTED_MEMORY_TYPE",
      "VA_STATUS_ERROR_NOT_ENOUGH_BUFFER",
      "VA_STATUS_ERROR_TIMEDOUT",
  };
  return VA_STATUS_SUCCESS <= error && error <= VA_STATUS_ERROR_TIMEDOUT
             ? va_error_strings[error - VA_STATUS_SUCCESS]
             : "???";
}

static void OnVaLogMessage(void* context, const char* message) {
  (void)context;
  size_t len = strlen(message);
  while (len > 0 && message[len - 1] == '\n') len--;
  LOG("%.*s", (int)len, message);
}

static struct GpuFrame* VaSurfaceToGpuFrame(VADisplay va_display,
                                            VASurfaceID va_surface_id,
                                            struct GpuContext* gpu_context) {
  VADRMPRIMESurfaceDescriptor prime;
  VAStatus status = vaExportSurfaceHandle(
      va_display, va_surface_id, VA_SURFACE_ATTRIB_MEM_TYPE_DRM_PRIME_2,
      VA_EXPORT_SURFACE_WRITE_ONLY | VA_EXPORT_SURFACE_COMPOSED_LAYERS, &prime);
  if (status != VA_STATUS_SUCCESS) {
    LOG("Failed to export va surface (%s)", VaErrorString(status));
    return NULL;
  }

  struct GpuFramePlane planes[] = {{.dmabuf_fd = -1},
                                   {.dmabuf_fd = -1},
                                   {.dmabuf_fd = -1},
                                   {.dmabuf_fd = -1}};
  static_assert(LENGTH(planes) == LENGTH(prime.layers[0].object_index),
                "Suspicious VADRMPRIMESurfaceDescriptor structure");

  for (size_t i = 0; i < prime.layers[0].num_planes; i++) {
    uint32_t object_index = prime.layers[0].object_index[i];
    planes[i] = (struct GpuFramePlane){
        .dmabuf_fd = prime.objects[object_index].fd,
        .pitch = prime.layers[0].pitch[i],
        .offset = prime.layers[0].offset[i],
        .modifier = prime.objects[object_index].drm_format_modifier,
    };
  }

  struct GpuFrame* gpu_frame =
      GpuContextCreateFrame(gpu_context, prime.width, prime.height,
                            prime.fourcc, prime.layers[0].num_planes, planes);
  if (!gpu_frame) {
    LOG("Failed to create gpu frame");
    goto release_planes;
  }
  return gpu_frame;

release_planes:
  CloseUniqueFds((int[]){planes[0].dmabuf_fd, planes[1].dmabuf_fd,
                         planes[2].dmabuf_fd, planes[3].dmabuf_fd});
  return NULL;
}

struct EncodeContext* EncodeContextCreate(struct GpuContext* gpu_context,
                                          uint32_t width, uint32_t height,
                                          enum YuvColorspace colorspace,
                                          enum YuvRange range) {
  struct EncodeContext* encode_context = malloc(sizeof(struct EncodeContext));
  if (!encode_context) {
    LOG("Failed to allocate encode context (%s)", strerror(errno));
    return NULL;
  }

  // TODO(mburakov): ffmpeg attempts to deduce this.
  static const uint32_t min_cb_size = 16;
  uint32_t width_in_cb = (width + min_cb_size - 1) / min_cb_size;
  uint32_t height_in_cb = (height + min_cb_size - 1) / min_cb_size;

  // TODO(mburakov): in the same deduction slice block size is set to 32.
  *encode_context = (struct EncodeContext){
      .gpu_context = gpu_context,
      .width = width,
      .height = height,
  };

  // TODO(mburakov): ffmpeg initializes SPS like this.
  encode_context->seq = (VAEncSequenceParameterBufferHEVC){
      .general_profile_idc = 1,  // Main profile
      .general_level_idc = 120,  // Level 4
      .general_tier_flag = 0,    // Main tier

      .intra_period = 120,      // Where this one comes from?
      .intra_idr_period = 120,  // Each I frame is an IDR frame
      .ip_period = 1,           // No B-frames
      .bits_per_second = 0,     // To be configured later?

      .pic_width_in_luma_samples = (uint16_t)(width_in_cb * min_cb_size),
      .pic_height_in_luma_samples = (uint16_t)(height_in_cb * min_cb_size),

      .seq_fields.bits =
          {
              .chroma_format_idc = 1,                    // 4:2:0
              .separate_colour_plane_flag = 0,           // Table 6-1
              .bit_depth_luma_minus8 = 0,                // 8 bpp luma
              .bit_depth_chroma_minus8 = 0,              // 8 bpp chroma
              .scaling_list_enabled_flag = 0,            // ???
              .strong_intra_smoothing_enabled_flag = 0,  // ???

              // mburakov: ffmpeg hardcodes these for i965 Skylake driver.
              .amp_enabled_flag = 1,
              .sample_adaptive_offset_enabled_flag = 0,
              .pcm_enabled_flag = 0,
              .pcm_loop_filter_disabled_flag = 0,  // ???
              .sps_temporal_mvp_enabled_flag = 0,

              // TODO(mburakov): ffmpeg does not set below flags.
              // .low_delay_seq = 0,     // Probably should be 1
              // .hierachical_flag = 0,  // ???
          },

      // mburakov: ffmpeg hardcodes these for i965 Skylake driver.
      .log2_min_luma_coding_block_size_minus3 = 0,
      .log2_diff_max_min_luma_coding_block_size = 2,
      .log2_min_transform_block_size_minus2 = 0,
      .log2_diff_max_min_transform_block_size = 3,
      .max_transform_hierarchy_depth_inter = 3,
      .max_transform_hierarchy_depth_intra = 3,

      .pcm_sample_bit_depth_luma_minus1 = 0,            // ???
      .pcm_sample_bit_depth_chroma_minus1 = 0,          // ???
      .log2_min_pcm_luma_coding_block_size_minus3 = 0,  // ???
      .log2_max_pcm_luma_coding_block_size_minus3 = 0,  // ???

      // mburakov: ffmpeg hardcodes this to 0.
      .vui_parameters_present_flag = 1,
      .vui_fields.bits =
          {
              .aspect_ratio_info_present_flag = 0,           // defaulted
              .neutral_chroma_indication_flag = 0,           // defaulted
              .field_seq_flag = 0,                           // defaulted
              .vui_timing_info_present_flag = 1,             // hardcoded
              .bitstream_restriction_flag = 1,               // hardcoded
              .tiles_fixed_structure_flag = 0,               // defaulted
              .motion_vectors_over_pic_boundaries_flag = 1,  // hardcoded
              .restricted_ref_pic_lists_flag = 1,            // hardcoded
              .log2_max_mv_length_horizontal = 15,           // hardcoded
              .log2_max_mv_length_vertical = 15,             // hardcoded
          },

      .vui_num_units_in_tick = 1,         // TODO
      .vui_time_scale = 60,               // TODO
      .min_spatial_segmentation_idc = 0,  // defaulted
      .max_bytes_per_pic_denom = 0,       // hardcoded
      .max_bits_per_min_cu_denom = 0,     // hardcoded

      // TODO(mburakov): ffmpeg leaves rest of the structure zero-initialized.
  };

  // TODO(mburakov): ffmpeg initializes PPS like this.
  encode_context->pic = (VAEncPictureParameterBufferHEVC){
      .decoded_curr_pic.picture_id = VA_INVALID_ID,
      .decoded_curr_pic.flags = VA_PICTURE_HEVC_INVALID,
      .coded_buf = VA_INVALID_ID,
      .collocated_ref_pic_index =
          encode_context->seq.seq_fields.bits.sps_temporal_mvp_enabled_flag
              ? 0
              : 0xff,
      .last_picture = 0,

      // mburakov: ffmpeg hardcodes initial value for non-CQP rate control.
      .pic_init_qp = 30,
      .diff_cu_qp_delta_depth = 0,
      .pps_cb_qp_offset = 0,
      .pps_cr_qp_offset = 0,

      .num_tile_columns_minus1 = 0,           // No tiles
      .num_tile_rows_minus1 = 0,              // No tiles
      .log2_parallel_merge_level_minus2 = 0,  // ???

      // mburakov: ffmpeg hardcodes this to 0.
      .ctu_max_bitsize_allowed = 0,

      // mburakov: ffmpeg hardcodes both to 0.
      .num_ref_idx_l0_default_active_minus1 = 0,
      .num_ref_idx_l1_default_active_minus1 = 0,

      // TODO(mburakov): Should this be incremented on IDR?
      .slice_pic_parameter_set_id = 0,

      // TODO(mburakov): ffmpeg does not set below value.
      // .nal_unit_type = 0,

      .pic_fields.bits =
          {
              // mburakov: ffmpeg sets the flags below for each picture.
              // .idr_pic_flag = 0,
              // .coding_type = 0,
              // .reference_pic_flag = 0,

              // TODO(mburakov): ffmpeg does not set the flag below.
              // .dependent_slice_segments_enabled_flag = 0,

              .sign_data_hiding_enabled_flag = 0,  // ???
              .constrained_intra_pred_flag = 0,    // ???

              // TODO(mburakov): ffmpeg attempts to deduce the flag below.
              .transform_skip_enabled_flag = 0,

              // mburakov: ffmpeg enables this for non-CQP rate control.
              .cu_qp_delta_enabled_flag = 1,

              .weighted_pred_flag = 0,                     // ???
              .weighted_bipred_flag = 0,                   // ???
              .transquant_bypass_enabled_flag = 0,         // ???
              .tiles_enabled_flag = 0,                     // No tiles
              .entropy_coding_sync_enabled_flag = 0,       // ???
              .loop_filter_across_tiles_enabled_flag = 0,  // No tiles

              // mburakov: ffmpeg hardcodes the flag below.
              .pps_loop_filter_across_slices_enabled_flag = 1,
              .scaling_list_data_present_flag = 0,  // ???

              // mburakov: ffmpeg hardcodes the flags below.
              .screen_content_flag = 0,
              .enable_gpu_weighted_prediction = 0,
              .no_output_of_prior_pics_flag = 0,
          },

      // TODO(mburakov): ffmpeg does not set values below.
      // .hierarchical_level_plus1 = 0,  // ???
      // .scc_fields.value = 0,          // ???
  };

  // TODO(mburakov): ffmpeg initializes RC like this:
  encode_context->rc = (VAEncMiscParameterRateControl){
      .bits_per_second = 0,      // Hardcoded for non-bitrate
      .target_percentage = 100,  // Hardcoded for non-bitrate
      .window_size = 1000,       // Hardcoded for non-AVBR
      .initial_qp = 0,           // Hardcoded
      .min_qp = 0,               // Comes from context
      .basic_unit_size = 0,      // Hardcoded
      .ICQ_quality_factor = 28,  // Comes from context - clipped [1, 51]
      .max_qp = 0,               // Comes from context
      .quality_factor = 28,      // Comes from context - non-clipped

      // TODO(mburakov): ffmpeg does not set below value.
      // .target_frame_size = 0,
  };

  // TODO(mburakov): ffmpeg initializes FR like this:
  encode_context->fr = (VAEncMiscParameterFrameRate){
      .framerate = (1 << 16) | 60,  // Comes from context

      // TODO(mburakov): ffmpeg does not set below value.
      // .framerate_flags.value = 0,
  };

  encode_context->render_node = open("/dev/dri/renderD128", O_RDWR);
  if (encode_context->render_node == -1) {
    LOG("Failed to open render node (%s)", strerror(errno));
    goto rollback_encode_context;
  }

  encode_context->va_display = vaGetDisplayDRM(encode_context->render_node);
  if (!encode_context->va_display) {
    LOG("Failed to get va display (%s)", strerror(errno));
    goto rollback_render_node;
  }

  vaSetErrorCallback(encode_context->va_display, OnVaLogMessage, NULL);
#ifndef NDEBUG
  vaSetInfoCallback(encode_context->va_display, OnVaLogMessage, NULL);
#endif  // NDEBUG

  int major, minor;
  VAStatus status = vaInitialize(encode_context->va_display, &major, &minor);
  if (status != VA_STATUS_SUCCESS) {
    LOG("Failed to initialize va (%s)", VaErrorString(status));
    goto rollback_va_display;
  }
  LOG("Initialized VA %d.%d", major, minor);

  // TODO(mburakov): Check entry points?
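  // One way to approach the TODO above: before creating the config, ask the
  // driver which rate control modes it reports for VAProfileHEVCMain with
  // VAEntrypointEncSlice, and bail out early if ICQ (requested below) is not
  // among them. A minimal sketch using stock libva calls, kept disabled:
#if 0
  VAConfigAttrib rc_attrib = {.type = VAConfigAttribRateControl};
  status = vaGetConfigAttributes(encode_context->va_display, VAProfileHEVCMain,
                                 VAEntrypointEncSlice, &rc_attrib, 1);
  if (status != VA_STATUS_SUCCESS ||
      rc_attrib.value == VA_ATTRIB_NOT_SUPPORTED ||
      !(rc_attrib.value & VA_RC_ICQ)) {
    LOG("ICQ rate control is not supported by this driver");
    goto rollback_va_display;
  }
#endif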
#if 1
  // TODO(mburakov): AMD only supports CQP and no packed headers.
  VAConfigAttrib config_attribs[] = {
      {.type = VAConfigAttribRTFormat, .value = VA_RT_FORMAT_YUV420},
      {.type = VAConfigAttribRateControl, .value = VA_RC_ICQ},
      {.type = VAConfigAttribEncPackedHeaders,
       .value = VA_ENC_PACKED_HEADER_MISC | VA_ENC_PACKED_HEADER_SLICE |
                VA_ENC_PACKED_HEADER_SEQUENCE},
  };
#else
  VAConfigAttrib config_attribs[] = {
      {.type = VAConfigAttribRTFormat, .value = VA_RT_FORMAT_YUV420},
      {.type = VAConfigAttribRateControl, .value = VA_RC_CQP},
  };
#endif
  status = vaCreateConfig(
      encode_context->va_display, VAProfileHEVCMain, VAEntrypointEncSlice,
      config_attribs, LENGTH(config_attribs), &encode_context->va_config_id);
  if (status != VA_STATUS_SUCCESS) {
    LOG("Failed to create va config (%s)", VaErrorString(status));
    goto rollback_va_display;
  }

  status = vaCreateContext(
      encode_context->va_display, encode_context->va_config_id,
      (int)(width_in_cb * min_cb_size), (int)(height_in_cb * min_cb_size),
      VA_PROGRESSIVE, NULL, 0, &encode_context->va_context_id);
  if (status != VA_STATUS_SUCCESS) {
    LOG("Failed to create va context (%s)", VaErrorString(status));
    goto rollback_va_config_id;
  }

  status = vaCreateSurfaces(encode_context->va_display, VA_RT_FORMAT_YUV420,
                            width, height, &encode_context->input_surface_id,
                            1, NULL, 0);
  if (status != VA_STATUS_SUCCESS) {
    LOG("Failed to create va input surface (%s)", VaErrorString(status));
    goto rollback_va_context_id;
  }

  encode_context->gpu_frame = VaSurfaceToGpuFrame(
      encode_context->va_display, encode_context->input_surface_id,
      encode_context->gpu_context);
  if (!encode_context->gpu_frame) {
    LOG("Failed to convert va surface to gpu frame");
    goto rollback_input_surface_id;
  }

  status = vaCreateSurfaces(encode_context->va_display, VA_RT_FORMAT_YUV420,
                            width_in_cb * min_cb_size,
                            height_in_cb * min_cb_size,
                            encode_context->recon_surface_ids,
                            LENGTH(encode_context->recon_surface_ids), NULL, 0);
  if (status != VA_STATUS_SUCCESS) {
    LOG("Failed to create va recon surfaces (%s)", VaErrorString(status));
    goto rollback_gpu_frame;
  }

  unsigned int max_encoded_size =
      encode_context->width * encode_context->height * 3 / 2;
  status = vaCreateBuffer(encode_context->va_display,
                          encode_context->va_context_id, VAEncCodedBufferType,
                          max_encoded_size, 1, NULL,
                          &encode_context->output_buffer_id);
  if (status != VA_STATUS_SUCCESS) {
    LOG("Failed to create va output buffer (%s)", VaErrorString(status));
    goto rollback_recon_surface_ids;
  }
  return encode_context;

rollback_recon_surface_ids:
  vaDestroySurfaces(encode_context->va_display,
                    encode_context->recon_surface_ids,
                    LENGTH(encode_context->recon_surface_ids));
rollback_gpu_frame:
  GpuContextDestroyFrame(encode_context->gpu_context,
                         encode_context->gpu_frame);
rollback_input_surface_id:
  vaDestroySurfaces(encode_context->va_display,
                    &encode_context->input_surface_id, 1);
rollback_va_context_id:
  vaDestroyContext(encode_context->va_display, encode_context->va_context_id);
rollback_va_config_id:
  vaDestroyConfig(encode_context->va_display, encode_context->va_config_id);
rollback_va_display:
  vaTerminate(encode_context->va_display);
rollback_render_node:
  close(encode_context->render_node);
rollback_encode_context:
  free(encode_context);
  return NULL;
}

const struct GpuFrame* EncodeContextGetFrame(
    struct EncodeContext* encode_context) {
  return encode_context->gpu_frame;
}

static bool UploadBuffer(const struct EncodeContext* encode_context,
                         VABufferType va_buffer_type, unsigned int size,
                         void* data, VABufferID** presult) {
  VAStatus status =
      vaCreateBuffer(encode_context->va_display, encode_context->va_context_id,
                     va_buffer_type, size, 1, data, *presult);
  if (status != VA_STATUS_SUCCESS) {
    LOG("Failed to create buffer (%s)", VaErrorString(status));
    return false;
  }
  (*presult)++;
  return true;
}

static bool UploadMiscBuffer(const struct EncodeContext* encode_context,
                             VAEncMiscParameterType misc_parameter_type,
                             unsigned int size, const void* data,
                             VABufferID** presult) {
  uint8_t stack_allocated_storage[sizeof(VAEncMiscParameterBuffer) + size];
  VAEncMiscParameterBuffer* buffer =
      (VAEncMiscParameterBuffer*)stack_allocated_storage;
  buffer->type = misc_parameter_type;
  memcpy(buffer->data, data, size);
  return UploadBuffer(encode_context, VAEncMiscParameterBufferType,
                      (unsigned int)sizeof(stack_allocated_storage),
                      stack_allocated_storage, presult);
}

static bool UploadPackedBuffer(const struct EncodeContext* encode_context,
                               VAEncPackedHeaderType packed_header_type,
                               unsigned int bit_length, void* data,
                               VABufferID** presult) {
  VAEncPackedHeaderParameterBuffer packed_header = {
      .type = packed_header_type,
      .bit_length = bit_length,
      .has_emulation_bytes = 1,
  };
  return UploadBuffer(encode_context, VAEncPackedHeaderParameterBufferType,
                      sizeof(packed_header), &packed_header, presult) &&
         UploadBuffer(encode_context, VAEncPackedHeaderDataBufferType,
                      (bit_length + 7) / 8, data, presult);
}
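// Writes all iovec entries to the given fd, retrying on EINTR and resuming
// after short writes until every buffer has been fully drained. Note that the
// caller's iovec array is modified in place while doing so.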
static bool DrainBuffers(int fd, struct iovec* iovec, int count) {
  for (;;) {
    ssize_t result = writev(fd, iovec, count);
    if (result < 0) {
      if (errno == EINTR) continue;
      LOG("Failed to write (%s)", strerror(errno));
      return false;
    }
    for (int i = 0; i < count; i++) {
      size_t delta = MIN((size_t)result, iovec[i].iov_len);
      iovec[i].iov_base = (uint8_t*)iovec[i].iov_base + delta;
      iovec[i].iov_len -= delta;
      result -= delta;
    }
    if (!result) return true;
  }
}
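// Encodes whatever is currently in the input surface: uploads the per-frame
// parameter buffers (sequence, rate control and frame rate on IDR frames,
// then picture, packed headers and slice), submits them with
// vaBeginPicture/vaRenderPicture/vaEndPicture, waits for the coded buffer,
// and streams the result to fd as the payload size followed by the payload.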
bool EncodeContextEncodeFrame(struct EncodeContext* encode_context, int fd) {
  VABufferID buffers[12];
  VABufferID* buffer_ptr = buffers;

  bool idr =
      encode_context->frame_counter % encode_context->seq.intra_idr_period ==
      0;
  if (idr && !UploadBuffer(encode_context, VAEncSequenceParameterBufferType,
                           sizeof(encode_context->seq), &encode_context->seq,
                           &buffer_ptr)) {
    LOG("Failed to upload sequence parameter buffer");
    return false;
  }

  bool result = false;
  if (idr) {
    if (!UploadMiscBuffer(encode_context, VAEncMiscParameterTypeRateControl,
                          sizeof(encode_context->rc), &encode_context->rc,
                          &buffer_ptr)) {
      LOG("Failed to upload rate control buffer");
      goto rollback_buffers;
    }
    if (!UploadMiscBuffer(encode_context, VAEncMiscParameterTypeFrameRate,
                          sizeof(encode_context->fr), &encode_context->fr,
                          &buffer_ptr)) {
      LOG("Failed to upload frame rate buffer");
      goto rollback_buffers;
    }
  }

  // TODO(mburakov): Implement this!!!
  encode_context->pic.decoded_curr_pic = (VAPictureHEVC){
      .picture_id =
          encode_context
              ->recon_surface_ids[encode_context->frame_counter %
                                  LENGTH(encode_context->recon_surface_ids)],
      .pic_order_cnt = (int32_t)(encode_context->frame_counter %
                                 encode_context->seq.intra_idr_period),
      .flags = 0,
  };

  for (size_t i = 0; i < LENGTH(encode_context->pic.reference_frames); i++) {
    encode_context->pic.reference_frames[i] = (VAPictureHEVC){
        .picture_id = VA_INVALID_ID,
        .flags = VA_PICTURE_HEVC_INVALID,
    };
  }
  if (!idr) {
    encode_context->pic.reference_frames[0] = (VAPictureHEVC){
        .picture_id =
            encode_context
                ->recon_surface_ids[(encode_context->frame_counter - 1) %
                                    LENGTH(encode_context->recon_surface_ids)],
        .pic_order_cnt = (int32_t)((encode_context->frame_counter - 1) %
                                   encode_context->seq.intra_idr_period),
    };
  }

  encode_context->pic.coded_buf = encode_context->output_buffer_id;
  encode_context->pic.nal_unit_type = idr ? IDR_W_RADL : TRAIL_R;
  encode_context->pic.pic_fields.bits.idr_pic_flag = idr;
  encode_context->pic.pic_fields.bits.coding_type = idr ? 1 : 2;
  encode_context->pic.pic_fields.bits.reference_pic_flag = 1;
  if (!UploadBuffer(encode_context, VAEncPictureParameterBufferType,
                    sizeof(encode_context->pic), &encode_context->pic,
                    &buffer_ptr)) {
    LOG("Failed to upload picture parameter buffer");
    goto rollback_buffers;
  }

  if (idr) {
    char buffer[256];
    struct Bitstream bitstream = {
        .data = buffer,
        .size = 0,
    };
    static const struct MoreVideoParameters mvp = {
        .vps_max_dec_pic_buffering_minus1 = 1,  // No B-frames
        .vps_max_num_reorder_pics = 0,          // No B-frames
    };
    PackVideoParameterSetNalUnit(&bitstream, &encode_context->seq, &mvp);
    const struct MoreSeqParameters msp = {
        .conf_win_left_offset = 0,
        .conf_win_right_offset =
            (encode_context->seq.pic_width_in_luma_samples -
             encode_context->width) /
            2,
        .conf_win_top_offset = 0,
        .conf_win_bottom_offset =
            (encode_context->seq.pic_height_in_luma_samples -
             encode_context->height) /
            2,
        .sps_max_dec_pic_buffering_minus1 = 1,  // No B-frames
        .sps_max_num_reorder_pics = 0,          // No B-frames
        .video_signal_type_present_flag = 1,
        .video_full_range_flag = 0,  // TODO
        .colour_description_present_flag = 1,
        .colour_primaries = 2,          // Unspecified
        .transfer_characteristics = 2,  // Unspecified
        .matrix_coeffs = 6,             // TODO
    };
    PackSeqParameterSetNalUnit(&bitstream, &encode_context->seq, &msp);
    PackPicParameterSetNalUnit(&bitstream, &encode_context->pic);
    if (!UploadPackedBuffer(encode_context, VAEncPackedHeaderSequence,
                            (unsigned int)bitstream.size, bitstream.data,
                            &buffer_ptr)) {
      LOG("Failed to upload packed sequence header");
      goto rollback_buffers;
    }
  }

  // if (IDR) {
  //   VAEncSequenceParameterBufferType
  // }
  // if (IDR) {
  //   VAEncMiscParameterBufferType (VAEncMiscParameterTypeRateControl)
  //   VAEncMiscParameterBufferType (VAEncMiscParameterTypeFrameRate)
  // }
  // VAEncPictureParameterBufferType
  // if (IDR) {
  //   VAEncPackedHeaderParameterBufferType (VAEncPackedHeaderSequence)
  //   VAEncPackedHeaderDataBufferType (VPS+SPS+PPS)
  // }
  // VAEncPackedHeaderParameterBufferType (VAEncPackedHeaderSlice)
  // VAEncPackedHeaderDataBufferType (slice header)
  // VAEncSliceParameterBufferType

#if 0
  if (pic->type == PICTURE_TYPE_IDR) {
    av_assert0(pic->display_order == pic->encode_order);
    hpic->last_idr_frame = pic->display_order;
    hpic->slice_nal_unit = HEVC_NAL_IDR_W_RADL;
    hpic->slice_type = HEVC_SLICE_I;
    hpic->pic_type = 0;
  } else {
    av_assert0(prev);
    hpic->last_idr_frame = hprev->last_idr_frame;
    if (pic->type == PICTURE_TYPE_I) {
      hpic->slice_nal_unit = HEVC_NAL_CRA_NUT;
      hpic->slice_type = HEVC_SLICE_I;
      hpic->pic_type = 0;
    } else if (pic->type == PICTURE_TYPE_P) {
      av_assert0(pic->refs[0]);
      hpic->slice_nal_unit = HEVC_NAL_TRAIL_R;
      hpic->slice_type = HEVC_SLICE_P;
      hpic->pic_type = 1;
    } else {
#endif

  // hpic->slice_nal_unit = HEVC_NAL_IDR_W_RADL;
  // hpic->slice_type = HEVC_SLICE_I;
  // hpic->pic_type = 0;
  // hpic->pic_order_cnt = pic->display_order - hpic->last_idr_frame;
  // priv->sei_needed = 0;
  // // priv->sei == 56

  // vpic->decoded_curr_pic = (VAPictureHEVC) {
  //     .picture_id = pic->recon_surface,
  //     .pic_order_cnt = hpic->pic_order_cnt,
  //     .flags = 0,
  // };
  //

#if 0
  for (i = 0; i < pic->nb_refs; i++) {
    VAAPIEncodePicture *ref = pic->refs[i];
    VAAPIEncodeH265Picture *href;
    av_assert0(ref && ref->encode_order < pic->encode_order);
    href = ref->priv_data;
    vpic->reference_frames[i] = (VAPictureHEVC) {
        .picture_id = ref->recon_surface,
        .pic_order_cnt = href->pic_order_cnt,
        .flags = (ref->display_order < pic->display_order ?
                  VA_PICTURE_HEVC_RPS_ST_CURR_BEFORE : 0) |
                 (ref->display_order > pic->display_order ?
                  VA_PICTURE_HEVC_RPS_ST_CURR_AFTER : 0),
    };
  }
  for (; i < FF_ARRAY_ELEMS(vpic->reference_frames); i++) {
    vpic->reference_frames[i] = (VAPictureHEVC) {
        .picture_id = VA_INVALID_ID,
        .flags = VA_PICTURE_HEVC_INVALID,
    };
  }
#endif

#if 0
  vpic->coded_buf = pic->output_buffer;
  vpic->nal_unit_type = hpic->slice_nal_unit;
  switch (pic->type) {
    case PICTURE_TYPE_IDR:
      vpic->pic_fields.bits.idr_pic_flag = 1;
      vpic->pic_fields.bits.coding_type = 1;
      vpic->pic_fields.bits.reference_pic_flag = 1;
      break;
    case PICTURE_TYPE_I:
      vpic->pic_fields.bits.idr_pic_flag = 0;
      vpic->pic_fields.bits.coding_type = 1;
      vpic->pic_fields.bits.reference_pic_flag = 1;
      break;
    case PICTURE_TYPE_P:
      vpic->pic_fields.bits.idr_pic_flag = 0;
      vpic->pic_fields.bits.coding_type = 2;
      vpic->pic_fields.bits.reference_pic_flag = 1;
      break;
#endif

  // slice_block_rows(34) = (height + slice_block_size - 1) / slice_block_size;
  // slice_block_cols(60) = (width + slice_block_size - 1) / slice_block_size;

  // TODO(mburakov): see comment in EncodeContextCreate.
  static const uint32_t slice_block_size = 32;
  uint32_t slice_block_rows =
      (encode_context->height + slice_block_size - 1) / slice_block_size;
  uint32_t slice_block_cols =
      (encode_context->width + slice_block_size - 1) / slice_block_size;
  uint32_t block_size = slice_block_rows * slice_block_cols;

  VAEncSliceParameterBufferHEVC slice = {
      .slice_segment_address = 0,  // calculated
      .num_ctu_in_slice = block_size,
      .slice_type = idr ? I : P,  // calculated
      .slice_pic_parameter_set_id =
          encode_context->pic.slice_pic_parameter_set_id,

      .num_ref_idx_l0_active_minus1 =
          encode_context->pic.num_ref_idx_l0_default_active_minus1,
      .num_ref_idx_l1_active_minus1 =
          encode_context->pic.num_ref_idx_l1_default_active_minus1,
      .luma_log2_weight_denom = 0,          // ???
      .delta_chroma_log2_weight_denom = 0,  // ???

      // TODO(mburakov): ffmpeg does not initialize below entries.
      // .delta_luma_weight_l0[15],
      // .luma_offset_l0[15],
      // .delta_chroma_weight_l0[15][2],
      // .chroma_offset_l0[15][2],
      // .delta_luma_weight_l1[15],
      // .luma_offset_l1[15],
      // .delta_chroma_weight_l1[15][2],
      // .chroma_offset_l1[15][2],

      .max_num_merge_cand = 5 - 0,  // ???
      .slice_qp_delta = 0,          // evals to zero for CQP???
      .slice_cb_qp_offset = 0,      // ???
      .slice_cr_qp_offset = 0,      // ???
      .slice_beta_offset_div2 = 0,  // ???
      .slice_tc_offset_div2 = 0,    // ???

      .slice_fields.bits =
          {
              // TODO(mburakov): We only have a single slice?
              .last_slice_of_pic_flag = 1,

              .dependent_slice_segment_flag = 0,  // ???
              .colour_plane_id = 0,               // ???
              .slice_temporal_mvp_enabled_flag =
                  encode_context->seq.seq_fields.bits
                      .sps_temporal_mvp_enabled_flag,
              .slice_sao_luma_flag =
                  encode_context->seq.seq_fields.bits
                      .sample_adaptive_offset_enabled_flag,
              .slice_sao_chroma_flag =
                  encode_context->seq.seq_fields.bits
                      .sample_adaptive_offset_enabled_flag,
              .num_ref_idx_active_override_flag = 0,               // ???
              .mvd_l1_zero_flag = 0,                               // ???
              .cabac_init_flag = 0,                                // ???
              .slice_deblocking_filter_disabled_flag = 0,          // ???
              .slice_loop_filter_across_slices_enabled_flag = 0,   // ???
              .collocated_from_l0_flag = 0,                        // ???
          },

      // TODO(mburakov): ffmpeg does not initialize below entries.
      // .pred_weight_table_bit_offset = 0,
      // .pred_weight_table_bit_length = 0,
  };

  for (size_t i = 0; i < LENGTH(slice.ref_pic_list0); i++) {
    slice.ref_pic_list0[i].picture_id = VA_INVALID_ID;
    slice.ref_pic_list0[i].flags = VA_PICTURE_HEVC_INVALID;
    slice.ref_pic_list1[i].picture_id = VA_INVALID_ID;
    slice.ref_pic_list1[i].flags = VA_PICTURE_HEVC_INVALID;
  }
  if (!idr) {
    slice.ref_pic_list0[0] = (VAPictureHEVC){
        .picture_id =
            encode_context
                ->recon_surface_ids[(encode_context->frame_counter - 1) %
                                    LENGTH(encode_context->recon_surface_ids)],
        .pic_order_cnt = (int32_t)((encode_context->frame_counter - 1) %
                                   encode_context->seq.intra_idr_period),
    };
  }

  // TODO(mburakov): ffmpeg assigns reference frame for non-I-frames here.
  char buffer[256];
  struct Bitstream bitstream = {
      .data = buffer,
      .size = 0,
  };
  const struct MoreSliceParamerters msp = {
      .first_slice_segment_in_pic_flag = 1,
      .num_negative_pics = 1,
      .negative_pics = &(struct NegativePics){0, 1},
  };
  PackSliceSegmentHeaderNalUnit(&bitstream, &encode_context->seq,
                                &encode_context->pic, &slice, &msp);
  if (!UploadPackedBuffer(encode_context, VAEncPackedHeaderSlice,
                          (unsigned int)bitstream.size, bitstream.data,
                          &buffer_ptr)) {
    LOG("Failed to upload packed slice header");
    goto rollback_buffers;
  }

  if (!UploadBuffer(encode_context, VAEncSliceParameterBufferType,
                    sizeof(slice), &slice, &buffer_ptr)) {
    LOG("Failed to upload slice parameter buffer");
    goto rollback_buffers;
  }

  VAStatus status = vaBeginPicture(encode_context->va_display,
                                   encode_context->va_context_id,
                                   encode_context->input_surface_id);
  if (status != VA_STATUS_SUCCESS) {
    LOG("Failed to begin va picture (%s)", VaErrorString(status));
    goto rollback_buffers;
  }

  int num_buffers = (int)(buffer_ptr - buffers);
  status = vaRenderPicture(encode_context->va_display,
                           encode_context->va_context_id, buffers,
                           num_buffers);
  if (status != VA_STATUS_SUCCESS) {
    LOG("Failed to render va picture (%s)", VaErrorString(status));
    goto rollback_buffers;
  }

  status =
      vaEndPicture(encode_context->va_display, encode_context->va_context_id);
  if (status != VA_STATUS_SUCCESS) {
    LOG("Failed to end va picture (%s)", VaErrorString(status));
    goto rollback_buffers;
  }

  status = vaSyncBuffer(encode_context->va_display,
                        encode_context->output_buffer_id, VA_TIMEOUT_INFINITE);
  if (status != VA_STATUS_SUCCESS) {
    LOG("Failed to sync va buffer (%s)", VaErrorString(status));
    goto rollback_buffers;
  }

  VACodedBufferSegment* segment;
  status = vaMapBuffer(encode_context->va_display,
                       encode_context->output_buffer_id, (void**)&segment);
  if (status != VA_STATUS_SUCCESS) {
    LOG("Failed to map va buffer (%s)", VaErrorString(status));
    goto rollback_buffers;
  }
  if (segment->next != NULL) {
    LOG("Next segment non-null!");
    abort();
  }

  struct iovec iovec[] = {
      {.iov_base = &segment->size, .iov_len = sizeof(segment->size)},
      {.iov_base = segment->buf, .iov_len = segment->size},
  };
  if (!DrainBuffers(fd, iovec, LENGTH(iovec))) {
    LOG("Failed to drain encoded frame");
    goto rollback_segment;
  }

  encode_context->frame_counter++;
  result = true;

rollback_segment:
  vaUnmapBuffer(encode_context->va_display, encode_context->output_buffer_id);
rollback_buffers:
  while (buffer_ptr-- > buffers)
    vaDestroyBuffer(encode_context->va_display, *buffer_ptr);
  return result;
}

void EncodeContextDestroy(struct EncodeContext* encode_context) {
  vaDestroyBuffer(encode_context->va_display,
                  encode_context->output_buffer_id);
  vaDestroySurfaces(encode_context->va_display,
                    encode_context->recon_surface_ids,
                    LENGTH(encode_context->recon_surface_ids));
  GpuContextDestroyFrame(encode_context->gpu_context,
                         encode_context->gpu_frame);
  vaDestroySurfaces(encode_context->va_display,
                    &encode_context->input_surface_id, 1);
  vaDestroyContext(encode_context->va_display, encode_context->va_context_id);
  vaDestroyConfig(encode_context->va_display, encode_context->va_config_id);
  vaTerminate(encode_context->va_display);
  close(encode_context->render_node);
  free(encode_context);
}
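
// For reference, the intended call sequence for this module looks roughly as
// follows. The capture/main-loop side (gpu_context, width, height, output_fd
// and the rendering into the returned frame) is assumed here, not defined in
// this file:
//
//   struct EncodeContext* encode_context =
//       EncodeContextCreate(gpu_context, width, height, colorspace, range);
//   const struct GpuFrame* target = EncodeContextGetFrame(encode_context);
//   // ...render the captured picture into *target via the gpu module...
//   EncodeContextEncodeFrame(encode_context, output_fd);
//   // ...repeat per frame, then:
//   EncodeContextDestroy(encode_context);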