#include <assert.h>
#include <errno.h>
#include <fcntl.h>
#include <stdio.h>
#include <stdlib.h>
#include <string.h>
#include <unistd.h>
#include <va/va.h>
#include <va/va_drm.h>
#include <va/va_drmcommon.h>

#include "encode.h"
#include "gpu.h"
#include "toolbox/utils.h"

#define HEVC_SLICE_P 1
#define HEVC_SLICE_I 2

#define HEVC_NAL_IDR_W_RADL 19

// State of one VA-API HEVC encoder instance. Filled in by EncodeContextCreate
// and consumed by EncodeContextEncodeFrame / EncodeContextDestroy.
struct EncodeContext {
  // Values passed to EncodeContextCreate by the caller.
  struct GpuContext* gpu_context;
  uint32_t width;
  uint32_t height;

  // Parameter templates prepared once in EncodeContextCreate and uploaded as
  // VA buffers by EncodeContextEncodeFrame.
  VAEncSequenceParameterBufferHEVC seq;
  VAEncPictureParameterBufferHEVC pic;
  VAEncMiscParameterRateControl rc;
  VAEncMiscParameterFrameRate fr;

  // DRM render node descriptor and the VA objects created on top of it.
  int render_node;
  VADisplay va_display;
  VAConfigID va_config_id;
  VAContextID va_context_id;
  // Input surface and the GpuFrame obtained from it via VaSurfaceToGpuFrame.
  VASurfaceID input_surface_id;
  struct GpuFrame* gpu_frame;

  // Reconstructed-picture surfaces and the coded (output) buffer.
  VASurfaceID recon_surface_ids[4];
  VABufferID output_buffer_id;
};

// Translates a VAStatus code into a human-readable string for log messages.
// Codes outside [VA_STATUS_SUCCESS, VA_STATUS_ERROR_TIMEDOUT] map to "???".
// NOTE(review): the contents of the lookup table and the closing braces are
// not present in the file as seen; the table needs one entry per status code
// in the range checked below -- confirm before relying on this function.
static const char* VaErrorString(VAStatus error) {
  static const char* va_error_strings[] = {
  // The table is indexed by the offset of the code from VA_STATUS_SUCCESS.
  return VA_STATUS_SUCCESS <= error && error <= VA_STATUS_ERROR_TIMEDOUT
             ? va_error_strings[error - VA_STATUS_SUCCESS]
             : "???";

// libva message callback (installed with vaSetErrorCallback and
// vaSetInfoCallback): forwards |message| to LOG with any trailing newline
// characters stripped. |context| is unused.
static void OnVaLogMessage(void* context, const char* message) {
  (void)context;
  size_t len = strlen(message);
  // Stop at zero: for an empty or newline-only message the unguarded loop
  // would wrap |len| around and read before the start of the string.
  while (len > 0 && message[len - 1] == '\n') len--;
  LOG("%.*s", (int)len, message);
}

// Exports |va_surface_id| as a DRM PRIME descriptor and wraps the planes of
// its first layer into a GpuFrame created on |gpu_context|.
// Returns the new frame, or NULL when the export or the frame creation fails.
// NOTE(review): several lines of this function are not present in the file as
// seen (trailing arguments of vaExportSurfaceHandle, closing braces, the
// release_planes label); restore them before building.
static struct GpuFrame* VaSurfaceToGpuFrame(VADisplay va_display,
                                            VASurfaceID va_surface_id,
                                            struct GpuContext* gpu_context) {
  VADRMPRIMESurfaceDescriptor prime;
  VAStatus status = vaExportSurfaceHandle(
      va_display, va_surface_id, VA_SURFACE_ATTRIB_MEM_TYPE_DRM_PRIME_2,
  if (status != VA_STATUS_SUCCESS) {
    LOG("Failed to export va surface (%s)", VaErrorString(status));
    return NULL;

  // All planes start with an invalid descriptor so that the cleanup path can
  // be handed the whole array regardless of how many planes were filled in.
  struct GpuFramePlane planes[] = {{.dmabuf_fd = -1},
                                   {.dmabuf_fd = -1},
                                   {.dmabuf_fd = -1},
                                   {.dmabuf_fd = -1}};
  // The local array must be able to hold every plane a layer can describe.
  static_assert(LENGTH(planes) == LENGTH(prime.layers[0].object_index),
                "Suspicious VADRMPRIMESurfaceDescriptor structure");

  // Only layer 0 is consulted; each of its planes refers to one of the
  // exported objects through object_index.
  for (size_t i = 0; i < prime.layers[0].num_planes; i++) {
    uint32_t object_index = prime.layers[0].object_index[i];
    planes[i] = (struct GpuFramePlane){
        .dmabuf_fd = prime.objects[object_index].fd,
        .pitch = prime.layers[0].pitch[i],
        .offset = prime.layers[0].offset[i],
        .modifier = prime.objects[object_index].drm_format_modifier,

  struct GpuFrame* gpu_frame =
      GpuContextCreateFrame(gpu_context, prime.width, prime.height,
                            prime.fourcc, prime.layers[0].num_planes, planes);
  if (!gpu_frame) {
    LOG("Failed to create gpu frame");
    goto release_planes;
  return gpu_frame;

  // Failure path: release the exported descriptors.
  // NOTE(review): presumably CloseUniqueFds closes each distinct fd once
  // (several planes may share one object) -- verify against its definition.
  CloseUniqueFds((int[]){planes[0].dmabuf_fd, planes[1].dmabuf_fd,
                         planes[2].dmabuf_fd, planes[3].dmabuf_fd});
  return NULL;

// Allocates an EncodeContext and sets up everything needed to encode frames of
// |width| x |height| pixels: the HEVC sequence/picture/rate-control/frame-rate
// parameter templates, the DRM render node, the VA display, config and context,
// the input surface (also exposed as a GpuFrame), the reconstructed-picture
// surfaces and the coded output buffer.
// Returns the new context, or NULL on failure after rolling back whatever was
// already created.
// NOTE(review): |colorspace| and |range| are not referenced anywhere in the
// visible body.
// NOTE(review): many statements of this function look truncated in the file as
// seen (unterminated initializers, calls missing trailing arguments, goto
// targets without visible labels, most of the rollback chain); restore them
// before relying on the comments below.
struct EncodeContext* EncodeContextCreate(struct GpuContext* gpu_context,
                                          uint32_t width, uint32_t height,
                                          enum YuvColorspace colorspace,
                                          enum YuvRange range) {
  struct EncodeContext* encode_context = malloc(sizeof(struct EncodeContext));
  if (!encode_context) {
    // NOTE(review): the message below misspells "Failed".
    LOG("Faield to allocate encode context (%s)", strerror(errno));
    return NULL;

  // Picture dimensions are rounded up to a whole number of 16x16 coding blocks.
  // TODO(mburakov): ffmpeg attempts to deduce this.
  static const uint32_t min_cb_size = 16;
  uint32_t width_in_cb = (width + min_cb_size - 1) / min_cb_size;
  uint32_t height_in_cb = (height + min_cb_size - 1) / min_cb_size;

  // TODO(mburakov): in the same deduction slice block size is set to 32.

  // Start from a context holding only the caller-provided values.
  *encode_context = (struct EncodeContext){
      .gpu_context = gpu_context,
      .width = width,
      .height = height,

  // Sequence parameters (SPS template).
  // TODO(mburakov): ffmpeg initializes SPS like this.
  encode_context->seq = (VAEncSequenceParameterBufferHEVC){
      .general_profile_idc = 1,  // Main profile
      .general_level_idc = 120,  // Level 4
      .general_tier_flag = 0,    // Main tier

      .intra_period = 120,      // Where this one comes from?
      .intra_idr_period = 120,  // Each I frame is an IDR frame
      .ip_period = 1,           // No B-frames
      .bits_per_second = 0,     // To be configured later?

      // Coded size is the block-aligned size computed above, narrowed to
      // 16 bits.
      .pic_width_in_luma_samples = (uint16_t)(width_in_cb * min_cb_size),
      .pic_height_in_luma_samples = (uint16_t)(height_in_cb * min_cb_size),

      .seq_fields.bits =
              .chroma_format_idc = 1,                    // 4:2:0
              .separate_colour_plane_flag = 0,           // Table 6-1
              .bit_depth_luma_minus8 = 0,                // 8 bpp luma
              .bit_depth_chroma_minus8 = 0,              // 8 bpp chroma
              .scaling_list_enabled_flag = 0,            // ???
              .strong_intra_smoothing_enabled_flag = 0,  // ???

              // mburakov: ffmpeg hardcodes these for i965 Skylake driver.
              .amp_enabled_flag = 1,
              .sample_adaptive_offset_enabled_flag = 0,
              .pcm_enabled_flag = 0,
              .pcm_loop_filter_disabled_flag = 0,  // ???
              .sps_temporal_mvp_enabled_flag = 0,

              // TODO(mburakov): ffmpeg does not set below flags.
              // .low_delay_seq = 0,     // Probably should be 1
              // .hierachical_flag = 0,  // ???

      // mburakov: ffmpeg hardcodes these for i965 Skylake driver.
      .log2_min_luma_coding_block_size_minus3 = 0,
      .log2_diff_max_min_luma_coding_block_size = 2,
      .log2_min_transform_block_size_minus2 = 0,
      .log2_diff_max_min_transform_block_size = 3,
      .max_transform_hierarchy_depth_inter = 3,
      .max_transform_hierarchy_depth_intra = 3,

      .pcm_sample_bit_depth_luma_minus1 = 0,            // ???
      .pcm_sample_bit_depth_chroma_minus1 = 0,          // ???
      .log2_min_pcm_luma_coding_block_size_minus3 = 0,  // ???
      .log2_max_pcm_luma_coding_block_size_minus3 = 0,  // ???

      // mburakov: ffmpeg hardcodes this to 0.
      .vui_parameters_present_flag = 0,

      // TODO(mburakov): ffmpeg leaves rest of the structure zero-initialized.

  // Picture parameters (PPS template); per-frame fields are filled in by
  // EncodeContextEncodeFrame.
  // TODO(mburakov): ffmpeg initializes PPS like this.
  encode_context->pic = (VAEncPictureParameterBufferHEVC){
      .decoded_curr_pic.picture_id = VA_INVALID_ID,
      .decoded_curr_pic.flags = VA_PICTURE_HEVC_INVALID,

      .coded_buf = VA_INVALID_ID,
      // NOTE(review): the condition of this conditional expression is not
      // present in the file as seen.
      .collocated_ref_pic_index =
              ? 0
              : 0xff,

      .last_picture = 0,

      // mburakov: ffmpeg hardcodes initial value for non-CQP rate control.
      .pic_init_qp = 30,
      .diff_cu_qp_delta_depth = 0,
      .pps_cb_qp_offset = 0,
      .pps_cr_qp_offset = 0,

      .num_tile_columns_minus1 = 0,  // No tiles
      .num_tile_rows_minus1 = 0,     // No tiles

      .log2_parallel_merge_level_minus2 = 0,  // ???
      // mburakov: ffmpeg hardcodes this to 0.
      .ctu_max_bitsize_allowed = 0,

      // mburakov: ffmpeg hardcodes both to 0.
      .num_ref_idx_l0_default_active_minus1 = 0,
      .num_ref_idx_l1_default_active_minus1 = 0,

      // TODO(mburakov): Should this be incremented on IDR?
      .slice_pic_parameter_set_id = 0,

      // TODO(mburakov): ffmpeg does not set below value.
      // .nal_unit_type = 0,

      .pic_fields.bits =
              // mburakov: ffmpeg sets the flags below for each picture.
              // .idr_pic_flag = 0,
              // .coding_type = 0,
              // .reference_pic_flag = 0,

              // TODO(mburakov): ffmpeg does not set the flag below.
              // .dependent_slice_segments_enabled_flag = 0,

              .sign_data_hiding_enabled_flag = 0,  // ???
              .constrained_intra_pred_flag = 0,    // ???

              // TODO(mburakov): ffmpeg attempts to deduce the flag below.
              .transform_skip_enabled_flag = 0,

              // mburakov: ffmpeg enables this for non-CQP rate control.
              .cu_qp_delta_enabled_flag = 1,

              .weighted_pred_flag = 0,                     // ???
              .weighted_bipred_flag = 0,                   // ???
              .transquant_bypass_enabled_flag = 0,         // ???
              .tiles_enabled_flag = 0,                     // No tiles
              .entropy_coding_sync_enabled_flag = 0,       // ???
              .loop_filter_across_tiles_enabled_flag = 0,  // No tiles

              // mburakov: ffmpeg hardcodes the flag below.
              .pps_loop_filter_across_slices_enabled_flag = 1,

              .scaling_list_data_present_flag = 0,  // ???

              // mburakov: ffmpeg hardcodes the flags below.
              .screen_content_flag = 0,
              .enable_gpu_weighted_prediction = 0,
              .no_output_of_prior_pics_flag = 0,

      // TODO(mburakov): ffmpeg does not set values below.
      // .hierarchical_level_plus1 = 0,  // ???
      // .scc_fields.value = 0,          // ???

  // Rate control parameters; the config below requests VA_RC_ICQ.
  // TODO(mburakov): ffmpeg initializes RC like this:
  encode_context->rc = (VAEncMiscParameterRateControl){
      .bits_per_second = 0,      // Hardcoded for non-bitrate
      .target_percentage = 100,  // Hardcoded for non-bitrate
      .window_size = 1000,       // Hardcoded for non-AVBR
      .initial_qp = 0,           // Hardcoded
      .min_qp = 0,               // Comes from context
      .basic_unit_size = 0,      // Hardcoded
      .ICQ_quality_factor = 28,  // Comes from context - clipped [1, 51]
      .max_qp = 0,               // Comes from context
      .quality_factor = 28,      // Comes from context - non-clipped

      // TODO(mburakov): ffmpeg does not set below value.
      // .target_frame_size = 0,

  // Frame rate parameters.
  // NOTE(review): presumably the packed value means 60/1 fps (numerator in the
  // low 16 bits, denominator in the high 16 bits) -- confirm against va.h.
  // TODO(mburakov): ffmpeg initializes FR like this:
  encode_context->fr = (VAEncMiscParameterFrameRate){
      .framerate = (1 << 16) | 60,  // Comes from context

      // TODO(mburakov): ffmpeg does not set below value.
      // .framerate_flags.value = 0,

  // The render node path is hardcoded to the first render device.
  encode_context->render_node = open("/dev/dri/renderD128", O_RDWR);
  if (encode_context->render_node == -1) {
    LOG("Failed to open render node (%s)", strerror(errno));
    goto rollback_encode_context;

  encode_context->va_display = vaGetDisplayDRM(encode_context->render_node);
  if (!encode_context->va_display) {
    // NOTE(review): nothing here establishes that vaGetDisplayDRM sets errno,
    // so the reported reason may be stale -- verify.
    LOG("Failed to get va display (%s)", strerror(errno));
    goto rollback_render_node;

  // Route libva error messages (and, in debug builds, info messages) to LOG.
  vaSetErrorCallback(encode_context->va_display, OnVaLogMessage, NULL);

#ifndef NDEBUG
  vaSetInfoCallback(encode_context->va_display, OnVaLogMessage, NULL);
#endif  // NDEBUG

  int major, minor;
  VAStatus status = vaInitialize(encode_context->va_display, &major, &minor);
  if (status != VA_STATUS_SUCCESS) {
    LOG("Failed to initialize va (%s)", VaErrorString(status));
    goto rollback_va_display;

  LOG("Initialized VA %d.%d", major, minor);
  // TODO(mburakov): Check entry points?

  // HEVC Main encode config: 4:2:0 render target, ICQ rate control, packed
  // headers.
  // NOTE(review): the value of the packed headers attribute and the end of the
  // array are not present in the file as seen; no #else/#endif matching the
  // #if below is visible either.
#if 1
  VAConfigAttrib config_attribs[] = {
      {.type = VAConfigAttribRTFormat, .value = VA_RT_FORMAT_YUV420},
      {.type = VAConfigAttribRateControl, .value = VA_RC_ICQ},
      {.type = VAConfigAttribEncPackedHeaders,
  status = vaCreateConfig(
      encode_context->va_display, VAProfileHEVCMain, VAEntrypointEncSlice,
      config_attribs, LENGTH(config_attribs), &encode_context->va_config_id);
  if (status != VA_STATUS_SUCCESS) {
    LOG("Failed to create va config (%s)", VaErrorString(status));
    goto rollback_va_display;

  // The context is created without pre-registered render targets.
  status = vaCreateContext(
      encode_context->va_display, encode_context->va_config_id, (int)width,
      (int)height, VA_PROGRESSIVE, NULL, 0, &encode_context->va_context_id);
  if (status != VA_STATUS_SUCCESS) {
    LOG("Failed to create va context (%s)", VaErrorString(status));
    goto rollback_va_config_id;

  // Single input surface of the requested (unaligned) size.
  status =
      vaCreateSurfaces(encode_context->va_display, VA_RT_FORMAT_YUV420, width,
                       height, &encode_context->input_surface_id, 1, NULL, 0);
  if (status != VA_STATUS_SUCCESS) {
    LOG("Failed to create va input surface (%s)", VaErrorString(status));
    goto rollback_va_context_id;

  // Expose the input surface as a GpuFrame (see EncodeContextGetFrame).
  encode_context->gpu_frame = VaSurfaceToGpuFrame(
      encode_context->va_display, encode_context->input_surface_id,
  if (!encode_context->gpu_frame) {
    LOG("Failed to convert va surface to gpu frame");
    goto rollback_input_surface_id;

  // Reconstructed-picture surfaces use the block-aligned size.
  // NOTE(review): the surface array argument is not present in the call below
  // as seen.
  status =
      vaCreateSurfaces(encode_context->va_display, VA_RT_FORMAT_YUV420,
                       width_in_cb * min_cb_size, height_in_cb * min_cb_size,
                       LENGTH(encode_context->recon_surface_ids), NULL, 0);
  if (status != VA_STATUS_SUCCESS) {
    LOG("Failed to create va recon surfaces (%s)", VaErrorString(status));
    goto rollback_gpu_frame;

  // The coded buffer is sized like one raw 8-bit 4:2:0 frame
  // (width * height * 3 / 2 bytes).
  unsigned int max_encoded_size =
      encode_context->width * encode_context->height * 3 / 2;
  status =
      vaCreateBuffer(encode_context->va_display, encode_context->va_context_id,
                     VAEncCodedBufferType, max_encoded_size, 1, NULL,
  if (status != VA_STATUS_SUCCESS) {
    LOG("Failed to create va output buffer (%s)", VaErrorString(status));
    goto rollback_recon_surface_ids;
  return encode_context;

  // Rollback chain; only fragments of it are present in the file as seen.
                    &encode_context->input_surface_id, 1);
  // NOTE(review): vaDestroyContext is given va_config_id below; this looks
  // like it should be va_context_id -- confirm and fix.
  vaDestroyContext(encode_context->va_display, encode_context->va_config_id);
  vaDestroyConfig(encode_context->va_display, encode_context->va_config_id);
  return NULL;

// Returns the GpuFrame stored in |encode_context| (the one EncodeContextCreate
// obtained from the encoder input surface). The context keeps ownership; the
// returned pointer is read-only for the caller.
const struct GpuFrame* EncodeContextGetFrame(
    struct EncodeContext* encode_context) {
  return encode_context->gpu_frame;
}

// Creates a VA buffer of |va_buffer_type| holding |size| bytes copied from
// |data| and stores its id in the slot that |*presult| points at.
// On success the cursor |*presult| is moved to the next slot and true is
// returned; on failure the cursor is left untouched and false is returned.
static bool UploadBuffer(const struct EncodeContext* encode_context,
                         VABufferType va_buffer_type, unsigned int size,
                         void* data, VABufferID** presult) {
  VAStatus status =
      vaCreateBuffer(encode_context->va_display, encode_context->va_context_id,
                     va_buffer_type, size, 1, data, *presult);
  if (status != VA_STATUS_SUCCESS) {
    LOG("Failed to create buffer (%s)", VaErrorString(status));
    return false;
  }
  // Advance only after a successful creation: the caller derives both the
  // number of buffers to render and the range of buffers to destroy from the
  // distance the cursor has travelled.
  (*presult)++;
  return true;
}

// Wraps |size| bytes at |data| into a VAEncMiscParameterBuffer tagged with
// |misc_parameter_type| and uploads the result through UploadBuffer as a
// VAEncMiscParameterBufferType buffer. Returns what UploadBuffer returns;
// |*presult| is handled by UploadBuffer.
static bool UploadMiscBuffer(const struct EncodeContext* encode_context,
                             VAEncMiscParameterType misc_parameter_type,
                             unsigned int size, const void* data,
                             VABufferID** presult) {
  // Header and payload are assembled back to back in temporary stack storage
  // that is sized at run time.
  // NOTE(review): the storage is a plain byte array accessed through a struct
  // pointer; consider giving it the alignment of VAEncMiscParameterBuffer.
  uint8_t stack_allocated_storage[sizeof(VAEncMiscParameterBuffer) + size];
  VAEncMiscParameterBuffer* buffer =
      (VAEncMiscParameterBuffer*)stack_allocated_storage;
  buffer->type = misc_parameter_type;
  memcpy(buffer->data, data, size);
  return UploadBuffer(encode_context, VAEncMiscParameterBufferType,
                      (unsigned int)sizeof(stack_allocated_storage),
                      stack_allocated_storage, presult);
}

// Submits one frame to the encoder: uploads the sequence, (for IDR) rate
// control and frame rate, picture and slice parameter buffers, then runs
// vaBeginPicture / vaRenderPicture / vaEndPicture and destroys the uploaded
// parameter buffers.
// NOTE(review): |fd| is not referenced anywhere in the visible body, and every
// frame is currently encoded as an IDR frame (|idr| is hardcoded to true).
// NOTE(review): many statements of this function look truncated in the file as
// seen (unterminated initializers, slice fields without values, the
// vaBeginPicture call, the rollback_buffers label); restore them before
// relying on the comments below.
bool EncodeContextEncodeFrame(struct EncodeContext* encode_context, int fd) {
  // Ids of the parameter buffers created for this frame; buffer_ptr is the
  // cursor to the next free slot. At most five uploads happen below.
  VABufferID buffers[8];
  VABufferID* buffer_ptr = buffers;
  if (!UploadBuffer(encode_context, VAEncSequenceParameterBufferType,
                    sizeof(encode_context->seq), &encode_context->seq,
                    &buffer_ptr)) {
    LOG("Failed to upload sequence parameter buffer");
    return false;

  bool result = false;
  bool idr = true;
  if (idr) {
    if (!UploadMiscBuffer(encode_context, VAEncMiscParameterTypeRateControl,
                          sizeof(encode_context->rc), &encode_context->rc,
                          &buffer_ptr)) {
      LOG("Failed to upload rate control buffer");
      goto rollback_buffers;
    if (!UploadMiscBuffer(encode_context, VAEncMiscParameterTypeFrameRate,
                          sizeof(encode_context->fr), &encode_context->fr,
                          &buffer_ptr)) {
      LOG("Failed to upload frame rate buffer");
      goto rollback_buffers;

  // Per-frame picture parameters: reconstruct into the first recon surface,
  // use no reference frames, write into the coded buffer, mark as IDR.
  // TODO(mburakov): Implement this!!!
  encode_context->pic.decoded_curr_pic = (VAPictureHEVC){
      .picture_id = encode_context->recon_surface_ids[0],  // recon
      .pic_order_cnt = 0,  // pic->display_order - hpic->last_idr_frame
      .flags = 0,
  for (size_t i = 0; i < LENGTH(encode_context->pic.reference_frames); i++) {
    encode_context->pic.reference_frames[i] = (VAPictureHEVC){
        .picture_id = VA_INVALID_ID,
        .flags = VA_PICTURE_HEVC_INVALID,
  encode_context->pic.coded_buf = encode_context->output_buffer_id;
  encode_context->pic.nal_unit_type = HEVC_NAL_IDR_W_RADL;
  encode_context->pic.pic_fields.bits.idr_pic_flag = 1;
  encode_context->pic.pic_fields.bits.coding_type = 1;
  encode_context->pic.pic_fields.bits.reference_pic_flag = 1;
  if (!UploadBuffer(encode_context, VAEncPictureParameterBufferType,
                    sizeof(encode_context->pic), &encode_context->pic,
                    &buffer_ptr)) {
    LOG("Failed to upload picture parameter buffer");
    goto rollback_buffers;

  // Intended buffer submission order (packed headers are not uploaded by the
  // visible code):
  // VAEncSequenceParameterBufferType
  // if (IDR) {
  //   VAEncMiscParameterTypeRateControl
  //   VAEncMiscParameterTypeFrameRate
  // }
  // VAEncPictureParameterBufferType
  // if (IDR) {
  //   Packed VPS
  //   Packed SPS
  //   Packed PPS
  //   Packed AU
  // }
  // VAEncSliceParameterBufferHEVC
  // Packed slice header

  // slice_block_rows(34) = (height + slice_block_size - 1) / slice_block_size;
  // slice_block_cols(60) = (width + slice_block_size - 1) / slice_block_size;

  // Number of 32x32 blocks covering the picture.
  // NOTE(review): rows are derived from the width and cols from the height,
  // the opposite of the comment above; only their product is used, so the
  // result is unaffected, but the names are misleading.
  // TODO(mburakov): see comment in EncodeContextCreate.
  static const uint32_t slice_block_size = 32;
  uint32_t slice_block_rows =
      (encode_context->width + slice_block_size - 1) / slice_block_size;
  uint32_t slice_block_cols =
      (encode_context->height + slice_block_size - 1) / slice_block_size;
  uint32_t block_size = slice_block_rows * slice_block_cols;

  // A single I slice covering the whole picture.
  VAEncSliceParameterBufferHEVC slice = {
      .slice_segment_address = 0,  // calculated
      .num_ctu_in_slice = block_size,

      .slice_type = HEVC_SLICE_I,  // calculated
      .slice_pic_parameter_set_id =

      .num_ref_idx_l0_active_minus1 =
      .num_ref_idx_l1_active_minus1 =

      .luma_log2_weight_denom = 0,          // ???
      .delta_chroma_log2_weight_denom = 0,  // ???

      // TODO(mburakov): ffmpeg does not initialize below entries.
      // .delta_luma_weight_l0[15],
      // .luma_offset_l0[15],
      // .delta_chroma_weight_l0[15][2],
      // .chroma_offset_l0[15][2],
      // .delta_luma_weight_l1[15],
      // .luma_offset_l1[15],
      // .delta_chroma_weight_l1[15][2],
      // .chroma_offset_l1[15][2],

      .max_num_merge_cand = 5 - 0,  // ???
      .slice_qp_delta = 0,          // evals to zero for CQP???
      .slice_cb_qp_offset = 0,      // ???
      .slice_cr_qp_offset = 0,      // ???

      .slice_beta_offset_div2 = 0,  // ???
      .slice_tc_offset_div2 = 0,    // ???

      .slice_fields.bits =
              // TODO(mburakov): We only have a single slice?
              .last_slice_of_pic_flag = 1,
              .dependent_slice_segment_flag = 0,  // ???
              .colour_plane_id = 0,               // ???
              .slice_temporal_mvp_enabled_flag =
              .slice_sao_luma_flag = encode_context->seq.seq_fields.bits
              .slice_sao_chroma_flag = encode_context->seq.seq_fields.bits
              .num_ref_idx_active_override_flag = 0,              // ???
              .mvd_l1_zero_flag = 0,                              // ???
              .cabac_init_flag = 0,                               // ???
              .slice_deblocking_filter_disabled_flag = 0,         // ???
              .slice_loop_filter_across_slices_enabled_flag = 0,  // ???
              .collocated_from_l0_flag = 0,                       // ???

      // TODO(mburakov): ffmpeg does not initialize below entries.
      // .pred_weight_table_bit_offset = 0,
      // .pred_weight_table_bit_length = 0,

  // Mark every entry of both reference picture lists as unused.
  for (size_t i = 0; i < LENGTH(slice.ref_pic_list0); i++) {
    slice.ref_pic_list0[i].picture_id = VA_INVALID_ID;
    slice.ref_pic_list0[i].flags = VA_PICTURE_HEVC_INVALID;
    slice.ref_pic_list1[i].picture_id = VA_INVALID_ID;
    slice.ref_pic_list1[i].flags = VA_PICTURE_HEVC_INVALID;

  // TODO(mburakov): ffmpeg assign reference frame for non-I-frames here.

  if (!UploadBuffer(encode_context, VAEncSliceParameterBufferType,
                    sizeof(slice), &slice, &buffer_ptr)) {
    LOG("Failed to upload slice parameter buffer");
    goto rollback_buffers;

  VAStatus status =
      vaBeginPicture(encode_context->va_display, encode_context->va_context_id,
  if (status != VA_STATUS_SUCCESS) {
    LOG("Failed to begin va picture (%s)", VaErrorString(status));
    goto rollback_buffers;

  // The number of buffers to render is the distance the cursor has moved from
  // the start of the array; this assumes the upload helpers advance buffer_ptr
  // on success -- verify.
  int num_buffers = (int)(buffer_ptr - buffers);
  status = vaRenderPicture(encode_context->va_display,
                           encode_context->va_context_id, buffers, num_buffers);
  if (status != VA_STATUS_SUCCESS) {
    LOG("Failed to render va picture (%s)", VaErrorString(status));
    goto rollback_buffers;

  status =
      vaEndPicture(encode_context->va_display, encode_context->va_context_id);
  if (status != VA_STATUS_SUCCESS) {
    LOG("Failed to end va picture (%s)", VaErrorString(status));
    goto rollback_buffers;
  // NOTE(review): this is the success path, yet result is set to false, so the
  // function never reports success; this looks like it should be true.
  result = false;

  // Destroy the buffers between |buffers| and the cursor, last one first.
  while (buffer_ptr-- > buffers)
    vaDestroyBuffer(encode_context->va_display, *buffer_ptr);
  return result;

// Releases the VA objects owned by |encode_context|.
// NOTE(review): this function looks truncated in the file as seen: the line
// after vaDestroyBuffer is the tail of a call whose beginning is missing, and
// no teardown of the recon surfaces, gpu frame, display, render node or the
// context allocation itself is visible.
void EncodeContextDestroy(struct EncodeContext* encode_context) {
  vaDestroyBuffer(encode_context->va_display, encode_context->output_buffer_id);
                    &encode_context->input_surface_id, 1);
  // NOTE(review): vaDestroyContext is given va_config_id below; this looks
  // like it should be va_context_id -- confirm and fix.
  vaDestroyContext(encode_context->va_display, encode_context->va_config_id);
  vaDestroyConfig(encode_context->va_display, encode_context->va_config_id);