From 3fe1d2756c5a0b4344fb2c811399b5012d084f70 Mon Sep 17 00:00:00 2001 From: chaos Date: Wed, 4 Sep 2024 21:44:25 +0100 Subject: [PATCH] attempt to patch kodi --- hosts/raspberry-pi5/hardware.nix | 7 + hosts/raspberry-pi5/profiles/kodi.nix | 24 +- .../profiles/rbp-000-add-pi-support.patch | 25991 ++++++++++++++++ hosts/raspberry-pi5/raspberry-pi5.nix | 7 - outputs.nix | 2 + 5 files changed, 26023 insertions(+), 8 deletions(-) create mode 100644 hosts/raspberry-pi5/profiles/rbp-000-add-pi-support.patch diff --git a/hosts/raspberry-pi5/hardware.nix b/hosts/raspberry-pi5/hardware.nix index 960a940..d003918 100644 --- a/hosts/raspberry-pi5/hardware.nix +++ b/hosts/raspberry-pi5/hardware.nix @@ -83,6 +83,13 @@ in { presets.nixos.serverEncryptedDrive ]; + nixpkgs.overlays = [ + (_final: prev: { + makeModulesClosure = x: + prev.makeModulesClosure (x // {allowMissing = true;}); + }) + ]; + services.xserver.videoDrivers = [ "modesetting" "fbdev" diff --git a/hosts/raspberry-pi5/profiles/kodi.nix b/hosts/raspberry-pi5/profiles/kodi.nix index 160af05..0996ed6 100644 --- a/hosts/raspberry-pi5/profiles/kodi.nix +++ b/hosts/raspberry-pi5/profiles/kodi.nix @@ -3,7 +3,23 @@ inputs, ... }: let - kodiPackage = inputs.nixpkgs-kodi-inputstream-adaptive-fork.legacyPackages.${pkgs.system}.kodi-wayland.withPackages (kodiPkgs: + kodiForkPkg = inputs.nixpkgs-kodi-inputstream-adaptive-fork.legacyPackages.${pkgs.system}.kodi; + + kodiForkPkgPatched = + (kodiForkPkg.override { + x11Support = true; + waylandSupport = true; + gbmSupport = true; + }) + .overrideAttrs (oldAttrs: { + patches = + oldAttrs.patches + ++ [ + ./rbp-000-add-pi-support.patch + ]; + }); + + kodiPackage = kodiForkPkgPatched.withPackages (kodiPkgs: with kodiPkgs; [ pvr-iptvsimple inputstreamhelper @@ -16,6 +32,12 @@ visualization-spectrum ]); in { + nixpkgs.overlays = [ + (final: _prev: { + ffmpeg = final.ffmpeg-pi; + }) + ]; + environment.systemPackages = [ kodiPackage ]; diff --git a/hosts/raspberry-pi5/profiles/rbp-000-add-pi-support.patch b/hosts/raspberry-pi5/profiles/rbp-000-add-pi-support.patch new file mode 100644 index 0000000..6fa9b91 --- /dev/null +++ b/hosts/raspberry-pi5/profiles/rbp-000-add-pi-support.patch @@ -0,0 +1,25991 @@ +From 31acfdc558652ea480c773f095ab675218af8195 Mon Sep 17 00:00:00 2001 +From: Sam Nazarko +Date: Tue, 2 Jun 2015 22:56:15 +0100 +Subject: [PATCH 01/24] Fix UPower capability detection on Vero + +Signed-off-by: Sam Nazarko +--- + xbmc/platform/linux/powermanagement/LogindUPowerSyscall.cpp | 4 ++-- + 1 file changed, 2 insertions(+), 2 deletions(-) + +diff --git a/xbmc/platform/linux/powermanagement/LogindUPowerSyscall.cpp b/xbmc/platform/linux/powermanagement/LogindUPowerSyscall.cpp +index bd04197a51..7cb68a19b4 100644 +--- a/xbmc/platform/linux/powermanagement/LogindUPowerSyscall.cpp ++++ b/xbmc/platform/linux/powermanagement/LogindUPowerSyscall.cpp +@@ -39,8 +39,8 @@ CLogindUPowerSyscall::CLogindUPowerSyscall() + if (!m_hasUPower) + CLog::Log(LOGINFO, "LogindUPowerSyscall - UPower not found, battery information will not be available"); + +- m_canPowerdown = LogindCheckCapability("CanPowerOff"); +- m_canReboot = LogindCheckCapability("CanReboot"); ++ m_canPowerdown = true; ++ m_canReboot = true; + m_canHibernate = LogindCheckCapability("CanHibernate"); + m_canSuspend = LogindCheckCapability("CanSuspend"); + +-- +2.34.1 + + +From 4ae22b3482359e9ce9a015b61001eb1e9385c19c Mon Sep 17 00:00:00 2001 +From: Sam Nazarko +Date: Thu, 21 Dec 2017 11:38:02 +0000 +Subject: [PATCH 02/24] Add OSMC Helper routines to improve 
Kodi integration + with OSMC + +Signed-off-by: Sam Nazarko +--- + xbmc/CMakeLists.txt | 2 ++ + xbmc/OSMCHelper.cpp | 36 ++++++++++++++++++++++++++++++++++++ + xbmc/OSMCHelper.h | 38 ++++++++++++++++++++++++++++++++++++++ + 3 files changed, 76 insertions(+) + create mode 100644 xbmc/OSMCHelper.cpp + create mode 100644 xbmc/OSMCHelper.h + +diff --git a/xbmc/CMakeLists.txt b/xbmc/CMakeLists.txt +index b7c838b3da..3ddcc4da76 100644 +--- a/xbmc/CMakeLists.txt ++++ b/xbmc/CMakeLists.txt +@@ -16,6 +16,7 @@ set(SOURCES AutoSwitch.cpp + LangInfo.cpp + MediaSource.cpp + NfoFile.cpp ++ OSMCHelper.cpp + PasswordManager.cpp + PlayListPlayer.cpp + PartyModeManager.cpp +@@ -58,6 +59,7 @@ set(HEADERS AutoSwitch.h + LockType.h + MediaSource.h + NfoFile.h ++ OSMCHelper.h + PartyModeManager.h + PasswordManager.h + PlayListPlayer.h +diff --git a/xbmc/OSMCHelper.cpp b/xbmc/OSMCHelper.cpp +new file mode 100644 +index 0000000000..2b605881b1 +--- /dev/null ++++ b/xbmc/OSMCHelper.cpp +@@ -0,0 +1,36 @@ ++/* ++ * Copyright (C) 2005-2013 Team XBMC ++ * http://xbmc.org ++ * ++ * This Program is free software; you can redistribute it and/or modify ++ * it under the terms of the GNU General Public License as published by ++ * the Free Software Foundation; either version 2, or (at your option) ++ * any later version. ++ * ++ * This Program is distributed in the hope that it will be useful, ++ * but WITHOUT ANY WARRANTY; without even the implied warranty of ++ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the ++ * GNU General Public License for more details. ++ * ++ * You should have received a copy of the GNU General Public License ++ * along with XBMC; see the file COPYING. If not, see ++ * . ++ * ++ */ ++// OSMCHelper.cpp: implementation of OSMC helper routines ++// ++////////////////////////////////////////////////////////////////////// ++ ++extern "C" { ++#include "OSMCHelper.h" ++ #if defined(__arm__) ++ /* Ensure that uname returns arm, or machine model will reflect kernel bitness only */ ++ int uname(struct utsname *buf) ++ { ++ int r; ++ r = syscall(SYS_uname, buf); ++ strcpy(buf->machine, "armv7"); ++ return r; ++ } ++ #endif // __arm__ ++} +diff --git a/xbmc/OSMCHelper.h b/xbmc/OSMCHelper.h +new file mode 100644 +index 0000000000..709d21afbe +--- /dev/null ++++ b/xbmc/OSMCHelper.h +@@ -0,0 +1,38 @@ ++/* ++ * Copyright (C) 2005-2013 Team XBMC ++ * http://xbmc.org ++ * ++ * This Program is free software; you can redistribute it and/or modify ++ * it under the terms of the GNU General Public License as published by ++ * the Free Software Foundation; either version 2, or (at your option) ++ * any later version. ++ * ++ * This Program is distributed in the hope that it will be useful, ++ * but WITHOUT ANY WARRANTY; without even the implied warranty of ++ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the ++ * GNU General Public License for more details. ++ * ++ * You should have received a copy of the GNU General Public License ++ * along with XBMC; see the file COPYING. If not, see ++ * . 
++ * ++ */ ++// OSMCHelper.h: routines to improve behaviour of Kodi on OSMC ++// ++////////////////////////////////////////////////////////////////////// ++ ++#pragma once ++ ++#include ++#include ++#include ++#include ++#include ++#include ++ ++extern "C" { ++ #if defined(__arm__) ++ /* Fix up uname for 64-bit kernels with 32-bit userland */ ++ int uname(struct utsname *buf); ++ #endif // __arm__ ++} +-- +2.34.1 + + +From 74b81e0fe134756e283f93b2acd20fada7bdc545 Mon Sep 17 00:00:00 2001 +From: Sam Nazarko +Date: Sat, 6 Mar 2021 18:29:22 +0000 +Subject: [PATCH 03/24] Enable DRM-PRIME for Raspberry Pi + +Signed-off-by: Sam Nazarko +--- + system/settings/linux.xml | 6 +++--- + 1 file changed, 3 insertions(+), 3 deletions(-) + +diff --git a/system/settings/linux.xml b/system/settings/linux.xml +index 531974f3f4..89b91db23b 100644 +--- a/system/settings/linux.xml ++++ b/system/settings/linux.xml +@@ -165,12 +165,12 @@ + HAS_GLES + false + 2 +- false ++ true + + + + HAS_GLES +- false ++ true + + + true +@@ -184,7 +184,7 @@ + HAS_GLES + false + 2 +- 1 ++ 0 + + + +-- +2.34.1 + + +From 945c325a7ff47f0aa9ed3988349bfb7bee7938a9 Mon Sep 17 00:00:00 2001 +From: popcornmix +Date: Mon, 20 Jan 2020 16:53:52 +0000 +Subject: [PATCH 04/24] ffmpeg: Add v4l2 support + +--- + cmake/modules/FindFFMPEG.cmake | 5 +- + .../0001-rpi-Add-hevc-acceleration.patch | 23708 ++++++++++++++++ + tools/depends/target/ffmpeg/CMakeLists.txt | 5 + + 3 files changed, 23717 insertions(+), 1 deletion(-) + create mode 100644 tools/depends/target/ffmpeg/0001-rpi-Add-hevc-acceleration.patch + +diff --git a/cmake/modules/FindFFMPEG.cmake b/cmake/modules/FindFFMPEG.cmake +index 6c6bf973de..13c810591b 100644 +--- a/cmake/modules/FindFFMPEG.cmake ++++ b/cmake/modules/FindFFMPEG.cmake +@@ -80,7 +80,10 @@ macro(buildFFMPEG) + -DPKG_CONFIG_PATH=${CMAKE_BINARY_DIR}/${CORE_BUILD_DIR}/lib/pkgconfig) + set(PATCH_COMMAND ${CMAKE_COMMAND} -E copy + ${CMAKE_SOURCE_DIR}/tools/depends/target/ffmpeg/CMakeLists.txt +- ) ++ && ++ patch -p1 < ${CMAKE_SOURCE_DIR}/tools/depends/target/ffmpeg/0001-rpi-Add-hevc-acceleration.patch && ++ echo "########################################## patched ffmpeg ##############################" ++ ) + + if(CMAKE_GENERATOR STREQUAL Xcode) + set(FFMPEG_GENERATOR CMAKE_GENERATOR "Unix Makefiles") +diff --git a/tools/depends/target/ffmpeg/0001-rpi-Add-hevc-acceleration.patch b/tools/depends/target/ffmpeg/0001-rpi-Add-hevc-acceleration.patch +new file mode 100644 +index 0000000000..54573fab28 +--- /dev/null ++++ b/tools/depends/target/ffmpeg/0001-rpi-Add-hevc-acceleration.patch +@@ -0,0 +1,23708 @@ ++diff --git a/configure b/configure ++index b6616f00b6..94c8161b91 100755 ++--- a/configure +++++ b/configure ++@@ -205,6 +205,7 @@ External library support: ++ --disable-bzlib disable bzlib [autodetect] ++ --disable-coreimage disable Apple CoreImage framework [autodetect] ++ --enable-chromaprint enable audio fingerprinting with chromaprint [no] +++ --disable-epoxy disable epoxy [autodetect] ++ --enable-frei0r enable frei0r video filtering [no] ++ --enable-gcrypt enable gcrypt, needed for rtmp(t)e support ++ if openssl, librtmp or gmp is not used [no] ++@@ -281,6 +282,7 @@ External library support: ++ if openssl, gnutls or mbedtls is not used [no] ++ --enable-libtwolame enable MP2 encoding via libtwolame [no] ++ --enable-libuavs3d enable AVS3 decoding via libuavs3d [no] +++ --disable-libudev disable libudev [autodetect] ++ --enable-libv4l2 enable libv4l2/v4l-utils [no] ++ --enable-libvidstab enable video stabilization using 
vid.stab [no] ++ --enable-libvmaf enable vmaf filter via libvmaf [no] ++@@ -344,12 +346,16 @@ External library support: ++ --enable-libvpl enable Intel oneVPL code via libvpl if libmfx is not used [no] ++ --enable-libnpp enable Nvidia Performance Primitives-based code [no] ++ --enable-mmal enable Broadcom Multi-Media Abstraction Layer (Raspberry Pi) via MMAL [no] +++ --enable-sand enable sand video formats [rpi] +++ --enable-vout-drm enable the vout_drm module - for internal testing only [no] +++ --enable-vout-egl enable the vout_egl module - for internal testing only [no] ++ --disable-nvdec disable Nvidia video decoding acceleration (via hwaccel) [autodetect] ++ --disable-nvenc disable Nvidia video encoding code [autodetect] ++ --enable-omx enable OpenMAX IL code [no] ++ --enable-omx-rpi enable OpenMAX IL code for Raspberry Pi [no] ++ --enable-rkmpp enable Rockchip Media Process Platform code [no] ++ --disable-v4l2-m2m disable V4L2 mem2mem code [autodetect] +++ --enable-v4l2-request enable V4L2 request API code [no] ++ --disable-vaapi disable Video Acceleration API (mainly Unix/Intel) code [autodetect] ++ --disable-vdpau disable Nvidia Video Decode and Presentation API for Unix code [autodetect] ++ --disable-videotoolbox disable VideoToolbox code [autodetect] ++@@ -1742,7 +1748,9 @@ EXTERNAL_AUTODETECT_LIBRARY_LIST=" ++ avfoundation ++ bzlib ++ coreimage +++ epoxy ++ iconv +++ libudev ++ libxcb ++ libxcb_shm ++ libxcb_shape ++@@ -1913,6 +1921,7 @@ HWACCEL_LIBRARY_LIST=" ++ mmal ++ omx ++ opencl +++ v4l2_request ++ " ++ ++ DOCUMENT_LIST=" ++@@ -1930,10 +1939,14 @@ FEATURE_LIST=" ++ omx_rpi ++ runtime_cpudetect ++ safe_bitstream_reader +++ sand ++ shared ++ small ++ static ++ swscale_alpha +++ vout_drm +++ vout_egl +++ v4l2_req_hevc_vx ++ " ++ ++ # this list should be kept in linking order ++@@ -2495,6 +2508,7 @@ CONFIG_EXTRA=" ++ rtpdec ++ rtpenc_chain ++ rv34dsp +++ sand ++ scene_sad ++ sinewin ++ snappy ++@@ -2999,6 +3013,7 @@ d3d11va_deps="dxva_h ID3D11VideoDecoder ID3D11VideoContext" ++ dxva2_deps="dxva2api_h DXVA2_ConfigPictureDecode ole32 user32" ++ ffnvcodec_deps_any="libdl LoadLibrary" ++ nvdec_deps="ffnvcodec" +++v4l2_request_deps="linux_videodev2_h linux_media_h v4l2_timeval_to_ns libdrm libudev" ++ vaapi_x11_deps="xlib_x11" ++ videotoolbox_hwaccel_deps="videotoolbox pthreads" ++ videotoolbox_hwaccel_extralibs="-framework QuartzCore" ++@@ -3042,6 +3057,8 @@ hevc_dxva2_hwaccel_deps="dxva2 DXVA_PicParams_HEVC" ++ hevc_dxva2_hwaccel_select="hevc_decoder" ++ hevc_nvdec_hwaccel_deps="nvdec" ++ hevc_nvdec_hwaccel_select="hevc_decoder" +++hevc_v4l2request_hwaccel_deps="v4l2_request" +++hevc_v4l2request_hwaccel_select="hevc_decoder" ++ hevc_vaapi_hwaccel_deps="vaapi VAPictureParameterBufferHEVC" ++ hevc_vaapi_hwaccel_select="hevc_decoder" ++ hevc_vdpau_hwaccel_deps="vdpau VdpPictureInfoHEVC" ++@@ -3549,8 +3566,11 @@ sndio_indev_deps="sndio" ++ sndio_outdev_deps="sndio" ++ v4l2_indev_deps_any="linux_videodev2_h sys_videoio_h" ++ v4l2_indev_suggest="libv4l2" +++v4l2_outdev_deps="libdrm" ++ v4l2_outdev_deps_any="linux_videodev2_h sys_videoio_h" ++ v4l2_outdev_suggest="libv4l2" +++vout_drm_outdev_deps="libdrm" +++vout_egl_outdev_deps="xlib epoxy" ++ vfwcap_indev_deps="vfw32 vfwcap_defines" ++ xcbgrab_indev_deps="libxcb" ++ xcbgrab_indev_suggest="libxcb_shm libxcb_shape libxcb_xfixes" ++@@ -3751,6 +3771,7 @@ tonemap_opencl_filter_deps="opencl const_nan" ++ transpose_opencl_filter_deps="opencl" ++ transpose_vaapi_filter_deps="vaapi VAProcPipelineCaps_rotation_flags" ++ 
transpose_vulkan_filter_deps="vulkan spirv_compiler" +++unsand_filter_select="sand" ++ unsharp_opencl_filter_deps="opencl" ++ uspp_filter_deps="gpl avcodec" ++ vaguedenoiser_filter_deps="gpl" ++@@ -6335,6 +6356,12 @@ if enabled xlib; then ++ disable xlib ++ fi ++ +++enabled libudev && +++ check_pkg_config libudev libudev libudev.h udev_new +++ +++enabled epoxy && +++ check_pkg_config epoxy epoxy epoxy/egl.h epoxy_egl_version +++ ++ check_headers direct.h ++ check_headers dirent.h ++ check_headers dxgidebug.h ++@@ -6794,8 +6821,16 @@ enabled rkmpp && { require_pkg_config rkmpp rockchip_mpp rockchip/r ++ { enabled libdrm || ++ die "ERROR: rkmpp requires --enable-libdrm"; } ++ } +++enabled v4l2_request && { enabled libdrm || +++ die "ERROR: v4l2-request requires --enable-libdrm"; } && +++ { enabled libudev || +++ die "ERROR: v4l2-request requires libudev"; } ++ enabled vapoursynth && require_pkg_config vapoursynth "vapoursynth-script >= 42" VSScript.h vsscript_init ++ +++enabled vout_drm && { enabled libdrm || die "ERROR: vout_drm requires --enable-libdrm"; } +++ +++enabled vout_egl && { enabled epoxy || die "ERROR: vout_egl requires epoxy"; } && +++ { enabled xlib || die "ERROR: vout_egl requires xlib"; } ++ ++ if enabled gcrypt; then ++ GCRYPT_CONFIG="${cross_prefix}libgcrypt-config" ++@@ -6876,6 +6911,10 @@ if enabled v4l2_m2m; then ++ check_cc vp9_v4l2_m2m linux/videodev2.h "int i = V4L2_PIX_FMT_VP9;" ++ fi ++ +++check_func_headers "linux/media.h linux/videodev2.h" v4l2_timeval_to_ns +++check_cc hevc_v4l2_request linux/videodev2.h "int i = V4L2_PIX_FMT_HEVC_SLICE;" +++disable v4l2_req_hevc_vx +++ ++ check_headers sys/videoio.h ++ test_code cc sys/videoio.h "struct v4l2_frmsizeenum vfse; vfse.discrete.width = 0;" && enable_sanitized struct_v4l2_frmivalenum_discrete ++ ++@@ -7370,6 +7409,9 @@ check_deps $CONFIG_LIST \ ++ ++ enabled threads && ! enabled pthreads && ! 
enabled atomics_native && die "non pthread threading without atomics not supported, try adding --enable-pthreads or --cpu=i486 or higher if you are on x86" ++ +++# Sub-feature of hevc_v4l2request_hwaccel - can only be set once deps are done +++enabled hevc_v4l2request_hwaccel && disabled hevc_v4l2_request && enable v4l2_req_hevc_vx +++ ++ case $target_os in ++ haiku) ++ disable memalign ++diff --git a/fftools/ffmpeg.c b/fftools/ffmpeg.c ++index d721a5e721..839da7b472 100644 ++--- a/fftools/ffmpeg.c +++++ b/fftools/ffmpeg.c ++@@ -1993,8 +1993,8 @@ static int ifilter_send_frame(InputFilter *ifilter, AVFrame *frame, int keep_ref ++ av_channel_layout_compare(&ifilter->ch_layout, &frame->ch_layout); ++ break; ++ case AVMEDIA_TYPE_VIDEO: ++- need_reinit |= ifilter->width != frame->width || ++- ifilter->height != frame->height; +++ need_reinit |= ifilter->width != av_frame_cropped_width(frame) || +++ ifilter->height != av_frame_cropped_height(frame); ++ break; ++ } ++ ++@@ -2005,6 +2005,9 @@ static int ifilter_send_frame(InputFilter *ifilter, AVFrame *frame, int keep_ref ++ (ifilter->hw_frames_ctx && ifilter->hw_frames_ctx->data != frame->hw_frames_ctx->data)) ++ need_reinit = 1; ++ +++ if (no_cvt_hw && fg->graph) +++ need_reinit = 0; +++ ++ if (sd = av_frame_get_side_data(frame, AV_FRAME_DATA_DISPLAYMATRIX)) { ++ if (!ifilter->displaymatrix || memcmp(sd->data, ifilter->displaymatrix, sizeof(int32_t) * 9)) ++ need_reinit = 1; ++@@ -2274,8 +2277,7 @@ static int decode_video(InputStream *ist, AVPacket *pkt, int *got_output, int64_ ++ decoded_frame->top_field_first = ist->top_field_first; ++ ++ ist->frames_decoded++; ++- ++- if (ist->hwaccel_retrieve_data && decoded_frame->format == ist->hwaccel_pix_fmt) { +++ if (!no_cvt_hw && ist->hwaccel_retrieve_data && decoded_frame->format == ist->hwaccel_pix_fmt) { ++ err = ist->hwaccel_retrieve_data(ist->dec_ctx, decoded_frame); ++ if (err < 0) ++ goto fail; ++@@ -2607,7 +2609,12 @@ static int process_input_packet(InputStream *ist, const AVPacket *pkt, int no_eo ++ case AVMEDIA_TYPE_VIDEO: ++ ret = decode_video (ist, repeating ? NULL : avpkt, &got_output, &duration_pts, !pkt, ++ &decode_failed); ++- if (!repeating || !pkt || got_output) { +++ // Pi: Do not inc dts if no_cvt_hw set +++ // V4L2 H264 decode has long latency and sometimes spits out a long +++ // stream of output without input. In this case incrementing DTS is wrong. 
+++ // There may be cases where the condition as written is correct so only +++ // "fix" in the cases which cause problems +++ if (!repeating || !pkt || (got_output && !no_cvt_hw)) { ++ if (pkt && pkt->duration) { ++ duration_dts = av_rescale_q(pkt->duration, ist->st->time_base, AV_TIME_BASE_Q); ++ } else if(ist->dec_ctx->framerate.num != 0 && ist->dec_ctx->framerate.den != 0) { ++@@ -2756,12 +2763,15 @@ static enum AVPixelFormat get_format(AVCodecContext *s, const enum AVPixelFormat ++ break; ++ ++ if (ist->hwaccel_id == HWACCEL_GENERIC || ++- ist->hwaccel_id == HWACCEL_AUTO) { +++ ist->hwaccel_id == HWACCEL_AUTO || +++ no_cvt_hw) { ++ for (i = 0;; i++) { ++ config = avcodec_get_hw_config(s->codec, i); ++ if (!config) ++ break; ++- if (!(config->methods & +++ if (no_cvt_hw && (config->methods & AV_CODEC_HW_CONFIG_METHOD_INTERNAL)) +++ av_log(s, AV_LOG_DEBUG, "no_cvt_hw so trying pix_fmt %d with codec internal hwaccel\n", *p); +++ else if (!(config->methods & ++ AV_CODEC_HW_CONFIG_METHOD_HW_DEVICE_CTX)) ++ continue; ++ if (config->pix_fmt == *p) ++diff --git a/fftools/ffmpeg.h b/fftools/ffmpeg.h ++index f1412f6446..8f478619b3 100644 ++--- a/fftools/ffmpeg.h +++++ b/fftools/ffmpeg.h ++@@ -729,6 +729,8 @@ extern enum VideoSyncMethod video_sync_method; ++ extern float frame_drop_threshold; ++ extern int do_benchmark; ++ extern int do_benchmark_all; +++extern int no_cvt_hw; +++extern int do_deinterlace; ++ extern int do_hex_dump; ++ extern int do_pkt_dump; ++ extern int copy_ts; ++diff --git a/fftools/ffmpeg_filter.c b/fftools/ffmpeg_filter.c ++index 1f5bbf6c4d..f888307762 100644 ++--- a/fftools/ffmpeg_filter.c +++++ b/fftools/ffmpeg_filter.c ++@@ -1281,8 +1281,8 @@ int ifilter_parameters_from_frame(InputFilter *ifilter, const AVFrame *frame) ++ ++ ifilter->format = frame->format; ++ ++- ifilter->width = frame->width; ++- ifilter->height = frame->height; +++ ifilter->width = av_frame_cropped_width(frame); +++ ifilter->height = av_frame_cropped_height(frame); ++ ifilter->sample_aspect_ratio = frame->sample_aspect_ratio; ++ ++ ifilter->sample_rate = frame->sample_rate; ++diff --git a/fftools/ffmpeg_hw.c b/fftools/ffmpeg_hw.c ++index 88fa782470..740a5e7153 100644 ++--- a/fftools/ffmpeg_hw.c +++++ b/fftools/ffmpeg_hw.c ++@@ -75,6 +75,8 @@ static char *hw_device_default_name(enum AVHWDeviceType type) ++ char *name; ++ size_t index_pos; ++ int index, index_limit = 1000; +++ if (!type_name) +++ return NULL; ++ index_pos = strlen(type_name); ++ name = av_malloc(index_pos + 4); ++ if (!name) ++diff --git a/fftools/ffmpeg_opt.c b/fftools/ffmpeg_opt.c ++index 055275d813..761db36588 100644 ++--- a/fftools/ffmpeg_opt.c +++++ b/fftools/ffmpeg_opt.c ++@@ -71,6 +71,7 @@ enum VideoSyncMethod video_sync_method = VSYNC_AUTO; ++ float frame_drop_threshold = 0; ++ int do_benchmark = 0; ++ int do_benchmark_all = 0; +++int no_cvt_hw = 0; ++ int do_hex_dump = 0; ++ int do_pkt_dump = 0; ++ int copy_ts = 0; ++@@ -1427,6 +1428,8 @@ const OptionDef options[] = { ++ "add timings for benchmarking" }, ++ { "benchmark_all", OPT_BOOL | OPT_EXPERT, { &do_benchmark_all }, ++ "add timings for each task" }, +++ { "no_cvt_hw", OPT_BOOL | OPT_EXPERT, { &no_cvt_hw }, +++ "do not auto-convert hw frames to sw" }, ++ { "progress", HAS_ARG | OPT_EXPERT, { .func_arg = opt_progress }, ++ "write program-readable progress information", "url" }, ++ { "stdin", OPT_BOOL | OPT_EXPERT, { &stdin_interaction }, ++diff --git a/libavcodec/Makefile b/libavcodec/Makefile ++index 389253f5d0..8b1d669834 100644 ++--- a/libavcodec/Makefile +++++ 
b/libavcodec/Makefile ++@@ -169,7 +169,10 @@ OBJS-$(CONFIG_VIDEODSP) += videodsp.o ++ OBJS-$(CONFIG_VP3DSP) += vp3dsp.o ++ OBJS-$(CONFIG_VP56DSP) += vp56dsp.o ++ OBJS-$(CONFIG_VP8DSP) += vp8dsp.o ++-OBJS-$(CONFIG_V4L2_M2M) += v4l2_m2m.o v4l2_context.o v4l2_buffers.o v4l2_fmt.o +++OBJS-$(CONFIG_V4L2_M2M) += v4l2_m2m.o v4l2_context.o v4l2_buffers.o v4l2_fmt.o\ +++ weak_link.o v4l2_req_dmabufs.o +++OBJS-$(CONFIG_V4L2_REQUEST) += v4l2_req_media.o v4l2_req_pollqueue.o v4l2_req_dmabufs.o\ +++ v4l2_req_devscan.o weak_link.o ++ OBJS-$(CONFIG_WMA_FREQS) += wma_freqs.o ++ OBJS-$(CONFIG_WMV2DSP) += wmv2dsp.o ++ ++@@ -996,6 +999,8 @@ OBJS-$(CONFIG_HEVC_D3D11VA_HWACCEL) += dxva2_hevc.o ++ OBJS-$(CONFIG_HEVC_DXVA2_HWACCEL) += dxva2_hevc.o ++ OBJS-$(CONFIG_HEVC_NVDEC_HWACCEL) += nvdec_hevc.o ++ OBJS-$(CONFIG_HEVC_QSV_HWACCEL) += qsvdec.o +++OBJS-$(CONFIG_HEVC_V4L2REQUEST_HWACCEL) += v4l2_request_hevc.o v4l2_req_decode_q.o v4l2_req_hevc_v4.o +++OBJS-$(CONFIG_V4L2_REQ_HEVC_VX) += v4l2_req_hevc_v1.o v4l2_req_hevc_v2.o v4l2_req_hevc_v3.o ++ OBJS-$(CONFIG_HEVC_VAAPI_HWACCEL) += vaapi_hevc.o h265_profile_level.o ++ OBJS-$(CONFIG_HEVC_VDPAU_HWACCEL) += vdpau_hevc.o h265_profile_level.o ++ OBJS-$(CONFIG_MJPEG_NVDEC_HWACCEL) += nvdec_mjpeg.o ++diff --git a/libavcodec/avcodec.h b/libavcodec/avcodec.h ++index 39881a1d2b..32bc78e2be 100644 ++--- a/libavcodec/avcodec.h +++++ b/libavcodec/avcodec.h ++@@ -2221,6 +2221,17 @@ typedef struct AVHWAccel { ++ * that avctx->hwaccel_priv_data is invalid. ++ */ ++ int (*frame_params)(AVCodecContext *avctx, AVBufferRef *hw_frames_ctx); +++ +++ /** +++ * Called if parsing fails +++ * +++ * An error has occured, end_frame will not be called +++ * start_frame & decode_slice may or may not have been called +++ * Optional +++ * +++ * @param avctx the codec context +++ */ +++ void (*abort_frame)(AVCodecContext *avctx); ++ } AVHWAccel; ++ ++ /** ++diff --git a/libavcodec/hevc-ctrls-v1.h b/libavcodec/hevc-ctrls-v1.h ++new file mode 100644 ++index 0000000000..72cbba0953 ++--- /dev/null +++++ b/libavcodec/hevc-ctrls-v1.h ++@@ -0,0 +1,229 @@ +++/* SPDX-License-Identifier: GPL-2.0 */ +++/* +++ * These are the HEVC state controls for use with stateless HEVC +++ * codec drivers. +++ * +++ * It turns out that these structs are not stable yet and will undergo +++ * more changes. So keep them private until they are stable and ready to +++ * become part of the official public API. +++ */ +++ +++#ifndef _HEVC_CTRLS_H_ +++#define _HEVC_CTRLS_H_ +++ +++#include +++ +++/* The pixel format isn't stable at the moment and will likely be renamed. 
*/ +++#define V4L2_PIX_FMT_HEVC_SLICE v4l2_fourcc('S', '2', '6', '5') /* HEVC parsed slices */ +++ +++#define V4L2_CID_MPEG_VIDEO_HEVC_SPS (V4L2_CID_MPEG_BASE + 1008) +++#define V4L2_CID_MPEG_VIDEO_HEVC_PPS (V4L2_CID_MPEG_BASE + 1009) +++#define V4L2_CID_MPEG_VIDEO_HEVC_SLICE_PARAMS (V4L2_CID_MPEG_BASE + 1010) +++#define V4L2_CID_MPEG_VIDEO_HEVC_SCALING_MATRIX (V4L2_CID_MPEG_BASE + 1011) +++#define V4L2_CID_MPEG_VIDEO_HEVC_DECODE_MODE (V4L2_CID_MPEG_BASE + 1015) +++#define V4L2_CID_MPEG_VIDEO_HEVC_START_CODE (V4L2_CID_MPEG_BASE + 1016) +++ +++/* enum v4l2_ctrl_type type values */ +++#define V4L2_CTRL_TYPE_HEVC_SPS 0x0120 +++#define V4L2_CTRL_TYPE_HEVC_PPS 0x0121 +++#define V4L2_CTRL_TYPE_HEVC_SLICE_PARAMS 0x0122 +++#define V4L2_CTRL_TYPE_HEVC_SCALING_MATRIX 0x0123 +++ +++enum v4l2_mpeg_video_hevc_decode_mode { +++ V4L2_MPEG_VIDEO_HEVC_DECODE_MODE_SLICE_BASED, +++ V4L2_MPEG_VIDEO_HEVC_DECODE_MODE_FRAME_BASED, +++}; +++ +++enum v4l2_mpeg_video_hevc_start_code { +++ V4L2_MPEG_VIDEO_HEVC_START_CODE_NONE, +++ V4L2_MPEG_VIDEO_HEVC_START_CODE_ANNEX_B, +++}; +++ +++#define V4L2_HEVC_SLICE_TYPE_B 0 +++#define V4L2_HEVC_SLICE_TYPE_P 1 +++#define V4L2_HEVC_SLICE_TYPE_I 2 +++ +++#define V4L2_HEVC_SPS_FLAG_SEPARATE_COLOUR_PLANE (1ULL << 0) +++#define V4L2_HEVC_SPS_FLAG_SCALING_LIST_ENABLED (1ULL << 1) +++#define V4L2_HEVC_SPS_FLAG_AMP_ENABLED (1ULL << 2) +++#define V4L2_HEVC_SPS_FLAG_SAMPLE_ADAPTIVE_OFFSET (1ULL << 3) +++#define V4L2_HEVC_SPS_FLAG_PCM_ENABLED (1ULL << 4) +++#define V4L2_HEVC_SPS_FLAG_PCM_LOOP_FILTER_DISABLED (1ULL << 5) +++#define V4L2_HEVC_SPS_FLAG_LONG_TERM_REF_PICS_PRESENT (1ULL << 6) +++#define V4L2_HEVC_SPS_FLAG_SPS_TEMPORAL_MVP_ENABLED (1ULL << 7) +++#define V4L2_HEVC_SPS_FLAG_STRONG_INTRA_SMOOTHING_ENABLED (1ULL << 8) +++ +++/* The controls are not stable at the moment and will likely be reworked. */ +++struct v4l2_ctrl_hevc_sps { +++ /* ISO/IEC 23008-2, ITU-T Rec. 
H.265: Sequence parameter set */ +++ __u16 pic_width_in_luma_samples; +++ __u16 pic_height_in_luma_samples; +++ __u8 bit_depth_luma_minus8; +++ __u8 bit_depth_chroma_minus8; +++ __u8 log2_max_pic_order_cnt_lsb_minus4; +++ __u8 sps_max_dec_pic_buffering_minus1; +++ __u8 sps_max_num_reorder_pics; +++ __u8 sps_max_latency_increase_plus1; +++ __u8 log2_min_luma_coding_block_size_minus3; +++ __u8 log2_diff_max_min_luma_coding_block_size; +++ __u8 log2_min_luma_transform_block_size_minus2; +++ __u8 log2_diff_max_min_luma_transform_block_size; +++ __u8 max_transform_hierarchy_depth_inter; +++ __u8 max_transform_hierarchy_depth_intra; +++ __u8 pcm_sample_bit_depth_luma_minus1; +++ __u8 pcm_sample_bit_depth_chroma_minus1; +++ __u8 log2_min_pcm_luma_coding_block_size_minus3; +++ __u8 log2_diff_max_min_pcm_luma_coding_block_size; +++ __u8 num_short_term_ref_pic_sets; +++ __u8 num_long_term_ref_pics_sps; +++ __u8 chroma_format_idc; +++ __u8 sps_max_sub_layers_minus1; +++ +++ __u64 flags; +++}; +++ +++#define V4L2_HEVC_PPS_FLAG_DEPENDENT_SLICE_SEGMENT (1ULL << 0) +++#define V4L2_HEVC_PPS_FLAG_OUTPUT_FLAG_PRESENT (1ULL << 1) +++#define V4L2_HEVC_PPS_FLAG_SIGN_DATA_HIDING_ENABLED (1ULL << 2) +++#define V4L2_HEVC_PPS_FLAG_CABAC_INIT_PRESENT (1ULL << 3) +++#define V4L2_HEVC_PPS_FLAG_CONSTRAINED_INTRA_PRED (1ULL << 4) +++#define V4L2_HEVC_PPS_FLAG_TRANSFORM_SKIP_ENABLED (1ULL << 5) +++#define V4L2_HEVC_PPS_FLAG_CU_QP_DELTA_ENABLED (1ULL << 6) +++#define V4L2_HEVC_PPS_FLAG_PPS_SLICE_CHROMA_QP_OFFSETS_PRESENT (1ULL << 7) +++#define V4L2_HEVC_PPS_FLAG_WEIGHTED_PRED (1ULL << 8) +++#define V4L2_HEVC_PPS_FLAG_WEIGHTED_BIPRED (1ULL << 9) +++#define V4L2_HEVC_PPS_FLAG_TRANSQUANT_BYPASS_ENABLED (1ULL << 10) +++#define V4L2_HEVC_PPS_FLAG_TILES_ENABLED (1ULL << 11) +++#define V4L2_HEVC_PPS_FLAG_ENTROPY_CODING_SYNC_ENABLED (1ULL << 12) +++#define V4L2_HEVC_PPS_FLAG_LOOP_FILTER_ACROSS_TILES_ENABLED (1ULL << 13) +++#define V4L2_HEVC_PPS_FLAG_PPS_LOOP_FILTER_ACROSS_SLICES_ENABLED (1ULL << 14) +++#define V4L2_HEVC_PPS_FLAG_DEBLOCKING_FILTER_OVERRIDE_ENABLED (1ULL << 15) +++#define V4L2_HEVC_PPS_FLAG_PPS_DISABLE_DEBLOCKING_FILTER (1ULL << 16) +++#define V4L2_HEVC_PPS_FLAG_LISTS_MODIFICATION_PRESENT (1ULL << 17) +++#define V4L2_HEVC_PPS_FLAG_SLICE_SEGMENT_HEADER_EXTENSION_PRESENT (1ULL << 18) +++ +++struct v4l2_ctrl_hevc_pps { +++ /* ISO/IEC 23008-2, ITU-T Rec. 
H.265: Picture parameter set */ +++ __u8 num_extra_slice_header_bits; +++ __s8 init_qp_minus26; +++ __u8 diff_cu_qp_delta_depth; +++ __s8 pps_cb_qp_offset; +++ __s8 pps_cr_qp_offset; +++ __u8 num_tile_columns_minus1; +++ __u8 num_tile_rows_minus1; +++ __u8 column_width_minus1[20]; +++ __u8 row_height_minus1[22]; +++ __s8 pps_beta_offset_div2; +++ __s8 pps_tc_offset_div2; +++ __u8 log2_parallel_merge_level_minus2; +++ +++ __u8 padding[4]; +++ __u64 flags; +++}; +++ +++#define V4L2_HEVC_DPB_ENTRY_RPS_ST_CURR_BEFORE 0x01 +++#define V4L2_HEVC_DPB_ENTRY_RPS_ST_CURR_AFTER 0x02 +++#define V4L2_HEVC_DPB_ENTRY_RPS_LT_CURR 0x03 +++ +++#define V4L2_HEVC_DPB_ENTRIES_NUM_MAX 16 +++ +++struct v4l2_hevc_dpb_entry { +++ __u64 timestamp; +++ __u8 rps; +++ __u8 field_pic; +++ __u16 pic_order_cnt[2]; +++ __u8 padding[2]; +++}; +++ +++struct v4l2_hevc_pred_weight_table { +++ __s8 delta_luma_weight_l0[V4L2_HEVC_DPB_ENTRIES_NUM_MAX]; +++ __s8 luma_offset_l0[V4L2_HEVC_DPB_ENTRIES_NUM_MAX]; +++ __s8 delta_chroma_weight_l0[V4L2_HEVC_DPB_ENTRIES_NUM_MAX][2]; +++ __s8 chroma_offset_l0[V4L2_HEVC_DPB_ENTRIES_NUM_MAX][2]; +++ +++ __s8 delta_luma_weight_l1[V4L2_HEVC_DPB_ENTRIES_NUM_MAX]; +++ __s8 luma_offset_l1[V4L2_HEVC_DPB_ENTRIES_NUM_MAX]; +++ __s8 delta_chroma_weight_l1[V4L2_HEVC_DPB_ENTRIES_NUM_MAX][2]; +++ __s8 chroma_offset_l1[V4L2_HEVC_DPB_ENTRIES_NUM_MAX][2]; +++ +++ __u8 padding[6]; +++ +++ __u8 luma_log2_weight_denom; +++ __s8 delta_chroma_log2_weight_denom; +++}; +++ +++#define V4L2_HEVC_SLICE_PARAMS_FLAG_SLICE_SAO_LUMA (1ULL << 0) +++#define V4L2_HEVC_SLICE_PARAMS_FLAG_SLICE_SAO_CHROMA (1ULL << 1) +++#define V4L2_HEVC_SLICE_PARAMS_FLAG_SLICE_TEMPORAL_MVP_ENABLED (1ULL << 2) +++#define V4L2_HEVC_SLICE_PARAMS_FLAG_MVD_L1_ZERO (1ULL << 3) +++#define V4L2_HEVC_SLICE_PARAMS_FLAG_CABAC_INIT (1ULL << 4) +++#define V4L2_HEVC_SLICE_PARAMS_FLAG_COLLOCATED_FROM_L0 (1ULL << 5) +++#define V4L2_HEVC_SLICE_PARAMS_FLAG_USE_INTEGER_MV (1ULL << 6) +++#define V4L2_HEVC_SLICE_PARAMS_FLAG_SLICE_DEBLOCKING_FILTER_DISABLED (1ULL << 7) +++#define V4L2_HEVC_SLICE_PARAMS_FLAG_SLICE_LOOP_FILTER_ACROSS_SLICES_ENABLED (1ULL << 8) +++#define V4L2_HEVC_SLICE_PARAMS_FLAG_DEPENDENT_SLICE_SEGMENT (1ULL << 9) +++ +++struct v4l2_ctrl_hevc_slice_params { +++ __u32 bit_size; +++ __u32 data_bit_offset; +++ +++ /* ISO/IEC 23008-2, ITU-T Rec. H.265: General slice segment header */ +++ __u32 slice_segment_addr; +++ __u32 num_entry_point_offsets; +++ +++ /* ISO/IEC 23008-2, ITU-T Rec. H.265: NAL unit header */ +++ __u8 nal_unit_type; +++ __u8 nuh_temporal_id_plus1; +++ +++ /* ISO/IEC 23008-2, ITU-T Rec. H.265: General slice segment header */ +++ __u8 slice_type; +++ __u8 colour_plane_id; +++ __u16 slice_pic_order_cnt; +++ __u8 num_ref_idx_l0_active_minus1; +++ __u8 num_ref_idx_l1_active_minus1; +++ __u8 collocated_ref_idx; +++ __u8 five_minus_max_num_merge_cand; +++ __s8 slice_qp_delta; +++ __s8 slice_cb_qp_offset; +++ __s8 slice_cr_qp_offset; +++ __s8 slice_act_y_qp_offset; +++ __s8 slice_act_cb_qp_offset; +++ __s8 slice_act_cr_qp_offset; +++ __s8 slice_beta_offset_div2; +++ __s8 slice_tc_offset_div2; +++ +++ /* ISO/IEC 23008-2, ITU-T Rec. H.265: Picture timing SEI message */ +++ __u8 pic_struct; +++ +++ /* ISO/IEC 23008-2, ITU-T Rec. 
H.265: General slice segment header */ +++ __u8 num_active_dpb_entries; +++ __u8 ref_idx_l0[V4L2_HEVC_DPB_ENTRIES_NUM_MAX]; +++ __u8 ref_idx_l1[V4L2_HEVC_DPB_ENTRIES_NUM_MAX]; +++ +++ __u8 num_rps_poc_st_curr_before; +++ __u8 num_rps_poc_st_curr_after; +++ __u8 num_rps_poc_lt_curr; +++ +++ __u8 padding; +++ +++ __u32 entry_point_offset_minus1[256]; +++ +++ /* ISO/IEC 23008-2, ITU-T Rec. H.265: General slice segment header */ +++ struct v4l2_hevc_dpb_entry dpb[V4L2_HEVC_DPB_ENTRIES_NUM_MAX]; +++ +++ /* ISO/IEC 23008-2, ITU-T Rec. H.265: Weighted prediction parameter */ +++ struct v4l2_hevc_pred_weight_table pred_weight_table; +++ +++ __u64 flags; +++}; +++ +++struct v4l2_ctrl_hevc_scaling_matrix { +++ __u8 scaling_list_4x4[6][16]; +++ __u8 scaling_list_8x8[6][64]; +++ __u8 scaling_list_16x16[6][64]; +++ __u8 scaling_list_32x32[2][64]; +++ __u8 scaling_list_dc_coef_16x16[6]; +++ __u8 scaling_list_dc_coef_32x32[2]; +++}; +++ +++#endif ++diff --git a/libavcodec/hevc-ctrls-v2.h b/libavcodec/hevc-ctrls-v2.h ++new file mode 100644 ++index 0000000000..7cbbbf055f ++--- /dev/null +++++ b/libavcodec/hevc-ctrls-v2.h ++@@ -0,0 +1,257 @@ +++/* SPDX-License-Identifier: GPL-2.0 */ +++/* +++ * These are the HEVC state controls for use with stateless HEVC +++ * codec drivers. +++ * +++ * It turns out that these structs are not stable yet and will undergo +++ * more changes. So keep them private until they are stable and ready to +++ * become part of the official public API. +++ */ +++ +++#ifndef _HEVC_CTRLS_H_ +++#define _HEVC_CTRLS_H_ +++ +++#include +++ +++/* The pixel format isn't stable at the moment and will likely be renamed. */ +++#define V4L2_PIX_FMT_HEVC_SLICE v4l2_fourcc('S', '2', '6', '5') /* HEVC parsed slices */ +++ +++#define V4L2_CID_MPEG_VIDEO_HEVC_SPS (V4L2_CID_CODEC_BASE + 1008) +++#define V4L2_CID_MPEG_VIDEO_HEVC_PPS (V4L2_CID_CODEC_BASE + 1009) +++#define V4L2_CID_MPEG_VIDEO_HEVC_SLICE_PARAMS (V4L2_CID_CODEC_BASE + 1010) +++#define V4L2_CID_MPEG_VIDEO_HEVC_SCALING_MATRIX (V4L2_CID_CODEC_BASE + 1011) +++#define V4L2_CID_MPEG_VIDEO_HEVC_DECODE_PARAMS (V4L2_CID_CODEC_BASE + 1012) +++#define V4L2_CID_MPEG_VIDEO_HEVC_DECODE_MODE (V4L2_CID_CODEC_BASE + 1015) +++#define V4L2_CID_MPEG_VIDEO_HEVC_START_CODE (V4L2_CID_CODEC_BASE + 1016) +++ +++/* enum v4l2_ctrl_type type values */ +++#define V4L2_CTRL_TYPE_HEVC_SPS 0x0120 +++#define V4L2_CTRL_TYPE_HEVC_PPS 0x0121 +++#define V4L2_CTRL_TYPE_HEVC_SLICE_PARAMS 0x0122 +++#define V4L2_CTRL_TYPE_HEVC_SCALING_MATRIX 0x0123 +++#define V4L2_CTRL_TYPE_HEVC_DECODE_PARAMS 0x0124 +++ +++enum v4l2_mpeg_video_hevc_decode_mode { +++ V4L2_MPEG_VIDEO_HEVC_DECODE_MODE_SLICE_BASED, +++ V4L2_MPEG_VIDEO_HEVC_DECODE_MODE_FRAME_BASED, +++}; +++ +++enum v4l2_mpeg_video_hevc_start_code { +++ V4L2_MPEG_VIDEO_HEVC_START_CODE_NONE, +++ V4L2_MPEG_VIDEO_HEVC_START_CODE_ANNEX_B, +++}; +++ +++#define V4L2_HEVC_SLICE_TYPE_B 0 +++#define V4L2_HEVC_SLICE_TYPE_P 1 +++#define V4L2_HEVC_SLICE_TYPE_I 2 +++ +++#define V4L2_HEVC_SPS_FLAG_SEPARATE_COLOUR_PLANE (1ULL << 0) +++#define V4L2_HEVC_SPS_FLAG_SCALING_LIST_ENABLED (1ULL << 1) +++#define V4L2_HEVC_SPS_FLAG_AMP_ENABLED (1ULL << 2) +++#define V4L2_HEVC_SPS_FLAG_SAMPLE_ADAPTIVE_OFFSET (1ULL << 3) +++#define V4L2_HEVC_SPS_FLAG_PCM_ENABLED (1ULL << 4) +++#define V4L2_HEVC_SPS_FLAG_PCM_LOOP_FILTER_DISABLED (1ULL << 5) +++#define V4L2_HEVC_SPS_FLAG_LONG_TERM_REF_PICS_PRESENT (1ULL << 6) +++#define V4L2_HEVC_SPS_FLAG_SPS_TEMPORAL_MVP_ENABLED (1ULL << 7) +++#define V4L2_HEVC_SPS_FLAG_STRONG_INTRA_SMOOTHING_ENABLED (1ULL << 8) +++ +++/* The 
controls are not stable at the moment and will likely be reworked. */ +++struct v4l2_ctrl_hevc_sps { +++ /* ISO/IEC 23008-2, ITU-T Rec. H.265: Sequence parameter set */ +++ __u16 pic_width_in_luma_samples; +++ __u16 pic_height_in_luma_samples; +++ __u8 bit_depth_luma_minus8; +++ __u8 bit_depth_chroma_minus8; +++ __u8 log2_max_pic_order_cnt_lsb_minus4; +++ __u8 sps_max_dec_pic_buffering_minus1; +++ __u8 sps_max_num_reorder_pics; +++ __u8 sps_max_latency_increase_plus1; +++ __u8 log2_min_luma_coding_block_size_minus3; +++ __u8 log2_diff_max_min_luma_coding_block_size; +++ __u8 log2_min_luma_transform_block_size_minus2; +++ __u8 log2_diff_max_min_luma_transform_block_size; +++ __u8 max_transform_hierarchy_depth_inter; +++ __u8 max_transform_hierarchy_depth_intra; +++ __u8 pcm_sample_bit_depth_luma_minus1; +++ __u8 pcm_sample_bit_depth_chroma_minus1; +++ __u8 log2_min_pcm_luma_coding_block_size_minus3; +++ __u8 log2_diff_max_min_pcm_luma_coding_block_size; +++ __u8 num_short_term_ref_pic_sets; +++ __u8 num_long_term_ref_pics_sps; +++ __u8 chroma_format_idc; +++ __u8 sps_max_sub_layers_minus1; +++ +++ __u64 flags; +++}; +++ +++#define V4L2_HEVC_PPS_FLAG_DEPENDENT_SLICE_SEGMENT_ENABLED (1ULL << 0) +++#define V4L2_HEVC_PPS_FLAG_OUTPUT_FLAG_PRESENT (1ULL << 1) +++#define V4L2_HEVC_PPS_FLAG_SIGN_DATA_HIDING_ENABLED (1ULL << 2) +++#define V4L2_HEVC_PPS_FLAG_CABAC_INIT_PRESENT (1ULL << 3) +++#define V4L2_HEVC_PPS_FLAG_CONSTRAINED_INTRA_PRED (1ULL << 4) +++#define V4L2_HEVC_PPS_FLAG_TRANSFORM_SKIP_ENABLED (1ULL << 5) +++#define V4L2_HEVC_PPS_FLAG_CU_QP_DELTA_ENABLED (1ULL << 6) +++#define V4L2_HEVC_PPS_FLAG_PPS_SLICE_CHROMA_QP_OFFSETS_PRESENT (1ULL << 7) +++#define V4L2_HEVC_PPS_FLAG_WEIGHTED_PRED (1ULL << 8) +++#define V4L2_HEVC_PPS_FLAG_WEIGHTED_BIPRED (1ULL << 9) +++#define V4L2_HEVC_PPS_FLAG_TRANSQUANT_BYPASS_ENABLED (1ULL << 10) +++#define V4L2_HEVC_PPS_FLAG_TILES_ENABLED (1ULL << 11) +++#define V4L2_HEVC_PPS_FLAG_ENTROPY_CODING_SYNC_ENABLED (1ULL << 12) +++#define V4L2_HEVC_PPS_FLAG_LOOP_FILTER_ACROSS_TILES_ENABLED (1ULL << 13) +++#define V4L2_HEVC_PPS_FLAG_PPS_LOOP_FILTER_ACROSS_SLICES_ENABLED (1ULL << 14) +++#define V4L2_HEVC_PPS_FLAG_DEBLOCKING_FILTER_OVERRIDE_ENABLED (1ULL << 15) +++#define V4L2_HEVC_PPS_FLAG_PPS_DISABLE_DEBLOCKING_FILTER (1ULL << 16) +++#define V4L2_HEVC_PPS_FLAG_LISTS_MODIFICATION_PRESENT (1ULL << 17) +++#define V4L2_HEVC_PPS_FLAG_SLICE_SEGMENT_HEADER_EXTENSION_PRESENT (1ULL << 18) +++#define V4L2_HEVC_PPS_FLAG_DEBLOCKING_FILTER_CONTROL_PRESENT (1ULL << 19) +++#define V4L2_HEVC_PPS_FLAG_UNIFORM_SPACING (1ULL << 20) +++ +++struct v4l2_ctrl_hevc_pps { +++ /* ISO/IEC 23008-2, ITU-T Rec. 
H.265: Picture parameter set */ +++ __u8 num_extra_slice_header_bits; +++ __u8 num_ref_idx_l0_default_active_minus1; +++ __u8 num_ref_idx_l1_default_active_minus1; +++ __s8 init_qp_minus26; +++ __u8 diff_cu_qp_delta_depth; +++ __s8 pps_cb_qp_offset; +++ __s8 pps_cr_qp_offset; +++ __u8 num_tile_columns_minus1; +++ __u8 num_tile_rows_minus1; +++ __u8 column_width_minus1[20]; +++ __u8 row_height_minus1[22]; +++ __s8 pps_beta_offset_div2; +++ __s8 pps_tc_offset_div2; +++ __u8 log2_parallel_merge_level_minus2; +++ +++ __u8 padding[4]; +++ __u64 flags; +++}; +++ +++#define V4L2_HEVC_DPB_ENTRY_RPS_ST_CURR_BEFORE 0x01 +++#define V4L2_HEVC_DPB_ENTRY_RPS_ST_CURR_AFTER 0x02 +++#define V4L2_HEVC_DPB_ENTRY_RPS_LT_CURR 0x03 +++ +++#define V4L2_HEVC_DPB_ENTRIES_NUM_MAX 16 +++ +++struct v4l2_hevc_dpb_entry { +++ __u64 timestamp; +++ __u8 rps; +++ __u8 field_pic; +++ __u16 pic_order_cnt[2]; +++ __u8 padding[2]; +++}; +++ +++struct v4l2_hevc_pred_weight_table { +++ __s8 delta_luma_weight_l0[V4L2_HEVC_DPB_ENTRIES_NUM_MAX]; +++ __s8 luma_offset_l0[V4L2_HEVC_DPB_ENTRIES_NUM_MAX]; +++ __s8 delta_chroma_weight_l0[V4L2_HEVC_DPB_ENTRIES_NUM_MAX][2]; +++ __s8 chroma_offset_l0[V4L2_HEVC_DPB_ENTRIES_NUM_MAX][2]; +++ +++ __s8 delta_luma_weight_l1[V4L2_HEVC_DPB_ENTRIES_NUM_MAX]; +++ __s8 luma_offset_l1[V4L2_HEVC_DPB_ENTRIES_NUM_MAX]; +++ __s8 delta_chroma_weight_l1[V4L2_HEVC_DPB_ENTRIES_NUM_MAX][2]; +++ __s8 chroma_offset_l1[V4L2_HEVC_DPB_ENTRIES_NUM_MAX][2]; +++ +++ __u8 padding[6]; +++ +++ __u8 luma_log2_weight_denom; +++ __s8 delta_chroma_log2_weight_denom; +++}; +++ +++#define V4L2_HEVC_SLICE_PARAMS_FLAG_SLICE_SAO_LUMA (1ULL << 0) +++#define V4L2_HEVC_SLICE_PARAMS_FLAG_SLICE_SAO_CHROMA (1ULL << 1) +++#define V4L2_HEVC_SLICE_PARAMS_FLAG_SLICE_TEMPORAL_MVP_ENABLED (1ULL << 2) +++#define V4L2_HEVC_SLICE_PARAMS_FLAG_MVD_L1_ZERO (1ULL << 3) +++#define V4L2_HEVC_SLICE_PARAMS_FLAG_CABAC_INIT (1ULL << 4) +++#define V4L2_HEVC_SLICE_PARAMS_FLAG_COLLOCATED_FROM_L0 (1ULL << 5) +++#define V4L2_HEVC_SLICE_PARAMS_FLAG_USE_INTEGER_MV (1ULL << 6) +++#define V4L2_HEVC_SLICE_PARAMS_FLAG_SLICE_DEBLOCKING_FILTER_DISABLED (1ULL << 7) +++#define V4L2_HEVC_SLICE_PARAMS_FLAG_SLICE_LOOP_FILTER_ACROSS_SLICES_ENABLED (1ULL << 8) +++#define V4L2_HEVC_SLICE_PARAMS_FLAG_DEPENDENT_SLICE_SEGMENT (1ULL << 9) +++ +++struct v4l2_ctrl_hevc_slice_params { +++ __u32 bit_size; +++ __u32 data_bit_offset; +++ +++ /* ISO/IEC 23008-2, ITU-T Rec. H.265: General slice segment header */ +++ __u32 slice_segment_addr; +++ __u32 num_entry_point_offsets; +++ +++ /* ISO/IEC 23008-2, ITU-T Rec. H.265: NAL unit header */ +++ __u8 nal_unit_type; +++ __u8 nuh_temporal_id_plus1; +++ +++ /* ISO/IEC 23008-2, ITU-T Rec. H.265: General slice segment header */ +++ __u8 slice_type; +++ __u8 colour_plane_id; +++ __u16 slice_pic_order_cnt; +++ __u8 num_ref_idx_l0_active_minus1; +++ __u8 num_ref_idx_l1_active_minus1; +++ __u8 collocated_ref_idx; +++ __u8 five_minus_max_num_merge_cand; +++ __s8 slice_qp_delta; +++ __s8 slice_cb_qp_offset; +++ __s8 slice_cr_qp_offset; +++ __s8 slice_act_y_qp_offset; +++ __s8 slice_act_cb_qp_offset; +++ __s8 slice_act_cr_qp_offset; +++ __s8 slice_beta_offset_div2; +++ __s8 slice_tc_offset_div2; +++ +++ /* ISO/IEC 23008-2, ITU-T Rec. H.265: Picture timing SEI message */ +++ __u8 pic_struct; +++ +++ /* ISO/IEC 23008-2, ITU-T Rec. 
H.265: General slice segment header */ +++ __u8 ref_idx_l0[V4L2_HEVC_DPB_ENTRIES_NUM_MAX]; +++ __u8 ref_idx_l1[V4L2_HEVC_DPB_ENTRIES_NUM_MAX]; +++ +++ __u8 padding[5]; +++ +++ __u32 entry_point_offset_minus1[256]; +++ +++ /* ISO/IEC 23008-2, ITU-T Rec. H.265: Weighted prediction parameter */ +++ struct v4l2_hevc_pred_weight_table pred_weight_table; +++ +++ __u64 flags; +++}; +++ +++#define V4L2_HEVC_DECODE_PARAM_FLAG_IRAP_PIC 0x1 +++#define V4L2_HEVC_DECODE_PARAM_FLAG_IDR_PIC 0x2 +++#define V4L2_HEVC_DECODE_PARAM_FLAG_NO_OUTPUT_OF_PRIOR 0x4 +++ +++struct v4l2_ctrl_hevc_decode_params { +++ __s32 pic_order_cnt_val; +++ __u8 num_active_dpb_entries; +++ struct v4l2_hevc_dpb_entry dpb[V4L2_HEVC_DPB_ENTRIES_NUM_MAX]; +++ __u8 num_poc_st_curr_before; +++ __u8 num_poc_st_curr_after; +++ __u8 num_poc_lt_curr; +++ __u8 poc_st_curr_before[V4L2_HEVC_DPB_ENTRIES_NUM_MAX]; +++ __u8 poc_st_curr_after[V4L2_HEVC_DPB_ENTRIES_NUM_MAX]; +++ __u8 poc_lt_curr[V4L2_HEVC_DPB_ENTRIES_NUM_MAX]; +++ __u64 flags; +++}; +++ +++/* MPEG-class control IDs specific to the Hantro driver as defined by V4L2 */ +++#define V4L2_CID_CODEC_HANTRO_BASE (V4L2_CTRL_CLASS_CODEC | 0x1200) +++/* +++ * V4L2_CID_HANTRO_HEVC_SLICE_HEADER_SKIP - +++ * the number of data (in bits) to skip in the +++ * slice segment header. +++ * If non-IDR, the bits to be skipped go from syntax element "pic_output_flag" +++ * to before syntax element "slice_temporal_mvp_enabled_flag". +++ * If IDR, the skipped bits are just "pic_output_flag" +++ * (separate_colour_plane_flag is not supported). +++ */ +++#define V4L2_CID_HANTRO_HEVC_SLICE_HEADER_SKIP (V4L2_CID_CODEC_HANTRO_BASE + 0) +++ +++struct v4l2_ctrl_hevc_scaling_matrix { +++ __u8 scaling_list_4x4[6][16]; +++ __u8 scaling_list_8x8[6][64]; +++ __u8 scaling_list_16x16[6][64]; +++ __u8 scaling_list_32x32[2][64]; +++ __u8 scaling_list_dc_coef_16x16[6]; +++ __u8 scaling_list_dc_coef_32x32[2]; +++}; +++ +++#endif ++diff --git a/libavcodec/hevc-ctrls-v3.h b/libavcodec/hevc-ctrls-v3.h ++new file mode 100644 ++index 0000000000..4e35bd583d ++--- /dev/null +++++ b/libavcodec/hevc-ctrls-v3.h ++@@ -0,0 +1,255 @@ +++/* SPDX-License-Identifier: GPL-2.0 */ +++/* +++ * These are the HEVC state controls for use with stateless HEVC +++ * codec drivers. +++ * +++ * It turns out that these structs are not stable yet and will undergo +++ * more changes. So keep them private until they are stable and ready to +++ * become part of the official public API. +++ */ +++ +++#ifndef _HEVC_CTRLS_H_ +++#define _HEVC_CTRLS_H_ +++ +++#include +++ +++/* The pixel format isn't stable at the moment and will likely be renamed. 
*/ +++#define V4L2_PIX_FMT_HEVC_SLICE v4l2_fourcc('S', '2', '6', '5') /* HEVC parsed slices */ +++ +++#define V4L2_CID_MPEG_VIDEO_HEVC_SPS (V4L2_CID_CODEC_BASE + 1008) +++#define V4L2_CID_MPEG_VIDEO_HEVC_PPS (V4L2_CID_CODEC_BASE + 1009) +++#define V4L2_CID_MPEG_VIDEO_HEVC_SLICE_PARAMS (V4L2_CID_CODEC_BASE + 1010) +++#define V4L2_CID_MPEG_VIDEO_HEVC_SCALING_MATRIX (V4L2_CID_CODEC_BASE + 1011) +++#define V4L2_CID_MPEG_VIDEO_HEVC_DECODE_PARAMS (V4L2_CID_CODEC_BASE + 1012) +++#define V4L2_CID_MPEG_VIDEO_HEVC_DECODE_MODE (V4L2_CID_CODEC_BASE + 1015) +++#define V4L2_CID_MPEG_VIDEO_HEVC_START_CODE (V4L2_CID_CODEC_BASE + 1016) +++ +++/* enum v4l2_ctrl_type type values */ +++#define V4L2_CTRL_TYPE_HEVC_SPS 0x0120 +++#define V4L2_CTRL_TYPE_HEVC_PPS 0x0121 +++#define V4L2_CTRL_TYPE_HEVC_SLICE_PARAMS 0x0122 +++#define V4L2_CTRL_TYPE_HEVC_SCALING_MATRIX 0x0123 +++#define V4L2_CTRL_TYPE_HEVC_DECODE_PARAMS 0x0124 +++ +++enum v4l2_mpeg_video_hevc_decode_mode { +++ V4L2_MPEG_VIDEO_HEVC_DECODE_MODE_SLICE_BASED, +++ V4L2_MPEG_VIDEO_HEVC_DECODE_MODE_FRAME_BASED, +++}; +++ +++enum v4l2_mpeg_video_hevc_start_code { +++ V4L2_MPEG_VIDEO_HEVC_START_CODE_NONE, +++ V4L2_MPEG_VIDEO_HEVC_START_CODE_ANNEX_B, +++}; +++ +++#define V4L2_HEVC_SLICE_TYPE_B 0 +++#define V4L2_HEVC_SLICE_TYPE_P 1 +++#define V4L2_HEVC_SLICE_TYPE_I 2 +++ +++#define V4L2_HEVC_SPS_FLAG_SEPARATE_COLOUR_PLANE (1ULL << 0) +++#define V4L2_HEVC_SPS_FLAG_SCALING_LIST_ENABLED (1ULL << 1) +++#define V4L2_HEVC_SPS_FLAG_AMP_ENABLED (1ULL << 2) +++#define V4L2_HEVC_SPS_FLAG_SAMPLE_ADAPTIVE_OFFSET (1ULL << 3) +++#define V4L2_HEVC_SPS_FLAG_PCM_ENABLED (1ULL << 4) +++#define V4L2_HEVC_SPS_FLAG_PCM_LOOP_FILTER_DISABLED (1ULL << 5) +++#define V4L2_HEVC_SPS_FLAG_LONG_TERM_REF_PICS_PRESENT (1ULL << 6) +++#define V4L2_HEVC_SPS_FLAG_SPS_TEMPORAL_MVP_ENABLED (1ULL << 7) +++#define V4L2_HEVC_SPS_FLAG_STRONG_INTRA_SMOOTHING_ENABLED (1ULL << 8) +++ +++/* The controls are not stable at the moment and will likely be reworked. */ +++struct v4l2_ctrl_hevc_sps { +++ /* ISO/IEC 23008-2, ITU-T Rec. 
H.265: Sequence parameter set */ +++ __u16 pic_width_in_luma_samples; +++ __u16 pic_height_in_luma_samples; +++ __u8 bit_depth_luma_minus8; +++ __u8 bit_depth_chroma_minus8; +++ __u8 log2_max_pic_order_cnt_lsb_minus4; +++ __u8 sps_max_dec_pic_buffering_minus1; +++ __u8 sps_max_num_reorder_pics; +++ __u8 sps_max_latency_increase_plus1; +++ __u8 log2_min_luma_coding_block_size_minus3; +++ __u8 log2_diff_max_min_luma_coding_block_size; +++ __u8 log2_min_luma_transform_block_size_minus2; +++ __u8 log2_diff_max_min_luma_transform_block_size; +++ __u8 max_transform_hierarchy_depth_inter; +++ __u8 max_transform_hierarchy_depth_intra; +++ __u8 pcm_sample_bit_depth_luma_minus1; +++ __u8 pcm_sample_bit_depth_chroma_minus1; +++ __u8 log2_min_pcm_luma_coding_block_size_minus3; +++ __u8 log2_diff_max_min_pcm_luma_coding_block_size; +++ __u8 num_short_term_ref_pic_sets; +++ __u8 num_long_term_ref_pics_sps; +++ __u8 chroma_format_idc; +++ __u8 sps_max_sub_layers_minus1; +++ +++ __u64 flags; +++}; +++ +++#define V4L2_HEVC_PPS_FLAG_DEPENDENT_SLICE_SEGMENT_ENABLED (1ULL << 0) +++#define V4L2_HEVC_PPS_FLAG_OUTPUT_FLAG_PRESENT (1ULL << 1) +++#define V4L2_HEVC_PPS_FLAG_SIGN_DATA_HIDING_ENABLED (1ULL << 2) +++#define V4L2_HEVC_PPS_FLAG_CABAC_INIT_PRESENT (1ULL << 3) +++#define V4L2_HEVC_PPS_FLAG_CONSTRAINED_INTRA_PRED (1ULL << 4) +++#define V4L2_HEVC_PPS_FLAG_TRANSFORM_SKIP_ENABLED (1ULL << 5) +++#define V4L2_HEVC_PPS_FLAG_CU_QP_DELTA_ENABLED (1ULL << 6) +++#define V4L2_HEVC_PPS_FLAG_PPS_SLICE_CHROMA_QP_OFFSETS_PRESENT (1ULL << 7) +++#define V4L2_HEVC_PPS_FLAG_WEIGHTED_PRED (1ULL << 8) +++#define V4L2_HEVC_PPS_FLAG_WEIGHTED_BIPRED (1ULL << 9) +++#define V4L2_HEVC_PPS_FLAG_TRANSQUANT_BYPASS_ENABLED (1ULL << 10) +++#define V4L2_HEVC_PPS_FLAG_TILES_ENABLED (1ULL << 11) +++#define V4L2_HEVC_PPS_FLAG_ENTROPY_CODING_SYNC_ENABLED (1ULL << 12) +++#define V4L2_HEVC_PPS_FLAG_LOOP_FILTER_ACROSS_TILES_ENABLED (1ULL << 13) +++#define V4L2_HEVC_PPS_FLAG_PPS_LOOP_FILTER_ACROSS_SLICES_ENABLED (1ULL << 14) +++#define V4L2_HEVC_PPS_FLAG_DEBLOCKING_FILTER_OVERRIDE_ENABLED (1ULL << 15) +++#define V4L2_HEVC_PPS_FLAG_PPS_DISABLE_DEBLOCKING_FILTER (1ULL << 16) +++#define V4L2_HEVC_PPS_FLAG_LISTS_MODIFICATION_PRESENT (1ULL << 17) +++#define V4L2_HEVC_PPS_FLAG_SLICE_SEGMENT_HEADER_EXTENSION_PRESENT (1ULL << 18) +++#define V4L2_HEVC_PPS_FLAG_DEBLOCKING_FILTER_CONTROL_PRESENT (1ULL << 19) +++#define V4L2_HEVC_PPS_FLAG_UNIFORM_SPACING (1ULL << 20) +++ +++struct v4l2_ctrl_hevc_pps { +++ /* ISO/IEC 23008-2, ITU-T Rec. 
H.265: Picture parameter set */ +++ __u8 num_extra_slice_header_bits; +++ __u8 num_ref_idx_l0_default_active_minus1; +++ __u8 num_ref_idx_l1_default_active_minus1; +++ __s8 init_qp_minus26; +++ __u8 diff_cu_qp_delta_depth; +++ __s8 pps_cb_qp_offset; +++ __s8 pps_cr_qp_offset; +++ __u8 num_tile_columns_minus1; +++ __u8 num_tile_rows_minus1; +++ __u8 column_width_minus1[20]; +++ __u8 row_height_minus1[22]; +++ __s8 pps_beta_offset_div2; +++ __s8 pps_tc_offset_div2; +++ __u8 log2_parallel_merge_level_minus2; +++ +++ __u8 padding[4]; +++ __u64 flags; +++}; +++ +++#define V4L2_HEVC_DPB_ENTRY_LONG_TERM_REFERENCE 0x01 +++ +++#define V4L2_HEVC_DPB_ENTRIES_NUM_MAX 16 +++ +++struct v4l2_hevc_dpb_entry { +++ __u64 timestamp; +++ __u8 flags; +++ __u8 field_pic; +++ __u16 pic_order_cnt[2]; +++ __u8 padding[2]; +++}; +++ +++struct v4l2_hevc_pred_weight_table { +++ __s8 delta_luma_weight_l0[V4L2_HEVC_DPB_ENTRIES_NUM_MAX]; +++ __s8 luma_offset_l0[V4L2_HEVC_DPB_ENTRIES_NUM_MAX]; +++ __s8 delta_chroma_weight_l0[V4L2_HEVC_DPB_ENTRIES_NUM_MAX][2]; +++ __s8 chroma_offset_l0[V4L2_HEVC_DPB_ENTRIES_NUM_MAX][2]; +++ +++ __s8 delta_luma_weight_l1[V4L2_HEVC_DPB_ENTRIES_NUM_MAX]; +++ __s8 luma_offset_l1[V4L2_HEVC_DPB_ENTRIES_NUM_MAX]; +++ __s8 delta_chroma_weight_l1[V4L2_HEVC_DPB_ENTRIES_NUM_MAX][2]; +++ __s8 chroma_offset_l1[V4L2_HEVC_DPB_ENTRIES_NUM_MAX][2]; +++ +++ __u8 padding[6]; +++ +++ __u8 luma_log2_weight_denom; +++ __s8 delta_chroma_log2_weight_denom; +++}; +++ +++#define V4L2_HEVC_SLICE_PARAMS_FLAG_SLICE_SAO_LUMA (1ULL << 0) +++#define V4L2_HEVC_SLICE_PARAMS_FLAG_SLICE_SAO_CHROMA (1ULL << 1) +++#define V4L2_HEVC_SLICE_PARAMS_FLAG_SLICE_TEMPORAL_MVP_ENABLED (1ULL << 2) +++#define V4L2_HEVC_SLICE_PARAMS_FLAG_MVD_L1_ZERO (1ULL << 3) +++#define V4L2_HEVC_SLICE_PARAMS_FLAG_CABAC_INIT (1ULL << 4) +++#define V4L2_HEVC_SLICE_PARAMS_FLAG_COLLOCATED_FROM_L0 (1ULL << 5) +++#define V4L2_HEVC_SLICE_PARAMS_FLAG_USE_INTEGER_MV (1ULL << 6) +++#define V4L2_HEVC_SLICE_PARAMS_FLAG_SLICE_DEBLOCKING_FILTER_DISABLED (1ULL << 7) +++#define V4L2_HEVC_SLICE_PARAMS_FLAG_SLICE_LOOP_FILTER_ACROSS_SLICES_ENABLED (1ULL << 8) +++#define V4L2_HEVC_SLICE_PARAMS_FLAG_DEPENDENT_SLICE_SEGMENT (1ULL << 9) +++ +++struct v4l2_ctrl_hevc_slice_params { +++ __u32 bit_size; +++ __u32 data_bit_offset; +++ +++ /* ISO/IEC 23008-2, ITU-T Rec. H.265: General slice segment header */ +++ __u32 slice_segment_addr; +++ __u32 num_entry_point_offsets; +++ +++ /* ISO/IEC 23008-2, ITU-T Rec. H.265: NAL unit header */ +++ __u8 nal_unit_type; +++ __u8 nuh_temporal_id_plus1; +++ +++ /* ISO/IEC 23008-2, ITU-T Rec. H.265: General slice segment header */ +++ __u8 slice_type; +++ __u8 colour_plane_id; +++ __u16 slice_pic_order_cnt; +++ __u8 num_ref_idx_l0_active_minus1; +++ __u8 num_ref_idx_l1_active_minus1; +++ __u8 collocated_ref_idx; +++ __u8 five_minus_max_num_merge_cand; +++ __s8 slice_qp_delta; +++ __s8 slice_cb_qp_offset; +++ __s8 slice_cr_qp_offset; +++ __s8 slice_act_y_qp_offset; +++ __s8 slice_act_cb_qp_offset; +++ __s8 slice_act_cr_qp_offset; +++ __s8 slice_beta_offset_div2; +++ __s8 slice_tc_offset_div2; +++ +++ /* ISO/IEC 23008-2, ITU-T Rec. H.265: Picture timing SEI message */ +++ __u8 pic_struct; +++ +++ /* ISO/IEC 23008-2, ITU-T Rec. H.265: General slice segment header */ +++ __u8 ref_idx_l0[V4L2_HEVC_DPB_ENTRIES_NUM_MAX]; +++ __u8 ref_idx_l1[V4L2_HEVC_DPB_ENTRIES_NUM_MAX]; +++ +++ __u8 padding[5]; +++ +++ __u32 entry_point_offset_minus1[256]; +++ +++ /* ISO/IEC 23008-2, ITU-T Rec. 
H.265: Weighted prediction parameter */ +++ struct v4l2_hevc_pred_weight_table pred_weight_table; +++ +++ __u64 flags; +++}; +++ +++#define V4L2_HEVC_DECODE_PARAM_FLAG_IRAP_PIC 0x1 +++#define V4L2_HEVC_DECODE_PARAM_FLAG_IDR_PIC 0x2 +++#define V4L2_HEVC_DECODE_PARAM_FLAG_NO_OUTPUT_OF_PRIOR 0x4 +++ +++struct v4l2_ctrl_hevc_decode_params { +++ __s32 pic_order_cnt_val; +++ __u8 num_active_dpb_entries; +++ struct v4l2_hevc_dpb_entry dpb[V4L2_HEVC_DPB_ENTRIES_NUM_MAX]; +++ __u8 num_poc_st_curr_before; +++ __u8 num_poc_st_curr_after; +++ __u8 num_poc_lt_curr; +++ __u8 poc_st_curr_before[V4L2_HEVC_DPB_ENTRIES_NUM_MAX]; +++ __u8 poc_st_curr_after[V4L2_HEVC_DPB_ENTRIES_NUM_MAX]; +++ __u8 poc_lt_curr[V4L2_HEVC_DPB_ENTRIES_NUM_MAX]; +++ __u64 flags; +++}; +++ +++struct v4l2_ctrl_hevc_scaling_matrix { +++ __u8 scaling_list_4x4[6][16]; +++ __u8 scaling_list_8x8[6][64]; +++ __u8 scaling_list_16x16[6][64]; +++ __u8 scaling_list_32x32[2][64]; +++ __u8 scaling_list_dc_coef_16x16[6]; +++ __u8 scaling_list_dc_coef_32x32[2]; +++}; +++ +++/* MPEG-class control IDs specific to the Hantro driver as defined by V4L2 */ +++#define V4L2_CID_CODEC_HANTRO_BASE (V4L2_CTRL_CLASS_CODEC | 0x1200) +++/* +++ * V4L2_CID_HANTRO_HEVC_SLICE_HEADER_SKIP - +++ * the number of data (in bits) to skip in the +++ * slice segment header. +++ * If non-IDR, the bits to be skipped go from syntax element "pic_output_flag" +++ * to before syntax element "slice_temporal_mvp_enabled_flag". +++ * If IDR, the skipped bits are just "pic_output_flag" +++ * (separate_colour_plane_flag is not supported). +++ */ +++#define V4L2_CID_HANTRO_HEVC_SLICE_HEADER_SKIP (V4L2_CID_CODEC_HANTRO_BASE + 0) +++ +++#endif ++diff --git a/libavcodec/hevc-ctrls-v4.h b/libavcodec/hevc-ctrls-v4.h ++new file mode 100644 ++index 0000000000..c02fdbe5a8 ++--- /dev/null +++++ b/libavcodec/hevc-ctrls-v4.h ++@@ -0,0 +1,524 @@ +++/* SPDX-License-Identifier: ((GPL-2.0+ WITH Linux-syscall-note) OR BSD-3-Clause) */ +++/* +++ * Video for Linux Two controls header file +++ * +++ * Copyright (C) 1999-2012 the contributors +++ * +++ * This program is free software; you can redistribute it and/or modify +++ * it under the terms of the GNU General Public License as published by +++ * the Free Software Foundation; either version 2 of the License, or +++ * (at your option) any later version. +++ * +++ * This program is distributed in the hope that it will be useful, +++ * but WITHOUT ANY WARRANTY; without even the implied warranty of +++ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the +++ * GNU General Public License for more details. +++ * +++ * Alternatively you can redistribute this file under the terms of the +++ * BSD license as stated below: +++ * +++ * Redistribution and use in source and binary forms, with or without +++ * modification, are permitted provided that the following conditions +++ * are met: +++ * 1. Redistributions of source code must retain the above copyright +++ * notice, this list of conditions and the following disclaimer. +++ * 2. Redistributions in binary form must reproduce the above copyright +++ * notice, this list of conditions and the following disclaimer in +++ * the documentation and/or other materials provided with the +++ * distribution. +++ * 3. The names of its contributors may not be used to endorse or promote +++ * products derived from this software without specific prior written +++ * permission. 
+++ * +++ * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS +++ * "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT +++ * LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR +++ * A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT +++ * OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, +++ * SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED +++ * TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR +++ * PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF +++ * LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING +++ * NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS +++ * SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. +++ * +++ * The contents of this header was split off from videodev2.h. All control +++ * definitions should be added to this header, which is included by +++ * videodev2.h. +++ */ +++ +++#ifndef AVCODEC_HEVC_CTRLS_V4_H +++#define AVCODEC_HEVC_CTRLS_V4_H +++ +++#include +++#include +++ +++#ifndef V4L2_CTRL_CLASS_CODEC_STATELESS +++#define V4L2_CTRL_CLASS_CODEC_STATELESS 0x00a40000 /* Stateless codecs controls */ +++#endif +++#ifndef V4L2_CID_CODEC_STATELESS_BASE +++#define V4L2_CID_CODEC_STATELESS_BASE (V4L2_CTRL_CLASS_CODEC_STATELESS | 0x900) +++#endif +++ +++#define V4L2_PIX_FMT_HEVC_SLICE v4l2_fourcc('S', '2', '6', '5') /* HEVC parsed slices */ +++ +++#define V4L2_CID_STATELESS_HEVC_SPS (V4L2_CID_CODEC_STATELESS_BASE + 400) +++#define V4L2_CID_STATELESS_HEVC_PPS (V4L2_CID_CODEC_STATELESS_BASE + 401) +++#define V4L2_CID_STATELESS_HEVC_SLICE_PARAMS (V4L2_CID_CODEC_STATELESS_BASE + 402) +++#define V4L2_CID_STATELESS_HEVC_SCALING_MATRIX (V4L2_CID_CODEC_STATELESS_BASE + 403) +++#define V4L2_CID_STATELESS_HEVC_DECODE_PARAMS (V4L2_CID_CODEC_STATELESS_BASE + 404) +++#define V4L2_CID_STATELESS_HEVC_DECODE_MODE (V4L2_CID_CODEC_STATELESS_BASE + 405) +++#define V4L2_CID_STATELESS_HEVC_START_CODE (V4L2_CID_CODEC_STATELESS_BASE + 406) +++#define V4L2_CID_STATELESS_HEVC_ENTRY_POINT_OFFSETS (V4L2_CID_CODEC_STATELESS_BASE + 407) +++ +++enum v4l2_stateless_hevc_decode_mode { +++ V4L2_STATELESS_HEVC_DECODE_MODE_SLICE_BASED, +++ V4L2_STATELESS_HEVC_DECODE_MODE_FRAME_BASED, +++}; +++ +++enum v4l2_stateless_hevc_start_code { +++ V4L2_STATELESS_HEVC_START_CODE_NONE, +++ V4L2_STATELESS_HEVC_START_CODE_ANNEX_B, +++}; +++ +++#define V4L2_HEVC_SLICE_TYPE_B 0 +++#define V4L2_HEVC_SLICE_TYPE_P 1 +++#define V4L2_HEVC_SLICE_TYPE_I 2 +++ +++#define V4L2_HEVC_SPS_FLAG_SEPARATE_COLOUR_PLANE (1ULL << 0) +++#define V4L2_HEVC_SPS_FLAG_SCALING_LIST_ENABLED (1ULL << 1) +++#define V4L2_HEVC_SPS_FLAG_AMP_ENABLED (1ULL << 2) +++#define V4L2_HEVC_SPS_FLAG_SAMPLE_ADAPTIVE_OFFSET (1ULL << 3) +++#define V4L2_HEVC_SPS_FLAG_PCM_ENABLED (1ULL << 4) +++#define V4L2_HEVC_SPS_FLAG_PCM_LOOP_FILTER_DISABLED (1ULL << 5) +++#define V4L2_HEVC_SPS_FLAG_LONG_TERM_REF_PICS_PRESENT (1ULL << 6) +++#define V4L2_HEVC_SPS_FLAG_SPS_TEMPORAL_MVP_ENABLED (1ULL << 7) +++#define V4L2_HEVC_SPS_FLAG_STRONG_INTRA_SMOOTHING_ENABLED (1ULL << 8) +++ +++/** +++ * struct v4l2_ctrl_hevc_sps - ITU-T Rec. 
H.265: Sequence parameter set +++ * +++ * @video_parameter_set_id: specifies the value of the +++ * vps_video_parameter_set_id of the active VPS +++ * @seq_parameter_set_id: provides an identifier for the SPS for +++ * reference by other syntax elements +++ * @pic_width_in_luma_samples: specifies the width of each decoded picture +++ * in units of luma samples +++ * @pic_height_in_luma_samples: specifies the height of each decoded picture +++ * in units of luma samples +++ * @bit_depth_luma_minus8: this value plus 8specifies the bit depth of the +++ * samples of the luma array +++ * @bit_depth_chroma_minus8: this value plus 8 specifies the bit depth of the +++ * samples of the chroma arrays +++ * @log2_max_pic_order_cnt_lsb_minus4: this value plus 4 specifies the value of +++ * the variable MaxPicOrderCntLsb +++ * @sps_max_dec_pic_buffering_minus1: this value plus 1 specifies the maximum +++ * required size of the decoded picture +++ * buffer for the codec video sequence +++ * @sps_max_num_reorder_pics: indicates the maximum allowed number of pictures +++ * @sps_max_latency_increase_plus1: not equal to 0 is used to compute the +++ * value of SpsMaxLatencyPictures array +++ * @log2_min_luma_coding_block_size_minus3: plus 3 specifies the minimum +++ * luma coding block size +++ * @log2_diff_max_min_luma_coding_block_size: specifies the difference between +++ * the maximum and minimum luma +++ * coding block size +++ * @log2_min_luma_transform_block_size_minus2: plus 2 specifies the minimum luma +++ * transform block size +++ * @log2_diff_max_min_luma_transform_block_size: specifies the difference between +++ * the maximum and minimum luma +++ * transform block size +++ * @max_transform_hierarchy_depth_inter: specifies the maximum hierarchy +++ * depth for transform units of +++ * coding units coded in inter +++ * prediction mode +++ * @max_transform_hierarchy_depth_intra: specifies the maximum hierarchy +++ * depth for transform units of +++ * coding units coded in intra +++ * prediction mode +++ * @pcm_sample_bit_depth_luma_minus1: this value plus 1 specifies the number of +++ * bits used to represent each of PCM sample +++ * values of the luma component +++ * @pcm_sample_bit_depth_chroma_minus1: this value plus 1 specifies the number +++ * of bits used to represent each of PCM +++ * sample values of the chroma components +++ * @log2_min_pcm_luma_coding_block_size_minus3: this value plus 3 specifies the +++ * minimum size of coding blocks +++ * @log2_diff_max_min_pcm_luma_coding_block_size: specifies the difference between +++ * the maximum and minimum size of +++ * coding blocks +++ * @num_short_term_ref_pic_sets: specifies the number of st_ref_pic_set() +++ * syntax structures included in the SPS +++ * @num_long_term_ref_pics_sps: specifies the number of candidate long-term +++ * reference pictures that are specified in the SPS +++ * @chroma_format_idc: specifies the chroma sampling +++ * @sps_max_sub_layers_minus1: this value plus 1 specifies the maximum number +++ * of temporal sub-layers +++ * @reserved: padding field. Should be zeroed by applications. 
+++ * @flags: see V4L2_HEVC_SPS_FLAG_{} +++ */ +++struct v4l2_ctrl_hevc_sps { +++ __u8 video_parameter_set_id; +++ __u8 seq_parameter_set_id; +++ __u16 pic_width_in_luma_samples; +++ __u16 pic_height_in_luma_samples; +++ __u8 bit_depth_luma_minus8; +++ __u8 bit_depth_chroma_minus8; +++ __u8 log2_max_pic_order_cnt_lsb_minus4; +++ __u8 sps_max_dec_pic_buffering_minus1; +++ __u8 sps_max_num_reorder_pics; +++ __u8 sps_max_latency_increase_plus1; +++ __u8 log2_min_luma_coding_block_size_minus3; +++ __u8 log2_diff_max_min_luma_coding_block_size; +++ __u8 log2_min_luma_transform_block_size_minus2; +++ __u8 log2_diff_max_min_luma_transform_block_size; +++ __u8 max_transform_hierarchy_depth_inter; +++ __u8 max_transform_hierarchy_depth_intra; +++ __u8 pcm_sample_bit_depth_luma_minus1; +++ __u8 pcm_sample_bit_depth_chroma_minus1; +++ __u8 log2_min_pcm_luma_coding_block_size_minus3; +++ __u8 log2_diff_max_min_pcm_luma_coding_block_size; +++ __u8 num_short_term_ref_pic_sets; +++ __u8 num_long_term_ref_pics_sps; +++ __u8 chroma_format_idc; +++ __u8 sps_max_sub_layers_minus1; +++ +++ __u8 reserved[6]; +++ __u64 flags; +++}; +++ +++#define V4L2_HEVC_PPS_FLAG_DEPENDENT_SLICE_SEGMENT_ENABLED (1ULL << 0) +++#define V4L2_HEVC_PPS_FLAG_OUTPUT_FLAG_PRESENT (1ULL << 1) +++#define V4L2_HEVC_PPS_FLAG_SIGN_DATA_HIDING_ENABLED (1ULL << 2) +++#define V4L2_HEVC_PPS_FLAG_CABAC_INIT_PRESENT (1ULL << 3) +++#define V4L2_HEVC_PPS_FLAG_CONSTRAINED_INTRA_PRED (1ULL << 4) +++#define V4L2_HEVC_PPS_FLAG_TRANSFORM_SKIP_ENABLED (1ULL << 5) +++#define V4L2_HEVC_PPS_FLAG_CU_QP_DELTA_ENABLED (1ULL << 6) +++#define V4L2_HEVC_PPS_FLAG_PPS_SLICE_CHROMA_QP_OFFSETS_PRESENT (1ULL << 7) +++#define V4L2_HEVC_PPS_FLAG_WEIGHTED_PRED (1ULL << 8) +++#define V4L2_HEVC_PPS_FLAG_WEIGHTED_BIPRED (1ULL << 9) +++#define V4L2_HEVC_PPS_FLAG_TRANSQUANT_BYPASS_ENABLED (1ULL << 10) +++#define V4L2_HEVC_PPS_FLAG_TILES_ENABLED (1ULL << 11) +++#define V4L2_HEVC_PPS_FLAG_ENTROPY_CODING_SYNC_ENABLED (1ULL << 12) +++#define V4L2_HEVC_PPS_FLAG_LOOP_FILTER_ACROSS_TILES_ENABLED (1ULL << 13) +++#define V4L2_HEVC_PPS_FLAG_PPS_LOOP_FILTER_ACROSS_SLICES_ENABLED (1ULL << 14) +++#define V4L2_HEVC_PPS_FLAG_DEBLOCKING_FILTER_OVERRIDE_ENABLED (1ULL << 15) +++#define V4L2_HEVC_PPS_FLAG_PPS_DISABLE_DEBLOCKING_FILTER (1ULL << 16) +++#define V4L2_HEVC_PPS_FLAG_LISTS_MODIFICATION_PRESENT (1ULL << 17) +++#define V4L2_HEVC_PPS_FLAG_SLICE_SEGMENT_HEADER_EXTENSION_PRESENT (1ULL << 18) +++#define V4L2_HEVC_PPS_FLAG_DEBLOCKING_FILTER_CONTROL_PRESENT (1ULL << 19) +++#define V4L2_HEVC_PPS_FLAG_UNIFORM_SPACING (1ULL << 20) +++ +++/** +++ * struct v4l2_ctrl_hevc_pps - ITU-T Rec. H.265: Picture parameter set +++ * +++ * @pic_parameter_set_id: identifies the PPS for reference by other +++ * syntax elements +++ * @num_extra_slice_header_bits: specifies the number of extra slice header +++ * bits that are present in the slice header RBSP +++ * for coded pictures referring to the PPS. 
+++ * @num_ref_idx_l0_default_active_minus1: this value plus 1 specifies the +++ * inferred value of num_ref_idx_l0_active_minus1 +++ * @num_ref_idx_l1_default_active_minus1: this value plus 1 specifies the +++ * inferred value of num_ref_idx_l1_active_minus1 +++ * @init_qp_minus26: this value plus 26 specifies the initial value of SliceQp Y for +++ * each slice referring to the PPS +++ * @diff_cu_qp_delta_depth: specifies the difference between the luma coding +++ * tree block size and the minimum luma coding block +++ * size of coding units that convey cu_qp_delta_abs +++ * and cu_qp_delta_sign_flag +++ * @pps_cb_qp_offset: specify the offsets to the luma quantization parameter Cb +++ * @pps_cr_qp_offset: specify the offsets to the luma quantization parameter Cr +++ * @num_tile_columns_minus1: this value plus 1 specifies the number of tile columns +++ * partitioning the picture +++ * @num_tile_rows_minus1: this value plus 1 specifies the number of tile rows partitioning +++ * the picture +++ * @column_width_minus1: this value plus 1 specifies the width of the each tile column in +++ * units of coding tree blocks +++ * @row_height_minus1: this value plus 1 specifies the height of the each tile row in +++ * units of coding tree blocks +++ * @pps_beta_offset_div2: specify the default deblocking parameter offsets for +++ * beta divided by 2 +++ * @pps_tc_offset_div2: specify the default deblocking parameter offsets for tC +++ * divided by 2 +++ * @log2_parallel_merge_level_minus2: this value plus 2 specifies the value of +++ * the variable Log2ParMrgLevel +++ * @reserved: padding field. Should be zeroed by applications. +++ * @flags: see V4L2_HEVC_PPS_FLAG_{} +++ */ +++struct v4l2_ctrl_hevc_pps { +++ __u8 pic_parameter_set_id; +++ __u8 num_extra_slice_header_bits; +++ __u8 num_ref_idx_l0_default_active_minus1; +++ __u8 num_ref_idx_l1_default_active_minus1; +++ __s8 init_qp_minus26; +++ __u8 diff_cu_qp_delta_depth; +++ __s8 pps_cb_qp_offset; +++ __s8 pps_cr_qp_offset; +++ __u8 num_tile_columns_minus1; +++ __u8 num_tile_rows_minus1; +++ __u8 column_width_minus1[20]; +++ __u8 row_height_minus1[22]; +++ __s8 pps_beta_offset_div2; +++ __s8 pps_tc_offset_div2; +++ __u8 log2_parallel_merge_level_minus2; +++ __u8 reserved; +++ __u64 flags; +++}; +++ +++#define V4L2_HEVC_DPB_ENTRY_LONG_TERM_REFERENCE 0x01 +++ +++#define V4L2_HEVC_SEI_PIC_STRUCT_FRAME 0 +++#define V4L2_HEVC_SEI_PIC_STRUCT_TOP_FIELD 1 +++#define V4L2_HEVC_SEI_PIC_STRUCT_BOTTOM_FIELD 2 +++#define V4L2_HEVC_SEI_PIC_STRUCT_TOP_BOTTOM 3 +++#define V4L2_HEVC_SEI_PIC_STRUCT_BOTTOM_TOP 4 +++#define V4L2_HEVC_SEI_PIC_STRUCT_TOP_BOTTOM_TOP 5 +++#define V4L2_HEVC_SEI_PIC_STRUCT_BOTTOM_TOP_BOTTOM 6 +++#define V4L2_HEVC_SEI_PIC_STRUCT_FRAME_DOUBLING 7 +++#define V4L2_HEVC_SEI_PIC_STRUCT_FRAME_TRIPLING 8 +++#define V4L2_HEVC_SEI_PIC_STRUCT_TOP_PAIRED_PREVIOUS_BOTTOM 9 +++#define V4L2_HEVC_SEI_PIC_STRUCT_BOTTOM_PAIRED_PREVIOUS_TOP 10 +++#define V4L2_HEVC_SEI_PIC_STRUCT_TOP_PAIRED_NEXT_BOTTOM 11 +++#define V4L2_HEVC_SEI_PIC_STRUCT_BOTTOM_PAIRED_NEXT_TOP 12 +++ +++#define V4L2_HEVC_DPB_ENTRIES_NUM_MAX 16 +++ +++/** +++ * struct v4l2_hevc_dpb_entry - HEVC decoded picture buffer entry +++ * +++ * @timestamp: timestamp of the V4L2 capture buffer to use as reference. +++ * @flags: long term flag for the reference frame +++ * @field_pic: whether the reference is a field picture or a frame. +++ * @reserved: padding field. Should be zeroed by applications. +++ * @pic_order_cnt_val: the picture order count of the current picture. 
+++ */ +++struct v4l2_hevc_dpb_entry { +++ __u64 timestamp; +++ __u8 flags; +++ __u8 field_pic; +++ __u16 reserved; +++ __s32 pic_order_cnt_val; +++}; +++ +++/** +++ * struct v4l2_hevc_pred_weight_table - HEVC weighted prediction parameters +++ * +++ * @delta_luma_weight_l0: the difference of the weighting factor applied +++ * to the luma prediction value for list 0 +++ * @luma_offset_l0: the additive offset applied to the luma prediction value +++ * for list 0 +++ * @delta_chroma_weight_l0: the difference of the weighting factor applied +++ * to the chroma prediction values for list 0 +++ * @chroma_offset_l0: the difference of the additive offset applied to +++ * the chroma prediction values for list 0 +++ * @delta_luma_weight_l1: the difference of the weighting factor applied +++ * to the luma prediction value for list 1 +++ * @luma_offset_l1: the additive offset applied to the luma prediction value +++ * for list 1 +++ * @delta_chroma_weight_l1: the difference of the weighting factor applied +++ * to the chroma prediction values for list 1 +++ * @chroma_offset_l1: the difference of the additive offset applied to +++ * the chroma prediction values for list 1 +++ * @luma_log2_weight_denom: the base 2 logarithm of the denominator for +++ * all luma weighting factors +++ * @delta_chroma_log2_weight_denom: the difference of the base 2 logarithm +++ * of the denominator for all chroma +++ * weighting factors +++ */ +++struct v4l2_hevc_pred_weight_table { +++ __s8 delta_luma_weight_l0[V4L2_HEVC_DPB_ENTRIES_NUM_MAX]; +++ __s8 luma_offset_l0[V4L2_HEVC_DPB_ENTRIES_NUM_MAX]; +++ __s8 delta_chroma_weight_l0[V4L2_HEVC_DPB_ENTRIES_NUM_MAX][2]; +++ __s8 chroma_offset_l0[V4L2_HEVC_DPB_ENTRIES_NUM_MAX][2]; +++ +++ __s8 delta_luma_weight_l1[V4L2_HEVC_DPB_ENTRIES_NUM_MAX]; +++ __s8 luma_offset_l1[V4L2_HEVC_DPB_ENTRIES_NUM_MAX]; +++ __s8 delta_chroma_weight_l1[V4L2_HEVC_DPB_ENTRIES_NUM_MAX][2]; +++ __s8 chroma_offset_l1[V4L2_HEVC_DPB_ENTRIES_NUM_MAX][2]; +++ +++ __u8 luma_log2_weight_denom; +++ __s8 delta_chroma_log2_weight_denom; +++}; +++ +++#define V4L2_HEVC_SLICE_PARAMS_FLAG_SLICE_SAO_LUMA (1ULL << 0) +++#define V4L2_HEVC_SLICE_PARAMS_FLAG_SLICE_SAO_CHROMA (1ULL << 1) +++#define V4L2_HEVC_SLICE_PARAMS_FLAG_SLICE_TEMPORAL_MVP_ENABLED (1ULL << 2) +++#define V4L2_HEVC_SLICE_PARAMS_FLAG_MVD_L1_ZERO (1ULL << 3) +++#define V4L2_HEVC_SLICE_PARAMS_FLAG_CABAC_INIT (1ULL << 4) +++#define V4L2_HEVC_SLICE_PARAMS_FLAG_COLLOCATED_FROM_L0 (1ULL << 5) +++#define V4L2_HEVC_SLICE_PARAMS_FLAG_USE_INTEGER_MV (1ULL << 6) +++#define V4L2_HEVC_SLICE_PARAMS_FLAG_SLICE_DEBLOCKING_FILTER_DISABLED (1ULL << 7) +++#define V4L2_HEVC_SLICE_PARAMS_FLAG_SLICE_LOOP_FILTER_ACROSS_SLICES_ENABLED (1ULL << 8) +++#define V4L2_HEVC_SLICE_PARAMS_FLAG_DEPENDENT_SLICE_SEGMENT (1ULL << 9) +++ +++/** +++ * struct v4l2_ctrl_hevc_slice_params - HEVC slice parameters +++ * +++ * This control is a dynamically sized 1-dimensional array, +++ * V4L2_CTRL_FLAG_DYNAMIC_ARRAY flag must be set when using it. +++ * +++ * @bit_size: size (in bits) of the current slice data +++ * @data_byte_offset: offset (in bytes) to the video data in the current slice data +++ * @num_entry_point_offsets: specifies the number of entry point offset syntax +++ * elements in the slice header. 
+++ * @nal_unit_type: specifies the coding type of the slice (B, P or I) +++ * @nuh_temporal_id_plus1: minus 1 specifies a temporal identifier for the NAL unit +++ * @slice_type: see V4L2_HEVC_SLICE_TYPE_{} +++ * @colour_plane_id: specifies the colour plane associated with the current slice +++ * @slice_pic_order_cnt: specifies the picture order count +++ * @num_ref_idx_l0_active_minus1: this value plus 1 specifies the maximum +++ * reference index for reference picture list 0 +++ * that may be used to decode the slice +++ * @num_ref_idx_l1_active_minus1: this value plus 1 specifies the maximum +++ * reference index for reference picture list 1 +++ * that may be used to decode the slice +++ * @collocated_ref_idx: specifies the reference index of the collocated picture used +++ * for temporal motion vector prediction +++ * @five_minus_max_num_merge_cand: specifies the maximum number of merging +++ * motion vector prediction candidates supported in +++ * the slice subtracted from 5 +++ * @slice_qp_delta: specifies the initial value of QpY to be used for the coding +++ * blocks in the slice +++ * @slice_cb_qp_offset: specifies a difference to be added to the value of pps_cb_qp_offset +++ * @slice_cr_qp_offset: specifies a difference to be added to the value of pps_cr_qp_offset +++ * @slice_act_y_qp_offset: screen content extension parameters +++ * @slice_act_cb_qp_offset: screen content extension parameters +++ * @slice_act_cr_qp_offset: screen content extension parameters +++ * @slice_beta_offset_div2: specify the deblocking parameter offsets for beta divided by 2 +++ * @slice_tc_offset_div2: specify the deblocking parameter offsets for tC divided by 2 +++ * @pic_struct: indicates whether a picture should be displayed as a frame or as one or +++ * more fields +++ * @reserved0: padding field. Should be zeroed by applications. +++ * @slice_segment_addr: specifies the address of the first coding tree block in +++ * the slice segment +++ * @ref_idx_l0: the list of L0 reference elements as indices in the DPB +++ * @ref_idx_l1: the list of L1 reference elements as indices in the DPB +++ * @short_term_ref_pic_set_size: specifies the size of short-term reference +++ * pictures set included in the SPS +++ * @long_term_ref_pic_set_size: specifies the size of long-term reference +++ * pictures set include in the SPS +++ * @pred_weight_table: the prediction weight coefficients for inter-picture +++ * prediction +++ * @reserved1: padding field. Should be zeroed by applications. +++ * @flags: see V4L2_HEVC_SLICE_PARAMS_FLAG_{} +++ */ +++struct v4l2_ctrl_hevc_slice_params { +++ __u32 bit_size; +++ __u32 data_byte_offset; +++ __u32 num_entry_point_offsets; +++ +++ /* ISO/IEC 23008-2, ITU-T Rec. H.265: NAL unit header */ +++ __u8 nal_unit_type; +++ __u8 nuh_temporal_id_plus1; +++ +++ /* ISO/IEC 23008-2, ITU-T Rec. H.265: General slice segment header */ +++ __u8 slice_type; +++ __u8 colour_plane_id; +++ __s32 slice_pic_order_cnt; +++ __u8 num_ref_idx_l0_active_minus1; +++ __u8 num_ref_idx_l1_active_minus1; +++ __u8 collocated_ref_idx; +++ __u8 five_minus_max_num_merge_cand; +++ __s8 slice_qp_delta; +++ __s8 slice_cb_qp_offset; +++ __s8 slice_cr_qp_offset; +++ __s8 slice_act_y_qp_offset; +++ __s8 slice_act_cb_qp_offset; +++ __s8 slice_act_cr_qp_offset; +++ __s8 slice_beta_offset_div2; +++ __s8 slice_tc_offset_div2; +++ +++ /* ISO/IEC 23008-2, ITU-T Rec. H.265: Picture timing SEI message */ +++ __u8 pic_struct; +++ +++ __u8 reserved0[3]; +++ /* ISO/IEC 23008-2, ITU-T Rec. 
H.265: General slice segment header */ +++ __u32 slice_segment_addr; +++ __u8 ref_idx_l0[V4L2_HEVC_DPB_ENTRIES_NUM_MAX]; +++ __u8 ref_idx_l1[V4L2_HEVC_DPB_ENTRIES_NUM_MAX]; +++ __u16 short_term_ref_pic_set_size; +++ __u16 long_term_ref_pic_set_size; +++ +++ /* ISO/IEC 23008-2, ITU-T Rec. H.265: Weighted prediction parameter */ +++ struct v4l2_hevc_pred_weight_table pred_weight_table; +++ +++ __u8 reserved1[2]; +++ __u64 flags; +++}; +++ +++#define V4L2_HEVC_DECODE_PARAM_FLAG_IRAP_PIC 0x1 +++#define V4L2_HEVC_DECODE_PARAM_FLAG_IDR_PIC 0x2 +++#define V4L2_HEVC_DECODE_PARAM_FLAG_NO_OUTPUT_OF_PRIOR 0x4 +++ +++/** +++ * struct v4l2_ctrl_hevc_decode_params - HEVC decode parameters +++ * +++ * @pic_order_cnt_val: picture order count +++ * @short_term_ref_pic_set_size: specifies the size of short-term reference +++ * pictures set included in the SPS of the first slice +++ * @long_term_ref_pic_set_size: specifies the size of long-term reference +++ * pictures set include in the SPS of the first slice +++ * @num_active_dpb_entries: the number of entries in dpb +++ * @num_poc_st_curr_before: the number of reference pictures in the short-term +++ * set that come before the current frame +++ * @num_poc_st_curr_after: the number of reference pictures in the short-term +++ * set that come after the current frame +++ * @num_poc_lt_curr: the number of reference pictures in the long-term set +++ * @poc_st_curr_before: provides the index of the short term before references +++ * in DPB array +++ * @poc_st_curr_after: provides the index of the short term after references +++ * in DPB array +++ * @poc_lt_curr: provides the index of the long term references in DPB array +++ * @reserved: padding field. Should be zeroed by applications. +++ * @dpb: the decoded picture buffer, for meta-data about reference frames +++ * @flags: see V4L2_HEVC_DECODE_PARAM_FLAG_{} +++ */ +++struct v4l2_ctrl_hevc_decode_params { +++ __s32 pic_order_cnt_val; +++ __u16 short_term_ref_pic_set_size; +++ __u16 long_term_ref_pic_set_size; +++ __u8 num_active_dpb_entries; +++ __u8 num_poc_st_curr_before; +++ __u8 num_poc_st_curr_after; +++ __u8 num_poc_lt_curr; +++ __u8 poc_st_curr_before[V4L2_HEVC_DPB_ENTRIES_NUM_MAX]; +++ __u8 poc_st_curr_after[V4L2_HEVC_DPB_ENTRIES_NUM_MAX]; +++ __u8 poc_lt_curr[V4L2_HEVC_DPB_ENTRIES_NUM_MAX]; +++ __u8 reserved[4]; +++ struct v4l2_hevc_dpb_entry dpb[V4L2_HEVC_DPB_ENTRIES_NUM_MAX]; +++ __u64 flags; +++}; +++ +++/** +++ * struct v4l2_ctrl_hevc_scaling_matrix - HEVC scaling lists parameters +++ * +++ * @scaling_list_4x4: scaling list is used for the scaling process for +++ * transform coefficients. The values on each scaling +++ * list are expected in raster scan order +++ * @scaling_list_8x8: scaling list is used for the scaling process for +++ * transform coefficients. The values on each scaling +++ * list are expected in raster scan order +++ * @scaling_list_16x16: scaling list is used for the scaling process for +++ * transform coefficients. The values on each scaling +++ * list are expected in raster scan order +++ * @scaling_list_32x32: scaling list is used for the scaling process for +++ * transform coefficients. The values on each scaling +++ * list are expected in raster scan order +++ * @scaling_list_dc_coef_16x16: scaling list is used for the scaling process +++ * for transform coefficients. The values on each +++ * scaling list are expected in raster scan order. +++ * @scaling_list_dc_coef_32x32: scaling list is used for the scaling process +++ * for transform coefficients. 
The values on each +++ * scaling list are expected in raster scan order. +++ */ +++struct v4l2_ctrl_hevc_scaling_matrix { +++ __u8 scaling_list_4x4[6][16]; +++ __u8 scaling_list_8x8[6][64]; +++ __u8 scaling_list_16x16[6][64]; +++ __u8 scaling_list_32x32[2][64]; +++ __u8 scaling_list_dc_coef_16x16[6]; +++ __u8 scaling_list_dc_coef_32x32[2]; +++}; +++ +++#endif ++diff --git a/libavcodec/hevc_parser.c b/libavcodec/hevc_parser.c ++index 59f9a0ff3e..4ae7222e8b 100644 ++--- a/libavcodec/hevc_parser.c +++++ b/libavcodec/hevc_parser.c ++@@ -97,6 +97,19 @@ static int hevc_parse_slice_header(AVCodecParserContext *s, H2645NAL *nal, ++ avctx->profile = ps->sps->ptl.general_ptl.profile_idc; ++ avctx->level = ps->sps->ptl.general_ptl.level_idc; ++ +++ if (ps->sps->chroma_format_idc == 1) { +++ avctx->chroma_sample_location = ps->sps->vui.common.chroma_loc_info_present_flag ? +++ ps->sps->vui.common.chroma_sample_loc_type_top_field + 1 : +++ AVCHROMA_LOC_LEFT; +++ } +++ else if (ps->sps->chroma_format_idc == 2 || +++ ps->sps->chroma_format_idc == 3) { +++ avctx->chroma_sample_location = AVCHROMA_LOC_TOPLEFT;; +++ } +++ else { +++ avctx->chroma_sample_location = AVCHROMA_LOC_UNSPECIFIED; +++ } +++ ++ if (ps->vps->vps_timing_info_present_flag) { ++ num = ps->vps->vps_num_units_in_tick; ++ den = ps->vps->vps_time_scale; ++diff --git a/libavcodec/hevc_refs.c b/libavcodec/hevc_refs.c ++index 811e8feff8..f7cf14eabc 100644 ++--- a/libavcodec/hevc_refs.c +++++ b/libavcodec/hevc_refs.c ++@@ -98,18 +98,22 @@ static HEVCFrame *alloc_frame(HEVCContext *s) ++ if (!frame->rpl_buf) ++ goto fail; ++ ++- frame->tab_mvf_buf = av_buffer_pool_get(s->tab_mvf_pool); ++- if (!frame->tab_mvf_buf) ++- goto fail; ++- frame->tab_mvf = (MvField *)frame->tab_mvf_buf->data; +++ if (s->tab_mvf_pool) { +++ frame->tab_mvf_buf = av_buffer_pool_get(s->tab_mvf_pool); +++ if (!frame->tab_mvf_buf) +++ goto fail; +++ frame->tab_mvf = (MvField *)frame->tab_mvf_buf->data; +++ } ++ ++- frame->rpl_tab_buf = av_buffer_pool_get(s->rpl_tab_pool); ++- if (!frame->rpl_tab_buf) ++- goto fail; ++- frame->rpl_tab = (RefPicListTab **)frame->rpl_tab_buf->data; ++- frame->ctb_count = s->ps.sps->ctb_width * s->ps.sps->ctb_height; ++- for (j = 0; j < frame->ctb_count; j++) ++- frame->rpl_tab[j] = (RefPicListTab *)frame->rpl_buf->data; +++ if (s->rpl_tab_pool) { +++ frame->rpl_tab_buf = av_buffer_pool_get(s->rpl_tab_pool); +++ if (!frame->rpl_tab_buf) +++ goto fail; +++ frame->rpl_tab = (RefPicListTab **)frame->rpl_tab_buf->data; +++ frame->ctb_count = s->ps.sps->ctb_width * s->ps.sps->ctb_height; +++ for (j = 0; j < frame->ctb_count; j++) +++ frame->rpl_tab[j] = (RefPicListTab *)frame->rpl_buf->data; +++ } ++ ++ frame->frame->top_field_first = s->sei.picture_timing.picture_struct == AV_PICTURE_STRUCTURE_TOP_FIELD; ++ frame->frame->interlaced_frame = (s->sei.picture_timing.picture_struct == AV_PICTURE_STRUCTURE_TOP_FIELD) || (s->sei.picture_timing.picture_struct == AV_PICTURE_STRUCTURE_BOTTOM_FIELD); ++@@ -297,14 +301,17 @@ static int init_slice_rpl(HEVCContext *s) ++ int ctb_count = frame->ctb_count; ++ int ctb_addr_ts = s->ps.pps->ctb_addr_rs_to_ts[s->sh.slice_segment_addr]; ++ int i; +++ RefPicListTab * const tab = (RefPicListTab *)frame->rpl_buf->data + s->slice_idx; ++ ++ if (s->slice_idx >= frame->rpl_buf->size / sizeof(RefPicListTab)) ++ return AVERROR_INVALIDDATA; ++ ++- for (i = ctb_addr_ts; i < ctb_count; i++) ++- frame->rpl_tab[i] = (RefPicListTab *)frame->rpl_buf->data + s->slice_idx; +++ if (frame->rpl_tab) { +++ for (i = ctb_addr_ts; i < 
ctb_count; i++) +++ frame->rpl_tab[i] = tab; +++ } ++ ++- frame->refPicList = (RefPicList *)frame->rpl_tab[ctb_addr_ts]; +++ frame->refPicList = tab->refPicList; ++ ++ return 0; ++ } ++diff --git a/libavcodec/hevcdec.c b/libavcodec/hevcdec.c ++index 567e8d81d4..17f53322fb 100644 ++--- a/libavcodec/hevcdec.c +++++ b/libavcodec/hevcdec.c ++@@ -347,6 +347,19 @@ static void export_stream_params(HEVCContext *s, const HEVCSPS *sps) ++ else ++ avctx->color_range = AVCOL_RANGE_MPEG; ++ +++ if (sps->chroma_format_idc == 1) { +++ avctx->chroma_sample_location = sps->vui.common.chroma_loc_info_present_flag ? +++ sps->vui.common.chroma_sample_loc_type_top_field + 1 : +++ AVCHROMA_LOC_LEFT; +++ } +++ else if (sps->chroma_format_idc == 2 || +++ sps->chroma_format_idc == 3) { +++ avctx->chroma_sample_location = AVCHROMA_LOC_TOPLEFT;; +++ } +++ else { +++ avctx->chroma_sample_location = AVCHROMA_LOC_UNSPECIFIED; +++ } +++ ++ if (sps->vui.common.colour_description_present_flag) { ++ avctx->color_primaries = sps->vui.common.colour_primaries; ++ avctx->color_trc = sps->vui.common.transfer_characteristics; ++@@ -403,6 +416,7 @@ static enum AVPixelFormat get_format(HEVCContext *s, const HEVCSPS *sps) ++ #define HWACCEL_MAX (CONFIG_HEVC_DXVA2_HWACCEL + \ ++ CONFIG_HEVC_D3D11VA_HWACCEL * 2 + \ ++ CONFIG_HEVC_NVDEC_HWACCEL + \ +++ CONFIG_HEVC_V4L2REQUEST_HWACCEL + \ ++ CONFIG_HEVC_VAAPI_HWACCEL + \ ++ CONFIG_HEVC_VIDEOTOOLBOX_HWACCEL + \ ++ CONFIG_HEVC_VDPAU_HWACCEL) ++@@ -429,6 +443,9 @@ static enum AVPixelFormat get_format(HEVCContext *s, const HEVCSPS *sps) ++ #endif ++ #if CONFIG_HEVC_VIDEOTOOLBOX_HWACCEL ++ *fmt++ = AV_PIX_FMT_VIDEOTOOLBOX; +++#endif +++#if CONFIG_HEVC_V4L2REQUEST_HWACCEL +++ *fmt++ = AV_PIX_FMT_DRM_PRIME; ++ #endif ++ break; ++ case AV_PIX_FMT_YUV420P10: ++@@ -450,6 +467,9 @@ static enum AVPixelFormat get_format(HEVCContext *s, const HEVCSPS *sps) ++ #endif ++ #if CONFIG_HEVC_NVDEC_HWACCEL ++ *fmt++ = AV_PIX_FMT_CUDA; +++#endif +++#if CONFIG_HEVC_V4L2REQUEST_HWACCEL +++ *fmt++ = AV_PIX_FMT_DRM_PRIME; ++ #endif ++ break; ++ case AV_PIX_FMT_YUV444P: ++@@ -516,6 +536,16 @@ static int set_sps(HEVCContext *s, const HEVCSPS *sps, ++ if (!sps) ++ return 0; ++ +++ // If hwaccel then we don't need all the s/w decode helper arrays +++ if (s->avctx->hwaccel) { +++ export_stream_params(s, sps); +++ +++ s->avctx->pix_fmt = pix_fmt; +++ s->ps.sps = sps; +++ s->ps.vps = (HEVCVPS*) s->ps.vps_list[s->ps.sps->vps_id]->data; +++ return 0; +++ } +++ ++ ret = pic_arrays_init(s, sps); ++ if (ret < 0) ++ goto fail; ++@@ -2870,11 +2900,13 @@ static int hevc_frame_start(HEVCContext *s) ++ ((s->ps.sps->height >> s->ps.sps->log2_min_cb_size) + 1); ++ int ret; ++ ++- memset(s->horizontal_bs, 0, s->bs_width * s->bs_height); ++- memset(s->vertical_bs, 0, s->bs_width * s->bs_height); ++- memset(s->cbf_luma, 0, s->ps.sps->min_tb_width * s->ps.sps->min_tb_height); ++- memset(s->is_pcm, 0, (s->ps.sps->min_pu_width + 1) * (s->ps.sps->min_pu_height + 1)); ++- memset(s->tab_slice_address, -1, pic_size_in_ctb * sizeof(*s->tab_slice_address)); +++ if (s->horizontal_bs) { +++ memset(s->horizontal_bs, 0, s->bs_width * s->bs_height); +++ memset(s->vertical_bs, 0, s->bs_width * s->bs_height); +++ memset(s->cbf_luma, 0, s->ps.sps->min_tb_width * s->ps.sps->min_tb_height); +++ memset(s->is_pcm, 0, (s->ps.sps->min_pu_width + 1) * (s->ps.sps->min_pu_height + 1)); +++ memset(s->tab_slice_address, -1, pic_size_in_ctb * sizeof(*s->tab_slice_address)); +++ } ++ ++ s->is_decoded = 0; ++ s->first_nal_type = s->nal_unit_type; ++@@ -3362,8 
+3394,13 @@ static int hevc_decode_frame(AVCodecContext *avctx, AVFrame *rframe, ++ ++ s->ref = NULL; ++ ret = decode_nal_units(s, avpkt->data, avpkt->size); ++- if (ret < 0) +++ if (ret < 0) { +++ // Ensure that hwaccel knows this frame is over +++ if (s->avctx->hwaccel && s->avctx->hwaccel->abort_frame) +++ s->avctx->hwaccel->abort_frame(s->avctx); +++ ++ return ret; +++ } ++ ++ if (avctx->hwaccel) { ++ if (s->ref && (ret = avctx->hwaccel->end_frame(avctx)) < 0) { ++@@ -3413,15 +3450,19 @@ static int hevc_ref_frame(HEVCContext *s, HEVCFrame *dst, HEVCFrame *src) ++ dst->needs_fg = 1; ++ } ++ ++- dst->tab_mvf_buf = av_buffer_ref(src->tab_mvf_buf); ++- if (!dst->tab_mvf_buf) ++- goto fail; ++- dst->tab_mvf = src->tab_mvf; +++ if (src->tab_mvf_buf) { +++ dst->tab_mvf_buf = av_buffer_ref(src->tab_mvf_buf); +++ if (!dst->tab_mvf_buf) +++ goto fail; +++ dst->tab_mvf = src->tab_mvf; +++ } ++ ++- dst->rpl_tab_buf = av_buffer_ref(src->rpl_tab_buf); ++- if (!dst->rpl_tab_buf) ++- goto fail; ++- dst->rpl_tab = src->rpl_tab; +++ if (src->rpl_tab_buf) { +++ dst->rpl_tab_buf = av_buffer_ref(src->rpl_tab_buf); +++ if (!dst->rpl_tab_buf) +++ goto fail; +++ dst->rpl_tab = src->rpl_tab; +++ } ++ ++ dst->rpl_buf = av_buffer_ref(src->rpl_buf); ++ if (!dst->rpl_buf) ++@@ -3731,6 +3772,9 @@ const FFCodec ff_hevc_decoder = { ++ #if CONFIG_HEVC_NVDEC_HWACCEL ++ HWACCEL_NVDEC(hevc), ++ #endif +++#if CONFIG_HEVC_V4L2REQUEST_HWACCEL +++ HWACCEL_V4L2REQUEST(hevc), +++#endif ++ #if CONFIG_HEVC_VAAPI_HWACCEL ++ HWACCEL_VAAPI(hevc), ++ #endif ++diff --git a/libavcodec/hwaccels.h b/libavcodec/hwaccels.h ++index aca55831f3..f32d1c4ec4 100644 ++--- a/libavcodec/hwaccels.h +++++ b/libavcodec/hwaccels.h ++@@ -40,6 +40,7 @@ extern const AVHWAccel ff_hevc_d3d11va_hwaccel; ++ extern const AVHWAccel ff_hevc_d3d11va2_hwaccel; ++ extern const AVHWAccel ff_hevc_dxva2_hwaccel; ++ extern const AVHWAccel ff_hevc_nvdec_hwaccel; +++extern const AVHWAccel ff_hevc_v4l2request_hwaccel; ++ extern const AVHWAccel ff_hevc_vaapi_hwaccel; ++ extern const AVHWAccel ff_hevc_vdpau_hwaccel; ++ extern const AVHWAccel ff_hevc_videotoolbox_hwaccel; ++diff --git a/libavcodec/hwconfig.h b/libavcodec/hwconfig.h ++index 721424912c..b8aa383071 100644 ++--- a/libavcodec/hwconfig.h +++++ b/libavcodec/hwconfig.h ++@@ -24,6 +24,7 @@ ++ ++ ++ #define HWACCEL_CAP_ASYNC_SAFE (1 << 0) +++#define HWACCEL_CAP_MT_SAFE (1 << 1) ++ ++ ++ typedef struct AVCodecHWConfigInternal { ++@@ -70,6 +71,8 @@ typedef struct AVCodecHWConfigInternal { ++ HW_CONFIG_HWACCEL(1, 1, 0, D3D11, D3D11VA, ff_ ## codec ## _d3d11va2_hwaccel) ++ #define HWACCEL_NVDEC(codec) \ ++ HW_CONFIG_HWACCEL(1, 1, 0, CUDA, CUDA, ff_ ## codec ## _nvdec_hwaccel) +++#define HWACCEL_V4L2REQUEST(codec) \ +++ HW_CONFIG_HWACCEL(1, 0, 0, DRM_PRIME, DRM, ff_ ## codec ## _v4l2request_hwaccel) ++ #define HWACCEL_VAAPI(codec) \ ++ HW_CONFIG_HWACCEL(1, 1, 1, VAAPI, VAAPI, ff_ ## codec ## _vaapi_hwaccel) ++ #define HWACCEL_VDPAU(codec) \ ++diff --git a/libavcodec/mmaldec.c b/libavcodec/mmaldec.c ++index 3092f58510..6f41b41ac4 100644 ++--- a/libavcodec/mmaldec.c +++++ b/libavcodec/mmaldec.c ++@@ -24,6 +24,9 @@ ++ * MMAL Video Decoder ++ */ ++ +++#pragma GCC diagnostic push +++// Many many redundant decls in the header files +++#pragma GCC diagnostic ignored "-Wredundant-decls" ++ #include ++ #include ++ #include ++@@ -31,6 +34,7 @@ ++ #include ++ #include ++ #include +++#pragma GCC diagnostic pop ++ #include ++ ++ #include "avcodec.h" ++diff --git a/libavcodec/pthread_frame.c b/libavcodec/pthread_frame.c ++index 
d9d5afaa82..b14f8e9360 100644 ++--- a/libavcodec/pthread_frame.c +++++ b/libavcodec/pthread_frame.c ++@@ -204,7 +204,8 @@ static attribute_align_arg void *frame_worker_thread(void *arg) ++ ++ /* if the previous thread uses hwaccel then we take the lock to ensure ++ * the threads don't run concurrently */ ++- if (avctx->hwaccel) { +++ if (avctx->hwaccel && +++ !(avctx->hwaccel->caps_internal & HWACCEL_CAP_MT_SAFE)) { ++ pthread_mutex_lock(&p->parent->hwaccel_mutex); ++ p->hwaccel_serializing = 1; ++ } ++@@ -230,7 +231,7 @@ static attribute_align_arg void *frame_worker_thread(void *arg) ++ p->hwaccel_serializing = 0; ++ pthread_mutex_unlock(&p->parent->hwaccel_mutex); ++ } ++- av_assert0(!avctx->hwaccel); +++ av_assert0(!avctx->hwaccel || (avctx->hwaccel->caps_internal & HWACCEL_CAP_MT_SAFE)); ++ ++ if (p->async_serializing) { ++ p->async_serializing = 0; ++@@ -318,6 +319,12 @@ FF_ENABLE_DEPRECATION_WARNINGS ++ } ++ ++ dst->hwaccel_flags = src->hwaccel_flags; +++ if (src->hwaccel && +++ (src->hwaccel->caps_internal & HWACCEL_CAP_MT_SAFE)) { +++ dst->hwaccel = src->hwaccel; +++ dst->hwaccel_context = src->hwaccel_context; +++ dst->internal->hwaccel_priv_data = src->internal->hwaccel_priv_data; +++ } ++ ++ err = av_buffer_replace(&dst->internal->pool, src->internal->pool); ++ if (err < 0) ++@@ -433,10 +440,13 @@ static int submit_packet(PerThreadContext *p, AVCodecContext *user_avctx, ++ } ++ ++ /* transfer the stashed hwaccel state, if any */ ++- av_assert0(!p->avctx->hwaccel); ++- FFSWAP(const AVHWAccel*, p->avctx->hwaccel, fctx->stash_hwaccel); ++- FFSWAP(void*, p->avctx->hwaccel_context, fctx->stash_hwaccel_context); ++- FFSWAP(void*, p->avctx->internal->hwaccel_priv_data, fctx->stash_hwaccel_priv); +++ av_assert0(!p->avctx->hwaccel || (p->avctx->hwaccel->caps_internal & HWACCEL_CAP_MT_SAFE)); +++ if (p->avctx->hwaccel && +++ !(p->avctx->hwaccel->caps_internal & HWACCEL_CAP_MT_SAFE)) { +++ FFSWAP(const AVHWAccel*, p->avctx->hwaccel, fctx->stash_hwaccel); +++ FFSWAP(void*, p->avctx->hwaccel_context, fctx->stash_hwaccel_context); +++ FFSWAP(void*, p->avctx->internal->hwaccel_priv_data, fctx->stash_hwaccel_priv); +++ } ++ ++ av_packet_unref(p->avpkt); ++ ret = av_packet_ref(p->avpkt, avpkt); ++@@ -590,7 +600,9 @@ void ff_thread_finish_setup(AVCodecContext *avctx) { ++ ++ if (!(avctx->active_thread_type&FF_THREAD_FRAME)) return; ++ ++- if (avctx->hwaccel && !p->hwaccel_serializing) { +++ if (avctx->hwaccel && +++ !(avctx->hwaccel->caps_internal & HWACCEL_CAP_MT_SAFE) && +++ !p->hwaccel_serializing) { ++ pthread_mutex_lock(&p->parent->hwaccel_mutex); ++ p->hwaccel_serializing = 1; ++ } ++@@ -607,9 +619,12 @@ void ff_thread_finish_setup(AVCodecContext *avctx) { ++ * this is done here so that this worker thread can wipe its own hwaccel ++ * state after decoding, without requiring synchronization */ ++ av_assert0(!p->parent->stash_hwaccel); ++- p->parent->stash_hwaccel = avctx->hwaccel; ++- p->parent->stash_hwaccel_context = avctx->hwaccel_context; ++- p->parent->stash_hwaccel_priv = avctx->internal->hwaccel_priv_data; +++ if (avctx->hwaccel && +++ !(avctx->hwaccel->caps_internal & HWACCEL_CAP_MT_SAFE)) { +++ p->parent->stash_hwaccel = avctx->hwaccel; +++ p->parent->stash_hwaccel_context = avctx->hwaccel_context; +++ p->parent->stash_hwaccel_priv = avctx->internal->hwaccel_priv_data; +++ } ++ ++ pthread_mutex_lock(&p->progress_mutex); ++ if(atomic_load(&p->state) == STATE_SETUP_FINISHED){ ++@@ -664,6 +679,15 @@ void ff_frame_thread_free(AVCodecContext *avctx, int thread_count) ++ ++ 
park_frame_worker_threads(fctx, thread_count); ++ +++ if (fctx->prev_thread && +++ avctx->hwaccel && (avctx->hwaccel->caps_internal & HWACCEL_CAP_MT_SAFE) && +++ avctx->internal->hwaccel_priv_data != +++ fctx->prev_thread->avctx->internal->hwaccel_priv_data) { +++ if (update_context_from_thread(avctx, fctx->prev_thread->avctx, 1) < 0) { +++ av_log(avctx, AV_LOG_ERROR, "Failed to update user thread.\n"); +++ } +++ } +++ ++ for (i = 0; i < thread_count; i++) { ++ PerThreadContext *p = &fctx->threads[i]; ++ AVCodecContext *ctx = p->avctx; ++@@ -707,10 +731,13 @@ void ff_frame_thread_free(AVCodecContext *avctx, int thread_count) ++ ++ /* if we have stashed hwaccel state, move it to the user-facing context, ++ * so it will be freed in avcodec_close() */ ++- av_assert0(!avctx->hwaccel); ++- FFSWAP(const AVHWAccel*, avctx->hwaccel, fctx->stash_hwaccel); ++- FFSWAP(void*, avctx->hwaccel_context, fctx->stash_hwaccel_context); ++- FFSWAP(void*, avctx->internal->hwaccel_priv_data, fctx->stash_hwaccel_priv); +++ av_assert0(!avctx->hwaccel || (avctx->hwaccel->caps_internal & HWACCEL_CAP_MT_SAFE)); +++ if (avctx->hwaccel && +++ !(avctx->hwaccel->caps_internal & HWACCEL_CAP_MT_SAFE)) { +++ FFSWAP(const AVHWAccel*, avctx->hwaccel, fctx->stash_hwaccel); +++ FFSWAP(void*, avctx->hwaccel_context, fctx->stash_hwaccel_context); +++ FFSWAP(void*, avctx->internal->hwaccel_priv_data, fctx->stash_hwaccel_priv); +++ } ++ ++ av_freep(&avctx->internal->thread_ctx); ++ } ++diff --git a/libavcodec/raw.c b/libavcodec/raw.c ++index 1e5b48d1e0..1e689f9ee0 100644 ++--- a/libavcodec/raw.c +++++ b/libavcodec/raw.c ++@@ -295,6 +295,12 @@ static const PixelFormatTag raw_pix_fmt_tags[] = { ++ { AV_PIX_FMT_RGB565LE,MKTAG( 3 , 0 , 0 , 0 ) }, /* flipped RGB565LE */ ++ { AV_PIX_FMT_YUV444P, MKTAG('Y', 'V', '2', '4') }, /* YUV444P, swapped UV */ ++ +++ /* RPI (Might as well define for everything) */ +++ { AV_PIX_FMT_SAND128, MKTAG('S', 'A', 'N', 'D') }, +++ { AV_PIX_FMT_RPI4_8, MKTAG('S', 'A', 'N', 'D') }, +++ { AV_PIX_FMT_SAND64_10, MKTAG('S', 'N', 'D', 'A') }, +++ { AV_PIX_FMT_RPI4_10, MKTAG('S', 'N', 'D', 'B') }, +++ ++ { AV_PIX_FMT_NONE, 0 }, ++ }; ++ ++diff --git a/libavcodec/rawenc.c b/libavcodec/rawenc.c ++index 8c577006d9..8ca0379e12 100644 ++--- a/libavcodec/rawenc.c +++++ b/libavcodec/rawenc.c ++@@ -24,6 +24,7 @@ ++ * Raw Video Encoder ++ */ ++ +++#include "config.h" ++ #include "avcodec.h" ++ #include "codec_internal.h" ++ #include "encode.h" ++@@ -33,6 +34,10 @@ ++ #include "libavutil/intreadwrite.h" ++ #include "libavutil/imgutils.h" ++ #include "libavutil/internal.h" +++#include "libavutil/avassert.h" +++#if CONFIG_SAND +++#include "libavutil/rpi_sand_fns.h" +++#endif ++ ++ static av_cold int raw_encode_init(AVCodecContext *avctx) ++ { ++@@ -46,22 +51,114 @@ static av_cold int raw_encode_init(AVCodecContext *avctx) ++ return 0; ++ } ++ +++#if CONFIG_SAND +++static int raw_sand8_as_yuv420(AVCodecContext *avctx, AVPacket *pkt, +++ const AVFrame *frame) +++{ +++ const int width = av_frame_cropped_width(frame); +++ const int height = av_frame_cropped_height(frame); +++ const int x0 = frame->crop_left; +++ const int y0 = frame->crop_top; +++ const int size = width * height * 3 / 2; +++ uint8_t * dst; +++ int ret; +++ +++ if ((ret = ff_get_encode_buffer(avctx, pkt, size, 0)) < 0) +++ return ret; +++ +++ dst = pkt->data; +++ +++ av_rpi_sand_to_planar_y8(dst, width, frame->data[0], frame->linesize[0], frame->linesize[3], x0, y0, width, height); +++ dst += width * height; +++ av_rpi_sand_to_planar_c8(dst, width / 2, dst + width 
* height / 4, width / 2, +++ frame->data[1], frame->linesize[1], av_rpi_sand_frame_stride2(frame), x0 / 2, y0 / 2, width / 2, height / 2); +++ return 0; +++} +++ +++static int raw_sand16_as_yuv420(AVCodecContext *avctx, AVPacket *pkt, +++ const AVFrame *frame) +++{ +++ const int width = av_frame_cropped_width(frame); +++ const int height = av_frame_cropped_height(frame); +++ const int x0 = frame->crop_left; +++ const int y0 = frame->crop_top; +++ const int size = width * height * 3; +++ uint8_t * dst; +++ int ret; +++ +++ if ((ret = ff_get_encode_buffer(avctx, pkt, size, 0)) < 0) +++ return ret; +++ +++ dst = pkt->data; +++ +++ av_rpi_sand_to_planar_y16(dst, width * 2, frame->data[0], frame->linesize[0], frame->linesize[3], x0 * 2, y0, width * 2, height); +++ dst += width * height * 2; +++ av_rpi_sand_to_planar_c16(dst, width, dst + width * height / 2, width, +++ frame->data[1], frame->linesize[1], av_rpi_sand_frame_stride2(frame), x0, y0 / 2, width, height / 2); +++ return 0; +++} +++ +++static int raw_sand30_as_yuv420(AVCodecContext *avctx, AVPacket *pkt, +++ const AVFrame *frame) +++{ +++ const int width = av_frame_cropped_width(frame); +++ const int height = av_frame_cropped_height(frame); +++ const int x0 = frame->crop_left; +++ const int y0 = frame->crop_top; +++ const int size = width * height * 3; +++ uint8_t * dst; +++ int ret; +++ +++ if ((ret = ff_get_encode_buffer(avctx, pkt, size, 0)) < 0) +++ return ret; +++ +++ dst = pkt->data; +++ +++ av_rpi_sand30_to_planar_y16(dst, width * 2, frame->data[0], frame->linesize[0], frame->linesize[3], x0, y0, width, height); +++ dst += width * height * 2; +++ av_rpi_sand30_to_planar_c16(dst, width, dst + width * height / 2, width, +++ frame->data[1], frame->linesize[1], av_rpi_sand_frame_stride2(frame), x0/2, y0 / 2, width/2, height / 2); +++ return 0; +++} +++#endif +++ +++ ++ static int raw_encode(AVCodecContext *avctx, AVPacket *pkt, ++- const AVFrame *frame, int *got_packet) +++ const AVFrame *src_frame, int *got_packet) ++ { ++- int ret = av_image_get_buffer_size(frame->format, ++- frame->width, frame->height, 1); +++ int ret; +++ AVFrame * frame = NULL; ++ ++- if (ret < 0) +++#if CONFIG_SAND +++ if (av_rpi_is_sand_frame(src_frame)) { +++ ret = av_rpi_is_sand8_frame(src_frame) ? raw_sand8_as_yuv420(avctx, pkt, src_frame) : +++ av_rpi_is_sand16_frame(src_frame) ? raw_sand16_as_yuv420(avctx, pkt, src_frame) : +++ av_rpi_is_sand30_frame(src_frame) ? 
raw_sand30_as_yuv420(avctx, pkt, src_frame) : -1; +++ *got_packet = (ret == 0); ++ return ret; +++ } +++#endif +++ +++ if ((frame = av_frame_clone(src_frame)) == NULL) { +++ ret = AVERROR(ENOMEM); +++ goto fail; +++ } +++ +++ if ((ret = av_frame_apply_cropping(frame, AV_FRAME_CROP_UNALIGNED)) < 0) +++ goto fail; +++ +++ ret = av_image_get_buffer_size(frame->format, +++ frame->width, frame->height, 1); +++ if (ret < 0) +++ goto fail; ++ ++ if ((ret = ff_get_encode_buffer(avctx, pkt, ret, 0)) < 0) ++- return ret; +++ goto fail; ++ if ((ret = av_image_copy_to_buffer(pkt->data, pkt->size, ++ (const uint8_t **)frame->data, frame->linesize, ++ frame->format, ++ frame->width, frame->height, 1)) < 0) ++- return ret; +++ goto fail; ++ ++ if(avctx->codec_tag == AV_RL32("yuv2") && ret > 0 && ++ frame->format == AV_PIX_FMT_YUYV422) { ++@@ -77,8 +174,15 @@ static int raw_encode(AVCodecContext *avctx, AVPacket *pkt, ++ AV_WB64(&pkt->data[8 * x], v << 48 | v >> 16); ++ } ++ } +++ pkt->flags |= AV_PKT_FLAG_KEY; +++ av_frame_free(&frame); ++ *got_packet = 1; ++ return 0; +++ +++fail: +++ av_frame_free(&frame); +++ *got_packet = 0; +++ return ret; ++ } ++ ++ const FFCodec ff_rawvideo_encoder = { ++diff --git a/libavcodec/v4l2_buffers.c b/libavcodec/v4l2_buffers.c ++index 3f5471067a..8d80d19788 100644 ++--- a/libavcodec/v4l2_buffers.c +++++ b/libavcodec/v4l2_buffers.c ++@@ -21,6 +21,7 @@ ++ * Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA ++ */ ++ +++#include ++ #include ++ #include ++ #include ++@@ -28,57 +29,89 @@ ++ #include ++ #include ++ #include "libavcodec/avcodec.h" +++#include "libavcodec/internal.h" +++#include "libavutil/avassert.h" ++ #include "libavutil/pixdesc.h" +++#include "libavutil/hwcontext.h" ++ #include "v4l2_context.h" ++ #include "v4l2_buffers.h" ++ #include "v4l2_m2m.h" +++#include "v4l2_req_dmabufs.h" +++#include "weak_link.h" ++ ++ #define USEC_PER_SEC 1000000 ++-static AVRational v4l2_timebase = { 1, USEC_PER_SEC }; +++static const AVRational v4l2_timebase = { 1, USEC_PER_SEC }; ++ ++-static inline V4L2m2mContext *buf_to_m2mctx(V4L2Buffer *buf) +++static inline V4L2m2mContext *ctx_to_m2mctx(const V4L2Context *ctx) ++ { ++- return V4L2_TYPE_IS_OUTPUT(buf->context->type) ? ++- container_of(buf->context, V4L2m2mContext, output) : ++- container_of(buf->context, V4L2m2mContext, capture); +++ return V4L2_TYPE_IS_OUTPUT(ctx->type) ? +++ container_of(ctx, V4L2m2mContext, output) : +++ container_of(ctx, V4L2m2mContext, capture); ++ } ++ ++-static inline AVCodecContext *logger(V4L2Buffer *buf) +++static inline V4L2m2mContext *buf_to_m2mctx(const V4L2Buffer * const buf) ++ { ++- return buf_to_m2mctx(buf)->avctx; +++ return ctx_to_m2mctx(buf->context); ++ } ++ ++-static inline AVRational v4l2_get_timebase(V4L2Buffer *avbuf) +++static inline AVCodecContext *logger(const V4L2Buffer * const buf) ++ { ++- V4L2m2mContext *s = buf_to_m2mctx(avbuf); +++ return buf_to_m2mctx(buf)->avctx; +++} ++ ++- if (s->avctx->pkt_timebase.num) ++- return s->avctx->pkt_timebase; ++- return s->avctx->time_base; +++static inline AVRational v4l2_get_timebase(const V4L2Buffer * const avbuf) +++{ +++ const V4L2m2mContext *s = buf_to_m2mctx(avbuf); +++ const AVRational tb = s->avctx->pkt_timebase.num ? +++ s->avctx->pkt_timebase : +++ s->avctx->time_base; +++ return tb.num && tb.den ? 
tb : v4l2_timebase; ++ } ++ ++-static inline void v4l2_set_pts(V4L2Buffer *out, int64_t pts) +++static inline struct timeval tv_from_int(const int64_t t) ++ { ++- int64_t v4l2_pts; +++ return (struct timeval){ +++ .tv_usec = t % USEC_PER_SEC, +++ .tv_sec = t / USEC_PER_SEC +++ }; +++} ++ ++- if (pts == AV_NOPTS_VALUE) ++- pts = 0; +++static inline int64_t int_from_tv(const struct timeval t) +++{ +++ return (int64_t)t.tv_sec * USEC_PER_SEC + t.tv_usec; +++} ++ +++static inline void v4l2_set_pts(V4L2Buffer * const out, const int64_t pts) +++{ ++ /* convert pts to v4l2 timebase */ ++- v4l2_pts = av_rescale_q(pts, v4l2_get_timebase(out), v4l2_timebase); ++- out->buf.timestamp.tv_usec = v4l2_pts % USEC_PER_SEC; ++- out->buf.timestamp.tv_sec = v4l2_pts / USEC_PER_SEC; +++ const int64_t v4l2_pts = +++ pts == AV_NOPTS_VALUE ? 0 : +++ av_rescale_q(pts, v4l2_get_timebase(out), v4l2_timebase); +++ out->buf.timestamp = tv_from_int(v4l2_pts); ++ } ++ ++-static inline int64_t v4l2_get_pts(V4L2Buffer *avbuf) +++static inline int64_t v4l2_get_pts(const V4L2Buffer * const avbuf) ++ { ++- int64_t v4l2_pts; ++- +++ const int64_t v4l2_pts = int_from_tv(avbuf->buf.timestamp); +++ return v4l2_pts != 0 ? v4l2_pts : AV_NOPTS_VALUE; +++#if 0 ++ /* convert pts back to encoder timebase */ ++- v4l2_pts = (int64_t)avbuf->buf.timestamp.tv_sec * USEC_PER_SEC + ++- avbuf->buf.timestamp.tv_usec; +++ return +++ avbuf->context->no_pts_rescale ? v4l2_pts : +++ v4l2_pts == 0 ? AV_NOPTS_VALUE : +++ av_rescale_q(v4l2_pts, v4l2_timebase, v4l2_get_timebase(avbuf)); +++#endif +++} ++ ++- return av_rescale_q(v4l2_pts, v4l2_timebase, v4l2_get_timebase(avbuf)); +++static void set_buf_length(V4L2Buffer *out, unsigned int plane, uint32_t bytesused, uint32_t length) +++{ +++ if (V4L2_TYPE_IS_MULTIPLANAR(out->buf.type)) { +++ out->planes[plane].bytesused = bytesused; +++ out->planes[plane].length = length; +++ } else { +++ out->buf.bytesused = bytesused; +++ out->buf.length = length; +++ } ++ } ++ ++ static enum AVColorPrimaries v4l2_get_color_primaries(V4L2Buffer *buf) ++@@ -115,6 +148,105 @@ static enum AVColorPrimaries v4l2_get_color_primaries(V4L2Buffer *buf) ++ return AVCOL_PRI_UNSPECIFIED; ++ } ++ +++static void v4l2_set_color(V4L2Buffer *buf, +++ const enum AVColorPrimaries avcp, +++ const enum AVColorSpace avcs, +++ const enum AVColorTransferCharacteristic avxc) +++{ +++ enum v4l2_ycbcr_encoding ycbcr = V4L2_YCBCR_ENC_DEFAULT; +++ enum v4l2_colorspace cs = V4L2_COLORSPACE_DEFAULT; +++ enum v4l2_xfer_func xfer = V4L2_XFER_FUNC_DEFAULT; +++ +++ switch (avcp) { +++ case AVCOL_PRI_BT709: +++ cs = V4L2_COLORSPACE_REC709; +++ ycbcr = V4L2_YCBCR_ENC_709; +++ break; +++ case AVCOL_PRI_BT470M: +++ cs = V4L2_COLORSPACE_470_SYSTEM_M; +++ ycbcr = V4L2_YCBCR_ENC_601; +++ break; +++ case AVCOL_PRI_BT470BG: +++ cs = V4L2_COLORSPACE_470_SYSTEM_BG; +++ break; +++ case AVCOL_PRI_SMPTE170M: +++ cs = V4L2_COLORSPACE_SMPTE170M; +++ break; +++ case AVCOL_PRI_SMPTE240M: +++ cs = V4L2_COLORSPACE_SMPTE240M; +++ break; +++ case AVCOL_PRI_BT2020: +++ cs = V4L2_COLORSPACE_BT2020; +++ break; +++ case AVCOL_PRI_SMPTE428: +++ case AVCOL_PRI_SMPTE431: +++ case AVCOL_PRI_SMPTE432: +++ case AVCOL_PRI_EBU3213: +++ case AVCOL_PRI_RESERVED: +++ case AVCOL_PRI_FILM: +++ case AVCOL_PRI_UNSPECIFIED: +++ default: +++ break; +++ } +++ +++ switch (avcs) { +++ case AVCOL_SPC_RGB: +++ cs = V4L2_COLORSPACE_SRGB; +++ break; +++ case AVCOL_SPC_BT709: +++ cs = V4L2_COLORSPACE_REC709; +++ break; +++ case AVCOL_SPC_FCC: +++ cs = V4L2_COLORSPACE_470_SYSTEM_M; +++ break; +++ case 
AVCOL_SPC_BT470BG: +++ cs = V4L2_COLORSPACE_470_SYSTEM_BG; +++ break; +++ case AVCOL_SPC_SMPTE170M: +++ cs = V4L2_COLORSPACE_SMPTE170M; +++ break; +++ case AVCOL_SPC_SMPTE240M: +++ cs = V4L2_COLORSPACE_SMPTE240M; +++ break; +++ case AVCOL_SPC_BT2020_CL: +++ cs = V4L2_COLORSPACE_BT2020; +++ ycbcr = V4L2_YCBCR_ENC_BT2020_CONST_LUM; +++ break; +++ case AVCOL_SPC_BT2020_NCL: +++ cs = V4L2_COLORSPACE_BT2020; +++ break; +++ default: +++ break; +++ } +++ +++ switch (xfer) { +++ case AVCOL_TRC_BT709: +++ xfer = V4L2_XFER_FUNC_709; +++ break; +++ case AVCOL_TRC_IEC61966_2_1: +++ xfer = V4L2_XFER_FUNC_SRGB; +++ break; +++ case AVCOL_TRC_SMPTE240M: +++ xfer = V4L2_XFER_FUNC_SMPTE240M; +++ break; +++ case AVCOL_TRC_SMPTE2084: +++ xfer = V4L2_XFER_FUNC_SMPTE2084; +++ break; +++ default: +++ break; +++ } +++ +++ if (V4L2_TYPE_IS_MULTIPLANAR(buf->buf.type)) { +++ buf->context->format.fmt.pix_mp.colorspace = cs; +++ buf->context->format.fmt.pix_mp.ycbcr_enc = ycbcr; +++ buf->context->format.fmt.pix_mp.xfer_func = xfer; +++ } else { +++ buf->context->format.fmt.pix.colorspace = cs; +++ buf->context->format.fmt.pix.ycbcr_enc = ycbcr; +++ buf->context->format.fmt.pix.xfer_func = xfer; +++ } +++} +++ ++ static enum AVColorRange v4l2_get_color_range(V4L2Buffer *buf) ++ { ++ enum v4l2_quantization qt; ++@@ -133,6 +265,20 @@ static enum AVColorRange v4l2_get_color_range(V4L2Buffer *buf) ++ return AVCOL_RANGE_UNSPECIFIED; ++ } ++ +++static void v4l2_set_color_range(V4L2Buffer *buf, const enum AVColorRange avcr) +++{ +++ const enum v4l2_quantization q = +++ avcr == AVCOL_RANGE_MPEG ? V4L2_QUANTIZATION_LIM_RANGE : +++ avcr == AVCOL_RANGE_JPEG ? V4L2_QUANTIZATION_FULL_RANGE : +++ V4L2_QUANTIZATION_DEFAULT; +++ +++ if (V4L2_TYPE_IS_MULTIPLANAR(buf->buf.type)) { +++ buf->context->format.fmt.pix_mp.quantization = q; +++ } else { +++ buf->context->format.fmt.pix.quantization = q; +++ } +++} +++ ++ static enum AVColorSpace v4l2_get_color_space(V4L2Buffer *buf) ++ { ++ enum v4l2_ycbcr_encoding ycbcr; ++@@ -209,73 +355,178 @@ static enum AVColorTransferCharacteristic v4l2_get_color_trc(V4L2Buffer *buf) ++ return AVCOL_TRC_UNSPECIFIED; ++ } ++ ++-static void v4l2_free_buffer(void *opaque, uint8_t *unused) +++static int v4l2_buf_is_interlaced(const V4L2Buffer * const buf) ++ { ++- V4L2Buffer* avbuf = opaque; ++- V4L2m2mContext *s = buf_to_m2mctx(avbuf); +++ return V4L2_FIELD_IS_INTERLACED(buf->buf.field); +++} ++ ++- if (atomic_fetch_sub(&avbuf->context_refcount, 1) == 1) { ++- atomic_fetch_sub_explicit(&s->refcount, 1, memory_order_acq_rel); +++static int v4l2_buf_is_top_first(const V4L2Buffer * const buf) +++{ +++ return buf->buf.field == V4L2_FIELD_INTERLACED_TB; +++} ++ ++- if (s->reinit) { ++- if (!atomic_load(&s->refcount)) ++- sem_post(&s->refsync); ++- } else { ++- if (s->draining && V4L2_TYPE_IS_OUTPUT(avbuf->context->type)) { ++- /* no need to queue more buffers to the driver */ ++- avbuf->status = V4L2BUF_AVAILABLE; ++- } ++- else if (avbuf->context->streamon) ++- ff_v4l2_buffer_enqueue(avbuf); ++- } +++static void v4l2_set_interlace(V4L2Buffer * const buf, const int is_interlaced, const int is_tff) +++{ +++ buf->buf.field = !is_interlaced ? V4L2_FIELD_NONE : +++ is_tff ? 
V4L2_FIELD_INTERLACED_TB : V4L2_FIELD_INTERLACED_BT; +++} ++ ++- av_buffer_unref(&avbuf->context_ref); +++static uint8_t * v4l2_get_drm_frame(V4L2Buffer *avbuf) +++{ +++ AVDRMFrameDescriptor *drm_desc = &avbuf->drm_frame; +++ AVDRMLayerDescriptor *layer; +++ +++ /* fill the DRM frame descriptor */ +++ drm_desc->nb_objects = avbuf->num_planes; +++ drm_desc->nb_layers = 1; +++ +++ layer = &drm_desc->layers[0]; +++ layer->nb_planes = avbuf->num_planes; +++ +++ for (int i = 0; i < avbuf->num_planes; i++) { +++ layer->planes[i].object_index = i; +++ layer->planes[i].offset = avbuf->plane_info[i].offset; +++ layer->planes[i].pitch = avbuf->plane_info[i].bytesperline; ++ } +++ +++ switch (avbuf->context->av_pix_fmt) { +++ case AV_PIX_FMT_YUYV422: +++ +++ layer->format = DRM_FORMAT_YUYV; +++ layer->nb_planes = 1; +++ +++ break; +++ +++ case AV_PIX_FMT_NV12: +++ case AV_PIX_FMT_NV21: +++ +++ layer->format = avbuf->context->av_pix_fmt == AV_PIX_FMT_NV12 ? +++ DRM_FORMAT_NV12 : DRM_FORMAT_NV21; +++ +++ if (avbuf->num_planes > 1) +++ break; +++ +++ layer->nb_planes = 2; +++ +++ layer->planes[1].object_index = 0; +++ layer->planes[1].offset = avbuf->plane_info[0].bytesperline * +++ avbuf->context->format.fmt.pix.height; +++ layer->planes[1].pitch = avbuf->plane_info[0].bytesperline; +++ break; +++ +++ case AV_PIX_FMT_YUV420P: +++ +++ layer->format = DRM_FORMAT_YUV420; +++ +++ if (avbuf->num_planes > 1) +++ break; +++ +++ layer->nb_planes = 3; +++ +++ layer->planes[1].object_index = 0; +++ layer->planes[1].offset = avbuf->plane_info[0].bytesperline * +++ avbuf->context->format.fmt.pix.height; +++ layer->planes[1].pitch = avbuf->plane_info[0].bytesperline >> 1; +++ +++ layer->planes[2].object_index = 0; +++ layer->planes[2].offset = layer->planes[1].offset + +++ ((avbuf->plane_info[0].bytesperline * +++ avbuf->context->format.fmt.pix.height) >> 2); +++ layer->planes[2].pitch = avbuf->plane_info[0].bytesperline >> 1; +++ break; +++ +++ default: +++ drm_desc->nb_layers = 0; +++ break; +++ } +++ +++ return (uint8_t *) drm_desc; ++ } ++ ++-static int v4l2_buf_increase_ref(V4L2Buffer *in) +++static void v4l2_free_bufref(void *opaque, uint8_t *data) ++ { ++- V4L2m2mContext *s = buf_to_m2mctx(in); +++ AVBufferRef * bufref = (AVBufferRef *)data; +++ V4L2Buffer *avbuf = (V4L2Buffer *)bufref->data; +++ struct V4L2Context *ctx = ff_weak_link_lock(&avbuf->context_wl); ++ ++- if (in->context_ref) ++- atomic_fetch_add(&in->context_refcount, 1); ++- else { ++- in->context_ref = av_buffer_ref(s->self_ref); ++- if (!in->context_ref) ++- return AVERROR(ENOMEM); +++ if (ctx != NULL) { +++ // Buffer still attached to context +++ V4L2m2mContext *s = buf_to_m2mctx(avbuf); ++ ++- in->context_refcount = 1; ++- } +++ ff_mutex_lock(&ctx->lock); ++ ++- in->status = V4L2BUF_RET_USER; ++- atomic_fetch_add_explicit(&s->refcount, 1, memory_order_relaxed); +++ ff_v4l2_buffer_set_avail(avbuf); ++ ++- return 0; +++ if (s->draining && V4L2_TYPE_IS_OUTPUT(ctx->type)) { +++ av_log(logger(avbuf), AV_LOG_DEBUG, "%s: Buffer avail\n", ctx->name); +++ /* no need to queue more buffers to the driver */ +++ } +++ else if (ctx->streamon) { +++ av_log(logger(avbuf), AV_LOG_DEBUG, "%s: Buffer requeue\n", ctx->name); +++ avbuf->buf.timestamp.tv_sec = 0; +++ avbuf->buf.timestamp.tv_usec = 0; +++ ff_v4l2_buffer_enqueue(avbuf); // will set to IN_DRIVER +++ } +++ else { +++ av_log(logger(avbuf), AV_LOG_DEBUG, "%s: Buffer freed but streamoff\n", ctx->name); +++ } +++ +++ ff_mutex_unlock(&ctx->lock); +++ } +++ +++ ff_weak_link_unlock(avbuf->context_wl); +++ 
av_buffer_unref(&bufref); ++ } ++ ++-static int v4l2_buf_to_bufref(V4L2Buffer *in, int plane, AVBufferRef **buf) +++static inline uint32_t ff_v4l2_buf_len(const struct v4l2_buffer * b, unsigned int i) ++ { ++- int ret; +++ return V4L2_TYPE_IS_MULTIPLANAR(b->type) ? b->m.planes[i].length : b->length; +++} ++ ++- if (plane >= in->num_planes) ++- return AVERROR(EINVAL); +++static int v4l2_buffer_export_drm(V4L2Buffer* avbuf) +++{ +++ int i, ret; +++ const V4L2m2mContext * const s = buf_to_m2mctx(avbuf); ++ ++- /* even though most encoders return 0 in data_offset encoding vp8 does require this value */ ++- *buf = av_buffer_create((char *)in->plane_info[plane].mm_addr + in->planes[plane].data_offset, ++- in->plane_info[plane].length, v4l2_free_buffer, in, 0); ++- if (!*buf) ++- return AVERROR(ENOMEM); +++ for (i = 0; i < avbuf->num_planes; i++) { +++ int dma_fd = -1; +++ const uint32_t blen = ff_v4l2_buf_len(&avbuf->buf, i); +++ +++ if (s->db_ctl != NULL) { +++ if ((avbuf->dmabuf[i] = dmabuf_alloc(s->db_ctl, blen)) == NULL) +++ return AVERROR(ENOMEM); +++ dma_fd = dmabuf_fd(avbuf->dmabuf[i]); +++ if (V4L2_TYPE_IS_MULTIPLANAR(avbuf->buf.type)) +++ avbuf->buf.m.planes[i].m.fd = dma_fd; +++ else +++ avbuf->buf.m.fd = dma_fd; +++ } +++ else { +++ struct v4l2_exportbuffer expbuf; +++ memset(&expbuf, 0, sizeof(expbuf)); +++ +++ expbuf.index = avbuf->buf.index; +++ expbuf.type = avbuf->buf.type; +++ expbuf.plane = i; +++ +++ ret = ioctl(s->fd, VIDIOC_EXPBUF, &expbuf); +++ if (ret < 0) +++ return AVERROR(errno); +++ dma_fd = expbuf.fd; +++ } ++ ++- ret = v4l2_buf_increase_ref(in); ++- if (ret) ++- av_buffer_unref(buf); +++ avbuf->drm_frame.objects[i].size = blen; +++ avbuf->drm_frame.objects[i].fd = dma_fd; +++ avbuf->drm_frame.objects[i].format_modifier = DRM_FORMAT_MOD_LINEAR; +++ } ++ ++- return ret; +++ return 0; ++ } ++ ++ static int v4l2_bufref_to_buf(V4L2Buffer *out, int plane, const uint8_t* data, int size, int offset) ++ { ++ unsigned int bytesused, length; +++ int rv = 0; ++ ++ if (plane >= out->num_planes) ++ return AVERROR(EINVAL); ++@@ -283,32 +534,57 @@ static int v4l2_bufref_to_buf(V4L2Buffer *out, int plane, const uint8_t* data, i ++ length = out->plane_info[plane].length; ++ bytesused = FFMIN(size+offset, length); ++ ++- memcpy((uint8_t*)out->plane_info[plane].mm_addr+offset, data, FFMIN(size, length-offset)); ++- ++- if (V4L2_TYPE_IS_MULTIPLANAR(out->buf.type)) { ++- out->planes[plane].bytesused = bytesused; ++- out->planes[plane].length = length; ++- } else { ++- out->buf.bytesused = bytesused; ++- out->buf.length = length; +++ if (size > length - offset) { +++ size = length - offset; +++ rv = AVERROR(ENOMEM); ++ } ++ ++- return 0; +++ memcpy((uint8_t*)out->plane_info[plane].mm_addr+offset, data, size); +++ +++ set_buf_length(out, plane, bytesused, length); +++ +++ return rv; +++} +++ +++static AVBufferRef * wrap_avbuf(V4L2Buffer * const avbuf) +++{ +++ AVBufferRef * bufref = av_buffer_ref(avbuf->context->bufrefs[avbuf->buf.index]); +++ AVBufferRef * newbuf; +++ +++ if (!bufref) +++ return NULL; +++ +++ newbuf = av_buffer_create((uint8_t *)bufref, sizeof(*bufref), v4l2_free_bufref, NULL, 0); +++ if (newbuf == NULL) +++ av_buffer_unref(&bufref); +++ +++ avbuf->status = V4L2BUF_RET_USER; +++ return newbuf; ++ } ++ ++ static int v4l2_buffer_buf_to_swframe(AVFrame *frame, V4L2Buffer *avbuf) ++ { ++- int i, ret; +++ int i; ++ ++ frame->format = avbuf->context->av_pix_fmt; ++ ++- for (i = 0; i < avbuf->num_planes; i++) { ++- ret = v4l2_buf_to_bufref(avbuf, i, &frame->buf[i]); ++- if (ret) 
++- return ret; +++ frame->buf[0] = wrap_avbuf(avbuf); +++ if (frame->buf[0] == NULL) +++ return AVERROR(ENOMEM); ++ +++ if (buf_to_m2mctx(avbuf)->output_drm) { +++ /* 1. get references to the actual data */ +++ frame->data[0] = (uint8_t *) v4l2_get_drm_frame(avbuf); +++ frame->format = AV_PIX_FMT_DRM_PRIME; +++ frame->hw_frames_ctx = av_buffer_ref(avbuf->context->frames_ref); +++ return 0; +++ } +++ +++ +++ /* 1. get references to the actual data */ +++ for (i = 0; i < avbuf->num_planes; i++) { +++ frame->data[i] = (uint8_t *)avbuf->plane_info[i].mm_addr + avbuf->planes[i].data_offset; ++ frame->linesize[i] = avbuf->plane_info[i].bytesperline; ++- frame->data[i] = frame->buf[i]->data; ++ } ++ ++ /* fixup special cases */ ++@@ -317,17 +593,17 @@ static int v4l2_buffer_buf_to_swframe(AVFrame *frame, V4L2Buffer *avbuf) ++ case AV_PIX_FMT_NV21: ++ if (avbuf->num_planes > 1) ++ break; ++- frame->linesize[1] = avbuf->plane_info[0].bytesperline; ++- frame->data[1] = frame->buf[0]->data + avbuf->plane_info[0].bytesperline * avbuf->context->format.fmt.pix_mp.height; +++ frame->linesize[1] = frame->linesize[0]; +++ frame->data[1] = frame->data[0] + frame->linesize[0] * ff_v4l2_get_format_height(&avbuf->context->format); ++ break; ++ ++ case AV_PIX_FMT_YUV420P: ++ if (avbuf->num_planes > 1) ++ break; ++- frame->linesize[1] = avbuf->plane_info[0].bytesperline >> 1; ++- frame->linesize[2] = avbuf->plane_info[0].bytesperline >> 1; ++- frame->data[1] = frame->buf[0]->data + avbuf->plane_info[0].bytesperline * avbuf->context->format.fmt.pix_mp.height; ++- frame->data[2] = frame->data[1] + ((avbuf->plane_info[0].bytesperline * avbuf->context->format.fmt.pix_mp.height) >> 2); +++ frame->linesize[1] = frame->linesize[0] / 2; +++ frame->linesize[2] = frame->linesize[1]; +++ frame->data[1] = frame->data[0] + frame->linesize[0] * ff_v4l2_get_format_height(&avbuf->context->format); +++ frame->data[2] = frame->data[1] + frame->linesize[1] * ff_v4l2_get_format_height(&avbuf->context->format) / 2; ++ break; ++ ++ default: ++@@ -337,68 +613,127 @@ static int v4l2_buffer_buf_to_swframe(AVFrame *frame, V4L2Buffer *avbuf) ++ return 0; ++ } ++ +++static void cpy_2d(uint8_t * dst, int dst_stride, const uint8_t * src, int src_stride, int w, int h) +++{ +++ if (dst_stride == src_stride && w + 32 >= dst_stride) { +++ memcpy(dst, src, dst_stride * h); +++ } +++ else { +++ while (--h >= 0) { +++ memcpy(dst, src, w); +++ dst += dst_stride; +++ src += src_stride; +++ } +++ } +++} +++ +++static int is_chroma(const AVPixFmtDescriptor *desc, int i, int num_planes) +++{ +++ return i != 0 && !(i == num_planes - 1 && (desc->flags & AV_PIX_FMT_FLAG_ALPHA)); +++} +++ +++static int v4l2_buffer_primeframe_to_buf(const AVFrame *frame, V4L2Buffer *out) +++{ +++ const AVDRMFrameDescriptor *const src = (const AVDRMFrameDescriptor *)frame->data[0]; +++ +++ if (frame->format != AV_PIX_FMT_DRM_PRIME || !src) +++ return AVERROR(EINVAL); +++ +++ av_assert0(out->buf.memory == V4L2_MEMORY_DMABUF); +++ +++ if (V4L2_TYPE_IS_MULTIPLANAR(out->buf.type)) { +++ // Only currently cope with single buffer types +++ if (out->buf.length != 1) +++ return AVERROR_PATCHWELCOME; +++ if (src->nb_objects != 1) +++ return AVERROR(EINVAL); +++ +++ out->planes[0].m.fd = src->objects[0].fd; +++ } +++ else { +++ if (src->nb_objects != 1) +++ return AVERROR(EINVAL); +++ +++ out->buf.m.fd = src->objects[0].fd; +++ } +++ +++ // No need to copy src AVDescriptor and if we did then we may confuse +++ // fd close on free +++ out->ref_buf = av_buffer_ref(frame->buf[0]); +++ 
+++ return 0; +++} +++ ++ static int v4l2_buffer_swframe_to_buf(const AVFrame *frame, V4L2Buffer *out) ++ { ++- int i, ret; ++- struct v4l2_format fmt = out->context->format; ++- int pixel_format = V4L2_TYPE_IS_MULTIPLANAR(fmt.type) ? ++- fmt.fmt.pix_mp.pixelformat : fmt.fmt.pix.pixelformat; ++- int height = V4L2_TYPE_IS_MULTIPLANAR(fmt.type) ? ++- fmt.fmt.pix_mp.height : fmt.fmt.pix.height; ++- int is_planar_format = 0; ++- ++- switch (pixel_format) { ++- case V4L2_PIX_FMT_YUV420M: ++- case V4L2_PIX_FMT_YVU420M: ++-#ifdef V4L2_PIX_FMT_YUV422M ++- case V4L2_PIX_FMT_YUV422M: ++-#endif ++-#ifdef V4L2_PIX_FMT_YVU422M ++- case V4L2_PIX_FMT_YVU422M: ++-#endif ++-#ifdef V4L2_PIX_FMT_YUV444M ++- case V4L2_PIX_FMT_YUV444M: ++-#endif ++-#ifdef V4L2_PIX_FMT_YVU444M ++- case V4L2_PIX_FMT_YVU444M: ++-#endif ++- case V4L2_PIX_FMT_NV12M: ++- case V4L2_PIX_FMT_NV21M: ++- case V4L2_PIX_FMT_NV12MT_16X16: ++- case V4L2_PIX_FMT_NV12MT: ++- case V4L2_PIX_FMT_NV16M: ++- case V4L2_PIX_FMT_NV61M: ++- is_planar_format = 1; ++- } ++- ++- if (!is_planar_format) { ++- const AVPixFmtDescriptor *desc = av_pix_fmt_desc_get(frame->format); ++- int planes_nb = 0; ++- int offset = 0; ++- ++- for (i = 0; i < desc->nb_components; i++) ++- planes_nb = FFMAX(planes_nb, desc->comp[i].plane + 1); ++- ++- for (i = 0; i < planes_nb; i++) { ++- int size, h = height; ++- if (i == 1 || i == 2) { +++ int i; +++ int num_planes = 0; +++ int pel_strides[4] = {0}; +++ +++ const AVPixFmtDescriptor *desc = av_pix_fmt_desc_get(frame->format); +++ +++ if ((desc->flags & AV_PIX_FMT_FLAG_HWACCEL) != 0) { +++ av_log(NULL, AV_LOG_ERROR, "%s: HWACCEL cannot be copied\n", __func__); +++ return -1; +++ } +++ +++ for (i = 0; i != desc->nb_components; ++i) { +++ if (desc->comp[i].plane >= num_planes) +++ num_planes = desc->comp[i].plane + 1; +++ pel_strides[desc->comp[i].plane] = desc->comp[i].step; +++ } +++ +++ if (out->num_planes > 1) { +++ if (num_planes != out->num_planes) { +++ av_log(NULL, AV_LOG_ERROR, "%s: Num planes mismatch: %d != %d\n", __func__, num_planes, out->num_planes); +++ return -1; +++ } +++ for (i = 0; i != num_planes; ++i) { +++ int w = frame->width; +++ int h = frame->height; +++ if (is_chroma(desc, i, num_planes)) { +++ w = AV_CEIL_RSHIFT(w, desc->log2_chroma_w); ++ h = AV_CEIL_RSHIFT(h, desc->log2_chroma_h); ++ } ++- size = frame->linesize[i] * h; ++- ret = v4l2_bufref_to_buf(out, 0, frame->data[i], size, offset); ++- if (ret) ++- return ret; ++- offset += size; +++ +++ cpy_2d(out->plane_info[i].mm_addr, out->plane_info[i].bytesperline, +++ frame->data[i], frame->linesize[i], +++ w * pel_strides[i], h); +++ set_buf_length(out, i, out->plane_info[i].bytesperline * h, out->plane_info[i].length); ++ } ++- return 0; ++ } +++ else +++ { +++ unsigned int offset = 0; +++ +++ for (i = 0; i != num_planes; ++i) { +++ int w = frame->width; +++ int h = frame->height; +++ int dst_stride = out->plane_info[0].bytesperline; +++ uint8_t * const dst = (uint8_t *)out->plane_info[0].mm_addr + offset; +++ +++ if (is_chroma(desc, i, num_planes)) { +++ // Is chroma +++ dst_stride >>= desc->log2_chroma_w; +++ offset += dst_stride * (out->context->height >> desc->log2_chroma_h); +++ w = AV_CEIL_RSHIFT(w, desc->log2_chroma_w); +++ h = AV_CEIL_RSHIFT(h, desc->log2_chroma_h); +++ } +++ else { +++ // Is luma or alpha +++ offset += dst_stride * out->context->height; +++ } +++ if (offset > out->plane_info[0].length) { +++ av_log(NULL, AV_LOG_ERROR, "%s: Plane total %u > buffer size %zu\n", __func__, offset, out->plane_info[0].length); +++ return -1; +++ } 
++ ++- for (i = 0; i < out->num_planes; i++) { ++- ret = v4l2_bufref_to_buf(out, i, frame->buf[i]->data, frame->buf[i]->size, 0); ++- if (ret) ++- return ret; +++ cpy_2d(dst, dst_stride, +++ frame->data[i], frame->linesize[i], +++ w * pel_strides[i], h); +++ } +++ set_buf_length(out, 0, offset, out->plane_info[0].length); ++ } ++- ++ return 0; ++ } ++ ++@@ -408,16 +743,31 @@ static int v4l2_buffer_swframe_to_buf(const AVFrame *frame, V4L2Buffer *out) ++ * ++ ******************************************************************************/ ++ ++-int ff_v4l2_buffer_avframe_to_buf(const AVFrame *frame, V4L2Buffer *out) +++int ff_v4l2_buffer_avframe_to_buf(const AVFrame *frame, V4L2Buffer *out, const int64_t track_ts) ++ { ++- v4l2_set_pts(out, frame->pts); ++- ++- return v4l2_buffer_swframe_to_buf(frame, out); +++ out->buf.flags = frame->key_frame ? +++ (out->buf.flags | V4L2_BUF_FLAG_KEYFRAME) : +++ (out->buf.flags & ~V4L2_BUF_FLAG_KEYFRAME); +++ // Beware that colour info is held in format rather than the actual +++ // v4l2 buffer struct so this may not be as useful as you might hope +++ v4l2_set_color(out, frame->color_primaries, frame->colorspace, frame->color_trc); +++ v4l2_set_color_range(out, frame->color_range); +++ // PTS & interlace are buffer vars +++ if (track_ts) +++ out->buf.timestamp = tv_from_int(track_ts); +++ else +++ v4l2_set_pts(out, frame->pts); +++ v4l2_set_interlace(out, frame->interlaced_frame, frame->top_field_first); +++ +++ return frame->format == AV_PIX_FMT_DRM_PRIME ? +++ v4l2_buffer_primeframe_to_buf(frame, out) : +++ v4l2_buffer_swframe_to_buf(frame, out); ++ } ++ ++ int ff_v4l2_buffer_buf_to_avframe(AVFrame *frame, V4L2Buffer *avbuf) ++ { ++ int ret; +++ V4L2Context * const ctx = avbuf->context; ++ ++ av_frame_unref(frame); ++ ++@@ -428,17 +778,32 @@ int ff_v4l2_buffer_buf_to_avframe(AVFrame *frame, V4L2Buffer *avbuf) ++ ++ /* 2. get frame information */ ++ frame->key_frame = !!(avbuf->buf.flags & V4L2_BUF_FLAG_KEYFRAME); +++ frame->pict_type = frame->key_frame ? AV_PICTURE_TYPE_I : +++ (avbuf->buf.flags & V4L2_BUF_FLAG_PFRAME) != 0 ? AV_PICTURE_TYPE_P : +++ (avbuf->buf.flags & V4L2_BUF_FLAG_BFRAME) != 0 ? AV_PICTURE_TYPE_B : +++ AV_PICTURE_TYPE_NONE; ++ frame->color_primaries = v4l2_get_color_primaries(avbuf); ++ frame->colorspace = v4l2_get_color_space(avbuf); ++ frame->color_range = v4l2_get_color_range(avbuf); ++ frame->color_trc = v4l2_get_color_trc(avbuf); ++ frame->pts = v4l2_get_pts(avbuf); ++ frame->pkt_dts = AV_NOPTS_VALUE; +++ frame->interlaced_frame = v4l2_buf_is_interlaced(avbuf); +++ frame->top_field_first = v4l2_buf_is_top_first(avbuf); ++ ++ /* these values are updated also during re-init in v4l2_process_driver_event */ ++- frame->height = avbuf->context->height; ++- frame->width = avbuf->context->width; ++- frame->sample_aspect_ratio = avbuf->context->sample_aspect_ratio; +++ frame->height = ctx->height; +++ frame->width = ctx->width; +++ frame->sample_aspect_ratio = ctx->sample_aspect_ratio; +++ +++ if (ctx->selection.height && ctx->selection.width) { +++ frame->crop_left = ctx->selection.left < frame->width ? ctx->selection.left : 0; +++ frame->crop_top = ctx->selection.top < frame->height ? ctx->selection.top : 0; +++ frame->crop_right = ctx->selection.left + ctx->selection.width < frame->width ? +++ frame->width - (ctx->selection.left + ctx->selection.width) : 0; +++ frame->crop_bottom = ctx->selection.top + ctx->selection.height < frame->height ? +++ frame->height - (ctx->selection.top + ctx->selection.height) : 0; +++ } ++ ++ /* 3. 
report errors upstream */ ++ if (avbuf->buf.flags & V4L2_BUF_FLAG_ERROR) { ++@@ -451,15 +816,15 @@ int ff_v4l2_buffer_buf_to_avframe(AVFrame *frame, V4L2Buffer *avbuf) ++ ++ int ff_v4l2_buffer_buf_to_avpkt(AVPacket *pkt, V4L2Buffer *avbuf) ++ { ++- int ret; ++- ++ av_packet_unref(pkt); ++- ret = v4l2_buf_to_bufref(avbuf, 0, &pkt->buf); ++- if (ret) ++- return ret; +++ +++ pkt->buf = wrap_avbuf(avbuf); +++ if (pkt->buf == NULL) +++ return AVERROR(ENOMEM); ++ ++ pkt->size = V4L2_TYPE_IS_MULTIPLANAR(avbuf->buf.type) ? avbuf->buf.m.planes[0].bytesused : avbuf->buf.bytesused; ++- pkt->data = pkt->buf->data; +++ pkt->data = (uint8_t*)avbuf->plane_info[0].mm_addr + avbuf->planes[0].data_offset; +++ pkt->flags = 0; ++ ++ if (avbuf->buf.flags & V4L2_BUF_FLAG_KEYFRAME) ++ pkt->flags |= AV_PKT_FLAG_KEY; ++@@ -474,39 +839,107 @@ int ff_v4l2_buffer_buf_to_avpkt(AVPacket *pkt, V4L2Buffer *avbuf) ++ return 0; ++ } ++ ++-int ff_v4l2_buffer_avpkt_to_buf(const AVPacket *pkt, V4L2Buffer *out) +++int ff_v4l2_buffer_avpkt_to_buf_ext(const AVPacket * const pkt, V4L2Buffer * const out, +++ const void *extdata, size_t extlen, +++ const int64_t timestamp) ++ { ++ int ret; ++ ++- ret = v4l2_bufref_to_buf(out, 0, pkt->data, pkt->size, 0); ++- if (ret) +++ if (extlen) { +++ ret = v4l2_bufref_to_buf(out, 0, extdata, extlen, 0); +++ if (ret) +++ return ret; +++ } +++ +++ ret = v4l2_bufref_to_buf(out, 0, pkt->data, pkt->size, extlen); +++ if (ret && ret != AVERROR(ENOMEM)) ++ return ret; ++ ++- v4l2_set_pts(out, pkt->pts); +++ if (timestamp) +++ out->buf.timestamp = tv_from_int(timestamp); +++ else +++ v4l2_set_pts(out, pkt->pts); ++ ++- if (pkt->flags & AV_PKT_FLAG_KEY) ++- out->flags = V4L2_BUF_FLAG_KEYFRAME; +++ out->buf.flags = (pkt->flags & AV_PKT_FLAG_KEY) != 0 ? +++ (out->buf.flags | V4L2_BUF_FLAG_KEYFRAME) : +++ (out->buf.flags & ~V4L2_BUF_FLAG_KEYFRAME); ++ ++- return 0; +++ return ret; +++} +++ +++int ff_v4l2_buffer_avpkt_to_buf(const AVPacket *pkt, V4L2Buffer *out) +++{ +++ return ff_v4l2_buffer_avpkt_to_buf_ext(pkt, out, NULL, 0, 0); ++ } ++ ++-int ff_v4l2_buffer_initialize(V4L2Buffer* avbuf, int index) +++ +++static void v4l2_buffer_buffer_free(void *opaque, uint8_t *data) +++{ +++ V4L2Buffer * const avbuf = (V4L2Buffer *)data; +++ int i; +++ +++ for (i = 0; i != FF_ARRAY_ELEMS(avbuf->plane_info); ++i) { +++ struct V4L2Plane_info *p = avbuf->plane_info + i; +++ if (p->mm_addr != NULL) +++ munmap(p->mm_addr, p->length); +++ } +++ +++ if (avbuf->dmabuf[0] == NULL) { +++ for (i = 0; i != FF_ARRAY_ELEMS(avbuf->drm_frame.objects); ++i) { +++ if (avbuf->drm_frame.objects[i].fd != -1) +++ close(avbuf->drm_frame.objects[i].fd); +++ } +++ } +++ else { +++ for (i = 0; i != FF_ARRAY_ELEMS(avbuf->dmabuf); ++i) { +++ dmabuf_free(avbuf->dmabuf[i]); +++ } +++ } +++ +++ av_buffer_unref(&avbuf->ref_buf); +++ +++ ff_weak_link_unref(&avbuf->context_wl); +++ +++ av_free(avbuf); +++} +++ +++ +++int ff_v4l2_buffer_initialize(AVBufferRef ** pbufref, int index, V4L2Context *ctx, enum v4l2_memory mem) ++ { ++- V4L2Context *ctx = avbuf->context; ++ int ret, i; +++ V4L2Buffer * const avbuf = av_mallocz(sizeof(*avbuf)); +++ AVBufferRef * bufref; +++ V4L2m2mContext * const s = ctx_to_m2mctx(ctx); ++ ++- avbuf->buf.memory = V4L2_MEMORY_MMAP; +++ *pbufref = NULL; +++ if (avbuf == NULL) +++ return AVERROR(ENOMEM); +++ +++ bufref = av_buffer_create((uint8_t*)avbuf, sizeof(*avbuf), v4l2_buffer_buffer_free, NULL, 0); +++ if (bufref == NULL) { +++ av_free(avbuf); +++ return AVERROR(ENOMEM); +++ } +++ +++ avbuf->context = ctx; +++ 
avbuf->buf.memory = mem; ++ avbuf->buf.type = ctx->type; ++ avbuf->buf.index = index; ++ +++ for (i = 0; i != FF_ARRAY_ELEMS(avbuf->drm_frame.objects); ++i) { +++ avbuf->drm_frame.objects[i].fd = -1; +++ } +++ +++ avbuf->context_wl = ff_weak_link_ref(ctx->wl_master); +++ ++ if (V4L2_TYPE_IS_MULTIPLANAR(ctx->type)) { ++ avbuf->buf.length = VIDEO_MAX_PLANES; ++ avbuf->buf.m.planes = avbuf->planes; ++ } ++ ++- ret = ioctl(buf_to_m2mctx(avbuf)->fd, VIDIOC_QUERYBUF, &avbuf->buf); +++ ret = ioctl(s->fd, VIDIOC_QUERYBUF, &avbuf->buf); ++ if (ret < 0) ++- return AVERROR(errno); +++ goto fail; ++ ++ if (V4L2_TYPE_IS_MULTIPLANAR(ctx->type)) { ++ avbuf->num_planes = 0; ++@@ -519,6 +952,8 @@ int ff_v4l2_buffer_initialize(V4L2Buffer* avbuf, int index) ++ avbuf->num_planes = 1; ++ ++ for (i = 0; i < avbuf->num_planes; i++) { +++ const int want_mmap = avbuf->buf.memory == V4L2_MEMORY_MMAP && +++ (V4L2_TYPE_IS_OUTPUT(ctx->type) || !buf_to_m2mctx(avbuf)->output_drm); ++ ++ avbuf->plane_info[i].bytesperline = V4L2_TYPE_IS_MULTIPLANAR(ctx->type) ? ++ ctx->format.fmt.pix_mp.plane_fmt[i].bytesperline : ++@@ -526,25 +961,31 @@ int ff_v4l2_buffer_initialize(V4L2Buffer* avbuf, int index) ++ ++ if (V4L2_TYPE_IS_MULTIPLANAR(ctx->type)) { ++ avbuf->plane_info[i].length = avbuf->buf.m.planes[i].length; ++- avbuf->plane_info[i].mm_addr = mmap(NULL, avbuf->buf.m.planes[i].length, ++- PROT_READ | PROT_WRITE, MAP_SHARED, ++- buf_to_m2mctx(avbuf)->fd, avbuf->buf.m.planes[i].m.mem_offset); +++ avbuf->plane_info[i].offset = avbuf->buf.m.planes[i].data_offset; +++ +++ if (want_mmap) +++ avbuf->plane_info[i].mm_addr = mmap(NULL, avbuf->buf.m.planes[i].length, +++ PROT_READ | PROT_WRITE, MAP_SHARED, +++ buf_to_m2mctx(avbuf)->fd, avbuf->buf.m.planes[i].m.mem_offset); ++ } else { ++ avbuf->plane_info[i].length = avbuf->buf.length; ++- avbuf->plane_info[i].mm_addr = mmap(NULL, avbuf->buf.length, ++- PROT_READ | PROT_WRITE, MAP_SHARED, ++- buf_to_m2mctx(avbuf)->fd, avbuf->buf.m.offset); +++ avbuf->plane_info[i].offset = 0; +++ +++ if (want_mmap) +++ avbuf->plane_info[i].mm_addr = mmap(NULL, avbuf->buf.length, +++ PROT_READ | PROT_WRITE, MAP_SHARED, +++ buf_to_m2mctx(avbuf)->fd, avbuf->buf.m.offset); ++ } ++ ++- if (avbuf->plane_info[i].mm_addr == MAP_FAILED) ++- return AVERROR(ENOMEM); +++ if (avbuf->plane_info[i].mm_addr == MAP_FAILED) { +++ avbuf->plane_info[i].mm_addr = NULL; +++ ret = AVERROR(ENOMEM); +++ goto fail; +++ } ++ } ++ ++ avbuf->status = V4L2BUF_AVAILABLE; ++ ++- if (V4L2_TYPE_IS_OUTPUT(ctx->type)) ++- return 0; ++- ++ if (V4L2_TYPE_IS_MULTIPLANAR(ctx->type)) { ++ avbuf->buf.m.planes = avbuf->planes; ++ avbuf->buf.length = avbuf->num_planes; ++@@ -554,20 +995,53 @@ int ff_v4l2_buffer_initialize(V4L2Buffer* avbuf, int index) ++ avbuf->buf.length = avbuf->planes[0].length; ++ } ++ ++- return ff_v4l2_buffer_enqueue(avbuf); +++ if (!V4L2_TYPE_IS_OUTPUT(ctx->type)) { +++ if (s->output_drm) { +++ ret = v4l2_buffer_export_drm(avbuf); +++ if (ret) { +++ av_log(logger(avbuf), AV_LOG_ERROR, "Failed to get exported drm handles\n"); +++ goto fail; +++ } +++ } +++ } +++ +++ *pbufref = bufref; +++ return 0; +++ +++fail: +++ av_buffer_unref(&bufref); +++ return ret; ++ } ++ ++ int ff_v4l2_buffer_enqueue(V4L2Buffer* avbuf) ++ { ++ int ret; +++ int qc; ++ ++- avbuf->buf.flags = avbuf->flags; +++ if (avbuf->buf.timestamp.tv_sec || avbuf->buf.timestamp.tv_usec) { +++ av_log(logger(avbuf), AV_LOG_DEBUG, "--- %s pre VIDIOC_QBUF: index %d, ts=%ld.%06ld count=%d\n", +++ avbuf->context->name, avbuf->buf.index, +++ 
avbuf->buf.timestamp.tv_sec, avbuf->buf.timestamp.tv_usec, +++ avbuf->context->q_count); +++ } ++ ++ ret = ioctl(buf_to_m2mctx(avbuf)->fd, VIDIOC_QBUF, &avbuf->buf); ++- if (ret < 0) ++- return AVERROR(errno); +++ if (ret < 0) { +++ int err = errno; +++ av_log(logger(avbuf), AV_LOG_ERROR, "--- %s VIDIOC_QBUF: index %d FAIL err %d (%s)\n", +++ avbuf->context->name, avbuf->buf.index, +++ err, strerror(err)); +++ return AVERROR(err); +++ } ++ +++ // Lock not wanted - if called from buffer free then lock already obtained +++ qc = atomic_fetch_add(&avbuf->context->q_count, 1) + 1; ++ avbuf->status = V4L2BUF_IN_DRIVER; +++ pthread_cond_broadcast(&avbuf->context->cond); +++ +++ av_log(logger(avbuf), AV_LOG_DEBUG, "--- %s VIDIOC_QBUF: index %d, ts=%ld.%06ld count=%d\n", +++ avbuf->context->name, avbuf->buf.index, +++ avbuf->buf.timestamp.tv_sec, avbuf->buf.timestamp.tv_usec, qc); ++ ++ return 0; ++ } ++diff --git a/libavcodec/v4l2_buffers.h b/libavcodec/v4l2_buffers.h ++index 3d2ff1b9a5..444ad94b14 100644 ++--- a/libavcodec/v4l2_buffers.h +++++ b/libavcodec/v4l2_buffers.h ++@@ -28,31 +28,47 @@ ++ #include ++ #include ++ +++#include "avcodec.h" ++ #include "libavutil/buffer.h" ++ #include "libavutil/frame.h" +++#include "libavutil/hwcontext_drm.h" ++ #include "packet.h" ++ ++ enum V4L2Buffer_status { ++ V4L2BUF_AVAILABLE, ++ V4L2BUF_IN_DRIVER, +++ V4L2BUF_IN_USE, ++ V4L2BUF_RET_USER, ++ }; ++ ++ /** ++ * V4L2Buffer (wrapper for v4l2_buffer management) ++ */ +++struct V4L2Context; +++struct ff_weak_link_client; +++struct dmabuf_h; +++ ++ typedef struct V4L2Buffer { ++- /* each buffer needs to have a reference to its context */ +++ /* each buffer needs to have a reference to its context +++ * The pointer is good enough for most operation but once the buffer has +++ * been passed to the user the buffer may become orphaned so for free ops +++ * the weak link must be used to ensure that the context is actually +++ * there +++ */ ++ struct V4L2Context *context; +++ struct ff_weak_link_client *context_wl; ++ ++- /* This object is refcounted per-plane, so we need to keep track ++- * of how many context-refs we are holding. 
*/ ++- AVBufferRef *context_ref; ++- atomic_uint context_refcount; +++ /* DRM descriptor */ +++ AVDRMFrameDescriptor drm_frame; +++ /* For DRM_PRIME encode - need to keep a ref to the source buffer till we +++ * are done +++ */ +++ AVBufferRef * ref_buf; ++ ++ /* keep track of the mmap address and mmap length */ ++ struct V4L2Plane_info { ++- int bytesperline; +++ size_t bytesperline; +++ size_t offset; ++ void * mm_addr; ++ size_t length; ++ } plane_info[VIDEO_MAX_PLANES]; ++@@ -63,9 +79,9 @@ typedef struct V4L2Buffer { ++ struct v4l2_buffer buf; ++ struct v4l2_plane planes[VIDEO_MAX_PLANES]; ++ ++- int flags; ++ enum V4L2Buffer_status status; ++ +++ struct dmabuf_h * dmabuf[VIDEO_MAX_PLANES]; // If externally alloced dmabufs - stash other info here ++ } V4L2Buffer; ++ ++ /** ++@@ -101,6 +117,10 @@ int ff_v4l2_buffer_buf_to_avpkt(AVPacket *pkt, V4L2Buffer *buf); ++ */ ++ int ff_v4l2_buffer_avpkt_to_buf(const AVPacket *pkt, V4L2Buffer *out); ++ +++int ff_v4l2_buffer_avpkt_to_buf_ext(const AVPacket * const pkt, V4L2Buffer * const out, +++ const void *extdata, size_t extlen, +++ const int64_t timestamp); +++ ++ /** ++ * Extracts the data from an AVFrame to a V4L2Buffer ++ * ++@@ -109,7 +129,7 @@ int ff_v4l2_buffer_avpkt_to_buf(const AVPacket *pkt, V4L2Buffer *out); ++ * ++ * @returns 0 in case of success, a negative AVERROR code otherwise ++ */ ++-int ff_v4l2_buffer_avframe_to_buf(const AVFrame *frame, V4L2Buffer *out); +++int ff_v4l2_buffer_avframe_to_buf(const AVFrame *frame, V4L2Buffer *out, const int64_t track_ts); ++ ++ /** ++ * Initializes a V4L2Buffer ++@@ -119,7 +139,7 @@ int ff_v4l2_buffer_avframe_to_buf(const AVFrame *frame, V4L2Buffer *out); ++ * ++ * @returns 0 in case of success, a negative AVERROR code otherwise ++ */ ++-int ff_v4l2_buffer_initialize(V4L2Buffer* avbuf, int index); +++int ff_v4l2_buffer_initialize(AVBufferRef **avbuf, int index, struct V4L2Context *ctx, enum v4l2_memory mem); ++ ++ /** ++ * Enqueues a V4L2Buffer ++@@ -130,5 +150,12 @@ int ff_v4l2_buffer_initialize(V4L2Buffer* avbuf, int index); ++ */ ++ int ff_v4l2_buffer_enqueue(V4L2Buffer* avbuf); ++ +++static inline void +++ff_v4l2_buffer_set_avail(V4L2Buffer* const avbuf) +++{ +++ avbuf->status = V4L2BUF_AVAILABLE; +++ av_buffer_unref(&avbuf->ref_buf); +++} +++ ++ ++ #endif // AVCODEC_V4L2_BUFFERS_H ++diff --git a/libavcodec/v4l2_context.c b/libavcodec/v4l2_context.c ++index a40be94690..79a31cf930 100644 ++--- a/libavcodec/v4l2_context.c +++++ b/libavcodec/v4l2_context.c ++@@ -27,11 +27,13 @@ ++ #include ++ #include ++ #include +++#include "libavutil/avassert.h" ++ #include "libavcodec/avcodec.h" ++ #include "decode.h" ++ #include "v4l2_buffers.h" ++ #include "v4l2_fmt.h" ++ #include "v4l2_m2m.h" +++#include "weak_link.h" ++ ++ struct v4l2_format_update { ++ uint32_t v4l2_fmt; ++@@ -41,26 +43,168 @@ struct v4l2_format_update { ++ int update_avfmt; ++ }; ++ ++-static inline V4L2m2mContext *ctx_to_m2mctx(V4L2Context *ctx) +++ +++static inline int64_t track_to_pts(AVCodecContext *avctx, unsigned int n) ++ { ++- return V4L2_TYPE_IS_OUTPUT(ctx->type) ? ++- container_of(ctx, V4L2m2mContext, output) : ++- container_of(ctx, V4L2m2mContext, capture); +++ return (int64_t)n; ++ } ++ ++-static inline AVCodecContext *logger(V4L2Context *ctx) +++static inline unsigned int pts_to_track(AVCodecContext *avctx, const int64_t pts) ++ { ++- return ctx_to_m2mctx(ctx)->avctx; +++ return (unsigned int)pts; +++} +++ +++// FFmpeg requires us to propagate a number of vars from the coded pkt into +++// the decoded frame. 
The only thing that tracks like that in V4L2 stateful +++// is timestamp. PTS maps to timestamp for this decode. FFmpeg makes no +++// guarantees about PTS being unique or specified for every frame so replace +++// the supplied PTS with a simple incrementing number and keep a circular +++// buffer of all the things we want preserved (including the original PTS) +++// indexed by the tracking no. +++static int64_t +++xlat_pts_pkt_in(AVCodecContext *const avctx, xlat_track_t *const x, const AVPacket *const avpkt) +++{ +++ int64_t track_pts; +++ +++ // Avoid 0 +++ if (++x->track_no == 0) +++ x->track_no = 1; +++ +++ track_pts = track_to_pts(avctx, x->track_no); +++ +++ av_log(avctx, AV_LOG_TRACE, "In pkt PTS=%" PRId64 ", DTS=%" PRId64 ", track=%" PRId64 ", n=%u\n", avpkt->pts, avpkt->dts, track_pts, x->track_no); +++ x->track_els[x->track_no % FF_V4L2_M2M_TRACK_SIZE] = (V4L2m2mTrackEl){ +++ .discard = 0, +++ .pending = 1, +++ .pkt_size = avpkt->size, +++ .pts = avpkt->pts, +++ .dts = avpkt->dts, +++ .reordered_opaque = avctx->reordered_opaque, +++ .pkt_pos = avpkt->pos, +++ .pkt_duration = avpkt->duration, +++ .track_pts = track_pts +++ }; +++ return track_pts; +++} +++ +++static int64_t +++xlat_pts_frame_in(AVCodecContext *const avctx, xlat_track_t *const x, const AVFrame *const frame) +++{ +++ int64_t track_pts; +++ +++ // Avoid 0 +++ if (++x->track_no == 0) +++ x->track_no = 1; +++ +++ track_pts = track_to_pts(avctx, x->track_no); +++ +++ av_log(avctx, AV_LOG_TRACE, "In frame PTS=%" PRId64 ", track=%" PRId64 ", n=%u\n", frame->pts, track_pts, x->track_no); +++ x->track_els[x->track_no % FF_V4L2_M2M_TRACK_SIZE] = (V4L2m2mTrackEl){ +++ .discard = 0, +++ .pending = 1, +++ .pkt_size = 0, +++ .pts = frame->pts, +++ .dts = AV_NOPTS_VALUE, +++ .reordered_opaque = frame->reordered_opaque, +++ .pkt_pos = frame->pkt_pos, +++ .pkt_duration = frame->pkt_duration, +++ .track_pts = track_pts +++ }; +++ return track_pts; +++} +++ +++ +++// Returns -1 if we should discard the frame +++static int +++xlat_pts_frame_out(AVCodecContext *const avctx, +++ xlat_track_t * const x, +++ AVFrame *const frame) +++{ +++ unsigned int n = pts_to_track(avctx, frame->pts) % FF_V4L2_M2M_TRACK_SIZE; +++ V4L2m2mTrackEl *const t = x->track_els + n; +++ if (frame->pts == AV_NOPTS_VALUE || frame->pts != t->track_pts) +++ { +++ av_log(avctx, frame->pts == AV_NOPTS_VALUE ? AV_LOG_DEBUG : AV_LOG_WARNING, +++ "Frame tracking failure: pts=%" PRId64 ", track[%d]=%" PRId64 "\n", frame->pts, n, t->track_pts); +++ frame->pts = AV_NOPTS_VALUE; +++ frame->pkt_dts = AV_NOPTS_VALUE; +++ frame->reordered_opaque = x->last_opaque; +++ frame->pkt_pos = -1; +++ frame->pkt_duration = 0; +++ frame->pkt_size = -1; +++ } +++ else if (!t->discard) +++ { +++ frame->pts = t->pending ? 
t->pts : AV_NOPTS_VALUE; +++ frame->pkt_dts = t->dts; +++ frame->reordered_opaque = t->reordered_opaque; +++ frame->pkt_pos = t->pkt_pos; +++ frame->pkt_duration = t->pkt_duration; +++ frame->pkt_size = t->pkt_size; +++ +++ x->last_opaque = x->track_els[n].reordered_opaque; +++ if (frame->pts != AV_NOPTS_VALUE) +++ x->last_pts = frame->pts; +++ t->pending = 0; +++ } +++ else +++ { +++ av_log(avctx, AV_LOG_DEBUG, "Discard frame (flushed): pts=%" PRId64 ", track[%d]=%" PRId64 "\n", frame->pts, n, t->track_pts); +++ return -1; +++ } +++ +++ av_log(avctx, AV_LOG_TRACE, "Out frame PTS=%" PRId64 "/%"PRId64", DTS=%" PRId64 ", track=%"PRId64", n=%d\n", +++ frame->pts, frame->best_effort_timestamp, frame->pkt_dts, t->track_pts, n); +++ return 0; ++ } ++ ++-static inline unsigned int v4l2_get_width(struct v4l2_format *fmt) +++// Returns -1 if we should discard the frame +++static int +++xlat_pts_pkt_out(AVCodecContext *const avctx, +++ xlat_track_t * const x, +++ AVPacket *const pkt) ++ { ++- return V4L2_TYPE_IS_MULTIPLANAR(fmt->type) ? fmt->fmt.pix_mp.width : fmt->fmt.pix.width; +++ unsigned int n = pts_to_track(avctx, pkt->pts) % FF_V4L2_M2M_TRACK_SIZE; +++ V4L2m2mTrackEl *const t = x->track_els + n; +++ if (pkt->pts == AV_NOPTS_VALUE || pkt->pts != t->track_pts) +++ { +++ av_log(avctx, pkt->pts == AV_NOPTS_VALUE ? AV_LOG_DEBUG : AV_LOG_WARNING, +++ "Pkt tracking failure: pts=%" PRId64 ", track[%d]=%" PRId64 "\n", pkt->pts, n, t->track_pts); +++ pkt->pts = AV_NOPTS_VALUE; +++ } +++ else if (!t->discard) +++ { +++ pkt->pts = t->pending ? t->pts : AV_NOPTS_VALUE; +++ +++ x->last_opaque = x->track_els[n].reordered_opaque; +++ if (pkt->pts != AV_NOPTS_VALUE) +++ x->last_pts = pkt->pts; +++ t->pending = 0; +++ } +++ else +++ { +++ av_log(avctx, AV_LOG_DEBUG, "Discard packet (flushed): pts=%" PRId64 ", track[%d]=%" PRId64 "\n", pkt->pts, n, t->track_pts); +++ return -1; +++ } +++ +++ // * Would like something much better than this...xlat(offset + out_count)? +++ pkt->dts = pkt->pts; +++ av_log(avctx, AV_LOG_TRACE, "Out pkt PTS=%" PRId64 ", track=%"PRId64", n=%d\n", +++ pkt->pts, t->track_pts, n); +++ return 0; ++ } ++ ++-static inline unsigned int v4l2_get_height(struct v4l2_format *fmt) +++ +++static inline V4L2m2mContext *ctx_to_m2mctx(const V4L2Context *ctx) ++ { ++- return V4L2_TYPE_IS_MULTIPLANAR(fmt->type) ? fmt->fmt.pix_mp.height : fmt->fmt.pix.height; +++ return V4L2_TYPE_IS_OUTPUT(ctx->type) ? +++ container_of(ctx, V4L2m2mContext, output) : +++ container_of(ctx, V4L2m2mContext, capture); +++} +++ +++static inline AVCodecContext *logger(const V4L2Context *ctx) +++{ +++ return ctx_to_m2mctx(ctx)->avctx; ++ } ++ ++ static AVRational v4l2_get_sar(V4L2Context *ctx) ++@@ -81,21 +225,29 @@ static AVRational v4l2_get_sar(V4L2Context *ctx) ++ return sar; ++ } ++ ++-static inline unsigned int v4l2_resolution_changed(V4L2Context *ctx, struct v4l2_format *fmt2) +++static inline int ctx_buffers_alloced(const V4L2Context * const ctx) +++{ +++ return ctx->bufrefs != NULL; +++} +++ +++// Width/Height changed or we don't have an alloc in the first place? +++static int ctx_resolution_changed(const V4L2Context *ctx, const struct v4l2_format *fmt2) ++ { ++- struct v4l2_format *fmt1 = &ctx->format; ++- int ret = V4L2_TYPE_IS_MULTIPLANAR(ctx->type) ? 
++- fmt1->fmt.pix_mp.width != fmt2->fmt.pix_mp.width || ++- fmt1->fmt.pix_mp.height != fmt2->fmt.pix_mp.height ++- : ++- fmt1->fmt.pix.width != fmt2->fmt.pix.width || ++- fmt1->fmt.pix.height != fmt2->fmt.pix.height; +++ const struct v4l2_format *fmt1 = &ctx->format; +++ int ret = !ctx_buffers_alloced(ctx) || +++ (V4L2_TYPE_IS_MULTIPLANAR(ctx->type) ? +++ fmt1->fmt.pix_mp.width != fmt2->fmt.pix_mp.width || +++ fmt1->fmt.pix_mp.height != fmt2->fmt.pix_mp.height +++ : +++ fmt1->fmt.pix.width != fmt2->fmt.pix.width || +++ fmt1->fmt.pix.height != fmt2->fmt.pix.height); ++ ++ if (ret) ++- av_log(logger(ctx), AV_LOG_DEBUG, "%s changed (%dx%d) -> (%dx%d)\n", +++ av_log(logger(ctx), AV_LOG_DEBUG, "V4L2 %s changed: alloc=%d (%dx%d) -> (%dx%d)\n", ++ ctx->name, ++- v4l2_get_width(fmt1), v4l2_get_height(fmt1), ++- v4l2_get_width(fmt2), v4l2_get_height(fmt2)); +++ ctx_buffers_alloced(ctx), +++ ff_v4l2_get_format_width(fmt1), ff_v4l2_get_format_height(fmt1), +++ ff_v4l2_get_format_width(fmt2), ff_v4l2_get_format_height(fmt2)); ++ ++ return ret; ++ } ++@@ -153,76 +305,110 @@ static inline void v4l2_save_to_context(V4L2Context* ctx, struct v4l2_format_upd ++ } ++ } ++ ++-static int v4l2_start_decode(V4L2Context *ctx) +++static int get_default_selection(V4L2Context * const ctx, struct v4l2_rect *r) ++ { ++- struct v4l2_decoder_cmd cmd = { ++- .cmd = V4L2_DEC_CMD_START, ++- .flags = 0, +++ V4L2m2mContext * const s = ctx_to_m2mctx(ctx); +++ struct v4l2_selection selection = { +++ .type = V4L2_BUF_TYPE_VIDEO_CAPTURE, +++ .target = V4L2_SEL_TGT_COMPOSE ++ }; ++- int ret; ++ ++- ret = ioctl(ctx_to_m2mctx(ctx)->fd, VIDIOC_DECODER_CMD, &cmd); ++- if (ret) +++ memset(r, 0, sizeof(*r)); +++ if (ioctl(s->fd, VIDIOC_G_SELECTION, &selection)) ++ return AVERROR(errno); ++ +++ *r = selection.r; ++ return 0; ++ } ++ ++-/** ++- * handle resolution change event and end of stream event ++- * returns 1 if reinit was successful, negative if it failed ++- * returns 0 if reinit was not executed ++- */ ++-static int v4l2_handle_event(V4L2Context *ctx) +++static int do_source_change(V4L2m2mContext * const s) ++ { ++- V4L2m2mContext *s = ctx_to_m2mctx(ctx); ++- struct v4l2_format cap_fmt = s->capture.format; ++- struct v4l2_event evt = { 0 }; +++ AVCodecContext *const avctx = s->avctx; +++ ++ int ret; +++ int reinit; +++ struct v4l2_format cap_fmt = s->capture.format; ++ ++- ret = ioctl(s->fd, VIDIOC_DQEVENT, &evt); ++- if (ret < 0) { ++- av_log(logger(ctx), AV_LOG_ERROR, "%s VIDIOC_DQEVENT\n", ctx->name); ++- return 0; ++- } +++ s->capture.done = 0; ++ ++- if (evt.type == V4L2_EVENT_EOS) { ++- ctx->done = 1; +++ ret = ioctl(s->fd, VIDIOC_G_FMT, &cap_fmt); +++ if (ret) { +++ av_log(avctx, AV_LOG_ERROR, "%s VIDIOC_G_FMT failed\n", s->capture.name); ++ return 0; ++ } ++ ++- if (evt.type != V4L2_EVENT_SOURCE_CHANGE) ++- return 0; +++ get_default_selection(&s->capture, &s->capture.selection); ++ ++- ret = ioctl(s->fd, VIDIOC_G_FMT, &cap_fmt); ++- if (ret) { ++- av_log(logger(ctx), AV_LOG_ERROR, "%s VIDIOC_G_FMT\n", s->capture.name); ++- return 0; +++ reinit = ctx_resolution_changed(&s->capture, &cap_fmt); +++ if ((s->quirks & FF_V4L2_QUIRK_REINIT_ALWAYS) != 0) +++ reinit = 1; +++ +++ s->capture.format = cap_fmt; +++ if (reinit) { +++ s->capture.height = ff_v4l2_get_format_height(&cap_fmt); +++ s->capture.width = ff_v4l2_get_format_width(&cap_fmt); ++ } ++ ++- if (v4l2_resolution_changed(&s->capture, &cap_fmt)) { ++- s->capture.height = v4l2_get_height(&cap_fmt); ++- s->capture.width = v4l2_get_width(&cap_fmt); ++- 
s->capture.sample_aspect_ratio = v4l2_get_sar(&s->capture); ++- } else { ++- v4l2_start_decode(ctx); ++- return 0; +++ // If we don't support selection (or it is bust) and we obviously have HD then kludge +++ if ((s->capture.selection.width == 0 || s->capture.selection.height == 0) && +++ (s->capture.height == 1088 && s->capture.width == 1920)) { +++ s->capture.selection = (struct v4l2_rect){.width = 1920, .height = 1080}; ++ } ++ ++- s->reinit = 1; +++ s->capture.sample_aspect_ratio = v4l2_get_sar(&s->capture); ++ ++- if (s->avctx) ++- ret = ff_set_dimensions(s->avctx, s->capture.width, s->capture.height); ++- if (ret < 0) ++- av_log(logger(ctx), AV_LOG_WARNING, "update avcodec height and width\n"); +++ av_log(avctx, AV_LOG_DEBUG, "Source change: SAR: %d/%d, wxh %dx%d crop %dx%d @ %d,%d, reinit=%d\n", +++ s->capture.sample_aspect_ratio.num, s->capture.sample_aspect_ratio.den, +++ s->capture.width, s->capture.height, +++ s->capture.selection.width, s->capture.selection.height, +++ s->capture.selection.left, s->capture.selection.top, reinit); ++ ++- ret = ff_v4l2_m2m_codec_reinit(s); ++- if (ret) { ++- av_log(logger(ctx), AV_LOG_ERROR, "v4l2_m2m_codec_reinit\n"); ++- return AVERROR(EINVAL); +++ if (reinit) { +++ if (avctx) +++ ret = ff_set_dimensions(s->avctx, +++ s->capture.selection.width != 0 ? s->capture.selection.width : s->capture.width, +++ s->capture.selection.height != 0 ? s->capture.selection.height : s->capture.height); +++ if (ret < 0) +++ av_log(avctx, AV_LOG_WARNING, "update avcodec height and width failed\n"); +++ +++ ret = ff_v4l2_m2m_codec_reinit(s); +++ if (ret) { +++ av_log(avctx, AV_LOG_ERROR, "v4l2_m2m_codec_reinit failed\n"); +++ return AVERROR(EINVAL); +++ } +++ +++ if (s->capture.width > ff_v4l2_get_format_width(&s->capture.format) || +++ s->capture.height > ff_v4l2_get_format_height(&s->capture.format)) { +++ av_log(avctx, AV_LOG_ERROR, "Format post reinit too small: wanted %dx%d > got %dx%d\n", +++ s->capture.width, s->capture.height, +++ ff_v4l2_get_format_width(&s->capture.format), ff_v4l2_get_format_height(&s->capture.format)); +++ return AVERROR(EINVAL); +++ } +++ +++ // Update pixel format - should only actually do something on initial change +++ s->capture.av_pix_fmt = +++ ff_v4l2_format_v4l2_to_avfmt(ff_v4l2_get_format_pixelformat(&s->capture.format), AV_CODEC_ID_RAWVIDEO); +++ if (s->output_drm) { +++ avctx->pix_fmt = AV_PIX_FMT_DRM_PRIME; +++ avctx->sw_pix_fmt = s->capture.av_pix_fmt; +++ } +++ else +++ avctx->pix_fmt = s->capture.av_pix_fmt; +++ +++ goto reinit_run; ++ } ++ +++ /* Buffers are OK so just stream off to ack */ +++ av_log(avctx, AV_LOG_DEBUG, "%s: Parameters only - restart decode\n", __func__); +++ +++ ret = ff_v4l2_context_set_status(&s->capture, VIDIOC_STREAMOFF); +++ if (ret) +++ av_log(avctx, AV_LOG_ERROR, "capture VIDIOC_STREAMOFF failed\n"); +++ s->draining = 0; +++ ++ /* reinit executed */ +++reinit_run: +++ ret = ff_v4l2_context_set_status(&s->capture, VIDIOC_STREAMON); ++ return 1; ++ } ++ ++@@ -266,171 +452,293 @@ static int v4l2_stop_encode(V4L2Context *ctx) ++ return 0; ++ } ++ ++-static V4L2Buffer* v4l2_dequeue_v4l2buf(V4L2Context *ctx, int timeout) +++// DQ a buffer +++// Amalgamates all the various ways there are of signalling EOS/Event to +++// generate a consistant EPIPE. +++// +++// Sets ctx->flag_last if next dq would produce EPIPE (i.e. stream has stopped) +++// +++// Returns: +++// 0 Success +++// AVERROR(EPIPE) Nothing more to read +++// AVERROR(ENOSPC) No buffers in Q to put result in +++// * AVERROR(..) 
+++ +++ static int +++dq_buf(V4L2Context * const ctx, V4L2Buffer ** const ppavbuf) ++ { ++- struct v4l2_plane planes[VIDEO_MAX_PLANES]; ++- struct v4l2_buffer buf = { 0 }; ++- V4L2Buffer *avbuf; ++- struct pollfd pfd = { ++- .events = POLLIN | POLLRDNORM | POLLPRI | POLLOUT | POLLWRNORM, /* default blocking capture */ ++- .fd = ctx_to_m2mctx(ctx)->fd, +++ V4L2m2mContext * const m = ctx_to_m2mctx(ctx); +++ AVCodecContext * const avctx = m->avctx; +++ V4L2Buffer * avbuf; +++ const int is_mp = V4L2_TYPE_IS_MULTIPLANAR(ctx->type); +++ +++ struct v4l2_plane planes[VIDEO_MAX_PLANES] = {{0}}; +++ +++ struct v4l2_buffer buf = { +++ .type = ctx->type, +++ .memory = V4L2_MEMORY_MMAP, ++ }; ++- int i, ret; ++ ++- if (!V4L2_TYPE_IS_OUTPUT(ctx->type) && ctx->buffers) { ++- for (i = 0; i < ctx->num_buffers; i++) { ++- if (ctx->buffers[i].status == V4L2BUF_IN_DRIVER) ++- break; ++- } ++- if (i == ctx->num_buffers) ++- av_log(logger(ctx), AV_LOG_WARNING, "All capture buffers returned to " ++- "userspace. Increase num_capture_buffers " ++- "to prevent device deadlock or dropped " ++- "packets/frames.\n"); ++- } ++- ++- /* if we are draining and there are no more capture buffers queued in the driver we are done */ ++- if (!V4L2_TYPE_IS_OUTPUT(ctx->type) && ctx_to_m2mctx(ctx)->draining) { ++- for (i = 0; i < ctx->num_buffers; i++) { ++- /* capture buffer initialization happens during decode hence ++- * detection happens at runtime ++- */ ++- if (!ctx->buffers) ++- break; ++- ++- if (ctx->buffers[i].status == V4L2BUF_IN_DRIVER) ++- goto start; +++ *ppavbuf = NULL; +++ +++ if (ctx->flag_last) +++ return AVERROR(EPIPE); +++ +++ if (is_mp) { +++ buf.length = VIDEO_MAX_PLANES; +++ buf.m.planes = planes; +++ } +++ +++ while (ioctl(m->fd, VIDIOC_DQBUF, &buf) != 0) { +++ const int err = errno; +++ av_assert0(AVERROR(err) < 0); +++ if (err != EINTR) { +++ av_log(avctx, AV_LOG_DEBUG, "%s VIDIOC_DQBUF, errno (%s)\n", +++ ctx->name, av_err2str(AVERROR(err))); +++ +++ if (err == EPIPE) +++ ctx->flag_last = 1; +++ +++ return AVERROR(err); ++ } ++- ctx->done = 1; ++- return NULL; ++ } +++ atomic_fetch_sub(&ctx->q_count, 1); +++ +++ avbuf = (V4L2Buffer *)ctx->bufrefs[buf.index]->data; +++ ff_v4l2_buffer_set_avail(avbuf); +++ avbuf->buf = buf; +++ if (is_mp) { +++ memcpy(avbuf->planes, planes, sizeof(planes)); +++ avbuf->buf.m.planes = avbuf->planes; +++ } +++ // Done with any attached buffer +++ av_buffer_unref(&avbuf->ref_buf); ++ ++-start: ++- if (V4L2_TYPE_IS_OUTPUT(ctx->type)) ++- pfd.events = POLLOUT | POLLWRNORM; ++- else { ++- /* no need to listen to requests for more input while draining */ ++- if (ctx_to_m2mctx(ctx)->draining) ++- pfd.events = POLLIN | POLLRDNORM | POLLPRI; +++ if (V4L2_TYPE_IS_CAPTURE(ctx->type)) { +++ // Zero length cap buffer return == EOS +++ if ((is_mp ? buf.m.planes[0].bytesused : buf.bytesused) == 0) { +++ av_log(avctx, AV_LOG_DEBUG, "Buffer empty - reQ\n"); +++ +++ // Must reQ so we don't leak +++ // May not matter if the next thing we do is release all the +++ // buffers but better to be tidy. 
+++ ff_v4l2_buffer_enqueue(avbuf); +++ +++ ctx->flag_last = 1; +++ return AVERROR(EPIPE); +++ } +++ +++#ifdef V4L2_BUF_FLAG_LAST +++ // If flag_last set then this contains data but is the last frame +++ // so remember that but return OK +++ if ((buf.flags & V4L2_BUF_FLAG_LAST) != 0) +++ ctx->flag_last = 1; +++#endif ++ } ++ ++- for (;;) { ++- ret = poll(&pfd, 1, timeout); ++- if (ret > 0) ++- break; ++- if (errno == EINTR) +++ *ppavbuf = avbuf; +++ return 0; +++} +++ +++/** +++ * handle resolution change event and end of stream event +++ * Expects to be called after the stream has stopped +++ * +++ * returns 1 if reinit was successful, negative if it failed +++ * returns 0 if reinit was not executed +++ */ +++static int +++get_event(V4L2m2mContext * const m) +++{ +++ AVCodecContext * const avctx = m->avctx; +++ struct v4l2_event evt = { 0 }; +++ +++ while (ioctl(m->fd, VIDIOC_DQEVENT, &evt) != 0) { +++ const int rv = AVERROR(errno); +++ if (rv == AVERROR(EINTR)) ++ continue; ++- return NULL; +++ if (rv == AVERROR(EAGAIN)) { +++ av_log(avctx, AV_LOG_WARNING, "V4L2 failed to get expected event - assume EOS\n"); +++ return AVERROR_EOF; +++ } +++ av_log(avctx, AV_LOG_ERROR, "V4L2 VIDIOC_DQEVENT: %s\n", av_err2str(rv)); +++ return rv; ++ } ++ ++- /* 0. handle errors */ ++- if (pfd.revents & POLLERR) { ++- /* if we are trying to get free buffers but none have been queued yet ++- no need to raise a warning */ ++- if (timeout == 0) { ++- for (i = 0; i < ctx->num_buffers; i++) { ++- if (ctx->buffers[i].status != V4L2BUF_AVAILABLE) ++- av_log(logger(ctx), AV_LOG_WARNING, "%s POLLERR\n", ctx->name); ++- } ++- } ++- else ++- av_log(logger(ctx), AV_LOG_WARNING, "%s POLLERR\n", ctx->name); +++ av_log(avctx, AV_LOG_DEBUG, "Dq event %d\n", evt.type); ++ ++- return NULL; +++ if (evt.type == V4L2_EVENT_EOS) { +++ av_log(avctx, AV_LOG_TRACE, "V4L2 VIDIOC_EVENT_EOS\n"); +++ return AVERROR_EOF; ++ } ++ ++- /* 1. handle resolution changes */ ++- if (pfd.revents & POLLPRI) { ++- ret = v4l2_handle_event(ctx); ++- if (ret < 0) { ++- /* if re-init failed, abort */ ++- ctx->done = 1; ++- return NULL; ++- } ++- if (ret) { ++- /* if re-init was successful drop the buffer (if there was one) ++- * since we had to reconfigure capture (unmap all buffers) ++- */ ++- return NULL; +++ if (evt.type == V4L2_EVENT_SOURCE_CHANGE) +++ return do_source_change(m); +++ +++ return 0; +++} +++ +++static inline int +++dq_ok(const V4L2Context * const c) +++{ +++ return c->streamon && atomic_load(&c->q_count) != 0; +++} +++ +++// Get a buffer +++// If output then just gets the buffer in the expected way +++// If capture then runs the capture state m/c to deal with res change etc. +++// If return value == 0 then *ppavbuf != NULL +++ +++static int +++get_qbuf(V4L2Context * const ctx, V4L2Buffer ** const ppavbuf, const int timeout) +++{ +++ V4L2m2mContext * const m = ctx_to_m2mctx(ctx); +++ AVCodecContext * const avctx = m->avctx; +++ const int is_cap = V4L2_TYPE_IS_CAPTURE(ctx->type); +++ +++ const unsigned int poll_cap = (POLLIN | POLLRDNORM); +++ const unsigned int poll_out = (POLLOUT | POLLWRNORM); +++ const unsigned int poll_event = POLLPRI; +++ +++ *ppavbuf = NULL; +++ +++ for (;;) { +++ struct pollfd pfd = { +++ .fd = m->fd, +++ // If capture && stream not started then assume we are waiting for the initial event +++ .events = !is_cap ? poll_out : +++ !ff_v4l2_ctx_eos(ctx) && ctx->streamon ? 
poll_cap : +++ poll_event, +++ }; +++ int ret; +++ +++ if (ctx->done) { +++ av_log(avctx, AV_LOG_TRACE, "V4L2 %s already done\n", ctx->name); +++ return AVERROR_EOF; ++ } ++- } ++ ++- /* 2. dequeue the buffer */ ++- if (pfd.revents & (POLLIN | POLLRDNORM | POLLOUT | POLLWRNORM)) { +++ // If capture && timeout == -1 then also wait for rx buffer free +++ if (is_cap && timeout == -1 && dq_ok(&m->output) && !m->draining) +++ pfd.events |= poll_out; ++ ++- if (!V4L2_TYPE_IS_OUTPUT(ctx->type)) { ++- /* there is a capture buffer ready */ ++- if (pfd.revents & (POLLIN | POLLRDNORM)) ++- goto dequeue; +++ // If nothing Qed all we will get is POLLERR - avoid that +++ if ((pfd.events == poll_out && !dq_ok(&m->output)) || +++ (pfd.events == poll_cap && !dq_ok(&m->capture)) || +++ (pfd.events == (poll_cap | poll_out) && !dq_ok(&m->capture) && !dq_ok(&m->output))) { +++ av_log(avctx, AV_LOG_TRACE, "V4L2 poll %s empty\n", ctx->name); +++ return AVERROR(ENOSPC); +++ } ++ ++- /* the driver is ready to accept more input; instead of waiting for the capture ++- * buffer to complete we return NULL so input can proceed (we are single threaded) ++- */ ++- if (pfd.revents & (POLLOUT | POLLWRNORM)) ++- return NULL; +++ // Timeout kludged s.t. "forever" eventually gives up & produces logging +++ // If waiting for an event when we have seen a last_frame then we expect +++ // it to be ready already so force a short timeout +++ ret = poll(&pfd, 1, +++ ff_v4l2_ctx_eos(ctx) ? 10 : +++ timeout == -1 ? 3000 : timeout); +++ if (ret < 0) { +++ ret = AVERROR(errno); // Remember errno before logging etc. +++ av_assert0(ret < 0); ++ } ++ ++-dequeue: ++- memset(&buf, 0, sizeof(buf)); ++- buf.memory = V4L2_MEMORY_MMAP; ++- buf.type = ctx->type; ++- if (V4L2_TYPE_IS_MULTIPLANAR(ctx->type)) { ++- memset(planes, 0, sizeof(planes)); ++- buf.length = VIDEO_MAX_PLANES; ++- buf.m.planes = planes; +++ av_log(avctx, AV_LOG_TRACE, "V4L2 poll %s ret=%d, timeout=%d, events=%#x, revents=%#x\n", +++ ctx->name, ret, timeout, pfd.events, pfd.revents); +++ +++ if (ret < 0) { +++ if (ret == AVERROR(EINTR)) +++ continue; +++ av_log(avctx, AV_LOG_ERROR, "V4L2 %s poll error %d (%s)\n", ctx->name, AVUNERROR(ret), av_err2str(ret)); +++ return ret; ++ } ++ ++- ret = ioctl(ctx_to_m2mctx(ctx)->fd, VIDIOC_DQBUF, &buf); ++- if (ret) { ++- if (errno != EAGAIN) { ++- ctx->done = 1; ++- if (errno != EPIPE) ++- av_log(logger(ctx), AV_LOG_DEBUG, "%s VIDIOC_DQBUF, errno (%s)\n", ++- ctx->name, av_err2str(AVERROR(errno))); +++ if (ret == 0) { +++ if (timeout == -1) +++ av_log(avctx, AV_LOG_ERROR, "V4L2 %s poll unexpected timeout: events=%#x\n", ctx->name, pfd.events); +++ if (ff_v4l2_ctx_eos(ctx)) { +++ av_log(avctx, AV_LOG_WARNING, "V4L2 %s poll event timeout\n", ctx->name); +++ ret = get_event(m); +++ if (ret < 0) { +++ ctx->done = 1; +++ return ret; +++ } ++ } ++- return NULL; +++ return AVERROR(EAGAIN); +++ } +++ +++ if ((pfd.revents & POLLERR) != 0) { +++ av_log(avctx, AV_LOG_WARNING, "V4L2 %s POLLERR\n", ctx->name); +++ return AVERROR_UNKNOWN; ++ } ++ ++- if (ctx_to_m2mctx(ctx)->draining && !V4L2_TYPE_IS_OUTPUT(ctx->type)) { ++- int bytesused = V4L2_TYPE_IS_MULTIPLANAR(buf.type) ? 
++- buf.m.planes[0].bytesused : buf.bytesused; ++- if (bytesused == 0) { +++ if ((pfd.revents & poll_event) != 0) { +++ ret = get_event(m); +++ if (ret < 0) { ++ ctx->done = 1; ++- return NULL; +++ return ret; ++ } ++-#ifdef V4L2_BUF_FLAG_LAST ++- if (buf.flags & V4L2_BUF_FLAG_LAST) ++- ctx->done = 1; ++-#endif +++ continue; +++ } +++ +++ if ((pfd.revents & poll_cap) != 0) { +++ ret = dq_buf(ctx, ppavbuf); +++ if (ret == AVERROR(EPIPE)) +++ continue; +++ return ret; ++ } ++ ++- avbuf = &ctx->buffers[buf.index]; ++- avbuf->status = V4L2BUF_AVAILABLE; ++- avbuf->buf = buf; ++- if (V4L2_TYPE_IS_MULTIPLANAR(ctx->type)) { ++- memcpy(avbuf->planes, planes, sizeof(planes)); ++- avbuf->buf.m.planes = avbuf->planes; +++ if ((pfd.revents & poll_out) != 0) { +++ if (is_cap) +++ return AVERROR(EAGAIN); +++ return dq_buf(ctx, ppavbuf); ++ } ++- return avbuf; +++ +++ av_log(avctx, AV_LOG_ERROR, "V4L2 poll unexpected events=%#x, revents=%#x\n", pfd.events, pfd.revents); +++ return AVERROR_UNKNOWN; ++ } +++} ++ ++- return NULL; +++// Clear out flags and timestamps that should should be set by the user +++// Returns the passed avbuf +++static V4L2Buffer * +++clean_v4l2_buffer(V4L2Buffer * const avbuf) +++{ +++ struct v4l2_buffer *const buf = &avbuf->buf; +++ +++ buf->flags = 0; +++ buf->field = V4L2_FIELD_ANY; +++ buf->timestamp = (struct timeval){0}; +++ buf->timecode = (struct v4l2_timecode){0}; +++ buf->sequence = 0; +++ +++ return avbuf; +++} +++ +++int +++ff_v4l2_dq_all(V4L2Context *const ctx, int timeout1) +++{ +++ V4L2Buffer * avbuf; +++ if (timeout1 != 0) { +++ int rv = get_qbuf(ctx, &avbuf, timeout1); +++ if (rv != 0) +++ return rv; +++ } +++ do { +++ get_qbuf(ctx, &avbuf, 0); +++ } while (avbuf); +++ return 0; ++ } ++ ++ static V4L2Buffer* v4l2_getfree_v4l2buf(V4L2Context *ctx) ++ { ++- int timeout = 0; /* return when no more buffers to dequeue */ ++ int i; ++ ++ /* get back as many output buffers as possible */ ++- if (V4L2_TYPE_IS_OUTPUT(ctx->type)) { ++- do { ++- } while (v4l2_dequeue_v4l2buf(ctx, timeout)); ++- } +++ if (V4L2_TYPE_IS_OUTPUT(ctx->type)) +++ ff_v4l2_dq_all(ctx, 0); ++ ++ for (i = 0; i < ctx->num_buffers; i++) { ++- if (ctx->buffers[i].status == V4L2BUF_AVAILABLE) ++- return &ctx->buffers[i]; +++ V4L2Buffer * const avbuf = (V4L2Buffer *)ctx->bufrefs[i]->data; +++ if (avbuf->status == V4L2BUF_AVAILABLE) +++ return clean_v4l2_buffer(avbuf); ++ } ++ ++ return NULL; ++@@ -438,25 +746,45 @@ static V4L2Buffer* v4l2_getfree_v4l2buf(V4L2Context *ctx) ++ ++ static int v4l2_release_buffers(V4L2Context* ctx) ++ { ++- struct v4l2_requestbuffers req = { ++- .memory = V4L2_MEMORY_MMAP, ++- .type = ctx->type, ++- .count = 0, /* 0 -> unmaps buffers from the driver */ ++- }; ++- int i, j; +++ int i; +++ int ret = 0; +++ const int fd = ctx_to_m2mctx(ctx)->fd; ++ ++- for (i = 0; i < ctx->num_buffers; i++) { ++- V4L2Buffer *buffer = &ctx->buffers[i]; +++ // Orphan any buffers in the wild +++ ff_weak_link_break(&ctx->wl_master); +++ +++ if (ctx->bufrefs) { +++ for (i = 0; i < ctx->num_buffers; i++) +++ av_buffer_unref(ctx->bufrefs + i); +++ } +++ +++ if (fd != -1) { +++ struct v4l2_requestbuffers req = { +++ .memory = V4L2_MEMORY_MMAP, +++ .type = ctx->type, +++ .count = 0, /* 0 -> unmap all buffers from the driver */ +++ }; +++ +++ while ((ret = ioctl(fd, VIDIOC_REQBUFS, &req)) == -1) { +++ if (errno == EINTR) +++ continue; ++ ++- for (j = 0; j < buffer->num_planes; j++) { ++- struct V4L2Plane_info *p = &buffer->plane_info[j]; ++- if (p->mm_addr && p->length) ++- if (munmap(p->mm_addr, p->length) < 
0) ++- av_log(logger(ctx), AV_LOG_ERROR, "%s unmap plane (%s))\n", ctx->name, av_err2str(AVERROR(errno))); +++ ret = AVERROR(errno); +++ +++ av_log(logger(ctx), AV_LOG_ERROR, "release all %s buffers (%s)\n", +++ ctx->name, av_err2str(AVERROR(errno))); +++ +++ if (ctx_to_m2mctx(ctx)->output_drm) +++ av_log(logger(ctx), AV_LOG_ERROR, +++ "Make sure the DRM client releases all FB/GEM objects before closing the codec (ie):\n" +++ "for all buffers: \n" +++ " 1. drmModeRmFB(..)\n" +++ " 2. drmIoctl(.., DRM_IOCTL_GEM_CLOSE,... )\n"); ++ } ++ } +++ atomic_store(&ctx->q_count, 0); ++ ++- return ioctl(ctx_to_m2mctx(ctx)->fd, VIDIOC_REQBUFS, &req); +++ return ret; ++ } ++ ++ static inline int v4l2_try_raw_format(V4L2Context* ctx, enum AVPixelFormat pixfmt) ++@@ -485,6 +813,8 @@ static inline int v4l2_try_raw_format(V4L2Context* ctx, enum AVPixelFormat pixfm ++ ++ static int v4l2_get_raw_format(V4L2Context* ctx, enum AVPixelFormat *p) ++ { +++ V4L2m2mContext* s = ctx_to_m2mctx(ctx); +++ V4L2m2mPriv *priv = s->avctx->priv_data; ++ enum AVPixelFormat pixfmt = ctx->av_pix_fmt; ++ struct v4l2_fmtdesc fdesc; ++ int ret; ++@@ -498,21 +828,22 @@ static int v4l2_get_raw_format(V4L2Context* ctx, enum AVPixelFormat *p) ++ return 0; ++ } ++ ++- for (;;) { +++ for (;; ++fdesc.index) { ++ ret = ioctl(ctx_to_m2mctx(ctx)->fd, VIDIOC_ENUM_FMT, &fdesc); ++ if (ret) ++ return AVERROR(EINVAL); ++ +++ if (priv->pix_fmt != AV_PIX_FMT_NONE) { +++ if (fdesc.pixelformat != ff_v4l2_format_avfmt_to_v4l2(priv->pix_fmt)) +++ continue; +++ } +++ ++ pixfmt = ff_v4l2_format_v4l2_to_avfmt(fdesc.pixelformat, AV_CODEC_ID_RAWVIDEO); ++ ret = v4l2_try_raw_format(ctx, pixfmt); ++- if (ret){ ++- fdesc.index++; ++- continue; +++ if (ret == 0) { +++ *p = pixfmt; +++ return 0; ++ } ++- ++- *p = pixfmt; ++- ++- return 0; ++ } ++ ++ return AVERROR(EINVAL); ++@@ -555,30 +886,99 @@ static int v4l2_get_coded_format(V4L2Context* ctx, uint32_t *p) ++ * ++ *****************************************************************************/ ++ +++ +++static void flush_all_buffers_status(V4L2Context* const ctx) +++{ +++ int i; +++ +++ if (!ctx->bufrefs) +++ return; +++ +++ for (i = 0; i < ctx->num_buffers; ++i) { +++ struct V4L2Buffer * const buf = (struct V4L2Buffer *)ctx->bufrefs[i]->data; +++ if (buf->status == V4L2BUF_IN_DRIVER) +++ ff_v4l2_buffer_set_avail(buf); +++ } +++ atomic_store(&ctx->q_count, 0); +++} +++ +++static int stuff_all_buffers(AVCodecContext * avctx, V4L2Context* ctx) +++{ +++ int i; +++ int rv; +++ +++ if (!ctx->bufrefs) { +++ rv = ff_v4l2_context_init(ctx); +++ if (rv) { +++ av_log(avctx, AV_LOG_ERROR, "can't request capture buffers\n"); +++ return rv; +++ } +++ } +++ +++ for (i = 0; i < ctx->num_buffers; ++i) { +++ struct V4L2Buffer * const buf = (struct V4L2Buffer *)ctx->bufrefs[i]->data; +++ if (buf->status == V4L2BUF_AVAILABLE) { +++ rv = ff_v4l2_buffer_enqueue(buf); +++ if (rv < 0) +++ return rv; +++ } +++ } +++ return 0; +++} +++ ++ int ff_v4l2_context_set_status(V4L2Context* ctx, uint32_t cmd) ++ { ++ int type = ctx->type; ++- int ret; +++ int ret = 0; +++ AVCodecContext * const avctx = logger(ctx); ++ ++- ret = ioctl(ctx_to_m2mctx(ctx)->fd, cmd, &type); ++- if (ret < 0) ++- return AVERROR(errno); +++ // Avoid doing anything if there is nothing we can do +++ if (cmd == VIDIOC_STREAMOFF && !ctx_buffers_alloced(ctx) && !ctx->streamon) +++ return 0; ++ ++- ctx->streamon = (cmd == VIDIOC_STREAMON); +++ ff_mutex_lock(&ctx->lock); ++ ++- return 0; +++ if (cmd == VIDIOC_STREAMON && !V4L2_TYPE_IS_OUTPUT(ctx->type)) +++ 
stuff_all_buffers(avctx, ctx); +++ +++ if (ioctl(ctx_to_m2mctx(ctx)->fd, cmd, &type) < 0) { +++ const int err = errno; +++ av_log(avctx, AV_LOG_ERROR, "%s set status %d (%s) failed: err=%d\n", ctx->name, +++ cmd, (cmd == VIDIOC_STREAMON) ? "ON" : "OFF", err); +++ ret = AVERROR(err); +++ } +++ else +++ { +++ if (cmd == VIDIOC_STREAMOFF) +++ flush_all_buffers_status(ctx); +++ else +++ ctx->first_buf = 1; +++ +++ ctx->streamon = (cmd == VIDIOC_STREAMON); +++ av_log(avctx, AV_LOG_DEBUG, "%s set status %d (%s) OK\n", ctx->name, +++ cmd, (cmd == VIDIOC_STREAMON) ? "ON" : "OFF"); +++ } +++ +++ // Both stream off & on effectively clear flag_last +++ ctx->flag_last = 0; +++ +++ ff_mutex_unlock(&ctx->lock); +++ +++ return ret; ++ } ++ ++ int ff_v4l2_context_enqueue_frame(V4L2Context* ctx, const AVFrame* frame) ++ { ++- V4L2m2mContext *s = ctx_to_m2mctx(ctx); +++ V4L2m2mContext *const s = ctx_to_m2mctx(ctx); +++ AVCodecContext *const avctx = s->avctx; +++ int64_t track_ts; ++ V4L2Buffer* avbuf; ++ int ret; ++ ++ if (!frame) { ++ ret = v4l2_stop_encode(ctx); ++ if (ret) ++- av_log(logger(ctx), AV_LOG_ERROR, "%s stop_encode\n", ctx->name); +++ av_log(avctx, AV_LOG_ERROR, "%s stop_encode\n", ctx->name); ++ s->draining= 1; ++ return 0; ++ } ++@@ -587,23 +987,29 @@ int ff_v4l2_context_enqueue_frame(V4L2Context* ctx, const AVFrame* frame) ++ if (!avbuf) ++ return AVERROR(EAGAIN); ++ ++- ret = ff_v4l2_buffer_avframe_to_buf(frame, avbuf); +++ track_ts = xlat_pts_frame_in(avctx, &s->xlat, frame); +++ +++ ret = ff_v4l2_buffer_avframe_to_buf(frame, avbuf, track_ts); ++ if (ret) ++ return ret; ++ ++ return ff_v4l2_buffer_enqueue(avbuf); ++ } ++ ++-int ff_v4l2_context_enqueue_packet(V4L2Context* ctx, const AVPacket* pkt) +++int ff_v4l2_context_enqueue_packet(V4L2Context* ctx, const AVPacket* pkt, +++ const void * extdata, size_t extlen) ++ { ++ V4L2m2mContext *s = ctx_to_m2mctx(ctx); +++ AVCodecContext *const avctx = s->avctx; ++ V4L2Buffer* avbuf; ++ int ret; +++ int64_t track_ts; ++ ++ if (!pkt->size) { ++ ret = v4l2_stop_decode(ctx); +++ // Log but otherwise ignore stop failure ++ if (ret) ++- av_log(logger(ctx), AV_LOG_ERROR, "%s stop_decode\n", ctx->name); +++ av_log(avctx, AV_LOG_ERROR, "%s stop_decode failed: err=%d\n", ctx->name, ret); ++ s->draining = 1; ++ return 0; ++ } ++@@ -612,8 +1018,13 @@ int ff_v4l2_context_enqueue_packet(V4L2Context* ctx, const AVPacket* pkt) ++ if (!avbuf) ++ return AVERROR(EAGAIN); ++ ++- ret = ff_v4l2_buffer_avpkt_to_buf(pkt, avbuf); ++- if (ret) +++ track_ts = xlat_pts_pkt_in(avctx, &s->xlat, pkt); +++ +++ ret = ff_v4l2_buffer_avpkt_to_buf_ext(pkt, avbuf, extdata, extlen, track_ts); +++ if (ret == AVERROR(ENOMEM)) +++ av_log(logger(ctx), AV_LOG_ERROR, "Buffer overflow in %s: pkt->size=%d > buf->length=%d\n", +++ __func__, pkt->size, avbuf->planes[0].length); +++ else if (ret) ++ return ret; ++ ++ return ff_v4l2_buffer_enqueue(avbuf); ++@@ -621,42 +1032,36 @@ int ff_v4l2_context_enqueue_packet(V4L2Context* ctx, const AVPacket* pkt) ++ ++ int ff_v4l2_context_dequeue_frame(V4L2Context* ctx, AVFrame* frame, int timeout) ++ { +++ V4L2m2mContext *s = ctx_to_m2mctx(ctx); +++ AVCodecContext *const avctx = s->avctx; ++ V4L2Buffer *avbuf; +++ int rv; ++ ++- /* ++- * timeout=-1 blocks until: ++- * 1. decoded frame available ++- * 2. 
an input buffer is ready to be dequeued ++- */ ++- avbuf = v4l2_dequeue_v4l2buf(ctx, timeout); ++- if (!avbuf) { ++- if (ctx->done) ++- return AVERROR_EOF; ++- ++- return AVERROR(EAGAIN); ++- } +++ do { +++ if ((rv = get_qbuf(ctx, &avbuf, timeout)) != 0) +++ return rv; +++ if ((rv = ff_v4l2_buffer_buf_to_avframe(frame, avbuf)) != 0) +++ return rv; +++ } while (xlat_pts_frame_out(avctx, &s->xlat, frame) != 0); ++ ++- return ff_v4l2_buffer_buf_to_avframe(frame, avbuf); +++ return 0; ++ } ++ ++-int ff_v4l2_context_dequeue_packet(V4L2Context* ctx, AVPacket* pkt) +++int ff_v4l2_context_dequeue_packet(V4L2Context* ctx, AVPacket* pkt, int timeout) ++ { +++ V4L2m2mContext *s = ctx_to_m2mctx(ctx); +++ AVCodecContext *const avctx = s->avctx; ++ V4L2Buffer *avbuf; +++ int rv; ++ ++- /* ++- * blocks until: ++- * 1. encoded packet available ++- * 2. an input buffer ready to be dequeued ++- */ ++- avbuf = v4l2_dequeue_v4l2buf(ctx, -1); ++- if (!avbuf) { ++- if (ctx->done) ++- return AVERROR_EOF; +++ do { +++ if ((rv = get_qbuf(ctx, &avbuf, timeout)) != 0) +++ return rv == AVERROR(ENOSPC) ? AVERROR(EAGAIN) : rv; // Caller not currently expecting ENOSPC +++ if ((rv = ff_v4l2_buffer_buf_to_avpkt(pkt, avbuf)) != 0) +++ return rv; +++ } while (xlat_pts_pkt_out(avctx, &s->xlat, pkt) != 0); ++ ++- return AVERROR(EAGAIN); ++- } ++- ++- return ff_v4l2_buffer_buf_to_avpkt(pkt, avbuf); +++ return 0; ++ } ++ ++ int ff_v4l2_context_get_format(V4L2Context* ctx, int probe) ++@@ -688,78 +1093,179 @@ int ff_v4l2_context_get_format(V4L2Context* ctx, int probe) ++ ++ int ff_v4l2_context_set_format(V4L2Context* ctx) ++ { ++- return ioctl(ctx_to_m2mctx(ctx)->fd, VIDIOC_S_FMT, &ctx->format); +++ int ret; +++ +++ ret = ioctl(ctx_to_m2mctx(ctx)->fd, VIDIOC_S_FMT, &ctx->format); +++ if (ret != 0) +++ return ret; +++ +++ // Check returned size against min size and if smaller have another go +++ // Only worry about plane[0] as this is meant to enforce limits for +++ // encoded streams where we might know a bit more about the shape +++ // than the driver +++ if (V4L2_TYPE_IS_MULTIPLANAR(ctx->format.type)) { +++ if (ctx->min_buf_size <= ctx->format.fmt.pix_mp.plane_fmt[0].sizeimage) +++ return 0; +++ ctx->format.fmt.pix_mp.plane_fmt[0].sizeimage = ctx->min_buf_size; +++ } +++ else { +++ if (ctx->min_buf_size <= ctx->format.fmt.pix.sizeimage) +++ return 0; +++ ctx->format.fmt.pix.sizeimage = ctx->min_buf_size; +++ } +++ +++ ret = ioctl(ctx_to_m2mctx(ctx)->fd, VIDIOC_S_FMT, &ctx->format); +++ return ret; ++ } ++ ++ void ff_v4l2_context_release(V4L2Context* ctx) ++ { ++ int ret; ++ ++- if (!ctx->buffers) +++ if (!ctx->bufrefs) ++ return; ++ ++ ret = v4l2_release_buffers(ctx); ++ if (ret) ++ av_log(logger(ctx), AV_LOG_WARNING, "V4L2 failed to unmap the %s buffers\n", ctx->name); ++ ++- av_freep(&ctx->buffers); +++ av_freep(&ctx->bufrefs); +++ av_buffer_unref(&ctx->frames_ref); +++ +++ ff_mutex_destroy(&ctx->lock); +++ pthread_cond_destroy(&ctx->cond); ++ } ++ ++-int ff_v4l2_context_init(V4L2Context* ctx) +++ +++static int create_buffers(V4L2Context* const ctx, const unsigned int req_buffers, const enum v4l2_memory mem) ++ { ++- V4L2m2mContext *s = ctx_to_m2mctx(ctx); +++ V4L2m2mContext * const s = ctx_to_m2mctx(ctx); ++ struct v4l2_requestbuffers req; ++- int ret, i; ++- ++- if (!v4l2_type_supported(ctx)) { ++- av_log(logger(ctx), AV_LOG_ERROR, "type %i not supported\n", ctx->type); ++- return AVERROR_PATCHWELCOME; ++- } +++ int ret; +++ int i; ++ ++- ret = ioctl(s->fd, VIDIOC_G_FMT, &ctx->format); ++- if (ret) ++- av_log(logger(ctx), 
AV_LOG_ERROR, "%s VIDIOC_G_FMT failed\n", ctx->name); +++ av_assert0(ctx->bufrefs == NULL); ++ ++ memset(&req, 0, sizeof(req)); ++- req.count = ctx->num_buffers; ++- req.memory = V4L2_MEMORY_MMAP; +++ req.count = req_buffers; +++ req.memory = mem; ++ req.type = ctx->type; ++- ret = ioctl(s->fd, VIDIOC_REQBUFS, &req); ++- if (ret < 0) { ++- av_log(logger(ctx), AV_LOG_ERROR, "%s VIDIOC_REQBUFS failed: %s\n", ctx->name, strerror(errno)); ++- return AVERROR(errno); +++ while ((ret = ioctl(s->fd, VIDIOC_REQBUFS, &req)) == -1) { +++ if (errno != EINTR) { +++ ret = AVERROR(errno); +++ av_log(logger(ctx), AV_LOG_ERROR, "%s VIDIOC_REQBUFS failed: %s\n", ctx->name, av_err2str(ret)); +++ return ret; +++ } ++ } ++ ++ ctx->num_buffers = req.count; ++- ctx->buffers = av_mallocz(ctx->num_buffers * sizeof(V4L2Buffer)); ++- if (!ctx->buffers) { +++ ctx->bufrefs = av_mallocz(ctx->num_buffers * sizeof(*ctx->bufrefs)); +++ if (!ctx->bufrefs) { ++ av_log(logger(ctx), AV_LOG_ERROR, "%s malloc enomem\n", ctx->name); ++- return AVERROR(ENOMEM); +++ goto fail_release; ++ } ++ ++- for (i = 0; i < req.count; i++) { ++- ctx->buffers[i].context = ctx; ++- ret = ff_v4l2_buffer_initialize(&ctx->buffers[i], i); ++- if (ret < 0) { +++ ctx->wl_master = ff_weak_link_new(ctx); +++ if (!ctx->wl_master) { +++ ret = AVERROR(ENOMEM); +++ goto fail_release; +++ } +++ +++ for (i = 0; i < ctx->num_buffers; i++) { +++ ret = ff_v4l2_buffer_initialize(&ctx->bufrefs[i], i, ctx, mem); +++ if (ret) { ++ av_log(logger(ctx), AV_LOG_ERROR, "%s buffer[%d] initialization (%s)\n", ctx->name, i, av_err2str(ret)); ++- goto error; +++ goto fail_release; ++ } ++ } ++ ++ av_log(logger(ctx), AV_LOG_DEBUG, "%s: %s %02d buffers initialized: %04ux%04u, sizeimage %08u, bytesperline %08u\n", ctx->name, ++ V4L2_TYPE_IS_MULTIPLANAR(ctx->type) ? av_fourcc2str(ctx->format.fmt.pix_mp.pixelformat) : av_fourcc2str(ctx->format.fmt.pix.pixelformat), ++ req.count, ++- v4l2_get_width(&ctx->format), ++- v4l2_get_height(&ctx->format), +++ ff_v4l2_get_format_width(&ctx->format), +++ ff_v4l2_get_format_height(&ctx->format), ++ V4L2_TYPE_IS_MULTIPLANAR(ctx->type) ? ctx->format.fmt.pix_mp.plane_fmt[0].sizeimage : ctx->format.fmt.pix.sizeimage, ++ V4L2_TYPE_IS_MULTIPLANAR(ctx->type) ? ctx->format.fmt.pix_mp.plane_fmt[0].bytesperline : ctx->format.fmt.pix.bytesperline); ++ ++ return 0; ++ ++-error: +++fail_release: ++ v4l2_release_buffers(ctx); +++ av_freep(&ctx->bufrefs); +++ return ret; +++} +++ +++int ff_v4l2_context_init(V4L2Context* ctx) +++{ +++ struct v4l2_queryctrl qctrl; +++ V4L2m2mContext * const s = ctx_to_m2mctx(ctx); +++ int ret; +++ +++ // It is not valid to reinit a context without a previous release +++ av_assert0(ctx->bufrefs == NULL); +++ +++ if (!v4l2_type_supported(ctx)) { +++ av_log(logger(ctx), AV_LOG_ERROR, "type %i not supported\n", ctx->type); +++ return AVERROR_PATCHWELCOME; +++ } +++ +++ ff_mutex_init(&ctx->lock, NULL); +++ pthread_cond_init(&ctx->cond, NULL); +++ atomic_init(&ctx->q_count, 0); +++ +++ if (s->output_drm) { +++ AVHWFramesContext *hwframes; +++ +++ ctx->frames_ref = av_hwframe_ctx_alloc(s->device_ref); +++ if (!ctx->frames_ref) { +++ ret = AVERROR(ENOMEM); +++ goto fail_unlock; +++ } ++ ++- av_freep(&ctx->buffers); +++ hwframes = (AVHWFramesContext*)ctx->frames_ref->data; +++ hwframes->format = AV_PIX_FMT_DRM_PRIME; +++ hwframes->sw_format = ctx->av_pix_fmt; +++ hwframes->width = ctx->width != 0 ? ctx->width : s->avctx->width; +++ hwframes->height = ctx->height != 0 ? 
ctx->height : s->avctx->height; +++ ret = av_hwframe_ctx_init(ctx->frames_ref); +++ if (ret < 0) +++ goto fail_unref_hwframes; +++ } +++ +++ ret = ioctl(s->fd, VIDIOC_G_FMT, &ctx->format); +++ if (ret) { +++ ret = AVERROR(errno); +++ av_log(logger(ctx), AV_LOG_ERROR, "%s VIDIOC_G_FMT failed: %s\n", ctx->name, av_err2str(ret)); +++ goto fail_unref_hwframes; +++ } +++ +++ memset(&qctrl, 0, sizeof(qctrl)); +++ qctrl.id = V4L2_CID_MIN_BUFFERS_FOR_OUTPUT; +++ if (ioctl(s->fd, VIDIOC_QUERYCTRL, &qctrl) != 0) { +++ ret = AVERROR(errno); +++ if (ret != AVERROR(EINVAL)) { +++ av_log(logger(ctx), AV_LOG_ERROR, "%s VIDIOC_QUERCTRL failed: %s\n", ctx->name, av_err2str(ret)); +++ goto fail_unref_hwframes; +++ } +++ // Control unsupported - set default if wanted +++ if (ctx->num_buffers < 2) +++ ctx->num_buffers = 4; +++ } +++ else { +++ if (ctx->num_buffers < 2) +++ ctx->num_buffers = qctrl.minimum + 2; +++ ctx->num_buffers = av_clip(ctx->num_buffers, qctrl.minimum, qctrl.maximum); +++ } +++ +++ ret = create_buffers(ctx, ctx->num_buffers, ctx->buf_mem); +++ if (ret < 0) +++ goto fail_unref_hwframes; +++ +++ return 0; ++ +++fail_unref_hwframes: +++ av_buffer_unref(&ctx->frames_ref); +++fail_unlock: +++ ff_mutex_destroy(&ctx->lock); ++ return ret; ++ } ++diff --git a/libavcodec/v4l2_context.h b/libavcodec/v4l2_context.h ++index 6f7460c89a..5afed3e6ec 100644 ++--- a/libavcodec/v4l2_context.h +++++ b/libavcodec/v4l2_context.h ++@@ -32,6 +32,8 @@ ++ #include "libavutil/rational.h" ++ #include "codec_id.h" ++ #include "packet.h" +++#include "libavutil/buffer.h" +++#include "libavutil/thread.h" ++ #include "v4l2_buffers.h" ++ ++ typedef struct V4L2Context { ++@@ -71,28 +73,57 @@ typedef struct V4L2Context { ++ */ ++ int width, height; ++ AVRational sample_aspect_ratio; +++ struct v4l2_rect selection; ++ ++ /** ++- * Indexed array of V4L2Buffers +++ * If the default size of buffer is less than this then try to +++ * set to this. ++ */ ++- V4L2Buffer *buffers; +++ uint32_t min_buf_size; +++ +++ /** +++ * Indexed array of pointers to V4L2Buffers +++ */ +++ AVBufferRef **bufrefs; ++ ++ /** ++ * Readonly after init. ++ */ ++ int num_buffers; ++ +++ /** +++ * Buffer memory type V4L2_MEMORY_MMAP or V4L2_MEMORY_DMABUF +++ */ +++ enum v4l2_memory buf_mem; +++ ++ /** ++ * Whether the stream has been started (VIDIOC_STREAMON has been sent). ++ */ ++ int streamon; ++ +++ /* 1st buffer after stream on */ +++ int first_buf; +++ ++ /** ++ * Either no more buffers available or an unrecoverable error was notified ++ * by the V4L2 kernel driver: once set the context has to be exited. ++ */ ++ int done; ++ +++ int flag_last; +++ +++ /** +++ * If NZ then when Qing frame/pkt use this rather than the +++ * "real" PTS +++ */ +++ uint64_t track_ts; +++ +++ AVBufferRef *frames_ref; +++ atomic_int q_count; +++ struct ff_weak_link_master *wl_master; +++ +++ AVMutex lock; +++ pthread_cond_t cond; ++ } V4L2Context; ++ ++ /** ++@@ -148,7 +179,7 @@ int ff_v4l2_context_set_status(V4L2Context* ctx, uint32_t cmd); ++ * @param[inout] pkt The AVPacket to dequeue to. ++ * @return 0 in case of success, AVERROR(EAGAIN) if no buffer was ready, another negative error in case of error. ++ */ ++-int ff_v4l2_context_dequeue_packet(V4L2Context* ctx, AVPacket* pkt); +++int ff_v4l2_context_dequeue_packet(V4L2Context* ctx, AVPacket* pkt, int timeout); ++ ++ /** ++ * Dequeues a buffer from a V4L2Context to an AVFrame. 
++@@ -157,7 +188,10 @@ int ff_v4l2_context_dequeue_packet(V4L2Context* ctx, AVPacket* pkt); ++ * @param[in] ctx The V4L2Context to dequeue from. ++ * @param[inout] f The AVFrame to dequeue to. ++ * @param[in] timeout The timeout for dequeue (-1 to block, 0 to return immediately, or milliseconds) +++ * ++ * @return 0 in case of success, AVERROR(EAGAIN) if no buffer was ready, another negative error in case of error. +++ * AVERROR(ENOSPC) if no buffer availible to put +++ * the frame in ++ */ ++ int ff_v4l2_context_dequeue_frame(V4L2Context* ctx, AVFrame* f, int timeout); ++ ++@@ -171,7 +205,7 @@ int ff_v4l2_context_dequeue_frame(V4L2Context* ctx, AVFrame* f, int timeout); ++ * @param[in] pkt A pointer to an AVPacket. ++ * @return 0 in case of success, a negative error otherwise. ++ */ ++-int ff_v4l2_context_enqueue_packet(V4L2Context* ctx, const AVPacket* pkt); +++int ff_v4l2_context_enqueue_packet(V4L2Context* ctx, const AVPacket* pkt, const void * ext_data, size_t ext_size); ++ ++ /** ++ * Enqueues a buffer to a V4L2Context from an AVFrame ++@@ -184,4 +218,28 @@ int ff_v4l2_context_enqueue_packet(V4L2Context* ctx, const AVPacket* pkt); ++ */ ++ int ff_v4l2_context_enqueue_frame(V4L2Context* ctx, const AVFrame* f); ++ +++/** +++ * Dequeue all buffers on this queue +++ * +++ * Used to recycle output buffers +++ * +++ * @param[in] ctx The V4L2Context to dequeue from. +++ * @param[in] timeout1 A timeout on dequeuing the 1st buffer, +++ * all others have a timeout of zero +++ * @return AVERROR(EAGAIN) if timeout1 non-zero then the return +++ * of the first dequeue operation, 0 otherwise. +++ */ +++int ff_v4l2_dq_all(V4L2Context *const ctx, int timeout1); +++ +++/** +++ * Returns the number of buffers currently queued +++ * +++ * @param[in] ctx The V4L2Context to evaluate +++ */ +++static inline int +++ff_v4l2_context_q_count(const V4L2Context* const ctx) +++{ +++ return atomic_load(&ctx->q_count); +++} +++ ++ #endif // AVCODEC_V4L2_CONTEXT_H ++diff --git a/libavcodec/v4l2_m2m.c b/libavcodec/v4l2_m2m.c ++index 602efb7a16..28d9ed4988 100644 ++--- a/libavcodec/v4l2_m2m.c +++++ b/libavcodec/v4l2_m2m.c ++@@ -34,6 +34,15 @@ ++ #include "v4l2_context.h" ++ #include "v4l2_fmt.h" ++ #include "v4l2_m2m.h" +++#include "v4l2_req_dmabufs.h" +++ +++static void +++xlat_init(xlat_track_t * const x) +++{ +++ memset(x, 0, sizeof(*x)); +++ x->last_pts = AV_NOPTS_VALUE; +++} +++ ++ ++ static inline int v4l2_splane_video(struct v4l2_capability *cap) ++ { ++@@ -67,7 +76,9 @@ static int v4l2_prepare_contexts(V4L2m2mContext *s, int probe) ++ ++ s->capture.done = s->output.done = 0; ++ s->capture.name = "capture"; +++ s->capture.buf_mem = s->db_ctl != NULL ? V4L2_MEMORY_DMABUF : V4L2_MEMORY_MMAP; ++ s->output.name = "output"; +++ s->output.buf_mem = s->input_drm ? 
V4L2_MEMORY_DMABUF : V4L2_MEMORY_MMAP; ++ atomic_init(&s->refcount, 0); ++ sem_init(&s->refsync, 0, 0); ++ ++@@ -84,18 +95,58 @@ static int v4l2_prepare_contexts(V4L2m2mContext *s, int probe) ++ if (v4l2_mplane_video(&cap)) { ++ s->capture.type = V4L2_BUF_TYPE_VIDEO_CAPTURE_MPLANE; ++ s->output.type = V4L2_BUF_TYPE_VIDEO_OUTPUT_MPLANE; +++ s->output.format.type = s->output.type; ++ return 0; ++ } ++ ++ if (v4l2_splane_video(&cap)) { ++ s->capture.type = V4L2_BUF_TYPE_VIDEO_CAPTURE; ++ s->output.type = V4L2_BUF_TYPE_VIDEO_OUTPUT; +++ s->output.format.type = s->output.type; ++ return 0; ++ } ++ ++ return AVERROR(EINVAL); ++ } ++ +++static int check_size(AVCodecContext * const avctx, V4L2m2mContext * const s) +++{ +++ struct v4l2_format fmt = {.type = s->output.type}; +++ int rv; +++ uint32_t pixfmt = ff_v4l2_format_avfmt_to_v4l2(avctx->pix_fmt); +++ unsigned int w; +++ unsigned int h; +++ +++ if (V4L2_TYPE_IS_MULTIPLANAR(fmt.type)) { +++ fmt.fmt.pix_mp.pixelformat = pixfmt; +++ fmt.fmt.pix_mp.width = avctx->width; +++ fmt.fmt.pix_mp.height = avctx->height; +++ } +++ else { +++ fmt.fmt.pix.pixelformat = pixfmt; +++ fmt.fmt.pix.width = avctx->width; +++ fmt.fmt.pix.height = avctx->height; +++ } +++ +++ rv = ioctl(s->fd, VIDIOC_TRY_FMT, &fmt); +++ +++ if (rv != 0) { +++ rv = AVERROR(errno); +++ av_log(avctx, AV_LOG_ERROR, "%s: Tryfmt failed: %s\n", __func__, av_err2str(rv)); +++ return rv; +++ } +++ +++ w = ff_v4l2_get_format_width(&fmt); +++ h = ff_v4l2_get_format_height(&fmt); +++ +++ if (w < avctx->width || h < avctx->height) { +++ av_log(avctx, AV_LOG_WARNING, "%s: Size check failed: asked for %dx%d, got: %dx%d\n", __func__, avctx->width, avctx->height, w, h); +++ return AVERROR(EINVAL); +++ } +++ +++ return 0; +++} +++ ++ static int v4l2_probe_driver(V4L2m2mContext *s) ++ { ++ void *log_ctx = s->avctx; ++@@ -115,6 +166,11 @@ static int v4l2_probe_driver(V4L2m2mContext *s) ++ goto done; ++ } ++ +++ // If being given frames (encode) check that V4L2 can cope with the size +++ if (s->output.av_codec_id == AV_CODEC_ID_RAWVIDEO && +++ (ret = check_size(s->avctx, s)) != 0) +++ goto done; +++ ++ ret = ff_v4l2_context_get_format(&s->capture, 1); ++ if (ret) { ++ av_log(log_ctx, AV_LOG_DEBUG, "v4l2 capture format not supported\n"); ++@@ -216,13 +272,7 @@ int ff_v4l2_m2m_codec_reinit(V4L2m2mContext *s) ++ av_log(log_ctx, AV_LOG_ERROR, "capture VIDIOC_STREAMOFF\n"); ++ ++ /* 2. unmap the capture buffers (v4l2 and ffmpeg): ++- * we must wait for all references to be released before being allowed ++- * to queue new buffers. ++ */ ++- av_log(log_ctx, AV_LOG_DEBUG, "waiting for user to release AVBufferRefs\n"); ++- if (atomic_load(&s->refcount)) ++- while(sem_wait(&s->refsync) == -1 && errno == EINTR); ++- ++ ff_v4l2_context_release(&s->capture); ++ ++ /* 3. get the new capture format */ ++@@ -241,7 +291,6 @@ int ff_v4l2_m2m_codec_reinit(V4L2m2mContext *s) ++ ++ /* 5. 
complete reinit */ ++ s->draining = 0; ++- s->reinit = 0; ++ ++ return 0; ++ } ++@@ -258,6 +307,9 @@ static void v4l2_m2m_destroy_context(void *opaque, uint8_t *context) ++ av_frame_unref(s->frame); ++ av_frame_free(&s->frame); ++ av_packet_unref(&s->buf_pkt); +++ av_freep(&s->extdata_data); +++ +++ av_log(s->avctx, AV_LOG_DEBUG, "V4L2 Context destroyed\n"); ++ ++ av_free(s); ++ } ++@@ -270,6 +322,11 @@ int ff_v4l2_m2m_codec_end(V4L2m2mPriv *priv) ++ if (!s) ++ return 0; ++ +++ av_log(s->avctx, AV_LOG_DEBUG, "V4L2 Codec end\n"); +++ +++ if (s->avctx && av_codec_is_decoder(s->avctx->codec)) +++ av_packet_unref(&s->buf_pkt); +++ ++ if (s->fd >= 0) { ++ ret = ff_v4l2_context_set_status(&s->output, VIDIOC_STREAMOFF); ++ if (ret) ++@@ -282,7 +339,15 @@ int ff_v4l2_m2m_codec_end(V4L2m2mPriv *priv) ++ ++ ff_v4l2_context_release(&s->output); ++ +++ dmabufs_ctl_unref(&s->db_ctl); +++ close(s->fd); +++ s->fd = -1; +++ ++ s->self_ref = NULL; +++ // This is only called on avctx close so after this point we don't have that +++ // Crash sooner if we find we are using it (can still log with avctx = NULL) +++ s->avctx = NULL; +++ priv->context = NULL; ++ av_buffer_unref(&priv->context_ref); ++ ++ return 0; ++@@ -326,35 +391,38 @@ int ff_v4l2_m2m_codec_init(V4L2m2mPriv *priv) ++ return v4l2_configure_contexts(s); ++ } ++ ++-int ff_v4l2_m2m_create_context(V4L2m2mPriv *priv, V4L2m2mContext **s) +++int ff_v4l2_m2m_create_context(V4L2m2mPriv *priv, V4L2m2mContext **pps) ++ { ++- *s = av_mallocz(sizeof(V4L2m2mContext)); ++- if (!*s) +++ V4L2m2mContext * const s = av_mallocz(sizeof(V4L2m2mContext)); +++ +++ *pps = NULL; +++ if (!s) ++ return AVERROR(ENOMEM); ++ ++- priv->context_ref = av_buffer_create((uint8_t *) *s, sizeof(V4L2m2mContext), +++ priv->context_ref = av_buffer_create((uint8_t *)s, sizeof(*s), ++ &v4l2_m2m_destroy_context, NULL, 0); ++ if (!priv->context_ref) { ++- av_freep(s); +++ av_free(s); ++ return AVERROR(ENOMEM); ++ } ++ ++ /* assign the context */ ++- priv->context = *s; ++- (*s)->priv = priv; +++ priv->context = s; +++ s->priv = priv; ++ ++ /* populate it */ ++- priv->context->capture.num_buffers = priv->num_capture_buffers; ++- priv->context->output.num_buffers = priv->num_output_buffers; ++- priv->context->self_ref = priv->context_ref; ++- priv->context->fd = -1; +++ s->capture.num_buffers = priv->num_capture_buffers; +++ s->output.num_buffers = priv->num_output_buffers; +++ s->self_ref = priv->context_ref; +++ s->fd = -1; +++ xlat_init(&s->xlat); ++ ++ priv->context->frame = av_frame_alloc(); ++ if (!priv->context->frame) { ++ av_buffer_unref(&priv->context_ref); ++- *s = NULL; /* freed when unreferencing context_ref */ ++ return AVERROR(ENOMEM); ++ } ++ +++ *pps = s; ++ return 0; ++ } ++diff --git a/libavcodec/v4l2_m2m.h b/libavcodec/v4l2_m2m.h ++index 04d86d7b92..a506e69d67 100644 ++--- a/libavcodec/v4l2_m2m.h +++++ b/libavcodec/v4l2_m2m.h ++@@ -30,6 +30,7 @@ ++ #include ++ ++ #include "libavcodec/avcodec.h" +++#include "libavutil/pixfmt.h" ++ #include "v4l2_context.h" ++ ++ #define container_of(ptr, type, member) ({ \ ++@@ -40,6 +41,38 @@ ++ { "num_output_buffers", "Number of buffers in the output context",\ ++ OFFSET(num_output_buffers), AV_OPT_TYPE_INT, { .i64 = 16 }, 2, INT_MAX, FLAGS } ++ +++#define FF_V4L2_M2M_TRACK_SIZE 128 +++typedef struct V4L2m2mTrackEl { +++ int discard; // If we see this buffer its been flushed, so discard +++ int pending; +++ int pkt_size; +++ int64_t pts; +++ int64_t dts; +++ int64_t reordered_opaque; +++ int64_t pkt_pos; +++ int64_t pkt_duration; +++ 
int64_t track_pts; +++} V4L2m2mTrackEl; +++ +++typedef struct pts_stats_s +++{ +++ void * logctx; +++ const char * name; // For debug +++ unsigned int last_count; +++ unsigned int last_interval; +++ int64_t last_pts; +++ int64_t guess; +++} pts_stats_t; +++ +++typedef struct xlat_track_s { +++ unsigned int track_no; +++ int64_t last_pts; // Last valid PTS decoded +++ int64_t last_opaque; +++ V4L2m2mTrackEl track_els[FF_V4L2_M2M_TRACK_SIZE]; +++} xlat_track_t; +++ +++struct dmabufs_ctl; +++ ++ typedef struct V4L2m2mContext { ++ char devname[PATH_MAX]; ++ int fd; ++@@ -52,10 +85,10 @@ typedef struct V4L2m2mContext { ++ AVCodecContext *avctx; ++ sem_t refsync; ++ atomic_uint refcount; ++- int reinit; ++ ++ /* null frame/packet received */ ++ int draining; +++ int running; ++ AVPacket buf_pkt; ++ ++ /* Reference to a frame. Only used during encoding */ ++@@ -66,6 +99,36 @@ typedef struct V4L2m2mContext { ++ ++ /* reference back to V4L2m2mPriv */ ++ void *priv; +++ +++ AVBufferRef *device_ref; +++ +++ /* generate DRM frames */ +++ int output_drm; +++ +++ /* input frames are drmprime */ +++ int input_drm; +++ +++ /* Frame tracking */ +++ xlat_track_t xlat; +++ +++ pts_stats_t pts_stat; +++ +++ /* req pkt */ +++ int req_pkt; +++ int reorder_size; +++ +++ /* Ext data sent */ +++ int extdata_sent; +++ /* Ext data sent in packet - overrides ctx */ +++ void * extdata_data; +++ size_t extdata_size; +++ +++#define FF_V4L2_QUIRK_REINIT_ALWAYS 1 +++#define FF_V4L2_QUIRK_ENUM_FRAMESIZES_BROKEN 2 +++ /* Quirks */ +++ unsigned int quirks; +++ +++ struct dmabufs_ctl * db_ctl; ++ } V4L2m2mContext; ++ ++ typedef struct V4L2m2mPriv { ++@@ -76,6 +139,8 @@ typedef struct V4L2m2mPriv { ++ ++ int num_output_buffers; ++ int num_capture_buffers; +++ const char * dmabuf_alloc; +++ enum AVPixelFormat pix_fmt; ++ } V4L2m2mPriv; ++ ++ /** ++@@ -129,4 +194,26 @@ int ff_v4l2_m2m_codec_reinit(V4L2m2mContext *ctx); ++ */ ++ int ff_v4l2_m2m_codec_full_reinit(V4L2m2mContext *ctx); ++ +++ +++static inline unsigned int ff_v4l2_get_format_width(const struct v4l2_format * const fmt) +++{ +++ return V4L2_TYPE_IS_MULTIPLANAR(fmt->type) ? fmt->fmt.pix_mp.width : fmt->fmt.pix.width; +++} +++ +++static inline unsigned int ff_v4l2_get_format_height(const struct v4l2_format * const fmt) +++{ +++ return V4L2_TYPE_IS_MULTIPLANAR(fmt->type) ? fmt->fmt.pix_mp.height : fmt->fmt.pix.height; +++} +++ +++static inline uint32_t ff_v4l2_get_format_pixelformat(const struct v4l2_format * const fmt) +++{ +++ return V4L2_TYPE_IS_MULTIPLANAR(fmt->type) ? 
fmt->fmt.pix_mp.pixelformat : fmt->fmt.pix.pixelformat; +++} +++ +++static inline int ff_v4l2_ctx_eos(const V4L2Context * const ctx) +++{ +++ return ctx->flag_last; +++} +++ +++ ++ #endif /* AVCODEC_V4L2_M2M_H */ ++diff --git a/libavcodec/v4l2_m2m_dec.c b/libavcodec/v4l2_m2m_dec.c ++index 4944d08511..11c83b2d66 100644 ++--- a/libavcodec/v4l2_m2m_dec.c +++++ b/libavcodec/v4l2_m2m_dec.c ++@@ -21,8 +21,14 @@ ++ * Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA ++ */ ++ +++#include "config_components.h" +++ ++ #include ++ #include +++ +++#include "libavutil/avassert.h" +++#include "libavutil/hwcontext.h" +++#include "libavutil/hwcontext_drm.h" ++ #include "libavutil/pixfmt.h" ++ #include "libavutil/pixdesc.h" ++ #include "libavutil/opt.h" ++@@ -30,75 +36,279 @@ ++ #include "codec_internal.h" ++ #include "libavcodec/decode.h" ++ +++#include "libavcodec/hwaccels.h" +++#include "libavcodec/internal.h" +++#include "libavcodec/hwconfig.h" +++ ++ #include "v4l2_context.h" ++ #include "v4l2_m2m.h" ++ #include "v4l2_fmt.h" +++#include "v4l2_req_dmabufs.h" ++ ++-static int v4l2_try_start(AVCodecContext *avctx) +++#if CONFIG_H264_DECODER +++#include "h264_parse.h" +++#endif +++#if CONFIG_HEVC_DECODER +++#include "hevc_parse.h" +++#endif +++ +++// Pick 64 for max last count - that is >1sec at 60fps +++#define STATS_LAST_COUNT_MAX 64 +++#define STATS_INTERVAL_MAX (1 << 30) +++ +++#ifndef FF_API_BUFFER_SIZE_T +++#define FF_API_BUFFER_SIZE_T 1 +++#endif +++ +++#define DUMP_FAILED_EXTRADATA 0 +++ +++#if DUMP_FAILED_EXTRADATA +++static inline char hex1(unsigned int x) ++ { ++- V4L2m2mContext *s = ((V4L2m2mPriv*)avctx->priv_data)->context; ++- V4L2Context *const capture = &s->capture; ++- V4L2Context *const output = &s->output; ++- struct v4l2_selection selection = { 0 }; ++- int ret; +++ x &= 0xf; +++ return x <= 9 ? '0' + x : 'a' + x - 10; +++} ++ ++- /* 1. start the output process */ ++- if (!output->streamon) { ++- ret = ff_v4l2_context_set_status(output, VIDIOC_STREAMON); ++- if (ret < 0) { ++- av_log(avctx, AV_LOG_DEBUG, "VIDIOC_STREAMON on output context\n"); ++- return ret; ++- } +++static inline char * hex2(char * s, unsigned int x) +++{ +++ *s++ = hex1(x >> 4); +++ *s++ = hex1(x); +++ return s; +++} +++ +++static inline char * hex4(char * s, unsigned int x) +++{ +++ s = hex2(s, x >> 8); +++ s = hex2(s, x); +++ return s; +++} +++ +++static inline char * dash2(char * s) +++{ +++ *s++ = '-'; +++ *s++ = '-'; +++ return s; +++} +++ +++static void +++data16(char * s, const unsigned int offset, const uint8_t * m, const size_t len) +++{ +++ size_t i; +++ s = hex4(s, offset); +++ m += offset; +++ for (i = 0; i != 8; ++i) { +++ *s++ = ' '; +++ s = len > i + offset ? hex2(s, *m++) : dash2(s); ++ } +++ *s++ = ' '; +++ *s++ = ':'; +++ for (; i != 16; ++i) { +++ *s++ = ' '; +++ s = len > i + offset ? hex2(s, *m++) : dash2(s); +++ } +++ *s++ = 0; +++} ++ ++- if (capture->streamon) ++- return 0; +++static void +++log_dump(void * logctx, int lvl, const void * const data, const size_t len) +++{ +++ size_t i; +++ for (i = 0; i < len; i += 16) { +++ char buf[80]; +++ data16(buf, i, data, len); +++ av_log(logctx, lvl, "%s\n", buf); +++ } +++} +++#endif ++ ++- /* 2. 
get the capture format */ ++- capture->format.type = capture->type; ++- ret = ioctl(s->fd, VIDIOC_G_FMT, &capture->format); ++- if (ret) { ++- av_log(avctx, AV_LOG_WARNING, "VIDIOC_G_FMT ioctl\n"); ++- return ret; +++static unsigned int pts_stats_interval(const pts_stats_t * const stats) +++{ +++ return stats->last_interval; +++} +++ +++static int64_t pts_stats_guess(const pts_stats_t * const stats, const int fail_bad_guess) +++{ +++ if (stats->last_count <= 1) +++ return stats->last_pts; +++ if (stats->last_pts == AV_NOPTS_VALUE || +++ fail_bad_guess && (stats->last_interval == 0 || +++ stats->last_count >= STATS_LAST_COUNT_MAX)) +++ return AV_NOPTS_VALUE; +++ return stats->last_pts + (int64_t)(stats->last_count - 1) * (int64_t)stats->last_interval; +++} +++ +++static void pts_stats_add(pts_stats_t * const stats, int64_t pts) +++{ +++ if (pts == AV_NOPTS_VALUE || pts == stats->last_pts) { +++ if (stats->last_count < STATS_LAST_COUNT_MAX) +++ ++stats->last_count; +++ return; ++ } ++ ++- /* 2.1 update the AVCodecContext */ ++- avctx->pix_fmt = ff_v4l2_format_v4l2_to_avfmt(capture->format.fmt.pix_mp.pixelformat, AV_CODEC_ID_RAWVIDEO); ++- capture->av_pix_fmt = avctx->pix_fmt; +++ if (stats->last_pts != AV_NOPTS_VALUE) { +++ const int64_t interval = pts - stats->last_pts; ++ ++- /* 3. set the crop parameters */ ++- selection.type = V4L2_BUF_TYPE_VIDEO_CAPTURE; ++- selection.r.height = avctx->coded_height; ++- selection.r.width = avctx->coded_width; ++- ret = ioctl(s->fd, VIDIOC_S_SELECTION, &selection); ++- if (!ret) { ++- ret = ioctl(s->fd, VIDIOC_G_SELECTION, &selection); ++- if (ret) { ++- av_log(avctx, AV_LOG_WARNING, "VIDIOC_G_SELECTION ioctl\n"); ++- } else { ++- av_log(avctx, AV_LOG_DEBUG, "crop output %dx%d\n", selection.r.width, selection.r.height); ++- /* update the size of the resulting frame */ ++- capture->height = selection.r.height; ++- capture->width = selection.r.width; +++ if (interval < 0 || interval >= STATS_INTERVAL_MAX || +++ stats->last_count >= STATS_LAST_COUNT_MAX) { +++ if (stats->last_interval != 0) +++ av_log(stats->logctx, AV_LOG_DEBUG, "%s: %s: Bad interval: %" PRId64 "/%d\n", +++ __func__, stats->name, interval, stats->last_count); +++ stats->last_interval = 0; +++ } +++ else { +++ const int64_t frame_time = interval / (int64_t)stats->last_count; +++ +++ if (frame_time != stats->last_interval) +++ av_log(stats->logctx, AV_LOG_DEBUG, "%s: %s: New interval: %u->%" PRId64 "/%d=%" PRId64 "\n", +++ __func__, stats->name, stats->last_interval, interval, stats->last_count, frame_time); +++ stats->last_interval = frame_time; ++ } ++ } ++ ++- /* 4. 
init the capture context now that we have the capture format */ ++- if (!capture->buffers) { ++- ret = ff_v4l2_context_init(capture); ++- if (ret) { ++- av_log(avctx, AV_LOG_ERROR, "can't request capture buffers\n"); ++- return AVERROR(ENOMEM); +++ stats->last_pts = pts; +++ stats->last_count = 1; +++} +++ +++static void pts_stats_init(pts_stats_t * const stats, void * logctx, const char * name) +++{ +++ *stats = (pts_stats_t){ +++ .logctx = logctx, +++ .name = name, +++ .last_count = 1, +++ .last_interval = 0, +++ .last_pts = AV_NOPTS_VALUE +++ }; +++} +++ +++// If abdata == NULL then this just counts space required +++// Unpacks avcC if detected +++static int +++h264_xd_copy(const uint8_t * const extradata, const int extrasize, uint8_t * abdata) +++{ +++ const uint8_t * const xdend = extradata + extrasize; +++ const uint8_t * p = extradata; +++ uint8_t * d = abdata; +++ unsigned int n; +++ unsigned int len; +++ const unsigned int hdrlen = 4; +++ unsigned int need_pps = 1; +++ +++ if (extrasize < 8) +++ return AVERROR(EINVAL); +++ +++ if (p[0] == 0 && p[1] == 0) { +++ // Assume a couple of leading zeros are good enough to indicate NAL +++ if (abdata) +++ memcpy(d, p, extrasize); +++ return extrasize; +++ } +++ +++ // avcC starts with a 1 +++ if (p[0] != 1) +++ return AVERROR(EINVAL); +++ +++ p += 5; +++ n = *p++ & 0x1f; +++ +++doxps: +++ while (n--) { +++ if (xdend - p < 2) +++ return AVERROR(EINVAL); +++ len = (p[0] << 8) | p[1]; +++ p += 2; +++ if (xdend - p < (ptrdiff_t)len) +++ return AVERROR(EINVAL); +++ if (abdata) { +++ d[0] = 0; +++ d[1] = 0; +++ d[2] = 0; +++ d[3] = 1; +++ memcpy(d + 4, p, len); ++ } +++ d += len + hdrlen; +++ p += len; +++ } +++ if (need_pps) { +++ need_pps = 0; +++ if (p >= xdend) +++ return AVERROR(EINVAL); +++ n = *p++; +++ goto doxps; ++ } ++ ++- /* 5. start the capture process */ ++- ret = ff_v4l2_context_set_status(capture, VIDIOC_STREAMON); ++- if (ret) { ++- av_log(avctx, AV_LOG_DEBUG, "VIDIOC_STREAMON, on capture context\n"); +++ return d - abdata; +++} +++ +++static int +++copy_extradata(AVCodecContext * const avctx, +++ const void * const src_data, const int src_len, +++ void ** const pdst_data, size_t * const pdst_len) +++{ +++ int len; +++ +++ *pdst_len = 0; +++ av_freep(pdst_data); +++ +++ if (avctx->codec_id == AV_CODEC_ID_H264) +++ len = h264_xd_copy(src_data, src_len, NULL); +++ else +++ len = src_len < 0 ? AVERROR(EINVAL) : src_len; +++ +++ // Zero length is OK but we want to stop - -ve is error val +++ if (len <= 0) +++ return len; +++ +++ if ((*pdst_data = av_malloc(len + AV_INPUT_BUFFER_PADDING_SIZE)) == NULL) +++ return AVERROR(ENOMEM); +++ +++ if (avctx->codec_id == AV_CODEC_ID_H264) +++ h264_xd_copy(src_data, src_len, *pdst_data); +++ else +++ memcpy(*pdst_data, src_data, len); +++ *pdst_len = len; +++ +++ return 0; +++} +++ +++ +++ +++static int check_output_streamon(AVCodecContext *const avctx, V4L2m2mContext *const s) +++{ +++ int ret; +++ struct v4l2_decoder_cmd cmd = { +++ .cmd = V4L2_DEC_CMD_START, +++ .flags = 0, +++ }; +++ +++ if (s->output.streamon) +++ return 0; +++ +++ ret = ff_v4l2_context_set_status(&s->output, VIDIOC_STREAMON); +++ if (ret != 0) { +++ av_log(avctx, AV_LOG_ERROR, "VIDIOC_STREAMON on output context: %s\n", av_err2str(ret)); ++ return ret; ++ } ++ +++ // STREAMON should do implicit START so this just for those that don't. 
+++ // It is optional so don't worry if it fails +++ if (ioctl(s->fd, VIDIOC_DECODER_CMD, &cmd) < 0) { +++ ret = AVERROR(errno); +++ av_log(avctx, AV_LOG_WARNING, "VIDIOC_DECODER_CMD start error: %s\n", av_err2str(ret)); +++ } +++ else { +++ av_log(avctx, AV_LOG_TRACE, "VIDIOC_DECODER_CMD start OK\n"); +++ } +++ return 0; +++} +++ +++static int v4l2_try_start(AVCodecContext *avctx) +++{ +++ V4L2m2mContext * const s = ((V4L2m2mPriv*)avctx->priv_data)->context; +++ int ret; +++ +++ /* 1. start the output process */ +++ if ((ret = check_output_streamon(avctx, s)) != 0) +++ return ret; ++ return 0; ++ } ++ ++@@ -133,62 +343,760 @@ static int v4l2_prepare_decoder(V4L2m2mContext *s) ++ return 0; ++ } ++ ++-static int v4l2_receive_frame(AVCodecContext *avctx, AVFrame *frame) +++static void +++set_best_effort_pts(AVCodecContext *const avctx, +++ pts_stats_t * const ps, +++ AVFrame *const frame) +++{ +++ pts_stats_add(ps, frame->pts); +++ +++ frame->best_effort_timestamp = pts_stats_guess(ps, 1); +++ // If we can't guess from just PTS - try DTS +++ if (frame->best_effort_timestamp == AV_NOPTS_VALUE) +++ frame->best_effort_timestamp = frame->pkt_dts; +++ +++ // We can't emulate what s/w does in a useful manner and using the +++ // "correct" answer seems to just confuse things. +++ frame->pkt_dts = frame->pts; +++ av_log(avctx, AV_LOG_TRACE, "Out PTS=%" PRId64 "/%"PRId64", DTS=%" PRId64 "\n", +++ frame->pts, frame->best_effort_timestamp, frame->pkt_dts); +++} +++ +++static void +++xlat_flush(xlat_track_t * const x) +++{ +++ unsigned int i; +++ // Do not reset track_no - this ensures that any frames left in the decoder +++ // that turn up later get discarded. +++ +++ x->last_pts = AV_NOPTS_VALUE; +++ x->last_opaque = 0; +++ for (i = 0; i != FF_V4L2_M2M_TRACK_SIZE; ++i) { +++ x->track_els[i].pending = 0; +++ x->track_els[i].discard = 1; +++ } +++} +++ +++static void +++xlat_init(xlat_track_t * const x) +++{ +++ memset(x, 0, sizeof(*x)); +++ xlat_flush(x); +++} +++ +++static int +++xlat_pending(const V4L2m2mContext * const s) +++{ +++ const xlat_track_t *const x = &s->xlat; +++ unsigned int n = x->track_no % FF_V4L2_M2M_TRACK_SIZE; +++ int i; +++ const int64_t now = pts_stats_guess(&s->pts_stat, 0); +++ int64_t first_dts = AV_NOPTS_VALUE; +++ int no_dts_count = 0; +++ unsigned int interval = pts_stats_interval(&s->pts_stat); +++ +++ for (i = 0; i < FF_V4L2_M2M_TRACK_SIZE; ++i, n = (n - 1) & (FF_V4L2_M2M_TRACK_SIZE - 1)) { +++ const V4L2m2mTrackEl * const t = x->track_els + n; +++ +++ if (first_dts == AV_NOPTS_VALUE) +++ if (t->dts == AV_NOPTS_VALUE) +++ ++no_dts_count; +++ else +++ first_dts = t->dts; +++ +++ // Discard only set on never-set or flushed entries +++ // So if we get here we've never successfully decoded a frame so allow +++ // more frames into the buffer before stalling +++ if (t->discard) +++ return i - 16; +++ +++ // If we've got this frame out then everything before this point +++ // must have entered the decoder +++ if (!t->pending) +++ break; +++ +++ // If we've never seen a pts all we can do is count frames +++ if (now == AV_NOPTS_VALUE) +++ continue; +++ +++ if (t->dts != AV_NOPTS_VALUE && now >= t->dts) +++ break; +++ } +++ +++ if (first_dts != AV_NOPTS_VALUE && now != AV_NOPTS_VALUE && interval != 0 && s->reorder_size != 0) { +++ const int iframes = (first_dts - now) / (int)interval; +++ const int t = iframes - s->reorder_size + no_dts_count; +++ +++// av_log(s->avctx, AV_LOG_DEBUG, "Last:%"PRId64", Now:%"PRId64", First:%"PRId64", delta=%"PRId64", frames=%d, nodts=%d\n", +++// 
x->last_dts, now, first_dts, first_dts - now, iframes, no_dts_count); +++ +++ if (iframes > 0 && iframes < 64 && t < i) { +++ return t; +++ } +++ } +++ +++ return i; +++} +++ +++static inline int stream_started(const V4L2m2mContext * const s) { +++ return s->output.streamon; +++} +++ +++#define NQ_OK 0 +++#define NQ_Q_FULL 1 +++#define NQ_SRC_EMPTY 2 +++#define NQ_NONE 3 +++#define NQ_DRAINING 4 +++#define NQ_DEAD 5 +++ +++#define TRY_DQ(nq_status) ((nq_status) >= NQ_OK && (nq_status) <= NQ_DRAINING) +++#define RETRY_NQ(nq_status) ((nq_status) == NQ_Q_FULL || (nq_status) == NQ_NONE) +++ +++// do_not_get If true then no new packet will be got but status will +++// be set appropriately +++ +++// AVERROR_EOF Flushing an already flushed stream +++// -ve Error (all errors except EOF are unexpected) +++// NQ_OK (0) OK +++// NQ_Q_FULL Dst full (retry if we think V4L2 Q has space now) +++// NQ_SRC_EMPTY Src empty (do not retry) +++// NQ_NONE Enqueue not attempted +++// NQ_DRAINING At EOS, dQ dest until EOS there too +++// NQ_DEAD Not running (do not retry, do not attempt capture dQ) +++ +++static int try_enqueue_src(AVCodecContext * const avctx, V4L2m2mContext * const s, const int do_not_get) ++ { ++- V4L2m2mContext *s = ((V4L2m2mPriv*)avctx->priv_data)->context; ++- V4L2Context *const capture = &s->capture; ++- V4L2Context *const output = &s->output; ++ int ret; ++ ++- if (!s->buf_pkt.size) { ++- ret = ff_decode_get_packet(avctx, &s->buf_pkt); +++ // If we don't already have a coded packet - get a new one +++ // We will already have a coded pkt if the output Q was full last time we +++ // tried to Q it +++ if (!s->buf_pkt.size && !do_not_get) { +++ unsigned int i; +++ +++ for (i = 0; i < 256; ++i) { +++ uint8_t * side_data; +++ size_t side_size; +++ +++ ret = ff_decode_get_packet(avctx, &s->buf_pkt); +++ if (ret != 0) +++ break; +++ +++ // New extradata is the only side-data we undertand +++ side_data = av_packet_get_side_data(&s->buf_pkt, AV_PKT_DATA_NEW_EXTRADATA, &side_size); +++ if (side_data) { +++ av_log(avctx, AV_LOG_DEBUG, "New extradata\n"); +++ if ((ret = copy_extradata(avctx, side_data, (int)side_size, &s->extdata_data, &s->extdata_size)) < 0) +++ av_log(avctx, AV_LOG_WARNING, "Failed to copy new extra data: %s\n", av_err2str(ret)); +++ s->extdata_sent = 0; +++ } +++ +++ if (s->buf_pkt.size != 0) +++ break; +++ +++ if (s->buf_pkt.side_data_elems == 0) { +++ av_log(avctx, AV_LOG_WARNING, "Empty pkt from ff_decode_get_packet - treating as EOF\n"); +++ ret = AVERROR_EOF; +++ break; +++ } +++ +++ // Retry a side-data only pkt +++ } +++ // If i >= 256 something has gone wrong +++ if (i >= 256) { +++ av_log(avctx, AV_LOG_ERROR, "Too many side-data only packets\n"); +++ return AVERROR(EIO); +++ } +++ +++ if (ret == AVERROR(EAGAIN)) { +++ if (!stream_started(s)) { +++ av_log(avctx, AV_LOG_TRACE, "%s: receive_frame before 1st coded packet\n", __func__); +++ return NQ_DEAD; +++ } +++ return NQ_SRC_EMPTY; +++ } +++ +++ if (ret == AVERROR_EOF) { +++ // EOF - enter drain mode +++ av_log(avctx, AV_LOG_TRACE, "--- EOS req: ret=%d, size=%d, started=%d, drain=%d\n", +++ ret, s->buf_pkt.size, stream_started(s), s->draining); +++ if (!stream_started(s)) { +++ av_log(avctx, AV_LOG_DEBUG, "EOS on flushed stream\n"); +++ s->draining = 1; +++ s->capture.done = 1; +++ return AVERROR_EOF; +++ } +++ +++ if (!s->draining) { +++ // Calling enqueue with an empty pkt starts drain +++ av_assert0(s->buf_pkt.size == 0); +++ ret = ff_v4l2_context_enqueue_packet(&s->output, &s->buf_pkt, NULL, 0); +++ if (ret) { +++ 
av_log(avctx, AV_LOG_ERROR, "Failed to start drain: ret=%d\n", ret); +++ return ret; +++ } +++ } +++ return NQ_DRAINING; +++ } +++ ++ if (ret < 0) { ++- if (ret == AVERROR(EAGAIN)) ++- return ff_v4l2_context_dequeue_frame(capture, frame, 0); ++- else if (ret != AVERROR_EOF) ++- return ret; +++ av_log(avctx, AV_LOG_ERROR, "Failed to get coded packet: err=%d\n", ret); +++ return ret; ++ } ++ } ++ ++- if (s->draining) ++- goto dequeue; +++ if (s->draining) { +++ if (s->buf_pkt.size) { +++ av_log(avctx, AV_LOG_WARNING, "Unexpected input whilst draining\n"); +++ av_packet_unref(&s->buf_pkt); +++ } +++ return NQ_DRAINING; +++ } +++ +++ if (!s->buf_pkt.size) +++ return NQ_NONE; ++ ++- ret = ff_v4l2_context_enqueue_packet(output, &s->buf_pkt); ++- if (ret < 0 && ret != AVERROR(EAGAIN)) ++- goto fail; +++ if ((ret = check_output_streamon(avctx, s)) != 0) +++ return ret; ++ ++- /* if EAGAIN don't unref packet and try to enqueue in the next iteration */ ++- if (ret != AVERROR(EAGAIN)) +++ if (s->extdata_sent) +++ ret = ff_v4l2_context_enqueue_packet(&s->output, &s->buf_pkt, NULL, 0); +++ else +++ ret = ff_v4l2_context_enqueue_packet(&s->output, &s->buf_pkt, s->extdata_data, s->extdata_size); +++ +++ if (ret == AVERROR(EAGAIN)) { +++ // Out of input buffers - keep packet +++ ret = NQ_Q_FULL; +++ } +++ else { +++ // In all other cases we are done with this packet ++ av_packet_unref(&s->buf_pkt); +++ s->extdata_sent = 1; ++ ++- if (!s->draining) { ++- ret = v4l2_try_start(avctx); ++ if (ret) { ++- /* cant recover */ ++- if (ret != AVERROR(ENOMEM)) ++- ret = 0; ++- goto fail; +++ av_log(avctx, AV_LOG_ERROR, "Packet enqueue failure: err=%d\n", ret); +++ return ret; ++ } ++ } ++ ++-dequeue: ++- return ff_v4l2_context_dequeue_frame(capture, frame, -1); ++-fail: ++- av_packet_unref(&s->buf_pkt); +++ // Start if we haven't +++ { +++ const int ret2 = v4l2_try_start(avctx); +++ if (ret2) { +++ av_log(avctx, AV_LOG_DEBUG, "Start failure: err=%d\n", ret2); +++ ret = (ret2 == AVERROR(ENOMEM)) ? ret2 : NQ_DEAD; +++ } +++ } +++ +++ return ret; +++} +++ +++static int qbuf_wait(AVCodecContext * const avctx, V4L2Context * const ctx) +++{ +++ int rv = 0; +++ +++ ff_mutex_lock(&ctx->lock); +++ +++ while (atomic_load(&ctx->q_count) == 0 && ctx->streamon) { +++ if (pthread_cond_wait(&ctx->cond, &ctx->lock) != 0) { +++ rv = AVERROR(errno); +++ av_log(avctx, AV_LOG_ERROR, "Cond wait failure: %s\n", av_err2str(rv)); +++ break; +++ } +++ } +++ +++ ff_mutex_unlock(&ctx->lock); +++ return rv; +++} +++ +++static int v4l2_receive_frame(AVCodecContext *avctx, AVFrame *frame) +++{ +++ V4L2m2mContext *const s = ((V4L2m2mPriv*)avctx->priv_data)->context; +++ int src_rv = -1; +++ int dst_rv = 1; // Non-zero (done), non-negative (error) number +++ unsigned int i = 0; +++ +++ do { +++ const int pending = xlat_pending(s); +++ const int prefer_dq = (pending > 4); +++ const int last_src_rv = src_rv; +++ +++ av_log(avctx, AV_LOG_TRACE, "Pending=%d, src_rv=%d, req_pkt=%d\n", pending, src_rv, s->req_pkt); +++ +++ // Enqueue another pkt for decode if +++ // (a) We don't have a lot of stuff in the buffer already OR +++ // (b) ... we (think we) do but we've failed to get a frame already OR +++ // (c) We've dequeued a lot of frames without asking for input +++ src_rv = try_enqueue_src(avctx, s, !(!prefer_dq || i != 0 || s->req_pkt > 2)); +++ +++ // If we got a frame last time or we've already tried to get a frame and +++ // we have nothing to enqueue then return now. rv will be AVERROR(EAGAIN) +++ // indicating that we want more input. 
+++ // This should mean that once decode starts we enter a stable state where +++ // we alternately ask for input and produce output +++ if ((i != 0 || s->req_pkt) && src_rv == NQ_SRC_EMPTY) +++ break; +++ +++ if (src_rv == NQ_Q_FULL && last_src_rv == NQ_Q_FULL) { +++ av_log(avctx, AV_LOG_WARNING, "Poll thinks src Q has space; none found\n"); +++ break; +++ } +++ +++ // Try to get a new frame if +++ // (a) we haven't already got one AND +++ // (b) enqueue returned a status indicating that decode should be attempted +++ if (dst_rv != 0 && TRY_DQ(src_rv)) { +++ // Pick a timeout depending on state +++ // The pending count isn't completely reliable so it is good enough +++ // hint that we want a frame but not good enough to require it in +++ // all cases; however if it has got > 31 that exceeds its margin of +++ // error so require a frame to prevent ridiculous levels of latency +++ const int t = +++ src_rv == NQ_Q_FULL ? -1 : +++ src_rv == NQ_DRAINING ? 300 : +++ prefer_dq ? (s->running && pending > 31 ? 100 : 5) : 0; +++ +++ // Dequeue frame will unref any previous contents of frame +++ // if it returns success so we don't need an explicit unref +++ // when discarding +++ // This returns AVERROR(EAGAIN) on timeout or if +++ // there is room in the input Q and timeout == -1 +++ dst_rv = ff_v4l2_context_dequeue_frame(&s->capture, frame, t); +++ +++ // Failure due to no buffer in Q? +++ if (dst_rv == AVERROR(ENOSPC)) { +++ // Wait & retry +++ if ((dst_rv = qbuf_wait(avctx, &s->capture)) == 0) { +++ dst_rv = ff_v4l2_context_dequeue_frame(&s->capture, frame, t); +++ } +++ } +++ +++ if (dst_rv == 0) { +++ set_best_effort_pts(avctx, &s->pts_stat, frame); +++ if (!s->running) { +++ s->running = 1; +++ av_log(avctx, AV_LOG_VERBOSE, "Decode running\n"); +++ } +++ } +++ +++ if (dst_rv == AVERROR(EAGAIN) && src_rv == NQ_DRAINING) { +++ av_log(avctx, AV_LOG_WARNING, "Timeout in drain - assume EOF"); +++ dst_rv = AVERROR_EOF; +++ s->capture.done = 1; +++ } +++ else if (dst_rv == AVERROR_EOF && (s->draining || s->capture.done)) +++ av_log(avctx, AV_LOG_DEBUG, "Dequeue EOF: draining=%d, cap.done=%d\n", +++ s->draining, s->capture.done); +++ else if (dst_rv && dst_rv != AVERROR(EAGAIN)) +++ av_log(avctx, AV_LOG_ERROR, "Packet dequeue failure: draining=%d, cap.done=%d, err=%d\n", +++ s->draining, s->capture.done, dst_rv); +++ } +++ +++ ++i; +++ if (i >= 256) { +++ av_log(avctx, AV_LOG_ERROR, "Unexpectedly large retry count: %d\n", i); +++ src_rv = AVERROR(EIO); +++ } +++ +++ // Continue trying to enqueue packets if either +++ // (a) we succeeded last time OR +++ // (b) we didn't ret a frame and we can retry the input +++ } while (src_rv == NQ_OK || (dst_rv == AVERROR(EAGAIN) && RETRY_NQ(src_rv))); +++ +++ // Ensure that the frame contains nothing if we aren't returning a frame +++ // (might happen when discarding) +++ if (dst_rv) +++ av_frame_unref(frame); +++ +++ // If we got a frame this time ask for a pkt next time +++ s->req_pkt = (dst_rv == 0) ? s->req_pkt + 1 : 0; +++ +++#if 0 +++ if (dst_rv == 0) +++ { +++ static int z = 0; +++ if (++z > 50) { +++ av_log(avctx, AV_LOG_ERROR, "Streamoff and die?\n"); +++ ff_v4l2_context_set_status(&s->capture, VIDIOC_STREAMOFF); +++ return -1; +++ } +++ } +++#endif +++ +++ return dst_rv == 0 ? 0 : +++ src_rv < 0 ? src_rv : +++ dst_rv < 0 ? 
dst_rv : +++ AVERROR(EAGAIN); +++} +++ +++#if 0 +++#include +++static int64_t us_time(void) +++{ +++ struct timespec ts; +++ clock_gettime(CLOCK_MONOTONIC, &ts); +++ return (int64_t)ts.tv_sec * 1000000 + ts.tv_nsec / 1000; +++} +++ +++static int v4l2_receive_frame(AVCodecContext *avctx, AVFrame *frame) +++{ +++ int ret; +++ const int64_t now = us_time(); +++ int64_t done; +++ av_log(avctx, AV_LOG_TRACE, "<<< %s\n", __func__); +++ ret = v4l2_receive_frame2(avctx, frame); +++ done = us_time(); +++ av_log(avctx, AV_LOG_TRACE, ">>> %s: rx time=%" PRId64 ", rv=%d\n", __func__, done - now, ret); ++ return ret; ++ } +++#endif +++ +++static uint32_t +++avprofile_to_v4l2(const enum AVCodecID codec_id, const int avprofile) +++{ +++ switch (codec_id) { +++ case AV_CODEC_ID_H264: +++ switch (avprofile) { +++ case FF_PROFILE_H264_BASELINE: +++ return V4L2_MPEG_VIDEO_H264_PROFILE_BASELINE; +++ case FF_PROFILE_H264_CONSTRAINED_BASELINE: +++ return V4L2_MPEG_VIDEO_H264_PROFILE_CONSTRAINED_BASELINE; +++ case FF_PROFILE_H264_MAIN: +++ return V4L2_MPEG_VIDEO_H264_PROFILE_MAIN; +++ case FF_PROFILE_H264_EXTENDED: +++ return V4L2_MPEG_VIDEO_H264_PROFILE_EXTENDED; +++ case FF_PROFILE_H264_HIGH: +++ return V4L2_MPEG_VIDEO_H264_PROFILE_HIGH; +++ case FF_PROFILE_H264_HIGH_10: +++ return V4L2_MPEG_VIDEO_H264_PROFILE_HIGH_10; +++ case FF_PROFILE_H264_HIGH_10_INTRA: +++ return V4L2_MPEG_VIDEO_H264_PROFILE_HIGH_10_INTRA; +++ case FF_PROFILE_H264_MULTIVIEW_HIGH: +++ case FF_PROFILE_H264_HIGH_422: +++ return V4L2_MPEG_VIDEO_H264_PROFILE_HIGH_422; +++ case FF_PROFILE_H264_HIGH_422_INTRA: +++ return V4L2_MPEG_VIDEO_H264_PROFILE_HIGH_422_INTRA; +++ case FF_PROFILE_H264_STEREO_HIGH: +++ return V4L2_MPEG_VIDEO_H264_PROFILE_STEREO_HIGH; +++ case FF_PROFILE_H264_HIGH_444_PREDICTIVE: +++ return V4L2_MPEG_VIDEO_H264_PROFILE_HIGH_444_PREDICTIVE; +++ case FF_PROFILE_H264_HIGH_444_INTRA: +++ return V4L2_MPEG_VIDEO_H264_PROFILE_HIGH_444_INTRA; +++ case FF_PROFILE_H264_CAVLC_444: +++ return V4L2_MPEG_VIDEO_H264_PROFILE_CAVLC_444_INTRA; +++ case FF_PROFILE_H264_HIGH_444: +++ default: +++ break; +++// V4L2_MPEG_VIDEO_H264_PROFILE_SCALABLE_BASELINE = 12, +++// V4L2_MPEG_VIDEO_H264_PROFILE_SCALABLE_HIGH = 13, +++// V4L2_MPEG_VIDEO_H264_PROFILE_SCALABLE_HIGH_INTRA = 14, +++// V4L2_MPEG_VIDEO_H264_PROFILE_MULTIVIEW_HIGH = 16, +++// V4L2_MPEG_VIDEO_H264_PROFILE_CONSTRAINED_HIGH = 17, +++ } +++ break; +++ case AV_CODEC_ID_MPEG2VIDEO: +++ case AV_CODEC_ID_MPEG4: +++ case AV_CODEC_ID_VC1: +++ case AV_CODEC_ID_VP8: +++ case AV_CODEC_ID_VP9: +++ case AV_CODEC_ID_AV1: +++ // Most profiles are a simple number that matches the V4L2 enum +++ return avprofile; +++ default: +++ break; +++ } +++ return ~(uint32_t)0; +++} +++ +++// This check mirrors Chrome's profile check by testing to see if the profile +++// exists as a possible value for the V4L2 profile control +++static int +++check_profile(AVCodecContext *const avctx, V4L2m2mContext *const s) +++{ +++ struct v4l2_queryctrl query_ctrl; +++ struct v4l2_querymenu query_menu; +++ uint32_t profile_id; +++ +++ // An unset profile is almost certainly zero or -99 - do not reject +++ if (avctx->profile <= 0) { +++ av_log(avctx, AV_LOG_VERBOSE, "Profile %d <= 0 - check skipped\n", avctx->profile); +++ return 0; +++ } +++ +++ memset(&query_ctrl, 0, sizeof(query_ctrl)); +++ switch (avctx->codec_id) { +++ case AV_CODEC_ID_MPEG2VIDEO: +++ profile_id = V4L2_CID_MPEG_VIDEO_MPEG2_PROFILE; +++ break; +++ case AV_CODEC_ID_MPEG4: +++ profile_id = V4L2_CID_MPEG_VIDEO_MPEG4_PROFILE; +++ break; +++ case 
AV_CODEC_ID_H264: +++ profile_id = V4L2_CID_MPEG_VIDEO_H264_PROFILE; +++ break; +++ case AV_CODEC_ID_VP8: +++ profile_id = V4L2_CID_MPEG_VIDEO_VP8_PROFILE; +++ break; +++ case AV_CODEC_ID_VP9: +++ profile_id = V4L2_CID_MPEG_VIDEO_VP9_PROFILE; +++ break; +++#ifdef V4L2_CID_MPEG_VIDEO_AV1_PROFILE +++ case AV_CODEC_ID_AV1: +++ profile_id = V4L2_CID_MPEG_VIDEO_AV1_PROFILE; +++ break; +++#endif +++ default: +++ av_log(avctx, AV_LOG_VERBOSE, "Can't map profile for codec id %d; profile check skipped\n", avctx->codec_id); +++ return 0; +++ } +++ +++ query_ctrl = (struct v4l2_queryctrl){.id = profile_id}; +++ if (ioctl(s->fd, VIDIOC_QUERYCTRL, &query_ctrl) != 0) { +++ av_log(avctx, AV_LOG_VERBOSE, "Query profile ctrl (%#x) not supported: assume OK\n", query_ctrl.id); +++ } +++ else { +++ av_log(avctx, AV_LOG_DEBUG, "%s: Control supported: %#x\n", __func__, query_ctrl.id); +++ +++ query_menu = (struct v4l2_querymenu){ +++ .id = query_ctrl.id, +++ .index = avprofile_to_v4l2(avctx->codec_id, avctx->profile), +++ }; +++ +++ if (query_menu.index > query_ctrl.maximum || +++ query_menu.index < query_ctrl.minimum || +++ ioctl(s->fd, VIDIOC_QUERYMENU, &query_menu) != 0) { +++ return AVERROR(ENOENT); +++ } +++ } +++ +++ return 0; +++}; +++ +++static int +++check_size(AVCodecContext * const avctx, V4L2m2mContext * const s) +++{ +++ unsigned int i; +++ const uint32_t fcc = ff_v4l2_get_format_pixelformat(&s->capture.format); +++ const uint32_t w = avctx->coded_width; +++ const uint32_t h = avctx->coded_height; +++ +++ if (w == 0 || h == 0 || fcc == 0) { +++ av_log(avctx, AV_LOG_TRACE, "%s: Size %dx%d or fcc %s empty\n", __func__, w, h, av_fourcc2str(fcc)); +++ return 0; +++ } +++ if ((s->quirks & FF_V4L2_QUIRK_ENUM_FRAMESIZES_BROKEN) != 0) { +++ av_log(avctx, AV_LOG_TRACE, "%s: Skipped (quirk): Size %dx%d, fcc %s\n", __func__, w, h, av_fourcc2str(fcc)); +++ return 0; +++ } +++ +++ for (i = 0;; ++i) { +++ struct v4l2_frmsizeenum fs = { +++ .index = i, +++ .pixel_format = fcc, +++ }; +++ +++ while (ioctl(s->fd, VIDIOC_ENUM_FRAMESIZES, &fs) != 0) { +++ const int err = AVERROR(errno); +++ if (err == AVERROR(EINTR)) +++ continue; +++ if (i == 0 && err == AVERROR(ENOTTY)) { +++ av_log(avctx, AV_LOG_DEBUG, "Framesize enum not supported\n"); +++ return 0; +++ } +++ if (err != AVERROR(EINVAL)) { +++ av_log(avctx, AV_LOG_ERROR, "Failed to enum framesizes: %s", av_err2str(err)); +++ return err; +++ } +++ av_log(avctx, AV_LOG_WARNING, "Failed to find Size=%dx%d, fmt=%s in %u frame size enums\n", +++ w, h, av_fourcc2str(fcc), i); +++ return err; +++ } +++ +++ switch (fs.type) { +++ case V4L2_FRMSIZE_TYPE_DISCRETE: +++ av_log(avctx, AV_LOG_TRACE, "%s[%d]: Discrete: %dx%d\n", __func__, i, +++ fs.discrete.width,fs.discrete.height); +++ if (w == fs.discrete.width && h == fs.discrete.height) +++ return 0; +++ break; +++ case V4L2_FRMSIZE_TYPE_STEPWISE: +++ av_log(avctx, AV_LOG_TRACE, "%s[%d]: Stepwise: Min: %dx%d Max: %dx%d, Step: %dx%d\n", __func__, i, +++ fs.stepwise.min_width, fs.stepwise.min_height, +++ fs.stepwise.max_width, fs.stepwise.max_height, +++ fs.stepwise.step_width,fs.stepwise.step_height); +++ if (w >= fs.stepwise.min_width && w <= fs.stepwise.max_width && +++ h >= fs.stepwise.min_height && h <= fs.stepwise.max_height && +++ (w - fs.stepwise.min_width) % fs.stepwise.step_width == 0 && +++ (h - fs.stepwise.min_height) % fs.stepwise.step_height == 0) +++ return 0; +++ break; +++ case V4L2_FRMSIZE_TYPE_CONTINUOUS: +++ av_log(avctx, AV_LOG_TRACE, "%s[%d]: Continuous: Min: %dx%d Max: %dx%d, Step: %dx%d\n", __func__, 
i, +++ fs.stepwise.min_width, fs.stepwise.min_height, +++ fs.stepwise.max_width, fs.stepwise.max_height, +++ fs.stepwise.step_width,fs.stepwise.step_height); +++ if (w >= fs.stepwise.min_width && w <= fs.stepwise.max_width && +++ h >= fs.stepwise.min_height && h <= fs.stepwise.max_height) +++ return 0; +++ break; +++ default: +++ av_log(avctx, AV_LOG_ERROR, "Unexpected framesize enum: %d", fs.type); +++ return AVERROR(EINVAL); +++ } +++ } +++} +++ +++static int +++get_quirks(AVCodecContext * const avctx, V4L2m2mContext * const s) +++{ +++ struct v4l2_capability cap; +++ +++ memset(&cap, 0, sizeof(cap)); +++ while (ioctl(s->fd, VIDIOC_QUERYCAP, &cap) != 0) { +++ int err = errno; +++ if (err == EINTR) +++ continue; +++ av_log(avctx, AV_LOG_ERROR, "V4L2: Failed to get capabilities: %s\n", strerror(err)); +++ return AVERROR(err); +++ } +++ +++ // Could be made table driven if we have a few more but right now there +++ // seems no point +++ +++ // Meson (amlogic) always gives a resolution changed event after output +++ // streamon and userspace must (re)allocate capture buffers and streamon +++ // capture to clear the event even if the capture buffers were the right +++ // size in the first place. +++ if (strcmp(cap.driver, "meson-vdec") == 0) +++ s->quirks |= FF_V4L2_QUIRK_REINIT_ALWAYS | FF_V4L2_QUIRK_ENUM_FRAMESIZES_BROKEN; +++ +++ av_log(avctx, AV_LOG_DEBUG, "Driver '%s': Quirks=%#x\n", cap.driver, s->quirks); +++ return 0; +++} +++ +++// This heuristic is for H264 but use for everything +++static uint32_t max_coded_size(const AVCodecContext * const avctx) +++{ +++ uint32_t wxh = avctx->coded_width * avctx->coded_height; +++ uint32_t size; +++ +++ size = wxh * 3 / 2; +++ // H.264 Annex A table A-1 gives minCR which is either 2 or 4 +++ // unfortunately that doesn't yield an actually useful limit +++ // and it should be noted that frame 0 is special cased to allow +++ // a bigger number which really isn't helpful for us. 
So just pick +++ // frame_size / 2 +++ size /= 2; +++ // Add 64k to allow for any overheads and/or encoder hopefulness +++ // with small WxH +++ return size + (1 << 16); +++} +++ +++static void +++parse_extradata(AVCodecContext * const avctx, V4L2m2mContext * const s) +++{ +++ s->reorder_size = 0; +++ +++ if (!avctx->extradata || !avctx->extradata_size) +++ return; +++ +++ switch (avctx->codec_id) { +++#if CONFIG_H264_DECODER +++ case AV_CODEC_ID_H264: +++ { +++ H264ParamSets ps = {{NULL}}; +++ int is_avc = 0; +++ int nal_length_size = 0; +++ int ret; +++ +++ ret = ff_h264_decode_extradata(avctx->extradata, avctx->extradata_size, +++ &ps, &is_avc, &nal_length_size, +++ avctx->err_recognition, avctx); +++ if (ret > 0) { +++ const SPS * sps = NULL; +++ unsigned int i; +++ for (i = 0; i != MAX_SPS_COUNT; ++i) { +++ if (ps.sps_list[i]) { +++ sps = (const SPS *)ps.sps_list[i]->data; +++ break; +++ } +++ } +++ if (sps) { +++ avctx->profile = ff_h264_get_profile(sps); +++ avctx->level = sps->level_idc; +++ s->reorder_size = sps->num_reorder_frames; +++ } +++ } +++ ff_h264_ps_uninit(&ps); +++ break; +++ } +++#endif +++#if CONFIG_HEVC_DECODER +++ case AV_CODEC_ID_HEVC: +++ { +++ HEVCParamSets ps = {{NULL}}; +++ HEVCSEI sei = {{{{0}}}}; +++ int is_nalff = 0; +++ int nal_length_size = 0; +++ int ret; +++ +++ ret = ff_hevc_decode_extradata(avctx->extradata, avctx->extradata_size, +++ &ps, &sei, &is_nalff, &nal_length_size, +++ avctx->err_recognition, 0, avctx); +++ if (ret > 0) { +++ const HEVCSPS * sps = NULL; +++ unsigned int i; +++ for (i = 0; i != HEVC_MAX_SPS_COUNT; ++i) { +++ if (ps.sps_list[i]) { +++ sps = (const HEVCSPS *)ps.sps_list[i]->data; +++ break; +++ } +++ } +++ if (sps) { +++ avctx->profile = sps->ptl.general_ptl.profile_idc; +++ avctx->level = sps->ptl.general_ptl.level_idc; +++ s->reorder_size = sps->temporal_layer[sps->max_sub_layers - 1].max_dec_pic_buffering; +++ } +++ } +++ ff_hevc_ps_uninit(&ps); +++ ff_hevc_reset_sei(&sei); +++ break; +++ } +++#endif +++ default: +++ break; +++ } +++} ++ ++ static av_cold int v4l2_decode_init(AVCodecContext *avctx) ++ { ++ V4L2Context *capture, *output; ++ V4L2m2mContext *s; ++ V4L2m2mPriv *priv = avctx->priv_data; +++ int gf_pix_fmt; ++ int ret; ++ +++ av_log(avctx, AV_LOG_TRACE, "<<< %s\n", __func__); +++ +++ if (avctx->codec_id == AV_CODEC_ID_H264) { +++ if (avctx->ticks_per_frame == 1) { +++ if(avctx->time_base.den < INT_MAX/2) { +++ avctx->time_base.den *= 2; +++ } else +++ avctx->time_base.num /= 2; +++ } +++ avctx->ticks_per_frame = 2; +++ } +++ ++ ret = ff_v4l2_m2m_create_context(priv, &s); ++ if (ret < 0) ++ return ret; ++ +++ parse_extradata(avctx, s); +++ +++ xlat_init(&s->xlat); +++ pts_stats_init(&s->pts_stat, avctx, "decoder"); +++ ++ capture = &s->capture; ++ output = &s->output; ++ ++@@ -196,14 +1104,65 @@ static av_cold int v4l2_decode_init(AVCodecContext *avctx) ++ * by the v4l2 driver; this event will trigger a full pipeline reconfig and ++ * the proper values will be retrieved from the kernel driver. 
++ */ ++- output->height = capture->height = avctx->coded_height; ++- output->width = capture->width = avctx->coded_width; +++// output->height = capture->height = avctx->coded_height; +++// output->width = capture->width = avctx->coded_width; +++ output->height = capture->height = 0; +++ output->width = capture->width = 0; ++ ++ output->av_codec_id = avctx->codec_id; ++ output->av_pix_fmt = AV_PIX_FMT_NONE; +++ output->min_buf_size = max_coded_size(avctx); ++ ++ capture->av_codec_id = AV_CODEC_ID_RAWVIDEO; ++ capture->av_pix_fmt = avctx->pix_fmt; +++ capture->min_buf_size = 0; +++ +++ /* the client requests the codec to generate DRM frames: +++ * - data[0] will therefore point to the returned AVDRMFrameDescriptor +++ * check the ff_v4l2_buffer_to_avframe conversion function. +++ * - the DRM frame format is passed in the DRM frame descriptor layer. +++ * check the v4l2_get_drm_frame function. +++ */ +++ +++ avctx->sw_pix_fmt = avctx->pix_fmt; +++ gf_pix_fmt = ff_get_format(avctx, avctx->codec->pix_fmts); +++ av_log(avctx, AV_LOG_DEBUG, "avctx requested=%d (%s) %dx%d; get_format requested=%d (%s)\n", +++ avctx->pix_fmt, av_get_pix_fmt_name(avctx->pix_fmt), +++ avctx->coded_width, avctx->coded_height, +++ gf_pix_fmt, av_get_pix_fmt_name(gf_pix_fmt)); +++ +++ if (gf_pix_fmt == AV_PIX_FMT_DRM_PRIME || avctx->pix_fmt == AV_PIX_FMT_DRM_PRIME) { +++ avctx->pix_fmt = AV_PIX_FMT_DRM_PRIME; +++ s->output_drm = 1; +++ } +++ else { +++ capture->av_pix_fmt = gf_pix_fmt; +++ s->output_drm = 0; +++ } +++ +++ s->db_ctl = NULL; +++ if (priv->dmabuf_alloc != NULL && strcmp(priv->dmabuf_alloc, "v4l2") != 0) { +++ if (strcmp(priv->dmabuf_alloc, "cma") == 0) +++ s->db_ctl = dmabufs_ctl_new(); +++ else { +++ av_log(avctx, AV_LOG_ERROR, "Unknown dmabuf alloc method: '%s'\n", priv->dmabuf_alloc); +++ return AVERROR(EINVAL); +++ } +++ if (!s->db_ctl) { +++ av_log(avctx, AV_LOG_ERROR, "Can't open dmabuf provider '%s'\n", priv->dmabuf_alloc); +++ return AVERROR(ENOMEM); +++ } +++ } +++ +++ s->device_ref = av_hwdevice_ctx_alloc(AV_HWDEVICE_TYPE_DRM); +++ if (!s->device_ref) { +++ ret = AVERROR(ENOMEM); +++ return ret; +++ } +++ +++ ret = av_hwdevice_ctx_init(s->device_ref); +++ if (ret < 0) +++ return ret; ++ ++ s->avctx = avctx; ++ ret = ff_v4l2_m2m_codec_init(priv); ++@@ -212,12 +1171,88 @@ static av_cold int v4l2_decode_init(AVCodecContext *avctx) ++ return ret; ++ } ++ ++- return v4l2_prepare_decoder(s); +++ if (avctx->extradata && +++ (ret = copy_extradata(avctx, avctx->extradata, avctx->extradata_size, &s->extdata_data, &s->extdata_size)) != 0) { +++ av_log(avctx, AV_LOG_ERROR, "Failed to copy extradata from context: %s\n", av_err2str(ret)); +++#if DUMP_FAILED_EXTRADATA +++ log_dump(avctx, AV_LOG_INFO, avctx->extradata, avctx->extradata_size); +++#endif +++ return ret; +++ } +++ +++ if ((ret = v4l2_prepare_decoder(s)) < 0) +++ return ret; +++ +++ if ((ret = get_quirks(avctx, s)) != 0) +++ return ret; +++ +++ if ((ret = check_size(avctx, s)) != 0) +++ return ret; +++ +++ if ((ret = check_profile(avctx, s)) != 0) { +++ av_log(avctx, AV_LOG_WARNING, "Profile %d not supported by decode\n", avctx->profile); +++ return ret; +++ } +++ return 0; ++ } ++ ++ static av_cold int v4l2_decode_close(AVCodecContext *avctx) ++ { ++- return ff_v4l2_m2m_codec_end(avctx->priv_data); +++ int rv; +++ av_log(avctx, AV_LOG_TRACE, "<<< %s\n", __func__); +++ rv = ff_v4l2_m2m_codec_end(avctx->priv_data); +++ av_log(avctx, AV_LOG_TRACE, ">>> %s: rv=%d\n", __func__, rv); +++ return rv; +++} +++ +++static void 
v4l2_decode_flush(AVCodecContext *avctx) +++{ +++ // An alternatve and more drastic form of flush is to simply do this: +++ // v4l2_decode_close(avctx); +++ // v4l2_decode_init(avctx); +++ // The downside is that this keeps a decoder open until all the frames +++ // associated with it have been returned. This is a bit wasteful on +++ // possibly limited h/w resources and fails on a Pi for this reason unless +++ // more GPU mem is allocated than is the default. +++ +++ V4L2m2mPriv * const priv = avctx->priv_data; +++ V4L2m2mContext * const s = priv->context; +++ V4L2Context * const output = &s->output; +++ V4L2Context * const capture = &s->capture; +++ +++ av_log(avctx, AV_LOG_TRACE, "<<< %s: streamon=%d\n", __func__, output->streamon); +++ +++ // Reflushing everything is benign, quick and avoids having to worry about +++ // states like EOS processing so don't try to optimize out (having got it +++ // wrong once) +++ +++ ff_v4l2_context_set_status(output, VIDIOC_STREAMOFF); +++ +++ // Clear any buffered input packet +++ av_packet_unref(&s->buf_pkt); +++ +++ // Clear a pending EOS +++ if (ff_v4l2_ctx_eos(capture)) { +++ // Arguably we could delay this but this is easy and doesn't require +++ // thought or extra vars +++ ff_v4l2_context_set_status(capture, VIDIOC_STREAMOFF); +++ ff_v4l2_context_set_status(capture, VIDIOC_STREAMON); +++ } +++ +++ // V4L2 makes no guarantees about whether decoded frames are flushed or not +++ // so mark all frames we are tracking to be discarded if they appear +++ xlat_flush(&s->xlat); +++ +++ // resend extradata +++ s->extdata_sent = 0; +++ // clear status vars +++ s->running = 0; +++ s->draining = 0; +++ output->done = 0; +++ capture->done = 0; +++ +++ // Stream on will occur when we actually submit a new frame +++ av_log(avctx, AV_LOG_TRACE, ">>> %s\n", __func__); ++ } ++ ++ #define OFFSET(x) offsetof(V4L2m2mPriv, x) ++@@ -227,9 +1262,16 @@ static const AVOption options[] = { ++ V4L_M2M_DEFAULT_OPTS, ++ { "num_capture_buffers", "Number of buffers in the capture context", ++ OFFSET(num_capture_buffers), AV_OPT_TYPE_INT, {.i64 = 20}, 2, INT_MAX, FLAGS }, +++ { "pixel_format", "Pixel format to be used by the decoder", OFFSET(pix_fmt), AV_OPT_TYPE_PIXEL_FMT, {.i64 = AV_PIX_FMT_NONE}, AV_PIX_FMT_NONE, AV_PIX_FMT_NB, FLAGS }, +++ { "dmabuf_alloc", "Dmabuf alloc method", OFFSET(dmabuf_alloc), AV_OPT_TYPE_STRING, {.str = NULL}, 0, 0, FLAGS }, ++ { NULL}, ++ }; ++ +++static const AVCodecHWConfigInternal *v4l2_m2m_hw_configs[] = { +++ HW_CONFIG_INTERNAL(DRM_PRIME), +++ NULL +++}; +++ ++ #define M2MDEC_CLASS(NAME) \ ++ static const AVClass v4l2_m2m_ ## NAME ## _dec_class = { \ ++ .class_name = #NAME "_v4l2m2m_decoder", \ ++@@ -250,11 +1292,17 @@ static const AVOption options[] = { ++ .init = v4l2_decode_init, \ ++ FF_CODEC_RECEIVE_FRAME_CB(v4l2_receive_frame), \ ++ .close = v4l2_decode_close, \ +++ .flush = v4l2_decode_flush, \ ++ .bsfs = bsf_name, \ ++ .p.capabilities = AV_CODEC_CAP_HARDWARE | AV_CODEC_CAP_DELAY | AV_CODEC_CAP_AVOID_PROBING, \ ++ .caps_internal = FF_CODEC_CAP_NOT_INIT_THREADSAFE | \ ++ FF_CODEC_CAP_SETS_PKT_DTS | FF_CODEC_CAP_INIT_CLEANUP, \ ++ .p.wrapper_name = "v4l2m2m", \ +++ .p.pix_fmts = (const enum AVPixelFormat[]) { AV_PIX_FMT_DRM_PRIME, \ +++ AV_PIX_FMT_NV12, \ +++ AV_PIX_FMT_YUV420P, \ +++ AV_PIX_FMT_NONE}, \ +++ .hw_configs = v4l2_m2m_hw_configs, \ ++ } ++ ++ M2MDEC(h264, "H.264", AV_CODEC_ID_H264, "h264_mp4toannexb"); ++diff --git a/libavcodec/v4l2_m2m_enc.c b/libavcodec/v4l2_m2m_enc.c ++index 9a0837ecf3..524e9424a5 100644 ++--- 
a/libavcodec/v4l2_m2m_enc.c +++++ b/libavcodec/v4l2_m2m_enc.c ++@@ -24,6 +24,8 @@ ++ #include ++ #include ++ #include +++#include +++ ++ #include "encode.h" ++ #include "libavcodec/avcodec.h" ++ #include "libavutil/pixdesc.h" ++@@ -38,6 +40,34 @@ ++ #define MPEG_CID(x) V4L2_CID_MPEG_VIDEO_##x ++ #define MPEG_VIDEO(x) V4L2_MPEG_VIDEO_##x ++ +++// P030 should be defined in drm_fourcc.h and hopefully will be sometime +++// in the future but until then... +++#ifndef DRM_FORMAT_P030 +++#define DRM_FORMAT_P030 fourcc_code('P', '0', '3', '0') +++#endif +++ +++#ifndef DRM_FORMAT_NV15 +++#define DRM_FORMAT_NV15 fourcc_code('N', 'V', '1', '5') +++#endif +++ +++#ifndef DRM_FORMAT_NV20 +++#define DRM_FORMAT_NV20 fourcc_code('N', 'V', '2', '0') +++#endif +++ +++#ifndef V4L2_CID_CODEC_BASE +++#define V4L2_CID_CODEC_BASE V4L2_CID_MPEG_BASE +++#endif +++ +++// V4L2_PIX_FMT_NV12_10_COL128 and V4L2_PIX_FMT_NV12_COL128 should be defined +++// in videodev2.h hopefully will be sometime in the future but until then... +++#ifndef V4L2_PIX_FMT_NV12_10_COL128 +++#define V4L2_PIX_FMT_NV12_10_COL128 v4l2_fourcc('N', 'C', '3', '0') +++#endif +++ +++#ifndef V4L2_PIX_FMT_NV12_COL128 +++#define V4L2_PIX_FMT_NV12_COL128 v4l2_fourcc('N', 'C', '1', '2') /* 12 Y/CbCr 4:2:0 128 pixel wide column */ +++#endif +++ ++ static inline void v4l2_set_timeperframe(V4L2m2mContext *s, unsigned int num, unsigned int den) ++ { ++ struct v4l2_streamparm parm = { 0 }; ++@@ -148,15 +178,14 @@ static inline int v4l2_mpeg4_profile_from_ff(int p) ++ static int v4l2_check_b_frame_support(V4L2m2mContext *s) ++ { ++ if (s->avctx->max_b_frames) ++- av_log(s->avctx, AV_LOG_WARNING, "Encoder does not support b-frames yet\n"); +++ av_log(s->avctx, AV_LOG_WARNING, "Encoder does not support %d b-frames yet\n", s->avctx->max_b_frames); ++ ++- v4l2_set_ext_ctrl(s, MPEG_CID(B_FRAMES), 0, "number of B-frames", 0); +++ v4l2_set_ext_ctrl(s, MPEG_CID(B_FRAMES), s->avctx->max_b_frames, "number of B-frames", 1); ++ v4l2_get_ext_ctrl(s, MPEG_CID(B_FRAMES), &s->avctx->max_b_frames, "number of B-frames", 0); ++ if (s->avctx->max_b_frames == 0) ++ return 0; ++ ++ avpriv_report_missing_feature(s->avctx, "DTS/PTS calculation for V4L2 encoding"); ++- ++ return AVERROR_PATCHWELCOME; ++ } ++ ++@@ -271,17 +300,208 @@ static int v4l2_prepare_encoder(V4L2m2mContext *s) ++ return 0; ++ } ++ +++static int avdrm_to_v4l2(struct v4l2_format * const format, const AVFrame * const frame) +++{ +++ const AVDRMFrameDescriptor *const src = (const AVDRMFrameDescriptor *)frame->data[0]; +++ +++ const uint32_t drm_fmt = src->layers[0].format; +++ // Treat INVALID as LINEAR +++ const uint64_t mod = src->objects[0].format_modifier == DRM_FORMAT_MOD_INVALID ? 
+++ DRM_FORMAT_MOD_LINEAR : src->objects[0].format_modifier; +++ uint32_t pix_fmt = 0; +++ uint32_t w = 0; +++ uint32_t h = 0; +++ uint32_t bpl = src->layers[0].planes[0].pitch; +++ +++ // We really don't expect multiple layers +++ // All formats that we currently cope with are single object +++ +++ if (src->nb_layers != 1 || src->nb_objects != 1) +++ return AVERROR(EINVAL); +++ +++ switch (drm_fmt) { +++ case DRM_FORMAT_YUV420: +++ if (mod == DRM_FORMAT_MOD_LINEAR) { +++ if (src->layers[0].nb_planes != 3) +++ break; +++ pix_fmt = V4L2_PIX_FMT_YUV420; +++ h = src->layers[0].planes[1].offset / bpl; +++ w = bpl; +++ } +++ break; +++ +++ case DRM_FORMAT_NV12: +++ if (mod == DRM_FORMAT_MOD_LINEAR) { +++ if (src->layers[0].nb_planes != 2) +++ break; +++ pix_fmt = V4L2_PIX_FMT_NV12; +++ h = src->layers[0].planes[1].offset / bpl; +++ w = bpl; +++ } +++ else if (fourcc_mod_broadcom_mod(mod) == DRM_FORMAT_MOD_BROADCOM_SAND128) { +++ if (src->layers[0].nb_planes != 2) +++ break; +++ pix_fmt = V4L2_PIX_FMT_NV12_COL128; +++ w = bpl; +++ h = src->layers[0].planes[1].offset / 128; +++ bpl = fourcc_mod_broadcom_param(mod); +++ } +++ break; +++ +++ case DRM_FORMAT_P030: +++ if (fourcc_mod_broadcom_mod(mod) == DRM_FORMAT_MOD_BROADCOM_SAND128) { +++ if (src->layers[0].nb_planes != 2) +++ break; +++ pix_fmt = V4L2_PIX_FMT_NV12_10_COL128; +++ w = bpl / 2; // Matching lie to how we construct this +++ h = src->layers[0].planes[1].offset / 128; +++ bpl = fourcc_mod_broadcom_param(mod); +++ } +++ break; +++ +++ default: +++ break; +++ } +++ +++ if (!pix_fmt) +++ return AVERROR(EINVAL); +++ +++ if (V4L2_TYPE_IS_MULTIPLANAR(format->type)) { +++ struct v4l2_pix_format_mplane *const pix = &format->fmt.pix_mp; +++ +++ pix->width = w; +++ pix->height = h; +++ pix->pixelformat = pix_fmt; +++ pix->plane_fmt[0].bytesperline = bpl; +++ pix->num_planes = 1; +++ } +++ else { +++ struct v4l2_pix_format *const pix = &format->fmt.pix; +++ +++ pix->width = w; +++ pix->height = h; +++ pix->pixelformat = pix_fmt; +++ pix->bytesperline = bpl; +++ } +++ +++ return 0; +++} +++ +++// Do we have similar enough formats to be usable? 
+++static int fmt_eq(const struct v4l2_format * const a, const struct v4l2_format * const b) +++{ +++ if (a->type != b->type) +++ return 0; +++ +++ if (V4L2_TYPE_IS_MULTIPLANAR(a->type)) { +++ const struct v4l2_pix_format_mplane *const pa = &a->fmt.pix_mp; +++ const struct v4l2_pix_format_mplane *const pb = &b->fmt.pix_mp; +++ unsigned int i; +++ if (pa->pixelformat != pb->pixelformat || +++ pa->num_planes != pb->num_planes) +++ return 0; +++ for (i = 0; i != pa->num_planes; ++i) { +++ if (pa->plane_fmt[i].bytesperline != pb->plane_fmt[i].bytesperline) +++ return 0; +++ } +++ } +++ else { +++ const struct v4l2_pix_format *const pa = &a->fmt.pix; +++ const struct v4l2_pix_format *const pb = &b->fmt.pix; +++ if (pa->pixelformat != pb->pixelformat || +++ pa->bytesperline != pb->bytesperline) +++ return 0; +++ } +++ return 1; +++} +++ +++static inline int q_full(const V4L2Context *const output) +++{ +++ return ff_v4l2_context_q_count(output) == output->num_buffers; +++} +++ ++ static int v4l2_send_frame(AVCodecContext *avctx, const AVFrame *frame) ++ { ++ V4L2m2mContext *s = ((V4L2m2mPriv*)avctx->priv_data)->context; ++ V4L2Context *const output = &s->output; +++ int rv; +++ const int needs_slot = q_full(output); +++ +++ av_log(avctx, AV_LOG_TRACE, "<<< %s; needs_slot=%d\n", __func__, needs_slot); +++ +++ // Signal EOF if needed (doesn't need q slot) +++ if (!frame) { +++ av_log(avctx, AV_LOG_TRACE, "--- %s: EOS\n", __func__); +++ return ff_v4l2_context_enqueue_frame(output, frame); +++ } +++ +++ if ((rv = ff_v4l2_dq_all(output, needs_slot? 500 : 0)) != 0) { +++ // We should be able to return AVERROR(EAGAIN) to indicate buffer +++ // exhaustion, but ffmpeg currently treats that as fatal. +++ av_log(avctx, AV_LOG_WARNING, "Failed to get buffer for src frame: %s\n", av_err2str(rv)); +++ return rv; +++ } +++ +++ if (s->input_drm && !output->streamon) { +++ struct v4l2_format req_format = {.type = output->format.type}; +++ +++ // Set format when we first get a buffer +++ if ((rv = avdrm_to_v4l2(&req_format, frame)) != 0) { +++ av_log(avctx, AV_LOG_ERROR, "Failed to get V4L2 format from DRM_PRIME frame\n"); +++ return rv; +++ } +++ +++ ff_v4l2_context_release(output); +++ +++ output->format = req_format; +++ +++ if ((rv = ff_v4l2_context_set_format(output)) != 0) { +++ av_log(avctx, AV_LOG_ERROR, "Failed to set V4L2 format\n"); +++ return rv; +++ } +++ +++ if (!fmt_eq(&req_format, &output->format)) { +++ av_log(avctx, AV_LOG_ERROR, "Format mismatch after setup\n"); +++ return AVERROR(EINVAL); +++ } +++ +++ output->selection.top = frame->crop_top; +++ output->selection.left = frame->crop_left; +++ output->selection.width = av_frame_cropped_width(frame); +++ output->selection.height = av_frame_cropped_height(frame); +++ +++ if ((rv = ff_v4l2_context_init(output)) != 0) { +++ av_log(avctx, AV_LOG_ERROR, "Failed to (re)init context\n"); +++ return rv; +++ } +++ +++ { +++ struct v4l2_selection selection = { +++ .type = V4L2_BUF_TYPE_VIDEO_OUTPUT, +++ .target = V4L2_SEL_TGT_CROP, +++ .r = output->selection +++ }; +++ if (ioctl(s->fd, VIDIOC_S_SELECTION, &selection) != 0) { +++ av_log(avctx, AV_LOG_WARNING, "S_SELECTION (CROP) %dx%d @ %d,%d failed: %s\n", +++ selection.r.width, selection.r.height, selection.r.left, selection.r.top, +++ av_err2str(AVERROR(errno))); +++ } +++ av_log(avctx, AV_LOG_TRACE, "S_SELECTION (CROP) %dx%d @ %d,%d OK\n", +++ selection.r.width, selection.r.height, selection.r.left, selection.r.top); +++ } +++ } ++ ++ #ifdef V4L2_CID_MPEG_VIDEO_FORCE_KEY_FRAME ++- if (frame && 
frame->pict_type == AV_PICTURE_TYPE_I) +++ if (frame->pict_type == AV_PICTURE_TYPE_I) ++ v4l2_set_ext_ctrl(s, MPEG_CID(FORCE_KEY_FRAME), 0, "force key frame", 1); ++ #endif ++ ++- return ff_v4l2_context_enqueue_frame(output, frame); +++ rv = ff_v4l2_context_enqueue_frame(output, frame); +++ if (rv) { +++ av_log(avctx, AV_LOG_ERROR, "Enqueue frame failed: %s\n", av_err2str(rv)); +++ } +++ +++ return rv; ++ } ++ ++ static int v4l2_receive_packet(AVCodecContext *avctx, AVPacket *avpkt) ++@@ -292,6 +512,11 @@ static int v4l2_receive_packet(AVCodecContext *avctx, AVPacket *avpkt) ++ AVFrame *frame = s->frame; ++ int ret; ++ +++ av_log(avctx, AV_LOG_TRACE, "<<< %s: qlen out %d cap %d\n", __func__, +++ ff_v4l2_context_q_count(output), ff_v4l2_context_q_count(capture)); +++ +++ ff_v4l2_dq_all(output, 0); +++ ++ if (s->draining) ++ goto dequeue; ++ ++@@ -328,7 +553,115 @@ static int v4l2_receive_packet(AVCodecContext *avctx, AVPacket *avpkt) ++ } ++ ++ dequeue: ++- return ff_v4l2_context_dequeue_packet(capture, avpkt); +++ // Dequeue a frame +++ for (;;) { +++ int t = q_full(output) ? -1 : s->draining ? 300 : 0; +++ int rv2; +++ +++ // If output is full wait for either a packet or output to become not full +++ ret = ff_v4l2_context_dequeue_packet(capture, avpkt, t); +++ +++ // If output was full retry packet dequeue +++ t = (ret != AVERROR(EAGAIN) || t != -1) ? 0 : 300; +++ rv2 = ff_v4l2_dq_all(output, t); +++ if (t == 0 || rv2 != 0) +++ break; +++ } +++ if (ret) +++ return (s->draining && ret == AVERROR(EAGAIN)) ? AVERROR_EOF : ret; +++ +++ if (capture->first_buf == 1) { +++ uint8_t * data; +++ const int len = avpkt->size; +++ +++ // 1st buffer after streamon should be SPS/PPS +++ capture->first_buf = 2; +++ +++ // Clear both possible stores so there is no chance of confusion +++ av_freep(&s->extdata_data); +++ s->extdata_size = 0; +++ av_freep(&avctx->extradata); +++ avctx->extradata_size = 0; +++ +++ if ((data = av_malloc(len + AV_INPUT_BUFFER_PADDING_SIZE)) == NULL) +++ goto fail_no_mem; +++ +++ memcpy(data, avpkt->data, len); +++ av_packet_unref(avpkt); +++ +++ // We need to copy the header, but keep local if not global +++ if ((avctx->flags & AV_CODEC_FLAG_GLOBAL_HEADER) != 0) { +++ avctx->extradata = data; +++ avctx->extradata_size = len; +++ } +++ else { +++ s->extdata_data = data; +++ s->extdata_size = len; +++ } +++ +++ ret = ff_v4l2_context_dequeue_packet(capture, avpkt, 0); +++ ff_v4l2_dq_all(output, 0); +++ if (ret) +++ return ret; +++ } +++ +++ // First frame must be key so mark as such even if encoder forgot +++ if (capture->first_buf == 2) { +++ avpkt->flags |= AV_PKT_FLAG_KEY; +++ +++ // Add any extradata to the 1st packet we emit as we cannot create it at init +++ if (avctx->extradata_size > 0 && avctx->extradata) { +++ void * const side = av_packet_new_side_data(avpkt, +++ AV_PKT_DATA_NEW_EXTRADATA, +++ avctx->extradata_size); +++ if (!side) +++ goto fail_no_mem; +++ +++ memcpy(side, avctx->extradata, avctx->extradata_size); +++ } +++ } +++ +++ // Add SPS/PPS to the start of every key frame if non-global headers +++ if ((avpkt->flags & AV_PKT_FLAG_KEY) != 0 && s->extdata_size != 0) { +++ const size_t newlen = s->extdata_size + avpkt->size; +++ AVBufferRef * const buf = av_buffer_alloc(newlen + AV_INPUT_BUFFER_PADDING_SIZE); +++ +++ if (buf == NULL) +++ goto fail_no_mem; +++ +++ memcpy(buf->data, s->extdata_data, s->extdata_size); +++ memcpy(buf->data + s->extdata_size, avpkt->data, avpkt->size); +++ +++ av_buffer_unref(&avpkt->buf); +++ avpkt->buf = buf; +++ avpkt->data = 
buf->data; +++ avpkt->size = newlen; +++ } +++ else if (ff_v4l2_context_q_count(capture) < 2) { +++ // Avoid running out of capture buffers +++ // In most cases the buffers will be returned quickly in which case +++ // we don't copy and can use the v4l2 buffers directly but sometimes +++ // ffmpeg seems to hold onto all of them for a long time (.mkv +++ // creation?) so avoid deadlock in those cases. +++ AVBufferRef * const buf = av_buffer_alloc(avpkt->size + AV_INPUT_BUFFER_PADDING_SIZE); +++ if (buf == NULL) +++ goto fail_no_mem; +++ +++ memcpy(buf->data, avpkt->data, avpkt->size); +++ av_buffer_unref(&avpkt->buf); // Will recycle the V4L2 buffer +++ +++ avpkt->buf = buf; +++ avpkt->data = buf->data; +++ } +++ +++ capture->first_buf = 0; +++ return 0; +++ +++fail_no_mem: +++ av_log(avctx, AV_LOG_ERROR, "Rx pkt failed: No memory\n"); +++ ret = AVERROR(ENOMEM); +++ av_packet_unref(avpkt); +++ return ret; ++ } ++ ++ static av_cold int v4l2_encode_init(AVCodecContext *avctx) ++@@ -340,6 +673,8 @@ static av_cold int v4l2_encode_init(AVCodecContext *avctx) ++ uint32_t v4l2_fmt_output; ++ int ret; ++ +++ av_log(avctx, AV_LOG_INFO, " <<< %s: fmt=%d/%d\n", __func__, avctx->pix_fmt, avctx->sw_pix_fmt); +++ ++ ret = ff_v4l2_m2m_create_context(priv, &s); ++ if (ret < 0) ++ return ret; ++@@ -347,13 +682,17 @@ static av_cold int v4l2_encode_init(AVCodecContext *avctx) ++ capture = &s->capture; ++ output = &s->output; ++ +++ s->input_drm = (avctx->pix_fmt == AV_PIX_FMT_DRM_PRIME); +++ ++ /* common settings output/capture */ ++ output->height = capture->height = avctx->height; ++ output->width = capture->width = avctx->width; ++ ++ /* output context */ ++ output->av_codec_id = AV_CODEC_ID_RAWVIDEO; ++- output->av_pix_fmt = avctx->pix_fmt; +++ output->av_pix_fmt = !s->input_drm ? avctx->pix_fmt : +++ avctx->sw_pix_fmt != AV_PIX_FMT_NONE ? 
avctx->sw_pix_fmt : +++ AV_PIX_FMT_YUV420P; ++ ++ /* capture context */ ++ capture->av_codec_id = avctx->codec_id; ++@@ -372,7 +711,7 @@ static av_cold int v4l2_encode_init(AVCodecContext *avctx) ++ v4l2_fmt_output = output->format.fmt.pix.pixelformat; ++ ++ pix_fmt_output = ff_v4l2_format_v4l2_to_avfmt(v4l2_fmt_output, AV_CODEC_ID_RAWVIDEO); ++- if (pix_fmt_output != avctx->pix_fmt) { +++ if (!s->input_drm && pix_fmt_output != avctx->pix_fmt) { ++ const AVPixFmtDescriptor *desc = av_pix_fmt_desc_get(pix_fmt_output); ++ av_log(avctx, AV_LOG_ERROR, "Encoder requires %s pixel format.\n", desc->name); ++ return AVERROR(EINVAL); ++@@ -390,9 +729,10 @@ static av_cold int v4l2_encode_close(AVCodecContext *avctx) ++ #define FLAGS AV_OPT_FLAG_VIDEO_PARAM | AV_OPT_FLAG_ENCODING_PARAM ++ ++ #define V4L_M2M_CAPTURE_OPTS \ ++- V4L_M2M_DEFAULT_OPTS,\ +++ { "num_output_buffers", "Number of buffers in the output context",\ +++ OFFSET(num_output_buffers), AV_OPT_TYPE_INT, { .i64 = 0 }, 0, INT_MAX, FLAGS },\ ++ { "num_capture_buffers", "Number of buffers in the capture context", \ ++- OFFSET(num_capture_buffers), AV_OPT_TYPE_INT, {.i64 = 4 }, 4, INT_MAX, FLAGS } +++ OFFSET(num_capture_buffers), AV_OPT_TYPE_INT, {.i64 = 8 }, 8, INT_MAX, FLAGS } ++ ++ static const AVOption mpeg4_options[] = { ++ V4L_M2M_CAPTURE_OPTS, ++diff --git a/libavcodec/v4l2_req_decode_q.c b/libavcodec/v4l2_req_decode_q.c ++new file mode 100644 ++index 0000000000..5b3fb958fa ++--- /dev/null +++++ b/libavcodec/v4l2_req_decode_q.c ++@@ -0,0 +1,84 @@ +++#include +++#include +++#include +++ +++#include "v4l2_req_decode_q.h" +++ +++int decode_q_in_q(const req_decode_ent * const d) +++{ +++ return d->in_q; +++} +++ +++void decode_q_add(req_decode_q * const q, req_decode_ent * const d) +++{ +++ pthread_mutex_lock(&q->q_lock); +++ if (!q->head) { +++ q->head = d; +++ q->tail = d; +++ d->prev = NULL; +++ } +++ else { +++ q->tail->next = d; +++ d->prev = q->tail; +++ q->tail = d; +++ } +++ d->next = NULL; +++ d->in_q = 1; +++ pthread_mutex_unlock(&q->q_lock); +++} +++ +++// Remove entry from Q - if head wake-up anything that was waiting +++void decode_q_remove(req_decode_q * const q, req_decode_ent * const d) +++{ +++ int try_signal = 0; +++ +++ if (!d->in_q) +++ return; +++ +++ pthread_mutex_lock(&q->q_lock); +++ if (d->prev) +++ d->prev->next = d->next; +++ else { +++ try_signal = 1; // Only need to signal if we were head +++ q->head = d->next; +++ } +++ +++ if (d->next) +++ d->next->prev = d->prev; +++ else +++ q->tail = d->prev; +++ +++ // Not strictly needed but makes debug easier +++ d->next = NULL; +++ d->prev = NULL; +++ d->in_q = 0; +++ pthread_mutex_unlock(&q->q_lock); +++ +++ if (try_signal) +++ pthread_cond_broadcast(&q->q_cond); +++} +++ +++void decode_q_wait(req_decode_q * const q, req_decode_ent * const d) +++{ +++ pthread_mutex_lock(&q->q_lock); +++ +++ while (q->head != d) +++ pthread_cond_wait(&q->q_cond, &q->q_lock); +++ +++ pthread_mutex_unlock(&q->q_lock); +++} +++ +++void decode_q_uninit(req_decode_q * const q) +++{ +++ pthread_mutex_destroy(&q->q_lock); +++ pthread_cond_destroy(&q->q_cond); +++} +++ +++void decode_q_init(req_decode_q * const q) +++{ +++ memset(q, 0, sizeof(*q)); +++ pthread_mutex_init(&q->q_lock, NULL); +++ pthread_cond_init(&q->q_cond, NULL); +++} +++ +++ ++diff --git a/libavcodec/v4l2_req_decode_q.h b/libavcodec/v4l2_req_decode_q.h ++new file mode 100644 ++index 0000000000..af7bbe1de4 ++--- /dev/null +++++ b/libavcodec/v4l2_req_decode_q.h ++@@ -0,0 +1,25 @@ +++#ifndef AVCODEC_V4L2_REQ_DECODE_Q_H 
+++#define AVCODEC_V4L2_REQ_DECODE_Q_H +++ +++typedef struct req_decode_ent { +++ struct req_decode_ent * next; +++ struct req_decode_ent * prev; +++ int in_q; +++} req_decode_ent; +++ +++typedef struct req_decode_q { +++ pthread_mutex_t q_lock; +++ pthread_cond_t q_cond; +++ req_decode_ent * head; +++ req_decode_ent * tail; +++} req_decode_q; +++ +++int decode_q_in_q(const req_decode_ent * const d); +++void decode_q_add(req_decode_q * const q, req_decode_ent * const d); +++void decode_q_remove(req_decode_q * const q, req_decode_ent * const d); +++void decode_q_wait(req_decode_q * const q, req_decode_ent * const d); +++void decode_q_uninit(req_decode_q * const q); +++void decode_q_init(req_decode_q * const q); +++ +++#endif +++ ++diff --git a/libavcodec/v4l2_req_devscan.c b/libavcodec/v4l2_req_devscan.c ++new file mode 100644 ++index 0000000000..cfa94d55c4 ++--- /dev/null +++++ b/libavcodec/v4l2_req_devscan.c ++@@ -0,0 +1,449 @@ +++#include +++#include +++#include +++#include +++#include +++#include +++ +++#include +++#include +++ +++#include +++#include +++ +++#include "v4l2_req_devscan.h" +++#include "v4l2_req_utils.h" +++ +++struct decdev { +++ enum v4l2_buf_type src_type; +++ uint32_t src_fmt_v4l2; +++ const char * vname; +++ const char * mname; +++}; +++ +++struct devscan { +++ struct decdev env; +++ unsigned int dev_size; +++ unsigned int dev_count; +++ struct decdev *devs; +++}; +++ +++static int video_src_pixfmt_supported(uint32_t fmt) +++{ +++ return 1; +++} +++ +++static void v4l2_setup_format(struct v4l2_format *format, unsigned int type, +++ unsigned int width, unsigned int height, +++ unsigned int pixelformat) +++{ +++ unsigned int sizeimage; +++ +++ memset(format, 0, sizeof(*format)); +++ format->type = type; +++ +++ sizeimage = V4L2_TYPE_IS_OUTPUT(type) ? 4 * 1024 * 1024 : 0; +++ +++ if (V4L2_TYPE_IS_MULTIPLANAR(type)) { +++ format->fmt.pix_mp.width = width; +++ format->fmt.pix_mp.height = height; +++ format->fmt.pix_mp.plane_fmt[0].sizeimage = sizeimage; +++ format->fmt.pix_mp.pixelformat = pixelformat; +++ } else { +++ format->fmt.pix.width = width; +++ format->fmt.pix.height = height; +++ format->fmt.pix.sizeimage = sizeimage; +++ format->fmt.pix.pixelformat = pixelformat; +++ } +++} +++ +++static int v4l2_set_format(int video_fd, unsigned int type, unsigned int pixelformat, +++ unsigned int width, unsigned int height) +++{ +++ struct v4l2_format format; +++ +++ v4l2_setup_format(&format, type, width, height, pixelformat); +++ +++ return ioctl(video_fd, VIDIOC_S_FMT, &format) ? -errno : 0; +++} +++ +++static int v4l2_query_capabilities(int video_fd, unsigned int *capabilities) +++{ +++ struct v4l2_capability capability = { 0 }; +++ int rc; +++ +++ rc = ioctl(video_fd, VIDIOC_QUERYCAP, &capability); +++ if (rc < 0) +++ return -errno; +++ +++ if (capabilities != NULL) { +++ if ((capability.capabilities & V4L2_CAP_DEVICE_CAPS) != 0) +++ *capabilities = capability.device_caps; +++ else +++ *capabilities = capability.capabilities; +++ } +++ +++ return 0; +++} +++ +++static int devscan_add(struct devscan *const scan, +++ enum v4l2_buf_type src_type, +++ uint32_t src_fmt_v4l2, +++ const char * vname, +++ const char * mname) +++{ +++ struct decdev *d; +++ +++ if (scan->dev_size <= scan->dev_count) { +++ unsigned int n = !scan->dev_size ? 
4 : scan->dev_size * 2; +++ d = realloc(scan->devs, n * sizeof(*d)); +++ if (!d) +++ return -ENOMEM; +++ scan->devs = d; +++ scan->dev_size = n; +++ } +++ +++ d = scan->devs + scan->dev_count; +++ d->src_type = src_type; +++ d->src_fmt_v4l2 = src_fmt_v4l2; +++ d->vname = strdup(vname); +++ if (!d->vname) +++ return -ENOMEM; +++ d->mname = strdup(mname); +++ if (!d->mname) { +++ free((char *)d->vname); +++ return -ENOMEM; +++ } +++ ++scan->dev_count; +++ return 0; +++} +++ +++void devscan_delete(struct devscan **const pScan) +++{ +++ unsigned int i; +++ struct devscan * const scan = *pScan; +++ +++ if (!scan) +++ return; +++ *pScan = NULL; +++ +++ for (i = 0; i < scan->dev_count; ++i) { +++ free((char*)scan->devs[i].mname); +++ free((char*)scan->devs[i].vname); +++ } +++ free(scan->devs); +++ free(scan); +++} +++ +++#define REQ_BUF_CAPS (\ +++ V4L2_BUF_CAP_SUPPORTS_DMABUF |\ +++ V4L2_BUF_CAP_SUPPORTS_REQUESTS |\ +++ V4L2_BUF_CAP_SUPPORTS_M2M_HOLD_CAPTURE_BUF) +++ +++static void probe_formats(void * const dc, +++ struct devscan *const scan, +++ const int fd, +++ const unsigned int type_v4l2, +++ const char *const mpath, +++ const char *const vpath) +++{ +++ unsigned int i; +++ for (i = 0;; ++i) { +++ struct v4l2_fmtdesc fmtdesc = { +++ .index = i, +++ .type = type_v4l2 +++ }; +++ struct v4l2_requestbuffers rbufs = { +++ .count = 0, +++ .type = type_v4l2, +++ .memory = V4L2_MEMORY_MMAP +++ }; +++ while (ioctl(fd, VIDIOC_ENUM_FMT, &fmtdesc)) { +++ if (errno == EINTR) +++ continue; +++ if (errno != EINVAL) +++ request_err(dc, "Enum[%d] failed for type=%d\n", i, type_v4l2); +++ return; +++ } +++ if (!video_src_pixfmt_supported(fmtdesc.pixelformat)) +++ continue; +++ +++ if (v4l2_set_format(fd, type_v4l2, fmtdesc.pixelformat, 720, 480)) { +++ request_debug(dc, "Set failed for type=%d, pf=%.4s\n", type_v4l2, (char*)&fmtdesc.pixelformat); +++ continue; +++ } +++ +++ while (ioctl(fd, VIDIOC_REQBUFS, &rbufs)) { +++ if (errno != EINTR) { +++ request_debug(dc, "%s: Reqbufs failed\n", vpath); +++ continue; +++ } +++ } +++ +++ if ((rbufs.capabilities & REQ_BUF_CAPS) != REQ_BUF_CAPS) { +++ request_debug(dc, "%s: Buf caps %#x insufficient\n", vpath, rbufs.capabilities); +++ continue; +++ } +++ +++ request_debug(dc, "Adding: %s,%s pix=%#x, type=%d\n", +++ mpath, vpath, fmtdesc.pixelformat, type_v4l2); +++ devscan_add(scan, type_v4l2, fmtdesc.pixelformat, vpath, mpath); +++ } +++} +++ +++ +++static int probe_video_device(void * const dc, +++ struct udev_device *const device, +++ struct devscan *const scan, +++ const char *const mpath) +++{ +++ int ret; +++ unsigned int capabilities = 0; +++ int video_fd = -1; +++ +++ const char *path = udev_device_get_devnode(device); +++ if (!path) { +++ request_err(dc, "%s: get video device devnode failed\n", __func__); +++ ret = -EINVAL; +++ goto fail; +++ } +++ +++ video_fd = open(path, O_RDWR, 0); +++ if (video_fd == -1) { +++ ret = -errno; +++ request_err(dc, "%s: opening %s failed, %s (%d)\n", __func__, path, strerror(errno), errno); +++ goto fail; +++ } +++ +++ ret = v4l2_query_capabilities(video_fd, &capabilities); +++ if (ret < 0) { +++ request_err(dc, "%s: get video capability failed, %s (%d)\n", __func__, strerror(-ret), -ret); +++ goto fail; +++ } +++ +++ request_debug(dc, "%s: path=%s capabilities=%#x\n", __func__, path, capabilities); +++ +++ if (!(capabilities & V4L2_CAP_STREAMING)) { +++ request_debug(dc, "%s: missing required streaming capability\n", __func__); +++ ret = -EINVAL; +++ goto fail; +++ } +++ +++ if (!(capabilities & (V4L2_CAP_VIDEO_M2M_MPLANE | 
V4L2_CAP_VIDEO_M2M))) { +++ request_debug(dc, "%s: missing required mem2mem capability\n", __func__); +++ ret = -EINVAL; +++ goto fail; +++ } +++ +++ /* Should check capture formats too... */ +++ if ((capabilities & V4L2_CAP_VIDEO_M2M) != 0) +++ probe_formats(dc, scan, video_fd, V4L2_BUF_TYPE_VIDEO_OUTPUT, mpath, path); +++ if ((capabilities & V4L2_CAP_VIDEO_M2M_MPLANE) != 0) +++ probe_formats(dc, scan, video_fd, V4L2_BUF_TYPE_VIDEO_OUTPUT_MPLANE, mpath, path); +++ +++ close(video_fd); +++ return 0; +++ +++fail: +++ if (video_fd >= 0) +++ close(video_fd); +++ return ret; +++} +++ +++static int probe_media_device(void * const dc, +++ struct udev_device *const device, +++ struct devscan *const scan) +++{ +++ int ret; +++ int rv; +++ struct media_device_info device_info = { 0 }; +++ struct media_v2_topology topology = { 0 }; +++ struct media_v2_interface *interfaces = NULL; +++ struct udev *udev = udev_device_get_udev(device); +++ struct udev_device *video_device; +++ dev_t devnum; +++ int media_fd = -1; +++ +++ const char *path = udev_device_get_devnode(device); +++ if (!path) { +++ request_err(dc, "%s: get media device devnode failed\n", __func__); +++ ret = -EINVAL; +++ goto fail; +++ } +++ +++ media_fd = open(path, O_RDWR, 0); +++ if (media_fd < 0) { +++ ret = -errno; +++ request_err(dc, "%s: opening %s failed, %s (%d)\n", __func__, path, strerror(-ret), -ret); +++ goto fail; +++ } +++ +++ rv = ioctl(media_fd, MEDIA_IOC_DEVICE_INFO, &device_info); +++ if (rv < 0) { +++ ret = -errno; +++ request_err(dc, "%s: get media device info failed, %s (%d)\n", __func__, strerror(-ret), -ret); +++ goto fail; +++ } +++ +++ rv = ioctl(media_fd, MEDIA_IOC_G_TOPOLOGY, &topology); +++ if (rv < 0) { +++ ret = -errno; +++ request_err(dc, "%s: get media topology failed, %s (%d)\n", __func__, strerror(-ret), -ret); +++ goto fail; +++ } +++ +++ if (topology.num_interfaces <= 0) { +++ request_err(dc, "%s: media device has no interfaces\n", __func__); +++ ret = -EINVAL; +++ goto fail; +++ } +++ +++ interfaces = calloc(topology.num_interfaces, sizeof(*interfaces)); +++ if (!interfaces) { +++ request_err(dc, "%s: allocating media interface struct failed\n", __func__); +++ ret = -ENOMEM; +++ goto fail; +++ } +++ +++ topology.ptr_interfaces = (__u64)(uintptr_t)interfaces; +++ rv = ioctl(media_fd, MEDIA_IOC_G_TOPOLOGY, &topology); +++ if (rv < 0) { +++ ret = -errno; +++ request_err(dc, "%s: get media topology failed, %s (%d)\n", __func__, strerror(-ret), -ret); +++ goto fail; +++ } +++ +++ for (int i = 0; i < topology.num_interfaces; i++) { +++ if (interfaces[i].intf_type != MEDIA_INTF_T_V4L_VIDEO) +++ continue; +++ +++ devnum = makedev(interfaces[i].devnode.major, interfaces[i].devnode.minor); +++ video_device = udev_device_new_from_devnum(udev, 'c', devnum); +++ if (!video_device) { +++ ret = -errno; +++ request_err(dc, "%s: video_device[%d]=%p\n", __func__, i, video_device); +++ continue; +++ } +++ +++ ret = probe_video_device(dc, video_device, scan, path); +++ udev_device_unref(video_device); +++ +++ if (ret != 0) +++ goto fail; +++ } +++ +++fail: +++ free(interfaces); +++ if (media_fd != -1) +++ close(media_fd); +++ return ret; +++} +++ +++const char *decdev_media_path(const struct decdev *const dev) +++{ +++ return !dev ? NULL : dev->mname; +++} +++ +++const char *decdev_video_path(const struct decdev *const dev) +++{ +++ return !dev ? NULL : dev->vname; +++} +++ +++enum v4l2_buf_type decdev_src_type(const struct decdev *const dev) +++{ +++ return !dev ? 
0 : dev->src_type; +++} +++ +++uint32_t decdev_src_pixelformat(const struct decdev *const dev) +++{ +++ return !dev ? 0 : dev->src_fmt_v4l2; +++} +++ +++ +++const struct decdev *devscan_find(struct devscan *const scan, +++ const uint32_t src_fmt_v4l2) +++{ +++ unsigned int i; +++ +++ if (scan->env.mname && scan->env.vname) +++ return &scan->env; +++ +++ if (!src_fmt_v4l2) +++ return scan->dev_count ? scan->devs + 0 : NULL; +++ +++ for (i = 0; i != scan->dev_count; ++i) { +++ if (scan->devs[i].src_fmt_v4l2 == src_fmt_v4l2) +++ return scan->devs + i; +++ } +++ return NULL; +++} +++ +++int devscan_build(void * const dc, struct devscan **pscan) +++{ +++ int ret; +++ struct udev *udev; +++ struct udev_enumerate *enumerate; +++ struct udev_list_entry *devices; +++ struct udev_list_entry *entry; +++ struct udev_device *device; +++ struct devscan * scan; +++ +++ *pscan = NULL; +++ +++ scan = calloc(1, sizeof(*scan)); +++ if (!scan) { +++ ret = -ENOMEM; +++ goto fail; +++ } +++ +++ scan->env.mname = getenv("LIBVA_V4L2_REQUEST_MEDIA_PATH"); +++ scan->env.vname = getenv("LIBVA_V4L2_REQUEST_VIDEO_PATH"); +++ if (scan->env.mname && scan->env.vname) { +++ request_info(dc, "Media/video device env overrides found: %s,%s\n", +++ scan->env.mname, scan->env.vname); +++ *pscan = scan; +++ return 0; +++ } +++ +++ udev = udev_new(); +++ if (!udev) { +++ request_err(dc, "%s: allocating udev context failed\n", __func__); +++ ret = -ENOMEM; +++ goto fail; +++ } +++ +++ enumerate = udev_enumerate_new(udev); +++ if (!enumerate) { +++ request_err(dc, "%s: allocating udev enumerator failed\n", __func__); +++ ret = -ENOMEM; +++ goto fail; +++ } +++ +++ udev_enumerate_add_match_subsystem(enumerate, "media"); +++ udev_enumerate_scan_devices(enumerate); +++ +++ devices = udev_enumerate_get_list_entry(enumerate); +++ udev_list_entry_foreach(entry, devices) { +++ const char *path = udev_list_entry_get_name(entry); +++ if (!path) +++ continue; +++ +++ device = udev_device_new_from_syspath(udev, path); +++ if (!device) +++ continue; +++ +++ probe_media_device(dc, device, scan); +++ udev_device_unref(device); +++ } +++ +++ udev_enumerate_unref(enumerate); +++ +++ *pscan = scan; +++ return 0; +++ +++fail: +++ udev_unref(udev); +++ devscan_delete(&scan); +++ return ret; +++} +++ ++diff --git a/libavcodec/v4l2_req_devscan.h b/libavcodec/v4l2_req_devscan.h ++new file mode 100644 ++index 0000000000..956d9234f1 ++--- /dev/null +++++ b/libavcodec/v4l2_req_devscan.h ++@@ -0,0 +1,23 @@ +++#ifndef _DEVSCAN_H_ +++#define _DEVSCAN_H_ +++ +++#include +++ +++struct devscan; +++struct decdev; +++enum v4l2_buf_type; +++ +++/* These return pointers to data in the devscan structure and so are vaild +++ * for the lifetime of that +++ */ +++const char *decdev_media_path(const struct decdev *const dev); +++const char *decdev_video_path(const struct decdev *const dev); +++enum v4l2_buf_type decdev_src_type(const struct decdev *const dev); +++uint32_t decdev_src_pixelformat(const struct decdev *const dev); +++ +++const struct decdev *devscan_find(struct devscan *const scan, const uint32_t src_fmt_v4l2); +++ +++int devscan_build(void * const dc, struct devscan **pscan); +++void devscan_delete(struct devscan **const pScan); +++ +++#endif ++diff --git a/libavcodec/v4l2_req_dmabufs.c b/libavcodec/v4l2_req_dmabufs.c ++new file mode 100644 ++index 0000000000..acc0366e76 ++--- /dev/null +++++ b/libavcodec/v4l2_req_dmabufs.c ++@@ -0,0 +1,369 @@ +++#include +++#include +++#include +++#include +++#include +++#include +++#include +++#include +++#include 
+++#include +++#include +++#include +++#include +++ +++#include "v4l2_req_dmabufs.h" +++#include "v4l2_req_utils.h" +++ +++#define DMABUF_NAME1 "/dev/dma_heap/linux,cma" +++#define DMABUF_NAME2 "/dev/dma_heap/reserved" +++ +++#define TRACE_ALLOC 0 +++ +++struct dmabufs_ctl; +++struct dmabuf_h; +++ +++struct dmabuf_fns { +++ int (*buf_alloc)(struct dmabufs_ctl * dbsc, struct dmabuf_h * dh, size_t size); +++ void (*buf_free)(struct dmabuf_h * dh); +++ int (*ctl_new)(struct dmabufs_ctl * dbsc); +++ void (*ctl_free)(struct dmabufs_ctl * dbsc); +++}; +++ +++struct dmabufs_ctl { +++ atomic_int ref_count; +++ int fd; +++ size_t page_size; +++ void * v; +++ const struct dmabuf_fns * fns; +++}; +++ +++struct dmabuf_h { +++ int fd; +++ size_t size; +++ size_t len; +++ void * mapptr; +++ void * v; +++ const struct dmabuf_fns * fns; +++}; +++ +++#if TRACE_ALLOC +++static unsigned int total_bufs = 0; +++static size_t total_size = 0; +++#endif +++ +++struct dmabuf_h * dmabuf_import_mmap(void * mapptr, size_t size) +++{ +++ struct dmabuf_h *dh; +++ +++ if (mapptr == MAP_FAILED) +++ return NULL; +++ +++ dh = malloc(sizeof(*dh)); +++ if (!dh) +++ return NULL; +++ +++ *dh = (struct dmabuf_h) { +++ .fd = -1, +++ .size = size, +++ .mapptr = mapptr +++ }; +++ +++ return dh; +++} +++ +++struct dmabuf_h * dmabuf_import(int fd, size_t size) +++{ +++ struct dmabuf_h *dh; +++ +++ fd = dup(fd); +++ if (fd < 0 || size == 0) +++ return NULL; +++ +++ dh = malloc(sizeof(*dh)); +++ if (!dh) { +++ close(fd); +++ return NULL; +++ } +++ +++ *dh = (struct dmabuf_h) { +++ .fd = fd, +++ .size = size, +++ .mapptr = MAP_FAILED +++ }; +++ +++#if TRACE_ALLOC +++ ++total_bufs; +++ total_size += dh->size; +++ request_log("%s: Import: %zd, total=%zd, bufs=%d\n", __func__, dh->size, total_size, total_bufs); +++#endif +++ +++ return dh; +++} +++ +++struct dmabuf_h * dmabuf_realloc(struct dmabufs_ctl * dbsc, struct dmabuf_h * old, size_t size) +++{ +++ struct dmabuf_h * dh; +++ if (old != NULL) { +++ if (old->size >= size) { +++ return old; +++ } +++ dmabuf_free(old); +++ } +++ +++ if (size == 0 || +++ (dh = malloc(sizeof(*dh))) == NULL) +++ return NULL; +++ +++ *dh = (struct dmabuf_h){ +++ .fd = -1, +++ .mapptr = MAP_FAILED, +++ .fns = dbsc->fns +++ }; +++ +++ if (dh->fns->buf_alloc(dbsc, dh, size) != 0) +++ goto fail; +++ +++ +++#if TRACE_ALLOC +++ ++total_bufs; +++ total_size += dh->size; +++ request_log("%s: Alloc: %zd, total=%zd, bufs=%d\n", __func__, dh->size, total_size, total_bufs); +++#endif +++ +++ return dh; +++ +++fail: +++ free(dh); +++ return NULL; +++} +++ +++int dmabuf_sync(struct dmabuf_h * const dh, unsigned int flags) +++{ +++ struct dma_buf_sync sync = { +++ .flags = flags +++ }; +++ if (dh->fd == -1) +++ return 0; +++ while (ioctl(dh->fd, DMA_BUF_IOCTL_SYNC, &sync) == -1) { +++ const int err = errno; +++ if (errno == EINTR) +++ continue; +++ request_log("%s: ioctl failed: flags=%#x\n", __func__, flags); +++ return -err; +++ } +++ return 0; +++} +++ +++int dmabuf_write_start(struct dmabuf_h * const dh) +++{ +++ return dmabuf_sync(dh, DMA_BUF_SYNC_START | DMA_BUF_SYNC_WRITE); +++} +++ +++int dmabuf_write_end(struct dmabuf_h * const dh) +++{ +++ return dmabuf_sync(dh, DMA_BUF_SYNC_END | DMA_BUF_SYNC_WRITE); +++} +++ +++int dmabuf_read_start(struct dmabuf_h * const dh) +++{ +++ if (!dmabuf_map(dh)) +++ return -1; +++ return dmabuf_sync(dh, DMA_BUF_SYNC_START | DMA_BUF_SYNC_READ); +++} +++ +++int dmabuf_read_end(struct dmabuf_h * const dh) +++{ +++ return dmabuf_sync(dh, DMA_BUF_SYNC_END | DMA_BUF_SYNC_READ); +++} +++ 
+++ +++void * dmabuf_map(struct dmabuf_h * const dh) +++{ +++ if (!dh) +++ return NULL; +++ if (dh->mapptr != MAP_FAILED) +++ return dh->mapptr; +++ dh->mapptr = mmap(NULL, dh->size, +++ PROT_READ | PROT_WRITE, +++ MAP_SHARED | MAP_POPULATE, +++ dh->fd, 0); +++ if (dh->mapptr == MAP_FAILED) { +++ request_log("%s: Map failed\n", __func__); +++ return NULL; +++ } +++ return dh->mapptr; +++} +++ +++int dmabuf_fd(const struct dmabuf_h * const dh) +++{ +++ if (!dh) +++ return -1; +++ return dh->fd; +++} +++ +++size_t dmabuf_size(const struct dmabuf_h * const dh) +++{ +++ if (!dh) +++ return 0; +++ return dh->size; +++} +++ +++size_t dmabuf_len(const struct dmabuf_h * const dh) +++{ +++ if (!dh) +++ return 0; +++ return dh->len; +++} +++ +++void dmabuf_len_set(struct dmabuf_h * const dh, const size_t len) +++{ +++ dh->len = len; +++} +++ +++void dmabuf_free(struct dmabuf_h * dh) +++{ +++ if (!dh) +++ return; +++ +++#if TRACE_ALLOC +++ --total_bufs; +++ total_size -= dh->size; +++ request_log("%s: Free: %zd, total=%zd, bufs=%d\n", __func__, dh->size, total_size, total_bufs); +++#endif +++ +++ dh->fns->buf_free(dh); +++ +++ if (dh->mapptr != MAP_FAILED && dh->mapptr != NULL) +++ munmap(dh->mapptr, dh->size); +++ if (dh->fd != -1) +++ while (close(dh->fd) == -1 && errno == EINTR) +++ /* loop */; +++ free(dh); +++} +++ +++static struct dmabufs_ctl * dmabufs_ctl_new2(const struct dmabuf_fns * const fns) +++{ +++ struct dmabufs_ctl * dbsc = calloc(1, sizeof(*dbsc)); +++ +++ if (!dbsc) +++ return NULL; +++ +++ dbsc->fd = -1; +++ dbsc->fns = fns; +++ dbsc->page_size = (size_t)sysconf(_SC_PAGE_SIZE); +++ +++ if (fns->ctl_new(dbsc) != 0) +++ goto fail; +++ +++ return dbsc; +++ +++fail: +++ free(dbsc); +++ return NULL; +++} +++ +++static void dmabufs_ctl_free(struct dmabufs_ctl * const dbsc) +++{ +++ request_debug(NULL, "Free dmabuf ctl\n"); +++ +++ dbsc->fns->ctl_free(dbsc); +++ +++ free(dbsc); +++} +++ +++void dmabufs_ctl_unref(struct dmabufs_ctl ** const pDbsc) +++{ +++ struct dmabufs_ctl * const dbsc = *pDbsc; +++ +++ if (!dbsc) +++ return; +++ *pDbsc = NULL; +++ +++ if (atomic_fetch_sub(&dbsc->ref_count, 1) != 0) +++ return; +++ +++ dmabufs_ctl_free(dbsc); +++} +++ +++struct dmabufs_ctl * dmabufs_ctl_ref(struct dmabufs_ctl * const dbsc) +++{ +++ atomic_fetch_add(&dbsc->ref_count, 1); +++ return dbsc; +++} +++ +++//----------------------------------------------------------------------------- +++// +++// Alloc dmabuf via CMA +++ +++static int ctl_cma_new(struct dmabufs_ctl * dbsc) +++{ +++ while ((dbsc->fd = open(DMABUF_NAME1, O_RDWR)) == -1 && +++ errno == EINTR) +++ /* Loop */; +++ +++ if (dbsc->fd == -1) { +++ while ((dbsc->fd = open(DMABUF_NAME2, O_RDWR)) == -1 && +++ errno == EINTR) +++ /* Loop */; +++ if (dbsc->fd == -1) { +++ request_log("Unable to open either %s or %s\n", +++ DMABUF_NAME1, DMABUF_NAME2); +++ return -1; +++ } +++ } +++ return 0; +++} +++ +++static void ctl_cma_free(struct dmabufs_ctl * dbsc) +++{ +++ if (dbsc->fd != -1) +++ while (close(dbsc->fd) == -1 && errno == EINTR) +++ /* loop */; +++ +++} +++ +++static int buf_cma_alloc(struct dmabufs_ctl * const dbsc, struct dmabuf_h * dh, size_t size) +++{ +++ struct dma_heap_allocation_data data = { +++ .len = (size + dbsc->page_size - 1) & ~(dbsc->page_size - 1), +++ .fd = 0, +++ .fd_flags = O_RDWR, +++ .heap_flags = 0 +++ }; +++ +++ while (ioctl(dbsc->fd, DMA_HEAP_IOCTL_ALLOC, &data)) { +++ int err = errno; +++ request_log("Failed to alloc %" PRIu64 " from dma-heap(fd=%d): %d (%s)\n", +++ (uint64_t)data.len, +++ dbsc->fd, +++ err, +++ 
strerror(err)); +++ if (err == EINTR) +++ continue; +++ return -err; +++ } +++ +++ dh->fd = data.fd; +++ dh->size = (size_t)data.len; +++ return 0; +++} +++ +++static void buf_cma_free(struct dmabuf_h * dh) +++{ +++ // Nothing needed +++} +++ +++static const struct dmabuf_fns dmabuf_cma_fns = { +++ .buf_alloc = buf_cma_alloc, +++ .buf_free = buf_cma_free, +++ .ctl_new = ctl_cma_new, +++ .ctl_free = ctl_cma_free, +++}; +++ +++struct dmabufs_ctl * dmabufs_ctl_new(void) +++{ +++ request_debug(NULL, "Dmabufs using CMA\n");; +++ return dmabufs_ctl_new2(&dmabuf_cma_fns); +++} +++ ++diff --git a/libavcodec/v4l2_req_dmabufs.h b/libavcodec/v4l2_req_dmabufs.h ++new file mode 100644 ++index 0000000000..381ba2708d ++--- /dev/null +++++ b/libavcodec/v4l2_req_dmabufs.h ++@@ -0,0 +1,44 @@ +++#ifndef DMABUFS_H +++#define DMABUFS_H +++ +++#include +++ +++struct dmabufs_ctl; +++struct dmabuf_h; +++ +++struct dmabufs_ctl * dmabufs_ctl_new(void); +++void dmabufs_ctl_unref(struct dmabufs_ctl ** const pdbsc); +++struct dmabufs_ctl * dmabufs_ctl_ref(struct dmabufs_ctl * const dbsc); +++ +++// Need not preserve old contents +++// On NULL return old buffer is freed +++struct dmabuf_h * dmabuf_realloc(struct dmabufs_ctl * dbsc, struct dmabuf_h *, size_t size); +++ +++static inline struct dmabuf_h * dmabuf_alloc(struct dmabufs_ctl * dbsc, size_t size) { +++ return dmabuf_realloc(dbsc, NULL, size); +++} +++/* Create from existing fd - dups(fd) */ +++struct dmabuf_h * dmabuf_import(int fd, size_t size); +++/* Import an MMAP - return NULL if mapptr = MAP_FAIL */ +++struct dmabuf_h * dmabuf_import_mmap(void * mapptr, size_t size); +++ +++void * dmabuf_map(struct dmabuf_h * const dh); +++ +++/* flags from linux/dmabuf.h DMA_BUF_SYNC_xxx */ +++int dmabuf_sync(struct dmabuf_h * const dh, unsigned int flags); +++ +++int dmabuf_write_start(struct dmabuf_h * const dh); +++int dmabuf_write_end(struct dmabuf_h * const dh); +++int dmabuf_read_start(struct dmabuf_h * const dh); +++int dmabuf_read_end(struct dmabuf_h * const dh); +++ +++int dmabuf_fd(const struct dmabuf_h * const dh); +++/* Allocated size */ +++size_t dmabuf_size(const struct dmabuf_h * const dh); +++/* Bytes in use */ +++size_t dmabuf_len(const struct dmabuf_h * const dh); +++/* Set bytes in use */ +++void dmabuf_len_set(struct dmabuf_h * const dh, const size_t len); +++void dmabuf_free(struct dmabuf_h * dh); +++ +++#endif ++diff --git a/libavcodec/v4l2_req_hevc_v1.c b/libavcodec/v4l2_req_hevc_v1.c ++new file mode 100644 ++index 0000000000..169b532832 ++--- /dev/null +++++ b/libavcodec/v4l2_req_hevc_v1.c ++@@ -0,0 +1,3 @@ +++#define HEVC_CTRLS_VERSION 1 +++#include "v4l2_req_hevc_vx.c" +++ ++diff --git a/libavcodec/v4l2_req_hevc_v2.c b/libavcodec/v4l2_req_hevc_v2.c ++new file mode 100644 ++index 0000000000..42af98e156 ++--- /dev/null +++++ b/libavcodec/v4l2_req_hevc_v2.c ++@@ -0,0 +1,3 @@ +++#define HEVC_CTRLS_VERSION 2 +++#include "v4l2_req_hevc_vx.c" +++ ++diff --git a/libavcodec/v4l2_req_hevc_v3.c b/libavcodec/v4l2_req_hevc_v3.c ++new file mode 100644 ++index 0000000000..dcc8d95632 ++--- /dev/null +++++ b/libavcodec/v4l2_req_hevc_v3.c ++@@ -0,0 +1,3 @@ +++#define HEVC_CTRLS_VERSION 3 +++#include "v4l2_req_hevc_vx.c" +++ ++diff --git a/libavcodec/v4l2_req_hevc_v4.c b/libavcodec/v4l2_req_hevc_v4.c ++new file mode 100644 ++index 0000000000..c35579d8e0 ++--- /dev/null +++++ b/libavcodec/v4l2_req_hevc_v4.c ++@@ -0,0 +1,3 @@ +++#define HEVC_CTRLS_VERSION 4 +++#include "v4l2_req_hevc_vx.c" +++ ++diff --git a/libavcodec/v4l2_req_hevc_vx.c 
b/libavcodec/v4l2_req_hevc_vx.c ++new file mode 100644 ++index 0000000000..e1bd5c6a1f ++--- /dev/null +++++ b/libavcodec/v4l2_req_hevc_vx.c ++@@ -0,0 +1,1362 @@ +++// File included by v4l2_req_hevc_v* - not compiled on its own +++ +++#include "decode.h" +++#include "hevcdec.h" +++#include "hwconfig.h" +++#include "internal.h" +++#include "thread.h" +++ +++#if HEVC_CTRLS_VERSION == 1 +++#include "hevc-ctrls-v1.h" +++ +++// Fixup renamed entries +++#define V4L2_HEVC_PPS_FLAG_DEPENDENT_SLICE_SEGMENT_ENABLED V4L2_HEVC_PPS_FLAG_DEPENDENT_SLICE_SEGMENT +++ +++#elif HEVC_CTRLS_VERSION == 2 +++#include "hevc-ctrls-v2.h" +++#elif HEVC_CTRLS_VERSION == 3 +++#include "hevc-ctrls-v3.h" +++#elif HEVC_CTRLS_VERSION == 4 +++#include +++#if !defined(V4L2_CID_STATELESS_HEVC_SPS) +++#include "hevc-ctrls-v4.h" +++#endif +++#else +++#error Unknown HEVC_CTRLS_VERSION +++#endif +++ +++#ifndef V4L2_CID_STATELESS_HEVC_SPS +++#define V4L2_CID_STATELESS_HEVC_SPS V4L2_CID_MPEG_VIDEO_HEVC_SPS +++#define V4L2_CID_STATELESS_HEVC_PPS V4L2_CID_MPEG_VIDEO_HEVC_PPS +++#define V4L2_CID_STATELESS_HEVC_SLICE_PARAMS V4L2_CID_MPEG_VIDEO_HEVC_SLICE_PARAMS +++#define V4L2_CID_STATELESS_HEVC_SCALING_MATRIX V4L2_CID_MPEG_VIDEO_HEVC_SCALING_MATRIX +++#define V4L2_CID_STATELESS_HEVC_DECODE_PARAMS V4L2_CID_MPEG_VIDEO_HEVC_DECODE_PARAMS +++#define V4L2_CID_STATELESS_HEVC_DECODE_MODE V4L2_CID_MPEG_VIDEO_HEVC_DECODE_MODE +++#define V4L2_CID_STATELESS_HEVC_START_CODE V4L2_CID_MPEG_VIDEO_HEVC_START_CODE +++ +++#define V4L2_STATELESS_HEVC_DECODE_MODE_SLICE_BASED V4L2_MPEG_VIDEO_HEVC_DECODE_MODE_SLICE_BASED +++#define V4L2_STATELESS_HEVC_DECODE_MODE_FRAME_BASED V4L2_MPEG_VIDEO_HEVC_DECODE_MODE_FRAME_BASED +++#define V4L2_STATELESS_HEVC_START_CODE_NONE V4L2_MPEG_VIDEO_HEVC_START_CODE_NONE +++#define V4L2_STATELESS_HEVC_START_CODE_ANNEX_B V4L2_MPEG_VIDEO_HEVC_START_CODE_ANNEX_B +++#endif +++ +++#include "v4l2_request_hevc.h" +++ +++#include "libavutil/hwcontext_drm.h" +++ +++#include +++#include +++ +++#include "v4l2_req_devscan.h" +++#include "v4l2_req_dmabufs.h" +++#include "v4l2_req_pollqueue.h" +++#include "v4l2_req_media.h" +++#include "v4l2_req_utils.h" +++ +++// Attached to buf[0] in frame +++// Pooled in hwcontext so generally create once - 1/frame +++typedef struct V4L2MediaReqDescriptor { +++ AVDRMFrameDescriptor drm; +++ +++ // Media +++ uint64_t timestamp; +++ struct qent_dst * qe_dst; +++ +++ // Decode only - should be NULL by the time we emit the frame +++ struct req_decode_ent decode_ent; +++ +++ struct media_request *req; +++ struct qent_src *qe_src; +++ +++#if HEVC_CTRLS_VERSION >= 2 +++ struct v4l2_ctrl_hevc_decode_params dec; +++#endif +++ +++ size_t num_slices; +++ size_t alloced_slices; +++ struct v4l2_ctrl_hevc_slice_params * slice_params; +++ struct slice_info * slices; +++ +++ size_t num_offsets; +++ size_t alloced_offsets; +++ uint32_t *offsets; +++ +++} V4L2MediaReqDescriptor; +++ +++struct slice_info { +++ const uint8_t * ptr; +++ size_t len; // bytes +++ size_t n_offsets; +++}; +++ +++// Handy container for accumulating controls before setting +++struct req_controls { +++ int has_scaling; +++ struct timeval tv; +++ struct v4l2_ctrl_hevc_sps sps; +++ struct v4l2_ctrl_hevc_pps pps; +++ struct v4l2_ctrl_hevc_scaling_matrix scaling_matrix; +++}; +++ +++//static uint8_t nalu_slice_start_code[] = { 0x00, 0x00, 0x01 }; +++ +++ +++// Get an FFmpeg format from the v4l2 format +++static enum AVPixelFormat pixel_format_from_format(const struct v4l2_format *const format) +++{ +++ switch (V4L2_TYPE_IS_MULTIPLANAR(format->type) ? 
+++ format->fmt.pix_mp.pixelformat : format->fmt.pix.pixelformat) { +++ case V4L2_PIX_FMT_YUV420: +++ return AV_PIX_FMT_YUV420P; +++ case V4L2_PIX_FMT_NV12: +++ return AV_PIX_FMT_NV12; +++#if CONFIG_SAND +++ case V4L2_PIX_FMT_NV12_COL128: +++ return AV_PIX_FMT_RPI4_8; +++ case V4L2_PIX_FMT_NV12_10_COL128: +++ return AV_PIX_FMT_RPI4_10; +++#endif +++ default: +++ break; +++ } +++ return AV_PIX_FMT_NONE; +++} +++ +++static inline uint64_t frame_capture_dpb(const AVFrame * const frame) +++{ +++ const V4L2MediaReqDescriptor *const rd = (V4L2MediaReqDescriptor *)frame->data[0]; +++ return rd->timestamp; +++} +++ +++static inline void frame_set_capture_dpb(AVFrame * const frame, const uint64_t dpb_stamp) +++{ +++ V4L2MediaReqDescriptor *const rd = (V4L2MediaReqDescriptor *)frame->data[0]; +++ rd->timestamp = dpb_stamp; +++} +++ +++static void fill_pred_table(const HEVCContext *h, struct v4l2_hevc_pred_weight_table *table) +++{ +++ int32_t luma_weight_denom, chroma_weight_denom; +++ const SliceHeader *sh = &h->sh; +++ +++ if (sh->slice_type == HEVC_SLICE_I || +++ (sh->slice_type == HEVC_SLICE_P && !h->ps.pps->weighted_pred_flag) || +++ (sh->slice_type == HEVC_SLICE_B && !h->ps.pps->weighted_bipred_flag)) +++ return; +++ +++ table->luma_log2_weight_denom = sh->luma_log2_weight_denom; +++ +++ if (h->ps.sps->chroma_format_idc) +++ table->delta_chroma_log2_weight_denom = sh->chroma_log2_weight_denom - sh->luma_log2_weight_denom; +++ +++ luma_weight_denom = (1 << sh->luma_log2_weight_denom); +++ chroma_weight_denom = (1 << sh->chroma_log2_weight_denom); +++ +++ for (int i = 0; i < 15 && i < sh->nb_refs[L0]; i++) { +++ table->delta_luma_weight_l0[i] = sh->luma_weight_l0[i] - luma_weight_denom; +++ table->luma_offset_l0[i] = sh->luma_offset_l0[i]; +++ table->delta_chroma_weight_l0[i][0] = sh->chroma_weight_l0[i][0] - chroma_weight_denom; +++ table->delta_chroma_weight_l0[i][1] = sh->chroma_weight_l0[i][1] - chroma_weight_denom; +++ table->chroma_offset_l0[i][0] = sh->chroma_offset_l0[i][0]; +++ table->chroma_offset_l0[i][1] = sh->chroma_offset_l0[i][1]; +++ } +++ +++ if (sh->slice_type != HEVC_SLICE_B) +++ return; +++ +++ for (int i = 0; i < 15 && i < sh->nb_refs[L1]; i++) { +++ table->delta_luma_weight_l1[i] = sh->luma_weight_l1[i] - luma_weight_denom; +++ table->luma_offset_l1[i] = sh->luma_offset_l1[i]; +++ table->delta_chroma_weight_l1[i][0] = sh->chroma_weight_l1[i][0] - chroma_weight_denom; +++ table->delta_chroma_weight_l1[i][1] = sh->chroma_weight_l1[i][1] - chroma_weight_denom; +++ table->chroma_offset_l1[i][0] = sh->chroma_offset_l1[i][0]; +++ table->chroma_offset_l1[i][1] = sh->chroma_offset_l1[i][1]; +++ } +++} +++ +++#if HEVC_CTRLS_VERSION <= 2 +++static int find_frame_rps_type(const HEVCContext *h, uint64_t timestamp) +++{ +++ const HEVCFrame *frame; +++ int i; +++ +++ for (i = 0; i < h->rps[ST_CURR_BEF].nb_refs; i++) { +++ frame = h->rps[ST_CURR_BEF].ref[i]; +++ if (frame && timestamp == frame_capture_dpb(frame->frame)) +++ return V4L2_HEVC_DPB_ENTRY_RPS_ST_CURR_BEFORE; +++ } +++ +++ for (i = 0; i < h->rps[ST_CURR_AFT].nb_refs; i++) { +++ frame = h->rps[ST_CURR_AFT].ref[i]; +++ if (frame && timestamp == frame_capture_dpb(frame->frame)) +++ return V4L2_HEVC_DPB_ENTRY_RPS_ST_CURR_AFTER; +++ } +++ +++ for (i = 0; i < h->rps[LT_CURR].nb_refs; i++) { +++ frame = h->rps[LT_CURR].ref[i]; +++ if (frame && timestamp == frame_capture_dpb(frame->frame)) +++ return V4L2_HEVC_DPB_ENTRY_RPS_LT_CURR; +++ } +++ +++ return 0; +++} +++#endif +++ +++static unsigned int +++get_ref_pic_index(const HEVCContext 
*h, const HEVCFrame *frame, +++ const struct v4l2_hevc_dpb_entry * const entries, +++ const unsigned int num_entries) +++{ +++ uint64_t timestamp; +++ +++ if (!frame) +++ return 0; +++ +++ timestamp = frame_capture_dpb(frame->frame); +++ +++ for (unsigned int i = 0; i < num_entries; i++) { +++ if (entries[i].timestamp == timestamp) +++ return i; +++ } +++ +++ return 0; +++} +++ +++static const uint8_t * ptr_from_index(const uint8_t * b, unsigned int idx) +++{ +++ unsigned int z = 0; +++ while (idx--) { +++ if (*b++ == 0) { +++ ++z; +++ if (z >= 2 && *b == 3) { +++ ++b; +++ z = 0; +++ } +++ } +++ else { +++ z = 0; +++ } +++ } +++ return b; +++} +++ +++static int slice_add(V4L2MediaReqDescriptor * const rd) +++{ +++ if (rd->num_slices >= rd->alloced_slices) { +++ struct v4l2_ctrl_hevc_slice_params * p2; +++ struct slice_info * s2; +++ size_t n2 = rd->alloced_slices == 0 ? 8 : rd->alloced_slices * 2; +++ +++ p2 = av_realloc_array(rd->slice_params, n2, sizeof(*p2)); +++ if (p2 == NULL) +++ return AVERROR(ENOMEM); +++ rd->slice_params = p2; +++ +++ s2 = av_realloc_array(rd->slices, n2, sizeof(*s2)); +++ if (s2 == NULL) +++ return AVERROR(ENOMEM); +++ rd->slices = s2; +++ +++ rd->alloced_slices = n2; +++ } +++ ++rd->num_slices; +++ return 0; +++} +++ +++static int offsets_add(V4L2MediaReqDescriptor *const rd, const size_t n, const unsigned * const offsets) +++{ +++ if (rd->num_offsets + n > rd->alloced_offsets) { +++ size_t n2 = rd->alloced_slices == 0 ? 128 : rd->alloced_slices * 2; +++ void * p2; +++ while (rd->num_offsets + n > n2) +++ n2 *= 2; +++ if ((p2 = av_realloc_array(rd->offsets, n2, sizeof(*rd->offsets))) == NULL) +++ return AVERROR(ENOMEM); +++ rd->offsets = p2; +++ rd->alloced_offsets = n2; +++ } +++ for (size_t i = 0; i != n; ++i) +++ rd->offsets[rd->num_offsets++] = offsets[i] - 1; +++ return 0; +++} +++ +++static unsigned int +++fill_dpb_entries(const HEVCContext * const h, struct v4l2_hevc_dpb_entry * const entries) +++{ +++ unsigned int i; +++ unsigned int n = 0; +++ const HEVCFrame * const pic = h->ref; +++ +++ for (i = 0; i < FF_ARRAY_ELEMS(h->DPB); i++) { +++ const HEVCFrame * const frame = &h->DPB[i]; +++ if (frame != pic && (frame->flags & (HEVC_FRAME_FLAG_LONG_REF | HEVC_FRAME_FLAG_SHORT_REF))) { +++ struct v4l2_hevc_dpb_entry * const entry = entries + n++; +++ +++ entry->timestamp = frame_capture_dpb(frame->frame); +++#if HEVC_CTRLS_VERSION <= 2 +++ entry->rps = find_frame_rps_type(h, entry->timestamp); +++#else +++ entry->flags = (frame->flags & HEVC_FRAME_FLAG_LONG_REF) == 0 ? 0 : +++ V4L2_HEVC_DPB_ENTRY_LONG_TERM_REFERENCE; +++#endif +++ entry->field_pic = frame->frame->interlaced_frame; +++ +++#if HEVC_CTRLS_VERSION <= 3 +++ /* TODO: Interleaved: Get the POC for each field. 
*/ +++ entry->pic_order_cnt[0] = frame->poc; +++ entry->pic_order_cnt[1] = frame->poc; +++#else +++ entry->pic_order_cnt_val = frame->poc; +++#endif +++ } +++ } +++ return n; +++} +++ +++static void fill_slice_params(const HEVCContext * const h, +++#if HEVC_CTRLS_VERSION >= 2 +++ const struct v4l2_ctrl_hevc_decode_params * const dec, +++#endif +++ struct v4l2_ctrl_hevc_slice_params *slice_params, +++ uint32_t bit_size, uint32_t bit_offset) +++{ +++ const SliceHeader * const sh = &h->sh; +++#if HEVC_CTRLS_VERSION >= 2 +++ const struct v4l2_hevc_dpb_entry *const dpb = dec->dpb; +++ const unsigned int dpb_n = dec->num_active_dpb_entries; +++#else +++ struct v4l2_hevc_dpb_entry *const dpb = slice_params->dpb; +++ unsigned int dpb_n; +++#endif +++ unsigned int i; +++ RefPicList *rpl; +++ +++ *slice_params = (struct v4l2_ctrl_hevc_slice_params) { +++ .bit_size = bit_size, +++#if HEVC_CTRLS_VERSION <= 3 +++ .data_bit_offset = bit_offset, +++#else +++ .data_byte_offset = bit_offset / 8 + 1, +++#endif +++ /* ISO/IEC 23008-2, ITU-T Rec. H.265: General slice segment header */ +++ .slice_segment_addr = sh->slice_segment_addr, +++ +++ /* ISO/IEC 23008-2, ITU-T Rec. H.265: NAL unit header */ +++ .nal_unit_type = h->nal_unit_type, +++ .nuh_temporal_id_plus1 = h->temporal_id + 1, +++ +++ /* ISO/IEC 23008-2, ITU-T Rec. H.265: General slice segment header */ +++ .slice_type = sh->slice_type, +++ .colour_plane_id = sh->colour_plane_id, +++ .slice_pic_order_cnt = h->ref->poc, +++ .num_ref_idx_l0_active_minus1 = sh->nb_refs[L0] ? sh->nb_refs[L0] - 1 : 0, +++ .num_ref_idx_l1_active_minus1 = sh->nb_refs[L1] ? sh->nb_refs[L1] - 1 : 0, +++ .collocated_ref_idx = sh->slice_temporal_mvp_enabled_flag ? sh->collocated_ref_idx : 0, +++ .five_minus_max_num_merge_cand = sh->slice_type == HEVC_SLICE_I ? 0 : 5 - sh->max_num_merge_cand, +++ .slice_qp_delta = sh->slice_qp_delta, +++ .slice_cb_qp_offset = sh->slice_cb_qp_offset, +++ .slice_cr_qp_offset = sh->slice_cr_qp_offset, +++ .slice_act_y_qp_offset = 0, +++ .slice_act_cb_qp_offset = 0, +++ .slice_act_cr_qp_offset = 0, +++ .slice_beta_offset_div2 = sh->beta_offset / 2, +++ .slice_tc_offset_div2 = sh->tc_offset / 2, +++ +++ /* ISO/IEC 23008-2, ITU-T Rec. H.265: Picture timing SEI message */ +++ .pic_struct = h->sei.picture_timing.picture_struct, +++ +++#if HEVC_CTRLS_VERSION < 2 +++ /* ISO/IEC 23008-2, ITU-T Rec. 
H.265: General slice segment header */ +++ .num_rps_poc_st_curr_before = h->rps[ST_CURR_BEF].nb_refs, +++ .num_rps_poc_st_curr_after = h->rps[ST_CURR_AFT].nb_refs, +++ .num_rps_poc_lt_curr = h->rps[LT_CURR].nb_refs, +++#endif +++ }; +++ +++ if (sh->slice_sample_adaptive_offset_flag[0]) +++ slice_params->flags |= V4L2_HEVC_SLICE_PARAMS_FLAG_SLICE_SAO_LUMA; +++ +++ if (sh->slice_sample_adaptive_offset_flag[1]) +++ slice_params->flags |= V4L2_HEVC_SLICE_PARAMS_FLAG_SLICE_SAO_CHROMA; +++ +++ if (sh->slice_temporal_mvp_enabled_flag) +++ slice_params->flags |= V4L2_HEVC_SLICE_PARAMS_FLAG_SLICE_TEMPORAL_MVP_ENABLED; +++ +++ if (sh->mvd_l1_zero_flag) +++ slice_params->flags |= V4L2_HEVC_SLICE_PARAMS_FLAG_MVD_L1_ZERO; +++ +++ if (sh->cabac_init_flag) +++ slice_params->flags |= V4L2_HEVC_SLICE_PARAMS_FLAG_CABAC_INIT; +++ +++ if (sh->collocated_list == L0) +++ slice_params->flags |= V4L2_HEVC_SLICE_PARAMS_FLAG_COLLOCATED_FROM_L0; +++ +++ if (sh->disable_deblocking_filter_flag) +++ slice_params->flags |= V4L2_HEVC_SLICE_PARAMS_FLAG_SLICE_DEBLOCKING_FILTER_DISABLED; +++ +++ if (sh->slice_loop_filter_across_slices_enabled_flag) +++ slice_params->flags |= V4L2_HEVC_SLICE_PARAMS_FLAG_SLICE_LOOP_FILTER_ACROSS_SLICES_ENABLED; +++ +++ if (sh->dependent_slice_segment_flag) +++ slice_params->flags |= V4L2_HEVC_SLICE_PARAMS_FLAG_DEPENDENT_SLICE_SEGMENT; +++ +++#if HEVC_CTRLS_VERSION < 2 +++ dpb_n = fill_dpb_entries(h, dpb); +++ slice_params->num_active_dpb_entries = dpb_n; +++#endif +++ +++ if (sh->slice_type != HEVC_SLICE_I) { +++ rpl = &h->ref->refPicList[0]; +++ for (i = 0; i < rpl->nb_refs; i++) +++ slice_params->ref_idx_l0[i] = get_ref_pic_index(h, rpl->ref[i], dpb, dpb_n); +++ } +++ +++ if (sh->slice_type == HEVC_SLICE_B) { +++ rpl = &h->ref->refPicList[1]; +++ for (i = 0; i < rpl->nb_refs; i++) +++ slice_params->ref_idx_l1[i] = get_ref_pic_index(h, rpl->ref[i], dpb, dpb_n); +++ } +++ +++ fill_pred_table(h, &slice_params->pred_weight_table); +++ +++ slice_params->num_entry_point_offsets = sh->num_entry_point_offsets; +++#if HEVC_CTRLS_VERSION <= 3 +++ if (slice_params->num_entry_point_offsets > 256) { +++ slice_params->num_entry_point_offsets = 256; +++ av_log(NULL, AV_LOG_ERROR, "%s: Currently only 256 entry points are supported, but slice has %d entry points.\n", __func__, sh->num_entry_point_offsets); +++ } +++ +++ for (i = 0; i < slice_params->num_entry_point_offsets; i++) +++ slice_params->entry_point_offset_minus1[i] = sh->entry_point_offset[i] - 1; +++#endif +++} +++ +++#if HEVC_CTRLS_VERSION >= 2 +++static void +++fill_decode_params(const HEVCContext * const h, +++ struct v4l2_ctrl_hevc_decode_params * const dec) +++{ +++ unsigned int i; +++ +++ *dec = (struct v4l2_ctrl_hevc_decode_params){ +++ .pic_order_cnt_val = h->poc, +++ .num_poc_st_curr_before = h->rps[ST_CURR_BEF].nb_refs, +++ .num_poc_st_curr_after = h->rps[ST_CURR_AFT].nb_refs, +++ .num_poc_lt_curr = h->rps[LT_CURR].nb_refs, +++ }; +++ +++ dec->num_active_dpb_entries = fill_dpb_entries(h, dec->dpb); +++ +++ // The docn does seem to ask that we fit our 32 bit signed POC into +++ // a U8 so... 
(To be fair 16 bits would be enough) +++ // Luckily we (Pi) don't use these fields +++ for (i = 0; i != h->rps[ST_CURR_BEF].nb_refs; ++i) +++ dec->poc_st_curr_before[i] = h->rps[ST_CURR_BEF].ref[i]->poc; +++ for (i = 0; i != h->rps[ST_CURR_AFT].nb_refs; ++i) +++ dec->poc_st_curr_after[i] = h->rps[ST_CURR_AFT].ref[i]->poc; +++ for (i = 0; i != h->rps[LT_CURR].nb_refs; ++i) +++ dec->poc_lt_curr[i] = h->rps[LT_CURR].ref[i]->poc; +++ +++ if (IS_IRAP(h)) +++ dec->flags |= V4L2_HEVC_DECODE_PARAM_FLAG_IRAP_PIC; +++ if (IS_IDR(h)) +++ dec->flags |= V4L2_HEVC_DECODE_PARAM_FLAG_IDR_PIC; +++ if (h->sh.no_output_of_prior_pics_flag) +++ dec->flags |= V4L2_HEVC_DECODE_PARAM_FLAG_NO_OUTPUT_OF_PRIOR; +++ +++} +++#endif +++ +++static void fill_sps(struct v4l2_ctrl_hevc_sps *ctrl, const HEVCSPS *sps) +++{ +++ /* ISO/IEC 23008-2, ITU-T Rec. H.265: Sequence parameter set */ +++ *ctrl = (struct v4l2_ctrl_hevc_sps) { +++ .chroma_format_idc = sps->chroma_format_idc, +++ .pic_width_in_luma_samples = sps->width, +++ .pic_height_in_luma_samples = sps->height, +++ .bit_depth_luma_minus8 = sps->bit_depth - 8, +++ .bit_depth_chroma_minus8 = sps->bit_depth - 8, +++ .log2_max_pic_order_cnt_lsb_minus4 = sps->log2_max_poc_lsb - 4, +++ .sps_max_dec_pic_buffering_minus1 = sps->temporal_layer[sps->max_sub_layers - 1].max_dec_pic_buffering - 1, +++ .sps_max_num_reorder_pics = sps->temporal_layer[sps->max_sub_layers - 1].num_reorder_pics, +++ .sps_max_latency_increase_plus1 = sps->temporal_layer[sps->max_sub_layers - 1].max_latency_increase + 1, +++ .log2_min_luma_coding_block_size_minus3 = sps->log2_min_cb_size - 3, +++ .log2_diff_max_min_luma_coding_block_size = sps->log2_diff_max_min_coding_block_size, +++ .log2_min_luma_transform_block_size_minus2 = sps->log2_min_tb_size - 2, +++ .log2_diff_max_min_luma_transform_block_size = sps->log2_max_trafo_size - sps->log2_min_tb_size, +++ .max_transform_hierarchy_depth_inter = sps->max_transform_hierarchy_depth_inter, +++ .max_transform_hierarchy_depth_intra = sps->max_transform_hierarchy_depth_intra, +++ .pcm_sample_bit_depth_luma_minus1 = sps->pcm.bit_depth - 1, +++ .pcm_sample_bit_depth_chroma_minus1 = sps->pcm.bit_depth_chroma - 1, +++ .log2_min_pcm_luma_coding_block_size_minus3 = sps->pcm.log2_min_pcm_cb_size - 3, +++ .log2_diff_max_min_pcm_luma_coding_block_size = sps->pcm.log2_max_pcm_cb_size - sps->pcm.log2_min_pcm_cb_size, +++ .num_short_term_ref_pic_sets = sps->nb_st_rps, +++ .num_long_term_ref_pics_sps = sps->num_long_term_ref_pics_sps, +++ .chroma_format_idc = sps->chroma_format_idc, +++ .sps_max_sub_layers_minus1 = sps->max_sub_layers - 1, +++ }; +++ +++ if (sps->separate_colour_plane_flag) +++ ctrl->flags |= V4L2_HEVC_SPS_FLAG_SEPARATE_COLOUR_PLANE; +++ +++ if (sps->scaling_list_enable_flag) +++ ctrl->flags |= V4L2_HEVC_SPS_FLAG_SCALING_LIST_ENABLED; +++ +++ if (sps->amp_enabled_flag) +++ ctrl->flags |= V4L2_HEVC_SPS_FLAG_AMP_ENABLED; +++ +++ if (sps->sao_enabled) +++ ctrl->flags |= V4L2_HEVC_SPS_FLAG_SAMPLE_ADAPTIVE_OFFSET; +++ +++ if (sps->pcm_enabled_flag) +++ ctrl->flags |= V4L2_HEVC_SPS_FLAG_PCM_ENABLED; +++ +++ if (sps->pcm.loop_filter_disable_flag) +++ ctrl->flags |= V4L2_HEVC_SPS_FLAG_PCM_LOOP_FILTER_DISABLED; +++ +++ if (sps->long_term_ref_pics_present_flag) +++ ctrl->flags |= V4L2_HEVC_SPS_FLAG_LONG_TERM_REF_PICS_PRESENT; +++ +++ if (sps->sps_temporal_mvp_enabled_flag) +++ ctrl->flags |= V4L2_HEVC_SPS_FLAG_SPS_TEMPORAL_MVP_ENABLED; +++ +++ if (sps->sps_strong_intra_smoothing_enable_flag) +++ ctrl->flags |= V4L2_HEVC_SPS_FLAG_STRONG_INTRA_SMOOTHING_ENABLED; 
+++} +++ +++static void fill_scaling_matrix(const ScalingList * const sl, +++ struct v4l2_ctrl_hevc_scaling_matrix * const sm) +++{ +++ unsigned int i; +++ +++ for (i = 0; i < 6; i++) { +++ unsigned int j; +++ +++ for (j = 0; j < 16; j++) +++ sm->scaling_list_4x4[i][j] = sl->sl[0][i][j]; +++ for (j = 0; j < 64; j++) { +++ sm->scaling_list_8x8[i][j] = sl->sl[1][i][j]; +++ sm->scaling_list_16x16[i][j] = sl->sl[2][i][j]; +++ if (i < 2) +++ sm->scaling_list_32x32[i][j] = sl->sl[3][i * 3][j]; +++ } +++ sm->scaling_list_dc_coef_16x16[i] = sl->sl_dc[0][i]; +++ if (i < 2) +++ sm->scaling_list_dc_coef_32x32[i] = sl->sl_dc[1][i * 3]; +++ } +++} +++ +++static void fill_pps(struct v4l2_ctrl_hevc_pps * const ctrl, const HEVCPPS * const pps) +++{ +++ uint64_t flags = 0; +++ +++ if (pps->dependent_slice_segments_enabled_flag) +++ flags |= V4L2_HEVC_PPS_FLAG_DEPENDENT_SLICE_SEGMENT_ENABLED; +++ +++ if (pps->output_flag_present_flag) +++ flags |= V4L2_HEVC_PPS_FLAG_OUTPUT_FLAG_PRESENT; +++ +++ if (pps->sign_data_hiding_flag) +++ flags |= V4L2_HEVC_PPS_FLAG_SIGN_DATA_HIDING_ENABLED; +++ +++ if (pps->cabac_init_present_flag) +++ flags |= V4L2_HEVC_PPS_FLAG_CABAC_INIT_PRESENT; +++ +++ if (pps->constrained_intra_pred_flag) +++ flags |= V4L2_HEVC_PPS_FLAG_CONSTRAINED_INTRA_PRED; +++ +++ if (pps->transform_skip_enabled_flag) +++ flags |= V4L2_HEVC_PPS_FLAG_TRANSFORM_SKIP_ENABLED; +++ +++ if (pps->cu_qp_delta_enabled_flag) +++ flags |= V4L2_HEVC_PPS_FLAG_CU_QP_DELTA_ENABLED; +++ +++ if (pps->pic_slice_level_chroma_qp_offsets_present_flag) +++ flags |= V4L2_HEVC_PPS_FLAG_PPS_SLICE_CHROMA_QP_OFFSETS_PRESENT; +++ +++ if (pps->weighted_pred_flag) +++ flags |= V4L2_HEVC_PPS_FLAG_WEIGHTED_PRED; +++ +++ if (pps->weighted_bipred_flag) +++ flags |= V4L2_HEVC_PPS_FLAG_WEIGHTED_BIPRED; +++ +++ if (pps->transquant_bypass_enable_flag) +++ flags |= V4L2_HEVC_PPS_FLAG_TRANSQUANT_BYPASS_ENABLED; +++ +++ if (pps->tiles_enabled_flag) +++ flags |= V4L2_HEVC_PPS_FLAG_TILES_ENABLED; +++ +++ if (pps->entropy_coding_sync_enabled_flag) +++ flags |= V4L2_HEVC_PPS_FLAG_ENTROPY_CODING_SYNC_ENABLED; +++ +++ if (pps->loop_filter_across_tiles_enabled_flag) +++ flags |= V4L2_HEVC_PPS_FLAG_LOOP_FILTER_ACROSS_TILES_ENABLED; +++ +++ if (pps->seq_loop_filter_across_slices_enabled_flag) +++ flags |= V4L2_HEVC_PPS_FLAG_PPS_LOOP_FILTER_ACROSS_SLICES_ENABLED; +++ +++ if (pps->deblocking_filter_override_enabled_flag) +++ flags |= V4L2_HEVC_PPS_FLAG_DEBLOCKING_FILTER_OVERRIDE_ENABLED; +++ +++ if (pps->disable_dbf) +++ flags |= V4L2_HEVC_PPS_FLAG_PPS_DISABLE_DEBLOCKING_FILTER; +++ +++ if (pps->lists_modification_present_flag) +++ flags |= V4L2_HEVC_PPS_FLAG_LISTS_MODIFICATION_PRESENT; +++ +++ if (pps->slice_header_extension_present_flag) +++ flags |= V4L2_HEVC_PPS_FLAG_SLICE_SEGMENT_HEADER_EXTENSION_PRESENT; +++ +++ /* ISO/IEC 23008-2, ITU-T Rec. 
H.265: Picture parameter set */ +++ *ctrl = (struct v4l2_ctrl_hevc_pps) { +++ .num_extra_slice_header_bits = pps->num_extra_slice_header_bits, +++ .init_qp_minus26 = pps->pic_init_qp_minus26, +++ .diff_cu_qp_delta_depth = pps->diff_cu_qp_delta_depth, +++ .pps_cb_qp_offset = pps->cb_qp_offset, +++ .pps_cr_qp_offset = pps->cr_qp_offset, +++ .pps_beta_offset_div2 = pps->beta_offset / 2, +++ .pps_tc_offset_div2 = pps->tc_offset / 2, +++ .log2_parallel_merge_level_minus2 = pps->log2_parallel_merge_level - 2, +++ .flags = flags +++ }; +++ +++ +++ if (pps->tiles_enabled_flag) { +++ ctrl->num_tile_columns_minus1 = pps->num_tile_columns - 1; +++ ctrl->num_tile_rows_minus1 = pps->num_tile_rows - 1; +++ +++ for (int i = 0; i < pps->num_tile_columns; i++) +++ ctrl->column_width_minus1[i] = pps->column_width[i] - 1; +++ +++ for (int i = 0; i < pps->num_tile_rows; i++) +++ ctrl->row_height_minus1[i] = pps->row_height[i] - 1; +++ } +++} +++ +++// Called before finally returning the frame to the user +++// Set corrupt flag here as this is actually the frame structure that +++// is going to the user (in MT land each thread has its own pool) +++static int frame_post_process(void *logctx, AVFrame *frame) +++{ +++ V4L2MediaReqDescriptor *rd = (V4L2MediaReqDescriptor*)frame->data[0]; +++ +++// av_log(NULL, AV_LOG_INFO, "%s\n", __func__); +++ frame->flags &= ~AV_FRAME_FLAG_CORRUPT; +++ if (rd->qe_dst) { +++ MediaBufsStatus stat = qent_dst_wait(rd->qe_dst); +++ if (stat != MEDIABUFS_STATUS_SUCCESS) { +++ av_log(logctx, AV_LOG_ERROR, "%s: Decode fail\n", __func__); +++ frame->flags |= AV_FRAME_FLAG_CORRUPT; +++ } +++ } +++ +++ return 0; +++} +++ +++static inline struct timeval cvt_dpb_to_tv(uint64_t t) +++{ +++ t /= 1000; +++ return (struct timeval){ +++ .tv_usec = t % 1000000, +++ .tv_sec = t / 1000000 +++ }; +++} +++ +++static inline uint64_t cvt_timestamp_to_dpb(const unsigned int t) +++{ +++ return (uint64_t)t * 1000; +++} +++ +++static int v4l2_request_hevc_start_frame(AVCodecContext *avctx, +++ av_unused const uint8_t *buffer, +++ av_unused uint32_t size) +++{ +++ const HEVCContext *h = avctx->priv_data; +++ V4L2MediaReqDescriptor *const rd = (V4L2MediaReqDescriptor *)h->ref->frame->data[0]; +++ V4L2RequestContextHEVC * const ctx = avctx->internal->hwaccel_priv_data; +++ +++// av_log(NULL, AV_LOG_INFO, "%s\n", __func__); +++ decode_q_add(&ctx->decode_q, &rd->decode_ent); +++ +++ rd->num_slices = 0; +++ ctx->timestamp++; +++ rd->timestamp = cvt_timestamp_to_dpb(ctx->timestamp); +++ +++ { +++ FrameDecodeData * const fdd = (FrameDecodeData*)h->ref->frame->private_ref->data; +++ fdd->post_process = frame_post_process; +++ } +++ +++ // qe_dst needs to be bound to the data buffer and only returned when that is +++ if (!rd->qe_dst) +++ { +++ if ((rd->qe_dst = mediabufs_dst_qent_alloc(ctx->mbufs, ctx->dbufs)) == NULL) { +++ av_log(avctx, AV_LOG_ERROR, "%s: Failed to get dst buffer\n", __func__); +++ return AVERROR(ENOMEM); +++ } +++ } +++ +++ ff_thread_finish_setup(avctx); // Allow next thread to enter rpi_hevc_start_frame +++ +++ return 0; +++} +++ +++// Object fd & size will be zapped by this & need setting later +++static int drm_from_format(AVDRMFrameDescriptor * const desc, const struct v4l2_format * const format) +++{ +++ AVDRMLayerDescriptor *layer = &desc->layers[0]; +++ unsigned int width; +++ unsigned int height; +++ unsigned int bpl; +++ uint32_t pixelformat; +++ +++ if (V4L2_TYPE_IS_MULTIPLANAR(format->type)) { +++ width = format->fmt.pix_mp.width; +++ height = format->fmt.pix_mp.height; +++ pixelformat 
= format->fmt.pix_mp.pixelformat; +++ bpl = format->fmt.pix_mp.plane_fmt[0].bytesperline; +++ } +++ else { +++ width = format->fmt.pix.width; +++ height = format->fmt.pix.height; +++ pixelformat = format->fmt.pix.pixelformat; +++ bpl = format->fmt.pix.bytesperline; +++ } +++ +++ switch (pixelformat) { +++ case V4L2_PIX_FMT_NV12: +++ layer->format = DRM_FORMAT_NV12; +++ desc->objects[0].format_modifier = DRM_FORMAT_MOD_LINEAR; +++ break; +++#if CONFIG_SAND +++ case V4L2_PIX_FMT_NV12_COL128: +++ layer->format = DRM_FORMAT_NV12; +++ desc->objects[0].format_modifier = DRM_FORMAT_MOD_BROADCOM_SAND128_COL_HEIGHT(bpl); +++ break; +++ case V4L2_PIX_FMT_NV12_10_COL128: +++ layer->format = DRM_FORMAT_P030; +++ desc->objects[0].format_modifier = DRM_FORMAT_MOD_BROADCOM_SAND128_COL_HEIGHT(bpl); +++ break; +++#endif +++#ifdef DRM_FORMAT_MOD_ALLWINNER_TILED +++ case V4L2_PIX_FMT_SUNXI_TILED_NV12: +++ layer->format = DRM_FORMAT_NV12; +++ desc->objects[0].format_modifier = DRM_FORMAT_MOD_ALLWINNER_TILED; +++ break; +++#endif +++#if defined(V4L2_PIX_FMT_NV15) && defined(DRM_FORMAT_NV15) +++ case V4L2_PIX_FMT_NV15: +++ layer->format = DRM_FORMAT_NV15; +++ desc->objects[0].format_modifier = DRM_FORMAT_MOD_LINEAR; +++ break; +++#endif +++ case V4L2_PIX_FMT_NV16: +++ layer->format = DRM_FORMAT_NV16; +++ desc->objects[0].format_modifier = DRM_FORMAT_MOD_LINEAR; +++ break; +++#if defined(V4L2_PIX_FMT_NV20) && defined(DRM_FORMAT_NV20) +++ case V4L2_PIX_FMT_NV20: +++ layer->format = DRM_FORMAT_NV20; +++ desc->objects[0].format_modifier = DRM_FORMAT_MOD_LINEAR; +++ break; +++#endif +++ default: +++ return -1; +++ } +++ +++ desc->nb_objects = 1; +++ desc->objects[0].fd = -1; +++ desc->objects[0].size = 0; +++ +++ desc->nb_layers = 1; +++ layer->nb_planes = 2; +++ +++ layer->planes[0].object_index = 0; +++ layer->planes[0].offset = 0; +++ layer->planes[0].pitch = bpl; +++#if CONFIG_SAND +++ if (pixelformat == V4L2_PIX_FMT_NV12_COL128) { +++ layer->planes[1].object_index = 0; +++ layer->planes[1].offset = height * 128; +++ layer->planes[0].pitch = width; +++ layer->planes[1].pitch = width; +++ } +++ else if (pixelformat == V4L2_PIX_FMT_NV12_10_COL128) { +++ layer->planes[1].object_index = 0; +++ layer->planes[1].offset = height * 128; +++ layer->planes[0].pitch = width * 2; // Lies but it keeps DRM import happy +++ layer->planes[1].pitch = width * 2; +++ } +++ else +++#endif +++ { +++ layer->planes[1].object_index = 0; +++ layer->planes[1].offset = layer->planes[0].pitch * height; +++ layer->planes[1].pitch = layer->planes[0].pitch; +++ } +++ +++ return 0; +++} +++ +++static int +++set_req_ctls(V4L2RequestContextHEVC *ctx, struct media_request * const mreq, +++ struct req_controls *const controls, +++#if HEVC_CTRLS_VERSION >= 2 +++ struct v4l2_ctrl_hevc_decode_params * const dec, +++#endif +++ struct v4l2_ctrl_hevc_slice_params * const slices, const unsigned int slice_count, +++ void * const offsets, const size_t offset_count) +++{ +++ int rv; +++#if HEVC_CTRLS_VERSION >= 2 +++ unsigned int n = 3; +++#else +++ unsigned int n = 2; +++#endif +++ +++ struct v4l2_ext_control control[6] = { +++ { +++ .id = V4L2_CID_STATELESS_HEVC_SPS, +++ .ptr = &controls->sps, +++ .size = sizeof(controls->sps), +++ }, +++ { +++ .id = V4L2_CID_STATELESS_HEVC_PPS, +++ .ptr = &controls->pps, +++ .size = sizeof(controls->pps), +++ }, +++#if HEVC_CTRLS_VERSION >= 2 +++ { +++ .id = V4L2_CID_STATELESS_HEVC_DECODE_PARAMS, +++ .ptr = dec, +++ .size = sizeof(*dec), +++ }, +++#endif +++ }; +++ +++ if (slices) +++ control[n++] = (struct 
v4l2_ext_control) { +++ .id = V4L2_CID_STATELESS_HEVC_SLICE_PARAMS, +++ .ptr = slices, +++ .size = sizeof(*slices) * slice_count, +++ }; +++ +++ if (controls->has_scaling) +++ control[n++] = (struct v4l2_ext_control) { +++ .id = V4L2_CID_STATELESS_HEVC_SCALING_MATRIX, +++ .ptr = &controls->scaling_matrix, +++ .size = sizeof(controls->scaling_matrix), +++ }; +++ +++#if HEVC_CTRLS_VERSION >= 4 +++ if (offsets) +++ control[n++] = (struct v4l2_ext_control) { +++ .id = V4L2_CID_STATELESS_HEVC_ENTRY_POINT_OFFSETS, +++ .ptr = offsets, +++ .size = sizeof(((struct V4L2MediaReqDescriptor *)0)->offsets[0]) * offset_count, +++ }; +++#endif +++ +++ rv = mediabufs_ctl_set_ext_ctrls(ctx->mbufs, mreq, control, n); +++ +++ return rv; +++} +++ +++// This only works because we started out from a single coded frame buffer +++// that will remain intact until after end_frame +++static int v4l2_request_hevc_decode_slice(AVCodecContext *avctx, const uint8_t *buffer, uint32_t size) +++{ +++ const HEVCContext * const h = avctx->priv_data; +++ V4L2RequestContextHEVC * const ctx = avctx->internal->hwaccel_priv_data; +++ V4L2MediaReqDescriptor * const rd = (V4L2MediaReqDescriptor*)h->ref->frame->data[0]; +++ int bcount = get_bits_count(&h->HEVClc->gb); +++ uint32_t boff = (ptr_from_index(buffer, bcount/8 + 1) - (buffer + bcount/8 + 1)) * 8 + bcount; +++ +++ const unsigned int n = rd->num_slices; +++ const unsigned int block_start = (n / ctx->max_slices) * ctx->max_slices; +++ +++ int rv; +++ struct slice_info * si; +++ +++ // This looks dodgy but we know that FFmpeg has parsed this from a buffer +++ // that contains the entire frame including the start code +++ if (ctx->start_code == V4L2_STATELESS_HEVC_START_CODE_ANNEX_B) { +++ buffer -= 3; +++ size += 3; +++ boff += 24; +++ if (buffer[0] != 0 || buffer[1] != 0 || buffer[2] != 1) { +++ av_log(avctx, AV_LOG_ERROR, "Start code requested but missing %02x:%02x:%02x\n", +++ buffer[0], buffer[1], buffer[2]); +++ } +++ } +++ +++ if (ctx->decode_mode == V4L2_STATELESS_HEVC_DECODE_MODE_FRAME_BASED) { +++ if (rd->slices == NULL) { +++ if ((rd->slices = av_mallocz(sizeof(*rd->slices))) == NULL) +++ return AVERROR(ENOMEM); +++ rd->slices->ptr = buffer; +++ rd->num_slices = 1; +++ } +++ rd->slices->len = buffer - rd->slices->ptr + size; +++ return 0; +++ } +++ +++ if ((rv = slice_add(rd)) != 0) +++ return rv; +++ +++ si = rd->slices + n; +++ si->ptr = buffer; +++ si->len = size; +++ si->n_offsets = rd->num_offsets; +++ +++ if (n != block_start) { +++ struct slice_info *const si0 = rd->slices + block_start; +++ const size_t offset = (buffer - si0->ptr); +++ boff += offset * 8; +++ size += offset; +++ si0->len = si->len + offset; +++ } +++ +++#if HEVC_CTRLS_VERSION >= 2 +++ if (n == 0) +++ fill_decode_params(h, &rd->dec); +++ fill_slice_params(h, &rd->dec, rd->slice_params + n, size * 8, boff); +++#else +++ fill_slice_params(h, rd->slice_params + n, size * 8, boff); +++#endif +++ if (ctx->max_offsets != 0 && +++ (rv = offsets_add(rd, h->sh.num_entry_point_offsets, h->sh.entry_point_offset)) != 0) +++ return rv; +++ +++ return 0; +++} +++ +++static void v4l2_request_hevc_abort_frame(AVCodecContext * const avctx) +++{ +++ const HEVCContext * const h = avctx->priv_data; +++ if (h->ref != NULL) { +++ V4L2MediaReqDescriptor *const rd = (V4L2MediaReqDescriptor *)h->ref->frame->data[0]; +++ V4L2RequestContextHEVC * const ctx = avctx->internal->hwaccel_priv_data; +++ +++ media_request_abort(&rd->req); +++ mediabufs_src_qent_abort(ctx->mbufs, &rd->qe_src); +++ +++ 
decode_q_remove(&ctx->decode_q, &rd->decode_ent); +++ } +++} +++ +++static int send_slice(AVCodecContext * const avctx, +++ V4L2MediaReqDescriptor * const rd, +++ struct req_controls *const controls, +++ const unsigned int i, const unsigned int j) +++{ +++ V4L2RequestContextHEVC * const ctx = avctx->internal->hwaccel_priv_data; +++ +++ const int is_last = (j == rd->num_slices); +++ struct slice_info *const si = rd->slices + i; +++ struct media_request * req = NULL; +++ struct qent_src * src = NULL; +++ MediaBufsStatus stat; +++ void * offsets = rd->offsets + rd->slices[i].n_offsets; +++ size_t n_offsets = (is_last ? rd->num_offsets : rd->slices[j].n_offsets) - rd->slices[i].n_offsets; +++ +++ if ((req = media_request_get(ctx->mpool)) == NULL) { +++ av_log(avctx, AV_LOG_ERROR, "%s: Failed to alloc media request\n", __func__); +++ return AVERROR(ENOMEM); +++ } +++ +++ if (set_req_ctls(ctx, req, +++ controls, +++#if HEVC_CTRLS_VERSION >= 2 +++ &rd->dec, +++#endif +++ rd->slice_params + i, j - i, +++ offsets, n_offsets)) { +++ av_log(avctx, AV_LOG_ERROR, "%s: Failed to set req ctls\n", __func__); +++ goto fail1; +++ } +++ +++ if ((src = mediabufs_src_qent_get(ctx->mbufs)) == NULL) { +++ av_log(avctx, AV_LOG_ERROR, "%s: Failed to get src buffer\n", __func__); +++ goto fail1; +++ } +++ +++ if (qent_src_data_copy(src, 0, si->ptr, si->len, ctx->dbufs) != 0) { +++ av_log(avctx, AV_LOG_ERROR, "%s: Failed data copy\n", __func__); +++ goto fail2; +++ } +++ +++ if (qent_src_params_set(src, &controls->tv)) { +++ av_log(avctx, AV_LOG_ERROR, "%s: Failed src param set\n", __func__); +++ goto fail2; +++ } +++ +++ stat = mediabufs_start_request(ctx->mbufs, &req, &src, +++ i == 0 ? rd->qe_dst : NULL, +++ is_last); +++ +++ if (stat != MEDIABUFS_STATUS_SUCCESS) { +++ av_log(avctx, AV_LOG_ERROR, "%s: Failed to start request\n", __func__); +++ return AVERROR_UNKNOWN; +++ } +++ return 0; +++ +++fail2: +++ mediabufs_src_qent_abort(ctx->mbufs, &src); +++fail1: +++ media_request_abort(&req); +++ return AVERROR_UNKNOWN; +++} +++ +++static int v4l2_request_hevc_end_frame(AVCodecContext *avctx) +++{ +++ const HEVCContext * const h = avctx->priv_data; +++ V4L2MediaReqDescriptor *rd = (V4L2MediaReqDescriptor*)h->ref->frame->data[0]; +++ V4L2RequestContextHEVC *ctx = avctx->internal->hwaccel_priv_data; +++ struct req_controls rc; +++ unsigned int i; +++ int rv; +++ +++ // It is possible, though maybe a bug, to get an end_frame without +++ // a previous start_frame. If we do then give up. +++ if (!decode_q_in_q(&rd->decode_ent)) { +++ av_log(avctx, AV_LOG_DEBUG, "%s: Frame not in decode Q\n", __func__); +++ return AVERROR_INVALIDDATA; +++ } +++ +++ { +++ const ScalingList *sl = h->ps.pps->scaling_list_data_present_flag ? +++ &h->ps.pps->scaling_list : +++ h->ps.sps->scaling_list_enable_flag ? +++ &h->ps.sps->scaling_list : NULL; +++ +++ +++ memset(&rc, 0, sizeof(rc)); +++ rc.tv = cvt_dpb_to_tv(rd->timestamp); +++ fill_sps(&rc.sps, h->ps.sps); +++ fill_pps(&rc.pps, h->ps.pps); +++ if (sl) { +++ rc.has_scaling = 1; +++ fill_scaling_matrix(sl, &rc.scaling_matrix); +++ } +++ } +++ +++ decode_q_wait(&ctx->decode_q, &rd->decode_ent); +++ +++ // qe_dst needs to be bound to the data buffer and only returned when that is +++ // Alloc almost certainly wants to be serialised if there is any chance of blocking +++ // so we get the next frame to be free in the thread that needs it for decode first. 
+++ //
+++ // In our current world this probably isn't a concern but put it here anyway
+++ if (!rd->qe_dst)
+++ {
+++ if ((rd->qe_dst = mediabufs_dst_qent_alloc(ctx->mbufs, ctx->dbufs)) == NULL) {
+++ av_log(avctx, AV_LOG_ERROR, "%s: Failed to get dst buffer\n", __func__);
+++ rv = AVERROR(ENOMEM);
+++ goto fail;
+++ }
+++ }
+++
+++ // Send as slices
+++ for (i = 0; i < rd->num_slices; i += ctx->max_slices) {
+++ const unsigned int e = FFMIN(rd->num_slices, i + ctx->max_slices);
+++ if ((rv = send_slice(avctx, rd, &rc, i, e)) != 0)
+++ goto fail;
+++ }
+++
+++ // Set the drm_prime descriptor
+++ drm_from_format(&rd->drm, mediabufs_dst_fmt(ctx->mbufs));
+++ rd->drm.objects[0].fd = dmabuf_fd(qent_dst_dmabuf(rd->qe_dst, 0));
+++ rd->drm.objects[0].size = dmabuf_size(qent_dst_dmabuf(rd->qe_dst, 0));
+++
+++ decode_q_remove(&ctx->decode_q, &rd->decode_ent);
+++ return 0;
+++
+++fail:
+++ decode_q_remove(&ctx->decode_q, &rd->decode_ent);
+++ return rv;
+++}
+++
+++static inline int
+++ctrl_valid(const struct v4l2_query_ext_ctrl * const c, const int64_t v)
+++{
+++ return v >= c->minimum && v <= c->maximum;
+++}
+++
+++// Initial check & init
+++static int
+++probe(AVCodecContext * const avctx, V4L2RequestContextHEVC * const ctx)
+++{
+++ const HEVCContext *h = avctx->priv_data;
+++ const HEVCSPS * const sps = h->ps.sps;
+++ struct v4l2_ctrl_hevc_sps ctrl_sps;
+++ unsigned int i;
+++
+++ // Check for var slice array
+++ struct v4l2_query_ext_ctrl qc[] = {
+++ { .id = V4L2_CID_STATELESS_HEVC_SLICE_PARAMS },
+++ { .id = V4L2_CID_STATELESS_HEVC_DECODE_MODE, },
+++ { .id = V4L2_CID_STATELESS_HEVC_SPS },
+++ { .id = V4L2_CID_STATELESS_HEVC_PPS },
+++ { .id = V4L2_CID_STATELESS_HEVC_SCALING_MATRIX },
+++#if HEVC_CTRLS_VERSION >= 2
+++ { .id = V4L2_CID_STATELESS_HEVC_DECODE_PARAMS },
+++#endif
+++ };
+++ // Order & size must match!
+++ static const size_t ctrl_sizes[] = { +++ sizeof(struct v4l2_ctrl_hevc_slice_params), +++ sizeof(int32_t), +++ sizeof(struct v4l2_ctrl_hevc_sps), +++ sizeof(struct v4l2_ctrl_hevc_pps), +++ sizeof(struct v4l2_ctrl_hevc_scaling_matrix), +++#if HEVC_CTRLS_VERSION >= 2 +++ sizeof(struct v4l2_ctrl_hevc_decode_params), +++#endif +++ }; +++ const unsigned int noof_ctrls = FF_ARRAY_ELEMS(qc); +++ +++#if HEVC_CTRLS_VERSION == 2 +++ if (mediabufs_ctl_driver_version(ctx->mbufs) >= MEDIABUFS_DRIVER_VERSION(5, 18, 0)) +++ return AVERROR(EINVAL); +++#elif HEVC_CTRLS_VERSION == 3 +++ if (mediabufs_ctl_driver_version(ctx->mbufs) < MEDIABUFS_DRIVER_VERSION(5, 18, 0)) +++ return AVERROR(EINVAL); +++#endif +++ +++ mediabufs_ctl_query_ext_ctrls(ctx->mbufs, qc, noof_ctrls); +++ i = 0; +++#if HEVC_CTRLS_VERSION >= 4 +++ // Skip slice check if no slice mode +++ if (qc[1].type != 0 && !ctrl_valid(qc + 1, V4L2_STATELESS_HEVC_DECODE_MODE_SLICE_BASED)) +++ i = 1; +++#else +++ // Fail frame mode silently for anything prior to V4 +++ if (qc[1].type == 0 || !ctrl_valid(qc + 1, V4L2_STATELESS_HEVC_DECODE_MODE_SLICE_BASED)) +++ return AVERROR(EINVAL); +++#endif +++ for (; i != noof_ctrls; ++i) { +++ if (qc[i].type == 0) { +++ av_log(avctx, AV_LOG_DEBUG, "Probed V%d control %#x missing\n", HEVC_CTRLS_VERSION, qc[i].id); +++ return AVERROR(EINVAL); +++ } +++ if (ctrl_sizes[i] != (size_t)qc[i].elem_size) { +++ av_log(avctx, AV_LOG_DEBUG, "Probed V%d control %d size mismatch %zu != %zu\n", +++ HEVC_CTRLS_VERSION, i, ctrl_sizes[i], (size_t)qc[i].elem_size); +++ return AVERROR(EINVAL); +++ } +++ } +++ +++ fill_sps(&ctrl_sps, sps); +++ +++ if (mediabufs_set_ext_ctrl(ctx->mbufs, NULL, V4L2_CID_STATELESS_HEVC_SPS, &ctrl_sps, sizeof(ctrl_sps))) { +++ av_log(avctx, AV_LOG_ERROR, "Failed to set initial SPS\n"); +++ return AVERROR(EINVAL); +++ } +++ +++ return 0; +++} +++ +++// Final init +++static int +++set_controls(AVCodecContext * const avctx, V4L2RequestContextHEVC * const ctx) +++{ +++ int ret; +++ +++ struct v4l2_query_ext_ctrl querys[] = { +++ { .id = V4L2_CID_STATELESS_HEVC_DECODE_MODE, }, +++ { .id = V4L2_CID_STATELESS_HEVC_START_CODE, }, +++ { .id = V4L2_CID_STATELESS_HEVC_SLICE_PARAMS, }, +++#if HEVC_CTRLS_VERSION >= 4 +++ { .id = V4L2_CID_STATELESS_HEVC_ENTRY_POINT_OFFSETS, }, +++#endif +++ }; +++ +++ struct v4l2_ext_control ctrls[] = { +++ { .id = V4L2_CID_STATELESS_HEVC_DECODE_MODE, }, +++ { .id = V4L2_CID_STATELESS_HEVC_START_CODE, }, +++ }; +++ +++ mediabufs_ctl_query_ext_ctrls(ctx->mbufs, querys, FF_ARRAY_ELEMS(querys)); +++ +++ ctx->max_slices = (!(querys[2].flags & V4L2_CTRL_FLAG_DYNAMIC_ARRAY) || +++ querys[2].nr_of_dims != 1 || querys[2].dims[0] == 0) ? +++ 1 : querys[2].dims[0]; +++ av_log(avctx, AV_LOG_DEBUG, "%s: Max slices %d\n", __func__, ctx->max_slices); +++ +++#if HEVC_CTRLS_VERSION >= 4 +++ ctx->max_offsets = (querys[3].type == 0 || querys[3].nr_of_dims != 1) ? 
+++ 0 : querys[3].dims[0]; +++ av_log(avctx, AV_LOG_DEBUG, "%s: Entry point offsets %d\n", __func__, ctx->max_offsets); +++#else +++ ctx->max_offsets = 0; +++#endif +++ +++ if (querys[0].default_value == V4L2_STATELESS_HEVC_DECODE_MODE_SLICE_BASED || +++ querys[0].default_value == V4L2_STATELESS_HEVC_DECODE_MODE_FRAME_BASED) +++ ctx->decode_mode = querys[0].default_value; +++ else if (ctrl_valid(querys + 0, V4L2_STATELESS_HEVC_DECODE_MODE_FRAME_BASED)) +++ ctx->decode_mode = V4L2_STATELESS_HEVC_DECODE_MODE_FRAME_BASED; +++ else if (ctrl_valid(querys + 0, V4L2_STATELESS_HEVC_DECODE_MODE_SLICE_BASED)) +++ ctx->decode_mode = V4L2_STATELESS_HEVC_DECODE_MODE_SLICE_BASED; +++ else { +++ av_log(avctx, AV_LOG_ERROR, "%s: unsupported decode mode\n", __func__); +++ return AVERROR(EINVAL); +++ } +++ +++ if (querys[1].default_value == V4L2_STATELESS_HEVC_START_CODE_NONE || +++ querys[1].default_value == V4L2_STATELESS_HEVC_START_CODE_ANNEX_B) +++ ctx->start_code = querys[1].default_value; +++ else if (ctrl_valid(querys + 1, V4L2_STATELESS_HEVC_START_CODE_ANNEX_B)) +++ ctx->start_code = V4L2_STATELESS_HEVC_START_CODE_ANNEX_B; +++ else if (ctrl_valid(querys + 1, V4L2_STATELESS_HEVC_START_CODE_NONE)) +++ ctx->start_code = V4L2_STATELESS_HEVC_START_CODE_NONE; +++ else { +++ av_log(avctx, AV_LOG_ERROR, "%s: unsupported start code\n", __func__); +++ return AVERROR(EINVAL); +++ } +++ +++ // If we are in slice mode & START_CODE_NONE supported then pick that +++ // as it doesn't require the slightly dodgy look backwards in our raw buffer +++ if (ctx->decode_mode == V4L2_STATELESS_HEVC_DECODE_MODE_SLICE_BASED && +++ ctrl_valid(querys + 1, V4L2_STATELESS_HEVC_START_CODE_NONE)) +++ ctx->start_code = V4L2_STATELESS_HEVC_START_CODE_NONE; +++ +++ ctrls[0].value = ctx->decode_mode; +++ ctrls[1].value = ctx->start_code; +++ +++ ret = mediabufs_ctl_set_ext_ctrls(ctx->mbufs, NULL, ctrls, FF_ARRAY_ELEMS(ctrls)); +++ return !ret ? 
0 : AVERROR(-ret); +++} +++ +++static void v4l2_req_frame_free(void *opaque, uint8_t *data) +++{ +++ AVCodecContext *avctx = opaque; +++ V4L2MediaReqDescriptor * const rd = (V4L2MediaReqDescriptor*)data; +++ +++ av_log(NULL, AV_LOG_DEBUG, "%s: avctx=%p data=%p\n", __func__, avctx, data); +++ +++ qent_dst_unref(&rd->qe_dst); +++ +++ // We don't expect req or qe_src to be set +++ if (rd->req || rd->qe_src) +++ av_log(NULL, AV_LOG_ERROR, "%s: qe_src %p or req %p not NULL\n", __func__, rd->req, rd->qe_src); +++ +++ av_freep(&rd->slices); +++ av_freep(&rd->slice_params); +++ av_freep(&rd->offsets); +++ +++ av_free(rd); +++} +++ +++static AVBufferRef *v4l2_req_frame_alloc(void *opaque, int size) +++{ +++ AVCodecContext *avctx = opaque; +++// V4L2RequestContextHEVC *ctx = avctx->internal->hwaccel_priv_data; +++// V4L2MediaReqDescriptor *req; +++ AVBufferRef *ref; +++ uint8_t *data; +++// int ret; +++ +++ data = av_mallocz(size); +++ if (!data) +++ return NULL; +++ +++ av_log(avctx, AV_LOG_DEBUG, "%s: avctx=%p size=%d data=%p\n", __func__, avctx, size, data); +++ ref = av_buffer_create(data, size, v4l2_req_frame_free, avctx, 0); +++ if (!ref) { +++ av_freep(&data); +++ return NULL; +++ } +++ return ref; +++} +++ +++#if 0 +++static void v4l2_req_pool_free(void *opaque) +++{ +++ av_log(NULL, AV_LOG_DEBUG, "%s: opaque=%p\n", __func__, opaque); +++} +++ +++static void v4l2_req_hwframe_ctx_free(AVHWFramesContext *hwfc) +++{ +++ av_log(NULL, AV_LOG_DEBUG, "%s: hwfc=%p pool=%p\n", __func__, hwfc, hwfc->pool); +++ +++ av_buffer_pool_uninit(&hwfc->pool); +++} +++#endif +++ +++static int frame_params(AVCodecContext *avctx, AVBufferRef *hw_frames_ctx) +++{ +++ V4L2RequestContextHEVC *ctx = avctx->internal->hwaccel_priv_data; +++ AVHWFramesContext *hwfc = (AVHWFramesContext*)hw_frames_ctx->data; +++ const struct v4l2_format *vfmt = mediabufs_dst_fmt(ctx->mbufs); +++ +++ hwfc->format = AV_PIX_FMT_DRM_PRIME; +++ hwfc->sw_format = pixel_format_from_format(vfmt); +++ if (V4L2_TYPE_IS_MULTIPLANAR(vfmt->type)) { +++ hwfc->width = vfmt->fmt.pix_mp.width; +++ hwfc->height = vfmt->fmt.pix_mp.height; +++ } else { +++ hwfc->width = vfmt->fmt.pix.width; +++ hwfc->height = vfmt->fmt.pix.height; +++ } +++#if 0 +++ hwfc->pool = av_buffer_pool_init2(sizeof(V4L2MediaReqDescriptor), avctx, v4l2_req_frame_alloc, v4l2_req_pool_free); +++ if (!hwfc->pool) +++ return AVERROR(ENOMEM); +++ +++ hwfc->free = v4l2_req_hwframe_ctx_free; +++ +++ hwfc->initial_pool_size = 1; +++ +++ switch (avctx->codec_id) { +++ case AV_CODEC_ID_VP9: +++ hwfc->initial_pool_size += 8; +++ break; +++ case AV_CODEC_ID_VP8: +++ hwfc->initial_pool_size += 3; +++ break; +++ default: +++ hwfc->initial_pool_size += 2; +++ } +++#endif +++ av_log(avctx, AV_LOG_DEBUG, "%s: avctx=%p ctx=%p hw_frames_ctx=%p hwfc=%p pool=%p width=%d height=%d initial_pool_size=%d\n", __func__, avctx, ctx, hw_frames_ctx, hwfc, hwfc->pool, hwfc->width, hwfc->height, hwfc->initial_pool_size); +++ +++ return 0; +++} +++ +++static int alloc_frame(AVCodecContext * avctx, AVFrame *frame) +++{ +++ int rv; +++ +++ frame->buf[0] = v4l2_req_frame_alloc(avctx, sizeof(V4L2MediaReqDescriptor)); +++ if (!frame->buf[0]) +++ return AVERROR(ENOMEM); +++ +++ frame->data[0] = frame->buf[0]->data; +++ +++ frame->hw_frames_ctx = av_buffer_ref(avctx->hw_frames_ctx); +++ +++ if ((rv = ff_attach_decode_data(frame)) != 0) { +++ av_log(avctx, AV_LOG_ERROR, "Failed to attach decode data to frame\n"); +++ av_frame_unref(frame); +++ return rv; +++ } +++ +++ return 0; +++} +++ +++const v4l2_req_decode_fns 
V(ff_v4l2_req_hevc) = { +++ .src_pix_fmt_v4l2 = V4L2_PIX_FMT_HEVC_SLICE, +++ .name = "V4L2 HEVC stateless V" STR(HEVC_CTRLS_VERSION), +++ .probe = probe, +++ .set_controls = set_controls, +++ +++ .start_frame = v4l2_request_hevc_start_frame, +++ .decode_slice = v4l2_request_hevc_decode_slice, +++ .end_frame = v4l2_request_hevc_end_frame, +++ .abort_frame = v4l2_request_hevc_abort_frame, +++ .frame_params = frame_params, +++ .alloc_frame = alloc_frame, +++}; +++ ++diff --git a/libavcodec/v4l2_req_media.c b/libavcodec/v4l2_req_media.c ++new file mode 100644 ++index 0000000000..1a9944774a ++--- /dev/null +++++ b/libavcodec/v4l2_req_media.c ++@@ -0,0 +1,1802 @@ +++/* +++ * Copyright (C) 2018 Paul Kocialkowski +++ * +++ * Permission is hereby granted, free of charge, to any person obtaining a +++ * copy of this software and associated documentation files (the +++ * "Software"), to deal in the Software without restriction, including +++ * without limitation the rights to use, copy, modify, merge, publish, +++ * distribute, sub license, and/or sell copies of the Software, and to +++ * permit persons to whom the Software is furnished to do so, subject to +++ * the following conditions: +++ * +++ * The above copyright notice and this permission notice (including the +++ * next paragraph) shall be included in all copies or substantial portions +++ * of the Software. +++ * +++ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS +++ * OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF +++ * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NON-INFRINGEMENT. +++ * IN NO EVENT SHALL PRECISION INSIGHT AND/OR ITS SUPPLIERS BE LIABLE FOR +++ * ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, +++ * TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE +++ * SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE. +++ */ +++ +++#include +++#include +++#include +++#include +++#include +++#include +++#include +++#include +++#include +++#include +++#include +++#include +++#include +++#include +++#include +++#include +++ +++#include +++ +++#include "v4l2_req_dmabufs.h" +++#include "v4l2_req_media.h" +++#include "v4l2_req_pollqueue.h" +++#include "v4l2_req_utils.h" +++#include "weak_link.h" +++ +++ +++/* floor(log2(x)) */ +++static unsigned int log2_size(size_t x) +++{ +++ unsigned int n = 0; +++ +++ if (x & ~0xffff) { +++ n += 16; +++ x >>= 16; +++ } +++ if (x & ~0xff) { +++ n += 8; +++ x >>= 8; +++ } +++ if (x & ~0xf) { +++ n += 4; +++ x >>= 4; +++ } +++ if (x & ~3) { +++ n += 2; +++ x >>= 2; +++ } +++ return (x & ~1) ? n + 1 : n; +++} +++ +++static size_t round_up_size(const size_t x) +++{ +++ /* Admit no size < 256 */ +++ const unsigned int n = x < 256 ? 8 : log2_size(x) - 1; +++ +++ return x >= (3 << n) ? 
4 << n : (3 << n); +++} +++ +++struct media_request; +++ +++struct media_pool { +++ int fd; +++ sem_t sem; +++ pthread_mutex_t lock; +++ struct media_request * free_reqs; +++ struct pollqueue * pq; +++}; +++ +++struct media_request { +++ struct media_request * next; +++ struct media_pool * mp; +++ int fd; +++ struct polltask * pt; +++}; +++ +++static inline enum v4l2_memory +++mediabufs_memory_to_v4l2(const enum mediabufs_memory m) +++{ +++ return (enum v4l2_memory)m; +++} +++ +++const char * +++mediabufs_memory_name(const enum mediabufs_memory m) +++{ +++ switch (m) { +++ case MEDIABUFS_MEMORY_UNSET: +++ return "Unset"; +++ case MEDIABUFS_MEMORY_MMAP: +++ return "MMap"; +++ case MEDIABUFS_MEMORY_USERPTR: +++ return "UserPtr"; +++ case MEDIABUFS_MEMORY_OVERLAY: +++ return "Overlay"; +++ case MEDIABUFS_MEMORY_DMABUF: +++ return "DMABuf"; +++ default: +++ break; +++ } +++ return "Unknown"; +++} +++ +++ +++static inline int do_trywait(sem_t *const sem) +++{ +++ while (sem_trywait(sem)) { +++ if (errno != EINTR) +++ return -errno; +++ } +++ return 0; +++} +++ +++static inline int do_wait(sem_t *const sem) +++{ +++ while (sem_wait(sem)) { +++ if (errno != EINTR) +++ return -errno; +++ } +++ return 0; +++} +++ +++static int request_buffers(int video_fd, unsigned int type, +++ enum mediabufs_memory memory, unsigned int buffers_count) +++{ +++ struct v4l2_requestbuffers buffers; +++ int rc; +++ +++ memset(&buffers, 0, sizeof(buffers)); +++ buffers.type = type; +++ buffers.memory = mediabufs_memory_to_v4l2(memory); +++ buffers.count = buffers_count; +++ +++ rc = ioctl(video_fd, VIDIOC_REQBUFS, &buffers); +++ if (rc < 0) { +++ rc = -errno; +++ request_log("Unable to request %d type %d buffers: %s\n", buffers_count, type, strerror(-rc)); +++ return rc; +++ } +++ +++ return 0; +++} +++ +++ +++static int set_stream(int video_fd, unsigned int type, bool enable) +++{ +++ enum v4l2_buf_type buf_type = type; +++ int rc; +++ +++ rc = ioctl(video_fd, enable ? VIDIOC_STREAMON : VIDIOC_STREAMOFF, +++ &buf_type); +++ if (rc < 0) { +++ rc = -errno; +++ request_log("Unable to %sable stream: %s\n", +++ enable ? 
"en" : "dis", strerror(-rc)); +++ return rc; +++ } +++ +++ return 0; +++} +++ +++ +++ +++struct media_request * media_request_get(struct media_pool * const mp) +++{ +++ struct media_request *req = NULL; +++ +++ /* Timeout handled by poll code */ +++ if (do_wait(&mp->sem)) +++ return NULL; +++ +++ pthread_mutex_lock(&mp->lock); +++ req = mp->free_reqs; +++ if (req) { +++ mp->free_reqs = req->next; +++ req->next = NULL; +++ } +++ pthread_mutex_unlock(&mp->lock); +++ return req; +++} +++ +++int media_request_fd(const struct media_request * const req) +++{ +++ return req->fd; +++} +++ +++int media_request_start(struct media_request * const req) +++{ +++ while (ioctl(req->fd, MEDIA_REQUEST_IOC_QUEUE, NULL) == -1) +++ { +++ const int err = errno; +++ if (err == EINTR) +++ continue; +++ request_log("%s: Failed to Q media: (%d) %s\n", __func__, err, strerror(err)); +++ return -err; +++ } +++ +++ pollqueue_add_task(req->pt, 2000); +++ return 0; +++} +++ +++static void media_request_done(void *v, short revents) +++{ +++ struct media_request *const req = v; +++ struct media_pool *const mp = req->mp; +++ +++ /* ** Not sure what to do about timeout */ +++ +++ if (ioctl(req->fd, MEDIA_REQUEST_IOC_REINIT, NULL) < 0) +++ request_log("Unable to reinit media request: %s\n", +++ strerror(errno)); +++ +++ pthread_mutex_lock(&mp->lock); +++ req->next = mp->free_reqs; +++ mp->free_reqs = req; +++ pthread_mutex_unlock(&mp->lock); +++ sem_post(&mp->sem); +++} +++ +++int media_request_abort(struct media_request ** const preq) +++{ +++ struct media_request * const req = *preq; +++ +++ if (req == NULL) +++ return 0; +++ *preq = NULL; +++ +++ media_request_done(req, 0); +++ return 0; +++} +++ +++static void delete_req_chain(struct media_request * const chain) +++{ +++ struct media_request * next = chain; +++ while (next) { +++ struct media_request * const req = next; +++ next = req->next; +++ if (req->pt) +++ polltask_delete(&req->pt); +++ if (req->fd != -1) +++ close(req->fd); +++ free(req); +++ } +++} +++ +++struct media_pool * media_pool_new(const char * const media_path, +++ struct pollqueue * const pq, +++ const unsigned int n) +++{ +++ struct media_pool * const mp = calloc(1, sizeof(*mp)); +++ unsigned int i; +++ +++ if (!mp) +++ goto fail0; +++ +++ mp->pq = pq; +++ pthread_mutex_init(&mp->lock, NULL); +++ mp->fd = open(media_path, O_RDWR | O_NONBLOCK); +++ if (mp->fd == -1) { +++ request_log("Failed to open '%s': %s\n", media_path, strerror(errno)); +++ goto fail1; +++ } +++ +++ for (i = 0; i != n; ++i) { +++ struct media_request * req = malloc(sizeof(*req)); +++ if (!req) +++ goto fail4; +++ +++ *req = (struct media_request){ +++ .next = mp->free_reqs, +++ .mp = mp, +++ .fd = -1 +++ }; +++ mp->free_reqs = req; +++ +++ if (ioctl(mp->fd, MEDIA_IOC_REQUEST_ALLOC, &req->fd) == -1) { +++ request_log("Failed to alloc request %d: %s\n", i, strerror(errno)); +++ goto fail4; +++ } +++ +++ req->pt = polltask_new(pq, req->fd, POLLPRI, media_request_done, req); +++ if (!req->pt) +++ goto fail4; +++ } +++ +++ sem_init(&mp->sem, 0, n); +++ +++ return mp; +++ +++fail4: +++ delete_req_chain(mp->free_reqs); +++ close(mp->fd); +++ pthread_mutex_destroy(&mp->lock); +++fail1: +++ free(mp); +++fail0: +++ return NULL; +++} +++ +++void media_pool_delete(struct media_pool ** pMp) +++{ +++ struct media_pool * const mp = *pMp; +++ +++ if (!mp) +++ return; +++ *pMp = NULL; +++ +++ delete_req_chain(mp->free_reqs); +++ close(mp->fd); +++ sem_destroy(&mp->sem); +++ pthread_mutex_destroy(&mp->lock); +++ free(mp); +++} +++ +++ +++#define 
INDEX_UNSET (~(uint32_t)0) +++ +++enum qent_status { +++ QENT_NEW = 0, // Initial state - shouldn't last +++ QENT_FREE, // On free chain +++ QENT_PENDING, // User has ent +++ QENT_WAITING, // On inuse +++ QENT_DONE, // Frame rx +++ QENT_ERROR, // Error +++ QENT_IMPORT +++}; +++ +++struct qent_base { +++ atomic_int ref_count; +++ struct qent_base *next; +++ struct qent_base *prev; +++ enum qent_status status; +++ enum mediabufs_memory memtype; +++ uint32_t index; +++ struct dmabuf_h *dh[VIDEO_MAX_PLANES]; +++ struct timeval timestamp; +++}; +++ +++struct qent_src { +++ struct qent_base base; +++ int fixed_size; +++}; +++ +++struct qent_dst { +++ struct qent_base base; +++ bool waiting; +++ pthread_mutex_t lock; +++ pthread_cond_t cond; +++ struct ff_weak_link_client * mbc_wl; +++}; +++ +++struct qe_list_head { +++ struct qent_base *head; +++ struct qent_base *tail; +++}; +++ +++struct buf_pool { +++ enum mediabufs_memory memtype; +++ pthread_mutex_t lock; +++ sem_t free_sem; +++ struct qe_list_head free; +++ struct qe_list_head inuse; +++}; +++ +++ +++static inline struct qent_dst *base_to_dst(struct qent_base *be) +++{ +++ return (struct qent_dst *)be; +++} +++ +++static inline struct qent_src *base_to_src(struct qent_base *be) +++{ +++ return (struct qent_src *)be; +++} +++ +++ +++#define QENT_BASE_INITIALIZER(mtype) {\ +++ .ref_count = ATOMIC_VAR_INIT(0),\ +++ .status = QENT_NEW,\ +++ .memtype = (mtype),\ +++ .index = INDEX_UNSET\ +++} +++ +++static void qe_base_uninit(struct qent_base *const be) +++{ +++ unsigned int i; +++ for (i = 0; i != VIDEO_MAX_PLANES; ++i) { +++ dmabuf_free(be->dh[i]); +++ be->dh[i] = NULL; +++ } +++} +++ +++static void qe_src_free(struct qent_src *const be_src) +++{ +++ if (!be_src) +++ return; +++ qe_base_uninit(&be_src->base); +++ free(be_src); +++} +++ +++static struct qent_src * qe_src_new(enum mediabufs_memory mtype) +++{ +++ struct qent_src *const be_src = malloc(sizeof(*be_src)); +++ if (!be_src) +++ return NULL; +++ *be_src = (struct qent_src){ +++ .base = QENT_BASE_INITIALIZER(mtype) +++ }; +++ return be_src; +++} +++ +++static void qe_dst_free(struct qent_dst *const be_dst) +++{ +++ if (!be_dst) +++ return; +++ +++ ff_weak_link_unref(&be_dst->mbc_wl); +++ pthread_cond_destroy(&be_dst->cond); +++ pthread_mutex_destroy(&be_dst->lock); +++ qe_base_uninit(&be_dst->base); +++ free(be_dst); +++} +++ +++static struct qent_dst* qe_dst_new(struct ff_weak_link_master * const wl, const enum mediabufs_memory memtype) +++{ +++ struct qent_dst *const be_dst = malloc(sizeof(*be_dst)); +++ if (!be_dst) +++ return NULL; +++ *be_dst = (struct qent_dst){ +++ .base = QENT_BASE_INITIALIZER(memtype), +++ .lock = PTHREAD_MUTEX_INITIALIZER, +++ .cond = PTHREAD_COND_INITIALIZER, +++ .mbc_wl = ff_weak_link_ref(wl) +++ }; +++ return be_dst; +++} +++ +++static void ql_add_tail(struct qe_list_head * const ql, struct qent_base * be) +++{ +++ if (ql->tail) +++ ql->tail->next = be; +++ else +++ ql->head = be; +++ be->prev = ql->tail; +++ be->next = NULL; +++ ql->tail = be; +++} +++ +++static struct qent_base * ql_extract(struct qe_list_head * const ql, struct qent_base * be) +++{ +++ if (!be) +++ return NULL; +++ +++ if (be->next) +++ be->next->prev = be->prev; +++ else +++ ql->tail = be->prev; +++ if (be->prev) +++ be->prev->next = be->next; +++ else +++ ql->head = be->next; +++ be->next = NULL; +++ be->prev = NULL; +++ return be; +++} +++ +++ +++static void bq_put_free(struct buf_pool *const bp, struct qent_base * be) +++{ +++ ql_add_tail(&bp->free, be); +++} +++ +++static struct 
qent_base * bq_get_free(struct buf_pool *const bp) +++{ +++ return ql_extract(&bp->free, bp->free.head); +++} +++ +++static struct qent_base * bq_extract_inuse(struct buf_pool *const bp, struct qent_base *const be) +++{ +++ return ql_extract(&bp->inuse, be); +++} +++ +++static struct qent_base * bq_get_inuse(struct buf_pool *const bp) +++{ +++ return ql_extract(&bp->inuse, bp->inuse.head); +++} +++ +++static void bq_free_all_free_src(struct buf_pool *const bp) +++{ +++ struct qent_base *be; +++ while ((be = bq_get_free(bp)) != NULL) +++ qe_src_free(base_to_src(be)); +++} +++ +++static void bq_free_all_inuse_src(struct buf_pool *const bp) +++{ +++ struct qent_base *be; +++ while ((be = bq_get_inuse(bp)) != NULL) +++ qe_src_free(base_to_src(be)); +++} +++ +++static void bq_free_all_free_dst(struct buf_pool *const bp) +++{ +++ struct qent_base *be; +++ while ((be = bq_get_free(bp)) != NULL) +++ qe_dst_free(base_to_dst(be)); +++} +++ +++static void queue_put_free(struct buf_pool *const bp, struct qent_base *be) +++{ +++ unsigned int i; +++ +++ pthread_mutex_lock(&bp->lock); +++ /* Clear out state vars */ +++ be->timestamp.tv_sec = 0; +++ be->timestamp.tv_usec = 0; +++ be->status = QENT_FREE; +++ for (i = 0; i < VIDEO_MAX_PLANES && be->dh[i]; ++i) +++ dmabuf_len_set(be->dh[i], 0); +++ bq_put_free(bp, be); +++ pthread_mutex_unlock(&bp->lock); +++ sem_post(&bp->free_sem); +++} +++ +++static bool queue_is_inuse(const struct buf_pool *const bp) +++{ +++ return bp->inuse.tail != NULL; +++} +++ +++static void queue_put_inuse(struct buf_pool *const bp, struct qent_base *be) +++{ +++ if (!be) +++ return; +++ pthread_mutex_lock(&bp->lock); +++ ql_add_tail(&bp->inuse, be); +++ be->status = QENT_WAITING; +++ pthread_mutex_unlock(&bp->lock); +++} +++ +++static struct qent_base *queue_get_free(struct buf_pool *const bp) +++{ +++ struct qent_base *buf; +++ +++ if (do_wait(&bp->free_sem)) +++ return NULL; +++ pthread_mutex_lock(&bp->lock); +++ buf = bq_get_free(bp); +++ pthread_mutex_unlock(&bp->lock); +++ return buf; +++} +++ +++static struct qent_base *queue_tryget_free(struct buf_pool *const bp) +++{ +++ struct qent_base *buf; +++ +++ if (do_trywait(&bp->free_sem)) +++ return NULL; +++ pthread_mutex_lock(&bp->lock); +++ buf = bq_get_free(bp); +++ pthread_mutex_unlock(&bp->lock); +++ return buf; +++} +++ +++static struct qent_base * queue_find_extract_index(struct buf_pool *const bp, const unsigned int index) +++{ +++ struct qent_base *be; +++ +++ pthread_mutex_lock(&bp->lock); +++ /* Expect 1st in Q, but allow anywhere */ +++ for (be = bp->inuse.head; be; be = be->next) { +++ if (be->index == index) { +++ bq_extract_inuse(bp, be); +++ break; +++ } +++ } +++ pthread_mutex_unlock(&bp->lock); +++ +++ return be; +++} +++ +++static void queue_delete(struct buf_pool *const bp) +++{ +++ sem_destroy(&bp->free_sem); +++ pthread_mutex_destroy(&bp->lock); +++ free(bp); +++} +++ +++static struct buf_pool* queue_new(const int vfd) +++{ +++ struct buf_pool *bp = calloc(1, sizeof(*bp)); +++ if (!bp) +++ return NULL; +++ pthread_mutex_init(&bp->lock, NULL); +++ sem_init(&bp->free_sem, 0, 0); +++ return bp; +++} +++ +++ +++struct mediabufs_ctl { +++ atomic_int ref_count; /* 0 is single ref for easier atomics */ +++ void * dc; +++ int vfd; +++ bool stream_on; +++ bool polling; +++ bool dst_fixed; // Dst Q is fixed size +++ pthread_mutex_t lock; +++ struct buf_pool * src; +++ struct buf_pool * dst; +++ struct polltask * pt; +++ struct pollqueue * pq; +++ struct ff_weak_link_master * this_wlm; +++ +++ enum mediabufs_memory 
src_memtype; +++ enum mediabufs_memory dst_memtype; +++ struct v4l2_format src_fmt; +++ struct v4l2_format dst_fmt; +++ struct v4l2_capability capability; +++}; +++ +++static int qe_v4l2_queue(struct qent_base *const be, +++ const int vfd, struct media_request *const mreq, +++ const struct v4l2_format *const fmt, +++ const bool is_dst, const bool hold_flag) +++{ +++ struct v4l2_buffer buffer = { +++ .type = fmt->type, +++ .memory = mediabufs_memory_to_v4l2(be->memtype), +++ .index = be->index +++ }; +++ struct v4l2_plane planes[VIDEO_MAX_PLANES] = {{0}}; +++ +++ if (V4L2_TYPE_IS_MULTIPLANAR(fmt->type)) { +++ unsigned int i; +++ for (i = 0; i < VIDEO_MAX_PLANES && be->dh[i]; ++i) { +++ if (is_dst) +++ dmabuf_len_set(be->dh[i], 0); +++ +++ /* *** Really need a pixdesc rather than a format so we can fill in data_offset */ +++ planes[i].length = dmabuf_size(be->dh[i]); +++ planes[i].bytesused = dmabuf_len(be->dh[i]); +++ if (be->memtype == MEDIABUFS_MEMORY_DMABUF) +++ planes[i].m.fd = dmabuf_fd(be->dh[i]); +++ else +++ planes[i].m.mem_offset = 0; +++ } +++ buffer.m.planes = planes; +++ buffer.length = i; +++ } +++ else { +++ if (is_dst) +++ dmabuf_len_set(be->dh[0], 0); +++ +++ buffer.bytesused = dmabuf_len(be->dh[0]); +++ buffer.length = dmabuf_size(be->dh[0]); +++ if (be->memtype == MEDIABUFS_MEMORY_DMABUF) +++ buffer.m.fd = dmabuf_fd(be->dh[0]); +++ else +++ buffer.m.offset = 0; +++ } +++ +++ if (!is_dst && mreq) { +++ buffer.flags |= V4L2_BUF_FLAG_REQUEST_FD; +++ buffer.request_fd = media_request_fd(mreq); +++ if (hold_flag) +++ buffer.flags |= V4L2_BUF_FLAG_M2M_HOLD_CAPTURE_BUF; +++ } +++ +++ if (is_dst) +++ be->timestamp = (struct timeval){0,0}; +++ +++ buffer.timestamp = be->timestamp; +++ +++ while (ioctl(vfd, VIDIOC_QBUF, &buffer)) { +++ const int err = errno; +++ if (err != EINTR) { +++ request_log("%s: Failed to Q buffer: err=%d (%s)\n", __func__, err, strerror(err)); +++ return -err; +++ } +++ } +++ return 0; +++} +++ +++static struct qent_base * qe_dequeue(struct buf_pool *const bp, +++ const int vfd, +++ const struct v4l2_format * const f) +++{ +++ struct qent_base *be; +++ int rc; +++ const bool mp = V4L2_TYPE_IS_MULTIPLANAR(f->type); +++ struct v4l2_plane planes[VIDEO_MAX_PLANES] = {{0}}; +++ struct v4l2_buffer buffer = { +++ .type = f->type, +++ .memory = mediabufs_memory_to_v4l2(bp->memtype) +++ }; +++ if (mp) { +++ buffer.length = f->fmt.pix_mp.num_planes; +++ buffer.m.planes = planes; +++ } +++ +++ while ((rc = ioctl(vfd, VIDIOC_DQBUF, &buffer)) != 0 && +++ errno == EINTR) +++ /* Loop */; +++ if (rc) { +++ request_log("Error DQing buffer type %d: %s\n", f->type, strerror(errno)); +++ return NULL; +++ } +++ +++ be = queue_find_extract_index(bp, buffer.index); +++ if (!be) { +++ request_log("Failed to find index %d in Q\n", buffer.index); +++ return NULL; +++ } +++ +++ if (mp) { +++ unsigned int i; +++ for (i = 0; i != buffer.length; ++i) +++ dmabuf_len_set(be->dh[i], V4L2_TYPE_IS_CAPTURE(f->type) ? planes[i].bytesused : 0); +++ } +++ else +++ dmabuf_len_set(be->dh[0], V4L2_TYPE_IS_CAPTURE(f->type) ? buffer.length : 0); +++ +++ be->timestamp = buffer.timestamp; +++ be->status = (buffer.flags & V4L2_BUF_FLAG_ERROR) ? 
QENT_ERROR : QENT_DONE; +++ return be; +++} +++ +++static void qe_dst_done(struct qent_dst * dst_be) +++{ +++ pthread_mutex_lock(&dst_be->lock); +++ dst_be->waiting = false; +++ pthread_cond_broadcast(&dst_be->cond); +++ pthread_mutex_unlock(&dst_be->lock); +++ +++ qent_dst_unref(&dst_be); +++} +++ +++static bool qe_dst_waiting(struct qent_dst *const dst_be) +++{ +++ bool waiting; +++ pthread_mutex_lock(&dst_be->lock); +++ waiting = dst_be->waiting; +++ dst_be->waiting = true; +++ pthread_mutex_unlock(&dst_be->lock); +++ return waiting; +++} +++ +++ +++static bool mediabufs_wants_poll(const struct mediabufs_ctl *const mbc) +++{ +++ return queue_is_inuse(mbc->src) || queue_is_inuse(mbc->dst); +++} +++ +++static void mediabufs_poll_cb(void * v, short revents) +++{ +++ struct mediabufs_ctl *mbc = v; +++ struct qent_src *src_be = NULL; +++ struct qent_dst *dst_be = NULL; +++ +++ if (!revents) +++ request_err(mbc->dc, "%s: Timeout\n", __func__); +++ +++ pthread_mutex_lock(&mbc->lock); +++ mbc->polling = false; +++ +++ if ((revents & POLLOUT) != 0) +++ src_be = base_to_src(qe_dequeue(mbc->src, mbc->vfd, &mbc->src_fmt)); +++ if ((revents & POLLIN) != 0) +++ dst_be = base_to_dst(qe_dequeue(mbc->dst, mbc->vfd, &mbc->dst_fmt)); +++ +++ /* Reschedule */ +++ if (mediabufs_wants_poll(mbc)) { +++ mbc->polling = true; +++ pollqueue_add_task(mbc->pt, 2000); +++ } +++ pthread_mutex_unlock(&mbc->lock); +++ +++ if (src_be) +++ queue_put_free(mbc->src, &src_be->base); +++ if (dst_be) +++ qe_dst_done(dst_be); +++} +++ +++int qent_src_params_set(struct qent_src *const be_src, const struct timeval * timestamp) +++{ +++ struct qent_base *const be = &be_src->base; +++ +++ be->timestamp = *timestamp; +++ return 0; +++} +++ +++struct timeval qent_dst_timestamp_get(const struct qent_dst *const be_dst) +++{ +++ return be_dst->base.timestamp; +++} +++ +++static int qent_base_realloc(struct qent_base *const be, const size_t len, struct dmabufs_ctl * dbsc) +++{ +++ if (!be->dh[0] || len > dmabuf_size(be->dh[0])) { +++ size_t newsize = round_up_size(len); +++ request_log("%s: Overrun %zd > %zd; trying %zd\n", __func__, len, dmabuf_size(be->dh[0]), newsize); +++ if (!dbsc) { +++ request_log("%s: No dmbabuf_ctrl for realloc\n", __func__); +++ return -ENOMEM; +++ } +++ if ((be->dh[0] = dmabuf_realloc(dbsc, be->dh[0], newsize)) == NULL) { +++ request_log("%s: Realloc %zd failed\n", __func__, newsize); +++ return -ENOMEM; +++ } +++ } +++ return 0; +++} +++ +++int qent_src_alloc(struct qent_src *const be_src, const size_t len, struct dmabufs_ctl * dbsc) +++{ +++ struct qent_base *const be = &be_src->base; +++ return qent_base_realloc(be, len, dbsc); +++} +++ +++ +++int qent_src_data_copy(struct qent_src *const be_src, const size_t offset, const void *const src, const size_t len, struct dmabufs_ctl * dbsc) +++{ +++ void * dst; +++ struct qent_base *const be = &be_src->base; +++ int rv; +++ +++ // Realloc doesn't copy so don't alloc if offset != 0 +++ if ((rv = qent_base_realloc(be, offset + len, +++ be_src->fixed_size || offset ? NULL : dbsc)) != 0) +++ return rv; +++ +++ dmabuf_write_start(be->dh[0]); +++ dst = dmabuf_map(be->dh[0]); +++ if (!dst) +++ return -1; +++ memcpy((char*)dst + offset, src, len); +++ dmabuf_len_set(be->dh[0], len); +++ dmabuf_write_end(be->dh[0]); +++ return 0; +++} +++ +++const struct dmabuf_h * qent_dst_dmabuf(const struct qent_dst *const be_dst, unsigned int plane) +++{ +++ const struct qent_base *const be = &be_dst->base; +++ +++ return (plane >= sizeof(be->dh)/sizeof(be->dh[0])) ? 
NULL : be->dh[plane]; +++} +++ +++int qent_dst_dup_fd(const struct qent_dst *const be_dst, unsigned int plane) +++{ +++ return dup(dmabuf_fd(qent_dst_dmabuf(be_dst, plane))); +++} +++ +++MediaBufsStatus mediabufs_start_request(struct mediabufs_ctl *const mbc, +++ struct media_request **const pmreq, +++ struct qent_src **const psrc_be, +++ struct qent_dst *const dst_be, +++ const bool is_final) +++{ +++ struct media_request * mreq = *pmreq; +++ struct qent_src *const src_be = *psrc_be; +++ +++ // Req & src are always both "consumed" +++ *pmreq = NULL; +++ *psrc_be = NULL; +++ +++ pthread_mutex_lock(&mbc->lock); +++ +++ if (!src_be) +++ goto fail1; +++ +++ if (dst_be) { +++ if (qe_dst_waiting(dst_be)) { +++ request_info(mbc->dc, "Request buffer already waiting on start\n"); +++ goto fail1; +++ } +++ dst_be->base.timestamp = (struct timeval){0,0}; +++ if (qe_v4l2_queue(&dst_be->base, mbc->vfd, NULL, &mbc->dst_fmt, true, false)) +++ goto fail1; +++ +++ qent_dst_ref(dst_be); +++ queue_put_inuse(mbc->dst, &dst_be->base); +++ } +++ +++ if (qe_v4l2_queue(&src_be->base, mbc->vfd, mreq, &mbc->src_fmt, false, !is_final)) +++ goto fail1; +++ queue_put_inuse(mbc->src, &src_be->base); +++ +++ if (!mbc->polling && mediabufs_wants_poll(mbc)) { +++ mbc->polling = true; +++ pollqueue_add_task(mbc->pt, 2000); +++ } +++ pthread_mutex_unlock(&mbc->lock); +++ +++ if (media_request_start(mreq)) +++ return MEDIABUFS_ERROR_OPERATION_FAILED; +++ +++ return MEDIABUFS_STATUS_SUCCESS; +++ +++fail1: +++ media_request_abort(&mreq); +++ if (src_be) +++ queue_put_free(mbc->src, &src_be->base); +++ +++// *** TODO: If src Q fails this doesnt unwind properly - separate dst Q from src Q +++ if (dst_be) { +++ dst_be->base.status = QENT_ERROR; +++ qe_dst_done(dst_be); +++ } +++ pthread_mutex_unlock(&mbc->lock); +++ return MEDIABUFS_ERROR_OPERATION_FAILED; +++} +++ +++ +++static int qe_alloc_from_fmt(struct qent_base *const be, +++ struct dmabufs_ctl *const dbsc, +++ const struct v4l2_format *const fmt) +++{ +++ if (V4L2_TYPE_IS_MULTIPLANAR(fmt->type)) { +++ unsigned int i; +++ for (i = 0; i != fmt->fmt.pix_mp.num_planes; ++i) { +++ be->dh[i] = dmabuf_realloc(dbsc, be->dh[i], +++ fmt->fmt.pix_mp.plane_fmt[i].sizeimage); +++ /* On failure tidy up and die */ +++ if (!be->dh[i]) { +++ while (i--) { +++ dmabuf_free(be->dh[i]); +++ be->dh[i] = NULL; +++ } +++ return -1; +++ } +++ } +++ } +++ else { +++// be->dh[0] = dmabuf_alloc(dbsc, fmt->fmt.pix.sizeimage); +++ size_t size = fmt->fmt.pix.sizeimage; +++ be->dh[0] = dmabuf_realloc(dbsc, be->dh[0], size); +++ if (!be->dh[0]) +++ return -1; +++ } +++ return 0; +++} +++ +++static MediaBufsStatus fmt_set(struct v4l2_format *const fmt, const int fd, +++ const enum v4l2_buf_type buftype, +++ uint32_t pixfmt, +++ const unsigned int width, const unsigned int height, +++ const size_t bufsize) +++{ +++ *fmt = (struct v4l2_format){.type = buftype}; +++ +++ if (V4L2_TYPE_IS_MULTIPLANAR(buftype)) { +++ fmt->fmt.pix_mp.width = width; +++ fmt->fmt.pix_mp.height = height; +++ fmt->fmt.pix_mp.pixelformat = pixfmt; +++ if (bufsize) { +++ fmt->fmt.pix_mp.num_planes = 1; +++ fmt->fmt.pix_mp.plane_fmt[0].sizeimage = bufsize; +++ } +++ } +++ else { +++ fmt->fmt.pix.width = width; +++ fmt->fmt.pix.height = height; +++ fmt->fmt.pix.pixelformat = pixfmt; +++ fmt->fmt.pix.sizeimage = bufsize; +++ } +++ +++ while (ioctl(fd, VIDIOC_S_FMT, fmt)) +++ if (errno != EINTR) +++ return MEDIABUFS_ERROR_OPERATION_FAILED; +++ +++ // Treat anything where we don't get at least what we asked for as a fail +++ if 
(V4L2_TYPE_IS_MULTIPLANAR(buftype)) { +++ if (fmt->fmt.pix_mp.width < width || +++ fmt->fmt.pix_mp.height < height || +++ fmt->fmt.pix_mp.pixelformat != pixfmt) { +++ return MEDIABUFS_ERROR_UNSUPPORTED_BUFFERTYPE; +++ } +++ } +++ else { +++ if (fmt->fmt.pix.width < width || +++ fmt->fmt.pix.height < height || +++ fmt->fmt.pix.pixelformat != pixfmt) { +++ return MEDIABUFS_ERROR_UNSUPPORTED_BUFFERTYPE; +++ } +++ } +++ +++ return MEDIABUFS_STATUS_SUCCESS; +++} +++ +++static MediaBufsStatus find_fmt_flags(struct v4l2_format *const fmt, +++ const int fd, +++ const unsigned int type_v4l2, +++ const uint32_t flags_must, +++ const uint32_t flags_not, +++ const unsigned int width, +++ const unsigned int height, +++ mediabufs_dst_fmt_accept_fn *const accept_fn, +++ void *const accept_v) +++{ +++ unsigned int i; +++ +++ for (i = 0;; ++i) { +++ struct v4l2_fmtdesc fmtdesc = { +++ .index = i, +++ .type = type_v4l2 +++ }; +++ while (ioctl(fd, VIDIOC_ENUM_FMT, &fmtdesc)) { +++ if (errno != EINTR) +++ return MEDIABUFS_ERROR_UNSUPPORTED_BUFFERTYPE; +++ } +++ if ((fmtdesc.flags & flags_must) != flags_must || +++ (fmtdesc.flags & flags_not)) +++ continue; +++ if (!accept_fn(accept_v, &fmtdesc)) +++ continue; +++ +++ if (fmt_set(fmt, fd, fmtdesc.type, fmtdesc.pixelformat, +++ width, height, 0) == MEDIABUFS_STATUS_SUCCESS) +++ return MEDIABUFS_STATUS_SUCCESS; +++ } +++ return 0; +++} +++ +++ +++/* Wait for qent done */ +++ +++MediaBufsStatus qent_dst_wait(struct qent_dst *const be_dst) +++{ +++ struct qent_base *const be = &be_dst->base; +++ enum qent_status estat; +++ +++ pthread_mutex_lock(&be_dst->lock); +++ while (be_dst->waiting && +++ !pthread_cond_wait(&be_dst->cond, &be_dst->lock)) +++ /* Loop */; +++ estat = be->status; +++ pthread_mutex_unlock(&be_dst->lock); +++ +++ return estat == QENT_DONE ? MEDIABUFS_STATUS_SUCCESS : +++ estat == QENT_ERROR ? 
MEDIABUFS_ERROR_DECODING_ERROR : +++ MEDIABUFS_ERROR_OPERATION_FAILED; +++} +++ +++const uint8_t * qent_dst_data(struct qent_dst *const be_dst, unsigned int buf_no) +++{ +++ struct qent_base *const be = &be_dst->base; +++ return dmabuf_map(be->dh[buf_no]); +++} +++ +++MediaBufsStatus qent_dst_read_start(struct qent_dst *const be_dst) +++{ +++ struct qent_base *const be = &be_dst->base; +++ unsigned int i; +++ for (i = 0; i != VIDEO_MAX_PLANES && be->dh[i]; ++i) { +++ if (dmabuf_read_start(be->dh[i])) { +++ while (i--) +++ dmabuf_read_end(be->dh[i]); +++ return MEDIABUFS_ERROR_ALLOCATION_FAILED; +++ } +++ } +++ return MEDIABUFS_STATUS_SUCCESS; +++} +++ +++MediaBufsStatus qent_dst_read_stop(struct qent_dst *const be_dst) +++{ +++ struct qent_base *const be = &be_dst->base; +++ unsigned int i; +++ MediaBufsStatus status = MEDIABUFS_STATUS_SUCCESS; +++ +++ for (i = 0; i != VIDEO_MAX_PLANES && be->dh[i]; ++i) { +++ if (dmabuf_read_end(be->dh[i])) +++ status = MEDIABUFS_ERROR_OPERATION_FAILED; +++ } +++ return status; +++} +++ +++struct qent_dst * qent_dst_ref(struct qent_dst * const be_dst) +++{ +++ if (be_dst) +++ atomic_fetch_add(&be_dst->base.ref_count, 1); +++ return be_dst; +++} +++ +++void qent_dst_unref(struct qent_dst ** const pbe_dst) +++{ +++ struct qent_dst * const be_dst = *pbe_dst; +++ struct mediabufs_ctl * mbc; +++ if (!be_dst) +++ return; +++ *pbe_dst = NULL; +++ +++ if (atomic_fetch_sub(&be_dst->base.ref_count, 1) != 0) +++ return; +++ +++ if ((mbc = ff_weak_link_lock(&be_dst->mbc_wl)) != NULL) { +++ queue_put_free(mbc->dst, &be_dst->base); +++ ff_weak_link_unlock(be_dst->mbc_wl); +++ } +++ else { +++ qe_dst_free(be_dst); +++ } +++} +++ +++MediaBufsStatus qent_dst_import_fd(struct qent_dst *const be_dst, +++ unsigned int plane, +++ int fd, size_t size) +++{ +++ struct qent_base *const be = &be_dst->base; +++ struct dmabuf_h * dh; +++ +++ if (be->status != QENT_IMPORT || be->dh[plane]) +++ return MEDIABUFS_ERROR_OPERATION_FAILED; +++ +++ dh = dmabuf_import(fd, size); +++ if (!dh) +++ return MEDIABUFS_ERROR_ALLOCATION_FAILED; +++ +++ be->dh[plane] = dh; +++ return MEDIABUFS_STATUS_SUCCESS; +++} +++ +++// Returns noof buffers created, -ve for error +++static int create_dst_bufs(struct mediabufs_ctl *const mbc, unsigned int n, struct qent_dst * const qes[]) +++{ +++ unsigned int i; +++ +++ struct v4l2_create_buffers cbuf = { +++ .count = n, +++ .memory = mediabufs_memory_to_v4l2(mbc->dst->memtype), +++ .format = mbc->dst_fmt, +++ }; +++ +++ while (ioctl(mbc->vfd, VIDIOC_CREATE_BUFS, &cbuf)) { +++ const int err = -errno; +++ if (err != EINTR) { +++ request_err(mbc->dc, "%s: Failed to create V4L2 buffer\n", __func__); +++ return -err; +++ } +++ } +++ +++ if (cbuf.count != n) +++ request_warn(mbc->dc, "%s: Created %d of %d V4L2 buffers requested\n", __func__, cbuf.count, n); +++ +++ for (i = 0; i != cbuf.count; ++i) +++ qes[i]->base.index = cbuf.index + i; +++ +++ return cbuf.count; +++} +++ +++static MediaBufsStatus +++qe_import_from_buf(struct mediabufs_ctl *const mbc, struct qent_base * const be, const struct v4l2_format *const fmt, +++ const unsigned int n, const bool x_dmabuf) +++{ +++ struct v4l2_buffer buf = { +++ .index = n, +++ .type = fmt->type, +++ }; +++ struct v4l2_plane planes[VIDEO_MAX_PLANES]; +++ int ret; +++ +++ if (be->dh[0]) +++ return 0; +++ +++ if (V4L2_TYPE_IS_MULTIPLANAR(fmt->type)) { +++ memset(planes, 0, sizeof(planes)); +++ buf.m.planes = planes; +++ buf.length = VIDEO_MAX_PLANES; +++ } +++ +++ if ((ret = ioctl(mbc->vfd, VIDIOC_QUERYBUF, &buf)) != 0) { +++ 
request_err(mbc->dc, "VIDIOC_QUERYBUF failed"); +++ return MEDIABUFS_ERROR_OPERATION_FAILED; +++ } +++ +++ if (V4L2_TYPE_IS_MULTIPLANAR(fmt->type)) +++ { +++ unsigned int i; +++ for (i = 0; i != buf.length; ++i) { +++ if (x_dmabuf) { +++ struct v4l2_exportbuffer xbuf = { +++ .type = buf.type, +++ .index = buf.index, +++ .plane = i, +++ .flags = O_RDWR, // *** Arguably O_RDONLY would be fine +++ }; +++ if (ioctl(mbc->vfd, VIDIOC_EXPBUF, &xbuf) == 0) +++ be->dh[i] = dmabuf_import(xbuf.fd, planes[i].length); +++ } +++ else { +++ be->dh[i] = dmabuf_import_mmap( +++ mmap(NULL, planes[i].length, +++ PROT_READ | PROT_WRITE, +++ MAP_SHARED | MAP_POPULATE, +++ mbc->vfd, planes[i].m.mem_offset), +++ planes[i].length); +++ } +++ /* On failure tidy up and die */ +++ if (!be->dh[i]) { +++ while (i--) { +++ dmabuf_free(be->dh[i]); +++ be->dh[i] = NULL; +++ } +++ return MEDIABUFS_ERROR_OPERATION_FAILED; +++ } +++ } +++ } +++ else +++ { +++ if (x_dmabuf) { +++ struct v4l2_exportbuffer xbuf = { +++ .type = buf.type, +++ .index = buf.index, +++ .flags = O_RDWR, // *** Arguably O_RDONLY would be fine +++ }; +++ if (ioctl(mbc->vfd, VIDIOC_EXPBUF, &xbuf) == 0) +++ be->dh[0] = dmabuf_import(xbuf.fd, buf.length); +++ } +++ else { +++ be->dh[0] = dmabuf_import_mmap( +++ mmap(NULL, buf.length, +++ PROT_READ | PROT_WRITE, +++ MAP_SHARED | MAP_POPULATE, +++ mbc->vfd, buf.m.offset), +++ buf.length); +++ } +++ /* On failure tidy up and die */ +++ if (!be->dh[0]) { +++ return MEDIABUFS_ERROR_OPERATION_FAILED; +++ } +++ } +++ +++ return 0; +++} +++ +++struct qent_dst* mediabufs_dst_qent_alloc(struct mediabufs_ctl *const mbc, struct dmabufs_ctl *const dbsc) +++{ +++ struct qent_dst * be_dst; +++ +++ if (mbc == NULL) { +++ be_dst = qe_dst_new(NULL, MEDIABUFS_MEMORY_DMABUF); +++ if (be_dst) +++ be_dst->base.status = QENT_IMPORT; +++ return be_dst; +++ } +++ +++ if (mbc->dst_fixed) { +++ be_dst = base_to_dst(queue_get_free(mbc->dst)); +++ if (!be_dst) +++ return NULL; +++ } +++ else { +++ be_dst = base_to_dst(queue_tryget_free(mbc->dst)); +++ if (!be_dst) { +++ be_dst = qe_dst_new(mbc->this_wlm, mbc->dst->memtype); +++ if (!be_dst) +++ return NULL; +++ +++ if (create_dst_bufs(mbc, 1, &be_dst) != 1) { +++ qe_dst_free(be_dst); +++ return NULL; +++ } +++ } +++ } +++ +++ if (mbc->dst->memtype == MEDIABUFS_MEMORY_MMAP) { +++ if (qe_import_from_buf(mbc, &be_dst->base, &mbc->dst_fmt, be_dst->base.index, true)) { +++ request_err(mbc->dc, "Failed to export as dmabuf\n"); +++ queue_put_free(mbc->dst, &be_dst->base); +++ return NULL; +++ } +++ } +++ else { +++ if (qe_alloc_from_fmt(&be_dst->base, dbsc, &mbc->dst_fmt)) { +++ /* Given how create buf works we can't uncreate it on alloc failure +++ * all we can do is put it on the free Q +++ */ +++ queue_put_free(mbc->dst, &be_dst->base); +++ return NULL; +++ } +++ } +++ +++ be_dst->base.status = QENT_PENDING; +++ atomic_store(&be_dst->base.ref_count, 0); +++ return be_dst; +++} +++ +++const struct v4l2_format *mediabufs_dst_fmt(struct mediabufs_ctl *const mbc) +++{ +++ return &mbc->dst_fmt; +++} +++ +++MediaBufsStatus mediabufs_dst_fmt_set(struct mediabufs_ctl *const mbc, +++ const unsigned int width, +++ const unsigned int height, +++ mediabufs_dst_fmt_accept_fn *const accept_fn, +++ void *const accept_v) +++{ +++ MediaBufsStatus status; +++ unsigned int i; +++ const enum v4l2_buf_type buf_type = mbc->dst_fmt.type; +++ static const struct { +++ unsigned int flags_must; +++ unsigned int flags_not; +++ } trys[] = { +++ {0, V4L2_FMT_FLAG_EMULATED}, +++ {V4L2_FMT_FLAG_EMULATED, 0}, +++ }; 
+++ for (i = 0; i != sizeof(trys)/sizeof(trys[0]); ++i) { +++ status = find_fmt_flags(&mbc->dst_fmt, mbc->vfd, +++ buf_type, +++ trys[i].flags_must, +++ trys[i].flags_not, +++ width, height, accept_fn, accept_v); +++ if (status != MEDIABUFS_ERROR_UNSUPPORTED_BUFFERTYPE) +++ return status; +++ } +++ +++ if (status != MEDIABUFS_STATUS_SUCCESS) +++ return status; +++ +++ /* Try to create a buffer - don't alloc */ +++ return status; +++} +++ +++// ** This is a mess if we get partial alloc but without any way to remove +++// individual V4L2 Q members we are somewhat stuffed +++MediaBufsStatus mediabufs_dst_slots_create(struct mediabufs_ctl *const mbc, const unsigned int n, const bool fixed, const enum mediabufs_memory memtype) +++{ +++ unsigned int i; +++ int a = 0; +++ unsigned int qc; +++ struct qent_dst * qes[32]; +++ +++ if (n > 32) +++ return MEDIABUFS_ERROR_ALLOCATION_FAILED; +++ +++ mbc->dst->memtype = memtype; +++ +++ // Create qents first as it is hard to get rid of the V4L2 buffers on error +++ for (qc = 0; qc != n; ++qc) +++ { +++ if ((qes[qc] = qe_dst_new(mbc->this_wlm, mbc->dst->memtype)) == NULL) +++ goto fail; +++ } +++ +++ if ((a = create_dst_bufs(mbc, n, qes)) < 0) +++ goto fail; +++ +++ for (i = 0; i != a; ++i) +++ queue_put_free(mbc->dst, &qes[i]->base); +++ +++ if (a != n) +++ goto fail; +++ +++ mbc->dst_fixed = fixed; +++ return MEDIABUFS_STATUS_SUCCESS; +++ +++fail: +++ for (i = (a < 0 ? 0 : a); i != qc; ++i) +++ qe_dst_free(qes[i]); +++ +++ return MEDIABUFS_ERROR_ALLOCATION_FAILED; +++} +++ +++struct qent_src *mediabufs_src_qent_get(struct mediabufs_ctl *const mbc) +++{ +++ struct qent_base * buf = queue_get_free(mbc->src); +++ buf->status = QENT_PENDING; +++ return base_to_src(buf); +++} +++ +++void mediabufs_src_qent_abort(struct mediabufs_ctl *const mbc, struct qent_src **const pqe_src) +++{ +++ struct qent_src *const qe_src = *pqe_src; +++ if (!qe_src) +++ return; +++ *pqe_src = NULL; +++ queue_put_free(mbc->src, &qe_src->base); +++} +++ +++static MediaBufsStatus +++chk_memory_type(struct mediabufs_ctl *const mbc, +++ const struct v4l2_format * const f, +++ const enum mediabufs_memory m) +++{ +++ struct v4l2_create_buffers cbuf = { +++ .count = 0, +++ .memory = V4L2_MEMORY_MMAP, +++ .format = *f +++ }; +++ +++ if (ioctl(mbc->vfd, VIDIOC_CREATE_BUFS, &cbuf) != 0) +++ return MEDIABUFS_ERROR_OPERATION_FAILED; +++ +++ switch (m) { +++ case MEDIABUFS_MEMORY_DMABUF: +++ // 0 = Unknown but assume not in that case +++ if ((cbuf.capabilities & V4L2_BUF_CAP_SUPPORTS_DMABUF) == 0) +++ return MEDIABUFS_ERROR_UNSUPPORTED_MEMORY; +++ break; +++ case MEDIABUFS_MEMORY_MMAP: +++ break; +++ default: +++ return MEDIABUFS_ERROR_UNSUPPORTED_MEMORY; +++ } +++ +++ return MEDIABUFS_STATUS_SUCCESS; +++} +++ +++MediaBufsStatus +++mediabufs_src_chk_memtype(struct mediabufs_ctl *const mbc, const enum mediabufs_memory memtype) +++{ +++ return chk_memory_type(mbc, &mbc->src_fmt, memtype); +++} +++ +++MediaBufsStatus +++mediabufs_dst_chk_memtype(struct mediabufs_ctl *const mbc, const enum mediabufs_memory memtype) +++{ +++ return chk_memory_type(mbc, &mbc->dst_fmt, memtype); +++} +++ +++/* src format must have been set up before this */ +++MediaBufsStatus mediabufs_src_pool_create(struct mediabufs_ctl *const mbc, +++ struct dmabufs_ctl * const dbsc, +++ unsigned int n, const enum mediabufs_memory memtype) +++{ +++ unsigned int i; +++ struct v4l2_requestbuffers req = { +++ .count = n, +++ .type = mbc->src_fmt.type, +++ .memory = mediabufs_memory_to_v4l2(memtype) +++ }; +++ +++ 
bq_free_all_free_src(mbc->src); +++ +++ while (ioctl(mbc->vfd, VIDIOC_REQBUFS, &req) == -1) { +++ if (errno != EINTR) { +++ request_err(mbc->dc, "%s: Failed to request src bufs\n", __func__); +++ return MEDIABUFS_ERROR_OPERATION_FAILED; +++ } +++ } +++ +++ if (n > req.count) { +++ request_info(mbc->dc, "Only allocated %d of %d src buffers requested\n", req.count, n); +++ n = req.count; +++ } +++ +++ for (i = 0; i != n; ++i) { +++ struct qent_src *const be_src = qe_src_new(memtype); +++ if (!be_src) { +++ request_err(mbc->dc, "Failed to create src be %d\n", i); +++ goto fail; +++ } +++ switch (memtype) { +++ case MEDIABUFS_MEMORY_MMAP: +++ if (qe_import_from_buf(mbc, &be_src->base, &mbc->src_fmt, i, false)) { +++ qe_src_free(be_src); +++ goto fail; +++ } +++ be_src->fixed_size = 1; +++ break; +++ case MEDIABUFS_MEMORY_DMABUF: +++ if (qe_alloc_from_fmt(&be_src->base, dbsc, &mbc->src_fmt)) { +++ qe_src_free(be_src); +++ goto fail; +++ } +++ be_src->fixed_size = !mediabufs_src_resizable(mbc); +++ break; +++ default: +++ request_err(mbc->dc, "Unexpected memorty type\n"); +++ goto fail; +++ } +++ be_src->base.index = i; +++ +++ queue_put_free(mbc->src, &be_src->base); +++ } +++ +++ mbc->src->memtype = memtype; +++ return MEDIABUFS_STATUS_SUCCESS; +++ +++fail: +++ bq_free_all_free_src(mbc->src); +++ req.count = 0; +++ while (ioctl(mbc->vfd, VIDIOC_REQBUFS, &req) == -1 && +++ errno == EINTR) +++ /* Loop */; +++ +++ return MEDIABUFS_ERROR_OPERATION_FAILED; +++} +++ +++ +++ +++/* +++ * Set stuff order: +++ * Set src fmt +++ * Set parameters (sps) on vfd +++ * Negotiate dst format (dst_fmt_set) +++ * Create src buffers +++ * Alloc a dst buffer or Create dst slots +++*/ +++MediaBufsStatus mediabufs_stream_on(struct mediabufs_ctl *const mbc) +++{ +++ if (mbc->stream_on) +++ return MEDIABUFS_STATUS_SUCCESS; +++ +++ if (set_stream(mbc->vfd, mbc->src_fmt.type, true) < 0) { +++ request_log("Failed to set stream on src type %d\n", mbc->src_fmt.type); +++ return MEDIABUFS_ERROR_OPERATION_FAILED; +++ } +++ +++ if (set_stream(mbc->vfd, mbc->dst_fmt.type, true) < 0) { +++ request_log("Failed to set stream on dst type %d\n", mbc->dst_fmt.type); +++ set_stream(mbc->vfd, mbc->src_fmt.type, false); +++ return MEDIABUFS_ERROR_OPERATION_FAILED; +++ } +++ +++ mbc->stream_on = true; +++ return MEDIABUFS_STATUS_SUCCESS; +++} +++ +++MediaBufsStatus mediabufs_stream_off(struct mediabufs_ctl *const mbc) +++{ +++ MediaBufsStatus status = MEDIABUFS_STATUS_SUCCESS; +++ +++ if (!mbc->stream_on) +++ return MEDIABUFS_STATUS_SUCCESS; +++ +++ if (set_stream(mbc->vfd, mbc->dst_fmt.type, false) < 0) { +++ request_log("Failed to set stream off dst type %d\n", mbc->dst_fmt.type); +++ status = MEDIABUFS_ERROR_OPERATION_FAILED; +++ } +++ +++ if (set_stream(mbc->vfd, mbc->src_fmt.type, false) < 0) { +++ request_log("Failed to set stream off src type %d\n", mbc->src_fmt.type); +++ status = MEDIABUFS_ERROR_OPERATION_FAILED; +++ } +++ +++ mbc->stream_on = false; +++ return status; +++} +++ +++int mediabufs_ctl_set_ext_ctrls(struct mediabufs_ctl * mbc, struct media_request * const mreq, struct v4l2_ext_control control_array[], unsigned int n) +++{ +++ struct v4l2_ext_controls controls = { +++ .controls = control_array, +++ .count = n +++ }; +++ +++ if (mreq) { +++ controls.which = V4L2_CTRL_WHICH_REQUEST_VAL; +++ controls.request_fd = media_request_fd(mreq); +++ } +++ +++ while (ioctl(mbc->vfd, VIDIOC_S_EXT_CTRLS, &controls)) +++ { +++ const int err = errno; +++ if (err != EINTR) { +++ request_err(mbc->dc, "Unable to set controls: %s\n", 
strerror(err)); +++ return -err; +++ } +++ } +++ +++ return 0; +++} +++ +++MediaBufsStatus mediabufs_set_ext_ctrl(struct mediabufs_ctl *const mbc, +++ struct media_request * const mreq, +++ unsigned int id, void *data, +++ unsigned int size) +++{ +++ struct v4l2_ext_control control = { +++ .id = id, +++ .ptr = data, +++ .size = size +++ }; +++ +++ int rv = mediabufs_ctl_set_ext_ctrls(mbc, mreq, &control, 1); +++ return !rv ? MEDIABUFS_STATUS_SUCCESS : MEDIABUFS_ERROR_OPERATION_FAILED; +++} +++ +++MediaBufsStatus mediabufs_src_fmt_set(struct mediabufs_ctl *const mbc, +++ enum v4l2_buf_type buf_type, +++ const uint32_t pixfmt, +++ const uint32_t width, const uint32_t height, +++ const size_t bufsize) +++{ +++ MediaBufsStatus rv = fmt_set(&mbc->src_fmt, mbc->vfd, buf_type, pixfmt, width, height, bufsize); +++ if (rv != MEDIABUFS_STATUS_SUCCESS) +++ request_err(mbc->dc, "Failed to set src buftype %d, format %#x %dx%d\n", buf_type, pixfmt, width, height); +++ +++ return rv; +++} +++ +++int mediabufs_ctl_query_ext_ctrls(struct mediabufs_ctl * mbc, struct v4l2_query_ext_ctrl ctrls[], unsigned int n) +++{ +++ int rv = 0; +++ while (n--) { +++ while (ioctl(mbc->vfd, VIDIOC_QUERY_EXT_CTRL, ctrls)) { +++ const int err = errno; +++ if (err != EINTR) { +++ // Often used for probing - errors are to be expected +++ request_debug(mbc->dc, "Failed to query ext id=%#x, err=%d\n", ctrls->id, err); +++ ctrls->type = 0; // 0 is invalid +++ rv = -err; +++ break; +++ } +++ } +++ ++ctrls; +++ } +++ return rv; +++} +++ +++int mediabufs_src_resizable(const struct mediabufs_ctl *const mbc) +++{ +++#if 1 +++ return 0; +++#else +++ // Single planar OUTPUT can only take exact size buffers +++ // Multiplanar will take larger than negotiated +++ return V4L2_TYPE_IS_MULTIPLANAR(mbc->src_fmt.type); +++#endif +++} +++ +++static void mediabufs_ctl_delete(struct mediabufs_ctl *const mbc) +++{ +++ if (!mbc) +++ return; +++ +++ // Break the weak link first +++ ff_weak_link_break(&mbc->this_wlm); +++ +++ polltask_delete(&mbc->pt); +++ +++ mediabufs_stream_off(mbc); +++ +++ // Empty v4l2 buffer stash +++ request_buffers(mbc->vfd, mbc->src_fmt.type, V4L2_MEMORY_MMAP, 0); +++ request_buffers(mbc->vfd, mbc->dst_fmt.type, V4L2_MEMORY_MMAP, 0); +++ +++ bq_free_all_free_src(mbc->src); +++ bq_free_all_inuse_src(mbc->src); +++ bq_free_all_free_dst(mbc->dst); +++ +++ { +++ struct qent_dst *dst_be; +++ while ((dst_be = base_to_dst(bq_get_inuse(mbc->dst))) != NULL) { +++ dst_be->base.timestamp = (struct timeval){0}; +++ dst_be->base.status = QENT_ERROR; +++ qe_dst_done(dst_be); +++ } +++ } +++ +++ queue_delete(mbc->dst); +++ queue_delete(mbc->src); +++ close(mbc->vfd); +++ pthread_mutex_destroy(&mbc->lock); +++ +++ free(mbc); +++} +++ +++struct mediabufs_ctl * mediabufs_ctl_ref(struct mediabufs_ctl *const mbc) +++{ +++ atomic_fetch_add(&mbc->ref_count, 1); +++ return mbc; +++} +++ +++void mediabufs_ctl_unref(struct mediabufs_ctl **const pmbc) +++{ +++ struct mediabufs_ctl *const mbc = *pmbc; +++ int n; +++ +++ if (!mbc) +++ return; +++ *pmbc = NULL; +++ n = atomic_fetch_sub(&mbc->ref_count, 1); +++ if (n) +++ return; +++ mediabufs_ctl_delete(mbc); +++} +++ +++unsigned int mediabufs_ctl_driver_version(struct mediabufs_ctl *const mbc) +++{ +++ return mbc->capability.version; +++} +++ +++static int set_capabilities(struct mediabufs_ctl *const mbc) +++{ +++ uint32_t caps; +++ +++ if (ioctl(mbc->vfd, VIDIOC_QUERYCAP, &mbc->capability)) { +++ int err = errno; +++ request_err(mbc->dc, "Failed to get capabilities: %s\n", strerror(err)); +++ return 
-err; +++ } +++ +++ caps = (mbc->capability.capabilities & V4L2_CAP_DEVICE_CAPS) != 0 ? +++ mbc->capability.device_caps : +++ mbc->capability.capabilities; +++ +++ if ((caps & V4L2_CAP_VIDEO_M2M_MPLANE) != 0) { +++ mbc->src_fmt.type = V4L2_BUF_TYPE_VIDEO_OUTPUT_MPLANE; +++ mbc->dst_fmt.type = V4L2_BUF_TYPE_VIDEO_CAPTURE_MPLANE; +++ } +++ else if ((caps & V4L2_CAP_VIDEO_M2M) != 0) { +++ mbc->src_fmt.type = V4L2_BUF_TYPE_VIDEO_OUTPUT; +++ mbc->dst_fmt.type = V4L2_BUF_TYPE_VIDEO_CAPTURE; +++ } +++ else { +++ request_err(mbc->dc, "No M2M capabilities (%#x)\n", caps); +++ return -EINVAL; +++ } +++ +++ return 0; +++} +++ +++/* One of these per context */ +++struct mediabufs_ctl * mediabufs_ctl_new(void * const dc, const char * vpath, struct pollqueue *const pq) +++{ +++ struct mediabufs_ctl *const mbc = calloc(1, sizeof(*mbc)); +++ +++ if (!mbc) +++ return NULL; +++ +++ mbc->dc = dc; +++ // Default mono planar +++ mbc->pq = pq; +++ pthread_mutex_init(&mbc->lock, NULL); +++ +++ /* Pick a default - could we scan for this? */ +++ if (vpath == NULL) +++ vpath = "/dev/media0"; +++ +++ while ((mbc->vfd = open(vpath, O_RDWR)) == -1) +++ { +++ const int err = errno; +++ if (err != EINTR) { +++ request_err(dc, "Failed to open video dev '%s': %s\n", vpath, strerror(err)); +++ goto fail0; +++ } +++ } +++ +++ if (set_capabilities(mbc)) { +++ request_err(dc, "Bad capabilities for video dev '%s'\n", vpath); +++ goto fail1; +++ } +++ +++ mbc->src = queue_new(mbc->vfd); +++ if (!mbc->src) +++ goto fail1; +++ mbc->dst = queue_new(mbc->vfd); +++ if (!mbc->dst) +++ goto fail2; +++ mbc->pt = polltask_new(pq, mbc->vfd, POLLIN | POLLOUT, mediabufs_poll_cb, mbc); +++ if (!mbc->pt) +++ goto fail3; +++ mbc->this_wlm = ff_weak_link_new(mbc); +++ if (!mbc->this_wlm) +++ goto fail4; +++ +++ /* Cannot add polltask now - polling with nothing pending +++ * generates infinite error polls +++ */ +++ return mbc; +++ +++fail4: +++ polltask_delete(&mbc->pt); +++fail3: +++ queue_delete(mbc->dst); +++fail2: +++ queue_delete(mbc->src); +++fail1: +++ close(mbc->vfd); +++fail0: +++ free(mbc); +++ request_info(dc, "%s: FAILED\n", __func__); +++ return NULL; +++} +++ +++ +++ ++diff --git a/libavcodec/v4l2_req_media.h b/libavcodec/v4l2_req_media.h ++new file mode 100644 ++index 0000000000..890947b2e2 ++--- /dev/null +++++ b/libavcodec/v4l2_req_media.h ++@@ -0,0 +1,171 @@ +++/* +++e.h +++* +++ * Permission is hereby granted, free of charge, to any person obtaining a +++ * copy of this software and associated documentation files (the +++ * "Software"), to deal in the Software without restriction, including +++ * without limitation the rights to use, copy, modify, merge, publish, +++ * distribute, sub license, and/or sell copies of the Software, and to +++ * permit persons to whom the Software is furnished to do so, subject to +++ * the following conditions: +++ * +++ * The above copyright notice and this permission notice (including the +++ * next paragraph) shall be included in all copies or substantial portions +++ * of the Software. +++ * +++ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS +++ * OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF +++ * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NON-INFRINGEMENT. 
+++ * IN NO EVENT SHALL PRECISION INSIGHT AND/OR ITS SUPPLIERS BE LIABLE FOR +++ * ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, +++ * TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE +++ * SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE. +++ */ +++ +++#ifndef _MEDIA_H_ +++#define _MEDIA_H_ +++ +++#include +++#include +++ +++struct v4l2_format; +++struct v4l2_fmtdesc; +++struct v4l2_query_ext_ctrl; +++ +++struct pollqueue; +++struct media_request; +++struct media_pool; +++ +++typedef enum media_buf_status { +++ MEDIABUFS_STATUS_SUCCESS = 0, +++ MEDIABUFS_ERROR_OPERATION_FAILED, +++ MEDIABUFS_ERROR_DECODING_ERROR, +++ MEDIABUFS_ERROR_UNSUPPORTED_BUFFERTYPE, +++ MEDIABUFS_ERROR_UNSUPPORTED_RT_FORMAT, +++ MEDIABUFS_ERROR_ALLOCATION_FAILED, +++ MEDIABUFS_ERROR_UNSUPPORTED_MEMORY, +++} MediaBufsStatus; +++ +++struct media_pool * media_pool_new(const char * const media_path, +++ struct pollqueue * const pq, +++ const unsigned int n); +++void media_pool_delete(struct media_pool ** pmp); +++ +++// Obtain a media request +++// Will block if none availible - has a 2sec timeout +++struct media_request * media_request_get(struct media_pool * const mp); +++int media_request_fd(const struct media_request * const req); +++ +++// Start this request +++// Request structure is returned to pool once done +++int media_request_start(struct media_request * const req); +++ +++// Return an *unstarted* media_request to the pool +++// May later be upgraded to allow for aborting a started req +++int media_request_abort(struct media_request ** const preq); +++ +++ +++struct mediabufs_ctl; +++struct qent_src; +++struct qent_dst; +++struct dmabuf_h; +++struct dmabufs_ctl; +++ +++// 1-1 mammping to V4L2 type - just defined separetely to avoid some include versioning difficulties +++enum mediabufs_memory { +++ MEDIABUFS_MEMORY_UNSET = 0, +++ MEDIABUFS_MEMORY_MMAP = 1, +++ MEDIABUFS_MEMORY_USERPTR = 2, +++ MEDIABUFS_MEMORY_OVERLAY = 3, +++ MEDIABUFS_MEMORY_DMABUF = 4, +++}; +++ +++int qent_src_params_set(struct qent_src *const be, const struct timeval * timestamp); +++struct timeval qent_dst_timestamp_get(const struct qent_dst *const be_dst); +++ +++// prealloc +++int qent_src_alloc(struct qent_src *const be_src, const size_t len, struct dmabufs_ctl * dbsc); +++// dbsc may be NULL if realloc not required +++int qent_src_data_copy(struct qent_src *const be_src, const size_t offset, const void *const src, const size_t len, struct dmabufs_ctl * dbsc); +++const struct dmabuf_h * qent_dst_dmabuf(const struct qent_dst *const be, unsigned int plane); +++int qent_dst_dup_fd(const struct qent_dst *const be, unsigned int plane); +++MediaBufsStatus qent_dst_wait(struct qent_dst *const be); +++void qent_dst_delete(struct qent_dst *const be); +++// Returns a qent_dst to its mbc free Q or deletes it if the mbc is dead +++void qent_dst_unref(struct qent_dst ** const pbe_dst); +++struct qent_dst * qent_dst_ref(struct qent_dst * const be_dst); +++ +++const uint8_t * qent_dst_data(struct qent_dst *const be, unsigned int buf_no); +++MediaBufsStatus qent_dst_read_start(struct qent_dst *const be); +++MediaBufsStatus qent_dst_read_stop(struct qent_dst *const be); +++/* Import an fd unattached to any mediabuf */ +++MediaBufsStatus qent_dst_import_fd(struct qent_dst *const be_dst, +++ unsigned int plane, +++ int fd, size_t size); +++ +++const char * mediabufs_memory_name(const enum mediabufs_memory m); +++ +++MediaBufsStatus mediabufs_start_request(struct mediabufs_ctl *const mbc, +++ struct 
media_request **const pmreq, +++ struct qent_src **const psrc_be, +++ struct qent_dst *const dst_be, +++ const bool is_final); +++// Get / alloc a dst buffer & associate with a slot +++// If the dst pool is empty then behaviour depends on the fixed flag passed to +++// dst_slots_create. Default is !fixed = unlimited alloc +++struct qent_dst* mediabufs_dst_qent_alloc(struct mediabufs_ctl *const mbc, +++ struct dmabufs_ctl *const dbsc); +++// Create dst slots without alloc +++// If fixed true then qent_alloc will only get slots from this pool and will +++// block until a qent has been unrefed +++MediaBufsStatus mediabufs_dst_slots_create(struct mediabufs_ctl *const mbc, const unsigned int n, const bool fixed, const enum mediabufs_memory memtype); +++ +++MediaBufsStatus mediabufs_stream_on(struct mediabufs_ctl *const mbc); +++MediaBufsStatus mediabufs_stream_off(struct mediabufs_ctl *const mbc); +++const struct v4l2_format *mediabufs_dst_fmt(struct mediabufs_ctl *const mbc); +++ +++typedef int mediabufs_dst_fmt_accept_fn(void * v, const struct v4l2_fmtdesc *fmtdesc); +++ +++MediaBufsStatus mediabufs_dst_fmt_set(struct mediabufs_ctl *const mbc, +++ const unsigned int width, +++ const unsigned int height, +++ mediabufs_dst_fmt_accept_fn *const accept_fn, +++ void *const accept_v); +++struct qent_src *mediabufs_src_qent_get(struct mediabufs_ctl *const mbc); +++void mediabufs_src_qent_abort(struct mediabufs_ctl *const mbc, struct qent_src **const pqe_src); +++ +++int mediabufs_ctl_set_ext_ctrls(struct mediabufs_ctl * mbc, struct media_request * const mreq, +++ struct v4l2_ext_control control_array[], unsigned int n); +++MediaBufsStatus mediabufs_set_ext_ctrl(struct mediabufs_ctl *const mbc, +++ struct media_request * const mreq, +++ unsigned int id, void *data, +++ unsigned int size); +++int mediabufs_ctl_query_ext_ctrls(struct mediabufs_ctl * mbc, struct v4l2_query_ext_ctrl ctrls[], unsigned int n); +++ +++int mediabufs_src_resizable(const struct mediabufs_ctl *const mbc); +++ +++MediaBufsStatus mediabufs_src_fmt_set(struct mediabufs_ctl *const mbc, +++ enum v4l2_buf_type buf_type, +++ const uint32_t pixfmt, +++ const uint32_t width, const uint32_t height, +++ const size_t bufsize); +++ +++MediaBufsStatus mediabufs_src_pool_create(struct mediabufs_ctl *const rw, +++ struct dmabufs_ctl * const dbsc, +++ unsigned int n, +++ const enum mediabufs_memory memtype); +++ +++// Want to have appropriate formats set first +++MediaBufsStatus mediabufs_src_chk_memtype(struct mediabufs_ctl *const mbc, const enum mediabufs_memory memtype); +++MediaBufsStatus mediabufs_dst_chk_memtype(struct mediabufs_ctl *const mbc, const enum mediabufs_memory memtype); +++ +++#define MEDIABUFS_DRIVER_VERSION(a, b, c) (((a) << 16) | ((b) << 8) | (c)) +++unsigned int mediabufs_ctl_driver_version(struct mediabufs_ctl *const mbc); +++ +++struct mediabufs_ctl * mediabufs_ctl_new(void * const dc, +++ const char *vpath, struct pollqueue *const pq); +++void mediabufs_ctl_unref(struct mediabufs_ctl **const pmbc); +++struct mediabufs_ctl * mediabufs_ctl_ref(struct mediabufs_ctl *const mbc); +++ +++ +++#endif ++diff --git a/libavcodec/v4l2_req_pollqueue.c b/libavcodec/v4l2_req_pollqueue.c ++new file mode 100644 ++index 0000000000..cc8a5d4001 ++--- /dev/null +++++ b/libavcodec/v4l2_req_pollqueue.c ++@@ -0,0 +1,361 @@ +++#include +++#include +++#include +++#include +++#include +++#include +++#include +++#include +++#include +++#include +++#include +++#include +++#include +++ +++#include "v4l2_req_pollqueue.h" +++#include 
"v4l2_req_utils.h" +++ +++ +++struct pollqueue; +++ +++enum polltask_state { +++ POLLTASK_UNQUEUED = 0, +++ POLLTASK_QUEUED, +++ POLLTASK_RUNNING, +++ POLLTASK_Q_KILL, +++ POLLTASK_RUN_KILL, +++}; +++ +++struct polltask { +++ struct polltask *next; +++ struct polltask *prev; +++ struct pollqueue *q; +++ enum polltask_state state; +++ +++ int fd; +++ short events; +++ +++ void (*fn)(void *v, short revents); +++ void * v; +++ +++ uint64_t timeout; /* CLOCK_MONOTONIC time, 0 => never */ +++ sem_t kill_sem; +++}; +++ +++struct pollqueue { +++ atomic_int ref_count; +++ pthread_mutex_t lock; +++ +++ struct polltask *head; +++ struct polltask *tail; +++ +++ bool kill; +++ bool no_prod; +++ int prod_fd; +++ struct polltask *prod_pt; +++ pthread_t worker; +++}; +++ +++struct polltask *polltask_new(struct pollqueue *const pq, +++ const int fd, const short events, +++ void (*const fn)(void *v, short revents), +++ void *const v) +++{ +++ struct polltask *pt; +++ +++ if (!events) +++ return NULL; +++ +++ pt = malloc(sizeof(*pt)); +++ if (!pt) +++ return NULL; +++ +++ *pt = (struct polltask){ +++ .next = NULL, +++ .prev = NULL, +++ .q = pollqueue_ref(pq), +++ .fd = fd, +++ .events = events, +++ .fn = fn, +++ .v = v +++ }; +++ +++ sem_init(&pt->kill_sem, 0, 0); +++ +++ return pt; +++} +++ +++static void pollqueue_rem_task(struct pollqueue *const pq, struct polltask *const pt) +++{ +++ if (pt->prev) +++ pt->prev->next = pt->next; +++ else +++ pq->head = pt->next; +++ if (pt->next) +++ pt->next->prev = pt->prev; +++ else +++ pq->tail = pt->prev; +++ pt->next = NULL; +++ pt->prev = NULL; +++} +++ +++static void polltask_free(struct polltask * const pt) +++{ +++ sem_destroy(&pt->kill_sem); +++ free(pt); +++} +++ +++static int pollqueue_prod(const struct pollqueue *const pq) +++{ +++ static const uint64_t one = 1; +++ return write(pq->prod_fd, &one, sizeof(one)); +++} +++ +++void polltask_delete(struct polltask **const ppt) +++{ +++ struct polltask *const pt = *ppt; +++ struct pollqueue * pq; +++ enum polltask_state state; +++ bool prodme; +++ +++ if (!pt) +++ return; +++ +++ pq = pt->q; +++ pthread_mutex_lock(&pq->lock); +++ state = pt->state; +++ pt->state = (state == POLLTASK_RUNNING) ? POLLTASK_RUN_KILL : POLLTASK_Q_KILL; +++ prodme = !pq->no_prod; +++ pthread_mutex_unlock(&pq->lock); +++ +++ if (state != POLLTASK_UNQUEUED) { +++ if (prodme) +++ pollqueue_prod(pq); +++ while (sem_wait(&pt->kill_sem) && errno == EINTR) +++ /* loop */; +++ } +++ +++ // Leave zapping the ref until we have DQed the PT as might well be +++ // legitimately used in it +++ *ppt = NULL; +++ polltask_free(pt); +++ pollqueue_unref(&pq); +++} +++ +++static uint64_t pollqueue_now(int timeout) +++{ +++ struct timespec now; +++ uint64_t now_ms; +++ +++ if (clock_gettime(CLOCK_MONOTONIC, &now)) +++ return 0; +++ now_ms = (now.tv_nsec / 1000000) + (uint64_t)now.tv_sec * 1000 + timeout; +++ return now_ms ? now_ms : (uint64_t)1; +++} +++ +++void pollqueue_add_task(struct polltask *const pt, const int timeout) +++{ +++ bool prodme = false; +++ struct pollqueue * const pq = pt->q; +++ +++ pthread_mutex_lock(&pq->lock); +++ if (pt->state != POLLTASK_Q_KILL && pt->state != POLLTASK_RUN_KILL) { +++ if (pq->tail) +++ pq->tail->next = pt; +++ else +++ pq->head = pt; +++ pt->prev = pq->tail; +++ pt->next = NULL; +++ pt->state = POLLTASK_QUEUED; +++ pt->timeout = timeout < 0 ? 
0 : pollqueue_now(timeout); +++ pq->tail = pt; +++ prodme = !pq->no_prod; +++ } +++ pthread_mutex_unlock(&pq->lock); +++ if (prodme) +++ pollqueue_prod(pq); +++} +++ +++static void *poll_thread(void *v) +++{ +++ struct pollqueue *const pq = v; +++ struct pollfd *a = NULL; +++ size_t asize = 0; +++ +++ pthread_mutex_lock(&pq->lock); +++ do { +++ unsigned int i; +++ unsigned int n = 0; +++ struct polltask *pt; +++ struct polltask *pt_next; +++ uint64_t now = pollqueue_now(0); +++ int timeout = -1; +++ int rv; +++ +++ for (pt = pq->head; pt; pt = pt_next) { +++ int64_t t; +++ +++ pt_next = pt->next; +++ +++ if (pt->state == POLLTASK_Q_KILL) { +++ pollqueue_rem_task(pq, pt); +++ sem_post(&pt->kill_sem); +++ continue; +++ } +++ +++ if (n >= asize) { +++ asize = asize ? asize * 2 : 4; +++ a = realloc(a, asize * sizeof(*a)); +++ if (!a) { +++ request_log("Failed to realloc poll array to %zd\n", asize); +++ goto fail_locked; +++ } +++ } +++ +++ a[n++] = (struct pollfd){ +++ .fd = pt->fd, +++ .events = pt->events +++ }; +++ +++ t = (int64_t)(pt->timeout - now); +++ if (pt->timeout && t < INT_MAX && +++ (timeout < 0 || (int)t < timeout)) +++ timeout = (t < 0) ? 0 : (int)t; +++ } +++ pthread_mutex_unlock(&pq->lock); +++ +++ if ((rv = poll(a, n, timeout)) == -1) { +++ if (errno != EINTR) { +++ request_log("Poll error: %s\n", strerror(errno)); +++ goto fail_unlocked; +++ } +++ } +++ +++ pthread_mutex_lock(&pq->lock); +++ now = pollqueue_now(0); +++ +++ /* Prodding in this loop is pointless and might lead to +++ * infinite looping +++ */ +++ pq->no_prod = true; +++ for (i = 0, pt = pq->head; i < n; ++i, pt = pt_next) { +++ pt_next = pt->next; +++ +++ /* Pending? */ +++ if (a[i].revents || +++ (pt->timeout && (int64_t)(now - pt->timeout) >= 0)) { +++ pollqueue_rem_task(pq, pt); +++ if (pt->state == POLLTASK_QUEUED) +++ pt->state = POLLTASK_RUNNING; +++ if (pt->state == POLLTASK_Q_KILL) +++ pt->state = POLLTASK_RUN_KILL; +++ pthread_mutex_unlock(&pq->lock); +++ +++ /* This can add new entries to the Q but as +++ * those are added to the tail our existing +++ * chain remains intact +++ */ +++ pt->fn(pt->v, a[i].revents); +++ +++ pthread_mutex_lock(&pq->lock); +++ if (pt->state == POLLTASK_RUNNING) +++ pt->state = POLLTASK_UNQUEUED; +++ if (pt->state == POLLTASK_RUN_KILL) +++ sem_post(&pt->kill_sem); +++ } +++ } +++ pq->no_prod = false; +++ +++ } while (!pq->kill); +++ +++fail_locked: +++ pthread_mutex_unlock(&pq->lock); +++fail_unlocked: +++ free(a); +++ return NULL; +++} +++ +++static void prod_fn(void *v, short revents) +++{ +++ struct pollqueue *const pq = v; +++ char buf[8]; +++ if (revents) +++ read(pq->prod_fd, buf, 8); +++ if (!pq->kill) +++ pollqueue_add_task(pq->prod_pt, -1); +++} +++ +++struct pollqueue * pollqueue_new(void) +++{ +++ struct pollqueue *pq = malloc(sizeof(*pq)); +++ if (!pq) +++ return NULL; +++ *pq = (struct pollqueue){ +++ .ref_count = ATOMIC_VAR_INIT(0), +++ .lock = PTHREAD_MUTEX_INITIALIZER, +++ .head = NULL, +++ .tail = NULL, +++ .kill = false, +++ .prod_fd = -1 +++ }; +++ +++ pq->prod_fd = eventfd(0, EFD_NONBLOCK); +++ if (pq->prod_fd == 1) +++ goto fail1; +++ pq->prod_pt = polltask_new(pq, pq->prod_fd, POLLIN, prod_fn, pq); +++ if (!pq->prod_pt) +++ goto fail2; +++ pollqueue_add_task(pq->prod_pt, -1); +++ if (pthread_create(&pq->worker, NULL, poll_thread, pq)) +++ goto fail3; +++ // Reset ref count which will have been inced by the add_task +++ atomic_store(&pq->ref_count, 0); +++ return pq; +++ +++fail3: +++ polltask_free(pq->prod_pt); +++fail2: +++ close(pq->prod_fd); 
+++fail1: +++ free(pq); +++ return NULL; +++} +++ +++static void pollqueue_free(struct pollqueue *const pq) +++{ +++ void *rv; +++ +++ pthread_mutex_lock(&pq->lock); +++ pq->kill = true; +++ pollqueue_prod(pq); +++ pthread_mutex_unlock(&pq->lock); +++ +++ pthread_join(pq->worker, &rv); +++ polltask_free(pq->prod_pt); +++ pthread_mutex_destroy(&pq->lock); +++ close(pq->prod_fd); +++ free(pq); +++} +++ +++struct pollqueue * pollqueue_ref(struct pollqueue *const pq) +++{ +++ atomic_fetch_add(&pq->ref_count, 1); +++ return pq; +++} +++ +++void pollqueue_unref(struct pollqueue **const ppq) +++{ +++ struct pollqueue * const pq = *ppq; +++ +++ if (!pq) +++ return; +++ *ppq = NULL; +++ +++ if (atomic_fetch_sub(&pq->ref_count, 1) != 0) +++ return; +++ +++ pollqueue_free(pq); +++} +++ +++ +++ ++diff --git a/libavcodec/v4l2_req_pollqueue.h b/libavcodec/v4l2_req_pollqueue.h ++new file mode 100644 ++index 0000000000..e1182cb2fc ++--- /dev/null +++++ b/libavcodec/v4l2_req_pollqueue.h ++@@ -0,0 +1,18 @@ +++#ifndef POLLQUEUE_H_ +++#define POLLQUEUE_H_ +++ +++struct polltask; +++struct pollqueue; +++ +++struct polltask *polltask_new(struct pollqueue *const pq, +++ const int fd, const short events, +++ void (*const fn)(void *v, short revents), +++ void *const v); +++void polltask_delete(struct polltask **const ppt); +++ +++void pollqueue_add_task(struct polltask *const pt, const int timeout); +++struct pollqueue * pollqueue_new(void); +++void pollqueue_unref(struct pollqueue **const ppq); +++struct pollqueue * pollqueue_ref(struct pollqueue *const pq); +++ +++#endif /* POLLQUEUE_H_ */ ++diff --git a/libavcodec/v4l2_req_utils.h b/libavcodec/v4l2_req_utils.h ++new file mode 100644 ++index 0000000000..a31cc1f4ec ++--- /dev/null +++++ b/libavcodec/v4l2_req_utils.h ++@@ -0,0 +1,27 @@ +++#ifndef AVCODEC_V4L2_REQ_UTILS_H +++#define AVCODEC_V4L2_REQ_UTILS_H +++ +++#include +++#include "libavutil/log.h" +++ +++#define request_log(...) av_log(NULL, AV_LOG_INFO, __VA_ARGS__) +++ +++#define request_err(_ctx, ...) av_log(_ctx, AV_LOG_ERROR, __VA_ARGS__) +++#define request_warn(_ctx, ...) av_log(_ctx, AV_LOG_WARNING, __VA_ARGS__) +++#define request_info(_ctx, ...) av_log(_ctx, AV_LOG_INFO, __VA_ARGS__) +++#define request_debug(_ctx, ...) av_log(_ctx, AV_LOG_DEBUG, __VA_ARGS__) +++ +++static inline char safechar(char c) { +++ return c > 0x20 && c < 0x7f ? c : '.'; +++} +++ +++static inline const char * strfourcc(char tbuf[5], uint32_t fcc) { +++ tbuf[0] = safechar((fcc >> 0) & 0xff); +++ tbuf[1] = safechar((fcc >> 8) & 0xff); +++ tbuf[2] = safechar((fcc >> 16) & 0xff); +++ tbuf[3] = safechar((fcc >> 24) & 0xff); +++ tbuf[4] = '\0'; +++ return tbuf; +++} +++ +++#endif ++diff --git a/libavcodec/v4l2_request_hevc.c b/libavcodec/v4l2_request_hevc.c ++new file mode 100644 ++index 0000000000..db7ed13b6d ++--- /dev/null +++++ b/libavcodec/v4l2_request_hevc.c ++@@ -0,0 +1,348 @@ +++/* +++ * This file is part of FFmpeg. +++ * +++ * FFmpeg is free software; you can redistribute it and/or +++ * modify it under the terms of the GNU Lesser General Public +++ * License as published by the Free Software Foundation; either +++ * version 2.1 of the License, or (at your option) any later version. +++ * +++ * FFmpeg is distributed in the hope that it will be useful, +++ * but WITHOUT ANY WARRANTY; without even the implied warranty of +++ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU +++ * Lesser General Public License for more details. 
+++ * +++ * You should have received a copy of the GNU Lesser General Public +++ * License along with FFmpeg; if not, write to the Free Software +++ * Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA +++ */ +++ +++ +++#include "config.h" +++#include "decode.h" +++#include "hevcdec.h" +++#include "hwconfig.h" +++#include "internal.h" +++ +++#include "v4l2_request_hevc.h" +++ +++#include "libavutil/hwcontext_drm.h" +++#include "libavutil/pixdesc.h" +++ +++#include "v4l2_req_devscan.h" +++#include "v4l2_req_dmabufs.h" +++#include "v4l2_req_pollqueue.h" +++#include "v4l2_req_media.h" +++#include "v4l2_req_utils.h" +++ +++static size_t bit_buf_size(unsigned int w, unsigned int h, unsigned int bits_minus8) +++{ +++ const size_t wxh = w * h; +++ size_t bits_alloc; +++ +++ /* Annex A gives a min compression of 2 @ lvl 3.1 +++ * (wxh <= 983040) and min 4 thereafter but avoid +++ * the odity of 983041 having a lower limit than +++ * 983040. +++ * Multiply by 3/2 for 4:2:0 +++ */ +++ bits_alloc = wxh < 983040 ? wxh * 3 / 4 : +++ wxh < 983040 * 2 ? 983040 * 3 / 4 : +++ wxh * 3 / 8; +++ /* Allow for bit depth */ +++ bits_alloc += (bits_alloc * bits_minus8) / 8; +++ /* Add a few bytes (16k) for overhead */ +++ bits_alloc += 0x4000; +++ return bits_alloc; +++} +++ +++static int v4l2_req_hevc_start_frame(AVCodecContext *avctx, +++ av_unused const uint8_t *buffer, +++ av_unused uint32_t size) +++{ +++ const V4L2RequestContextHEVC * const ctx = avctx->internal->hwaccel_priv_data; +++ return ctx->fns->start_frame(avctx, buffer, size); +++} +++ +++static int v4l2_req_hevc_decode_slice(AVCodecContext *avctx, const uint8_t *buffer, uint32_t size) +++{ +++ V4L2RequestContextHEVC * const ctx = avctx->internal->hwaccel_priv_data; +++ return ctx->fns->decode_slice(avctx, buffer, size); +++} +++ +++static int v4l2_req_hevc_end_frame(AVCodecContext *avctx) +++{ +++ V4L2RequestContextHEVC *ctx = avctx->internal->hwaccel_priv_data; +++ return ctx->fns->end_frame(avctx); +++} +++ +++static void v4l2_req_hevc_abort_frame(AVCodecContext * const avctx) +++{ +++ V4L2RequestContextHEVC * const ctx = avctx->internal->hwaccel_priv_data; +++ ctx->fns->abort_frame(avctx); +++} +++ +++static int v4l2_req_hevc_frame_params(AVCodecContext *avctx, AVBufferRef *hw_frames_ctx) +++{ +++ V4L2RequestContextHEVC * const ctx = avctx->internal->hwaccel_priv_data; +++ return ctx->fns->frame_params(avctx, hw_frames_ctx); +++} +++ +++static int v4l2_req_hevc_alloc_frame(AVCodecContext * avctx, AVFrame *frame) +++{ +++ V4L2RequestContextHEVC * const ctx = avctx->internal->hwaccel_priv_data; +++ return ctx->fns->alloc_frame(avctx, frame); +++} +++ +++ +++static int v4l2_request_hevc_uninit(AVCodecContext *avctx) +++{ +++ V4L2RequestContextHEVC * const ctx = avctx->internal->hwaccel_priv_data; +++ +++ av_log(avctx, AV_LOG_DEBUG, "<<< %s\n", __func__); +++ +++ decode_q_wait(&ctx->decode_q, NULL); // Wait for all other threads to be out of decode +++ +++ mediabufs_ctl_unref(&ctx->mbufs); +++ media_pool_delete(&ctx->mpool); +++ pollqueue_unref(&ctx->pq); +++ dmabufs_ctl_unref(&ctx->dbufs); +++ devscan_delete(&ctx->devscan); +++ +++ decode_q_uninit(&ctx->decode_q); +++ +++// if (avctx->hw_frames_ctx) { +++// AVHWFramesContext *hwfc = (AVHWFramesContext*)avctx->hw_frames_ctx->data; +++// av_buffer_pool_flush(hwfc->pool); +++// } +++ return 0; +++} +++ +++static int dst_fmt_accept_cb(void * v, const struct v4l2_fmtdesc *fmtdesc) +++{ +++ AVCodecContext *const avctx = v; +++ const HEVCContext *const h = avctx->priv_data; +++ 
+++ if (h->ps.sps->bit_depth == 8) { +++ if (fmtdesc->pixelformat == V4L2_PIX_FMT_NV12_COL128 || +++ fmtdesc->pixelformat == V4L2_PIX_FMT_NV12) { +++ return 1; +++ } +++ } +++ else if (h->ps.sps->bit_depth == 10) { +++ if (fmtdesc->pixelformat == V4L2_PIX_FMT_NV12_10_COL128) { +++ return 1; +++ } +++ } +++ return 0; +++} +++ +++static int v4l2_request_hevc_init(AVCodecContext *avctx) +++{ +++ const HEVCContext *h = avctx->priv_data; +++ V4L2RequestContextHEVC * const ctx = avctx->internal->hwaccel_priv_data; +++ const HEVCSPS * const sps = h->ps.sps; +++ int ret; +++ const struct decdev * decdev; +++ const uint32_t src_pix_fmt = V2(ff_v4l2_req_hevc, 4).src_pix_fmt_v4l2; // Assuming constant for all APIs but avoiding V4L2 includes +++ size_t src_size; +++ enum mediabufs_memory src_memtype; +++ enum mediabufs_memory dst_memtype; +++ +++ av_log(avctx, AV_LOG_DEBUG, "<<< %s\n", __func__); +++ +++ // Give up immediately if this is something that we have no code to deal with +++ if (h->ps.sps->chroma_format_idc != 1) { +++ av_log(avctx, AV_LOG_WARNING, "chroma_format_idc(%d) != 1: Not implemented\n", h->ps.sps->chroma_format_idc); +++ return AVERROR_PATCHWELCOME; +++ } +++ if (!(h->ps.sps->bit_depth == 10 || h->ps.sps->bit_depth == 8) || +++ h->ps.sps->bit_depth != h->ps.sps->bit_depth_chroma) { +++ av_log(avctx, AV_LOG_WARNING, "Bit depth Y:%d C:%d: Not implemented\n", h->ps.sps->bit_depth, h->ps.sps->bit_depth_chroma); +++ return AVERROR_PATCHWELCOME; +++ } +++ +++ if ((ret = devscan_build(avctx, &ctx->devscan)) != 0) { +++ av_log(avctx, AV_LOG_WARNING, "Failed to find any V4L2 devices\n"); +++ return (AVERROR(-ret)); +++ } +++ ret = AVERROR(ENOMEM); // Assume mem fail by default for these +++ +++ if ((decdev = devscan_find(ctx->devscan, src_pix_fmt)) == NULL) +++ { +++ av_log(avctx, AV_LOG_WARNING, "Failed to find a V4L2 device for H265\n"); +++ ret = AVERROR(ENODEV); +++ goto fail0; +++ } +++ av_log(avctx, AV_LOG_DEBUG, "Trying V4L2 devices: %s,%s\n", +++ decdev_media_path(decdev), decdev_video_path(decdev)); +++ +++ if ((ctx->dbufs = dmabufs_ctl_new()) == NULL) { +++ av_log(avctx, AV_LOG_DEBUG, "Unable to open dmabufs - try mmap buffers\n"); +++ src_memtype = MEDIABUFS_MEMORY_MMAP; +++ dst_memtype = MEDIABUFS_MEMORY_MMAP; +++ } +++ else { +++ av_log(avctx, AV_LOG_DEBUG, "Dmabufs opened - try dmabuf buffers\n"); +++ src_memtype = MEDIABUFS_MEMORY_DMABUF; +++ dst_memtype = MEDIABUFS_MEMORY_DMABUF; +++ } +++ +++ if ((ctx->pq = pollqueue_new()) == NULL) { +++ av_log(avctx, AV_LOG_ERROR, "Unable to create pollqueue\n"); +++ goto fail1; +++ } +++ +++ if ((ctx->mpool = media_pool_new(decdev_media_path(decdev), ctx->pq, 4)) == NULL) { +++ av_log(avctx, AV_LOG_ERROR, "Unable to create media pool\n"); +++ goto fail2; +++ } +++ +++ if ((ctx->mbufs = mediabufs_ctl_new(avctx, decdev_video_path(decdev), ctx->pq)) == NULL) { +++ av_log(avctx, AV_LOG_ERROR, "Unable to create media controls\n"); +++ goto fail3; +++ } +++ +++ // Ask for an initial bitbuf size of max size / 4 +++ // We will realloc if we need more +++ // Must use sps->h/w as avctx contains cropped size +++retry_src_memtype: +++ src_size = bit_buf_size(sps->width, sps->height, sps->bit_depth - 8); +++ if (src_memtype == MEDIABUFS_MEMORY_DMABUF && mediabufs_src_resizable(ctx->mbufs)) +++ src_size /= 4; +++ // Kludge for conformance tests which break Annex A limits +++ else if (src_size < 0x40000) +++ src_size = 0x40000; +++ +++ if (mediabufs_src_fmt_set(ctx->mbufs, decdev_src_type(decdev), src_pix_fmt, +++ sps->width, sps->height, src_size)) { 
+++ char tbuf1[5]; +++ av_log(avctx, AV_LOG_ERROR, "Failed to set source format: %s %dx%d\n", strfourcc(tbuf1, src_pix_fmt), sps->width, sps->height); +++ goto fail4; +++ } +++ +++ if (mediabufs_src_chk_memtype(ctx->mbufs, src_memtype)) { +++ if (src_memtype == MEDIABUFS_MEMORY_DMABUF) { +++ src_memtype = MEDIABUFS_MEMORY_MMAP; +++ goto retry_src_memtype; +++ } +++ av_log(avctx, AV_LOG_ERROR, "Failed to get src memory type\n"); +++ goto fail4; +++ } +++ +++ if (V2(ff_v4l2_req_hevc, 4).probe(avctx, ctx) == 0) { +++ av_log(avctx, AV_LOG_DEBUG, "HEVC API version 4 probed successfully\n"); +++ ctx->fns = &V2(ff_v4l2_req_hevc, 4); +++ } +++#if CONFIG_V4L2_REQ_HEVC_VX +++ else if (V2(ff_v4l2_req_hevc, 3).probe(avctx, ctx) == 0) { +++ av_log(avctx, AV_LOG_DEBUG, "HEVC API version 3 probed successfully\n"); +++ ctx->fns = &V2(ff_v4l2_req_hevc, 3); +++ } +++ else if (V2(ff_v4l2_req_hevc, 2).probe(avctx, ctx) == 0) { +++ av_log(avctx, AV_LOG_DEBUG, "HEVC API version 2 probed successfully\n"); +++ ctx->fns = &V2(ff_v4l2_req_hevc, 2); +++ } +++ else if (V2(ff_v4l2_req_hevc, 1).probe(avctx, ctx) == 0) { +++ av_log(avctx, AV_LOG_DEBUG, "HEVC API version 1 probed successfully\n"); +++ ctx->fns = &V2(ff_v4l2_req_hevc, 1); +++ } +++#endif +++ else { +++ av_log(avctx, AV_LOG_ERROR, "No HEVC version probed successfully\n"); +++ ret = AVERROR(EINVAL); +++ goto fail4; +++ } +++ +++ if (mediabufs_dst_fmt_set(ctx->mbufs, sps->width, sps->height, dst_fmt_accept_cb, avctx)) { +++ char tbuf1[5]; +++ av_log(avctx, AV_LOG_ERROR, "Failed to set destination format: %s %dx%d\n", strfourcc(tbuf1, src_pix_fmt), sps->width, sps->height); +++ goto fail4; +++ } +++ +++ if (mediabufs_src_pool_create(ctx->mbufs, ctx->dbufs, 6, src_memtype)) { +++ av_log(avctx, AV_LOG_ERROR, "Failed to create source pool\n"); +++ goto fail4; +++ } +++ +++ { +++ unsigned int dst_slots = sps->temporal_layer[sps->max_sub_layers - 1].max_dec_pic_buffering + +++ avctx->thread_count + (avctx->extra_hw_frames > 0 ? 
avctx->extra_hw_frames : 6); +++ av_log(avctx, AV_LOG_DEBUG, "Slots=%d: Reordering=%d, threads=%d, hw+=%d\n", dst_slots, +++ sps->temporal_layer[sps->max_sub_layers - 1].max_dec_pic_buffering, +++ avctx->thread_count, avctx->extra_hw_frames); +++ +++ if (mediabufs_dst_chk_memtype(ctx->mbufs, dst_memtype)) { +++ if (dst_memtype != MEDIABUFS_MEMORY_DMABUF) { +++ av_log(avctx, AV_LOG_ERROR, "Failed to get dst memory type\n"); +++ goto fail4; +++ } +++ av_log(avctx, AV_LOG_DEBUG, "Dst DMABUF not supported - trying mmap\n"); +++ dst_memtype = MEDIABUFS_MEMORY_MMAP; +++ } +++ +++ // extra_hw_frames is -1 if unset +++ if (mediabufs_dst_slots_create(ctx->mbufs, dst_slots, (avctx->extra_hw_frames > 0), dst_memtype)) { +++ av_log(avctx, AV_LOG_ERROR, "Failed to create destination slots\n"); +++ goto fail4; +++ } +++ } +++ +++ if (mediabufs_stream_on(ctx->mbufs)) { +++ av_log(avctx, AV_LOG_ERROR, "Failed stream on\n"); +++ goto fail4; +++ } +++ +++ if ((ret = ff_decode_get_hw_frames_ctx(avctx, AV_HWDEVICE_TYPE_DRM)) != 0) { +++ av_log(avctx, AV_LOG_ERROR, "Failed to create frame ctx\n"); +++ goto fail4; +++ } +++ +++ if ((ret = ctx->fns->set_controls(avctx, ctx)) != 0) { +++ av_log(avctx, AV_LOG_ERROR, "Failed set controls\n"); +++ goto fail5; +++ } +++ +++ decode_q_init(&ctx->decode_q); +++ +++ // Set our s/w format +++ avctx->sw_pix_fmt = ((AVHWFramesContext *)avctx->hw_frames_ctx->data)->sw_format; +++ +++ av_log(avctx, AV_LOG_INFO, "Hwaccel %s; devices: %s,%s; buffers: src %s, dst %s; swfmt=%s\n", +++ ctx->fns->name, +++ decdev_media_path(decdev), decdev_video_path(decdev), +++ mediabufs_memory_name(src_memtype), mediabufs_memory_name(dst_memtype), +++ av_get_pix_fmt_name(avctx->sw_pix_fmt)); +++ +++ return 0; +++ +++fail5: +++ av_buffer_unref(&avctx->hw_frames_ctx); +++fail4: +++ mediabufs_ctl_unref(&ctx->mbufs); +++fail3: +++ media_pool_delete(&ctx->mpool); +++fail2: +++ pollqueue_unref(&ctx->pq); +++fail1: +++ dmabufs_ctl_unref(&ctx->dbufs); +++fail0: +++ devscan_delete(&ctx->devscan); +++ return ret; +++} +++ +++const AVHWAccel ff_hevc_v4l2request_hwaccel = { +++ .name = "hevc_v4l2request", +++ .type = AVMEDIA_TYPE_VIDEO, +++ .id = AV_CODEC_ID_HEVC, +++ .pix_fmt = AV_PIX_FMT_DRM_PRIME, +++ .alloc_frame = v4l2_req_hevc_alloc_frame, +++ .start_frame = v4l2_req_hevc_start_frame, +++ .decode_slice = v4l2_req_hevc_decode_slice, +++ .end_frame = v4l2_req_hevc_end_frame, +++ .abort_frame = v4l2_req_hevc_abort_frame, +++ .init = v4l2_request_hevc_init, +++ .uninit = v4l2_request_hevc_uninit, +++ .priv_data_size = sizeof(V4L2RequestContextHEVC), +++ .frame_params = v4l2_req_hevc_frame_params, +++ .caps_internal = HWACCEL_CAP_ASYNC_SAFE | HWACCEL_CAP_MT_SAFE, +++}; ++diff --git a/libavcodec/v4l2_request_hevc.h b/libavcodec/v4l2_request_hevc.h ++new file mode 100644 ++index 0000000000..99c90064ea ++--- /dev/null +++++ b/libavcodec/v4l2_request_hevc.h ++@@ -0,0 +1,102 @@ +++#ifndef AVCODEC_V4L2_REQUEST_HEVC_H +++#define AVCODEC_V4L2_REQUEST_HEVC_H +++ +++#include +++#include +++#include "v4l2_req_decode_q.h" +++ +++#ifndef DRM_FORMAT_NV15 +++#define DRM_FORMAT_NV15 fourcc_code('N', 'V', '1', '5') +++#endif +++ +++#ifndef DRM_FORMAT_NV20 +++#define DRM_FORMAT_NV20 fourcc_code('N', 'V', '2', '0') +++#endif +++ +++// P030 should be defined in drm_fourcc.h and hopefully will be sometime +++// in the future but until then... 
+++#ifndef DRM_FORMAT_P030 +++#define DRM_FORMAT_P030 fourcc_code('P', '0', '3', '0') +++#endif +++ +++#ifndef DRM_FORMAT_NV15 +++#define DRM_FORMAT_NV15 fourcc_code('N', 'V', '1', '5') +++#endif +++ +++#ifndef DRM_FORMAT_NV20 +++#define DRM_FORMAT_NV20 fourcc_code('N', 'V', '2', '0') +++#endif +++ +++#include +++#ifndef V4L2_CID_CODEC_BASE +++#define V4L2_CID_CODEC_BASE V4L2_CID_MPEG_BASE +++#endif +++ +++// V4L2_PIX_FMT_NV12_10_COL128 and V4L2_PIX_FMT_NV12_COL128 should be defined +++// in drm_fourcc.h hopefully will be sometime in the future but until then... +++#ifndef V4L2_PIX_FMT_NV12_10_COL128 +++#define V4L2_PIX_FMT_NV12_10_COL128 v4l2_fourcc('N', 'C', '3', '0') +++#endif +++ +++#ifndef V4L2_PIX_FMT_NV12_COL128 +++#define V4L2_PIX_FMT_NV12_COL128 v4l2_fourcc('N', 'C', '1', '2') /* 12 Y/CbCr 4:2:0 128 pixel wide column */ +++#endif +++ +++#ifndef V4L2_CTRL_FLAG_DYNAMIC_ARRAY +++#define V4L2_CTRL_FLAG_DYNAMIC_ARRAY 0x0800 +++#endif +++ +++#define VCAT(name, version) name##_v##version +++#define V2(n,v) VCAT(n, v) +++#define V(n) V2(n, HEVC_CTRLS_VERSION) +++ +++#define S2(x) #x +++#define STR(x) S2(x) +++ +++// 1 per decoder +++struct v4l2_req_decode_fns; +++ +++typedef struct V4L2RequestContextHEVC { +++// V4L2RequestContext base; +++ const struct v4l2_req_decode_fns * fns; +++ +++ unsigned int timestamp; // ?? maybe uint64_t +++ +++ int decode_mode; +++ int start_code; +++ unsigned int max_slices; // 0 => not wanted (frame mode) +++ unsigned int max_offsets; // 0 => not wanted +++ +++ req_decode_q decode_q; +++ +++ struct devscan *devscan; +++ struct dmabufs_ctl *dbufs; +++ struct pollqueue *pq; +++ struct media_pool * mpool; +++ struct mediabufs_ctl *mbufs; +++} V4L2RequestContextHEVC; +++ +++typedef struct v4l2_req_decode_fns { +++ int src_pix_fmt_v4l2; +++ const char * name; +++ +++ // Init setup +++ int (*probe)(AVCodecContext * const avctx, V4L2RequestContextHEVC * const ctx); +++ int (*set_controls)(AVCodecContext * const avctx, V4L2RequestContextHEVC * const ctx); +++ +++ // Passthrough of hwaccel fns +++ int (*start_frame)(AVCodecContext *avctx, const uint8_t *buf, uint32_t buf_size); +++ int (*decode_slice)(AVCodecContext *avctx, const uint8_t *buf, uint32_t buf_size); +++ int (*end_frame)(AVCodecContext *avctx); +++ void (*abort_frame)(AVCodecContext *avctx); +++ int (*frame_params)(AVCodecContext *avctx, AVBufferRef *hw_frames_ctx); +++ int (*alloc_frame)(AVCodecContext * avctx, AVFrame *frame); +++} v4l2_req_decode_fns; +++ +++ +++extern const v4l2_req_decode_fns V2(ff_v4l2_req_hevc, 1); +++extern const v4l2_req_decode_fns V2(ff_v4l2_req_hevc, 2); +++extern const v4l2_req_decode_fns V2(ff_v4l2_req_hevc, 3); +++extern const v4l2_req_decode_fns V2(ff_v4l2_req_hevc, 4); +++ +++#endif ++diff --git a/libavcodec/weak_link.c b/libavcodec/weak_link.c ++new file mode 100644 ++index 0000000000..f234a985b9 ++--- /dev/null +++++ b/libavcodec/weak_link.c ++@@ -0,0 +1,102 @@ +++#include +++#include +++#include +++#include "weak_link.h" +++ +++struct ff_weak_link_master { +++ atomic_int ref_count; /* 0 is single ref for easier atomics */ +++ pthread_rwlock_t lock; +++ void * ptr; +++}; +++ +++static inline struct ff_weak_link_master * weak_link_x(struct ff_weak_link_client * c) +++{ +++ return (struct ff_weak_link_master *)c; +++} +++ +++struct ff_weak_link_master * ff_weak_link_new(void * p) +++{ +++ struct ff_weak_link_master * w = malloc(sizeof(*w)); +++ if (!w) +++ return NULL; +++ w->ptr = p; +++ if (pthread_rwlock_init(&w->lock, NULL)) { +++ free(w); +++ return NULL; +++ } +++ 
return w; +++} +++ +++static void weak_link_do_unref(struct ff_weak_link_master * const w) +++{ +++ int n = atomic_fetch_sub(&w->ref_count, 1); +++ if (n) +++ return; +++ +++ pthread_rwlock_destroy(&w->lock); +++ free(w); +++} +++ +++// Unref & break link +++void ff_weak_link_break(struct ff_weak_link_master ** ppLink) +++{ +++ struct ff_weak_link_master * const w = *ppLink; +++ if (!w) +++ return; +++ +++ *ppLink = NULL; +++ pthread_rwlock_wrlock(&w->lock); +++ w->ptr = NULL; +++ pthread_rwlock_unlock(&w->lock); +++ +++ weak_link_do_unref(w); +++} +++ +++struct ff_weak_link_client* ff_weak_link_ref(struct ff_weak_link_master * w) +++{ +++ if (!w) +++ return NULL; +++ atomic_fetch_add(&w->ref_count, 1); +++ return (struct ff_weak_link_client*)w; +++} +++ +++void ff_weak_link_unref(struct ff_weak_link_client ** ppLink) +++{ +++ struct ff_weak_link_master * const w = weak_link_x(*ppLink); +++ if (!w) +++ return; +++ +++ *ppLink = NULL; +++ weak_link_do_unref(w); +++} +++ +++void * ff_weak_link_lock(struct ff_weak_link_client ** ppLink) +++{ +++ struct ff_weak_link_master * const w = weak_link_x(*ppLink); +++ +++ if (!w) +++ return NULL; +++ +++ if (pthread_rwlock_rdlock(&w->lock)) +++ goto broken; +++ +++ if (w->ptr) +++ return w->ptr; +++ +++ pthread_rwlock_unlock(&w->lock); +++ +++broken: +++ *ppLink = NULL; +++ weak_link_do_unref(w); +++ return NULL; +++} +++ +++// Ignores a NULL c (so can be on the return path of both broken & live links) +++void ff_weak_link_unlock(struct ff_weak_link_client * c) +++{ +++ struct ff_weak_link_master * const w = weak_link_x(c); +++ if (w) +++ pthread_rwlock_unlock(&w->lock); +++} +++ +++ ++diff --git a/libavcodec/weak_link.h b/libavcodec/weak_link.h ++new file mode 100644 ++index 0000000000..415b6a27a0 ++--- /dev/null +++++ b/libavcodec/weak_link.h ++@@ -0,0 +1,23 @@ +++struct ff_weak_link_master; +++struct ff_weak_link_client; +++ +++struct ff_weak_link_master * ff_weak_link_new(void * p); +++void ff_weak_link_break(struct ff_weak_link_master ** ppLink); +++ +++struct ff_weak_link_client* ff_weak_link_ref(struct ff_weak_link_master * w); +++void ff_weak_link_unref(struct ff_weak_link_client ** ppLink); +++ +++// Returns NULL if link broken - in this case it will also zap +++// *ppLink and unref the weak_link. 
+++// Returns NULL if *ppLink is NULL (so a link once broken stays broken) +++// +++// The above does mean that there is a race if this is called simultainiously +++// by two threads using the same weak_link_client (so don't do that) +++void * ff_weak_link_lock(struct ff_weak_link_client ** ppLink); +++void ff_weak_link_unlock(struct ff_weak_link_client * c); +++ +++ +++ +++ +++ +++ ++diff --git a/libavdevice/Makefile b/libavdevice/Makefile ++index 8a62822b69..0989cb895f 100644 ++--- a/libavdevice/Makefile +++++ b/libavdevice/Makefile ++@@ -48,6 +48,8 @@ OBJS-$(CONFIG_SNDIO_OUTDEV) += sndio_enc.o sndio.o ++ OBJS-$(CONFIG_V4L2_INDEV) += v4l2.o v4l2-common.o timefilter.o ++ OBJS-$(CONFIG_V4L2_OUTDEV) += v4l2enc.o v4l2-common.o ++ OBJS-$(CONFIG_VFWCAP_INDEV) += vfwcap.o +++OBJS-$(CONFIG_VOUT_DRM_OUTDEV) += drm_vout.o +++OBJS-$(CONFIG_VOUT_EGL_OUTDEV) += egl_vout.o ++ OBJS-$(CONFIG_XCBGRAB_INDEV) += xcbgrab.o ++ OBJS-$(CONFIG_XV_OUTDEV) += xv.o ++ ++diff --git a/libavdevice/alldevices.c b/libavdevice/alldevices.c ++index 8a90fcb5d7..ffb410b92d 100644 ++--- a/libavdevice/alldevices.c +++++ b/libavdevice/alldevices.c ++@@ -52,6 +52,8 @@ extern const FFOutputFormat ff_sndio_muxer; ++ extern const AVInputFormat ff_v4l2_demuxer; ++ extern const FFOutputFormat ff_v4l2_muxer; ++ extern const AVInputFormat ff_vfwcap_demuxer; +++extern const FFOutputFormat ff_vout_drm_muxer; +++extern const FFOutputFormat ff_vout_egl_muxer; ++ extern const AVInputFormat ff_xcbgrab_demuxer; ++ extern const FFOutputFormat ff_xv_muxer; ++ ++diff --git a/libavdevice/drm_vout.c b/libavdevice/drm_vout.c ++new file mode 100644 ++index 0000000000..491e1dc608 ++--- /dev/null +++++ b/libavdevice/drm_vout.c ++@@ -0,0 +1,675 @@ +++/* +++ * Copyright (c) 2020 John Cox for Raspberry Pi Trading +++ * +++ * This file is part of FFmpeg. +++ * +++ * FFmpeg is free software; you can redistribute it and/or +++ * modify it under the terms of the GNU Lesser General Public +++ * License as published by the Free Software Foundation; either +++ * version 2.1 of the License, or (at your option) any later version. +++ * +++ * FFmpeg is distributed in the hope that it will be useful, +++ * but WITHOUT ANY WARRANTY; without even the implied warranty of +++ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU +++ * Lesser General Public License for more details. +++ * +++ * You should have received a copy of the GNU Lesser General Public +++ * License along with FFmpeg; if not, write to the Free Software +++ * Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA +++ */ +++ +++ +++// *** This module is a work in progress and its utility is strictly +++// limited to testing. 
+++ +++#include "libavutil/opt.h" +++#include "libavutil/pixdesc.h" +++#include "libavutil/hwcontext_drm.h" +++#include "libavformat/mux.h" +++#include "avdevice.h" +++ +++#include "pthread.h" +++#include +++#include +++ +++#include +++#include +++#include +++ +++#define TRACE_ALL 0 +++ +++#define DRM_MODULE "vc4" +++ +++#define ERRSTR strerror(errno) +++ +++struct drm_setup { +++ int conId; +++ uint32_t crtcId; +++ int crtcIdx; +++ uint32_t planeId; +++ unsigned int out_fourcc; +++ struct { +++ int x, y, width, height; +++ } compose; +++}; +++ +++typedef struct drm_aux_s { +++ unsigned int fb_handle; +++ uint32_t bo_handles[AV_DRM_MAX_PLANES]; +++ AVFrame * frame; +++} drm_aux_t; +++ +++// Aux size should only need to be 2, but on a few streams (Hobbit) under FKMS +++// we get initial flicker probably due to dodgy drm timing +++#define AUX_SIZE 3 +++typedef struct drm_display_env_s +++{ +++ AVClass *class; +++ +++ int drm_fd; +++ uint32_t con_id; +++ struct drm_setup setup; +++ enum AVPixelFormat avfmt; +++ +++ int show_all; +++ const char * drm_module; +++ +++ unsigned int ano; +++ drm_aux_t aux[AUX_SIZE]; +++ +++ pthread_t q_thread; +++ sem_t q_sem_in; +++ sem_t q_sem_out; +++ int q_terminate; +++ AVFrame * q_next; +++ +++} drm_display_env_t; +++ +++ +++static int drm_vout_write_trailer(AVFormatContext *s) +++{ +++#if TRACE_ALL +++ av_log(s, AV_LOG_DEBUG, "%s\n", __func__); +++#endif +++ +++ return 0; +++} +++ +++static int drm_vout_write_header(AVFormatContext *s) +++{ +++ const AVCodecParameters * const par = s->streams[0]->codecpar; +++ +++#if TRACE_ALL +++ av_log(s, AV_LOG_DEBUG, "%s\n", __func__); +++#endif +++ if ( s->nb_streams > 1 +++ || par->codec_type != AVMEDIA_TYPE_VIDEO +++ || par->codec_id != AV_CODEC_ID_WRAPPED_AVFRAME) { +++ av_log(s, AV_LOG_ERROR, "Only supports one wrapped avframe stream\n"); +++ return AVERROR(EINVAL); +++ } +++ +++ return 0; +++} +++ +++static int find_plane(struct AVFormatContext * const avctx, +++ const int drmfd, const int crtcidx, const uint32_t format, +++ uint32_t * const pplane_id) +++{ +++ drmModePlaneResPtr planes; +++ drmModePlanePtr plane; +++ drmModeObjectPropertiesPtr props = NULL; +++ drmModePropertyPtr prop = NULL; +++ unsigned int i; +++ unsigned int j; +++ int ret = -1; +++ +++ planes = drmModeGetPlaneResources(drmfd); +++ if (!planes) +++ { +++ av_log(avctx, AV_LOG_WARNING, "drmModeGetPlaneResources failed: %s\n", ERRSTR); +++ return -1; +++ } +++ +++ for (i = 0; i < planes->count_planes; ++i) { +++ plane = drmModeGetPlane(drmfd, planes->planes[i]); +++ if (!planes) +++ { +++ av_log(avctx, AV_LOG_WARNING, "drmModeGetPlane failed: %s\n", ERRSTR); +++ break; +++ } +++ +++ if (!(plane->possible_crtcs & (1 << crtcidx))) { +++ drmModeFreePlane(plane); +++ continue; +++ } +++ +++ for (j = 0; j < plane->count_formats; ++j) { +++ if (plane->formats[j] == format) +++ break; +++ } +++ +++ if (j == plane->count_formats) { +++ drmModeFreePlane(plane); +++ continue; +++ } +++ +++ *pplane_id = plane->plane_id; +++ drmModeFreePlane(plane); +++ break; +++ } +++ +++ if (i == planes->count_planes) { +++ ret = -1; +++ goto fail; +++ } +++ +++ props = drmModeObjectGetProperties(drmfd, *pplane_id, DRM_MODE_OBJECT_PLANE); +++ if (!props) +++ goto fail; +++ for (i = 0; i != props->count_props; ++i) { +++ if (prop) +++ drmModeFreeProperty(prop); +++ prop = drmModeGetProperty(drmfd, props->props[i]); +++ if (!prop) +++ goto fail; +++ if (strcmp("zpos", prop->name) == 0) { +++ if (drmModeObjectSetProperty(drmfd, *pplane_id, DRM_MODE_OBJECT_PLANE, 
props->props[i], prop->values[1]) == 0) +++ av_log(avctx, AV_LOG_DEBUG, "ZPOS set to %d\n", (int)prop->values[1]); +++ else +++ av_log(avctx, AV_LOG_WARNING, "Failed to set ZPOS on DRM plane\n"); +++ break; +++ } +++ } +++ +++ ret = 0; +++fail: +++ if (props) +++ drmModeFreeObjectProperties(props); +++ if (prop) +++ drmModeFreeProperty(prop); +++ drmModeFreePlaneResources(planes); +++ return ret; +++} +++ +++static void da_uninit(drm_display_env_t * const de, drm_aux_t * da) +++{ +++ if (da->fb_handle != 0) { +++ drmModeRmFB(de->drm_fd, da->fb_handle); +++ da->fb_handle = 0; +++ } +++ +++ for (unsigned int i = 0; i != AV_DRM_MAX_PLANES; ++i) { +++ if (da->bo_handles[i]) { +++ struct drm_gem_close gem_close = {.handle = da->bo_handles[i]}; +++ drmIoctl(de->drm_fd, DRM_IOCTL_GEM_CLOSE, &gem_close); +++ da->bo_handles[i] = 0; +++ } +++ } +++ av_frame_free(&da->frame); +++} +++ +++static int do_display(AVFormatContext * const s, drm_display_env_t * const de, AVFrame * frame) +++{ +++ const AVDRMFrameDescriptor *desc = (AVDRMFrameDescriptor*)frame->data[0]; +++ drm_aux_t * da = de->aux + de->ano; +++ const uint32_t format = desc->layers[0].format; +++ int ret = 0; +++ +++#if TRACE_ALL +++ av_log(s, AV_LOG_DEBUG, "<<< %s: fd=%d\n", __func__, desc->objects[0].fd); +++#endif +++ +++ if (de->setup.out_fourcc != format) { +++ if (find_plane(s, de->drm_fd, de->setup.crtcIdx, format, &de->setup.planeId)) { +++ av_frame_free(&frame); +++ av_log(s, AV_LOG_WARNING, "No plane for format: %#x\n", format); +++ return -1; +++ } +++ de->setup.out_fourcc = format; +++ } +++ +++ { +++ drmVBlank vbl = { +++ .request = { +++ .type = DRM_VBLANK_RELATIVE, +++ .sequence = 0 +++ } +++ }; +++ +++ while (drmWaitVBlank(de->drm_fd, &vbl)) { +++ if (errno != EINTR) { +++// av_log(s, AV_LOG_WARNING, "drmWaitVBlank failed: %s\n", ERRSTR); +++ break; +++ } +++ } +++ } +++ +++ da_uninit(de, da); +++ +++ { +++ uint32_t pitches[4] = {0}; +++ uint32_t offsets[4] = {0}; +++ uint64_t modifiers[4] = {0}; +++ uint32_t bo_handles[4] = {0}; +++ int has_mods = 0; +++ int i, j, n; +++ +++ da->frame = frame; +++ +++ for (i = 0; i < desc->nb_objects; ++i) { +++ if (drmPrimeFDToHandle(de->drm_fd, desc->objects[i].fd, da->bo_handles + i) != 0) { +++ av_log(s, AV_LOG_WARNING, "drmPrimeFDToHandle[%d](%d) failed: %s\n", i, desc->objects[i].fd, ERRSTR); +++ return -1; +++ } +++ if (desc->objects[i].format_modifier != DRM_FORMAT_MOD_LINEAR && +++ desc->objects[i].format_modifier != DRM_FORMAT_MOD_INVALID) +++ has_mods = 1; +++ } +++ +++ n = 0; +++ for (i = 0; i < desc->nb_layers; ++i) { +++ for (j = 0; j < desc->layers[i].nb_planes; ++j) { +++ const AVDRMPlaneDescriptor * const p = desc->layers[i].planes + j; +++ const AVDRMObjectDescriptor * const obj = desc->objects + p->object_index; +++ pitches[n] = p->pitch; +++ offsets[n] = p->offset; +++ modifiers[n] = obj->format_modifier; +++ bo_handles[n] = da->bo_handles[p->object_index]; +++ ++n; +++ } +++ } +++ +++#if 1 && TRACE_ALL +++ av_log(s, AV_LOG_DEBUG, "%dx%d, fmt: %x, boh=%d,%d,%d,%d, pitch=%d,%d,%d,%d," +++ " offset=%d,%d,%d,%d, mod=%llx,%llx,%llx,%llx\n", +++ av_frame_cropped_width(frame), +++ av_frame_cropped_height(frame), +++ desc->layers[0].format, +++ bo_handles[0], +++ bo_handles[1], +++ bo_handles[2], +++ bo_handles[3], +++ pitches[0], +++ pitches[1], +++ pitches[2], +++ pitches[3], +++ offsets[0], +++ offsets[1], +++ offsets[2], +++ offsets[3], +++ (long long)modifiers[0], +++ (long long)modifiers[1], +++ (long long)modifiers[2], +++ (long long)modifiers[3] +++ ); +++#endif +++ 
+++ if (drmModeAddFB2WithModifiers(de->drm_fd, +++ av_frame_cropped_width(frame), +++ av_frame_cropped_height(frame), +++ desc->layers[0].format, bo_handles, +++ pitches, offsets, +++ has_mods ? modifiers : NULL, +++ &da->fb_handle, +++ has_mods ? DRM_MODE_FB_MODIFIERS : 0) != 0) { +++ av_log(s, AV_LOG_WARNING, "drmModeAddFB2WithModifiers failed: %s\n", ERRSTR); +++ return -1; +++ } +++ } +++ +++ ret = drmModeSetPlane(de->drm_fd, de->setup.planeId, de->setup.crtcId, +++ da->fb_handle, 0, +++ de->setup.compose.x, de->setup.compose.y, +++ de->setup.compose.width, +++ de->setup.compose.height, +++ 0, 0, +++ av_frame_cropped_width(frame) << 16, +++ av_frame_cropped_height(frame) << 16); +++ +++ if (ret != 0) { +++ av_log(s, AV_LOG_WARNING, "drmModeSetPlane failed: %s\n", ERRSTR); +++ } +++ +++ de->ano = de->ano + 1 >= AUX_SIZE ? 0 : de->ano + 1; +++ +++ return ret; +++} +++ +++static int do_sem_wait(sem_t * const sem, const int nowait) +++{ +++ while (nowait ? sem_trywait(sem) : sem_wait(sem)) { +++ if (errno != EINTR) +++ return -errno; +++ } +++ return 0; +++} +++ +++static void * display_thread(void * v) +++{ +++ AVFormatContext * const s = v; +++ drm_display_env_t * const de = s->priv_data; +++ int i; +++ +++#if TRACE_ALL +++ av_log(s, AV_LOG_DEBUG, "<<< %s\n", __func__); +++#endif +++ +++ sem_post(&de->q_sem_out); +++ +++ for (;;) { +++ AVFrame * frame; +++ +++ do_sem_wait(&de->q_sem_in, 0); +++ +++ if (de->q_terminate) +++ break; +++ +++ frame = de->q_next; +++ de->q_next = NULL; +++ sem_post(&de->q_sem_out); +++ +++ do_display(s, de, frame); +++ } +++ +++#if TRACE_ALL +++ av_log(s, AV_LOG_DEBUG, ">>> %s\n", __func__); +++#endif +++ +++ for (i = 0; i != AUX_SIZE; ++i) +++ da_uninit(de, de->aux + i); +++ +++ av_frame_free(&de->q_next); +++ +++ return NULL; +++} +++ +++static int drm_vout_write_packet(AVFormatContext *s, AVPacket *pkt) +++{ +++ const AVFrame * const src_frame = (AVFrame *)pkt->data; +++ AVFrame * frame; +++ drm_display_env_t * const de = s->priv_data; +++ int ret; +++ +++#if TRACE_ALL +++ av_log(s, AV_LOG_DEBUG, "%s\n", __func__); +++#endif +++ +++ if ((src_frame->flags & AV_FRAME_FLAG_CORRUPT) != 0) { +++ av_log(s, AV_LOG_WARNING, "Discard corrupt frame: fmt=%d, ts=%" PRId64 "\n", src_frame->format, src_frame->pts); +++ return 0; +++ } +++ +++ if (src_frame->format == AV_PIX_FMT_DRM_PRIME) { +++ frame = av_frame_alloc(); +++ av_frame_ref(frame, src_frame); +++ } +++ else if (src_frame->format == AV_PIX_FMT_VAAPI) { +++ frame = av_frame_alloc(); +++ frame->format = AV_PIX_FMT_DRM_PRIME; +++ if (av_hwframe_map(frame, src_frame, 0) != 0) +++ { +++ av_log(s, AV_LOG_WARNING, "Failed to map frame (format=%d) to DRM_PRiME\n", src_frame->format); +++ av_frame_free(&frame); +++ return AVERROR(EINVAL); +++ } +++ } +++ else { +++ av_log(s, AV_LOG_WARNING, "Frame (format=%d) not DRM_PRiME\n", src_frame->format); +++ return AVERROR(EINVAL); +++ } +++ +++ ret = do_sem_wait(&de->q_sem_out, !de->show_all); +++ if (ret) { +++ av_frame_free(&frame); +++ } +++ else { +++ de->q_next = frame; +++ sem_post(&de->q_sem_in); +++ } +++ +++ return 0; +++} +++ +++static int drm_vout_write_frame(AVFormatContext *s, int stream_index, AVFrame **ppframe, +++ unsigned flags) +++{ +++ av_log(s, AV_LOG_ERROR, "%s: NIF: idx=%d, flags=%#x\n", __func__, stream_index, flags); +++ return AVERROR_PATCHWELCOME; +++} +++ +++static int drm_vout_control_message(AVFormatContext *s, int type, void *data, size_t data_size) +++{ +++#if TRACE_ALL +++ av_log(s, AV_LOG_DEBUG, "%s: %d\n", __func__, type); +++#endif +++ 
switch(type) { +++ case AV_APP_TO_DEV_WINDOW_REPAINT: +++ return 0; +++ default: +++ break; +++ } +++ return AVERROR(ENOSYS); +++} +++ +++static int find_crtc(struct AVFormatContext * const avctx, int drmfd, struct drm_setup *s, uint32_t * const pConId) +++{ +++ int ret = -1; +++ int i; +++ drmModeRes *res = drmModeGetResources(drmfd); +++ drmModeConnector *c; +++ +++ if(!res) +++ { +++ printf( "drmModeGetResources failed: %s\n", ERRSTR); +++ return -1; +++ } +++ +++ if (res->count_crtcs <= 0) +++ { +++ printf( "drm: no crts\n"); +++ goto fail_res; +++ } +++ +++ if (!s->conId) { +++ fprintf(stderr, +++ "No connector ID specified. Choosing default from list:\n"); +++ +++ for (i = 0; i < res->count_connectors; i++) { +++ drmModeConnector *con = +++ drmModeGetConnector(drmfd, res->connectors[i]); +++ drmModeEncoder *enc = NULL; +++ drmModeCrtc *crtc = NULL; +++ +++ if (con->encoder_id) { +++ enc = drmModeGetEncoder(drmfd, con->encoder_id); +++ if (enc->crtc_id) { +++ crtc = drmModeGetCrtc(drmfd, enc->crtc_id); +++ } +++ } +++ +++ if (!s->conId && crtc) { +++ s->conId = con->connector_id; +++ s->crtcId = crtc->crtc_id; +++ } +++ +++ av_log(avctx, AV_LOG_DEBUG, "Connector %d (crtc %d): type %d, %dx%d%s\n", +++ con->connector_id, +++ crtc ? crtc->crtc_id : 0, +++ con->connector_type, +++ crtc ? crtc->width : 0, +++ crtc ? crtc->height : 0, +++ (s->conId == (int)con->connector_id ? +++ " (chosen)" : "")); +++ } +++ +++ if (!s->conId) { +++ av_log(avctx, AV_LOG_ERROR, +++ "No suitable enabled connector found.\n"); +++ return -1;; +++ } +++ } +++ +++ s->crtcIdx = -1; +++ +++ for (i = 0; i < res->count_crtcs; ++i) { +++ if (s->crtcId == res->crtcs[i]) { +++ s->crtcIdx = i; +++ break; +++ } +++ } +++ +++ if (s->crtcIdx == -1) +++ { +++ av_log(avctx, AV_LOG_WARNING, "drm: CRTC %u not found\n", s->crtcId); +++ goto fail_res; +++ } +++ +++ if (res->count_connectors <= 0) +++ { +++ av_log(avctx, AV_LOG_WARNING, "drm: no connectors\n"); +++ goto fail_res; +++ } +++ +++ c = drmModeGetConnector(drmfd, s->conId); +++ if (!c) +++ { +++ av_log(avctx, AV_LOG_WARNING, "drmModeGetConnector failed: %s\n", ERRSTR); +++ goto fail_res; +++ } +++ +++ if (!c->count_modes) +++ { +++ av_log(avctx, AV_LOG_WARNING, "connector supports no mode\n"); +++ goto fail_conn; +++ } +++ +++ { +++ drmModeCrtc *crtc = drmModeGetCrtc(drmfd, s->crtcId); +++ s->compose.x = crtc->x; +++ s->compose.y = crtc->y; +++ s->compose.width = crtc->width; +++ s->compose.height = crtc->height; +++ drmModeFreeCrtc(crtc); +++ } +++ +++ if (pConId) +++ *pConId = c->connector_id; +++ ret = 0; +++ +++fail_conn: +++ drmModeFreeConnector(c); +++ +++fail_res: +++ drmModeFreeResources(res); +++ +++ return ret; +++} +++ +++// deinit is called if init fails so no need to clean up explicity here +++static int drm_vout_init(struct AVFormatContext * s) +++{ +++ drm_display_env_t * const de = s->priv_data; +++ int rv; +++ +++ av_log(s, AV_LOG_DEBUG, "<<< %s\n", __func__); +++ +++ de->drm_fd = -1; +++ de->con_id = 0; +++ de->setup = (struct drm_setup){0}; +++ de->q_terminate = 0; +++ +++ if ((de->drm_fd = drmOpen(de->drm_module, NULL)) < 0) +++ { +++ rv = AVERROR(errno); +++ av_log(s, AV_LOG_ERROR, "Failed to drmOpen %s: %s\n", de->drm_module, av_err2str(rv)); +++ return rv; +++ } +++ +++ if (find_crtc(s, de->drm_fd, &de->setup, &de->con_id) != 0) +++ { +++ av_log(s, AV_LOG_ERROR, "failed to find valid mode\n"); +++ rv = AVERROR(EINVAL); +++ goto fail_close; +++ } +++ +++ sem_init(&de->q_sem_in, 0, 0); +++ sem_init(&de->q_sem_out, 0, 0); +++ if 
(pthread_create(&de->q_thread, NULL, display_thread, s)) { +++ rv = AVERROR(errno); +++ av_log(s, AV_LOG_ERROR, "Failed to create display thread: %s\n", av_err2str(rv)); +++ goto fail_close; +++ } +++ +++ av_log(s, AV_LOG_DEBUG, ">>> %s\n", __func__); +++ +++ return 0; +++ +++fail_close: +++ close(de->drm_fd); +++ de->drm_fd = -1; +++ av_log(s, AV_LOG_DEBUG, ">>> %s: FAIL\n", __func__); +++ +++ return rv; +++} +++ +++static void drm_vout_deinit(struct AVFormatContext * s) +++{ +++ drm_display_env_t * const de = s->priv_data; +++ +++ av_log(s, AV_LOG_DEBUG, "<<< %s\n", __func__); +++ +++ de->q_terminate = 1; +++ sem_post(&de->q_sem_in); +++ pthread_join(de->q_thread, NULL); +++ sem_destroy(&de->q_sem_in); +++ sem_destroy(&de->q_sem_out); +++ +++ for (unsigned int i = 0; i != AUX_SIZE; ++i) +++ da_uninit(de, de->aux + i); +++ +++ av_frame_free(&de->q_next); +++ +++ if (de->drm_fd >= 0) { +++ close(de->drm_fd); +++ de->drm_fd = -1; +++ } +++ +++ av_log(s, AV_LOG_DEBUG, ">>> %s\n", __func__); +++} +++ +++ +++#define OFFSET(x) offsetof(drm_display_env_t, x) +++static const AVOption options[] = { +++ { "show_all", "show all frames", OFFSET(show_all), AV_OPT_TYPE_BOOL, { .i64 = 0 }, 0, 1, AV_OPT_FLAG_ENCODING_PARAM }, +++ { "drm_module", "drm_module name to use, default=" DRM_MODULE, OFFSET(drm_module), AV_OPT_TYPE_STRING, { .str = DRM_MODULE }, 0, 0, AV_OPT_FLAG_ENCODING_PARAM }, +++ { NULL } +++}; +++ +++static const AVClass drm_vout_class = { +++ .class_name = "drm vid outdev", +++ .item_name = av_default_item_name, +++ .option = options, +++ .version = LIBAVUTIL_VERSION_INT, +++ .category = AV_CLASS_CATEGORY_DEVICE_VIDEO_OUTPUT, +++}; +++ +++FFOutputFormat ff_vout_drm_muxer = { +++ .p = { +++ .name = "vout_drm", +++ .long_name = NULL_IF_CONFIG_SMALL("Drm video output device"), +++ .audio_codec = AV_CODEC_ID_NONE, +++ .video_codec = AV_CODEC_ID_WRAPPED_AVFRAME, +++ .flags = AVFMT_NOFILE | AVFMT_VARIABLE_FPS | AVFMT_NOTIMESTAMPS, +++ .priv_class = &drm_vout_class, +++ }, +++ .priv_data_size = sizeof(drm_display_env_t), +++ .write_header = drm_vout_write_header, +++ .write_packet = drm_vout_write_packet, +++ .write_uncoded_frame = drm_vout_write_frame, +++ .write_trailer = drm_vout_write_trailer, +++ .control_message = drm_vout_control_message, +++ .init = drm_vout_init, +++ .deinit = drm_vout_deinit, +++}; +++ ++diff --git a/libavdevice/egl_vout.c b/libavdevice/egl_vout.c ++new file mode 100644 ++index 0000000000..afc7afd13e ++--- /dev/null +++++ b/libavdevice/egl_vout.c ++@@ -0,0 +1,783 @@ +++/* +++ * Copyright (c) 2020 John Cox for Raspberry Pi Trading +++ * +++ * This file is part of FFmpeg. +++ * +++ * FFmpeg is free software; you can redistribute it and/or +++ * modify it under the terms of the GNU Lesser General Public +++ * License as published by the Free Software Foundation; either +++ * version 2.1 of the License, or (at your option) any later version. +++ * +++ * FFmpeg is distributed in the hope that it will be useful, +++ * but WITHOUT ANY WARRANTY; without even the implied warranty of +++ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU +++ * Lesser General Public License for more details. +++ * +++ * You should have received a copy of the GNU Lesser General Public +++ * License along with FFmpeg; if not, write to the Free Software +++ * Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA +++ */ +++ +++ +++// *** This module is a work in progress and its utility is strictly +++// limited to testing. 
+++// Amongst other issues it doesn't wait for the pic to be displayed before +++// returning the buffer so flikering does occur. +++ +++#include +++#include +++ +++#include "libavutil/opt.h" +++#include "libavutil/avassert.h" +++#include "libavutil/pixdesc.h" +++#include "libavutil/imgutils.h" +++#include "libavutil/hwcontext_drm.h" +++#include "libavformat/mux.h" +++#include "avdevice.h" +++ +++#include "pthread.h" +++#include +++#include +++#include +++ +++#include +++#include +++ +++#include "libavutil/rpi_sand_fns.h" +++ +++#define TRACE_ALL 0 +++ +++struct egl_setup { +++ int conId; +++ +++ Display *dpy; +++ EGLDisplay egl_dpy; +++ EGLContext ctx; +++ EGLSurface surf; +++ Window win; +++ +++ uint32_t crtcId; +++ int crtcIdx; +++ uint32_t planeId; +++ struct { +++ int x, y, width, height; +++ } compose; +++}; +++ +++typedef struct egl_aux_s { +++ int fd; +++ GLuint texture; +++ +++} egl_aux_t; +++ +++typedef struct egl_display_env_s { +++ AVClass *class; +++ +++ struct egl_setup setup; +++ enum AVPixelFormat avfmt; +++ +++ int show_all; +++ int window_width, window_height; +++ int window_x, window_y; +++ int fullscreen; +++ +++ egl_aux_t aux[32]; +++ +++ pthread_t q_thread; +++ pthread_mutex_t q_lock; +++ sem_t display_start_sem; +++ sem_t q_sem; +++ int q_terminate; +++ AVFrame *q_this; +++ AVFrame *q_next; +++ +++} egl_display_env_t; +++ +++ +++/** +++ * Remove window border/decorations. +++ */ +++static void +++no_border(Display *dpy, Window w) +++{ +++ static const unsigned MWM_HINTS_DECORATIONS = (1 << 1); +++ static const int PROP_MOTIF_WM_HINTS_ELEMENTS = 5; +++ +++ typedef struct { +++ unsigned long flags; +++ unsigned long functions; +++ unsigned long decorations; +++ long inputMode; +++ unsigned long status; +++ } PropMotifWmHints; +++ +++ PropMotifWmHints motif_hints; +++ Atom prop, proptype; +++ unsigned long flags = 0; +++ +++ /* setup the property */ +++ motif_hints.flags = MWM_HINTS_DECORATIONS; +++ motif_hints.decorations = flags; +++ +++ /* get the atom for the property */ +++ prop = XInternAtom(dpy, "_MOTIF_WM_HINTS", True); +++ if (!prop) { +++ /* something went wrong! */ +++ return; +++ } +++ +++ /* not sure this is correct, seems to work, XA_WM_HINTS didn't work */ +++ proptype = prop; +++ +++ XChangeProperty(dpy, w, /* display, window */ +++ prop, proptype, /* property, type */ +++ 32, /* format: 32-bit datums */ +++ PropModeReplace, /* mode */ +++ (unsigned char *)&motif_hints, /* data */ +++ PROP_MOTIF_WM_HINTS_ELEMENTS /* nelements */ +++ ); +++} +++ +++ +++/* +++ * Create an RGB, double-buffered window. +++ * Return the window and context handles. +++ */ +++static int +++make_window(struct AVFormatContext *const s, +++ egl_display_env_t *const de, +++ Display *dpy, EGLDisplay egl_dpy, const char *name, +++ Window *winRet, EGLContext *ctxRet, EGLSurface *surfRet) +++{ +++ int scrnum = DefaultScreen(dpy); +++ XSetWindowAttributes attr; +++ unsigned long mask; +++ Window root = RootWindow(dpy, scrnum); +++ Window win; +++ EGLContext ctx; +++ const int fullscreen = de->fullscreen; +++ EGLConfig config; +++ int x = de->window_x; +++ int y = de->window_y; +++ int width = de->window_width ? de->window_width : 1280; +++ int height = de->window_height ? 
de->window_height : 720; +++ +++ +++ if (fullscreen) { +++ int scrnum = DefaultScreen(dpy); +++ +++ x = 0; y = 0; +++ width = DisplayWidth(dpy, scrnum); +++ height = DisplayHeight(dpy, scrnum); +++ } +++ +++ { +++ EGLint num_configs; +++ static const EGLint attribs[] = { +++ EGL_RED_SIZE, 1, +++ EGL_GREEN_SIZE, 1, +++ EGL_BLUE_SIZE, 1, +++ EGL_RENDERABLE_TYPE, EGL_OPENGL_ES2_BIT, +++ EGL_NONE +++ }; +++ +++ if (!eglChooseConfig(egl_dpy, attribs, &config, 1, &num_configs)) { +++ av_log(s, AV_LOG_ERROR, "Error: couldn't get an EGL visual config\n"); +++ return -1; +++ } +++ } +++ +++ { +++ EGLint vid; +++ if (!eglGetConfigAttrib(egl_dpy, config, EGL_NATIVE_VISUAL_ID, &vid)) { +++ av_log(s, AV_LOG_ERROR, "Error: eglGetConfigAttrib() failed\n"); +++ return -1; +++ } +++ +++ { +++ XVisualInfo visTemplate = { +++ .visualid = vid, +++ }; +++ int num_visuals; +++ XVisualInfo *visinfo = XGetVisualInfo(dpy, VisualIDMask, +++ &visTemplate, &num_visuals); +++ +++ /* window attributes */ +++ attr.background_pixel = 0; +++ attr.border_pixel = 0; +++ attr.colormap = XCreateColormap(dpy, root, visinfo->visual, AllocNone); +++ attr.event_mask = StructureNotifyMask | ExposureMask | KeyPressMask; +++ /* XXX this is a bad way to get a borderless window! */ +++ mask = CWBackPixel | CWBorderPixel | CWColormap | CWEventMask; +++ +++ win = XCreateWindow(dpy, root, x, y, width, height, +++ 0, visinfo->depth, InputOutput, +++ visinfo->visual, mask, &attr); +++ XFree(visinfo); +++ } +++ } +++ +++ if (fullscreen) +++ no_border(dpy, win); +++ +++ /* set hints and properties */ +++ { +++ XSizeHints sizehints; +++ sizehints.x = x; +++ sizehints.y = y; +++ sizehints.width = width; +++ sizehints.height = height; +++ sizehints.flags = USSize | USPosition; +++ XSetNormalHints(dpy, win, &sizehints); +++ XSetStandardProperties(dpy, win, name, name, +++ None, (char **)NULL, 0, &sizehints); +++ } +++ +++ eglBindAPI(EGL_OPENGL_ES_API); +++ +++ { +++ static const EGLint ctx_attribs[] = { +++ EGL_CONTEXT_CLIENT_VERSION, 2, +++ EGL_NONE +++ }; +++ ctx = eglCreateContext(egl_dpy, config, EGL_NO_CONTEXT, ctx_attribs); +++ if (!ctx) { +++ av_log(s, AV_LOG_ERROR, "Error: eglCreateContext failed\n"); +++ return -1; +++ } +++ } +++ +++ +++ XMapWindow(dpy, win); +++ +++ { +++ EGLSurface surf = eglCreateWindowSurface(egl_dpy, config, (EGLNativeWindowType)win, NULL); +++ if (!surf) { +++ av_log(s, AV_LOG_ERROR, "Error: eglCreateWindowSurface failed\n"); +++ return -1; +++ } +++ +++ if (!eglMakeCurrent(egl_dpy, surf, surf, ctx)) { +++ av_log(s, AV_LOG_ERROR, "Error: eglCreateContext failed\n"); +++ return -1; +++ } +++ +++ *winRet = win; +++ *ctxRet = ctx; +++ *surfRet = surf; +++ } +++ +++ return 0; +++} +++ +++static GLint +++compile_shader(struct AVFormatContext *const avctx, GLenum target, const char *source) +++{ +++ GLuint s = glCreateShader(target); +++ +++ if (s == 0) { +++ av_log(avctx, AV_LOG_ERROR, "Failed to create shader\n"); +++ return 0; +++ } +++ +++ glShaderSource(s, 1, (const GLchar **)&source, NULL); +++ glCompileShader(s); +++ +++ { +++ GLint ok; +++ glGetShaderiv(s, GL_COMPILE_STATUS, &ok); +++ +++ if (!ok) { +++ GLchar *info; +++ GLint size; +++ +++ glGetShaderiv(s, GL_INFO_LOG_LENGTH, &size); +++ info = malloc(size); +++ +++ glGetShaderInfoLog(s, size, NULL, info); +++ av_log(avctx, AV_LOG_ERROR, "Failed to compile shader: %ssource:\n%s\n", info, source); +++ +++ return 0; +++ } +++ } +++ +++ return s; +++} +++ +++static GLuint link_program(struct AVFormatContext *const s, GLint vs, GLint fs) +++{ +++ GLuint prog = 
glCreateProgram(); +++ +++ if (prog == 0) { +++ av_log(s, AV_LOG_ERROR, "Failed to create program\n"); +++ return 0; +++ } +++ +++ glAttachShader(prog, vs); +++ glAttachShader(prog, fs); +++ glLinkProgram(prog); +++ +++ { +++ GLint ok; +++ glGetProgramiv(prog, GL_LINK_STATUS, &ok); +++ if (!ok) { +++ /* Some drivers return a size of 1 for an empty log. This is the size +++ * of a log that contains only a terminating NUL character. +++ */ +++ GLint size; +++ GLchar *info = NULL; +++ glGetProgramiv(prog, GL_INFO_LOG_LENGTH, &size); +++ if (size > 1) { +++ info = malloc(size); +++ glGetProgramInfoLog(prog, size, NULL, info); +++ } +++ +++ av_log(s, AV_LOG_ERROR, "Failed to link: %s\n", +++ (info != NULL) ? info : ""); +++ return 0; +++ } +++ } +++ +++ return prog; +++} +++ +++static int +++gl_setup(struct AVFormatContext *const s) +++{ +++ const char *vs = +++ "attribute vec4 pos;\n" +++ "varying vec2 texcoord;\n" +++ "\n" +++ "void main() {\n" +++ " gl_Position = pos;\n" +++ " texcoord.x = (pos.x + 1.0) / 2.0;\n" +++ " texcoord.y = (-pos.y + 1.0) / 2.0;\n" +++ "}\n"; +++ const char *fs = +++ "#extension GL_OES_EGL_image_external : enable\n" +++ "precision mediump float;\n" +++ "uniform samplerExternalOES s;\n" +++ "varying vec2 texcoord;\n" +++ "void main() {\n" +++ " gl_FragColor = texture2D(s, texcoord);\n" +++ "}\n"; +++ +++ GLuint vs_s; +++ GLuint fs_s; +++ GLuint prog; +++ +++ if (!(vs_s = compile_shader(s, GL_VERTEX_SHADER, vs)) || +++ !(fs_s = compile_shader(s, GL_FRAGMENT_SHADER, fs)) || +++ !(prog = link_program(s, vs_s, fs_s))) +++ return -1; +++ +++ glUseProgram(prog); +++ +++ { +++ static const float verts[] = { +++ -1, -1, +++ 1, -1, +++ 1, 1, +++ -1, 1, +++ }; +++ glVertexAttribPointer(0, 2, GL_FLOAT, GL_FALSE, 0, verts); +++ } +++ +++ glEnableVertexAttribArray(0); +++ return 0; +++} +++ +++static int egl_vout_write_trailer(AVFormatContext *s) +++{ +++#if TRACE_ALL +++ av_log(s, AV_LOG_INFO, "%s\n", __func__); +++#endif +++ +++ return 0; +++} +++ +++static int egl_vout_write_header(AVFormatContext *s) +++{ +++ const AVCodecParameters *const par = s->streams[0]->codecpar; +++ +++#if TRACE_ALL +++ av_log(s, AV_LOG_INFO, "%s\n", __func__); +++#endif +++ if (s->nb_streams > 1 +++ || par->codec_type != AVMEDIA_TYPE_VIDEO +++ || par->codec_id != AV_CODEC_ID_WRAPPED_AVFRAME) { +++ av_log(s, AV_LOG_ERROR, "Only supports one wrapped avframe stream\n"); +++ return AVERROR(EINVAL); +++ } +++ +++ return 0; +++} +++ +++ +++static int do_display(AVFormatContext *const s, egl_display_env_t *const de, AVFrame *const frame) +++{ +++ const AVDRMFrameDescriptor *desc = (AVDRMFrameDescriptor *)frame->data[0]; +++ egl_aux_t *da = NULL; +++ unsigned int i; +++ +++#if TRACE_ALL +++ av_log(s, AV_LOG_INFO, "<<< %s\n", __func__); +++#endif +++ +++ for (i = 0; i != 32; ++i) { +++ if (de->aux[i].fd == -1 || de->aux[i].fd == desc->objects[0].fd) { +++ da = de->aux + i; +++ break; +++ } +++ } +++ +++ if (da == NULL) { +++ av_log(s, AV_LOG_INFO, "%s: Out of handles\n", __func__); +++ return AVERROR(EINVAL); +++ } +++ +++ if (da->texture == 0) { +++ EGLint attribs[50]; +++ EGLint *a = attribs; +++ int i, j; +++ static const EGLint anames[] = { +++ EGL_DMA_BUF_PLANE0_FD_EXT, +++ EGL_DMA_BUF_PLANE0_OFFSET_EXT, +++ EGL_DMA_BUF_PLANE0_PITCH_EXT, +++ EGL_DMA_BUF_PLANE0_MODIFIER_LO_EXT, +++ EGL_DMA_BUF_PLANE0_MODIFIER_HI_EXT, +++ EGL_DMA_BUF_PLANE1_FD_EXT, +++ EGL_DMA_BUF_PLANE1_OFFSET_EXT, +++ EGL_DMA_BUF_PLANE1_PITCH_EXT, +++ EGL_DMA_BUF_PLANE1_MODIFIER_LO_EXT, +++ EGL_DMA_BUF_PLANE1_MODIFIER_HI_EXT, +++ 
EGL_DMA_BUF_PLANE2_FD_EXT, +++ EGL_DMA_BUF_PLANE2_OFFSET_EXT, +++ EGL_DMA_BUF_PLANE2_PITCH_EXT, +++ EGL_DMA_BUF_PLANE2_MODIFIER_LO_EXT, +++ EGL_DMA_BUF_PLANE2_MODIFIER_HI_EXT, +++ }; +++ const EGLint *b = anames; +++ +++ *a++ = EGL_WIDTH; +++ *a++ = av_frame_cropped_width(frame); +++ *a++ = EGL_HEIGHT; +++ *a++ = av_frame_cropped_height(frame); +++ *a++ = EGL_LINUX_DRM_FOURCC_EXT; +++ *a++ = desc->layers[0].format; +++ +++ for (i = 0; i < desc->nb_layers; ++i) { +++ for (j = 0; j < desc->layers[i].nb_planes; ++j) { +++ const AVDRMPlaneDescriptor *const p = desc->layers[i].planes + j; +++ const AVDRMObjectDescriptor *const obj = desc->objects + p->object_index; +++ *a++ = *b++; +++ *a++ = obj->fd; +++ *a++ = *b++; +++ *a++ = p->offset; +++ *a++ = *b++; +++ *a++ = p->pitch; +++ if (obj->format_modifier == 0) { +++ b += 2; +++ } +++ else { +++ *a++ = *b++; +++ *a++ = (EGLint)(obj->format_modifier & 0xFFFFFFFF); +++ *a++ = *b++; +++ *a++ = (EGLint)(obj->format_modifier >> 32); +++ } +++ } +++ } +++ +++ *a = EGL_NONE; +++ +++#if TRACE_ALL +++ for (a = attribs, i = 0; *a != EGL_NONE; a += 2, ++i) { +++ av_log(s, AV_LOG_INFO, "[%2d] %4x: %d\n", i, a[0], a[1]); +++ } +++#endif +++ { +++ const EGLImage image = eglCreateImageKHR(de->setup.egl_dpy, +++ EGL_NO_CONTEXT, +++ EGL_LINUX_DMA_BUF_EXT, +++ NULL, attribs); +++ if (!image) { +++ av_log(s, AV_LOG_ERROR, "Failed to import fd %d\n", desc->objects[0].fd); +++ return -1; +++ } +++ +++ glGenTextures(1, &da->texture); +++ glBindTexture(GL_TEXTURE_EXTERNAL_OES, da->texture); +++ glTexParameteri(GL_TEXTURE_EXTERNAL_OES, GL_TEXTURE_MIN_FILTER, GL_LINEAR); +++ glTexParameteri(GL_TEXTURE_EXTERNAL_OES, GL_TEXTURE_MAG_FILTER, GL_LINEAR); +++ glEGLImageTargetTexture2DOES(GL_TEXTURE_EXTERNAL_OES, image); +++ +++ eglDestroyImageKHR(de->setup.egl_dpy, image); +++ } +++ +++ da->fd = desc->objects[0].fd; +++ } +++ +++ glClearColor(0.5, 0.5, 0.5, 0.5); +++ glClear(GL_COLOR_BUFFER_BIT); +++ +++ glBindTexture(GL_TEXTURE_EXTERNAL_OES, da->texture); +++ glDrawArrays(GL_TRIANGLE_FAN, 0, 4); +++ eglSwapBuffers(de->setup.egl_dpy, de->setup.surf); +++ +++ glDeleteTextures(1, &da->texture); +++ da->texture = 0; +++ da->fd = -1; +++ +++ return 0; +++} +++ +++static void* display_thread(void *v) +++{ +++ AVFormatContext *const s = v; +++ egl_display_env_t *const de = s->priv_data; +++ +++#if TRACE_ALL +++ av_log(s, AV_LOG_INFO, "<<< %s\n", __func__); +++#endif +++ { +++ EGLint egl_major, egl_minor; +++ +++ de->setup.dpy = XOpenDisplay(NULL); +++ if (!de->setup.dpy) { +++ av_log(s, AV_LOG_ERROR, "Couldn't open X display\n"); +++ goto fail; +++ } +++ +++ de->setup.egl_dpy = eglGetDisplay(de->setup.dpy); +++ if (!de->setup.egl_dpy) { +++ av_log(s, AV_LOG_ERROR, "eglGetDisplay() failed\n"); +++ goto fail; +++ } +++ +++ if (!eglInitialize(de->setup.egl_dpy, &egl_major, &egl_minor)) { +++ av_log(s, AV_LOG_ERROR, "Error: eglInitialize() failed\n"); +++ goto fail; +++ } +++ +++ av_log(s, AV_LOG_INFO, "EGL version %d.%d\n", egl_major, egl_minor); +++ +++ if (!epoxy_has_egl_extension(de->setup.egl_dpy, "EGL_KHR_image_base")) { +++ av_log(s, AV_LOG_ERROR, "Missing EGL KHR image extension\n"); +++ goto fail; +++ } +++ } +++ +++ if (!de->window_width || !de->window_height) { +++ de->window_width = 1280; +++ de->window_height = 720; +++ } +++ if (make_window(s, de, de->setup.dpy, de->setup.egl_dpy, "ffmpeg-vout", +++ &de->setup.win, &de->setup.ctx, &de->setup.surf)) { +++ av_log(s, AV_LOG_ERROR, "%s: make_window failed\n", __func__); +++ goto fail; +++ } +++ +++ if (gl_setup(s)) { +++ 
av_log(s, AV_LOG_ERROR, "%s: gl_setup failed\n", __func__); +++ goto fail; +++ } +++ +++#if TRACE_ALL +++ av_log(s, AV_LOG_INFO, "--- %s: Start done\n", __func__); +++#endif +++ sem_post(&de->display_start_sem); +++ +++ for (;;) { +++ AVFrame *frame; +++ +++ while (sem_wait(&de->q_sem) != 0) { +++ av_assert0(errno == EINTR); +++ } +++ +++ if (de->q_terminate) +++ break; +++ +++ pthread_mutex_lock(&de->q_lock); +++ frame = de->q_next; +++ de->q_next = NULL; +++ pthread_mutex_unlock(&de->q_lock); +++ +++ do_display(s, de, frame); +++ +++ av_frame_free(&de->q_this); +++ de->q_this = frame; +++ } +++ +++#if TRACE_ALL +++ av_log(s, AV_LOG_INFO, ">>> %s\n", __func__); +++#endif +++ +++ return NULL; +++ +++fail: +++#if TRACE_ALL +++ av_log(s, AV_LOG_INFO, ">>> %s: FAIL\n", __func__); +++#endif +++ de->q_terminate = 1; +++ sem_post(&de->display_start_sem); +++ +++ return NULL; +++} +++ +++static int egl_vout_write_packet(AVFormatContext *s, AVPacket *pkt) +++{ +++ const AVFrame *const src_frame = (AVFrame *)pkt->data; +++ AVFrame *frame; +++ egl_display_env_t *const de = s->priv_data; +++ +++#if TRACE_ALL +++ av_log(s, AV_LOG_INFO, "%s\n", __func__); +++#endif +++ +++ if (src_frame->format == AV_PIX_FMT_DRM_PRIME) { +++ frame = av_frame_alloc(); +++ av_frame_ref(frame, src_frame); +++ } +++ else if (src_frame->format == AV_PIX_FMT_VAAPI) { +++ frame = av_frame_alloc(); +++ frame->format = AV_PIX_FMT_DRM_PRIME; +++ if (av_hwframe_map(frame, src_frame, 0) != 0) { +++ av_log(s, AV_LOG_WARNING, "Failed to map frame (format=%d) to DRM_PRiME\n", src_frame->format); +++ av_frame_free(&frame); +++ return AVERROR(EINVAL); +++ } +++ } +++ else { +++ av_log(s, AV_LOG_WARNING, "Frame (format=%d) not DRM_PRiME\n", src_frame->format); +++ return AVERROR(EINVAL); +++ } +++ +++ // Really hacky sync +++ while (de->show_all && de->q_next) { +++ usleep(3000); +++ } +++ +++ pthread_mutex_lock(&de->q_lock); +++ { +++ AVFrame *const t = de->q_next; +++ de->q_next = frame; +++ frame = t; +++ } +++ pthread_mutex_unlock(&de->q_lock); +++ +++ if (frame == NULL) +++ sem_post(&de->q_sem); +++ else +++ av_frame_free(&frame); +++ +++ return 0; +++} +++ +++static int egl_vout_write_frame(AVFormatContext *s, int stream_index, AVFrame **ppframe, +++ unsigned flags) +++{ +++ av_log(s, AV_LOG_ERROR, "%s: NIF: idx=%d, flags=%#x\n", __func__, stream_index, flags); +++ return AVERROR_PATCHWELCOME; +++} +++ +++static int egl_vout_control_message(AVFormatContext *s, int type, void *data, size_t data_size) +++{ +++#if TRACE_ALL +++ av_log(s, AV_LOG_INFO, "%s: %d\n", __func__, type); +++#endif +++ switch (type) { +++ case AV_APP_TO_DEV_WINDOW_REPAINT: +++ return 0; +++ default: +++ break; +++ } +++ return AVERROR(ENOSYS); +++} +++ +++// deinit is called if init fails so no need to clean up explicity here +++static int egl_vout_init(struct AVFormatContext *s) +++{ +++ egl_display_env_t *const de = s->priv_data; +++ unsigned int i; +++ +++ av_log(s, AV_LOG_DEBUG, "<<< %s\n", __func__); +++ +++ de->setup = (struct egl_setup) { 0 }; +++ +++ for (i = 0; i != 32; ++i) { +++ de->aux[i].fd = -1; +++ } +++ +++ de->q_terminate = 0; +++ pthread_mutex_init(&de->q_lock, NULL); +++ sem_init(&de->q_sem, 0, 0); +++ sem_init(&de->display_start_sem, 0, 0); +++ av_assert0(pthread_create(&de->q_thread, NULL, display_thread, s) == 0); +++ +++ sem_wait(&de->display_start_sem); +++ if (de->q_terminate) { +++ av_log(s, AV_LOG_ERROR, "%s: Display startup failure\n", __func__); +++ return -1; +++ } +++ +++ av_log(s, AV_LOG_DEBUG, ">>> %s\n", __func__); +++ +++ 
return 0; +++} +++ +++static void egl_vout_deinit(struct AVFormatContext *s) +++{ +++ egl_display_env_t *const de = s->priv_data; +++ +++ av_log(s, AV_LOG_DEBUG, "<<< %s\n", __func__); +++ +++ de->q_terminate = 1; +++ sem_post(&de->q_sem); +++ pthread_join(de->q_thread, NULL); +++ sem_destroy(&de->q_sem); +++ pthread_mutex_destroy(&de->q_lock); +++ +++ av_frame_free(&de->q_next); +++ av_frame_free(&de->q_this); +++ +++ av_log(s, AV_LOG_DEBUG, ">>> %s\n", __func__); +++} +++ +++#define OFFSET(x) offsetof(egl_display_env_t, x) +++static const AVOption options[] = { +++ { "show_all", "show all frames", OFFSET(show_all), AV_OPT_TYPE_BOOL, { .i64 = 0 }, 0, 1, AV_OPT_FLAG_ENCODING_PARAM }, +++ { "window_size", "set window forced size", OFFSET(window_width), AV_OPT_TYPE_IMAGE_SIZE, { .str = NULL }, 0, 0, AV_OPT_FLAG_ENCODING_PARAM }, +++ { "window_x", "set window x offset", OFFSET(window_x), AV_OPT_TYPE_INT, { .i64 = 0 }, -INT_MAX, INT_MAX, AV_OPT_FLAG_ENCODING_PARAM }, +++ { "window_y", "set window y offset", OFFSET(window_y), AV_OPT_TYPE_INT, { .i64 = 0 }, -INT_MAX, INT_MAX, AV_OPT_FLAG_ENCODING_PARAM }, +++ { "fullscreen", "set fullscreen display", OFFSET(fullscreen), AV_OPT_TYPE_BOOL, { .i64 = 0 }, 0, 1, AV_OPT_FLAG_ENCODING_PARAM }, +++ { NULL } +++ +++}; +++ +++static const AVClass egl_vout_class = { +++ .class_name = "egl vid outdev", +++ .item_name = av_default_item_name, +++ .option = options, +++ .version = LIBAVUTIL_VERSION_INT, +++ .category = AV_CLASS_CATEGORY_DEVICE_VIDEO_OUTPUT, +++}; +++ +++FFOutputFormat ff_vout_egl_muxer = { +++ .p = { +++ .name = "vout_egl", +++ .long_name = NULL_IF_CONFIG_SMALL("Egl video output device"), +++ .audio_codec = AV_CODEC_ID_NONE, +++ .video_codec = AV_CODEC_ID_WRAPPED_AVFRAME, +++ .flags = AVFMT_NOFILE | AVFMT_VARIABLE_FPS | AVFMT_NOTIMESTAMPS, +++ .priv_class = &egl_vout_class, +++ }, +++ .priv_data_size = sizeof(egl_display_env_t), +++ .write_header = egl_vout_write_header, +++ .write_packet = egl_vout_write_packet, +++ .write_uncoded_frame = egl_vout_write_frame, +++ .write_trailer = egl_vout_write_trailer, +++ .control_message = egl_vout_control_message, +++ .init = egl_vout_init, +++ .deinit = egl_vout_deinit, +++}; +++ ++diff --git a/libavfilter/Makefile b/libavfilter/Makefile ++index b3d3d981dd..0e7b5856bd 100644 ++--- a/libavfilter/Makefile +++++ b/libavfilter/Makefile ++@@ -262,6 +262,7 @@ OBJS-$(CONFIG_DEFLATE_FILTER) += vf_neighbor.o ++ OBJS-$(CONFIG_DEFLICKER_FILTER) += vf_deflicker.o ++ OBJS-$(CONFIG_DEINTERLACE_QSV_FILTER) += vf_vpp_qsv.o ++ OBJS-$(CONFIG_DEINTERLACE_VAAPI_FILTER) += vf_deinterlace_vaapi.o vaapi_vpp.o +++OBJS-$(CONFIG_DEINTERLACE_V4L2M2M_FILTER) += vf_deinterlace_v4l2m2m.o ++ OBJS-$(CONFIG_DEJUDDER_FILTER) += vf_dejudder.o ++ OBJS-$(CONFIG_DELOGO_FILTER) += vf_delogo.o ++ OBJS-$(CONFIG_DENOISE_VAAPI_FILTER) += vf_misc_vaapi.o vaapi_vpp.o ++@@ -518,6 +519,7 @@ OBJS-$(CONFIG_TRANSPOSE_VAAPI_FILTER) += vf_transpose_vaapi.o vaapi_vpp.o ++ OBJS-$(CONFIG_TRANSPOSE_VULKAN_FILTER) += vf_transpose_vulkan.o vulkan.o vulkan_filter.o ++ OBJS-$(CONFIG_TRIM_FILTER) += trim.o ++ OBJS-$(CONFIG_UNPREMULTIPLY_FILTER) += vf_premultiply.o framesync.o +++OBJS-$(CONFIG_UNSAND_FILTER) += vf_unsand.o ++ OBJS-$(CONFIG_UNSHARP_FILTER) += vf_unsharp.o ++ OBJS-$(CONFIG_UNSHARP_OPENCL_FILTER) += vf_unsharp_opencl.o opencl.o \ ++ opencl/unsharp.o ++diff --git a/libavfilter/aarch64/Makefile b/libavfilter/aarch64/Makefile ++index b58daa3a3f..1a4cd935f1 100644 ++--- a/libavfilter/aarch64/Makefile +++++ b/libavfilter/aarch64/Makefile ++@@ -1,3 +1,5 @@ 
+++OBJS-$(CONFIG_BWDIF_FILTER) += aarch64/vf_bwdif_init_aarch64.o ++ OBJS-$(CONFIG_NLMEANS_FILTER) += aarch64/vf_nlmeans_init.o ++ +++NEON-OBJS-$(CONFIG_BWDIF_FILTER) += aarch64/vf_bwdif_aarch64.o ++ NEON-OBJS-$(CONFIG_NLMEANS_FILTER) += aarch64/vf_nlmeans_neon.o ++diff --git a/libavfilter/aarch64/vf_bwdif_aarch64.S b/libavfilter/aarch64/vf_bwdif_aarch64.S ++new file mode 100644 ++index 0000000000..d6e047dbde ++--- /dev/null +++++ b/libavfilter/aarch64/vf_bwdif_aarch64.S ++@@ -0,0 +1,410 @@ +++#include "libavutil/aarch64/asm.S" +++ +++.macro SQSHRUNN b, s0, s1, s2, s3, n +++ sqshrun \s0\().4h, \s0\().4s, #\n - 8 +++ sqshrun2 \s0\().8h, \s1\().4s, #\n - 8 +++ sqshrun \s1\().4h, \s2\().4s, #\n - 8 +++ sqshrun2 \s1\().8h, \s3\().4s, #\n - 8 +++ uzp2 \b\().16b, \s0\().16b, \s1\().16b +++.endm +++ +++.macro SMULL4K a0, a1, a2, a3, s0, s1, k +++ smull \a0\().4s, \s0\().4h, \k +++ smull2 \a1\().4s, \s0\().8h, \k +++ smull \a2\().4s, \s1\().4h, \k +++ smull2 \a3\().4s, \s1\().8h, \k +++.endm +++ +++.macro UMULL4K a0, a1, a2, a3, s0, s1, k +++ umull \a0\().4s, \s0\().4h, \k +++ umull2 \a1\().4s, \s0\().8h, \k +++ umull \a2\().4s, \s1\().4h, \k +++ umull2 \a3\().4s, \s1\().8h, \k +++.endm +++ +++.macro UMLAL4K a0, a1, a2, a3, s0, s1, k +++ umlal \a0\().4s, \s0\().4h, \k +++ umlal2 \a1\().4s, \s0\().8h, \k +++ umlal \a2\().4s, \s1\().4h, \k +++ umlal2 \a3\().4s, \s1\().8h, \k +++.endm +++ +++.macro UMLSL4K a0, a1, a2, a3, s0, s1, k +++ umlsl \a0\().4s, \s0\().4h, \k +++ umlsl2 \a1\().4s, \s0\().8h, \k +++ umlsl \a2\().4s, \s1\().4h, \k +++ umlsl2 \a3\().4s, \s1\().8h, \k +++.endm +++ +++ +++// void ff_bwdif_filter_line4_aarch64( +++// void * dst1, // x0 +++// int d_stride, // w1 +++// const void * prev1, // x2 +++// const void * cur1, // x3 +++// const void * next1, // x4 +++// int prefs, // w5 +++// int w, // w6 +++// int parity, // w7 +++// int clip_max); // [sp, #0] (Ignored) +++ +++// static const uint16_t coef_lf[2] = { 4309, 213 }; +++// static const uint16_t coef_hf[3] = { 5570, 3801, 1016 }; +++// static const uint16_t coef_sp[2] = { 5077, 981 }; +++ +++ .align 16 +++ +++coeffs: +++ .hword 4309 * 4, 213 * 4 // lf[1]*4 = v0.h[1] +++ .hword 5570, 3801, 1016, -3801 // hf[0] = v0.h[2], -hf[1] =v0.h[5] +++ .hword 5077, 981 +++ +++function ff_bwdif_filter_line4_aarch64, export=1 +++// #define prev2 cur +++// const uint8_t * restrict next2 = parity ? prev : next; +++ cmp w7, #0 +++ csel x17, x2, x4, ne +++ +++ // We want all the V registers - save all the ones we must +++ stp d14, d15, [sp, #-64]! 
+++ stp d8, d9, [sp, #48] +++ stp d10, d11, [sp, #32] +++ stp d12, d13, [sp, #16] +++ +++ ldr q0, coeffs +++ +++ // Some rearrangement of initial values for nice layout of refs +++ mov w10, w6 // w10 = loop count +++ neg w9, w5 // w9 = mref +++ lsl w8, w9, #1 // w8 = mref2 +++ add w7, w9, w9, LSL #1 // w7 = mref3 +++ lsl w6, w9, #2 // w6 = mref4 +++ mov w11, w5 // w11 = pref +++ lsl w12, w5, #1 // w12 = pref2 +++ add w13, w5, w5, LSL #1 // w13 = pref3 +++ lsl w14, w5, #2 // w14 = pref4 +++ add w15, w5, w5, LSL #2 // w15 = pref5 +++ add w16, w14, w12 // w16 = pref6 +++ +++ lsl w5, w1, #1 // w5 = d_stride * 2 +++ +++// for (x = 0; x < w; x++) { +++// int diff0, diff2; +++// int d0, d2; +++// int temporal_diff0, temporal_diff2; +++// +++// int i1, i2; +++// int j1, j2; +++// int p6, p5, p4, p3, p2, p1, c0, m1, m2, m3, m4; +++ +++10: +++// c0 = prev2[0] + next2[0]; // c0 = v20, v21 +++// d0 = c0 >> 1; // d0 = v10 +++// temporal_diff0 = FFABS(prev2[0] - next2[0]); // td0 = v11 +++ ldr q31, [x3] +++ ldr q21, [x17] +++ uhadd v10.16b, v31.16b, v21.16b +++ uabd v11.16b, v31.16b, v21.16b +++ uaddl v20.8h, v21.8b, v31.8b +++ uaddl2 v21.8h, v21.16b, v31.16b +++ +++ ldr q31, [x3, w6, SXTW] +++ ldr q23, [x17, w6, SXTW] +++ +++// i1 = coef_hf[0] * c0; // i1 = v2-v5 +++ UMULL4K v2, v3, v4, v5, v20, v21, v0.h[2] +++ +++ ldr q30, [x3, w14, SXTW] +++ ldr q25, [x17, w14, SXTW] +++ +++// m4 = prev2[mrefs4] + next2[mrefs4]; // m4 = v22,v23 +++ uaddl v22.8h, v23.8b, v31.8b +++ uaddl2 v23.8h, v23.16b, v31.16b +++ +++// p4 = prev2[prefs4] + next2[prefs4]; // p4 = v24,v25, (p4 >> 1) = v12 +++ uhadd v12.16b, v25.16b, v30.16b +++ uaddl v24.8h, v25.8b, v30.8b +++ uaddl2 v25.8h, v25.16b, v30.16b +++ +++// j1 = -coef_hf[1] * (c0 + p4); // j1 = v6-v9 (-c0:v20,v21) +++ add v20.8h, v20.8h, v24.8h +++ add v21.8h, v21.8h, v25.8h +++ SMULL4K v6, v7, v8, v9, v20, v21, v0.h[5] +++ +++// m3 = cur[mrefs3]; // m3 = v20 +++ ldr q20, [x3, w7, SXTW] +++ +++// p3 = cur[prefs3]; // p3 = v21 +++ ldr q21, [x3, w13, SXTW] +++ +++// i1 += coef_hf[2] * (m4 + p4); // (-m4:v22,v23) (-p4:v24,v25) +++ add v22.8h, v22.8h, v24.8h +++ add v23.8h, v23.8h, v25.8h +++ UMLAL4K v2, v3, v4, v5, v22, v23, v0.h[4] +++ +++ ldr q29, [x3, w8, SXTW] +++ ldr q23, [x17, w8, SXTW] +++ +++// i1 -= coef_lf[1] * 4 * (m3 + p3); // - +++ uaddl v30.8h, v20.8b, v21.8b +++ uaddl2 v31.8h, v20.16b, v21.16b +++ +++ ldr q28, [x3, w16, SXTW] +++ ldr q25, [x17, w16, SXTW] +++ +++ UMLSL4K v2, v3, v4, v5, v30, v31, v0.h[1] +++ +++// m2 = prev2[mrefs2] + next2[mrefs2]; // m2 = v22,v23, (m2 >> 1) = v13 +++ uhadd v13.16b, v23.16b, v29.16b +++ uaddl v22.8h, v23.8b, v29.8b +++ uaddl2 v23.8h, v23.16b, v29.16b +++ +++ ldr q31, [x3, w12, SXTW] +++ ldr q27, [x17, w12, SXTW] +++ +++// p6 = prev2[prefs6] + next2[prefs6]; // p6 = v24,v25 +++ uaddl v24.8h, v25.8b, v28.8b +++ uaddl2 v25.8h, v25.16b, v28.16b +++ +++// j1 += coef_hf[2] * (m2 + p6); // (-p6:v24,v25) +++ add v24.8h, v24.8h, v22.8h +++ add v25.8h, v25.8h, v23.8h +++ UMLAL4K v6, v7, v8, v9, v24, v25, v0.h[4] +++ +++// m1 = cur[mrefs]; // m1 = v24 +++ ldr q24, [x3, w9, SXTW] +++ +++// p5 = cur[prefs5]; // p5 = v25 +++ ldr q25, [x3, w15, SXTW] +++ +++ +++// p2 = prev2[prefs2] + next2[prefs2]; // p2 = v26, v27 +++// temporal_diff2 = FFABS(prev2[prefs2] - next2[prefs2]); // td2 = v14 +++// d2 = p2 >> 1; // d2 = v15 +++ uabd v14.16b, v31.16b, v27.16b +++ uhadd v15.16b, v31.16b, v27.16b +++ uaddl v26.8h, v27.8b, v31.8b +++ uaddl2 v27.8h, v27.16b, v31.16b +++ +++// j1 += coef_hf[0] * p2; // - +++ UMLAL4K v6, v7, v8, v9, v26, v27, v0.h[2] 
+++ +++// i1 -= coef_hf[1] * (m2 + p2); // (-m2:v22,v23*) (-p2:v26*,v27*) +++ add v22.8h, v22.8h, v26.8h +++ add v23.8h, v23.8h, v27.8h +++ UMLSL4K v2, v3, v4, v5, v22, v23, v0.h[3] +++ +++// p1 = cur[prefs]; // p1 = v22 +++ ldr q22, [x3, w11, SXTW] +++ +++// j1 -= coef_lf[1] * 4 * (m1 + p5); // - +++ uaddl v26.8h, v24.8b, v25.8b +++ uaddl2 v27.8h, v24.16b, v25.16b +++ UMLSL4K v6, v7, v8, v9, v26, v27, v0.h[1] +++ +++// j2 = (coef_sp[0] * (p1 + p3) - coef_sp[1] * (m1 + p5)) >> 13; // (-p5:v25*) j2=v16 +++ uaddl v18.8h, v22.8b, v21.8b +++ uaddl2 v19.8h, v22.16b, v21.16b +++ UMULL4K v28, v29, v30, v31, v18, v19, v0.h[6] +++ +++ uaddl v18.8h, v24.8b, v25.8b +++ uaddl2 v19.8h, v24.16b, v25.16b +++ UMLSL4K v28, v29, v30, v31, v18, v19, v0.h[7] +++ +++ SQSHRUNN v16, v28, v29, v30, v31, 13 +++ +++// i2 = (coef_sp[0] * (m1 + p1) - coef_sp[1] * (m3 + p3)) >> 13; // (-m3:v20*) i2=v17 +++ uaddl v18.8h, v22.8b, v24.8b +++ uaddl2 v19.8h, v22.16b, v24.16b +++ UMULL4K v28, v29, v30, v31, v18, v19, v0.h[6] +++ +++ uaddl v18.8h, v20.8b, v21.8b +++ uaddl2 v19.8h, v20.16b, v21.16b +++ UMLSL4K v28, v29, v30, v31, v18, v19, v0.h[7] +++ +++ SQSHRUNN v17, v28, v29, v30, v31, 13 +++ +++// i1 += coef_lf[0] * 4 * (m1 + p1); // p1 = v22, m1 = v24 +++ uaddl v26.8h, v24.8b, v22.8b +++ uaddl2 v27.8h, v24.16b, v22.16b +++ UMLAL4K v2, v3, v4, v5, v26, v27, v0.h[0] +++ +++ ldr q31, [x2, w9, SXTW] +++ ldr q29, [x4, w9, SXTW] +++ +++// j1 += coef_lf[0] * 4 * (p1 + p3); // p1 = v22, p3 = v21 +++ uaddl v26.8h, v21.8b, v22.8b +++ uaddl2 v27.8h, v21.16b, v22.16b +++ UMLAL4K v6, v7, v8, v9, v26, v27, v0.h[0] +++ +++ ldr q30, [x2, w11, SXTW] +++ ldr q28, [x4, w11, SXTW] +++ +++// i1 >>= 15; // i1 = v2, -v3, -v4*, -v5* +++ SQSHRUNN v2, v2, v3, v4, v5, 15 +++ +++// j1 >>= 15; // j1 = v3, -v6*, -v7*, -v8*, -v9* +++ SQSHRUNN v3, v6, v7, v8, v9, 15 +++ +++// { +++// int t1 =(FFABS(prev[mrefs] - m1) + FFABS(prev[prefs] - p1)) >> 1; +++// int t2 =(FFABS(next[mrefs] - m1) + FFABS(next[prefs] - p1)) >> 1; +++ uabd v30.16b, v22.16b, v30.16b +++ uabd v31.16b, v24.16b, v31.16b +++ uabd v28.16b, v22.16b, v28.16b +++ uabd v29.16b, v24.16b, v29.16b +++ uhadd v31.16b, v31.16b, v30.16b +++ uhadd v29.16b, v29.16b, v28.16b +++ +++ ldr q27, [x2, w13, SXTW] +++ ldr q26, [x4, w13, SXTW] +++ +++// diff0 = FFMAX3(temporal_diff0 >> 1, t1, t2); // diff0=v18 +++ ushr v18.16b, v11.16b, #1 +++ umax v18.16b, v18.16b, v31.16b +++ umax v18.16b, v18.16b, v29.16b +++ +++// } // v28, v30 preserved for next block +++// { // tdiff2 = v14 +++// int t1 =(FFABS(prev[prefs] - p1) + FFABS(prev[prefs3] - p3)) >> 1; +++// int t2 =(FFABS(next[prefs] - p1) + FFABS(next[prefs3] - p3)) >> 1; +++ uabd v31.16b, v21.16b, v27.16b +++ uabd v29.16b, v21.16b, v26.16b +++ uhadd v31.16b, v31.16b, v30.16b +++ uhadd v29.16b, v29.16b, v28.16b +++ +++// diff2 = FFMAX3(temporal_diff2 >> 1, t1, t2); // diff2=v19 +++ ushr v19.16b, v14.16b, #1 +++ umax v19.16b, v19.16b, v31.16b +++ umax v19.16b, v19.16b, v29.16b +++ +++// } +++// { // (m2 >> 1) = v13, m1 = v24, d0 = v10, d2 = v15, p1 = v22, diff0 = v18 +++// int b = (m2 >> 1) - m1; +++// int f = d2 - p1; +++// int dc = d0 - m1; +++// int de = d0 - p1; +++// int sp_max = FFMIN(p1 - d0, m1 - d0); +++ uqsub v31.16b, v22.16b, v10.16b +++ uqsub v29.16b, v24.16b, v10.16b +++ umin v29.16b, V31.16b, v29.16b +++ +++// sp_max = FFMIN(sp_max, FFMAX(-b,-f)); +++ uqsub v30.16b, v24.16b, v13.16b +++ uqsub v28.16b, v22.16b, v15.16b +++ umax v28.16b, v28.16b, v30.16b +++ umin v27.16b, v29.16b, v28.16b +++ +++// int sp_min = FFMIN(d0 - p1, d0 - m1); +++ uqsub 
v31.16b, v10.16b, v22.16b +++ uqsub v29.16b, v10.16b, v24.16b +++ umin v29.16b, V31.16b, v29.16b +++ +++// sp_min = FFMIN(sp_min, FFMAX(b,f)); +++ uqsub v30.16b, v13.16b, v24.16b +++ uqsub v28.16b, v15.16b, v22.16b +++ umax v28.16b, v28.16b, v30.16b +++ umin v26.16b, v29.16b, v28.16b +++ +++// diff0 = FFMAX3(diff0, sp_min, sp_max); // diff0 = v18 +++ umax v18.16b, v18.16b, v27.16b +++ umax v18.16b, v18.16b, v26.16b +++// } +++// { // (p4 >> 1) = v12, p3 = v21, d0 = v10, d2 = v15, p1 = v22, diff2 = v19 +++// int b = d0 - p1; // 1 +++// int f = (p4 >> 1) - p3; // [v23] +++// int dc = d2 - p1; +++// int de = d2 - p3; +++// int sp_max = FFMIN(-de, -dc); +++ uqsub v31.16b, v21.16b, v15.16b +++ uqsub v29.16b, v22.16b, v15.16b +++ umin v29.16b, V31.16b, v29.16b +++ +++// sp_max = FFMIN(sp_max, FFMAX(-b,-f)); +++ uqsub v30.16b, v22.16b, v10.16b +++ uqsub v28.16b, v21.16b, v12.16b +++ umax v28.16b, v28.16b, v30.16b +++ umin v27.16b, v29.16b, v28.16b +++ +++// int sp_min = FFMIN(de, dc); +++ uqsub v31.16b, v15.16b, v21.16b +++ uqsub v29.16b, v15.16b, v22.16b +++ umin v29.16b, V31.16b, v29.16b +++ +++// sp_min = FFMIN(sp_min, FFMAX(b,f)); +++ uqsub v30.16b, v10.16b, v22.16b +++ uqsub v28.16b, v12.16b, v21.16b +++ umax v28.16b, v28.16b, v30.16b +++ umin v26.16b, v29.16b, v28.16b +++ +++// diff2 = FFMAX3(diff2, sp_min, sp_max); +++ umax v19.16b, v19.16b, v27.16b +++ umax v19.16b, v19.16b, v26.16b +++ +++// } +++// +++// +++// { +++// int interpol = FFABS(p1 - p3) > temporal_diff2 ? j1:j2; // interpol = v6 (-j1:v6) (-j2=v16) +++ uabd v31.16b, v22.16b, v21.16b +++ cmhi v31.16b, v31.16b, v14.16b +++ bif v3.16b, v16.16b, v31.16b +++ +++// if (interpol > d2 + diff2) +++// interpol = d2 + diff2; +++ uqadd v30.16b, v15.16b, v19.16b +++ umin v3.16b, v3.16b, v30.16b +++ +++// else if (interpol < d2 - diff2) +++// interpol = d2 - diff2; +++ uqsub v29.16b, v15.16b, v19.16b +++ umax v3.16b, v3.16b, v29.16b +++ +++// dst[d_stride * 2] = av_clip_uint8(interpol); +++ str q3, [x0, w5, SXTW] +++ +++// } +++ +++// dst[d_stride] = p1; +++ str q22, [x0, w1, SXTW] +++ +++ +++// { +++// int interpol = FFABS(m1 - p1) > temporal_diff0 ? 
i1:i2; +++ uabd v31.16b, v24.16b, v22.16b // m1 = v24, p1 = v22 +++ cmhi v31.16b, v31.16b, v11.16b // td0 = v11 +++ bif v2.16b, v17.16b, v31.16b // i1 = v2, i2 = v17 +++ +++// if (interpol > d0 + diff0) +++// interpol = d0 + diff0; +++ uqadd v30.16b, v10.16b, v18.16b // diff0 = v18 +++ umin v2.16b, v2.16b, v30.16b +++ +++// else if (interpol < d0 - diff0) +++// interpol = d0 - diff0; +++ uqsub v29.16b, v10.16b, v18.16b +++ umax v2.16b, v2.16b, v29.16b +++// +++// dst[0] = av_clip_uint8(interpol); +++ str q2, [x0], #16 +++// } +++// +++// dst++; +++// cur++; +++// prev++; +++// prev2++; +++// next++; +++// } +++ +++ subs w10, w10, #16 +++ add x2, x2, #16 +++ add x3, x3, #16 +++ add x4, x4, #16 +++ add x17, x17, #16 +++ bgt 10b +++ +++ ldp d12, d13, [sp, #16] +++ ldp d10, d11, [sp, #32] +++ ldp d8, d9, [sp, #48] +++ ldp d14, d15, [sp], #64 +++ ret ++diff --git a/libavfilter/aarch64/vf_bwdif_aarch64.h b/libavfilter/aarch64/vf_bwdif_aarch64.h ++new file mode 100644 ++index 0000000000..8d97802e5e ++--- /dev/null +++++ b/libavfilter/aarch64/vf_bwdif_aarch64.h ++@@ -0,0 +1,8 @@ +++#ifndef AVFILTER_AARCH64_VF_BWDIF_H_ +++#define AVFILTER_AARCH64_VF_BWDIF_H_ +++ +++void ff_bwdif_filter_line4_aarch64(void * dst1, int d_stride, +++ const void * prev1, const void * cur1, const void * next1, int prefs, +++ int w, int parity, int clip_max); +++ +++#endif ++diff --git a/libavfilter/aarch64/vf_bwdif_init_aarch64.c b/libavfilter/aarch64/vf_bwdif_init_aarch64.c ++new file mode 100644 ++index 0000000000..c5506424c9 ++--- /dev/null +++++ b/libavfilter/aarch64/vf_bwdif_init_aarch64.c ++@@ -0,0 +1,273 @@ +++#include "libavutil/common.h" +++#include "libavutil/aarch64/cpu.h" +++#include "../avfilter.h" +++#include "../bwdif.h" +++#include "vf_bwdif_aarch64.h" +++ +++/* +++ * Filter coefficients coef_lf and coef_hf taken from BBC PH-2071 (Weston 3 Field Deinterlacer). +++ * Used when there is spatial and temporal interpolation. +++ * Filter coefficients coef_sp are used when there is spatial interpolation only. +++ * Adjusted for matching visual sharpness impression of spatial and temporal interpolation. 
+++ */ +++static const uint16_t coef_lf[2] = { 4309, 213 }; +++static const uint16_t coef_hf[3] = { 5570, 3801, 1016 }; +++static const uint16_t coef_sp[2] = { 5077, 981 }; +++ +++#define NEXT_LINE()\ +++ dst += d_stride; \ +++ prev += prefs; \ +++ cur += prefs; \ +++ next += prefs; +++ +++static void filter_line4_check(void *restrict dst1, int d_stride, +++ const void *restrict prev1, const void *restrict cur1, const void *restrict next1, int prefs, +++ int w, int parity, int clip_max) +++{ +++ uint8_t * restrict dst = dst1; +++ const uint8_t * restrict prev = prev1; +++ const uint8_t * restrict cur = cur1; +++ const uint8_t * restrict next = next1; +++ +++ const int mrefs = -prefs; +++ const int mrefs2 = mrefs * 2; +++ const int prefs2 = prefs * 2; +++ const int mrefs3 = mrefs * 3; +++ const int prefs3 = prefs * 3; +++ const int mrefs4 = mrefs * 4; +++ const int prefs4 = prefs * 4; +++ +++ static int n = 0; +++ uint64_t buf[2048*4/sizeof(uint64_t)]; +++ int i, j; +++ static int fail_count = 0; +++ +++ memset(dst, 0xba, d_stride * 3); +++ memset(buf, 0xba, d_stride * 3); +++ +++ ff_bwdif_filter_line4_aarch64(dst, d_stride, prev, cur, next, prefs, w, parity, clip_max); +++ +++ dst = (uint8_t*)buf; +++ prev = prev1; +++ cur = cur1; +++ next = next1; +++ +++ ff_bwdif_filter_line_c(dst, (void*)prev, (void*)cur, (void*)next, w, +++ prefs, mrefs, prefs2, mrefs2, prefs3, mrefs3, prefs4, mrefs4, parity, clip_max); +++ NEXT_LINE(); +++ memcpy(dst, cur, w); +++ NEXT_LINE(); +++ ff_bwdif_filter_line_c(dst, (void*)prev, (void*)cur, (void*)next, w, +++ prefs, mrefs, prefs2, mrefs2, prefs3, mrefs3, prefs4, mrefs4, parity, clip_max); +++ +++ for (j = 0; j != 3; ++j) +++ { +++ const uint8_t * ref = (uint8_t*)buf + j * d_stride; +++ const uint8_t * tst = (uint8_t*)dst1 + j * d_stride; +++ for (i = 0; i != w; ++i) +++ { +++ if (ref[i] != tst[i]) +++ { +++ printf("n=%d, (%d,%d): Ref: %02x, Tst: %02x\n", n, i, j, ref[i], tst[i]); +++ if (fail_count++ > 16) +++ exit(1); +++ } +++ } +++ } +++ +++ ++n; +++} +++ +++static void __attribute__((optimize("tree-vectorize"))) filter_line4_debug(void *restrict dst1, int d_stride, +++ const void *restrict prev1, const void *restrict cur1, const void *restrict next1, int prefs, +++ int w, int parity, int clip_max) +++{ +++ uint8_t * restrict dst = dst1; +++ const uint8_t * restrict prev = prev1; +++ const uint8_t * restrict cur = cur1; +++ const uint8_t * restrict next = next1; +++ +++ const int mrefs = -prefs; +++ const int mrefs2 = mrefs * 2; +++ const int prefs2 = prefs * 2; +++ const int mrefs3 = mrefs * 3; +++ const int prefs3 = prefs * 3; +++ const int mrefs4 = mrefs * 4; +++ const int prefs4 = prefs * 4; +++ +++ static int n = 0; +++ static int itt = -1; +++ +++ { +++ int x; +++#define prev2 cur +++ const uint8_t * restrict next2 = parity ? 
prev : next; +++ +++ for (x = 0; x < w; x++) { +++ int diff0, diff2; +++ int d0, d2; +++ int temporal_diff0, temporal_diff2; +++ +++ int i1, i2; +++ int j1, j2; +++ int p6, p5, p4, p3, p2, p1, c0, m1, m2, m3, m4; +++ +++ if ((x & 15) == 0) +++ ++itt; +++ +++// printf("======= n=%d x=%d [iteration %d.%d] =======\n", n, x, itt, x & 15); +++ c0 = prev2[0] + next2[0]; // c0 = v20,v26 +++ d0 = c0 >> 1; // d0 = v21 +++ temporal_diff0 = FFABS(prev2[0] - next2[0]); // td0 = v9 +++// printf("c0=%d, d0=%d, temporal_diff0=%d\n", c0, d0, temporal_diff0); +++ i1 = coef_hf[0] * c0; // - +++// printf("i1=%d\n", i1); +++ m4 = prev2[mrefs4] + next2[mrefs4]; // m4 = v3,v4 +++ p4 = prev2[prefs4] + next2[prefs4]; // p4 = v5,v6, (p4 >> 1) = v23 +++ j1 = -coef_hf[1] * (c0 + p4); // (-c0:v20,v26*) +++// printf("m4=%d, p4=%d, j1=%d\n", m4, p4, j1); +++ i1 += coef_hf[2] * (m4 + p4); // (-m4:v3,v4) (-p4:v5,v6) i1 = v3,v4,v7,v8 +++// printf("hf2 i1=%d\n", i1); +++ m3 = cur[mrefs3]; // m3 = v5 +++ p3 = cur[prefs3]; // p3 = v10, [f2=v23] +++ i1 -= coef_lf[1] * 4 * (m3 + p3); // - +++// printf("lf1 i1=%d\n", i1); +++ m2 = prev2[mrefs2] + next2[mrefs2]; // m2 = v11,v12, (m2 >> 1) = v22 +++ p6 = prev2[prefs4 + prefs2] + next2[prefs4 + prefs2]; // p6=v0,v1 +++ j1 += coef_hf[2] * (m2 + p6); // (-p6:v0*,v1*), j1 = v13,v14,v15,v16 +++// printf("hf2 j1=%d\n", j1); +++ p2 = prev2[prefs2] + next2[prefs2]; // p2 = v17,v18 +++ temporal_diff2 = FFABS(prev2[prefs2] - next2[prefs2]); // td2 = v6 +++ j1 += coef_hf[0] * p2; // - +++ d2 = p2 >> 1; // d2 = v19 +++ i1 -= coef_hf[1] * (m2 + p2); // (-m2:v11,v12) +++// printf("hf1 i1=%d\n", i1); +++ m1 = cur[mrefs]; // m1 = v11, [b0=v22] +++ p5 = cur[prefs3 + prefs2]; // p5=v2 +++ j1 -= coef_lf[1] * 4 * (m1 + p5); // - +++ p1 = cur[prefs]; // p1 = v12 +++ dst[d_stride] = p1; +++ j2 = (coef_sp[0] * (p1 + p3) - coef_sp[1] * (m1 + p5)) >> 13; // (-p5:v2) j2=v2 +++ i2 = (coef_sp[0] * (m1 + p1) - coef_sp[1] * (m3 + p3)) >> 13; // (-m3:v5) i2=v5 +++ { +++ int t1 =(FFABS(prev[mrefs] - m1) + FFABS(prev[prefs] - p1)) >> 1; +++ int t2 =(FFABS(next[mrefs] - m1) + FFABS(next[prefs] - p1)) >> 1; +++ diff0 = FFMAX3(temporal_diff0 >> 1, t1, t2); // diff0=v24 +++// printf("tdiff0=%d, t1=%d, t2=%d\n", temporal_diff0, t1, t2); +++ } +++ { +++ int t1 =(FFABS(prev[prefs] - p1) + FFABS(prev[prefs3] - p3)) >> 1; +++ int t2 =(FFABS(next[prefs] - p1) + FFABS(next[prefs3] - p3)) >> 1; +++ diff2 = FFMAX3(temporal_diff2 >> 1, t1, t2); // diff2=v25 +++// printf("tdiff2=%d, t1=%d, t2=%d\n", temporal_diff2, t1, t2); +++ } +++ i1 += coef_lf[0] * 4 * (m1 + p1); // - +++ j1 += coef_lf[0] * 4 * (p1 + p3); // - +++// printf("lf0 i1=%d, j1=%d, diff0=%d, diff2=%d\n", i1, j1, diff0, diff2); +++ { +++ int b = (m2 >> 1) - m1; // [v22] +++ int f = d2 - p1; // 1 +++ int dc = d0 - m1; +++ int de = d0 - p1; +++ int sp_max = FFMIN(-de, -dc); +++ int sp_min = FFMIN(de, dc); +++ sp_max = FFMIN(sp_max, FFMAX(-b,-f)); +++ sp_min = FFMIN(sp_min, FFMAX(b,f)); +++// printf("spmax0=%d, spmin0=%d, b=%d, f=%d, dc=%d, de=%d\n", sp_max, sp_min, b, f, dc, de); +++ diff0 = FFMAX3(diff0, sp_min, sp_max); +++ } +++ { +++ int b = d0 - p1; // 1 +++ int f = (p4 >> 1) - p3; // [v23] +++ int dc = d2 - p1; +++ int de = d2 - p3; +++ int sp_max = FFMIN(-de, -dc); +++ int sp_min = FFMIN(de, dc); +++ sp_max = FFMIN(sp_max, FFMAX(-b,-f)); +++ sp_min = FFMIN(sp_min, FFMAX(b,f)); +++// printf("spmax2=%d, spmin2=%d, b=%d, f=%d, dc=%d, de=%d\n", sp_max, sp_min, b, f, dc, de); +++ diff2 = FFMAX3(diff2, sp_min, sp_max); +++ } +++ +++ i1 >>= 15; +++ j1 >>= 15; +++ 
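
Editor's aside (not part of the patch): the coefficient arrays quoted above are a fixed-point encoding. On a perfectly static field every tap sees the same pixel value, the high-frequency terms cancel (2*5570 - 4*3801 + 4*1016 = 0), the low-frequency terms sum to 32768 = 2^15, and the spatial-only pair sums to 8192 = 2^13 — which is exactly why the reference code shifts i1/j1 right by 15 and i2/j2 right by 13. The small standalone program below only reuses the constants from the hunk above and checks that both paths reproduce the input for every 8-bit value.

#include <assert.h>
#include <stdint.h>
#include <stdio.h>

/* Constants copied verbatim from the hunk above. */
static const uint16_t coef_lf[2] = { 4309, 213 };
static const uint16_t coef_hf[3] = { 5570, 3801, 1016 };
static const uint16_t coef_sp[2] = { 5077, 981 };

int main(void)
{
    for (int v = 0; v < 256; v++) {
        /* Temporal+spatial path: prev2[x]+next2[x] == 2*v, cur[x] == v. */
        int i1 = coef_hf[0] * (2 * v)            /* c0      */
               - coef_hf[1] * (2 * v + 2 * v)    /* m2 + p2 */
               + coef_hf[2] * (2 * v + 2 * v)    /* m4 + p4 */
               + coef_lf[0] * 4 * (v + v)        /* m1 + p1 */
               - coef_lf[1] * 4 * (v + v);       /* m3 + p3 */
        i1 >>= 15;

        /* Spatial-only path. */
        int i2 = (coef_sp[0] * (v + v) - coef_sp[1] * (v + v)) >> 13;

        assert(i1 == v && i2 == v);
    }
    puts("flat-field identity holds for all 8-bit values");
    return 0;
}

Any alternative coefficient set has to preserve those two sums, otherwise flat areas shift in brightness after deinterlacing.
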
+++// printf("Final i1=%d, i2=%d, j1=%d, j2=%d\n", i1, i2, j1, j2); +++ +++ +++ { +++ int interpol = FFABS(p1 - p3) > temporal_diff2 ? j1:j2; +++ +++// printf("diff2=%d, interpol=%d, d2=%d\n", diff2, interpol, d2); +++ +++ if (interpol > d2 + diff2) +++ interpol = d2 + diff2; +++ else if (interpol < d2 - diff2) +++ interpol = d2 - diff2; +++ dst[d_stride * 2] = av_clip_uint8(interpol); +++ } +++ { +++ int interpol = FFABS(m1 - p1) > temporal_diff0 ? i1:i2; +++ +++// printf("diff0=%d, interpol=%d, d0=%d\n", diff0, interpol, d0); +++ +++ if (interpol > d0 + diff0) +++ interpol = d0 + diff0; +++ else if (interpol < d0 - diff0) +++ interpol = d0 - diff0; +++ +++ dst[0] = av_clip_uint8(interpol); +++ } +++// printf("dst[0]=%d, dst[2]=%d\n", dst[0], dst[d_stride*2]); +++ +++ dst++; +++ cur++; +++ prev++; +++ next++; +++ next2++; +++// if (n >= 513 && x >= 719) +++// { +++// exit(99); +++// } +++ } +++#undef prev2 +++ +++// NEXT_LINE(); +++// memcpy(dst, cur, w); +++ ++n; +++ } +++} +++ +++ +++void +++ff_bwdif_init_aarch64(AVFilterContext *ctx) +++{ +++ const int cpu_flags = av_get_cpu_flags(); +++ BWDIFContext *s = ctx->priv; +++ YADIFContext *yadif = &s->yadif; +++ +++ if ((ctx->inputs[0]->w & 31) != 0) +++ { +++ av_log(ctx, AV_LOG_DEBUG, "Cannot use aarch64 optimization: w=%d, (needs multiple of 32)\n", ctx->inputs[0]->w); +++ return; +++ } +++ if (yadif->csp->comp[0].depth != 8) +++ { +++ av_log(ctx, AV_LOG_DEBUG, "Cannot use aarch64 optimization: bits=%d, (only 8 supported)\n", yadif->csp->comp[0].depth); +++ return; +++ } +++ +++ if (!have_neon(cpu_flags)) +++ { +++ av_log(ctx, AV_LOG_DEBUG, "Cannot use aarch64 optimization: no NEON!\n"); +++ return; +++ } +++ +++ if (yadif->useasm == 3) +++ s->filter_line4 = filter_line4_check; +++ else if (yadif->useasm == 2) +++ s->filter_line4 = filter_line4_debug; +++ else +++ s->filter_line4 = ff_bwdif_filter_line4_aarch64; +++} +++ ++diff --git a/libavfilter/allfilters.c b/libavfilter/allfilters.c ++index d7db46c2af..d504fa1bc8 100644 ++--- a/libavfilter/allfilters.c +++++ b/libavfilter/allfilters.c ++@@ -248,6 +248,7 @@ extern const AVFilter ff_vf_derain; ++ extern const AVFilter ff_vf_deshake; ++ extern const AVFilter ff_vf_deshake_opencl; ++ extern const AVFilter ff_vf_despill; +++extern const AVFilter ff_vf_deinterlace_v4l2m2m; ++ extern const AVFilter ff_vf_detelecine; ++ extern const AVFilter ff_vf_dilation; ++ extern const AVFilter ff_vf_dilation_opencl; ++@@ -420,6 +421,7 @@ extern const AVFilter ff_vf_scale; ++ extern const AVFilter ff_vf_scale_cuda; ++ extern const AVFilter ff_vf_scale_npp; ++ extern const AVFilter ff_vf_scale_qsv; +++extern const AVFilter ff_vf_scale_v4l2m2m; ++ extern const AVFilter ff_vf_scale_vaapi; ++ extern const AVFilter ff_vf_scale_vulkan; ++ extern const AVFilter ff_vf_scale2ref; ++@@ -490,6 +492,7 @@ extern const AVFilter ff_vf_trim; ++ extern const AVFilter ff_vf_unpremultiply; ++ extern const AVFilter ff_vf_unsharp; ++ extern const AVFilter ff_vf_unsharp_opencl; +++extern const AVFilter ff_vf_unsand; ++ extern const AVFilter ff_vf_untile; ++ extern const AVFilter ff_vf_uspp; ++ extern const AVFilter ff_vf_v360; ++diff --git a/libavfilter/buffersink.c b/libavfilter/buffersink.c ++index 306c283f77..d3c82aabf3 100644 ++--- a/libavfilter/buffersink.c +++++ b/libavfilter/buffersink.c ++@@ -62,6 +62,11 @@ typedef struct BufferSinkContext { ++ int sample_rates_size; ++ ++ AVFrame *peeked_frame; +++ +++ union { +++ av_buffersink_alloc_video_frame * video; +++ } alloc_cb; +++ void * alloc_v; ++ } BufferSinkContext; ++ ++ 
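
Editor's aside (not part of the patch): the alloc_cb / alloc_v fields just added to BufferSinkContext back a new hook, av_buffersink_set_alloc_video_frame(), whose definition and buffersink.h declaration follow in the next hunks. It lets the application supply the AVFrame the sink hands out instead of the default ff_default_get_video_buffer() allocation — presumably so the Kodi side can pass in frames backed by buffers it owns rather than copying. A hypothetical caller-side sketch; the allocator body and the sink_ctx/opaque names are illustrative, and the hook only exists once this patch is applied.

#include <libavfilter/avfilter.h>
#include <libavfilter/buffersink.h>
#include <libavutil/frame.h>

/* Illustrative allocator matching the av_buffersink_alloc_video_frame
 * typedef added below: return the frame the sink should hand out, or
 * NULL on allocation failure. */
static AVFrame *my_alloc_video_frame(AVFilterContext *ctx, void *opaque, int w, int h)
{
    AVFrame *frame = av_frame_alloc();
    if (!frame)
        return NULL;
    frame->format = AV_PIX_FMT_YUV420P;  /* assumption: the negotiated format */
    frame->width  = w;
    frame->height = h;
    if (av_frame_get_buffer(frame, 0) < 0) {  /* swap in a pool/dmabuf allocator here */
        av_frame_free(&frame);
        return NULL;
    }
    return frame;
}

/* After the graph is configured, with sink_ctx pointing at the buffersink: */
static int attach_allocator(AVFilterContext *sink_ctx, void *opaque)
{
    return av_buffersink_set_alloc_video_frame(sink_ctx, my_alloc_video_frame, opaque);
}
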
#define NB_ITEMS(list) (list ## _size / sizeof(*list)) ++@@ -154,6 +159,44 @@ int attribute_align_arg av_buffersink_get_samples(AVFilterContext *ctx, ++ return get_frame_internal(ctx, frame, 0, nb_samples); ++ } ++ +++static AVFrame * alloc_video_buffer(AVFilterLink *link, int w, int h) +++{ +++ AVFilterContext * const ctx = link->dst; +++ BufferSinkContext * const bs = ctx->priv; +++ return bs->alloc_cb.video ? bs->alloc_cb.video(ctx, bs->alloc_v, w, h) : +++ ff_default_get_video_buffer(link, w, h); +++} +++ +++int av_buffersink_set_alloc_video_frame(AVFilterContext *ctx, av_buffersink_alloc_video_frame * cb, void * v) +++{ +++ BufferSinkContext * const bs = ctx->priv; +++ bs->alloc_cb.video = cb; +++ bs->alloc_v = v; +++ return 0; +++} +++ +++#if FF_API_BUFFERSINK_ALLOC +++AVBufferSinkParams *av_buffersink_params_alloc(void) +++{ +++ static const int pixel_fmts[] = { AV_PIX_FMT_NONE }; +++ AVBufferSinkParams *params = av_malloc(sizeof(AVBufferSinkParams)); +++ if (!params) +++ return NULL; +++ +++ params->pixel_fmts = pixel_fmts; +++ return params; +++} +++ +++AVABufferSinkParams *av_abuffersink_params_alloc(void) +++{ +++ AVABufferSinkParams *params = av_mallocz(sizeof(AVABufferSinkParams)); +++ +++ if (!params) +++ return NULL; +++ return params; +++} +++#endif +++ ++ static av_cold int common_init(AVFilterContext *ctx) ++ { ++ BufferSinkContext *buf = ctx->priv; ++@@ -381,6 +424,7 @@ static const AVFilterPad avfilter_vsink_buffer_inputs[] = { ++ { ++ .name = "default", ++ .type = AVMEDIA_TYPE_VIDEO, +++ .get_buffer = {.video = alloc_video_buffer}, ++ }, ++ }; ++ ++diff --git a/libavfilter/buffersink.h b/libavfilter/buffersink.h ++index 64e08de53e..09737d322f 100644 ++--- a/libavfilter/buffersink.h +++++ b/libavfilter/buffersink.h ++@@ -166,6 +166,9 @@ int av_buffersink_get_frame(AVFilterContext *ctx, AVFrame *frame); ++ */ ++ int av_buffersink_get_samples(AVFilterContext *ctx, AVFrame *frame, int nb_samples); ++ +++typedef AVFrame * av_buffersink_alloc_video_frame(AVFilterContext * ctx, void * v, int w, int h); +++int av_buffersink_set_alloc_video_frame(AVFilterContext *ctx, av_buffersink_alloc_video_frame * cb, void * v); +++ ++ /** ++ * @} ++ */ ++diff --git a/libavfilter/buffersrc.c b/libavfilter/buffersrc.c ++index ba17450b93..0dbe5d2335 100644 ++--- a/libavfilter/buffersrc.c +++++ b/libavfilter/buffersrc.c ++@@ -201,7 +201,7 @@ FF_ENABLE_DEPRECATION_WARNINGS ++ ++ switch (ctx->outputs[0]->type) { ++ case AVMEDIA_TYPE_VIDEO: ++- CHECK_VIDEO_PARAM_CHANGE(ctx, s, frame->width, frame->height, +++ CHECK_VIDEO_PARAM_CHANGE(ctx, s, av_frame_cropped_width(frame), av_frame_cropped_height(frame), ++ frame->format, frame->pts); ++ break; ++ case AVMEDIA_TYPE_AUDIO: ++diff --git a/libavfilter/bwdif.h b/libavfilter/bwdif.h ++index 889ff772ed..5ba8006e42 100644 ++--- a/libavfilter/bwdif.h +++++ b/libavfilter/bwdif.h ++@@ -35,8 +35,17 @@ typedef struct BWDIFContext { ++ void (*filter_edge)(void *dst, void *prev, void *cur, void *next, ++ int w, int prefs, int mrefs, int prefs2, int mrefs2, ++ int parity, int clip_max, int spat); +++ void (*filter_line4)(void *dst, int dstride, +++ const void *prev, const void *cur, const void *next, int prefs, +++ int w, int parity, int clip_max); ++ } BWDIFContext; ++ +++void ff_bwdif_filter_line_c(void *restrict dst1, void *restrict prev1, void *restrict cur1, void *restrict next1, +++ int w, int prefs, int mrefs, int prefs2, int mrefs2, +++ int prefs3, int mrefs3, int prefs4, int mrefs4, +++ int parity, int clip_max); +++ ++ void 
ff_bwdif_init_x86(BWDIFContext *bwdif); +++void ff_bwdif_init_aarch64(AVFilterContext *ctx); ++ ++ #endif /* AVFILTER_BWDIF_H */ ++diff --git a/libavfilter/vf_bwdif.c b/libavfilter/vf_bwdif.c ++index 65c617ebb3..fbc9491642 100644 ++--- a/libavfilter/vf_bwdif.c +++++ b/libavfilter/vf_bwdif.c ++@@ -38,6 +38,10 @@ ++ #include "video.h" ++ #include "bwdif.h" ++ +++#include +++#define OPT_TEST 0 +++#define OPT_NEW 0 +++ ++ /* ++ * Filter coefficients coef_lf and coef_hf taken from BBC PH-2071 (Weston 3 Field Deinterlacer). ++ * Used when there is spatial and temporal interpolation. ++@@ -74,10 +78,10 @@ typedef struct ThreadData { ++ int temporal_diff1 =(FFABS(prev[mrefs] - c) + FFABS(prev[prefs] - e)) >> 1; \ ++ int temporal_diff2 =(FFABS(next[mrefs] - c) + FFABS(next[prefs] - e)) >> 1; \ ++ int diff = FFMAX3(temporal_diff0 >> 1, temporal_diff1, temporal_diff2); \ ++- \ +++ {/*\ ++ if (!diff) { \ ++ dst[0] = d; \ ++- } else { +++ } else {*/ ++ ++ #define SPAT_CHECK() \ ++ int b = ((prev2[mrefs2] + next2[mrefs2]) >> 1) - c; \ ++@@ -89,15 +93,16 @@ typedef struct ThreadData { ++ diff = FFMAX3(diff, min, -max); ++ ++ #define FILTER_LINE() \ +++ int i1, i2; \ ++ SPAT_CHECK() \ ++- if (FFABS(c - e) > temporal_diff0) { \ ++- interpol = (((coef_hf[0] * (prev2[0] + next2[0]) \ +++ /*if (FFABS(c - e) > temporal_diff0)*/ { \ +++ i1 = (((coef_hf[0] * (prev2[0] + next2[0]) \ ++ - coef_hf[1] * (prev2[mrefs2] + next2[mrefs2] + prev2[prefs2] + next2[prefs2]) \ ++ + coef_hf[2] * (prev2[mrefs4] + next2[mrefs4] + prev2[prefs4] + next2[prefs4])) >> 2) \ ++ + coef_lf[0] * (c + e) - coef_lf[1] * (cur[mrefs3] + cur[prefs3])) >> 13; \ ++- } else { \ ++- interpol = (coef_sp[0] * (c + e) - coef_sp[1] * (cur[mrefs3] + cur[prefs3])) >> 13; \ ++- } +++ } /*else*/ { \ +++ i2 = (coef_sp[0] * (c + e) - coef_sp[1] * (cur[mrefs3] + cur[prefs3])) >> 13; \ +++ }interpol = FFABS(c - e) > temporal_diff0 ? i1:i2;\ ++ ++ #define FILTER_EDGE() \ ++ if (spat) { \ ++@@ -111,7 +116,7 @@ typedef struct ThreadData { ++ else if (interpol < d - diff) \ ++ interpol = d - diff; \ ++ \ ++- dst[0] = av_clip(interpol, 0, clip_max); \ +++ dst[0] = !diff ? 
d : av_clip(interpol, 0, clip_max); \ ++ } \ ++ \ ++ dst++; \ ++@@ -122,7 +127,7 @@ typedef struct ThreadData { ++ next2++; \ ++ } ++ ++-static void filter_intra(void *dst1, void *cur1, int w, int prefs, int mrefs, +++static void __attribute__((optimize("tree-vectorize"))) filter_intra(void *restrict dst1, void *restrict cur1, int w, int prefs, int mrefs, ++ int prefs3, int mrefs3, int parity, int clip_max) ++ { ++ uint8_t *dst = dst1; ++@@ -132,7 +137,101 @@ static void filter_intra(void *dst1, void *cur1, int w, int prefs, int mrefs, ++ FILTER_INTRA() ++ } ++ ++-static void filter_line_c(void *dst1, void *prev1, void *cur1, void *next1, +++#if OPT_NEW +++void __attribute__((optimize("tree-vectorize"))) ff_bwdif_filter_line_c(void *restrict dst1, void *restrict prev1, void *restrict cur1, void *restrict next1, +++ int w, int prefs, int mrefs, int prefs2, int mrefs2, +++ int prefs3, int mrefs3, int prefs4, int mrefs4, +++ int parity, int clip_max) +++{ +++ if (parity) { +++ uint8_t * restrict dst = dst1; +++ const uint8_t * prev = prev1; +++ const uint8_t * cur = cur1; +++ const uint8_t * next = next1; +++ const uint8_t * prev2 = prev; +++ const uint8_t * next2 = cur; +++ int interpol, x; +++ +++ FILTER1() +++ FILTER_LINE() +++ FILTER2() +++ } +++ else { +++ uint8_t * restrict dst = dst1; +++ const uint8_t * prev = prev1; +++ const uint8_t * cur = cur1; +++ const uint8_t * next = next1; +++ int interpol, x; +++#define prev2 cur +++#define next2 next +++ +++ for (x = 0; x < w; x++) { +++ int diff0; +++ int d0; +++ int temporal_diff0; +++ +++ int i1, i2; +++ int p4, p3, p2, p1, c0, m1, m2, m3, m4; +++ +++ m4 = prev2[mrefs4] + next2[mrefs4]; // 2 +++ p4 = prev2[prefs4] + next2[prefs4]; +++ i1 = coef_hf[2] * (m4 + p4); +++ m3 = cur[mrefs3]; // 1 +++ p3 = cur[prefs3]; +++ i1 += -coef_lf[1] * 4 * (m3 + p3); +++ m2 = prev2[mrefs2] + next2[mrefs2]; // 2 +++ p2 = prev2[prefs2] + next2[prefs2]; // 2 +++ i1 += -coef_hf[1] * (m2 + p2); +++ m1 = cur[mrefs]; // 1 +++ p1 = cur[prefs]; // 1 +++ c0 = prev2[0] + next2[0]; // 2 +++ i1 += coef_hf[0] * c0; // 4 +++ d0 = c0 >> 1; // 1 +++ temporal_diff0 = FFABS(prev2[0] - next2[0]); // 1 +++ i1 += coef_lf[0] * 4 * (m1 + p1); // - +++ { +++ int t1 =(FFABS(prev[mrefs] - m1) + FFABS(prev[prefs] - p1)) >> 1; +++ int t2 =(FFABS(next[mrefs] - m1) + FFABS(next[prefs] - p1)) >> 1; +++ diff0 = FFMAX3(temporal_diff0 >> 1, t1, t2); // 1 +++ } +++ { +++ int b = (m2 >> 1) - m1; // 1 +++ int f = (p2 >> 1) - p1; // 1 +++ int dc = d0 - m1; +++ int de = d0 - p1; +++ int sp_max = FFMAX(de, dc); +++ int sp_min = FFMIN(de, dc); +++ sp_max = FFMAX(sp_max, FFMIN(b,f)); +++ sp_min = FFMIN(sp_min, FFMAX(b,f)); +++ diff0 = FFMAX3(diff0, sp_min, -sp_max); +++ } +++ +++ i1 >>= 15; +++ +++ i2 = (coef_sp[0] * (m1 + p1) - coef_sp[1] * (m3 + p3)) >> 13; +++ +++ +++ interpol = FFABS(m1 - p1) > temporal_diff0 ? 
i1:i2; +++ +++ if (interpol > d0 + diff0) +++ interpol = d0 + diff0; +++ else if (interpol < d0 - diff0) +++ interpol = d0 - diff0; +++ +++ dst[0] = av_clip_uint8(interpol); +++ +++ dst++; +++ cur++; +++ prev++; +++ next++; +++#undef prev2 +++#undef next2 +++ } +++ } +++} +++ +++#else +++void __attribute__((optimize("tree-vectorize"))) ff_bwdif_filter_line_c(void *restrict dst1, void *restrict prev1, void *restrict cur1, void *restrict next1, ++ int w, int prefs, int mrefs, int prefs2, int mrefs2, ++ int prefs3, int mrefs3, int prefs4, int mrefs4, ++ int parity, int clip_max) ++@@ -149,8 +248,34 @@ static void filter_line_c(void *dst1, void *prev1, void *cur1, void *next1, ++ FILTER_LINE() ++ FILTER2() ++ } +++#endif +++ +++#define NEXT_LINE()\ +++ dst += d_stride; \ +++ prev += prefs; \ +++ cur += prefs; \ +++ next += prefs; +++ +++// ***** Temp +++static void __attribute__((optimize("tree-vectorize"))) filter_line4_c(void *restrict dst1, int d_stride, +++ const void *restrict prev1, const void *restrict cur1, const void *restrict next1, int prefs, +++ int w, int parity, int clip_max) +++{ +++ uint8_t * restrict dst = dst1; +++ const uint8_t * restrict prev = prev1; +++ const uint8_t * restrict cur = cur1; +++ const uint8_t * restrict next = next1; +++ +++ ff_bwdif_filter_line_c(dst, (void*)prev, (void*)cur, (void*)next, w, +++ prefs, -prefs, prefs * 2, - prefs * 2, prefs * 3, -prefs * 3, prefs * 4, -prefs * 4, parity, clip_max); +++ NEXT_LINE(); +++ memcpy(dst, cur, w); +++ NEXT_LINE(); +++ ff_bwdif_filter_line_c(dst, (void*)prev, (void*)cur, (void*)next, w, +++ prefs, -prefs, prefs * 2, - prefs * 2, prefs * 3, -prefs * 3, prefs * 4, -prefs * 4, parity, clip_max); +++} ++ ++-static void filter_edge(void *dst1, void *prev1, void *cur1, void *next1, +++static void __attribute__((optimize("tree-vectorize"))) filter_edge(void *restrict dst1, void *restrict prev1, void *restrict cur1, void *restrict next1, ++ int w, int prefs, int mrefs, int prefs2, int mrefs2, ++ int parity, int clip_max, int spat) ++ { ++@@ -167,7 +292,7 @@ static void filter_edge(void *dst1, void *prev1, void *cur1, void *next1, ++ FILTER2() ++ } ++ ++-static void filter_intra_16bit(void *dst1, void *cur1, int w, int prefs, int mrefs, +++static void __attribute__((optimize("tree-vectorize"))) filter_intra_16bit(void *restrict dst1, void *restrict cur1, int w, int prefs, int mrefs, ++ int prefs3, int mrefs3, int parity, int clip_max) ++ { ++ uint16_t *dst = dst1; ++@@ -177,7 +302,7 @@ static void filter_intra_16bit(void *dst1, void *cur1, int w, int prefs, int mre ++ FILTER_INTRA() ++ } ++ ++-static void filter_line_c_16bit(void *dst1, void *prev1, void *cur1, void *next1, +++static void __attribute__((optimize("tree-vectorize"))) filter_line_c_16bit(void *restrict dst1, void *restrict prev1, void *restrict cur1, void *restrict next1, ++ int w, int prefs, int mrefs, int prefs2, int mrefs2, ++ int prefs3, int mrefs3, int prefs4, int mrefs4, ++ int parity, int clip_max) ++@@ -195,7 +320,7 @@ static void filter_line_c_16bit(void *dst1, void *prev1, void *cur1, void *next1 ++ FILTER2() ++ } ++ ++-static void filter_edge_16bit(void *dst1, void *prev1, void *cur1, void *next1, +++static void __attribute__((optimize("tree-vectorize"))) filter_edge_16bit(void *restrict dst1, void *restrict prev1, void *restrict cur1, void *restrict next1, ++ int w, int prefs, int mrefs, int prefs2, int mrefs2, ++ int parity, int clip_max, int spat) ++ { ++@@ -244,6 +369,10 @@ static int filter_slice(AVFilterContext *ctx, void *arg, int jobnr, int 
nb_jobs) ++ refs << 1, -(refs << 1), ++ td->parity ^ td->tff, clip_max, ++ (y < 2) || ((y + 3) > td->h) ? 0 : 1); +++ } else if (s->filter_line4 && y + 2 < slice_end && ((y + 7) <= td->h)) { +++ s->filter_line4(dst, td->frame->linesize[td->plane], prev, cur, next, refs, td->w, +++ td->parity ^ td->tff, clip_max); +++ y += 2; ++ } else { ++ s->filter_line(dst, prev, cur, next, td->w, ++ refs, -refs, refs << 1, -(refs << 1), ++@@ -258,6 +387,19 @@ static int filter_slice(AVFilterContext *ctx, void *arg, int jobnr, int nb_jobs) ++ return 0; ++ } ++ +++#if OPT_TEST +++static unsigned int test_frames = 0; +++static uint64_t cum_time = 0; +++static uint64_t min_delta = 99999999; +++static uint64_t max_delta = 0; +++static uint64_t utime(void) +++{ +++ struct timespec ts; +++ clock_gettime(CLOCK_MONOTONIC, &ts); +++ return ts.tv_nsec / 1000 + (uint64_t)ts.tv_sec * 1000000; +++} +++#endif +++ ++ static void filter(AVFilterContext *ctx, AVFrame *dstpic, ++ int parity, int tff) ++ { ++@@ -278,9 +420,23 @@ static void filter(AVFilterContext *ctx, AVFrame *dstpic, ++ td.w = w; ++ td.h = h; ++ td.plane = i; ++- +++#if OPT_TEST +++ { +++ const uint64_t now = utime(); +++ uint64_t delta; +++ filter_slice(ctx, &td, 0, 1); +++ delta = utime() - now; +++ ++test_frames; +++ cum_time += delta; +++ if (min_delta > delta) +++ min_delta = delta; +++ if (max_delta < delta) +++ max_delta = delta; +++ } +++#else ++ ff_filter_execute(ctx, filter_slice, &td, NULL, ++ FFMIN(h, ff_filter_get_nb_threads(ctx))); +++#endif ++ } ++ if (yadif->current_field == YADIF_FIELD_END) { ++ yadif->current_field = YADIF_FIELD_NORMAL; ++@@ -297,6 +453,11 @@ static av_cold void uninit(AVFilterContext *ctx) ++ av_frame_free(&yadif->prev); ++ av_frame_free(&yadif->cur ); ++ av_frame_free(&yadif->next); +++#if OPT_TEST +++ av_log(ctx, AV_LOG_INFO, "Stats: Avg:%"PRIu64", Max:%"PRIu64", Min:%"PRIu64"\n", +++ test_frames == 0 ? (uint64_t)0 : cum_time / test_frames, +++ max_delta, min_delta); +++#endif ++ } ++ ++ static const enum AVPixelFormat pix_fmts[] = { ++@@ -340,19 +501,27 @@ static int config_props(AVFilterLink *link) ++ ++ yadif->csp = av_pix_fmt_desc_get(link->format); ++ yadif->filter = filter; +++ s->filter_line4 = 0; ++ if (yadif->csp->comp[0].depth > 8) { ++ s->filter_intra = filter_intra_16bit; ++ s->filter_line = filter_line_c_16bit; ++ s->filter_edge = filter_edge_16bit; ++ } else { ++ s->filter_intra = filter_intra; ++- s->filter_line = filter_line_c; +++ s->filter_line = ff_bwdif_filter_line_c; ++ s->filter_edge = filter_edge; +++ if (yadif->useasm == 0) +++ s->filter_line4 = filter_line4_c; ++ } ++ +++ if (yadif->useasm != 0) +++ { ++ #if ARCH_X86 ++- ff_bwdif_init_x86(s); +++ ff_bwdif_init_x86(s); +++#elif ARCH_AARCH64 +++ ff_bwdif_init_aarch64(ctx); ++ #endif +++ } ++ ++ return 0; ++ } ++@@ -377,6 +546,7 @@ static const AVOption bwdif_options[] = { ++ CONST("all", "deinterlace all frames", YADIF_DEINT_ALL, "deint"), ++ CONST("interlaced", "only deinterlace frames marked as interlaced", YADIF_DEINT_INTERLACED, "deint"), ++ +++ {"useasm", "use asm functions (default true)", OFFSET(useasm), AV_OPT_TYPE_INT, {.i64=1}, 0, 3, FLAGS, NULL }, ++ { NULL } ++ }; ++ ++diff --git a/libavfilter/vf_deinterlace_v4l2m2m.c b/libavfilter/vf_deinterlace_v4l2m2m.c ++new file mode 100644 ++index 0000000000..a173a291f8 ++--- /dev/null +++++ b/libavfilter/vf_deinterlace_v4l2m2m.c ++@@ -0,0 +1,2102 @@ +++/* +++ * This file is part of FFmpeg. 
+++ * +++ * FFmpeg is free software; you can redistribute it and/or +++ * modify it under the terms of the GNU Lesser General Public +++ * License as published by the Free Software Foundation; either +++ * version 2.1 of the License, or (at your option) any later version. +++ * +++ * FFmpeg is distributed in the hope that it will be useful, +++ * but WITHOUT ANY WARRANTY; without even the implied warranty of +++ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU +++ * Lesser General Public License for more details. +++ * +++ * You should have received a copy of the GNU Lesser General Public +++ * License along with FFmpeg; if not, write to the Free Software +++ * Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA +++ */ +++ +++/** +++ * @file +++ * deinterlace video filter - V4L2 M2M +++ */ +++ +++#include +++ +++#include +++ +++#include +++#include +++#include +++#include +++#include +++#include +++#include +++#include +++#include +++ +++#include "config.h" +++ +++#include "libavutil/avassert.h" +++#include "libavutil/avstring.h" +++#include "libavutil/common.h" +++#include "libavutil/hwcontext.h" +++#include "libavutil/hwcontext_drm.h" +++#include "libavutil/internal.h" +++#include "libavutil/mathematics.h" +++#include "libavutil/opt.h" +++#include "libavutil/pixdesc.h" +++#include "libavutil/time.h" +++ +++#define FF_INTERNAL_FIELDS 1 +++#include "framequeue.h" +++#include "filters.h" +++#include "avfilter.h" +++#include "formats.h" +++#include "internal.h" +++#include "scale_eval.h" +++#include "video.h" +++ +++#ifndef DRM_FORMAT_P030 +++#define DRM_FORMAT_P030 fourcc_code('P', '0', '3', '0') /* 2x2 subsampled Cr:Cb plane 10 bits per channel packed */ +++#endif +++ +++// V4L2_PIX_FMT_NV12_10_COL128 and V4L2_PIX_FMT_NV12_COL128 should be defined +++// in drm_fourcc.h hopefully will be sometime in the future but until then... 
+++#ifndef V4L2_PIX_FMT_NV12_10_COL128 +++#define V4L2_PIX_FMT_NV12_10_COL128 v4l2_fourcc('N', 'C', '3', '0') +++#endif +++ +++#ifndef V4L2_PIX_FMT_NV12_COL128 +++#define V4L2_PIX_FMT_NV12_COL128 v4l2_fourcc('N', 'C', '1', '2') /* 12 Y/CbCr 4:2:0 128 pixel wide column */ +++#endif +++ +++typedef struct V4L2Queue V4L2Queue; +++typedef struct DeintV4L2M2MContextShared DeintV4L2M2MContextShared; +++ +++typedef enum filter_type_v4l2_e +++{ +++ FILTER_V4L2_DEINTERLACE = 1, +++ FILTER_V4L2_SCALE, +++} filter_type_v4l2_t; +++ +++typedef struct V4L2Buffer { +++ int enqueued; +++ int reenqueue; +++ struct v4l2_buffer buffer; +++ AVFrame frame; +++ struct v4l2_plane planes[VIDEO_MAX_PLANES]; +++ int num_planes; +++ AVDRMFrameDescriptor drm_frame; +++ V4L2Queue *q; +++} V4L2Buffer; +++ +++typedef struct V4L2Queue { +++ struct v4l2_format format; +++ struct v4l2_selection sel; +++ int eos; +++ int num_buffers; +++ V4L2Buffer *buffers; +++ const char * name; +++ DeintV4L2M2MContextShared *ctx; +++} V4L2Queue; +++ +++typedef struct pts_stats_s +++{ +++ void * logctx; +++ const char * name; // For debug +++ unsigned int last_count; +++ unsigned int last_interval; +++ int64_t last_pts; +++} pts_stats_t; +++ +++#define PTS_TRACK_SIZE 32 +++typedef struct pts_track_el_s +++{ +++ uint32_t n; +++ unsigned int interval; +++ AVFrame * props; +++} pts_track_el_t; +++ +++typedef struct pts_track_s +++{ +++ uint32_t n; +++ uint32_t last_n; +++ int got_2; +++ void * logctx; +++ pts_stats_t stats; +++ pts_track_el_t a[PTS_TRACK_SIZE]; +++} pts_track_t; +++ +++typedef enum drain_state_e +++{ +++ DRAIN_NONE = 0, // Not draining +++ DRAIN_TIMEOUT, // Drain until normal timeout setup yields no frame +++ DRAIN_LAST, // Drain with long timeout last_frame in received on output expected +++ DRAIN_EOS, // Drain with long timeout EOS expected +++ DRAIN_DONE // Drained +++} drain_state_t; +++ +++typedef struct DeintV4L2M2MContextShared { +++ void * logctx; // For logging - will be NULL when done +++ filter_type_v4l2_t filter_type; +++ +++ int fd; +++ int done; // fd closed - awating all refs dropped +++ int width; +++ int height; +++ +++ int drain; // EOS received (inlink status) +++ drain_state_t drain_state; +++ int64_t drain_pts; // PTS associated with inline status +++ +++ unsigned int frames_rx; +++ unsigned int frames_tx; +++ +++ // from options +++ int output_width; +++ int output_height; +++ enum AVPixelFormat output_format; +++ +++ int has_enc_stop; +++ // We expect to get exactly the same number of frames out as we put in +++ // We can drain by matching input to output +++ int one_to_one; +++ +++ int orig_width; +++ int orig_height; +++ atomic_uint refcount; +++ +++ AVBufferRef *hw_frames_ctx; +++ +++ unsigned int field_order; +++ +++ pts_track_t track; +++ +++ V4L2Queue output; +++ V4L2Queue capture; +++} DeintV4L2M2MContextShared; +++ +++typedef struct DeintV4L2M2MContext { +++ const AVClass *class; +++ +++ DeintV4L2M2MContextShared *shared; +++ +++ char * w_expr; +++ char * h_expr; +++ char * output_format_string;; +++ +++ int force_original_aspect_ratio; +++ int force_divisible_by; +++ +++ char *colour_primaries_string; +++ char *colour_transfer_string; +++ char *colour_matrix_string; +++ int colour_range; +++ char *chroma_location_string; +++ +++ enum AVColorPrimaries colour_primaries; +++ enum AVColorTransferCharacteristic colour_transfer; +++ enum AVColorSpace colour_matrix; +++ enum AVChromaLocation chroma_location; +++} DeintV4L2M2MContext; +++ +++ +++static inline int drain_frame_expected(const 
drain_state_t d) +++{ +++ return d == DRAIN_EOS || d == DRAIN_LAST; +++} +++ +++// These just list the ones we know we can cope with +++static uint32_t +++fmt_av_to_v4l2(const enum AVPixelFormat avfmt) +++{ +++ switch (avfmt) { +++ case AV_PIX_FMT_YUV420P: +++ return V4L2_PIX_FMT_YUV420; +++ case AV_PIX_FMT_NV12: +++ return V4L2_PIX_FMT_NV12; +++#if CONFIG_SAND +++ case AV_PIX_FMT_RPI4_8: +++ case AV_PIX_FMT_SAND128: +++ return V4L2_PIX_FMT_NV12_COL128; +++#endif +++ default: +++ break; +++ } +++ return 0; +++} +++ +++static enum AVPixelFormat +++fmt_v4l2_to_av(const uint32_t pixfmt) +++{ +++ switch (pixfmt) { +++ case V4L2_PIX_FMT_YUV420: +++ return AV_PIX_FMT_YUV420P; +++ case V4L2_PIX_FMT_NV12: +++ return AV_PIX_FMT_NV12; +++#if CONFIG_SAND +++ case V4L2_PIX_FMT_NV12_COL128: +++ return AV_PIX_FMT_RPI4_8; +++#endif +++ default: +++ break; +++ } +++ return AV_PIX_FMT_NONE; +++} +++ +++static unsigned int pts_stats_interval(const pts_stats_t * const stats) +++{ +++ return stats->last_interval; +++} +++ +++// Pick 64 for max last count - that is >1sec at 60fps +++#define STATS_LAST_COUNT_MAX 64 +++#define STATS_INTERVAL_MAX (1 << 30) +++static void pts_stats_add(pts_stats_t * const stats, int64_t pts) +++{ +++ if (pts == AV_NOPTS_VALUE || pts == stats->last_pts) { +++ if (stats->last_count < STATS_LAST_COUNT_MAX) +++ ++stats->last_count; +++ return; +++ } +++ +++ if (stats->last_pts != AV_NOPTS_VALUE) { +++ const int64_t interval = pts - stats->last_pts; +++ +++ if (interval < 0 || interval >= STATS_INTERVAL_MAX || +++ stats->last_count >= STATS_LAST_COUNT_MAX) { +++ if (stats->last_interval != 0) +++ av_log(stats->logctx, AV_LOG_DEBUG, "%s: %s: Bad interval: %" PRId64 "/%d\n", +++ __func__, stats->name, interval, stats->last_count); +++ stats->last_interval = 0; +++ } +++ else { +++ const int64_t frame_time = interval / (int64_t)stats->last_count; +++ +++ if (frame_time != stats->last_interval) +++ av_log(stats->logctx, AV_LOG_DEBUG, "%s: %s: New interval: %u->%" PRId64 "/%d=%" PRId64 "\n", +++ __func__, stats->name, stats->last_interval, interval, stats->last_count, frame_time); +++ stats->last_interval = frame_time; +++ } +++ } +++ +++ stats->last_pts = pts; +++ stats->last_count = 1; +++} +++ +++static void pts_stats_init(pts_stats_t * const stats, void * logctx, const char * name) +++{ +++ *stats = (pts_stats_t){ +++ .logctx = logctx, +++ .name = name, +++ .last_count = 1, +++ .last_interval = 0, +++ .last_pts = AV_NOPTS_VALUE +++ }; +++} +++ +++static inline uint32_t pts_track_next_n(pts_track_t * const trk) +++{ +++ if (++trk->n == 0) +++ trk->n = 1; +++ return trk->n; +++} +++ +++static int pts_track_get_frame(pts_track_t * const trk, const struct timeval tv, AVFrame * const dst) +++{ +++ uint32_t n = (uint32_t)(tv.tv_usec / 2 + tv.tv_sec * 500000); +++ pts_track_el_t * t; +++ +++ // As a first guess assume that n==0 means last frame +++ if (n == 0) { +++ n = trk->last_n; +++ if (n == 0) +++ goto fail; +++ } +++ +++ t = trk->a + (n & (PTS_TRACK_SIZE - 1)); +++ +++ if (t->n != n) { +++ av_log(trk->logctx, AV_LOG_ERROR, "%s: track failure: got %u, expected %u\n", __func__, n, trk->n); +++ goto fail; +++ } +++ +++ // 1st frame is simple - just believe it +++ if (n != trk->last_n) { +++ trk->last_n = n; +++ trk->got_2 = 0; +++ return av_frame_copy_props(dst, t->props); +++ } +++ +++ // Only believe in a single interpolated frame +++ if (trk->got_2) +++ goto fail; +++ trk->got_2 = 1; +++ +++ av_frame_copy_props(dst, t->props); +++ +++ +++ // If we can't guess - don't +++ if (t->interval 
== 0) { +++ dst->best_effort_timestamp = AV_NOPTS_VALUE; +++ dst->pts = AV_NOPTS_VALUE; +++ dst->pkt_dts = AV_NOPTS_VALUE; +++ } +++ else { +++ if (dst->best_effort_timestamp != AV_NOPTS_VALUE) +++ dst->best_effort_timestamp += t->interval / 2; +++ if (dst->pts != AV_NOPTS_VALUE) +++ dst->pts += t->interval / 2; +++ if (dst->pkt_dts != AV_NOPTS_VALUE) +++ dst->pkt_dts += t->interval / 2; +++ } +++ +++ return 0; +++ +++fail: +++ trk->last_n = 0; +++ trk->got_2 = 0; +++ dst->pts = AV_NOPTS_VALUE; +++ dst->pkt_dts = AV_NOPTS_VALUE; +++ return 0; +++} +++ +++// We are only ever expecting in-order frames so nothing more clever is required +++static unsigned int +++pts_track_count(const pts_track_t * const trk) +++{ +++ return (trk->n - trk->last_n) & (PTS_TRACK_SIZE - 1); +++} +++ +++static struct timeval pts_track_add_frame(pts_track_t * const trk, const AVFrame * const src) +++{ +++ const uint32_t n = pts_track_next_n(trk); +++ pts_track_el_t * const t = trk->a + (n & (PTS_TRACK_SIZE - 1)); +++ +++ pts_stats_add(&trk->stats, src->pts); +++ +++ t->n = n; +++ t->interval = pts_stats_interval(&trk->stats); // guess that next interval is the same as the last +++ av_frame_unref(t->props); +++ av_frame_copy_props(t->props, src); +++ +++ // We now know what the previous interval was, rather than having to guess, +++ // so set it. There is a better than decent chance that this is before +++ // we use it. +++ if (t->interval != 0) { +++ pts_track_el_t * const prev_t = trk->a + ((n - 1) & (PTS_TRACK_SIZE - 1)); +++ prev_t->interval = t->interval; +++ } +++ +++ // In case deinterlace interpolates frames use every other usec +++ return (struct timeval){.tv_sec = n / 500000, .tv_usec = (n % 500000) * 2}; +++} +++ +++static void pts_track_uninit(pts_track_t * const trk) +++{ +++ unsigned int i; +++ for (i = 0; i != PTS_TRACK_SIZE; ++i) { +++ trk->a[i].n = 0; +++ av_frame_free(&trk->a[i].props); +++ } +++} +++ +++static int pts_track_init(pts_track_t * const trk, void *logctx) +++{ +++ unsigned int i; +++ trk->n = 1; +++ pts_stats_init(&trk->stats, logctx, "track"); +++ for (i = 0; i != PTS_TRACK_SIZE; ++i) { +++ trk->a[i].n = 0; +++ if ((trk->a[i].props = av_frame_alloc()) == NULL) { +++ pts_track_uninit(trk); +++ return AVERROR(ENOMEM); +++ } +++ } +++ return 0; +++} +++ +++static inline uint32_t +++fmt_bpl(const struct v4l2_format * const fmt, const unsigned int plane_n) +++{ +++ return V4L2_TYPE_IS_MULTIPLANAR(fmt->type) ? fmt->fmt.pix_mp.plane_fmt[plane_n].bytesperline : fmt->fmt.pix.bytesperline; +++} +++ +++static inline uint32_t +++fmt_height(const struct v4l2_format * const fmt) +++{ +++ return V4L2_TYPE_IS_MULTIPLANAR(fmt->type) ? fmt->fmt.pix_mp.height : fmt->fmt.pix.height; +++} +++ +++static inline uint32_t +++fmt_width(const struct v4l2_format * const fmt) +++{ +++ return V4L2_TYPE_IS_MULTIPLANAR(fmt->type) ? fmt->fmt.pix_mp.width : fmt->fmt.pix.width; +++} +++ +++static inline uint32_t +++fmt_pixelformat(const struct v4l2_format * const fmt) +++{ +++ return V4L2_TYPE_IS_MULTIPLANAR(fmt->type) ? fmt->fmt.pix_mp.pixelformat : fmt->fmt.pix.pixelformat; +++} +++ +++static inline uint32_t +++buf_bytesused0(const struct v4l2_buffer * const buf) +++{ +++ return V4L2_TYPE_IS_MULTIPLANAR(buf->type) ? 
buf->m.planes[0].bytesused : buf->bytesused; +++} +++ +++static void +++init_format(V4L2Queue * const q, const uint32_t format_type) +++{ +++ memset(&q->format, 0, sizeof(q->format)); +++ memset(&q->sel, 0, sizeof(q->sel)); +++ q->format.type = format_type; +++ q->sel.type = format_type; +++} +++ +++static int deint_v4l2m2m_prepare_context(DeintV4L2M2MContextShared *ctx) +++{ +++ struct v4l2_capability cap; +++ int ret; +++ +++ memset(&cap, 0, sizeof(cap)); +++ ret = ioctl(ctx->fd, VIDIOC_QUERYCAP, &cap); +++ if (ret < 0) +++ return ret; +++ +++ if (ctx->filter_type == FILTER_V4L2_SCALE && +++ strcmp("bcm2835-codec-isp", cap.card) != 0) +++ { +++ av_log(ctx->logctx, AV_LOG_DEBUG, "Not ISP\n"); +++ return AVERROR(EINVAL); +++ } +++ +++ if (!(cap.capabilities & V4L2_CAP_STREAMING)) { +++ av_log(ctx->logctx, AV_LOG_DEBUG, "No streaming\n"); +++ return AVERROR(EINVAL); +++ } +++ +++ if (cap.capabilities & V4L2_CAP_VIDEO_M2M_MPLANE) { +++ init_format(&ctx->capture, V4L2_BUF_TYPE_VIDEO_CAPTURE_MPLANE); +++ init_format(&ctx->output, V4L2_BUF_TYPE_VIDEO_OUTPUT_MPLANE); +++ } +++ else if (cap.capabilities & V4L2_CAP_VIDEO_M2M) { +++ init_format(&ctx->capture, V4L2_BUF_TYPE_VIDEO_CAPTURE); +++ init_format(&ctx->output, V4L2_BUF_TYPE_VIDEO_OUTPUT); +++ } +++ else { +++ av_log(ctx->logctx, AV_LOG_DEBUG, "Not M2M\n"); +++ return AVERROR(EINVAL); +++ } +++ +++ return 0; +++} +++ +++// Just use for probe - doesn't modify q format +++static int deint_v4l2m2m_try_format(V4L2Queue *queue, const uint32_t width, const uint32_t height, const enum AVPixelFormat avfmt) +++{ +++ struct v4l2_format fmt = {.type = queue->format.type}; +++ DeintV4L2M2MContextShared *ctx = queue->ctx; +++ int ret, field; +++ // Pick YUV to test with if not otherwise specified +++ uint32_t pixelformat = avfmt == AV_PIX_FMT_NONE ? V4L2_PIX_FMT_YUV420 : fmt_av_to_v4l2(avfmt); +++ enum AVPixelFormat r_avfmt; +++ +++ +++ ret = ioctl(ctx->fd, VIDIOC_G_FMT, &fmt); +++ if (ret) +++ av_log(ctx->logctx, AV_LOG_ERROR, "VIDIOC_G_FMT failed: %d\n", ret); +++ +++ if (ctx->filter_type == FILTER_V4L2_DEINTERLACE && V4L2_TYPE_IS_OUTPUT(fmt.type)) +++ field = V4L2_FIELD_INTERLACED_TB; +++ else +++ field = V4L2_FIELD_NONE; +++ +++ if (V4L2_TYPE_IS_MULTIPLANAR(fmt.type)) { +++ fmt.fmt.pix_mp.pixelformat = pixelformat; +++ fmt.fmt.pix_mp.field = field; +++ fmt.fmt.pix_mp.width = width; +++ fmt.fmt.pix_mp.height = height; +++ } else { +++ fmt.fmt.pix.pixelformat = pixelformat; +++ fmt.fmt.pix.field = field; +++ fmt.fmt.pix.width = width; +++ fmt.fmt.pix.height = height; +++ } +++ +++ av_log(ctx->logctx, AV_LOG_TRACE, "%s: Trying format for type %d, wxh: %dx%d, fmt: %08x, size %u bpl %u pre\n", __func__, +++ fmt.type, fmt.fmt.pix_mp.width, fmt.fmt.pix_mp.height, +++ fmt.fmt.pix_mp.pixelformat, +++ fmt.fmt.pix_mp.plane_fmt[0].sizeimage, fmt.fmt.pix_mp.plane_fmt[0].bytesperline); +++ +++ ret = ioctl(ctx->fd, VIDIOC_TRY_FMT, &fmt); +++ if (ret) +++ return AVERROR(EINVAL); +++ +++ av_log(ctx->logctx, AV_LOG_TRACE, "%s: Trying format for type %d, wxh: %dx%d, fmt: %08x, size %u bpl %u post\n", __func__, +++ fmt.type, fmt.fmt.pix_mp.width, fmt.fmt.pix_mp.height, +++ fmt.fmt.pix_mp.pixelformat, +++ fmt.fmt.pix_mp.plane_fmt[0].sizeimage, fmt.fmt.pix_mp.plane_fmt[0].bytesperline); +++ +++ r_avfmt = fmt_v4l2_to_av(fmt_pixelformat(&fmt)); +++ if (r_avfmt != avfmt && avfmt != AV_PIX_FMT_NONE) { +++ av_log(ctx->logctx, AV_LOG_DEBUG, "Unable to set format %s on %s port\n", av_get_pix_fmt_name(avfmt), V4L2_TYPE_IS_CAPTURE(fmt.type) ? 
"dest" : "src"); +++ return AVERROR(EINVAL); +++ } +++ if (r_avfmt == AV_PIX_FMT_NONE) { +++ av_log(ctx->logctx, AV_LOG_DEBUG, "No supported format on %s port\n", V4L2_TYPE_IS_CAPTURE(fmt.type) ? "dest" : "src"); +++ return AVERROR(EINVAL); +++ } +++ +++ if (V4L2_TYPE_IS_MULTIPLANAR(fmt.type)) { +++ if (fmt.fmt.pix_mp.field != field) { +++ av_log(ctx->logctx, AV_LOG_DEBUG, "format not supported for type %d\n", fmt.type); +++ +++ return AVERROR(EINVAL); +++ } +++ } else { +++ if (fmt.fmt.pix.field != field) { +++ av_log(ctx->logctx, AV_LOG_DEBUG, "format not supported for type %d\n", fmt.type); +++ +++ return AVERROR(EINVAL); +++ } +++ } +++ +++ return 0; +++} +++ +++static int +++do_s_fmt(V4L2Queue * const q) +++{ +++ DeintV4L2M2MContextShared * const ctx = q->ctx; +++ const uint32_t pixelformat = fmt_pixelformat(&q->format); +++ int ret; +++ +++ ret = ioctl(ctx->fd, VIDIOC_S_FMT, &q->format); +++ if (ret) { +++ ret = AVERROR(errno); +++ av_log(ctx->logctx, AV_LOG_ERROR, "VIDIOC_S_FMT failed: %s\n", av_err2str(ret)); +++ return ret; +++ } +++ +++ if (pixelformat != fmt_pixelformat(&q->format)) { +++ av_log(ctx->logctx, AV_LOG_ERROR, "Format not supported: %s; S_FMT returned %s\n", av_fourcc2str(pixelformat), av_fourcc2str(fmt_pixelformat(&q->format))); +++ return AVERROR(EINVAL); +++ } +++ +++ q->sel.target = V4L2_TYPE_IS_OUTPUT(q->sel.type) ? V4L2_SEL_TGT_CROP : V4L2_SEL_TGT_COMPOSE, +++ q->sel.flags = V4L2_TYPE_IS_OUTPUT(q->sel.type) ? V4L2_SEL_FLAG_LE : V4L2_SEL_FLAG_GE; +++ +++ ret = ioctl(ctx->fd, VIDIOC_S_SELECTION, &q->sel); +++ if (ret) { +++ ret = AVERROR(errno); +++ av_log(ctx->logctx, AV_LOG_WARNING, "VIDIOC_S_SELECTION failed: %s\n", av_err2str(ret)); +++ } +++ +++ return 0; +++} +++ +++static void +++set_fmt_color(struct v4l2_format *const fmt, +++ const enum AVColorPrimaries avcp, +++ const enum AVColorSpace avcs, +++ const enum AVColorTransferCharacteristic avxc) +++{ +++ enum v4l2_ycbcr_encoding ycbcr = V4L2_YCBCR_ENC_DEFAULT; +++ enum v4l2_colorspace cs = V4L2_COLORSPACE_DEFAULT; +++ enum v4l2_xfer_func xfer = V4L2_XFER_FUNC_DEFAULT; +++ +++ switch (avcp) { +++ case AVCOL_PRI_BT709: +++ cs = V4L2_COLORSPACE_REC709; +++ ycbcr = V4L2_YCBCR_ENC_709; +++ break; +++ case AVCOL_PRI_BT470M: +++ cs = V4L2_COLORSPACE_470_SYSTEM_M; +++ ycbcr = V4L2_YCBCR_ENC_601; +++ break; +++ case AVCOL_PRI_BT470BG: +++ cs = V4L2_COLORSPACE_470_SYSTEM_BG; +++ break; +++ case AVCOL_PRI_SMPTE170M: +++ cs = V4L2_COLORSPACE_SMPTE170M; +++ break; +++ case AVCOL_PRI_SMPTE240M: +++ cs = V4L2_COLORSPACE_SMPTE240M; +++ break; +++ case AVCOL_PRI_BT2020: +++ cs = V4L2_COLORSPACE_BT2020; +++ break; +++ case AVCOL_PRI_SMPTE428: +++ case AVCOL_PRI_SMPTE431: +++ case AVCOL_PRI_SMPTE432: +++ case AVCOL_PRI_EBU3213: +++ case AVCOL_PRI_RESERVED: +++ case AVCOL_PRI_FILM: +++ case AVCOL_PRI_UNSPECIFIED: +++ default: +++ break; +++ } +++ +++ switch (avcs) { +++ case AVCOL_SPC_RGB: +++ cs = V4L2_COLORSPACE_SRGB; +++ break; +++ case AVCOL_SPC_BT709: +++ cs = V4L2_COLORSPACE_REC709; +++ break; +++ case AVCOL_SPC_FCC: +++ cs = V4L2_COLORSPACE_470_SYSTEM_M; +++ break; +++ case AVCOL_SPC_BT470BG: +++ cs = V4L2_COLORSPACE_470_SYSTEM_BG; +++ break; +++ case AVCOL_SPC_SMPTE170M: +++ cs = V4L2_COLORSPACE_SMPTE170M; +++ break; +++ case AVCOL_SPC_SMPTE240M: +++ cs = V4L2_COLORSPACE_SMPTE240M; +++ break; +++ case AVCOL_SPC_BT2020_CL: +++ cs = V4L2_COLORSPACE_BT2020; +++ ycbcr = V4L2_YCBCR_ENC_BT2020_CONST_LUM; +++ break; +++ case AVCOL_SPC_BT2020_NCL: +++ cs = V4L2_COLORSPACE_BT2020; +++ break; +++ default: +++ break; +++ } +++ 
+++ switch (xfer) { +++ case AVCOL_TRC_BT709: +++ xfer = V4L2_XFER_FUNC_709; +++ break; +++ case AVCOL_TRC_IEC61966_2_1: +++ xfer = V4L2_XFER_FUNC_SRGB; +++ break; +++ case AVCOL_TRC_SMPTE240M: +++ xfer = V4L2_XFER_FUNC_SMPTE240M; +++ break; +++ case AVCOL_TRC_SMPTE2084: +++ xfer = V4L2_XFER_FUNC_SMPTE2084; +++ break; +++ default: +++ break; +++ } +++ +++ if (V4L2_TYPE_IS_MULTIPLANAR(fmt->type)) { +++ fmt->fmt.pix_mp.colorspace = cs; +++ fmt->fmt.pix_mp.ycbcr_enc = ycbcr; +++ fmt->fmt.pix_mp.xfer_func = xfer; +++ } else { +++ fmt->fmt.pix.colorspace = cs; +++ fmt->fmt.pix.ycbcr_enc = ycbcr; +++ fmt->fmt.pix.xfer_func = xfer; +++ } +++} +++ +++static void +++set_fmt_color_range(struct v4l2_format *const fmt, const enum AVColorRange avcr) +++{ +++ const enum v4l2_quantization q = +++ avcr == AVCOL_RANGE_MPEG ? V4L2_QUANTIZATION_LIM_RANGE : +++ avcr == AVCOL_RANGE_JPEG ? V4L2_QUANTIZATION_FULL_RANGE : +++ V4L2_QUANTIZATION_DEFAULT; +++ +++ if (V4L2_TYPE_IS_MULTIPLANAR(fmt->type)) { +++ fmt->fmt.pix_mp.quantization = q; +++ } else { +++ fmt->fmt.pix.quantization = q; +++ } +++} +++ +++static enum AVColorPrimaries get_color_primaries(const struct v4l2_format *const fmt) +++{ +++ enum v4l2_ycbcr_encoding ycbcr; +++ enum v4l2_colorspace cs; +++ +++ cs = V4L2_TYPE_IS_MULTIPLANAR(fmt->type) ? +++ fmt->fmt.pix_mp.colorspace : +++ fmt->fmt.pix.colorspace; +++ +++ ycbcr = V4L2_TYPE_IS_MULTIPLANAR(fmt->type) ? +++ fmt->fmt.pix_mp.ycbcr_enc: +++ fmt->fmt.pix.ycbcr_enc; +++ +++ switch(ycbcr) { +++ case V4L2_YCBCR_ENC_XV709: +++ case V4L2_YCBCR_ENC_709: return AVCOL_PRI_BT709; +++ case V4L2_YCBCR_ENC_XV601: +++ case V4L2_YCBCR_ENC_601:return AVCOL_PRI_BT470M; +++ default: +++ break; +++ } +++ +++ switch(cs) { +++ case V4L2_COLORSPACE_470_SYSTEM_BG: return AVCOL_PRI_BT470BG; +++ case V4L2_COLORSPACE_SMPTE170M: return AVCOL_PRI_SMPTE170M; +++ case V4L2_COLORSPACE_SMPTE240M: return AVCOL_PRI_SMPTE240M; +++ case V4L2_COLORSPACE_BT2020: return AVCOL_PRI_BT2020; +++ default: +++ break; +++ } +++ +++ return AVCOL_PRI_UNSPECIFIED; +++} +++ +++static enum AVColorSpace get_color_space(const struct v4l2_format *const fmt) +++{ +++ enum v4l2_ycbcr_encoding ycbcr; +++ enum v4l2_colorspace cs; +++ +++ cs = V4L2_TYPE_IS_MULTIPLANAR(fmt->type) ? +++ fmt->fmt.pix_mp.colorspace : +++ fmt->fmt.pix.colorspace; +++ +++ ycbcr = V4L2_TYPE_IS_MULTIPLANAR(fmt->type) ? +++ fmt->fmt.pix_mp.ycbcr_enc: +++ fmt->fmt.pix.ycbcr_enc; +++ +++ switch(cs) { +++ case V4L2_COLORSPACE_SRGB: return AVCOL_SPC_RGB; +++ case V4L2_COLORSPACE_REC709: return AVCOL_SPC_BT709; +++ case V4L2_COLORSPACE_470_SYSTEM_M: return AVCOL_SPC_FCC; +++ case V4L2_COLORSPACE_470_SYSTEM_BG: return AVCOL_SPC_BT470BG; +++ case V4L2_COLORSPACE_SMPTE170M: return AVCOL_SPC_SMPTE170M; +++ case V4L2_COLORSPACE_SMPTE240M: return AVCOL_SPC_SMPTE240M; +++ case V4L2_COLORSPACE_BT2020: +++ if (ycbcr == V4L2_YCBCR_ENC_BT2020_CONST_LUM) +++ return AVCOL_SPC_BT2020_CL; +++ else +++ return AVCOL_SPC_BT2020_NCL; +++ default: +++ break; +++ } +++ +++ return AVCOL_SPC_UNSPECIFIED; +++} +++ +++static enum AVColorTransferCharacteristic get_color_trc(const struct v4l2_format *const fmt) +++{ +++ enum v4l2_ycbcr_encoding ycbcr; +++ enum v4l2_xfer_func xfer; +++ enum v4l2_colorspace cs; +++ +++ cs = V4L2_TYPE_IS_MULTIPLANAR(fmt->type) ? +++ fmt->fmt.pix_mp.colorspace : +++ fmt->fmt.pix.colorspace; +++ +++ ycbcr = V4L2_TYPE_IS_MULTIPLANAR(fmt->type) ? +++ fmt->fmt.pix_mp.ycbcr_enc: +++ fmt->fmt.pix.ycbcr_enc; +++ +++ xfer = V4L2_TYPE_IS_MULTIPLANAR(fmt->type) ? 
+++ fmt->fmt.pix_mp.xfer_func: +++ fmt->fmt.pix.xfer_func; +++ +++ switch (xfer) { +++ case V4L2_XFER_FUNC_709: return AVCOL_TRC_BT709; +++ case V4L2_XFER_FUNC_SRGB: return AVCOL_TRC_IEC61966_2_1; +++ default: +++ break; +++ } +++ +++ switch (cs) { +++ case V4L2_COLORSPACE_470_SYSTEM_M: return AVCOL_TRC_GAMMA22; +++ case V4L2_COLORSPACE_470_SYSTEM_BG: return AVCOL_TRC_GAMMA28; +++ case V4L2_COLORSPACE_SMPTE170M: return AVCOL_TRC_SMPTE170M; +++ case V4L2_COLORSPACE_SMPTE240M: return AVCOL_TRC_SMPTE240M; +++ default: +++ break; +++ } +++ +++ switch (ycbcr) { +++ case V4L2_YCBCR_ENC_XV709: +++ case V4L2_YCBCR_ENC_XV601: return AVCOL_TRC_BT1361_ECG; +++ default: +++ break; +++ } +++ +++ return AVCOL_TRC_UNSPECIFIED; +++} +++ +++static enum AVColorRange get_color_range(const struct v4l2_format *const fmt) +++{ +++ enum v4l2_quantization qt; +++ +++ qt = V4L2_TYPE_IS_MULTIPLANAR(fmt->type) ? +++ fmt->fmt.pix_mp.quantization : +++ fmt->fmt.pix.quantization; +++ +++ switch (qt) { +++ case V4L2_QUANTIZATION_LIM_RANGE: return AVCOL_RANGE_MPEG; +++ case V4L2_QUANTIZATION_FULL_RANGE: return AVCOL_RANGE_JPEG; +++ default: +++ break; +++ } +++ +++ return AVCOL_RANGE_UNSPECIFIED; +++} +++ +++static int set_src_fmt(V4L2Queue * const q, const AVFrame * const frame) +++{ +++ struct v4l2_format *const format = &q->format; +++ const AVDRMFrameDescriptor *const src = (const AVDRMFrameDescriptor *)frame->data[0]; +++ +++ const uint32_t drm_fmt = src->layers[0].format; +++ // Treat INVALID as LINEAR +++ const uint64_t mod = src->objects[0].format_modifier == DRM_FORMAT_MOD_INVALID ? +++ DRM_FORMAT_MOD_LINEAR : src->objects[0].format_modifier; +++ uint32_t pix_fmt = 0; +++ uint32_t w = 0; +++ uint32_t h = 0; +++ uint32_t bpl = src->layers[0].planes[0].pitch; +++ +++ // We really don't expect multiple layers +++ // All formats that we currently cope with are single object +++ +++ if (src->nb_layers != 1 || src->nb_objects != 1) +++ return AVERROR(EINVAL); +++ +++ switch (drm_fmt) { +++ case DRM_FORMAT_YUV420: +++ if (mod == DRM_FORMAT_MOD_LINEAR) { +++ if (src->layers[0].nb_planes != 3) +++ break; +++ pix_fmt = V4L2_PIX_FMT_YUV420; +++ h = src->layers[0].planes[1].offset / bpl; +++ w = bpl; +++ } +++ break; +++ +++ case DRM_FORMAT_NV12: +++ if (mod == DRM_FORMAT_MOD_LINEAR) { +++ if (src->layers[0].nb_planes != 2) +++ break; +++ pix_fmt = V4L2_PIX_FMT_NV12; +++ h = src->layers[0].planes[1].offset / bpl; +++ w = bpl; +++ } +++#if CONFIG_SAND +++ else if (fourcc_mod_broadcom_mod(mod) == DRM_FORMAT_MOD_BROADCOM_SAND128) { +++ if (src->layers[0].nb_planes != 2) +++ break; +++ pix_fmt = V4L2_PIX_FMT_NV12_COL128; +++ w = bpl; +++ h = src->layers[0].planes[1].offset / 128; +++ bpl = fourcc_mod_broadcom_param(mod); +++ } +++#endif +++ break; +++ +++ case DRM_FORMAT_P030: +++#if CONFIG_SAND +++ if (fourcc_mod_broadcom_mod(mod) == DRM_FORMAT_MOD_BROADCOM_SAND128) { +++ if (src->layers[0].nb_planes != 2) +++ break; +++ pix_fmt = V4L2_PIX_FMT_NV12_10_COL128; +++ w = bpl / 2; // Matching lie to how we construct this +++ h = src->layers[0].planes[1].offset / 128; +++ bpl = fourcc_mod_broadcom_param(mod); +++ } +++#endif +++ break; +++ +++ default: +++ break; +++ } +++ +++ if (!pix_fmt) +++ return AVERROR(EINVAL); +++ +++ if (V4L2_TYPE_IS_MULTIPLANAR(format->type)) { +++ struct v4l2_pix_format_mplane *const pix = &format->fmt.pix_mp; +++ +++ pix->width = w; +++ pix->height = h; +++ pix->pixelformat = pix_fmt; +++ pix->plane_fmt[0].bytesperline = bpl; +++ pix->num_planes = 1; +++ } +++ else { +++ struct v4l2_pix_format *const pix 
= &format->fmt.pix; +++ +++ pix->width = w; +++ pix->height = h; +++ pix->pixelformat = pix_fmt; +++ pix->bytesperline = bpl; +++ } +++ +++ set_fmt_color(format, frame->color_primaries, frame->colorspace, frame->color_trc); +++ set_fmt_color_range(format, frame->color_range); +++ +++ q->sel.r.width = frame->width - (frame->crop_left + frame->crop_right); +++ q->sel.r.height = frame->height - (frame->crop_top + frame->crop_bottom); +++ q->sel.r.left = frame->crop_left; +++ q->sel.r.top = frame->crop_top; +++ +++ return 0; +++} +++ +++ +++static int set_dst_format(DeintV4L2M2MContext * const priv, V4L2Queue *queue, uint32_t pixelformat, uint32_t field, int width, int height) +++{ +++ struct v4l2_format * const fmt = &queue->format; +++ struct v4l2_selection *const sel = &queue->sel; +++ +++ memset(&fmt->fmt, 0, sizeof(fmt->fmt)); +++ +++ // Align w/h to 16 here in case there are alignment requirements at the next +++ // stage of the filter chain (also RPi deinterlace setup is bust and this +++ // fixes it) +++ if (V4L2_TYPE_IS_MULTIPLANAR(fmt->type)) { +++ fmt->fmt.pix_mp.pixelformat = pixelformat; +++ fmt->fmt.pix_mp.field = field; +++ fmt->fmt.pix_mp.width = FFALIGN(width, 16); +++ fmt->fmt.pix_mp.height = FFALIGN(height, 16); +++ } else { +++ fmt->fmt.pix.pixelformat = pixelformat; +++ fmt->fmt.pix.field = field; +++ fmt->fmt.pix.width = FFALIGN(width, 16); +++ fmt->fmt.pix.height = FFALIGN(height, 16); +++ } +++ +++ set_fmt_color(fmt, priv->colour_primaries, priv->colour_matrix, priv->colour_transfer); +++ set_fmt_color_range(fmt, priv->colour_range); +++ +++ sel->r.width = width; +++ sel->r.height = height; +++ sel->r.left = 0; +++ sel->r.top = 0; +++ +++ return do_s_fmt(queue); +++} +++ +++static int deint_v4l2m2m_probe_device(DeintV4L2M2MContextShared *ctx, char *node) +++{ +++ int ret; +++ +++ ctx->fd = open(node, O_RDWR | O_NONBLOCK, 0); +++ if (ctx->fd < 0) +++ return AVERROR(errno); +++ +++ ret = deint_v4l2m2m_prepare_context(ctx); +++ if (ret) { +++ av_log(ctx->logctx, AV_LOG_DEBUG, "Failed to prepare context\n"); +++ goto fail; +++ } +++ +++ ret = deint_v4l2m2m_try_format(&ctx->capture, ctx->output_width, ctx->output_height, ctx->output_format); +++ if (ret) { +++ av_log(ctx->logctx, AV_LOG_DEBUG, "Failed to try dst format\n"); +++ goto fail; +++ } +++ +++ ret = deint_v4l2m2m_try_format(&ctx->output, ctx->width, ctx->height, AV_PIX_FMT_NONE); +++ if (ret) { +++ av_log(ctx->logctx, AV_LOG_DEBUG, "Failed to try src format\n"); +++ goto fail; +++ } +++ +++ return 0; +++ +++fail: +++ close(ctx->fd); +++ ctx->fd = -1; +++ +++ return ret; +++} +++ +++static int deint_v4l2m2m_find_device(DeintV4L2M2MContextShared *ctx) +++{ +++ int ret = AVERROR(EINVAL); +++ struct dirent *entry; +++ char node[PATH_MAX]; +++ DIR *dirp; +++ +++ dirp = opendir("/dev"); +++ if (!dirp) +++ return AVERROR(errno); +++ +++ for (entry = readdir(dirp); entry; entry = readdir(dirp)) { +++ +++ if (strncmp(entry->d_name, "video", 5)) +++ continue; +++ +++ snprintf(node, sizeof(node), "/dev/%s", entry->d_name); +++ av_log(ctx->logctx, AV_LOG_DEBUG, "probing device %s\n", node); +++ ret = deint_v4l2m2m_probe_device(ctx, node); +++ if (!ret) +++ break; +++ } +++ +++ closedir(dirp); +++ +++ if (ret) { +++ av_log(ctx->logctx, AV_LOG_ERROR, "Could not find a valid device\n"); +++ ctx->fd = -1; +++ +++ return ret; +++ } +++ +++ av_log(ctx->logctx, AV_LOG_INFO, "Using device %s\n", node); +++ +++ return 0; +++} +++ +++static int deint_v4l2m2m_enqueue_buffer(V4L2Buffer *buf) +++{ +++ int ret; +++ +++ ret = 
ioctl(buf->q->ctx->fd, VIDIOC_QBUF, &buf->buffer); +++ if (ret < 0) +++ return AVERROR(errno); +++ +++ buf->enqueued = 1; +++ +++ return 0; +++} +++ +++static void +++drm_frame_init(AVDRMFrameDescriptor * const d) +++{ +++ unsigned int i; +++ for (i = 0; i != AV_DRM_MAX_PLANES; ++i) { +++ d->objects[i].fd = -1; +++ } +++} +++ +++static void +++drm_frame_uninit(AVDRMFrameDescriptor * const d) +++{ +++ unsigned int i; +++ for (i = 0; i != d->nb_objects; ++i) { +++ if (d->objects[i].fd != -1) { +++ close(d->objects[i].fd); +++ d->objects[i].fd = -1; +++ } +++ } +++} +++ +++static void +++avbufs_delete(V4L2Buffer** ppavbufs, const unsigned int n) +++{ +++ unsigned int i; +++ V4L2Buffer* const avbufs = *ppavbufs; +++ +++ if (avbufs == NULL) +++ return; +++ *ppavbufs = NULL; +++ +++ for (i = 0; i != n; ++i) { +++ V4L2Buffer* const avbuf = avbufs + i; +++ drm_frame_uninit(&avbuf->drm_frame); +++ } +++ +++ av_free(avbufs); +++} +++ +++static int v4l2_buffer_export_drm(V4L2Queue * const q, V4L2Buffer * const avbuf) +++{ +++ struct v4l2_exportbuffer expbuf; +++ int i, ret; +++ uint64_t mod = DRM_FORMAT_MOD_LINEAR; +++ +++ AVDRMFrameDescriptor * const drm_desc = &avbuf->drm_frame; +++ AVDRMLayerDescriptor * const layer = &drm_desc->layers[0]; +++ const struct v4l2_format *const fmt = &q->format; +++ const uint32_t height = fmt_height(fmt); +++ ptrdiff_t bpl0; +++ +++ /* fill the DRM frame descriptor */ +++ drm_desc->nb_layers = 1; +++ layer->nb_planes = avbuf->num_planes; +++ +++ for (int i = 0; i < avbuf->num_planes; i++) { +++ layer->planes[i].object_index = i; +++ layer->planes[i].offset = 0; +++ layer->planes[i].pitch = fmt_bpl(fmt, i); +++ } +++ bpl0 = layer->planes[0].pitch; +++ +++ switch (fmt_pixelformat(fmt)) { +++#if CONFIG_SAND +++ case V4L2_PIX_FMT_NV12_COL128: +++ mod = DRM_FORMAT_MOD_BROADCOM_SAND128_COL_HEIGHT(bpl0); +++ layer->format = V4L2_PIX_FMT_NV12; +++ +++ if (avbuf->num_planes > 1) +++ break; +++ +++ layer->nb_planes = 2; +++ layer->planes[1].object_index = 0; +++ layer->planes[1].offset = height * 128; +++ layer->planes[0].pitch = fmt_width(fmt); +++ layer->planes[1].pitch = layer->planes[0].pitch; +++ break; +++#endif +++ +++ case DRM_FORMAT_NV12: +++ layer->format = V4L2_PIX_FMT_NV12; +++ +++ if (avbuf->num_planes > 1) +++ break; +++ +++ layer->nb_planes = 2; +++ layer->planes[1].object_index = 0; +++ layer->planes[1].offset = bpl0 * height; +++ layer->planes[1].pitch = bpl0; +++ break; +++ +++ case V4L2_PIX_FMT_YUV420: +++ layer->format = DRM_FORMAT_YUV420; +++ +++ if (avbuf->num_planes > 1) +++ break; +++ +++ layer->nb_planes = 3; +++ layer->planes[1].object_index = 0; +++ layer->planes[1].offset = bpl0 * height; +++ layer->planes[1].pitch = bpl0 / 2; +++ layer->planes[2].object_index = 0; +++ layer->planes[2].offset = layer->planes[1].offset + ((bpl0 * height) / 4); +++ layer->planes[2].pitch = bpl0 / 2; +++ break; +++ +++ default: +++ drm_desc->nb_layers = 0; +++ return AVERROR(EINVAL); +++ } +++ +++ drm_desc->nb_objects = 0; +++ for (i = 0; i < avbuf->num_planes; i++) { +++ memset(&expbuf, 0, sizeof(expbuf)); +++ +++ expbuf.index = avbuf->buffer.index; +++ expbuf.type = avbuf->buffer.type; +++ expbuf.plane = i; +++ +++ ret = ioctl(avbuf->q->ctx->fd, VIDIOC_EXPBUF, &expbuf); +++ if (ret < 0) +++ return AVERROR(errno); +++ +++ drm_desc->objects[i].size = V4L2_TYPE_IS_MULTIPLANAR(avbuf->buffer.type) ? 
+++ avbuf->buffer.m.planes[i].length : avbuf->buffer.length; +++ drm_desc->objects[i].fd = expbuf.fd; +++ drm_desc->objects[i].format_modifier = mod; +++ drm_desc->nb_objects = i + 1; +++ } +++ +++ return 0; +++} +++ +++static int deint_v4l2m2m_allocate_buffers(V4L2Queue *queue) +++{ +++ struct v4l2_format *fmt = &queue->format; +++ DeintV4L2M2MContextShared *ctx = queue->ctx; +++ struct v4l2_requestbuffers req; +++ int ret, i, multiplanar; +++ uint32_t memory; +++ +++ memory = V4L2_TYPE_IS_OUTPUT(fmt->type) ? +++ V4L2_MEMORY_DMABUF : V4L2_MEMORY_MMAP; +++ +++ multiplanar = V4L2_TYPE_IS_MULTIPLANAR(fmt->type); +++ +++ memset(&req, 0, sizeof(req)); +++ req.count = queue->num_buffers; +++ req.memory = memory; +++ req.type = fmt->type; +++ +++ ret = ioctl(ctx->fd, VIDIOC_REQBUFS, &req); +++ if (ret < 0) { +++ av_log(ctx->logctx, AV_LOG_ERROR, "VIDIOC_REQBUFS failed: %s\n", strerror(errno)); +++ +++ return AVERROR(errno); +++ } +++ +++ queue->num_buffers = req.count; +++ queue->buffers = av_mallocz(queue->num_buffers * sizeof(V4L2Buffer)); +++ if (!queue->buffers) { +++ av_log(ctx->logctx, AV_LOG_ERROR, "malloc enomem\n"); +++ +++ return AVERROR(ENOMEM); +++ } +++ +++ for (i = 0; i < queue->num_buffers; i++) { +++ V4L2Buffer * const buf = &queue->buffers[i]; +++ +++ buf->enqueued = 0; +++ buf->q = queue; +++ +++ buf->buffer.type = fmt->type; +++ buf->buffer.memory = memory; +++ buf->buffer.index = i; +++ +++ if (multiplanar) { +++ buf->buffer.length = VIDEO_MAX_PLANES; +++ buf->buffer.m.planes = buf->planes; +++ } +++ +++ drm_frame_init(&buf->drm_frame); +++ } +++ +++ for (i = 0; i < queue->num_buffers; i++) { +++ V4L2Buffer * const buf = &queue->buffers[i]; +++ +++ ret = ioctl(ctx->fd, VIDIOC_QUERYBUF, &buf->buffer); +++ if (ret < 0) { +++ ret = AVERROR(errno); +++ +++ goto fail; +++ } +++ +++ buf->num_planes = multiplanar ? 
buf->buffer.length : 1; +++ +++ if (!V4L2_TYPE_IS_OUTPUT(fmt->type)) { +++ ret = deint_v4l2m2m_enqueue_buffer(buf); +++ if (ret) +++ goto fail; +++ +++ ret = v4l2_buffer_export_drm(queue, buf); +++ if (ret) +++ goto fail; +++ } +++ } +++ +++ return 0; +++ +++fail: +++ avbufs_delete(&queue->buffers, queue->num_buffers); +++ queue->num_buffers = 0; +++ return ret; +++} +++ +++static int deint_v4l2m2m_streamon(V4L2Queue *queue) +++{ +++ DeintV4L2M2MContextShared * const ctx = queue->ctx; +++ int type = queue->format.type; +++ int ret; +++ +++ ret = ioctl(ctx->fd, VIDIOC_STREAMON, &type); +++ av_log(ctx->logctx, AV_LOG_DEBUG, "%s: type:%d ret:%d errno:%d\n", __func__, type, ret, AVERROR(errno)); +++ if (ret < 0) +++ return AVERROR(errno); +++ +++ return 0; +++} +++ +++static int deint_v4l2m2m_streamoff(V4L2Queue *queue) +++{ +++ DeintV4L2M2MContextShared * const ctx = queue->ctx; +++ int type = queue->format.type; +++ int ret; +++ +++ ret = ioctl(ctx->fd, VIDIOC_STREAMOFF, &type); +++ av_log(ctx->logctx, AV_LOG_DEBUG, "%s: type:%d ret:%d errno:%d\n", __func__, type, ret, AVERROR(errno)); +++ if (ret < 0) +++ return AVERROR(errno); +++ +++ return 0; +++} +++ +++// timeout in ms +++static V4L2Buffer* deint_v4l2m2m_dequeue_buffer(V4L2Queue *queue, int timeout) +++{ +++ struct v4l2_plane planes[VIDEO_MAX_PLANES]; +++ DeintV4L2M2MContextShared *ctx = queue->ctx; +++ struct v4l2_buffer buf = { 0 }; +++ V4L2Buffer* avbuf = NULL; +++ struct pollfd pfd; +++ short events; +++ int ret; +++ +++ if (V4L2_TYPE_IS_OUTPUT(queue->format.type)) +++ events = POLLOUT | POLLWRNORM; +++ else +++ events = POLLIN | POLLRDNORM; +++ +++ pfd.events = events; +++ pfd.fd = ctx->fd; +++ +++ for (;;) { +++ ret = poll(&pfd, 1, timeout); +++ if (ret > 0) +++ break; +++ if (errno == EINTR) +++ continue; +++ return NULL; +++ } +++ +++ if (pfd.revents & POLLERR) +++ return NULL; +++ +++ if (pfd.revents & events) { +++ memset(&buf, 0, sizeof(buf)); +++ buf.memory = V4L2_MEMORY_MMAP; +++ buf.type = queue->format.type; +++ if (V4L2_TYPE_IS_MULTIPLANAR(queue->format.type)) { +++ memset(planes, 0, sizeof(planes)); +++ buf.length = VIDEO_MAX_PLANES; +++ buf.m.planes = planes; +++ } +++ +++ ret = ioctl(ctx->fd, VIDIOC_DQBUF, &buf); +++ if (ret) { +++ if (errno != EAGAIN) +++ av_log(ctx->logctx, AV_LOG_DEBUG, "VIDIOC_DQBUF, errno (%s)\n", +++ av_err2str(AVERROR(errno))); +++ return NULL; +++ } +++ +++ avbuf = &queue->buffers[buf.index]; +++ avbuf->enqueued = 0; +++ avbuf->buffer = buf; +++ if (V4L2_TYPE_IS_MULTIPLANAR(queue->format.type)) { +++ memcpy(avbuf->planes, planes, sizeof(planes)); +++ avbuf->buffer.m.planes = avbuf->planes; +++ } +++ return avbuf; +++ } +++ +++ return NULL; +++} +++ +++static V4L2Buffer *deint_v4l2m2m_find_free_buf(V4L2Queue *queue) +++{ +++ int i; +++ V4L2Buffer *buf = NULL; +++ +++ for (i = 0; i < queue->num_buffers; i++) +++ if (!queue->buffers[i].enqueued) { +++ buf = &queue->buffers[i]; +++ break; +++ } +++ return buf; +++} +++ +++static void deint_v4l2m2m_unref_queued(V4L2Queue *queue) +++{ +++ int i; +++ V4L2Buffer *buf = NULL; +++ +++ if (!queue || !queue->buffers) +++ return; +++ for (i = 0; i < queue->num_buffers; i++) { +++ buf = &queue->buffers[i]; +++ if (queue->buffers[i].enqueued) +++ av_frame_unref(&buf->frame); +++ } +++} +++ +++static void recycle_q(V4L2Queue * const queue) +++{ +++ V4L2Buffer* avbuf; +++ while (avbuf = deint_v4l2m2m_dequeue_buffer(queue, 0), avbuf) { +++ av_frame_unref(&avbuf->frame); +++ } +++} +++ +++static int count_enqueued(V4L2Queue *queue) +++{ +++ int i; +++ int n = 
0; +++ +++ if (queue->buffers == NULL) +++ return 0; +++ +++ for (i = 0; i < queue->num_buffers; i++) +++ if (queue->buffers[i].enqueued) +++ ++n; +++ return n; +++} +++ +++static int deint_v4l2m2m_enqueue_frame(V4L2Queue * const queue, AVFrame * const frame) +++{ +++ DeintV4L2M2MContextShared *const ctx = queue->ctx; +++ AVDRMFrameDescriptor *drm_desc = (AVDRMFrameDescriptor *)frame->data[0]; +++ V4L2Buffer *buf; +++ int i; +++ +++ if (V4L2_TYPE_IS_OUTPUT(queue->format.type)) +++ recycle_q(queue); +++ +++ buf = deint_v4l2m2m_find_free_buf(queue); +++ if (!buf) { +++ av_log(ctx->logctx, AV_LOG_ERROR, "%s: error %d finding free buf\n", __func__, 0); +++ return AVERROR(EAGAIN); +++ } +++ if (V4L2_TYPE_IS_MULTIPLANAR(buf->buffer.type)) +++ for (i = 0; i < drm_desc->nb_objects; i++) +++ buf->buffer.m.planes[i].m.fd = drm_desc->objects[i].fd; +++ else +++ buf->buffer.m.fd = drm_desc->objects[0].fd; +++ +++ buf->buffer.field = !frame->interlaced_frame ? V4L2_FIELD_NONE : +++ frame->top_field_first ? V4L2_FIELD_INTERLACED_TB : +++ V4L2_FIELD_INTERLACED_BT; +++ +++ if (ctx->field_order != buf->buffer.field) { +++ av_log(ctx->logctx, AV_LOG_DEBUG, "%s: Field changed: %d->%d\n", __func__, ctx->field_order, buf->buffer.field); +++ ctx->field_order = buf->buffer.field; +++ } +++ +++ buf->buffer.timestamp = pts_track_add_frame(&ctx->track, frame); +++ +++ buf->drm_frame.objects[0].fd = drm_desc->objects[0].fd; +++ +++ av_frame_move_ref(&buf->frame, frame); +++ +++ return deint_v4l2m2m_enqueue_buffer(buf); +++} +++ +++static void deint_v4l2m2m_destroy_context(DeintV4L2M2MContextShared *ctx) +++{ +++ if (atomic_fetch_sub(&ctx->refcount, 1) == 1) { +++ V4L2Queue *capture = &ctx->capture; +++ V4L2Queue *output = &ctx->output; +++ +++ av_log(NULL, AV_LOG_DEBUG, "%s - destroying context\n", __func__); +++ +++ if (ctx->fd >= 0) { +++ deint_v4l2m2m_streamoff(capture); +++ deint_v4l2m2m_streamoff(output); +++ } +++ +++ avbufs_delete(&capture->buffers, capture->num_buffers); +++ +++ deint_v4l2m2m_unref_queued(output); +++ +++ av_buffer_unref(&ctx->hw_frames_ctx); +++ +++ if (capture->buffers) +++ av_free(capture->buffers); +++ +++ if (output->buffers) +++ av_free(output->buffers); +++ +++ if (ctx->fd >= 0) { +++ close(ctx->fd); +++ ctx->fd = -1; +++ } +++ +++ av_free(ctx); +++ } +++} +++ +++static void v4l2_free_buffer(void *opaque, uint8_t *unused) +++{ +++ V4L2Buffer *buf = opaque; +++ DeintV4L2M2MContextShared *ctx = buf->q->ctx; +++ +++ if (!ctx->done) +++ deint_v4l2m2m_enqueue_buffer(buf); +++ +++ deint_v4l2m2m_destroy_context(ctx); +++} +++ +++// timeout in ms +++static int deint_v4l2m2m_dequeue_frame(V4L2Queue *queue, AVFrame* frame, int timeout) +++{ +++ DeintV4L2M2MContextShared *ctx = queue->ctx; +++ V4L2Buffer* avbuf; +++ enum AVColorPrimaries color_primaries; +++ enum AVColorSpace colorspace; +++ enum AVColorTransferCharacteristic color_trc; +++ enum AVColorRange color_range; +++ +++ av_log(ctx->logctx, AV_LOG_TRACE, "<<< %s\n", __func__); +++ +++ if (queue->eos) { +++ av_log(ctx->logctx, AV_LOG_TRACE, ">>> %s: EOS\n", __func__); +++ return AVERROR_EOF; +++ } +++ +++ avbuf = deint_v4l2m2m_dequeue_buffer(queue, timeout); +++ if (!avbuf) { +++ av_log(ctx->logctx, AV_LOG_DEBUG, "%s: No buffer to dequeue (timeout=%d)\n", __func__, timeout); +++ return AVERROR(EAGAIN); +++ } +++ +++ if (V4L2_TYPE_IS_CAPTURE(avbuf->buffer.type)) { +++ if ((avbuf->buffer.flags & V4L2_BUF_FLAG_LAST) != 0) +++ queue->eos = 1; +++ if (buf_bytesused0(&avbuf->buffer) == 0) +++ return queue->eos ? 
AVERROR_EOF : AVERROR(EINVAL); +++ } +++ +++ // Fill in PTS and anciliary info from src frame +++ pts_track_get_frame(&ctx->track, avbuf->buffer.timestamp, frame); +++ +++ frame->buf[0] = av_buffer_create((uint8_t *) &avbuf->drm_frame, +++ sizeof(avbuf->drm_frame), v4l2_free_buffer, +++ avbuf, AV_BUFFER_FLAG_READONLY); +++ if (!frame->buf[0]) { +++ av_log(ctx->logctx, AV_LOG_ERROR, "%s: error %d creating buffer\n", __func__, 0); +++ return AVERROR(ENOMEM); +++ } +++ +++ atomic_fetch_add(&ctx->refcount, 1); +++ +++ frame->data[0] = (uint8_t *)&avbuf->drm_frame; +++ frame->format = AV_PIX_FMT_DRM_PRIME; +++ if (ctx->hw_frames_ctx) +++ frame->hw_frames_ctx = av_buffer_ref(ctx->hw_frames_ctx); +++ frame->height = ctx->output_height; +++ frame->width = ctx->output_width; +++ +++ color_primaries = get_color_primaries(&ctx->capture.format); +++ colorspace = get_color_space(&ctx->capture.format); +++ color_trc = get_color_trc(&ctx->capture.format); +++ color_range = get_color_range(&ctx->capture.format); +++ +++ // If the color parameters are unspecified by V4L2 then leave alone as they +++ // will have been copied from src +++ if (color_primaries != AVCOL_PRI_UNSPECIFIED) +++ frame->color_primaries = color_primaries; +++ if (colorspace != AVCOL_SPC_UNSPECIFIED) +++ frame->colorspace = colorspace; +++ if (color_trc != AVCOL_TRC_UNSPECIFIED) +++ frame->color_trc = color_trc; +++ if (color_range != AVCOL_RANGE_UNSPECIFIED) +++ frame->color_range = color_range; +++ +++ if (ctx->filter_type == FILTER_V4L2_DEINTERLACE) { +++ // Not interlaced now +++ frame->interlaced_frame = 0; // *** Fill in from dst buffer? +++ frame->top_field_first = 0; +++ // Pkt duration halved +++ frame->pkt_duration /= 2; +++ } +++ +++ if (avbuf->buffer.flags & V4L2_BUF_FLAG_ERROR) { +++ av_log(ctx->logctx, AV_LOG_ERROR, "driver decode error\n"); +++ frame->decode_error_flags |= FF_DECODE_ERROR_INVALID_BITSTREAM; +++ } +++ +++ av_log(ctx->logctx, AV_LOG_TRACE, ">>> %s: PTS=%"PRId64"\n", __func__, frame->pts); +++ return 0; +++} +++ +++static int deint_v4l2m2m_config_props(AVFilterLink *outlink) +++{ +++ AVFilterLink *inlink = outlink->src->inputs[0]; +++ AVFilterContext *avctx = outlink->src; +++ DeintV4L2M2MContext *priv = avctx->priv; +++ DeintV4L2M2MContextShared *ctx = priv->shared; +++ int ret; +++ +++ ctx->height = avctx->inputs[0]->h; +++ ctx->width = avctx->inputs[0]->w; +++ +++ if (ctx->filter_type == FILTER_V4L2_SCALE) { +++ if ((ret = ff_scale_eval_dimensions(priv, +++ priv->w_expr, priv->h_expr, +++ inlink, outlink, +++ &ctx->output_width, &ctx->output_height)) < 0) +++ return ret; +++ +++ ff_scale_adjust_dimensions(inlink, &ctx->output_width, &ctx->output_height, +++ priv->force_original_aspect_ratio, priv->force_divisible_by); +++ } +++ else { +++ ctx->output_width = ctx->width; +++ ctx->output_height = ctx->height; +++ } +++ +++ av_log(priv, AV_LOG_DEBUG, "%s: %dx%d->%dx%d FR: %d/%d->%d/%d\n", __func__, +++ ctx->width, ctx->height, ctx->output_width, ctx->output_height, +++ inlink->frame_rate.num, inlink->frame_rate.den, outlink->frame_rate.num, outlink->frame_rate.den); +++ +++ outlink->time_base = inlink->time_base; +++ outlink->w = ctx->output_width; +++ outlink->h = ctx->output_height; +++ outlink->format = inlink->format; +++ if (ctx->filter_type == FILTER_V4L2_DEINTERLACE && inlink->frame_rate.den != 0) +++ outlink->frame_rate = (AVRational){inlink->frame_rate.num * 2, inlink->frame_rate.den}; +++ +++ if (inlink->sample_aspect_ratio.num) +++ outlink->sample_aspect_ratio = av_mul_q((AVRational){outlink->h * 
inlink->w, outlink->w * inlink->h}, inlink->sample_aspect_ratio); +++ else +++ outlink->sample_aspect_ratio = inlink->sample_aspect_ratio; +++ +++ ret = deint_v4l2m2m_find_device(ctx); +++ if (ret) +++ return ret; +++ +++ if (inlink->hw_frames_ctx) { +++ ctx->hw_frames_ctx = av_buffer_ref(inlink->hw_frames_ctx); +++ if (!ctx->hw_frames_ctx) +++ return AVERROR(ENOMEM); +++ } +++ return 0; +++} +++ +++static uint32_t desc_pixelformat(const AVDRMFrameDescriptor * const drm_desc) +++{ +++ const uint64_t mod = drm_desc->objects[0].format_modifier; +++ const int is_linear = (mod == DRM_FORMAT_MOD_LINEAR || mod == DRM_FORMAT_MOD_INVALID); +++ +++ // Only currently support single object things +++ if (drm_desc->nb_objects != 1) +++ return 0; +++ +++ switch (drm_desc->layers[0].format) { +++ case DRM_FORMAT_YUV420: +++ return is_linear ? V4L2_PIX_FMT_YUV420 : 0; +++ case DRM_FORMAT_NV12: +++ return is_linear ? V4L2_PIX_FMT_NV12 : +++#if CONFIG_SAND +++ fourcc_mod_broadcom_mod(mod) == DRM_FORMAT_MOD_BROADCOM_SAND128 ? V4L2_PIX_FMT_NV12_COL128 : +++#endif +++ 0; +++ default: +++ break; +++ } +++ return 0; +++} +++ +++static int deint_v4l2m2m_filter_frame(AVFilterLink *link, AVFrame *in) +++{ +++ AVFilterContext *avctx = link->dst; +++ DeintV4L2M2MContext *priv = avctx->priv; +++ DeintV4L2M2MContextShared *ctx = priv->shared; +++ V4L2Queue *capture = &ctx->capture; +++ V4L2Queue *output = &ctx->output; +++ int ret; +++ +++ av_log(priv, AV_LOG_DEBUG, "<<< %s: input pts: %"PRId64" dts: %"PRId64" field :%d interlaced: %d aspect:%d/%d\n", +++ __func__, in->pts, in->pkt_dts, in->top_field_first, in->interlaced_frame, in->sample_aspect_ratio.num, in->sample_aspect_ratio.den); +++ av_log(priv, AV_LOG_DEBUG, "--- %s: in status in %d/ot %d; out status in %d/out %d\n", __func__, +++ avctx->inputs[0]->status_in, avctx->inputs[0]->status_out, avctx->outputs[0]->status_in, avctx->outputs[0]->status_out); +++ +++ if (ctx->field_order == V4L2_FIELD_ANY) { +++ const AVDRMFrameDescriptor * const drm_desc = (AVDRMFrameDescriptor *)in->data[0]; +++ uint32_t pixelformat = desc_pixelformat(drm_desc); +++ +++ if (pixelformat == 0) { +++ av_log(avctx, AV_LOG_ERROR, "Unsupported DRM format %s in %d objects, modifier %#" PRIx64 "\n", +++ av_fourcc2str(drm_desc->layers[0].format), +++ drm_desc->nb_objects, drm_desc->objects[0].format_modifier); +++ return AVERROR(EINVAL); +++ } +++ +++ ctx->orig_width = drm_desc->layers[0].planes[0].pitch; +++ ctx->orig_height = drm_desc->layers[0].planes[1].offset / ctx->orig_width; +++ +++ av_log(priv, AV_LOG_DEBUG, "%s: %dx%d (%td,%td)\n", __func__, ctx->width, ctx->height, +++ drm_desc->layers[0].planes[0].pitch, drm_desc->layers[0].planes[1].offset); +++ +++ if ((ret = set_src_fmt(output, in)) != 0) { +++ av_log(avctx, AV_LOG_WARNING, "Unknown input DRM format: %s mod: %#" PRIx64 "\n", +++ av_fourcc2str(drm_desc->layers[0].format), drm_desc->objects[0].format_modifier); +++ return ret; +++ } +++ +++ ret = do_s_fmt(output); +++ if (ret) { +++ av_log(avctx, AV_LOG_WARNING, "Failed to set source format\n"); +++ return ret; +++ } +++ +++ if (ctx->output_format != AV_PIX_FMT_NONE) +++ pixelformat = fmt_av_to_v4l2(ctx->output_format); +++ ret = set_dst_format(priv, capture, pixelformat, V4L2_FIELD_NONE, ctx->output_width, ctx->output_height); +++ if (ret) { +++ av_log(avctx, AV_LOG_WARNING, "Failed to set destination format\n"); +++ return ret; +++ } +++ +++ ret = deint_v4l2m2m_allocate_buffers(capture); +++ if (ret) { +++ av_log(avctx, AV_LOG_WARNING, "Failed to allocate destination 
buffers\n"); +++ return ret; +++ } +++ +++ ret = deint_v4l2m2m_streamon(capture); +++ if (ret) { +++ av_log(avctx, AV_LOG_WARNING, "Failed set destination streamon: %s\n", av_err2str(ret)); +++ return ret; +++ } +++ +++ ret = deint_v4l2m2m_allocate_buffers(output); +++ if (ret) { +++ av_log(avctx, AV_LOG_WARNING, "Failed to allocate src buffers\n"); +++ return ret; +++ } +++ +++ ret = deint_v4l2m2m_streamon(output); +++ if (ret) { +++ av_log(avctx, AV_LOG_WARNING, "Failed set src streamon: %s\n", av_err2str(ret)); +++ return ret; +++ } +++ +++ if (in->top_field_first) +++ ctx->field_order = V4L2_FIELD_INTERLACED_TB; +++ else +++ ctx->field_order = V4L2_FIELD_INTERLACED_BT; +++ +++ { +++ struct v4l2_encoder_cmd ecmd = { +++ .cmd = V4L2_ENC_CMD_STOP +++ }; +++ ctx->has_enc_stop = 0; +++ if (ioctl(ctx->fd, VIDIOC_TRY_ENCODER_CMD, &ecmd) == 0) { +++ av_log(ctx->logctx, AV_LOG_DEBUG, "Test encode stop succeeded\n"); +++ ctx->has_enc_stop = 1; +++ } +++ else { +++ av_log(ctx->logctx, AV_LOG_DEBUG, "Test encode stop fail: %s\n", av_err2str(AVERROR(errno))); +++ } +++ +++ } +++ } +++ +++ ret = deint_v4l2m2m_enqueue_frame(output, in); +++ +++ av_log(priv, AV_LOG_TRACE, ">>> %s: %s\n", __func__, av_err2str(ret)); +++ return ret; +++} +++ +++static int +++ack_inlink(AVFilterContext * const avctx, DeintV4L2M2MContextShared *const s, +++ AVFilterLink * const inlink) +++{ +++ int instatus; +++ int64_t inpts; +++ +++ if (ff_inlink_acknowledge_status(inlink, &instatus, &inpts) <= 0) +++ return 0; +++ +++ s->drain = instatus; +++ s->drain_pts = inpts; +++ s->drain_state = DRAIN_TIMEOUT; +++ +++ if (s->field_order == V4L2_FIELD_ANY) { // Not yet started +++ s->drain_state = DRAIN_DONE; +++ } +++ else if (s->one_to_one) { +++ s->drain_state = DRAIN_LAST; +++ } +++ else if (s->has_enc_stop) { +++ struct v4l2_encoder_cmd ecmd = { +++ .cmd = V4L2_ENC_CMD_STOP +++ }; +++ if (ioctl(s->fd, VIDIOC_ENCODER_CMD, &ecmd) == 0) { +++ av_log(avctx->priv, AV_LOG_DEBUG, "Do Encode stop\n"); +++ s->drain_state = DRAIN_EOS; +++ } +++ else { +++ av_log(avctx->priv, AV_LOG_WARNING, "Encode stop fail: %s\n", av_err2str(AVERROR(errno))); +++ } +++ } +++ return 1; +++} +++ +++static int deint_v4l2m2m_activate(AVFilterContext *avctx) +++{ +++ DeintV4L2M2MContext * const priv = avctx->priv; +++ DeintV4L2M2MContextShared *const s = priv->shared; +++ AVFilterLink * const outlink = avctx->outputs[0]; +++ AVFilterLink * const inlink = avctx->inputs[0]; +++ int n = 0; +++ int cn = 99; +++ int did_something = 0; +++ +++ av_log(priv, AV_LOG_TRACE, "<<< %s\n", __func__); +++ +++ FF_FILTER_FORWARD_STATUS_BACK_ALL(outlink, avctx); +++ +++ ack_inlink(avctx, s, inlink); +++ +++ if (s->field_order != V4L2_FIELD_ANY) // Can't DQ if no setup! +++ { +++ AVFrame * frame = av_frame_alloc(); +++ int rv; +++ +++ recycle_q(&s->output); +++ n = count_enqueued(&s->output); +++ +++ if (frame == NULL) { +++ av_log(priv, AV_LOG_ERROR, "%s: error allocating frame\n", __func__); +++ return AVERROR(ENOMEM); +++ } +++ +++ rv = deint_v4l2m2m_dequeue_frame(&s->capture, frame, +++ drain_frame_expected(s->drain_state) || n > 4 ? 
300 : 0); +++ if (rv != 0) { +++ av_frame_free(&frame); +++ if (rv == AVERROR_EOF) { +++ av_log(priv, AV_LOG_DEBUG, "%s: --- DQ EOF\n", __func__); +++ s->drain_state = DRAIN_DONE; +++ } +++ else if (rv == AVERROR(EAGAIN)) { +++ if (s->drain_state != DRAIN_NONE) { +++ av_log(priv, AV_LOG_DEBUG, "%s: --- DQ empty - drain done\n", __func__); +++ s->drain_state = DRAIN_DONE; +++ } +++ } +++ else { +++ av_log(priv, AV_LOG_ERROR, ">>> %s: DQ fail: %s\n", __func__, av_err2str(rv)); +++ return rv; +++ } +++ } +++ else { +++ frame->interlaced_frame = 0; +++ // frame is always consumed by filter_frame - even on error despite +++ // a somewhat confusing comment in the header +++ rv = ff_filter_frame(outlink, frame); +++ ++s->frames_tx; +++ +++ av_log(priv, AV_LOG_TRACE, "%s: Filtered: %s\n", __func__, av_err2str(rv)); +++ did_something = 1; +++ +++ if (s->drain_state != DRAIN_NONE && pts_track_count(&s->track) == 0) { +++ av_log(priv, AV_LOG_DEBUG, "%s: --- DQ last - drain done\n", __func__); +++ s->drain_state = DRAIN_DONE; +++ } +++ } +++ +++ cn = count_enqueued(&s->capture); +++ } +++ +++ if (s->drain_state == DRAIN_DONE) { +++ ff_outlink_set_status(outlink, s->drain, s->drain_pts); +++ av_log(priv, AV_LOG_TRACE, ">>> %s: Status done: %s\n", __func__, av_err2str(s->drain)); +++ return 0; +++ } +++ +++ recycle_q(&s->output); +++ n = count_enqueued(&s->output); +++ +++ while (n < 6 && !s->drain) { +++ AVFrame * frame; +++ int rv; +++ +++ if ((rv = ff_inlink_consume_frame(inlink, &frame)) < 0) { +++ av_log(priv, AV_LOG_ERROR, "%s: consume in failed: %s\n", __func__, av_err2str(rv)); +++ return rv; +++ } +++ +++ if (frame == NULL) { +++ av_log(priv, AV_LOG_TRACE, "%s: No frame\n", __func__); +++ if (!ack_inlink(avctx, s, inlink)) { +++ ff_inlink_request_frame(inlink); +++ av_log(priv, AV_LOG_TRACE, "%s: req frame\n", __func__); +++ } +++ break; +++ } +++ ++s->frames_rx; +++ +++ rv = deint_v4l2m2m_filter_frame(inlink, frame); +++ av_frame_free(&frame); +++ +++ if (rv != 0) +++ return rv; +++ +++ av_log(priv, AV_LOG_TRACE, "%s: Q frame\n", __func__); +++ did_something = 1; +++ ++n; +++ } +++ +++ if ((n > 4 || s->drain) && ff_outlink_frame_wanted(outlink)) { +++ ff_filter_set_ready(avctx, 1); +++ did_something = 1; +++ av_log(priv, AV_LOG_TRACE, "%s: ready\n", __func__); +++ } +++ +++ av_log(priv, AV_LOG_TRACE, ">>> %s: OK (n=%d, cn=%d)\n", __func__, n, cn); +++ return did_something ? 
0 : FFERROR_NOT_READY; +++} +++ +++static av_cold int common_v4l2m2m_init(AVFilterContext * const avctx, const filter_type_v4l2_t filter_type) +++{ +++ DeintV4L2M2MContext * const priv = avctx->priv; +++ DeintV4L2M2MContextShared * const ctx = av_mallocz(sizeof(DeintV4L2M2MContextShared)); +++ +++ if (!ctx) { +++ av_log(priv, AV_LOG_ERROR, "%s: error %d allocating context\n", __func__, 0); +++ return AVERROR(ENOMEM); +++ } +++ priv->shared = ctx; +++ ctx->logctx = priv; +++ ctx->filter_type = filter_type; +++ ctx->fd = -1; +++ ctx->output.ctx = ctx; +++ ctx->output.num_buffers = 8; +++ ctx->output.name = "OUTPUT"; +++ ctx->capture.ctx = ctx; +++ ctx->capture.num_buffers = 12; +++ ctx->capture.name = "CAPTURE"; +++ ctx->done = 0; +++ ctx->field_order = V4L2_FIELD_ANY; +++ +++ pts_track_init(&ctx->track, priv); +++ +++ atomic_init(&ctx->refcount, 1); +++ +++ if (priv->output_format_string) { +++ ctx->output_format = av_get_pix_fmt(priv->output_format_string); +++ if (ctx->output_format == AV_PIX_FMT_NONE) { +++ av_log(avctx, AV_LOG_ERROR, "Invalid ffmpeg output format '%s'.\n", priv->output_format_string); +++ return AVERROR(EINVAL); +++ } +++ if (fmt_av_to_v4l2(ctx->output_format) == 0) { +++ av_log(avctx, AV_LOG_ERROR, "Unsupported output format for V4L2: %s.\n", av_get_pix_fmt_name(ctx->output_format)); +++ return AVERROR(EINVAL); +++ } +++ } else { +++ // Use the input format once that is configured. +++ ctx->output_format = AV_PIX_FMT_NONE; +++ } +++ +++#define STRING_OPTION(var_name, func_name, default_value) do { \ +++ if (priv->var_name ## _string) { \ +++ int var = av_ ## func_name ## _from_name(priv->var_name ## _string); \ +++ if (var < 0) { \ +++ av_log(avctx, AV_LOG_ERROR, "Invalid %s.\n", #var_name); \ +++ return AVERROR(EINVAL); \ +++ } \ +++ priv->var_name = var; \ +++ } else { \ +++ priv->var_name = default_value; \ +++ } \ +++ } while (0) +++ +++ STRING_OPTION(colour_primaries, color_primaries, AVCOL_PRI_UNSPECIFIED); +++ STRING_OPTION(colour_transfer, color_transfer, AVCOL_TRC_UNSPECIFIED); +++ STRING_OPTION(colour_matrix, color_space, AVCOL_SPC_UNSPECIFIED); +++ STRING_OPTION(chroma_location, chroma_location, AVCHROMA_LOC_UNSPECIFIED); +++ +++ return 0; +++} +++ +++static av_cold int deint_v4l2m2m_init(AVFilterContext *avctx) +++{ +++ return common_v4l2m2m_init(avctx, FILTER_V4L2_DEINTERLACE); +++} +++ +++static av_cold int scale_v4l2m2m_init(AVFilterContext *avctx) +++{ +++ int rv; +++ DeintV4L2M2MContext * priv; +++ DeintV4L2M2MContextShared * ctx; +++ +++ if ((rv = common_v4l2m2m_init(avctx, FILTER_V4L2_SCALE)) != 0) +++ return rv; +++ +++ priv = avctx->priv; +++ ctx = priv->shared; +++ +++ ctx->one_to_one = 1; +++ return 0; +++} +++ +++static void deint_v4l2m2m_uninit(AVFilterContext *avctx) +++{ +++ DeintV4L2M2MContext *priv = avctx->priv; +++ DeintV4L2M2MContextShared *ctx = priv->shared; +++ +++ av_log(priv, AV_LOG_VERBOSE, "Frames Rx: %u, Frames Tx: %u\n", +++ ctx->frames_rx, ctx->frames_tx); +++ ctx->done = 1; +++ ctx->logctx = NULL; // Log to NULL works, log to missing crashes +++ pts_track_uninit(&ctx->track); +++ deint_v4l2m2m_destroy_context(ctx); +++} +++ +++static const AVOption deinterlace_v4l2m2m_options[] = { +++ { NULL }, +++}; +++ +++AVFILTER_DEFINE_CLASS(deinterlace_v4l2m2m); +++ +++#define OFFSET(x) offsetof(DeintV4L2M2MContext, x) +++#define FLAGS (AV_OPT_FLAG_FILTERING_PARAM|AV_OPT_FLAG_VIDEO_PARAM) +++ +++static const AVOption scale_v4l2m2m_options[] = { +++ { "w", "Output video width", +++ OFFSET(w_expr), AV_OPT_TYPE_STRING, {.str = "iw"}, .flags 
= FLAGS }, +++ { "h", "Output video height", +++ OFFSET(h_expr), AV_OPT_TYPE_STRING, {.str = "ih"}, .flags = FLAGS }, +++ { "format", "Output video format (software format of hardware frames)", +++ OFFSET(output_format_string), AV_OPT_TYPE_STRING, .flags = FLAGS }, +++ // These colour properties match the ones of the same name in vf_scale. +++ { "out_color_matrix", "Output colour matrix coefficient set", +++ OFFSET(colour_matrix_string), AV_OPT_TYPE_STRING, { .str = NULL }, .flags = FLAGS }, +++ { "out_range", "Output colour range", +++ OFFSET(colour_range), AV_OPT_TYPE_INT, { .i64 = AVCOL_RANGE_UNSPECIFIED }, +++ AVCOL_RANGE_UNSPECIFIED, AVCOL_RANGE_JPEG, FLAGS, "range" }, +++ { "full", "Full range", +++ 0, AV_OPT_TYPE_CONST, { .i64 = AVCOL_RANGE_JPEG }, 0, 0, FLAGS, "range" }, +++ { "limited", "Limited range", +++ 0, AV_OPT_TYPE_CONST, { .i64 = AVCOL_RANGE_MPEG }, 0, 0, FLAGS, "range" }, +++ { "jpeg", "Full range", +++ 0, AV_OPT_TYPE_CONST, { .i64 = AVCOL_RANGE_JPEG }, 0, 0, FLAGS, "range" }, +++ { "mpeg", "Limited range", +++ 0, AV_OPT_TYPE_CONST, { .i64 = AVCOL_RANGE_MPEG }, 0, 0, FLAGS, "range" }, +++ { "tv", "Limited range", +++ 0, AV_OPT_TYPE_CONST, { .i64 = AVCOL_RANGE_MPEG }, 0, 0, FLAGS, "range" }, +++ { "pc", "Full range", +++ 0, AV_OPT_TYPE_CONST, { .i64 = AVCOL_RANGE_JPEG }, 0, 0, FLAGS, "range" }, +++ // These colour properties match the ones in the VAAPI scaler +++ { "out_color_primaries", "Output colour primaries", +++ OFFSET(colour_primaries_string), AV_OPT_TYPE_STRING, +++ { .str = NULL }, .flags = FLAGS }, +++ { "out_color_transfer", "Output colour transfer characteristics", +++ OFFSET(colour_transfer_string), AV_OPT_TYPE_STRING, +++ { .str = NULL }, .flags = FLAGS }, +++ { "out_chroma_location", "Output chroma sample location", +++ OFFSET(chroma_location_string), AV_OPT_TYPE_STRING, +++ { .str = NULL }, .flags = FLAGS }, +++ { "force_original_aspect_ratio", "decrease or increase w/h if necessary to keep the original AR", OFFSET(force_original_aspect_ratio), AV_OPT_TYPE_INT, { .i64 = 0}, 0, 2, FLAGS, "force_oar" }, +++ { "force_divisible_by", "enforce that the output resolution is divisible by a defined integer when force_original_aspect_ratio is used", OFFSET(force_divisible_by), AV_OPT_TYPE_INT, { .i64 = 1}, 1, 256, FLAGS }, +++ { NULL }, +++}; +++ +++AVFILTER_DEFINE_CLASS(scale_v4l2m2m); +++ +++static const AVFilterPad deint_v4l2m2m_inputs[] = { +++ { +++ .name = "default", +++ .type = AVMEDIA_TYPE_VIDEO, +++ }, +++}; +++ +++static const AVFilterPad deint_v4l2m2m_outputs[] = { +++ { +++ .name = "default", +++ .type = AVMEDIA_TYPE_VIDEO, +++ .config_props = deint_v4l2m2m_config_props, +++ }, +++}; +++ +++AVFilter ff_vf_deinterlace_v4l2m2m = { +++ .name = "deinterlace_v4l2m2m", +++ .description = NULL_IF_CONFIG_SMALL("V4L2 M2M deinterlacer"), +++ .priv_size = sizeof(DeintV4L2M2MContext), +++ .init = &deint_v4l2m2m_init, +++ .uninit = &deint_v4l2m2m_uninit, +++ FILTER_INPUTS(deint_v4l2m2m_inputs), +++ FILTER_OUTPUTS(deint_v4l2m2m_outputs), +++ FILTER_SINGLE_SAMPLEFMT(AV_PIX_FMT_DRM_PRIME), +++ .priv_class = &deinterlace_v4l2m2m_class, +++ .activate = deint_v4l2m2m_activate, +++}; +++ +++AVFilter ff_vf_scale_v4l2m2m = { +++ .name = "scale_v4l2m2m", +++ .description = NULL_IF_CONFIG_SMALL("V4L2 M2M scaler"), +++ .priv_size = sizeof(DeintV4L2M2MContext), +++ .init = &scale_v4l2m2m_init, +++ .uninit = &deint_v4l2m2m_uninit, +++ FILTER_INPUTS(deint_v4l2m2m_inputs), +++ FILTER_OUTPUTS(deint_v4l2m2m_outputs), +++ FILTER_SINGLE_SAMPLEFMT(AV_PIX_FMT_DRM_PRIME), +++ .priv_class = 
&scale_v4l2m2m_class, +++ .activate = deint_v4l2m2m_activate, +++}; +++ ++diff --git a/libavfilter/vf_unsand.c b/libavfilter/vf_unsand.c ++new file mode 100644 ++index 0000000000..7100f2fc9b ++--- /dev/null +++++ b/libavfilter/vf_unsand.c ++@@ -0,0 +1,228 @@ +++/* +++ * Copyright (c) 2007 Bobby Bingham +++ * +++ * This file is part of FFmpeg. +++ * +++ * FFmpeg is free software; you can redistribute it and/or +++ * modify it under the terms of the GNU Lesser General Public +++ * License as published by the Free Software Foundation; either +++ * version 2.1 of the License, or (at your option) any later version. +++ * +++ * FFmpeg is distributed in the hope that it will be useful, +++ * but WITHOUT ANY WARRANTY; without even the implied warranty of +++ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU +++ * Lesser General Public License for more details. +++ * +++ * You should have received a copy of the GNU Lesser General Public +++ * License along with FFmpeg; if not, write to the Free Software +++ * Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA +++ */ +++ +++/** +++ * @file +++ * format and noformat video filters +++ */ +++ +++#include +++ +++#include "libavutil/internal.h" +++#include "libavutil/mem.h" +++#include "libavutil/pixdesc.h" +++#include "libavutil/opt.h" +++#include "libavutil/rpi_sand_fns.h" +++ +++#include "avfilter.h" +++#include "formats.h" +++#include "internal.h" +++#include "video.h" +++ +++typedef struct UnsandContext { +++ const AVClass *class; +++} UnsandContext; +++ +++static av_cold void uninit(AVFilterContext *ctx) +++{ +++// UnsandContext *s = ctx->priv; +++} +++ +++static av_cold int init(AVFilterContext *ctx) +++{ +++// UnsandContext *s = ctx->priv; +++ +++ return 0; +++} +++ +++ +++static int filter_frame(AVFilterLink *link, AVFrame *in) +++{ +++ AVFilterLink * const outlink = link->dst->outputs[0]; +++ AVFrame *out = NULL; +++ int rv = 0; +++ +++ if (outlink->format == in->format) { +++ // If nothing to do then do nothing +++ out = in; +++ } +++ else +++ { +++ if ((out = ff_get_video_buffer(outlink, av_frame_cropped_width(in), av_frame_cropped_height(in))) == NULL) +++ { +++ rv = AVERROR(ENOMEM); +++ goto fail; +++ } +++ if (av_rpi_sand_to_planar_frame(out, in) != 0) +++ { +++ rv = -1; +++ goto fail; +++ } +++ +++ av_frame_free(&in); +++ } +++ +++ return ff_filter_frame(outlink, out); +++ +++fail: +++ av_frame_free(&out); +++ av_frame_free(&in); +++ return rv; +++} +++ +++#if 0 +++static void dump_fmts(const AVFilterFormats * fmts) +++{ +++ int i; +++ if (fmts== NULL) { +++ printf("NULL\n"); +++ return; +++ } +++ for (i = 0; i < fmts->nb_formats; ++i) { +++ printf(" %d", fmts->formats[i]); +++ } +++ printf("\n"); +++} +++#endif +++ +++static int query_formats(AVFilterContext *ctx) +++{ +++// UnsandContext *s = ctx->priv; +++ int ret; +++ +++ // If we aren't connected at both ends then just do nothing +++ if (ctx->inputs[0] == NULL || ctx->outputs[0] == NULL) +++ return 0; +++ +++ // Our output formats depend on our input formats and we can't/don't +++ // want to convert between bit depths so we need to wait for the source +++ // to have an opinion before we do +++ if (ctx->inputs[0]->incfg.formats == NULL) +++ return AVERROR(EAGAIN); +++ +++ // Accept anything +++ if (ctx->inputs[0]->outcfg.formats == NULL && +++ (ret = ff_formats_ref(ctx->inputs[0]->incfg.formats, &ctx->inputs[0]->outcfg.formats)) < 0) +++ return ret; +++ +++ // Filter out sand formats +++ +++ // Generate a container if we don't already have 
one +++ if (ctx->outputs[0]->incfg.formats == NULL) +++ { +++ // Somewhat rubbish way of ensuring we have a good structure +++ const static enum AVPixelFormat out_fmts[] = {AV_PIX_FMT_YUV420P10, AV_PIX_FMT_YUV420P, AV_PIX_FMT_NONE}; +++ AVFilterFormats *formats = ff_make_format_list(out_fmts); +++ +++ if (formats == NULL) +++ return AVERROR(ENOMEM); +++ if ((ret = ff_formats_ref(formats, &ctx->outputs[0]->incfg.formats)) < 0) +++ return ret; +++ } +++ +++ // Replace old format list with new filtered list derived from what our +++ // input says it can do +++ { +++ const AVFilterFormats * const src_ff = ctx->inputs[0]->outcfg.formats; +++ AVFilterFormats * const dst_ff = ctx->outputs[0]->incfg.formats; +++ enum AVPixelFormat *dst_fmts = av_malloc(sizeof(enum AVPixelFormat) * src_ff->nb_formats); +++ int i; +++ int n = 0; +++ int seen_420p = 0; +++ int seen_420p10 = 0; +++ +++ for (i = 0; i < src_ff->nb_formats; ++i) { +++ const enum AVPixelFormat f = src_ff->formats[i]; +++ +++ switch (f){ +++ case AV_PIX_FMT_YUV420P: +++ case AV_PIX_FMT_SAND128: +++ case AV_PIX_FMT_RPI4_8: +++ if (!seen_420p) { +++ seen_420p = 1; +++ dst_fmts[n++] = AV_PIX_FMT_YUV420P; +++ } +++ break; +++ case AV_PIX_FMT_SAND64_10: +++ case AV_PIX_FMT_YUV420P10: +++ case AV_PIX_FMT_RPI4_10: +++ if (!seen_420p10) { +++ seen_420p10 = 1; +++ dst_fmts[n++] = AV_PIX_FMT_YUV420P10; +++ } +++ break; +++ default: +++ dst_fmts[n++] = f; +++ break; +++ } +++ } +++ +++ av_freep(&dst_ff->formats); +++ dst_ff->formats = dst_fmts; +++ dst_ff->nb_formats = n; +++ } +++ +++// printf("Unsand: %s calc: ", __func__); +++// dump_fmts(ctx->outputs[0]->incfg.formats); +++ +++ return 0; +++} +++ +++ +++#define OFFSET(x) offsetof(UnsandContext, x) +++static const AVOption unsand_options[] = { +++ { NULL } +++}; +++ +++ +++AVFILTER_DEFINE_CLASS(unsand); +++ +++static const AVFilterPad avfilter_vf_unsand_inputs[] = { +++ { +++ .name = "default", +++ .type = AVMEDIA_TYPE_VIDEO, +++ .filter_frame = filter_frame, +++ }, +++ { NULL } +++}; +++ +++static const AVFilterPad avfilter_vf_unsand_outputs[] = { +++ { +++ .name = "default", +++ .type = AVMEDIA_TYPE_VIDEO +++ }, +++}; +++ +++AVFilter ff_vf_unsand = { +++ .name = "unsand", +++ .description = NULL_IF_CONFIG_SMALL("Convert sand pix fmt to yuv"), +++ +++ .init = init, +++ .uninit = uninit, +++ +++ FILTER_QUERY_FUNC(query_formats), +++ +++ .priv_size = sizeof(UnsandContext), +++ .priv_class = &unsand_class, +++ +++ FILTER_INPUTS(avfilter_vf_unsand_inputs), +++ FILTER_OUTPUTS(avfilter_vf_unsand_outputs), +++}; +++ ++diff --git a/libavfilter/yadif.h b/libavfilter/yadif.h ++index c928911b35..e1a6037f62 100644 ++--- a/libavfilter/yadif.h +++++ b/libavfilter/yadif.h ++@@ -53,6 +53,7 @@ typedef struct YADIFContext { ++ int mode; ///< YADIFMode ++ int parity; ///< YADIFParity ++ int deint; ///< YADIFDeint +++ int useasm; ///< Use any asm code ++ ++ int frame_pending; ++ ++diff --git a/libavformat/matroskaenc.c b/libavformat/matroskaenc.c ++index 113541bd9a..61e4c976ef 100644 ++--- a/libavformat/matroskaenc.c +++++ b/libavformat/matroskaenc.c ++@@ -77,6 +77,10 @@ ++ ++ #define IS_WEBM(mkv) (CONFIG_WEBM_MUXER && CONFIG_MATROSKA_MUXER ? 
\ ++ ((mkv)->mode == MODE_WEBM) : CONFIG_WEBM_MUXER) +++ +++/* Reserved size for H264 headers if not extant at init time */ +++#define MAX_H264_HEADER_SIZE 1024 +++ ++ #define IS_SEEKABLE(pb, mkv) (((pb)->seekable & AVIO_SEEKABLE_NORMAL) && \ ++ !(mkv)->is_live) ++ ++@@ -1121,8 +1125,12 @@ static int mkv_assemble_native_codecprivate(AVFormatContext *s, AVIOContext *dyn ++ case AV_CODEC_ID_WAVPACK: ++ return put_wv_codecpriv(dyn_cp, extradata, extradata_size); ++ case AV_CODEC_ID_H264: ++- return ff_isom_write_avcc(dyn_cp, extradata, ++- extradata_size); +++ if (par->extradata_size) +++ return ff_isom_write_avcc(dyn_cp, extradata, +++ extradata_size); +++ else +++ *size_to_reserve = MAX_H264_HEADER_SIZE; +++ break; ++ case AV_CODEC_ID_HEVC: ++ return ff_isom_write_hvcc(dyn_cp, extradata, ++ extradata_size, 0); ++@@ -2731,8 +2739,8 @@ static int mkv_check_new_extra_data(AVFormatContext *s, const AVPacket *pkt) ++ } ++ break; ++ #endif ++- // FIXME: Remove the following once libaom starts propagating proper extradata during init() ++- // See https://bugs.chromium.org/p/aomedia/issues/detail?id=2208 +++ // FIXME: Remove the following once libaom starts propagating extradata during init() +++ // See https://bugs.chromium.org/p/aomedia/issues/detail?id=2012 ++ case AV_CODEC_ID_AV1: ++ if (side_data_size && mkv->track.bc && !par->extradata_size) { ++ // If the reserved space doesn't suffice, only write ++@@ -2744,6 +2752,16 @@ static int mkv_check_new_extra_data(AVFormatContext *s, const AVPacket *pkt) ++ } else if (!par->extradata_size) ++ return AVERROR_INVALIDDATA; ++ break; +++ // H264 V4L2 has a similar issue +++ case AV_CODEC_ID_H264: +++ if (side_data_size && mkv->track.bc && !par->extradata_size) { +++ ret = mkv_update_codecprivate(s, mkv, side_data, side_data_size, +++ par, mkv->track.bc, track, 0); +++ if (ret < 0) +++ return ret; +++ } else if (!par->extradata_size) +++ return AVERROR_INVALIDDATA; +++ break; ++ default: ++ if (side_data_size) ++ av_log(s, AV_LOG_DEBUG, "Ignoring new extradata in a packet for stream %d.\n", pkt->stream_index); ++diff --git a/libavformat/movenc.c b/libavformat/movenc.c ++index c4fcb5f8b1..891adbf7b2 100644 ++--- a/libavformat/movenc.c +++++ b/libavformat/movenc.c ++@@ -6343,6 +6343,7 @@ static int mov_write_single_packet(AVFormatContext *s, AVPacket *pkt) ++ if (trk->par->codec_id == AV_CODEC_ID_MP4ALS || ++ trk->par->codec_id == AV_CODEC_ID_AAC || ++ trk->par->codec_id == AV_CODEC_ID_AV1 || +++ trk->par->codec_id == AV_CODEC_ID_H264 || ++ trk->par->codec_id == AV_CODEC_ID_FLAC) { ++ size_t side_size; ++ uint8_t *side = av_packet_get_side_data(pkt, AV_PKT_DATA_NEW_EXTRADATA, &side_size); ++diff --git a/libavformat/rtpenc.c b/libavformat/rtpenc.c ++index a8d296a154..f67dc2a15a 100644 ++--- a/libavformat/rtpenc.c +++++ b/libavformat/rtpenc.c ++@@ -19,6 +19,7 @@ ++ * Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA ++ */ ++ +++#include "avc.h" ++ #include "avformat.h" ++ #include "mpegts.h" ++ #include "internal.h" ++@@ -585,8 +586,25 @@ static int rtp_write_packet(AVFormatContext *s1, AVPacket *pkt) ++ ff_rtp_send_vc2hq(s1, pkt->data, size, st->codecpar->field_order != AV_FIELD_PROGRESSIVE ? 
1 : 0); ++ break; ++ case AV_CODEC_ID_H264: +++ { +++ uint8_t *side_data; +++ int side_data_size = 0; +++ +++ side_data = av_packet_get_side_data(pkt, AV_PKT_DATA_NEW_EXTRADATA, +++ &side_data_size); +++ +++ if (side_data_size != 0) { +++ int ps_size = side_data_size; +++ uint8_t * ps_buf = NULL; +++ +++ ff_avc_write_annexb_extradata(side_data, &ps_buf, &ps_size); +++ av_log(s1, AV_LOG_TRACE, "H264: write side data=%d\n", ps_size); +++ ff_rtp_send_h264_hevc(s1, ps_buf ? ps_buf : side_data, ps_size); +++ av_free(ps_buf); +++ } ++ ff_rtp_send_h264_hevc(s1, pkt->data, size); ++ break; +++ } ++ case AV_CODEC_ID_H261: ++ ff_rtp_send_h261(s1, pkt->data, size); ++ break; ++diff --git a/libavutil/Makefile b/libavutil/Makefile ++index dc9012f9a8..e33f5db099 100644 ++--- a/libavutil/Makefile +++++ b/libavutil/Makefile ++@@ -73,6 +73,7 @@ HEADERS = adler32.h \ ++ rational.h \ ++ replaygain.h \ ++ ripemd.h \ +++ rpi_sand_fns.h \ ++ samplefmt.h \ ++ sha.h \ ++ sha512.h \ ++@@ -192,6 +193,7 @@ OBJS-$(CONFIG_MACOS_KPERF) += macos_kperf.o ++ OBJS-$(CONFIG_MEDIACODEC) += hwcontext_mediacodec.o ++ OBJS-$(CONFIG_OPENCL) += hwcontext_opencl.o ++ OBJS-$(CONFIG_QSV) += hwcontext_qsv.o +++OBJS-$(CONFIG_SAND) += rpi_sand_fns.o ++ OBJS-$(CONFIG_VAAPI) += hwcontext_vaapi.o ++ OBJS-$(CONFIG_VIDEOTOOLBOX) += hwcontext_videotoolbox.o ++ OBJS-$(CONFIG_VDPAU) += hwcontext_vdpau.o ++@@ -212,6 +214,7 @@ SKIPHEADERS-$(CONFIG_D3D11VA) += hwcontext_d3d11va.h ++ SKIPHEADERS-$(CONFIG_DXVA2) += hwcontext_dxva2.h ++ SKIPHEADERS-$(CONFIG_QSV) += hwcontext_qsv.h ++ SKIPHEADERS-$(CONFIG_OPENCL) += hwcontext_opencl.h +++SKIPHEADERS-$(CONFIG-RPI) += rpi_sand_fn_pw.h ++ SKIPHEADERS-$(CONFIG_VAAPI) += hwcontext_vaapi.h ++ SKIPHEADERS-$(CONFIG_VIDEOTOOLBOX) += hwcontext_videotoolbox.h ++ SKIPHEADERS-$(CONFIG_VDPAU) += hwcontext_vdpau.h ++diff --git a/libavutil/aarch64/Makefile b/libavutil/aarch64/Makefile ++index eba0151337..1b44beab39 100644 ++--- a/libavutil/aarch64/Makefile +++++ b/libavutil/aarch64/Makefile ++@@ -4,3 +4,5 @@ OBJS += aarch64/cpu.o \ ++ ++ NEON-OBJS += aarch64/float_dsp_neon.o \ ++ aarch64/tx_float_neon.o \ +++ aarch64/rpi_sand_neon.o \ +++ ++diff --git a/libavutil/aarch64/rpi_sand_neon.S b/libavutil/aarch64/rpi_sand_neon.S ++new file mode 100644 ++index 0000000000..2f07d9674c ++--- /dev/null +++++ b/libavutil/aarch64/rpi_sand_neon.S ++@@ -0,0 +1,781 @@ +++/* +++Copyright (c) 2021 Michael Eiler +++ +++Redistribution and use in source and binary forms, with or without +++modification, are permitted provided that the following conditions are met: +++ * Redistributions of source code must retain the above copyright +++ notice, this list of conditions and the following disclaimer. +++ * Redistributions in binary form must reproduce the above copyright +++ notice, this list of conditions and the following disclaimer in the +++ documentation and/or other materials provided with the distribution. +++ * Neither the name of the copyright holder nor the +++ names of its contributors may be used to endorse or promote products +++ derived from this software without specific prior written permission. +++ +++THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS" AND +++ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED +++WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE +++DISCLAIMED. 
IN NO EVENT SHALL THE COPYRIGHT HOLDER OR CONTRIBUTORS BE LIABLE FOR ANY +++DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES +++(INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; +++LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND +++ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT +++(INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS +++SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. +++ +++Authors: Michael Eiler +++*/ +++ +++#include "asm.S" +++ +++// void ff_rpi_sand8_lines_to_planar_y8( +++// uint8_t * dest, : x0 +++// unsigned int dst_stride, : w1 +++// const uint8_t * src, : x2 +++// unsigned int src_stride1, : w3, always 128 +++// unsigned int src_stride2, : w4 +++// unsigned int _x, : w5 +++// unsigned int y, : w6 +++// unsigned int _w, : w7 +++// unsigned int h); : [sp, #0] +++ +++function ff_rpi_sand8_lines_to_planar_y8, export=1 +++ // w15 contains the number of rows we need to process +++ ldr w15, [sp, #0] +++ +++ // w8 will contain the number of blocks per row +++ // w8 = floor(_w/stride1) +++ // stride1 is assumed to always be 128 +++ mov w8, w1 +++ lsr w8, w8, #7 +++ +++ // in case the width of the image is not a multiple of 128, there will +++ // be an incomplete block at the end of every row +++ // w9 contains the number of pixels stored within this block +++ // w9 = _w - w8 * 128 +++ lsl w9, w8, #7 +++ sub w9, w7, w9 +++ +++ // this is the value we have to add to the src pointer after reading a complete block +++ // it will move the address to the start of the next block +++ // w10 = stride2 * stride1 - stride1 +++ mov w10, w4 +++ lsl w10, w10, #7 +++ sub w10, w10, #128 +++ +++ // w11 is the row offset, meaning the start offset of the first block of every collumn +++ // this will be increased with stride1 within every iteration of the row_loop +++ eor w11, w11, w11 +++ +++ // w12 = 0, processed row count +++ eor w12, w12, w12 +++row_loop: +++ // start of the first block within the current row +++ // x13 = row offset + src +++ mov x13, x2 +++ add x13, x13, x11 +++ +++ // w14 = 0, processed block count +++ eor w14, w14, w14 +++ +++ cmp w8, #0 +++ beq no_main_y8 +++ +++block_loop: +++ // copy 128 bytes (a full block) into the vector registers v0-v7 and increase the src address by 128 +++ // fortunately these aren't callee saved ones, meaning we don't need to backup them +++ ld1 { v0.16b, v1.16b, v2.16b, v3.16b}, [x13], #64 +++ ld1 { v4.16b, v5.16b, v6.16b, v7.16b}, [x13], #64 +++ +++ // write these registers back to the destination vector and increase the dst address by 128 +++ st1 { v0.16b, v1.16b, v2.16b, v3.16b }, [x0], #64 +++ st1 { v4.16b, v5.16b, v6.16b, v7.16b }, [x0], #64 +++ +++ // move the source register to the beginning of the next block (x13 = src + block offset) +++ add x13, x13, x10 +++ // increase the block counter +++ add w14, w14, #1 +++ +++ // continue with the block_loop if we haven't copied all full blocks yet +++ cmp w8, w14 +++ bgt block_loop +++ +++ // handle the last block at the end of each row +++ // at most 127 byte values copied from src to dst +++no_main_y8: +++ eor w5, w5, w5 // i = 0 +++incomplete_block_loop_y8: +++ cmp w5, w9 +++ bge incomplete_block_loop_end_y8 +++ +++ ldrb w6, [x13] +++ strb w6, [x0] +++ add x13, x13, #1 +++ add x0, x0, #1 +++ +++ add w5, w5, #1 +++ b incomplete_block_loop_y8 +++incomplete_block_loop_end_y8: +++ +++ +++ // increase the row offset by 128 (stride1) +++ add w11, w11, 
#128 +++ // increment the row counter +++ add w12, w12, #1 +++ +++ // process the next row if we haven't finished yet +++ cmp w15, w12 +++ bgt row_loop +++ +++ ret +++endfunc +++ +++ +++ +++// void ff_rpi_sand8_lines_to_planar_c8( +++// uint8_t * dst_u, : x0 +++// unsigned int dst_stride_u, : w1 == width +++// uint8_t * dst_v, : x2 +++// unsigned int dst_stride_v, : w3 == width +++// const uint8_t * src, : x4 +++// unsigned int stride1, : w5 == 128 +++// unsigned int stride2, : w6 +++// unsigned int _x, : w7 +++// unsigned int y, : [sp, #0] +++// unsigned int _w, : [sp, #8] +++// unsigned int h); : [sp, #16] +++ +++function ff_rpi_sand8_lines_to_planar_c8, export=1 +++ // w7 = width +++ ldr w7, [sp, #8] +++ +++ // w15 contains the number of rows we need to process +++ // counts down +++ ldr w15, [sp, #16] +++ +++ // number of full blocks, w8 = _w / (stride1 >> 1) == _w / 64 == _w >> 6 +++ mov w8, w7 +++ lsr w8, w8, #6 +++ +++ // number of pixels in block at the end of every row +++ // w9 = _w - (w8 * 64) +++ lsl w9, w8, #6 +++ sub w9, w7, w9 +++ +++ // Skip at the end of the line to account for stride +++ sub w12, w1, w7 +++ +++ // address delta to the beginning of the next block +++ // w10 = (stride2 * stride1 - stride1) = stride2 * 128 - 128 +++ lsl w10, w6, #7 +++ sub w10, w10, #128 +++ +++ // w11 = row address start offset = 0 +++ eor w11, w11, w11 +++ +++row_loop_c8: +++ // start of the first block within the current row +++ // x13 = row offset + src +++ mov x13, x4 +++ add x13, x13, x11 +++ +++ // w14 = 0, processed block count +++ eor w14, w14, w14 +++ +++ cmp w8, #0 +++ beq no_main_c8 +++ +++block_loop_c8: +++ // load the full block -> 128 bytes, the block contains 64 interleaved U and V values +++ ld2 { v0.16b, v1.16b }, [x13], #32 +++ ld2 { v2.16b, v3.16b }, [x13], #32 +++ ld2 { v4.16b, v5.16b }, [x13], #32 +++ ld2 { v6.16b, v7.16b }, [x13], #32 +++ +++ // swap register so that we can write them out with a single instruction +++ mov v16.16b, v1.16b +++ mov v17.16b, v3.16b +++ mov v18.16b, v5.16b +++ mov v1.16b, v2.16b +++ mov v2.16b, v4.16b +++ mov v3.16b, v6.16b +++ mov v4.16b, v16.16b +++ mov v5.16b, v17.16b +++ mov v6.16b, v18.16b +++ +++ st1 { v0.16b, v1.16b, v2.16b, v3.16b }, [x0], #64 +++ st1 { v4.16b, v5.16b, v6.16b, v7.16b }, [x2], #64 +++ +++ // increment row counter and move src to the beginning of the next block +++ add w14, w14, #1 +++ add x13, x13, x10 +++ +++ // jump to block_loop_c8 iff the block count is smaller than the number of full blocks +++ cmp w8, w14 +++ bgt block_loop_c8 +++ +++no_main_c8: +++ // handle incomplete block at the end of every row +++ eor w5, w5, w5 // point counter, this might be +++incomplete_block_loop_c8: +++ cmp w5, w9 +++ bge incomplete_block_loop_end_c8 +++ +++ ldrb w1, [x13] +++ strb w1, [x0] +++ add x13, x13, #1 +++ +++ ldrb w1, [x13] +++ strb w1, [x2] +++ add x13, x13, #1 +++ +++ add x0, x0, #1 +++ add x2, x2, #1 +++ +++ add w5, w5, #1 +++ b incomplete_block_loop_c8 +++incomplete_block_loop_end_c8: +++ +++ // increase row_offset by stride1 +++ add w11, w11, #128 +++ add x0, x0, w12, sxtw +++ add x2, x2, w12, sxtw +++ +++ // jump to row_Loop_c8 iff the row count is small than the height +++ subs w15, w15, #1 +++ bgt row_loop_c8 +++ +++ ret +++endfunc +++ +++//void ff_rpi_sand30_lines_to_planar_c16( +++// uint8_t * dst_u, // [x0] +++// unsigned int dst_stride_u, // [w1] == _w*2 +++// uint8_t * dst_v, // [x2] +++// unsigned int dst_stride_v, // [w3] == _w*2 +++// const uint8_t * src, // [x4] +++// unsigned int stride1, // [w5] == 128 
+++// unsigned int stride2, // [w6] +++// unsigned int _x, // [w7] == 0 +++// unsigned int y, // [sp, #0] == 0 +++// unsigned int _w, // [sp, #8] -> w3 +++// unsigned int h); // [sp, #16] -> w7 +++ +++.macro rpi_sand30_lines_to_planar_c16_block_half +++ ld1 { v0.4s, v1.4s, v2.4s, v3.4s }, [x13], #64 +++ +++ xtn v4.4h, v0.4s +++ ushr v0.4s, v0.4s, #10 +++ xtn v5.4h, v0.4s +++ ushr v0.4s, v0.4s, #10 +++ xtn v6.4h, v0.4s +++ xtn2 v4.8h, v1.4s +++ ushr v1.4s, v1.4s, #10 +++ xtn2 v5.8h, v1.4s +++ ushr v1.4s, v1.4s, #10 +++ xtn2 v6.8h, v1.4s +++ and v4.16b, v4.16b, v16.16b +++ and v5.16b, v5.16b, v16.16b +++ and v6.16b, v6.16b, v16.16b +++ st3 { v4.8h, v5.8h, v6.8h }, [sp], #48 +++ +++ xtn v4.4h, v2.4s +++ ushr v2.4s, v2.4s, #10 +++ xtn v5.4h, v2.4s +++ ushr v2.4s, v2.4s, #10 +++ xtn v6.4h, v2.4s +++ xtn2 v4.8h, v3.4s +++ ushr v3.4s, v3.4s, #10 +++ xtn2 v5.8h, v3.4s +++ ushr v3.4s, v3.4s, #10 +++ xtn2 v6.8h, v3.4s +++ and v4.16b, v4.16b, v16.16b +++ and v5.16b, v5.16b, v16.16b +++ and v6.16b, v6.16b, v16.16b +++ st3 { v4.8h, v5.8h, v6.8h }, [sp] +++ sub sp, sp, #48 +++.endm +++ +++function ff_rpi_sand30_lines_to_planar_c16, export=1 +++ stp x19, x20, [sp, #-48]! +++ stp x21, x22, [sp, #16] +++ stp x23, x24, [sp, #32] +++ +++ ldr w3, [sp, #48+8] // w3 = width +++ ldr w7, [sp, #48+16] // w7 = height +++ +++ // reserve space on the stack for intermediate results +++ sub sp, sp, #256 +++ +++ // number of 128byte blocks per row, w8 = width / 48 +++ mov w9, #48 +++ udiv w8, w3, w9 +++ +++ // remaining pixels (rem_pix) per row, w9 = width - w8 * 48 +++ mul w9, w8, w9 +++ sub w9, w3, w9 +++ +++ // row offset, the beginning of the next row to process +++ eor w10, w10, w10 +++ +++ // offset to the beginning of the next block, w11 = stride2 * 128 - 128 +++ lsl w11, w6, #7 +++ sub w11, w11, #128 +++ +++ // decrease the height by one and in case of remaining pixels increase the block count by one +++ sub w7, w7, #1 +++ cmp w9, #0 +++ cset w19, ne // w19 == 1 iff reamining pixels != 0 +++ add w8, w8, w19 +++ +++ // bytes we have to move dst back by at the end of every row +++ mov w21, #48*2 +++ mul w21, w21, w8 +++ sub w21, w1, w21 +++ +++ mov w20, #0 // w20 = flag, last row processed +++ +++ mov x12, #0x03ff03ff03ff03ff +++ dup v16.2d, x12 +++ +++ // iterate through rows, row counter = w12 = 0 +++ eor w12, w12, w12 +++row_loop_c16: +++ cmp w12, w7 +++ bge row_loop_c16_fin +++ +++ // address of row data = src + row_offset +++ mov x13, x4 +++ add x13, x13, x10 +++ +++ eor w14, w14, w14 +++block_loop_c16: +++ cmp w14, w8 +++ bge block_loop_c16_fin +++ +++ rpi_sand30_lines_to_planar_c16_block_half +++ +++ ld2 { v0.8h, v1.8h }, [sp], #32 +++ ld2 { v2.8h, v3.8h }, [sp], #32 +++ ld2 { v4.8h, v5.8h }, [sp] +++ sub sp, sp, #64 +++ +++ st1 { v0.8h }, [x0], #16 +++ st1 { v2.8h }, [x0], #16 +++ st1 { v4.8h }, [x0], #16 +++ st1 { v1.8h }, [x2], #16 +++ st1 { v3.8h }, [x2], #16 +++ st1 { v5.8h }, [x2], #16 +++ +++ rpi_sand30_lines_to_planar_c16_block_half +++ +++ ld2 { v0.8h, v1.8h }, [sp], #32 +++ ld2 { v2.8h, v3.8h }, [sp], #32 +++ ld2 { v4.8h, v5.8h }, [sp] +++ sub sp, sp, #64 +++ +++ st1 { v0.8h }, [x0], #16 +++ st1 { v2.8h }, [x0], #16 +++ st1 { v4.8h }, [x0], #16 +++ st1 { v1.8h }, [x2], #16 +++ st1 { v3.8h }, [x2], #16 +++ st1 { v5.8h }, [x2], #16 +++ +++ add x13, x13, x11 // offset to next block +++ add w14, w14, #1 +++ b block_loop_c16 +++block_loop_c16_fin: +++ +++ add w10, w10, #128 +++ add w12, w12, #1 +++ add x0, x0, w21, sxtw // move dst pointers back by x21 +++ add x2, x2, w21, sxtw +++ b row_loop_c16 
+++row_loop_c16_fin: +++ +++ cmp w20, #1 +++ beq row_loop_c16_fin2 +++ mov w20, #1 +++ sub w8, w8, w19 // decrease block count by w19 +++ add w7, w7, #1 // increase height +++ b row_loop_c16 +++ +++row_loop_c16_fin2: +++ sub x0, x0, w21, sxtw // readd x21 in case of the last row +++ sub x2, x2, w21, sxtw // so that we can write out the few remaining pixels +++ +++ // last incomplete block to be finished +++ // read operations are fine, stride2 is more than large enough even if rem_pix is 0 +++ rpi_sand30_lines_to_planar_c16_block_half +++ ld2 { v0.8h, v1.8h }, [sp], #32 +++ ld2 { v2.8h, v3.8h }, [sp], #32 +++ ld2 { v4.8h, v5.8h }, [sp], #32 +++ rpi_sand30_lines_to_planar_c16_block_half +++ ld2 { v0.8h, v1.8h }, [sp], #32 +++ ld2 { v2.8h, v3.8h }, [sp], #32 +++ ld2 { v4.8h, v5.8h }, [sp] +++ sub sp, sp, #160 +++ +++ mov x4, sp +++ eor w20, w20, w20 +++rem_pix_c16_loop: +++ cmp w20, w9 +++ bge rem_pix_c16_fin +++ +++ ldr w22, [x4], #4 +++ str w22, [x0], #2 +++ lsr w22, w22, #16 +++ str w22, [x2], #2 +++ +++ add w20, w20, #1 +++ b rem_pix_c16_loop +++rem_pix_c16_fin: +++ +++ add sp, sp, #256 +++ +++ ldp x23, x24, [sp, #32] +++ ldp x21, x22, [sp, #16] +++ ldp x19, x20, [sp], #48 +++ ret +++endfunc +++ +++ +++ +++//void ff_rpi_sand30_lines_to_planar_p010( +++// uint8_t * dest, +++// unsigned int dst_stride, +++// const uint8_t * src, +++// unsigned int src_stride1, +++// unsigned int src_stride2, +++// unsigned int _x, +++// unsigned int y, +++// unsigned int _w, +++// unsigned int h); +++ +++// void ff_rpi_sand30_lines_to_planar_y8( +++// uint8_t * dest, : x0 +++// unsigned int dst_stride, : w1 +++// const uint8_t * src, : x2 +++// unsigned int src_stride1, : w3, always 128 +++// unsigned int src_stride2, : w4 +++// unsigned int _x, : w5 +++// unsigned int y, : w6 +++// unsigned int _w, : w7 +++// unsigned int h); : [sp, #0] +++// +++// Assumes that we are starting on a stripe boundary and that overreading +++// within the stripe is OK. 
However it does respect the dest size for wri +++ +++function ff_rpi_sand30_lines_to_planar_y16, export=1 +++ lsl w4, w4, #7 +++ sub w4, w4, #64 +++ sub w1, w1, w7, lsl #1 +++ uxtw x6, w6 +++ add x8, x2, x6, lsl #7 +++ ldr w6, [sp, #0] +++ +++10: +++ mov x2, x8 +++ mov w5, w7 +++1: +++ ld1 {v0.4s, v1.4s, v2.4s, v3.4s}, [x2], #64 +++ ld1 {v4.4s, v5.4s, v6.4s, v7.4s}, [x2], x4 +++ +++ subs w5, w5, #96 +++ +++ // v0, v1 +++ +++ shrn v18.4h, v0.4s, #14 +++ xtn v16.4h, v0.4s +++ shrn v17.4h, v0.4s, #10 +++ +++ shrn2 v18.8h, v1.4s, #14 +++ xtn2 v16.8h, v1.4s +++ shrn2 v17.8h, v1.4s, #10 +++ +++ ushr v18.8h, v18.8h, #6 +++ bic v16.8h, #0xfc, lsl #8 +++ bic v17.8h, #0xfc, lsl #8 +++ +++ // v2, v3 +++ +++ shrn v21.4h, v2.4s, #14 +++ xtn v19.4h, v2.4s +++ shrn v20.4h, v2.4s, #10 +++ +++ shrn2 v21.8h, v3.4s, #14 +++ xtn2 v19.8h, v3.4s +++ shrn2 v20.8h, v3.4s, #10 +++ +++ ushr v21.8h, v21.8h, #6 +++ bic v19.8h, #0xfc, lsl #8 +++ bic v20.8h, #0xfc, lsl #8 +++ +++ // v4, v5 +++ +++ shrn v24.4h, v4.4s, #14 +++ xtn v22.4h, v4.4s +++ shrn v23.4h, v4.4s, #10 +++ +++ shrn2 v24.8h, v5.4s, #14 +++ xtn2 v22.8h, v5.4s +++ shrn2 v23.8h, v5.4s, #10 +++ +++ ushr v24.8h, v24.8h, #6 +++ bic v22.8h, #0xfc, lsl #8 +++ bic v23.8h, #0xfc, lsl #8 +++ +++ // v6, v7 +++ +++ shrn v27.4h, v6.4s, #14 +++ xtn v25.4h, v6.4s +++ shrn v26.4h, v6.4s, #10 +++ +++ shrn2 v27.8h, v7.4s, #14 +++ xtn2 v25.8h, v7.4s +++ shrn2 v26.8h, v7.4s, #10 +++ +++ ushr v27.8h, v27.8h, #6 +++ bic v25.8h, #0xfc, lsl #8 +++ bic v26.8h, #0xfc, lsl #8 +++ +++ blt 2f +++ +++ st3 {v16.8h, v17.8h, v18.8h}, [x0], #48 +++ st3 {v19.8h, v20.8h, v21.8h}, [x0], #48 +++ st3 {v22.8h, v23.8h, v24.8h}, [x0], #48 +++ st3 {v25.8h, v26.8h, v27.8h}, [x0], #48 +++ +++ bne 1b +++ +++11: +++ subs w6, w6, #1 +++ add x0, x0, w1, uxtw +++ add x8, x8, #128 +++ bne 10b +++ +++ ret +++ +++// Partial final write +++2: +++ cmp w5, #48-96 +++ blt 1f +++ st3 {v16.8h, v17.8h, v18.8h}, [x0], #48 +++ st3 {v19.8h, v20.8h, v21.8h}, [x0], #48 +++ beq 11b +++ mov v16.16b, v22.16b +++ mov v17.16b, v23.16b +++ sub w5, w5, #48 +++ mov v18.16b, v24.16b +++ mov v19.16b, v25.16b +++ mov v20.16b, v26.16b +++ mov v21.16b, v27.16b +++1: +++ cmp w5, #24-96 +++ blt 1f +++ st3 {v16.8h, v17.8h, v18.8h}, [x0], #48 +++ beq 11b +++ mov v16.16b, v19.16b +++ mov v17.16b, v20.16b +++ sub w5, w5, #24 +++ mov v18.16b, v21.16b +++1: +++ cmp w5, #12-96 +++ blt 1f +++ st3 {v16.4h, v17.4h, v18.4h}, [x0], #24 +++ beq 11b +++ mov v16.2d[0], v16.2d[1] +++ sub w5, w5, #12 +++ mov v17.2d[0], v17.2d[1] +++ mov v18.2d[0], v18.2d[1] +++1: +++ cmp w5, #6-96 +++ blt 1f +++ st3 {v16.h, v17.h, v18.h}[0], [x0], #6 +++ st3 {v16.h, v17.h, v18.h}[1], [x0], #6 +++ beq 11b +++ mov v16.2s[0], v16.2s[1] +++ sub w5, w5, #6 +++ mov v17.2s[0], v17.2s[1] +++ mov v18.2s[0], v18.2s[1] +++1: +++ cmp w5, #3-96 +++ blt 1f +++ st3 {v16.h, v17.h, v18.h}[0], [x0], #6 +++ beq 11b +++ mov v16.4h[0], v16.4h[1] +++ sub w5, w5, #3 +++ mov v17.4h[0], v17.4h[1] +++1: +++ cmp w5, #2-96 +++ blt 1f +++ st2 {v16.h, v17.h}[0], [x0], #4 +++ b 11b +++1: +++ st1 {v16.h}[0], [x0], #2 +++ b 11b +++ +++endfunc +++ +++// void ff_rpi_sand30_lines_to_planar_y8( +++// uint8_t * dest, : x0 +++// unsigned int dst_stride, : w1 +++// const uint8_t * src, : x2 +++// unsigned int src_stride1, : w3, always 128 +++// unsigned int src_stride2, : w4 +++// unsigned int _x, : w5 +++// unsigned int y, : w6 +++// unsigned int _w, : w7 +++// unsigned int h); : [sp, #0] +++// +++// Assumes that we are starting on a stripe boundary and that overreading +++// within the stripe is OK. 
However it does respect the dest size for wri +++ +++function ff_rpi_sand30_lines_to_planar_y8, export=1 +++ lsl w4, w4, #7 +++ sub w4, w4, #64 +++ sub w1, w1, w7 +++ uxtw x6, w6 +++ add x8, x2, x6, lsl #7 +++ ldr w6, [sp, #0] +++ +++10: +++ mov x2, x8 +++ mov w5, w7 +++1: +++ ld1 {v0.4s, v1.4s, v2.4s, v3.4s}, [x2], #64 +++ ld1 {v4.4s, v5.4s, v6.4s, v7.4s}, [x2], x4 +++ +++ subs w5, w5, #96 +++ +++ // v0, v1 +++ +++ shrn v18.4h, v0.4s, #16 +++ xtn v16.4h, v0.4s +++ shrn v17.4h, v0.4s, #12 +++ +++ shrn2 v18.8h, v1.4s, #16 +++ xtn2 v16.8h, v1.4s +++ shrn2 v17.8h, v1.4s, #12 +++ +++ shrn v18.8b, v18.8h, #6 +++ shrn v16.8b, v16.8h, #2 +++ xtn v17.8b, v17.8h +++ +++ // v2, v3 +++ +++ shrn v21.4h, v2.4s, #16 +++ xtn v19.4h, v2.4s +++ shrn v20.4h, v2.4s, #12 +++ +++ shrn2 v21.8h, v3.4s, #16 +++ xtn2 v19.8h, v3.4s +++ shrn2 v20.8h, v3.4s, #12 +++ +++ shrn2 v18.16b, v21.8h, #6 +++ shrn2 v16.16b, v19.8h, #2 +++ xtn2 v17.16b, v20.8h +++ +++ // v4, v5 +++ +++ shrn v24.4h, v4.4s, #16 +++ xtn v22.4h, v4.4s +++ shrn v23.4h, v4.4s, #12 +++ +++ shrn2 v24.8h, v5.4s, #16 +++ xtn2 v22.8h, v5.4s +++ shrn2 v23.8h, v5.4s, #12 +++ +++ shrn v21.8b, v24.8h, #6 +++ shrn v19.8b, v22.8h, #2 +++ xtn v20.8b, v23.8h +++ +++ // v6, v7 +++ +++ shrn v27.4h, v6.4s, #16 +++ xtn v25.4h, v6.4s +++ shrn v26.4h, v6.4s, #12 +++ +++ shrn2 v27.8h, v7.4s, #16 +++ xtn2 v25.8h, v7.4s +++ shrn2 v26.8h, v7.4s, #12 +++ +++ shrn2 v21.16b, v27.8h, #6 +++ shrn2 v19.16b, v25.8h, #2 +++ xtn2 v20.16b, v26.8h +++ +++ blt 2f +++ +++ st3 {v16.16b, v17.16b, v18.16b}, [x0], #48 +++ st3 {v19.16b, v20.16b, v21.16b}, [x0], #48 +++ +++ bne 1b +++ +++11: +++ subs w6, w6, #1 +++ add x0, x0, w1, uxtw +++ add x8, x8, #128 +++ bne 10b +++ +++ ret +++ +++// Partial final write +++2: +++ cmp w5, #48-96 +++ blt 1f +++ st3 {v16.16b, v17.16b, v18.16b}, [x0], #48 +++ beq 11b +++ mov v16.16b, v22.16b +++ mov v17.16b, v23.16b +++ sub w5, w5, #48 +++ mov v18.16b, v24.16b +++1: +++ cmp w5, #24-96 +++ blt 1f +++ st3 {v16.8b, v17.8b, v18.8b}, [x0], #24 +++ beq 11b +++ mov v16.2d[0], v16.2d[1] +++ sub w5, w5, #24 +++ mov v17.2d[0], v17.2d[1] +++ mov v18.2d[0], v18.2d[1] +++1: +++ cmp w5, #12-96 +++ blt 1f +++ st3 {v16.b, v17.b, v18.b}[0], [x0], #3 +++ st3 {v16.b, v17.b, v18.b}[1], [x0], #3 +++ st3 {v16.b, v17.b, v18.b}[2], [x0], #3 +++ st3 {v16.b, v17.b, v18.b}[3], [x0], #3 +++ beq 11b +++ mov v16.2s[0], v16.2s[1] +++ sub w5, w5, #12 +++ mov v17.2s[0], v17.2s[1] +++ mov v18.2s[0], v18.2s[1] +++1: +++ cmp w5, #6-96 +++ blt 1f +++ st3 {v16.b, v17.b, v18.b}[0], [x0], #3 +++ st3 {v16.b, v17.b, v18.b}[1], [x0], #3 +++ beq 11b +++ mov v16.4h[0], v16.4h[1] +++ sub w5, w5, #6 +++ mov v17.4h[0], v17.4h[1] +++ mov v18.4h[0], v18.4h[1] +++1: +++ cmp w5, #3-96 +++ blt 1f +++ st3 {v16.b, v17.b, v18.b}[0], [x0], #3 +++ beq 11b +++ mov v16.8b[0], v16.8b[1] +++ sub w5, w5, #3 +++ mov v17.8b[0], v17.8b[1] +++1: +++ cmp w5, #2-96 +++ blt 1f +++ st2 {v16.b, v17.b}[0], [x0], #2 +++ b 11b +++1: +++ st1 {v16.b}[0], [x0], #1 +++ b 11b +++ +++endfunc +++ ++diff --git a/libavutil/aarch64/rpi_sand_neon.h b/libavutil/aarch64/rpi_sand_neon.h ++new file mode 100644 ++index 0000000000..2a56135bc3 ++--- /dev/null +++++ b/libavutil/aarch64/rpi_sand_neon.h ++@@ -0,0 +1,59 @@ +++/* +++Copyright (c) 2021 Michael Eiler +++ +++Redistribution and use in source and binary forms, with or without +++modification, are permitted provided that the following conditions are met: +++ * Redistributions of source code must retain the above copyright +++ notice, this list of conditions and the following disclaimer. 
+++ * Redistributions in binary form must reproduce the above copyright +++ notice, this list of conditions and the following disclaimer in the +++ documentation and/or other materials provided with the distribution. +++ * Neither the name of the copyright holder nor the +++ names of its contributors may be used to endorse or promote products +++ derived from this software without specific prior written permission. +++ +++THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS" AND +++ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED +++WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE +++DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT HOLDER OR CONTRIBUTORS BE LIABLE FOR ANY +++DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES +++(INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; +++LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND +++ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT +++(INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS +++SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. +++ +++Authors: Michael Eiler +++*/ +++ +++#pragma once +++ +++#ifdef __cplusplus +++extern "C" { +++#endif +++ +++void ff_rpi_sand8_lines_to_planar_y8(uint8_t * dest, unsigned int dst_stride, +++ const uint8_t * src, unsigned int src_stride1, unsigned int src_stride2, +++ unsigned int _x, unsigned int y, unsigned int _w, unsigned int h); +++ +++void ff_rpi_sand8_lines_to_planar_c8(uint8_t * dst_u, unsigned int dst_stride_u, +++ uint8_t * dst_v, unsigned int dst_stride_v, const uint8_t * src, +++ unsigned int stride1, unsigned int stride2, unsigned int _x, unsigned int y, +++ unsigned int _w, unsigned int h); +++ +++void ff_rpi_sand30_lines_to_planar_y16(uint8_t * dest, unsigned int dst_stride, +++ const uint8_t * src, unsigned int src_stride1, unsigned int src_stride2, +++ unsigned int _x, unsigned int y, unsigned int _w, unsigned int h); +++ +++void ff_rpi_sand30_lines_to_planar_c16(uint8_t * dst_u, unsigned int dst_stride_u, +++ uint8_t * dst_v, unsigned int dst_stride_v, const uint8_t * src, unsigned int stride1, +++ unsigned int stride2, unsigned int _x, unsigned int y, unsigned int _w, unsigned int h); +++ +++void ff_rpi_sand30_lines_to_planar_y8(uint8_t * dest, unsigned int dst_stride, +++ const uint8_t * src, unsigned int src_stride1, unsigned int src_stride2, +++ unsigned int _x, unsigned int y, unsigned int _w, unsigned int h); +++ +++#ifdef __cplusplus +++} +++#endif +++ ++diff --git a/libavutil/arm/Makefile b/libavutil/arm/Makefile ++index 5da44b0542..b74b7c4e2f 100644 ++--- a/libavutil/arm/Makefile +++++ b/libavutil/arm/Makefile ++@@ -6,3 +6,4 @@ VFP-OBJS += arm/float_dsp_init_vfp.o \ ++ ++ NEON-OBJS += arm/float_dsp_init_neon.o \ ++ arm/float_dsp_neon.o \ +++ arm/rpi_sand_neon.o \ ++diff --git a/libavutil/arm/rpi_sand_neon.S b/libavutil/arm/rpi_sand_neon.S ++new file mode 100644 ++index 0000000000..60e697f681 ++--- /dev/null +++++ b/libavutil/arm/rpi_sand_neon.S ++@@ -0,0 +1,925 @@ +++/* +++Copyright (c) 2018 Raspberry Pi (Trading) Ltd. +++All rights reserved. +++ +++Redistribution and use in source and binary forms, with or without +++modification, are permitted provided that the following conditions are met: +++ * Redistributions of source code must retain the above copyright +++ notice, this list of conditions and the following disclaimer. 
+++ * Redistributions in binary form must reproduce the above copyright +++ notice, this list of conditions and the following disclaimer in the +++ documentation and/or other materials provided with the distribution. +++ * Neither the name of the copyright holder nor the +++ names of its contributors may be used to endorse or promote products +++ derived from this software without specific prior written permission. +++ +++THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS" AND +++ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED +++WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE +++DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT HOLDER OR CONTRIBUTORS BE LIABLE FOR ANY +++DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES +++(INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; +++LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND +++ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT +++(INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS +++SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. +++ +++Authors: John Cox +++*/ +++ +++#include "libavutil/arm/asm.S" +++ +++ +++@ General notes: +++@ Having done some timing on this in sand8->y8 (Pi4) +++@ vst1 (680fps) is a bit faster than vstm (660fps) +++@ vldm (680fps) is noticably faster than vld1 (480fps) +++@ (or it might be that a mix is what is required) +++@ +++@ At least on a Pi4 it is no more expensive to have a single auto-inc register +++@ for dest address than it is to have 2 used alternately (On Pi3 Ben asserted +++@ the latter was better) +++@ +++@ vstm will bus error on unaligned access (so will vldm), vst1 is safe unless +++@ the memory is uncached. +++@ As these are Sand -> planar we can assume that src is going to be aligned but +++@ it is possible that dest isn't (converting to .yuv or other packed format). +++@ Luckily vst1 is faster than vstm :-) so all is well +++@ vst1 has alignment requirements of el size so maybe splitting vst1.32 into 4 +++@ .8 stores would let us do non-word aligned stores into uncached but it +++@ probably isn't worth it. 
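+++@
+++@ For reference, an illustrative recap of the sand layout these functions (and
+++@ the C fall-backs in rpi_sand_fn_pw.h elsewhere in this patch) assume: with a
+++@ stripe width of stride1 bytes (a power of two, always 128 here) and a stripe
+++@ height of stride2 rows, the byte offset of column x, row y from a plane's
+++@ base pointer is
+++@   (x & (stride1 - 1)) + y * stride1 + (x & ~(stride1 - 1)) * stride2
+++@ e.g. (example values) stride1 = 128, stride2 = 1088, x = 300, y = 2 gives
+++@   44 + 2 * 128 + 256 * 1088 = 278828 bytes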
+++ +++ +++ +++ +++@ void ff_rpi_sand128b_stripe_to_8_10( +++@ uint8_t * dest, // [r0] +++@ const uint8_t * src1, // [r1] +++@ const uint8_t * src2, // [r2] +++@ unsigned int lines); // [r3] +++ +++.macro stripe2_to_8, bit_depth +++ vpush {q4-q7} +++1: +++ vldm r1!, {q0-q7} +++ subs r3, #1 +++ vldm r2!, {q8-q15} +++ vqrshrn.u16 d0, q0, #\bit_depth - 8 +++ vqrshrn.u16 d1, q1, #\bit_depth - 8 +++ vqrshrn.u16 d2, q2, #\bit_depth - 8 +++ vqrshrn.u16 d3, q3, #\bit_depth - 8 +++ vqrshrn.u16 d4, q4, #\bit_depth - 8 +++ vqrshrn.u16 d5, q5, #\bit_depth - 8 +++ vqrshrn.u16 d6, q6, #\bit_depth - 8 +++ vqrshrn.u16 d7, q7, #\bit_depth - 8 +++ vqrshrn.u16 d8, q8, #\bit_depth - 8 +++ vqrshrn.u16 d9, q9, #\bit_depth - 8 +++ vqrshrn.u16 d10, q10, #\bit_depth - 8 +++ vqrshrn.u16 d11, q11, #\bit_depth - 8 +++ vqrshrn.u16 d12, q12, #\bit_depth - 8 +++ vqrshrn.u16 d13, q13, #\bit_depth - 8 +++ vqrshrn.u16 d14, q14, #\bit_depth - 8 +++ vqrshrn.u16 d15, q15, #\bit_depth - 8 +++ vstm r0!, {q0-q7} +++ bne 1b +++ vpop {q4-q7} +++ bx lr +++.endm +++ +++function ff_rpi_sand128b_stripe_to_8_10, export=1 +++ stripe2_to_8 10 +++endfunc +++ +++@ void ff_rpi_sand8_lines_to_planar_y8( +++@ uint8_t * dest, // [r0] +++@ unsigned int dst_stride, // [r1] +++@ const uint8_t * src, // [r2] +++@ unsigned int src_stride1, // [r3] Ignored - assumed 128 +++@ unsigned int src_stride2, // [sp, #0] -> r3 +++@ unsigned int _x, // [sp, #4] Ignored - 0 +++@ unsigned int y, // [sp, #8] (r7 in prefix) +++@ unsigned int _w, // [sp, #12] -> r6 (cur r5) +++@ unsigned int h); // [sp, #16] -> r7 +++@ +++@ Assumes that we are starting on a stripe boundary and that overreading +++@ within the stripe is OK. However it does respect the dest size for writing +++ +++function ff_rpi_sand8_lines_to_planar_y8, export=1 +++ push {r4-r8, lr} @ +24 L +++ ldr r3, [sp, #24] +++ ldr r6, [sp, #36] +++ ldr r7, [sp, #32] @ y +++ lsl r3, #7 +++ sub r1, r6 +++ add r8, r2, r7, lsl #7 +++ ldr r7, [sp, #40] +++ +++10: +++ mov r2, r8 +++ add r4, r0, #24 +++ mov r5, r6 +++ mov lr, #0 +++1: +++ vldm r2, {q8-q15} +++ add r2, r3 +++ subs r5, #128 +++ blt 2f +++ vst1.8 {d16, d17, d18, d19}, [r0]! +++ vst1.8 {d20, d21, d22, d23}, [r0]! +++ vst1.8 {d24, d25, d26, d27}, [r0]! +++ vst1.8 {d28, d29, d30, d31}, [r0]! +++ bne 1b +++11: +++ subs r7, #1 +++ add r0, r1 +++ add r8, #128 +++ bne 10b +++ +++ pop {r4-r8, pc} +++ +++@ Partial final write +++2: +++ cmp r5, #64-128 +++ blt 1f +++ vst1.8 {d16, d17, d18, d19}, [r0]! +++ vst1.8 {d20, d21, d22, d23}, [r0]! +++ beq 11b +++ vmov q8, q12 +++ vmov q9, q13 +++ sub r5, #64 +++ vmov q10, q14 +++ vmov q11, q15 +++1: +++ cmp r5, #32-128 +++ blt 1f +++ vst1.8 {d16, d17, d18, d19}, [r0]! +++ beq 11b +++ vmov q8, q10 +++ sub r5, #32 +++ vmov q9, q11 +++1: +++ cmp r5, #16-128 +++ blt 1f +++ vst1.8 {d16, d17}, [r0]! +++ beq 11b +++ sub r5, #16 +++ vmov q8, q9 +++1: +++ cmp r5, #8-128 +++ blt 1f +++ vst1.8 {d16}, [r0]! +++ beq 11b +++ sub r5, #8 +++ vmov d16, d17 +++1: +++ cmp r5, #4-128 +++ blt 1f +++ vst1.32 {d16[0]}, [r0]! +++ beq 11b +++ sub r5, #4 +++ vshr.u64 d16, #32 +++1: +++ cmp r5, #2-128 +++ blt 1f +++ vst1.16 {d16[0]}, [r0]! +++ beq 11b +++ vst1.8 {d16[2]}, [r0]! +++ b 11b +++1: +++ vst1.8 {d16[0]}, [r0]! 
+++ b 11b +++endfunc +++ +++@ void ff_rpi_sand8_lines_to_planar_c8( +++@ uint8_t * dst_u, // [r0] +++@ unsigned int dst_stride_u, // [r1] +++@ uint8_t * dst_v, // [r2] +++@ unsigned int dst_stride_v, // [r3] +++@ const uint8_t * src, // [sp, #0] -> r4, r5 +++@ unsigned int stride1, // [sp, #4] 128 +++@ unsigned int stride2, // [sp, #8] -> r8 +++@ unsigned int _x, // [sp, #12] 0 +++@ unsigned int y, // [sp, #16] (r7 in prefix) +++@ unsigned int _w, // [sp, #20] -> r12, r6 +++@ unsigned int h); // [sp, #24] -> r7 +++@ +++@ Assumes that we are starting on a stripe boundary and that overreading +++@ within the stripe is OK. However it does respect the dest size for writing +++ +++function ff_rpi_sand8_lines_to_planar_c8, export=1 +++ push {r4-r8, lr} @ +24 +++ +++ ldr r5, [sp, #24] +++ ldr r8, [sp, #32] +++ ldr r7, [sp, #40] +++ ldr r6, [sp, #44] +++ lsl r8, #7 +++ add r5, r5, r7, lsl #7 +++ sub r1, r1, r6 +++ sub r3, r3, r6 +++ ldr r7, [sp, #48] +++ vpush {q4-q7} +++ +++10: +++ mov r4, r5 +++ mov r12, r6 +++1: +++ subs r12, #64 +++ vldm r4, {q0-q7} +++ add r4, r8 +++ it gt +++ vldmgt r4, {q8-q15} +++ add r4, r8 +++ +++ vuzp.8 q0, q1 +++ vuzp.8 q2, q3 +++ vuzp.8 q4, q5 +++ vuzp.8 q6, q7 +++ +++ vuzp.8 q8, q9 +++ vuzp.8 q10, q11 +++ vuzp.8 q12, q13 +++ vuzp.8 q14, q15 +++ subs r12, #64 +++ +++ @ Rearrange regs so we can use vst1 with 4 regs +++ vswp q1, q2 +++ vswp q5, q6 +++ vswp q9, q10 +++ vswp q13, q14 +++ blt 2f +++ +++ vst1.8 {d0, d1, d2, d3 }, [r0]! +++ vst1.8 {d8, d9, d10, d11}, [r0]! +++ vst1.8 {d16, d17, d18, d19}, [r0]! +++ vst1.8 {d24, d25, d26, d27}, [r0]! +++ +++ vst1.8 {d4, d5, d6, d7 }, [r2]! +++ vst1.8 {d12, d13, d14, d15}, [r2]! +++ vst1.8 {d20, d21, d22, d23}, [r2]! +++ vst1.8 {d28, d29, d30, d31}, [r2]! +++ bne 1b +++11: +++ subs r7, #1 +++ add r5, #128 +++ add r0, r1 +++ add r2, r3 +++ bne 10b +++ vpop {q4-q7} +++ pop {r4-r8,pc} +++ +++2: +++ cmp r12, #64-128 +++ blt 1f +++ vst1.8 {d0, d1, d2, d3 }, [r0]! +++ vst1.8 {d8, d9, d10, d11}, [r0]! +++ vst1.8 {d4, d5, d6, d7 }, [r2]! +++ vst1.8 {d12, d13, d14, d15}, [r2]! +++ beq 11b +++ sub r12, #64 +++ vmov q0, q8 +++ vmov q1, q9 +++ vmov q2, q10 +++ vmov q3, q11 +++ vmov q4, q12 +++ vmov q5, q13 +++ vmov q6, q14 +++ vmov q7, q15 +++1: +++ cmp r12, #32-128 +++ blt 1f +++ vst1.8 {d0, d1, d2, d3 }, [r0]! +++ vst1.8 {d4, d5, d6, d7 }, [r2]! +++ beq 11b +++ sub r12, #32 +++ vmov q0, q4 +++ vmov q1, q5 +++ vmov q2, q6 +++ vmov q3, q7 +++1: +++ cmp r12, #16-128 +++ blt 1f +++ vst1.8 {d0, d1 }, [r0]! +++ vst1.8 {d4, d5 }, [r2]! +++ beq 11b +++ sub r12, #16 +++ vmov q0, q1 +++ vmov q2, q3 +++1: +++ cmp r12, #8-128 +++ blt 1f +++ vst1.8 {d0}, [r0]! +++ vst1.8 {d4}, [r2]! +++ beq 11b +++ sub r12, #8 +++ vmov d0, d1 +++ vmov d4, d5 +++1: +++ cmp r12, #4-128 +++ blt 1f +++ vst1.32 {d0[0]}, [r0]! +++ vst1.32 {d4[0]}, [r2]! +++ beq 11b +++ sub r12, #4 +++ vmov s0, s1 +++ vmov s8, s9 +++1: +++ cmp r12, #2-128 +++ blt 1f +++ vst1.16 {d0[0]}, [r0]! +++ vst1.16 {d4[0]}, [r2]! +++ beq 11b +++ vst1.8 {d0[2]}, [r0]! +++ vst1.8 {d4[2]}, [r2]! +++ b 11b +++1: +++ vst1.8 {d0[0]}, [r0]! +++ vst1.8 {d4[0]}, [r2]! 
+++ b 11b +++endfunc +++ +++ +++ +++@ void ff_rpi_sand30_lines_to_planar_y16( +++@ uint8_t * dest, // [r0] +++@ unsigned int dst_stride, // [r1] +++@ const uint8_t * src, // [r2] +++@ unsigned int src_stride1, // [r3] Ignored - assumed 128 +++@ unsigned int src_stride2, // [sp, #0] -> r3 +++@ unsigned int _x, // [sp, #4] Ignored - 0 +++@ unsigned int y, // [sp, #8] (r7 in prefix) +++@ unsigned int _w, // [sp, #12] -> r6 (cur r5) +++@ unsigned int h); // [sp, #16] -> r7 +++@ +++@ Assumes that we are starting on a stripe boundary and that overreading +++@ within the stripe is OK. However it does respect the dest size for writing +++ +++function ff_rpi_sand30_lines_to_planar_y16, export=1 +++ push {r4-r8, lr} @ +24 +++ ldr r3, [sp, #24] +++ ldr r6, [sp, #36] +++ ldr r7, [sp, #32] @ y +++ mov r12, #48 +++ sub r3, #1 +++ lsl r3, #7 +++ sub r1, r1, r6, lsl #1 +++ add r8, r2, r7, lsl #7 +++ ldr r7, [sp, #40] +++ +++10: +++ mov r2, r8 +++ add r4, r0, #24 +++ mov r5, r6 +++ mov lr, #0 +++1: +++ vldm r2!, {q10-q13} +++ add lr, #64 +++ +++ vshrn.u32 d4 , q10, #14 @ Cannot vshrn.u32 #20! +++ ands lr, #127 +++ vshrn.u32 d2, q10, #10 +++ vmovn.u32 d0, q10 +++ +++ vshrn.u32 d5, q11, #14 +++ it eq +++ addeq r2, r3 +++ vshrn.u32 d3, q11, #10 +++ vmovn.u32 d1, q11 +++ +++ subs r5, #48 +++ vshr.u16 q2, #6 +++ vbic.u16 q0, #0xfc00 +++ vbic.u16 q1, #0xfc00 +++ +++ vshrn.u32 d20, q12, #14 +++ vshrn.u32 d18, q12, #10 +++ vmovn.u32 d16, q12 +++ +++ vshrn.u32 d21, q13, #14 +++ vshrn.u32 d19, q13, #10 +++ vmovn.u32 d17, q13 +++ +++ vshr.u16 q10, #6 +++ vbic.u16 q8, #0xfc00 +++ vbic.u16 q9 , #0xfc00 +++ blt 2f +++ +++ vst3.16 {d0, d2, d4}, [r0], r12 +++ vst3.16 {d1, d3, d5}, [r4], r12 +++ vst3.16 {d16, d18, d20}, [r0], r12 +++ vst3.16 {d17, d19, d21}, [r4], r12 +++ +++ bne 1b +++ +++11: +++ subs r7, #1 +++ add r0, r1 +++ add r8, #128 +++ bne 10b +++ +++ pop {r4-r8, pc} +++ +++@ Partial final write +++2: +++ cmp r5, #24-48 +++ blt 1f +++ vst3.16 {d0, d2, d4}, [r0], r12 +++ vst3.16 {d1, d3, d5}, [r4] +++ beq 11b +++ vmov q0, q8 +++ sub r5, #24 +++ vmov q1, q9 +++ vmov q2, q10 +++1: +++ cmp r5, #12-48 +++ blt 1f +++ vst3.16 {d0, d2, d4}, [r0]! +++ beq 11b +++ vmov d0, d1 +++ sub r5, #12 +++ vmov d2, d3 +++ vmov d4, d5 +++1: +++ cmp r5, #6-48 +++ add r4, r0, #6 @ avoid [r0]! on sequential instructions +++ blt 1f +++ vst3.16 {d0[0], d2[0], d4[0]}, [r0] +++ vst3.16 {d0[1], d2[1], d4[1]}, [r4] +++ add r0, #12 +++ beq 11b +++ vmov s0, s1 +++ sub r5, #6 +++ vmov s4, s5 +++ vmov s8, s9 +++1: +++ cmp r5, #3-48 +++ blt 1f +++ vst3.16 {d0[0], d2[0], d4[0]}, [r0]! +++ beq 11b +++ sub r5, #3 +++ vshr.u32 d0, #16 +++ vshr.u32 d2, #16 +++1: +++ cmp r5, #2-48 +++ blt 1f +++ vst2.16 {d0[0], d2[0]}, [r0]! +++ b 11b +++1: +++ vst1.16 {d0[0]}, [r0]! +++ b 11b +++ +++endfunc +++ +++ +++@ void ff_rpi_sand30_lines_to_planar_c16( +++@ uint8_t * dst_u, // [r0] +++@ unsigned int dst_stride_u, // [r1] +++@ uint8_t * dst_v, // [r2] +++@ unsigned int dst_stride_v, // [r3] +++@ const uint8_t * src, // [sp, #0] -> r4, r5 +++@ unsigned int stride1, // [sp, #4] 128 +++@ unsigned int stride2, // [sp, #8] -> r8 +++@ unsigned int _x, // [sp, #12] 0 +++@ unsigned int y, // [sp, #16] (r7 in prefix) +++@ unsigned int _w, // [sp, #20] -> r6, r9 +++@ unsigned int h); // [sp, #24] -> r7 +++@ +++@ Assumes that we are starting on a stripe boundary and that overreading +++@ within the stripe is OK. 
However it does respect the dest size for writing +++ +++function ff_rpi_sand30_lines_to_planar_c16, export=1 +++ push {r4-r10, lr} @ +32 +++ ldr r5, [sp, #32] +++ ldr r8, [sp, #40] +++ ldr r7, [sp, #48] +++ ldr r9, [sp, #52] +++ mov r12, #48 +++ sub r8, #1 +++ lsl r8, #7 +++ add r5, r5, r7, lsl #7 +++ sub r1, r1, r9, lsl #1 +++ sub r3, r3, r9, lsl #1 +++ ldr r7, [sp, #56] +++10: +++ mov lr, #0 +++ mov r4, r5 +++ mov r6, r9 +++1: +++ vldm r4!, {q0-q3} +++ add lr, #64 +++ +++ @ N.B. unpack [0,1,2] -> (reg order) 1, 0, 2 +++ vshrn.u32 d20, q0, #14 +++ vmovn.u32 d18, q0 +++ vshrn.u32 d0, q0, #10 +++ ands lr, #127 +++ +++ vshrn.u32 d21, q1, #14 +++ vmovn.u32 d19, q1 +++ vshrn.u32 d1, q1, #10 +++ +++ vshrn.u32 d22, q2, #10 +++ vmovn.u32 d2, q2 +++ vshrn.u32 d4, q2, #14 +++ +++ add r10, r0, #24 +++ vshrn.u32 d23, q3, #10 +++ vmovn.u32 d3, q3 +++ vshrn.u32 d5, q3, #14 +++ +++ it eq +++ addeq r4, r8 +++ vuzp.16 q0, q11 +++ vuzp.16 q9, q1 +++ vuzp.16 q10, q2 +++ +++ @ q0 V0, V3,.. +++ @ q9 U0, U3... +++ @ q10 U1, U4... +++ @ q11 U2, U5,.. +++ @ q1 V1, V4, +++ @ q2 V2, V5,.. +++ +++ subs r6, #24 +++ vbic.u16 q11, #0xfc00 +++ vbic.u16 q9, #0xfc00 +++ vshr.u16 q10, #6 +++ vshr.u16 q2, #6 +++ vbic.u16 q0, #0xfc00 +++ vbic.u16 q1, #0xfc00 +++ +++ blt 2f +++ +++ vst3.16 {d18, d20, d22}, [r0], r12 +++ vst3.16 {d19, d21, d23}, [r10] +++ add r10, r2, #24 +++ vst3.16 {d0, d2, d4}, [r2], r12 +++ vst3.16 {d1, d3, d5}, [r10] +++ +++ bne 1b +++ +++11: +++ subs r7, #1 +++ add r5, #128 +++ add r0, r1 +++ add r2, r3 +++ bne 10b +++ +++ pop {r4-r10, pc} +++ +++@ Partial final write +++2: +++ cmp r6, #-12 +++ blt 1f +++ vst3.16 {d18, d20, d22}, [r0]! +++ vst3.16 {d0, d2, d4}, [r2]! +++ beq 11b +++ vmov d18, d19 +++ vmov d20, d21 +++ vmov d22, d23 +++ sub r6, #12 +++ vmov d0, d1 +++ vmov d2, d3 +++ vmov d4, d5 +++1: +++ cmp r6, #-18 +++ @ Rezip here as it makes the remaining tail handling easier +++ vzip.16 d0, d18 +++ vzip.16 d2, d20 +++ vzip.16 d4, d22 +++ blt 1f +++ vst3.16 {d0[1], d2[1], d4[1]}, [r0]! +++ vst3.16 {d0[0], d2[0], d4[0]}, [r2]! +++ vst3.16 {d0[3], d2[3], d4[3]}, [r0]! +++ vst3.16 {d0[2], d2[2], d4[2]}, [r2]! +++ beq 11b +++ vmov d0, d18 +++ vmov d2, d20 +++ sub r6, #6 +++ vmov d4, d22 +++1: +++ cmp r6, #-21 +++ blt 1f +++ vst3.16 {d0[1], d2[1], d4[1]}, [r0]! +++ vst3.16 {d0[0], d2[0], d4[0]}, [r2]! +++ beq 11b +++ vmov s4, s5 +++ sub r6, #3 +++ vmov s0, s1 +++1: +++ cmp r6, #-22 +++ blt 1f +++ vst2.16 {d0[1], d2[1]}, [r0]! +++ vst2.16 {d0[0], d2[0]}, [r2]! +++ b 11b +++1: +++ vst1.16 {d0[1]}, [r0]! +++ vst1.16 {d0[0]}, [r2]! +++ b 11b +++ +++endfunc +++ +++@ void ff_rpi_sand30_lines_to_planar_p010( +++@ uint8_t * dest, // [r0] +++@ unsigned int dst_stride, // [r1] +++@ const uint8_t * src, // [r2] +++@ unsigned int src_stride1, // [r3] Ignored - assumed 128 +++@ unsigned int src_stride2, // [sp, #0] -> r3 +++@ unsigned int _x, // [sp, #4] Ignored - 0 +++@ unsigned int y, // [sp, #8] (r7 in prefix) +++@ unsigned int _w, // [sp, #12] -> r6 (cur r5) +++@ unsigned int h); // [sp, #16] -> r7 +++@ +++@ Assumes that we are starting on a stripe boundary and that overreading +++@ within the stripe is OK. 
However it does respect the dest size for writing +++ +++function ff_rpi_sand30_lines_to_planar_p010, export=1 +++ push {r4-r8, lr} @ +24 +++ ldr r3, [sp, #24] +++ ldr r6, [sp, #36] +++ ldr r7, [sp, #32] @ y +++ mov r12, #48 +++ vmov.u16 q15, #0xffc0 +++ sub r3, #1 +++ lsl r3, #7 +++ sub r1, r1, r6, lsl #1 +++ add r8, r2, r7, lsl #7 +++ ldr r7, [sp, #40] +++ +++10: +++ mov r2, r8 +++ add r4, r0, #24 +++ mov r5, r6 +++ mov lr, #0 +++1: +++ vldm r2!, {q10-q13} +++ add lr, #64 +++ +++ vshl.u32 q14, q10, #6 +++ ands lr, #127 +++ vshrn.u32 d4, q10, #14 +++ vshrn.u32 d2, q10, #4 +++ vmovn.u32 d0, q14 +++ +++ vshl.u32 q14, q11, #6 +++ it eq +++ addeq r2, r3 +++ vshrn.u32 d5, q11, #14 +++ vshrn.u32 d3, q11, #4 +++ vmovn.u32 d1, q14 +++ +++ subs r5, #48 +++ vand q2, q15 +++ vand q1, q15 +++ vand q0, q15 +++ +++ vshl.u32 q14, q12, #6 +++ vshrn.u32 d20, q12, #14 +++ vshrn.u32 d18, q12, #4 +++ vmovn.u32 d16, q14 +++ +++ vshl.u32 q14, q13, #6 +++ vshrn.u32 d21, q13, #14 +++ vshrn.u32 d19, q13, #4 +++ vmovn.u32 d17, q14 +++ +++ vand q10, q15 +++ vand q9, q15 +++ vand q8, q15 +++ blt 2f +++ +++ vst3.16 {d0, d2, d4}, [r0], r12 +++ vst3.16 {d1, d3, d5}, [r4], r12 +++ vst3.16 {d16, d18, d20}, [r0], r12 +++ vst3.16 {d17, d19, d21}, [r4], r12 +++ +++ bne 1b +++ +++11: +++ subs r7, #1 +++ add r0, r1 +++ add r8, #128 +++ bne 10b +++ +++ pop {r4-r8, pc} +++ +++@ Partial final write +++2: +++ cmp r5, #24-48 +++ blt 1f +++ vst3.16 {d0, d2, d4}, [r0], r12 +++ vst3.16 {d1, d3, d5}, [r4] +++ beq 11b +++ vmov q0, q8 +++ sub r5, #24 +++ vmov q1, q9 +++ vmov q2, q10 +++1: +++ cmp r5, #12-48 +++ blt 1f +++ vst3.16 {d0, d2, d4}, [r0]! +++ beq 11b +++ vmov d0, d1 +++ sub r5, #12 +++ vmov d2, d3 +++ vmov d4, d5 +++1: +++ cmp r5, #6-48 +++ add r4, r0, #6 @ avoid [r0]! on sequential instructions +++ blt 1f +++ vst3.16 {d0[0], d2[0], d4[0]}, [r0] +++ vst3.16 {d0[1], d2[1], d4[1]}, [r4] +++ add r0, #12 +++ beq 11b +++ vmov s0, s1 +++ sub r5, #6 +++ vmov s4, s5 +++ vmov s8, s9 +++1: +++ cmp r5, #3-48 +++ blt 1f +++ vst3.16 {d0[0], d2[0], d4[0]}, [r0]! +++ beq 11b +++ sub r5, #3 +++ vshr.u32 d0, #16 +++ vshr.u32 d2, #16 +++1: +++ cmp r5, #2-48 +++ blt 1f +++ vst2.16 {d0[0], d2[0]}, [r0]! +++ b 11b +++1: +++ vst1.16 {d0[0]}, [r0]! +++ b 11b +++ +++endfunc +++ +++ +++@ void ff_rpi_sand30_lines_to_planar_y8( +++@ uint8_t * dest, // [r0] +++@ unsigned int dst_stride, // [r1] +++@ const uint8_t * src, // [r2] +++@ unsigned int src_stride1, // [r3] Ignored - assumed 128 +++@ unsigned int src_stride2, // [sp, #0] -> r3 +++@ unsigned int _x, // [sp, #4] Ignored - 0 +++@ unsigned int y, // [sp, #8] (r7 in prefix) +++@ unsigned int _w, // [sp, #12] -> r6 (cur r5) +++@ unsigned int h); // [sp, #16] -> r7 +++@ +++@ Assumes that we are starting on a stripe boundary and that overreading +++@ within the stripe is OK. However it does respect the dest size for wri +++ +++function ff_rpi_sand30_lines_to_planar_y8, export=1 +++ push {r4-r8, lr} @ +24 +++ ldr r3, [sp, #24] +++ ldr r6, [sp, #36] +++ ldr r7, [sp, #32] @ y +++ mov r12, #48 +++ lsl r3, #7 +++ sub r1, r1, r6 +++ add r8, r2, r7, lsl #7 +++ ldr r7, [sp, #40] +++ +++10: +++ mov r2, r8 +++ add r4, r0, #24 +++ mov r5, r6 +++1: +++ vldm r2, {q8-q15} +++ +++ subs r5, #96 +++ +++ vmovn.u32 d0, q8 +++ vshrn.u32 d2, q8, #12 +++ vshrn.u32 d4, q8, #16 @ Cannot vshrn.u32 #20! 
+++ +++ add r2, r3 +++ +++ vmovn.u32 d1, q9 +++ vshrn.u32 d3, q9, #12 +++ vshrn.u32 d5, q9, #16 +++ +++ pld [r2, #0] +++ +++ vshrn.u16 d0, q0, #2 +++ vmovn.u16 d1, q1 +++ vshrn.u16 d2, q2, #6 +++ +++ vmovn.u32 d16, q10 +++ vshrn.u32 d18, q10, #12 +++ vshrn.u32 d20, q10, #16 +++ +++ vmovn.u32 d17, q11 +++ vshrn.u32 d19, q11, #12 +++ vshrn.u32 d21, q11, #16 +++ +++ pld [r2, #64] +++ +++ vshrn.u16 d4, q8, #2 +++ vmovn.u16 d5, q9 +++ vshrn.u16 d6, q10, #6 +++ +++ vmovn.u32 d16, q12 +++ vshrn.u32 d18, q12, #12 +++ vshrn.u32 d20, q12, #16 +++ +++ vmovn.u32 d17, q13 +++ vshrn.u32 d19, q13, #12 +++ vshrn.u32 d21, q13, #16 +++ +++ vshrn.u16 d16, q8, #2 +++ vmovn.u16 d17, q9 +++ vshrn.u16 d18, q10, #6 +++ +++ vmovn.u32 d20, q14 +++ vshrn.u32 d22, q14, #12 +++ vshrn.u32 d24, q14, #16 +++ +++ vmovn.u32 d21, q15 +++ vshrn.u32 d23, q15, #12 +++ vshrn.u32 d25, q15, #16 +++ +++ vshrn.u16 d20, q10, #2 +++ vmovn.u16 d21, q11 +++ vshrn.u16 d22, q12, #6 +++ +++ blt 2f +++ +++ vst3.8 {d0, d1, d2}, [r0], r12 +++ vst3.8 {d4, d5, d6}, [r4], r12 +++ vst3.8 {d16, d17, d18}, [r0], r12 +++ vst3.8 {d20, d21, d22}, [r4], r12 +++ +++ bne 1b +++ +++11: +++ subs r7, #1 +++ add r0, r1 +++ add r8, #128 +++ bne 10b +++ +++ pop {r4-r8, pc} +++ +++@ Partial final write +++2: +++ cmp r5, #48-96 +++ blt 1f +++ vst3.8 {d0, d1, d2}, [r0], r12 +++ vst3.8 {d4, d5, d6}, [r4], r12 +++ beq 11b +++ vmov q0, q8 +++ vmov q2, q10 +++ sub r5, #48 +++ vmov d2, d18 +++ vmov d6, d22 +++1: +++ cmp r5, #24-96 +++ blt 1f +++ vst3.8 {d0, d1, d2}, [r0]! +++ beq 11b +++ vmov q0, q2 +++ sub r5, #24 +++ vmov d2, d6 +++1: +++ cmp r5, #12-96 +++ blt 1f +++ vst3.8 {d0[0], d1[0], d2[0]}, [r0]! +++ vst3.8 {d0[1], d1[1], d2[1]}, [r0]! +++ vst3.8 {d0[2], d1[2], d2[2]}, [r0]! +++ vst3.8 {d0[3], d1[3], d2[3]}, [r0]! +++ beq 11b +++ vmov s0, s1 +++ sub r5, #12 +++ vmov s2, s3 +++ vmov s4, s5 +++1: +++ cmp r5, #6-96 +++ blt 1f +++ vst3.8 {d0[0], d1[0], d2[0]}, [r0]! +++ vst3.8 {d0[1], d1[1], d2[1]}, [r0]! +++ add r0, #12 +++ beq 11b +++ vshr.u32 d0, #16 +++ sub r5, #6 +++ vshr.u32 d1, #16 +++ vshr.u32 d2, #16 +++1: +++ cmp r5, #3-96 +++ blt 1f +++ vst3.8 {d0[0], d1[0], d2[0]}, [r0]! +++ beq 11b +++ sub r5, #3 +++ vshr.u32 d0, #8 +++ vshr.u32 d1, #8 +++1: +++ cmp r5, #2-96 +++ blt 1f +++ vst2.8 {d0[0], d1[0]}, [r0]! +++ b 11b +++1: +++ vst1.8 {d0[0]}, [r0]! +++ b 11b +++ +++endfunc +++ +++ ++diff --git a/libavutil/arm/rpi_sand_neon.h b/libavutil/arm/rpi_sand_neon.h ++new file mode 100644 ++index 0000000000..d457c10870 ++--- /dev/null +++++ b/libavutil/arm/rpi_sand_neon.h ++@@ -0,0 +1,110 @@ +++/* +++Copyright (c) 2020 Raspberry Pi (Trading) Ltd. +++All rights reserved. +++ +++Redistribution and use in source and binary forms, with or without +++modification, are permitted provided that the following conditions are met: +++ * Redistributions of source code must retain the above copyright +++ notice, this list of conditions and the following disclaimer. +++ * Redistributions in binary form must reproduce the above copyright +++ notice, this list of conditions and the following disclaimer in the +++ documentation and/or other materials provided with the distribution. +++ * Neither the name of the copyright holder nor the +++ names of its contributors may be used to endorse or promote products +++ derived from this software without specific prior written permission. 
+++ +++THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS" AND +++ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED +++WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE +++DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT HOLDER OR CONTRIBUTORS BE LIABLE FOR ANY +++DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES +++(INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; +++LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND +++ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT +++(INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS +++SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. +++ +++Authors: John Cox +++*/ +++ +++#ifndef AVUTIL_ARM_SAND_NEON_H +++#define AVUTIL_ARM_SAND_NEON_H +++ +++void ff_rpi_sand128b_stripe_to_8_10( +++ uint8_t * dest, // [r0] +++ const uint8_t * src1, // [r1] +++ const uint8_t * src2, // [r2] +++ unsigned int lines); // [r3] +++ +++void ff_rpi_sand8_lines_to_planar_y8( +++ uint8_t * dest, // [r0] +++ unsigned int dst_stride, // [r1] +++ const uint8_t * src, // [r2] +++ unsigned int src_stride1, // [r3] Ignored - assumed 128 +++ unsigned int src_stride2, // [sp, #0] -> r3 +++ unsigned int _x, // [sp, #4] Ignored - 0 +++ unsigned int y, // [sp, #8] (r7 in prefix) +++ unsigned int _w, // [sp, #12] -> r6 (cur r5) +++ unsigned int h); // [sp, #16] -> r7 +++ +++void ff_rpi_sand8_lines_to_planar_c8( +++ uint8_t * dst_u, // [r0] +++ unsigned int dst_stride_u, // [r1] +++ uint8_t * dst_v, // [r2] +++ unsigned int dst_stride_v, // [r3] +++ const uint8_t * src, // [sp, #0] -> r4, r5 +++ unsigned int stride1, // [sp, #4] 128 +++ unsigned int stride2, // [sp, #8] -> r8 +++ unsigned int _x, // [sp, #12] 0 +++ unsigned int y, // [sp, #16] (r7 in prefix) +++ unsigned int _w, // [sp, #20] -> r12, r6 +++ unsigned int h); // [sp, #24] -> r7 +++ +++void ff_rpi_sand30_lines_to_planar_y16( +++ uint8_t * dest, // [r0] +++ unsigned int dst_stride, // [r1] +++ const uint8_t * src, // [r2] +++ unsigned int src_stride1, // [r3] Ignored - assumed 128 +++ unsigned int src_stride2, // [sp, #0] -> r3 +++ unsigned int _x, // [sp, #4] Ignored - 0 +++ unsigned int y, // [sp, #8] (r7 in prefix) +++ unsigned int _w, // [sp, #12] -> r6 (cur r5) +++ unsigned int h); // [sp, #16] -> r7 +++ +++void ff_rpi_sand30_lines_to_planar_c16( +++ uint8_t * dst_u, // [r0] +++ unsigned int dst_stride_u, // [r1] +++ uint8_t * dst_v, // [r2] +++ unsigned int dst_stride_v, // [r3] +++ const uint8_t * src, // [sp, #0] -> r4, r5 +++ unsigned int stride1, // [sp, #4] 128 +++ unsigned int stride2, // [sp, #8] -> r8 +++ unsigned int _x, // [sp, #12] 0 +++ unsigned int y, // [sp, #16] (r7 in prefix) +++ unsigned int _w, // [sp, #20] -> r6, r9 +++ unsigned int h); // [sp, #24] -> r7 +++ +++void ff_rpi_sand30_lines_to_planar_p010( +++ uint8_t * dest, // [r0] +++ unsigned int dst_stride, // [r1] +++ const uint8_t * src, // [r2] +++ unsigned int src_stride1, // [r3] Ignored - assumed 128 +++ unsigned int src_stride2, // [sp, #0] -> r3 +++ unsigned int _x, // [sp, #4] Ignored - 0 +++ unsigned int y, // [sp, #8] (r7 in prefix) +++ unsigned int _w, // [sp, #12] -> r6 (cur r5) +++ unsigned int h); // [sp, #16] -> r7 +++ +++void ff_rpi_sand30_lines_to_planar_y8( +++ uint8_t * dest, // [r0] +++ unsigned int dst_stride, // [r1] +++ const uint8_t * src, // [r2] +++ unsigned int src_stride1, // [r3] Ignored - assumed 128 +++ unsigned int 
src_stride2, // [sp, #0] -> r3 +++ unsigned int _x, // [sp, #4] Ignored - 0 +++ unsigned int y, // [sp, #8] (r7 in prefix) +++ unsigned int _w, // [sp, #12] -> r6 (cur r5) +++ unsigned int h); // [sp, #16] -> r7 +++ +++#endif // AVUTIL_ARM_SAND_NEON_H +++ ++diff --git a/libavutil/frame.c b/libavutil/frame.c ++index 9545477acc..48621e4098 100644 ++--- a/libavutil/frame.c +++++ b/libavutil/frame.c ++@@ -16,6 +16,8 @@ ++ * Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA ++ */ ++ +++#include "config.h" +++ ++ #include "channel_layout.h" ++ #include "avassert.h" ++ #include "buffer.h" ++@@ -27,6 +29,9 @@ ++ #include "mem.h" ++ #include "samplefmt.h" ++ #include "hwcontext.h" +++#if CONFIG_SAND +++#include "rpi_sand_fns.h" +++#endif ++ ++ #if FF_API_OLD_CHANNEL_LAYOUT ++ #define CHECK_CHANNELS_CONSISTENCY(frame) \ ++@@ -874,6 +879,12 @@ int av_frame_apply_cropping(AVFrame *frame, int flags) ++ (frame->crop_top + frame->crop_bottom) >= frame->height) ++ return AVERROR(ERANGE); ++ +++#if CONFIG_SAND +++ // Sand cannot be cropped - do not try +++ if (av_rpi_is_sand_format(frame->format)) +++ return 0; +++#endif +++ ++ desc = av_pix_fmt_desc_get(frame->format); ++ if (!desc) ++ return AVERROR_BUG; ++diff --git a/libavutil/frame.h b/libavutil/frame.h ++index 2580269549..3a9d323325 100644 ++--- a/libavutil/frame.h +++++ b/libavutil/frame.h ++@@ -957,6 +957,16 @@ int av_frame_apply_cropping(AVFrame *frame, int flags); ++ */ ++ const char *av_frame_side_data_name(enum AVFrameSideDataType type); ++ +++ +++static inline int av_frame_cropped_width(const AVFrame * const frame) +++{ +++ return frame->width - (frame->crop_left + frame->crop_right); +++} +++static inline int av_frame_cropped_height(const AVFrame * const frame) +++{ +++ return frame->height - (frame->crop_top + frame->crop_bottom); +++} +++ ++ /** ++ * @} ++ */ ++diff --git a/libavutil/hwcontext_drm.c b/libavutil/hwcontext_drm.c ++index 7a9fdbd263..137a952d2c 100644 ++--- a/libavutil/hwcontext_drm.c +++++ b/libavutil/hwcontext_drm.c ++@@ -21,6 +21,7 @@ ++ #include ++ #include ++ #include +++#include ++ ++ /* This was introduced in version 4.6. And may not exist all without an ++ * optional package. 
So to prevent a hard dependency on needing the Linux ++@@ -31,6 +32,7 @@ ++ #endif ++ ++ #include +++#include ++ #include ++ ++ #include "avassert.h" ++@@ -38,7 +40,9 @@ ++ #include "hwcontext_drm.h" ++ #include "hwcontext_internal.h" ++ #include "imgutils.h" ++- +++#if CONFIG_SAND +++#include "libavutil/rpi_sand_fns.h" +++#endif ++ ++ static void drm_device_free(AVHWDeviceContext *hwdev) ++ { ++@@ -53,6 +57,11 @@ static int drm_device_create(AVHWDeviceContext *hwdev, const char *device, ++ AVDRMDeviceContext *hwctx = hwdev->hwctx; ++ drmVersionPtr version; ++ +++ if (device == NULL) { +++ hwctx->fd = -1; +++ return 0; +++ } +++ ++ hwctx->fd = open(device, O_RDWR); ++ if (hwctx->fd < 0) ++ return AVERROR(errno); ++@@ -139,6 +148,8 @@ static int drm_map_frame(AVHWFramesContext *hwfc, ++ if (flags & AV_HWFRAME_MAP_WRITE) ++ mmap_prot |= PROT_WRITE; ++ +++ if (dst->format == AV_PIX_FMT_NONE) +++ dst->format = hwfc->sw_format; ++ #if HAVE_LINUX_DMA_BUF_H ++ if (flags & AV_HWFRAME_MAP_READ) ++ map->sync_flags |= DMA_BUF_SYNC_READ; ++@@ -185,6 +196,23 @@ static int drm_map_frame(AVHWFramesContext *hwfc, ++ ++ dst->width = src->width; ++ dst->height = src->height; +++ dst->crop_top = src->crop_top; +++ dst->crop_bottom = src->crop_bottom; +++ dst->crop_left = src->crop_left; +++ dst->crop_right = src->crop_right; +++ +++#if CONFIG_SAND +++ // Rework for sand frames +++ if (av_rpi_is_sand_frame(dst)) { +++ // As it stands the sand formats hold stride2 in linesize[3] +++ // linesize[0] & [1] contain stride1 which is always 128 for everything we do +++ // * Arguably this should be reworked s.t. stride2 is in linesize[0] & [1] +++ dst->linesize[3] = fourcc_mod_broadcom_param(desc->objects[0].format_modifier); +++ dst->linesize[0] = 128; +++ dst->linesize[1] = 128; +++ // *** Are we sure src->height is actually what we want ??? +++ } +++#endif ++ ++ err = ff_hwframe_map_create(src->hw_frames_ctx, dst, src, ++ &drm_unmap_frame, map); ++@@ -206,16 +234,29 @@ static int drm_transfer_get_formats(AVHWFramesContext *ctx, ++ enum AVHWFrameTransferDirection dir, ++ enum AVPixelFormat **formats) ++ { ++- enum AVPixelFormat *pix_fmts; +++ enum AVPixelFormat *p; ++ ++- pix_fmts = av_malloc_array(2, sizeof(*pix_fmts)); ++- if (!pix_fmts) +++ p = *formats = av_malloc_array(3, sizeof(*p)); +++ if (!p) ++ return AVERROR(ENOMEM); ++ ++- pix_fmts[0] = ctx->sw_format; ++- pix_fmts[1] = AV_PIX_FMT_NONE; +++ // **** Offer native sand too ???? +++ *p++ = +++#if CONFIG_SAND +++ ctx->sw_format == AV_PIX_FMT_RPI4_8 || ctx->sw_format == AV_PIX_FMT_SAND128 ? +++ AV_PIX_FMT_YUV420P : +++ ctx->sw_format == AV_PIX_FMT_RPI4_10 ? 
+++ AV_PIX_FMT_YUV420P10LE : +++#endif +++ ctx->sw_format; +++ +++#if CONFIG_SAND +++ if (ctx->sw_format == AV_PIX_FMT_RPI4_10 || +++ ctx->sw_format == AV_PIX_FMT_RPI4_8 || ctx->sw_format == AV_PIX_FMT_SAND128) +++ *p++ = AV_PIX_FMT_NV12; +++#endif ++ ++- *formats = pix_fmts; +++ *p = AV_PIX_FMT_NONE; ++ return 0; ++ } ++ ++@@ -231,18 +272,62 @@ static int drm_transfer_data_from(AVHWFramesContext *hwfc, ++ map = av_frame_alloc(); ++ if (!map) ++ return AVERROR(ENOMEM); ++- map->format = dst->format; ++ +++ // Map to default +++ map->format = AV_PIX_FMT_NONE; ++ err = drm_map_frame(hwfc, map, src, AV_HWFRAME_MAP_READ); ++ if (err) ++ goto fail; ++ ++- map->width = dst->width; ++- map->height = dst->height; +++#if 0 +++ av_log(hwfc, AV_LOG_INFO, "%s: src fmt=%d (%d), dst fmt=%d (%d) s=%dx%d l=%d/%d/%d/%d, d=%dx%d l=%d/%d/%d\n", __func__, +++ hwfc->sw_format, AV_PIX_FMT_RPI4_8, dst->format, AV_PIX_FMT_YUV420P10LE, +++ map->width, map->height, +++ map->linesize[0], +++ map->linesize[1], +++ map->linesize[2], +++ map->linesize[3], +++ dst->width, dst->height, +++ dst->linesize[0], +++ dst->linesize[1], +++ dst->linesize[2]); +++#endif +++#if CONFIG_SAND +++ if (av_rpi_is_sand_frame(map)) { +++ // Preserve crop - later ffmpeg code assumes that we have in that it +++ // overwrites any crop that we create with the old values +++ const unsigned int w = FFMIN(dst->width, map->width); +++ const unsigned int h = FFMIN(dst->height, map->height); +++ +++ map->crop_top = 0; +++ map->crop_bottom = 0; +++ map->crop_left = 0; +++ map->crop_right = 0; +++ +++ if (av_rpi_sand_to_planar_frame(dst, map) != 0) +++ { +++ av_log(hwfc, AV_LOG_ERROR, "%s: Incompatible output pixfmt for sand\n", __func__); +++ err = AVERROR(EINVAL); +++ goto fail; +++ } +++ +++ dst->width = w; +++ dst->height = h; +++ } +++ else +++#endif +++ { +++ // Kludge mapped h/w s.t. 
frame_copy works +++ map->width = dst->width; +++ map->height = dst->height; +++ err = av_frame_copy(dst, map); +++ } ++ ++- err = av_frame_copy(dst, map); ++ if (err) +++ { +++ av_log(hwfc, AV_LOG_ERROR, "%s: Copy fail\n", __func__); ++ goto fail; +++ } ++ ++ err = 0; ++ fail: ++@@ -257,7 +342,10 @@ static int drm_transfer_data_to(AVHWFramesContext *hwfc, ++ int err; ++ ++ if (src->width > hwfc->width || src->height > hwfc->height) +++ { +++ av_log(hwfc, AV_LOG_ERROR, "%s: H/w mismatch: %d/%d, %d/%d\n", __func__, dst->width, hwfc->width, dst->height, hwfc->height); ++ return AVERROR(EINVAL); +++ } ++ ++ map = av_frame_alloc(); ++ if (!map) ++diff --git a/libavutil/hwcontext_vulkan.c b/libavutil/hwcontext_vulkan.c ++index 2a9b5f4aac..11e7945f18 100644 ++--- a/libavutil/hwcontext_vulkan.c +++++ b/libavutil/hwcontext_vulkan.c ++@@ -57,6 +57,14 @@ ++ #define CHECK_CU(x) FF_CUDA_CHECK_DL(cuda_cu, cu, x) ++ #endif ++ +++// Sometimes missing definitions +++#ifndef VK_EXT_VIDEO_DECODE_H264_EXTENSION_NAME +++#define VK_EXT_VIDEO_DECODE_H264_EXTENSION_NAME "VK_EXT_video_decode_h264" +++#endif +++#ifndef VK_EXT_VIDEO_DECODE_H265_EXTENSION_NAME +++#define VK_EXT_VIDEO_DECODE_H265_EXTENSION_NAME "VK_EXT_video_decode_h265" +++#endif +++ ++ typedef struct VulkanQueueCtx { ++ VkFence fence; ++ VkQueue queue; ++diff --git a/libavutil/pixdesc.c b/libavutil/pixdesc.c ++index 62a2ae08d9..cb73521ea7 100644 ++--- a/libavutil/pixdesc.c +++++ b/libavutil/pixdesc.c ++@@ -2717,6 +2717,50 @@ static const AVPixFmtDescriptor av_pix_fmt_descriptors[AV_PIX_FMT_NB] = { ++ .flags = AV_PIX_FMT_FLAG_RGB | AV_PIX_FMT_FLAG_FLOAT | ++ AV_PIX_FMT_FLAG_ALPHA, ++ }, +++ [AV_PIX_FMT_SAND128] = { +++ .name = "sand128", +++ .nb_components = 3, +++ .log2_chroma_w = 1, +++ .log2_chroma_h = 1, +++ .comp = { +++ { 0, 1, 0, 0, 8 }, /* Y */ +++ { 1, 2, 0, 0, 8 }, /* U */ +++ { 1, 2, 1, 0, 8 }, /* V */ +++ }, +++ .flags = 0, +++ }, +++ [AV_PIX_FMT_SAND64_10] = { +++ .name = "sand64_10", +++ .nb_components = 3, +++ .log2_chroma_w = 1, +++ .log2_chroma_h = 1, +++ .comp = { +++ { 0, 2, 0, 0, 10 }, /* Y */ +++ { 1, 4, 0, 0, 10 }, /* U */ +++ { 1, 4, 2, 0, 10 }, /* V */ +++ }, +++ .flags = 0, +++ }, +++ [AV_PIX_FMT_SAND64_16] = { +++ .name = "sand64_16", +++ .nb_components = 3, +++ .log2_chroma_w = 1, +++ .log2_chroma_h = 1, +++ .comp = { +++ { 0, 2, 0, 0, 16 }, /* Y */ +++ { 1, 4, 0, 0, 16 }, /* U */ +++ { 1, 4, 2, 0, 16 }, /* V */ +++ }, +++ .flags = 0, +++ }, +++ [AV_PIX_FMT_RPI4_8] = { +++ .name = "rpi4_8", +++ .flags = AV_PIX_FMT_FLAG_HWACCEL, +++ }, +++ [AV_PIX_FMT_RPI4_10] = { +++ .name = "rpi4_10", +++ .flags = AV_PIX_FMT_FLAG_HWACCEL, +++ }, ++ }; ++ ++ static const char * const color_range_names[] = { ++diff --git a/libavutil/pixfmt.h b/libavutil/pixfmt.h ++index 37c2c79e01..5cc780e7d5 100644 ++--- a/libavutil/pixfmt.h +++++ b/libavutil/pixfmt.h ++@@ -377,6 +377,14 @@ enum AVPixelFormat { ++ ++ AV_PIX_FMT_Y210BE, ///< packed YUV 4:2:2 like YUYV422, 20bpp, data in the high bits, big-endian ++ AV_PIX_FMT_Y210LE, ///< packed YUV 4:2:2 like YUYV422, 20bpp, data in the high bits, little-endian +++// RPI - not on ifdef so can be got at by calling progs +++// #define so code that uses this can know it is there +++#define AVUTIL_HAVE_PIX_FMT_SAND 1 +++ AV_PIX_FMT_SAND128, ///< 4:2:0 8-bit 128x*Y stripe, 64x*UV stripe, then next x stripe, mysterious padding +++ AV_PIX_FMT_SAND64_10, ///< 4:2:0 10-bit 64x*Y stripe, 32x*UV stripe, then next x stripe, mysterious padding +++ AV_PIX_FMT_SAND64_16, ///< 4:2:0 16-bit 64x*Y stripe, 32x*UV stripe, 
then next x stripe, mysterious padding +++ AV_PIX_FMT_RPI4_8, +++ AV_PIX_FMT_RPI4_10, ++ ++ AV_PIX_FMT_X2RGB10LE, ///< packed RGB 10:10:10, 30bpp, (msb)2X 10R 10G 10B(lsb), little-endian, X=unused/undefined ++ AV_PIX_FMT_X2RGB10BE, ///< packed RGB 10:10:10, 30bpp, (msb)2X 10R 10G 10B(lsb), big-endian, X=unused/undefined ++diff --git a/libavutil/rpi_sand_fn_pw.h b/libavutil/rpi_sand_fn_pw.h ++new file mode 100644 ++index 0000000000..0d5d203dc3 ++--- /dev/null +++++ b/libavutil/rpi_sand_fn_pw.h ++@@ -0,0 +1,227 @@ +++/* +++Copyright (c) 2018 Raspberry Pi (Trading) Ltd. +++All rights reserved. +++ +++Redistribution and use in source and binary forms, with or without +++modification, are permitted provided that the following conditions are met: +++ * Redistributions of source code must retain the above copyright +++ notice, this list of conditions and the following disclaimer. +++ * Redistributions in binary form must reproduce the above copyright +++ notice, this list of conditions and the following disclaimer in the +++ documentation and/or other materials provided with the distribution. +++ * Neither the name of the copyright holder nor the +++ names of its contributors may be used to endorse or promote products +++ derived from this software without specific prior written permission. +++ +++THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS" AND +++ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED +++WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE +++DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT HOLDER OR CONTRIBUTORS BE LIABLE FOR ANY +++DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES +++(INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; +++LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND +++ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT +++(INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS +++SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. 
+++ +++Authors: John Cox +++*/ +++ +++// * Included twice from rpi_sand_fn with different PW +++ +++#define STRCAT(x,y) x##y +++ +++#if PW == 1 +++#define pixel uint8_t +++#define FUNC(f) STRCAT(f, 8) +++#elif PW == 2 +++#define pixel uint16_t +++#define FUNC(f) STRCAT(f, 16) +++#else +++#error Unexpected PW +++#endif +++ +++// Fetches a single patch - offscreen fixup not done here +++// w <= stride1 +++// unclipped +++void FUNC(av_rpi_sand_to_planar_y)(uint8_t * dst, const unsigned int dst_stride, +++ const uint8_t * src, +++ unsigned int stride1, unsigned int stride2, +++ unsigned int _x, unsigned int y, +++ unsigned int _w, unsigned int h) +++{ +++ const unsigned int x = _x; +++ const unsigned int w = _w; +++ const unsigned int mask = stride1 - 1; +++ +++#if PW == 1 && HAVE_SAND_ASM +++ if (_x == 0) { +++ ff_rpi_sand8_lines_to_planar_y8(dst, dst_stride, +++ src, stride1, stride2, _x, y, _w, h); +++ return; +++ } +++#endif +++ +++ if ((x & ~mask) == ((x + w) & ~mask)) { +++ // All in one sand stripe +++ const uint8_t * p = src + (x & mask) + y * stride1 + (x & ~mask) * stride2; +++ for (unsigned int i = 0; i != h; ++i, dst += dst_stride, p += stride1) { +++ memcpy(dst, p, w); +++ } +++ } +++ else +++ { +++ // Two+ stripe +++ const unsigned int sstride = stride1 * stride2; +++ const uint8_t * p1 = src + (x & mask) + y * stride1 + (x & ~mask) * stride2; +++ const uint8_t * p2 = p1 + sstride - (x & mask); +++ const unsigned int w1 = stride1 - (x & mask); +++ const unsigned int w3 = (x + w) & mask; +++ const unsigned int w2 = w - (w1 + w3); +++ +++ for (unsigned int i = 0; i != h; ++i, dst += dst_stride, p1 += stride1, p2 += stride1) { +++ unsigned int j; +++ const uint8_t * p = p2; +++ uint8_t * d = dst; +++ memcpy(d, p1, w1); +++ d += w1; +++ for (j = 0; j < w2; j += stride1, d += stride1, p += sstride) { +++ memcpy(d, p, stride1); +++ } +++ memcpy(d, p, w3); +++ } +++ } +++} +++ +++// x & w in bytes but not of interleave (i.e. 
offset = x*2 for U&V) +++ +++void FUNC(av_rpi_sand_to_planar_c)(uint8_t * dst_u, const unsigned int dst_stride_u, +++ uint8_t * dst_v, const unsigned int dst_stride_v, +++ const uint8_t * src, +++ unsigned int stride1, unsigned int stride2, +++ unsigned int _x, unsigned int y, +++ unsigned int _w, unsigned int h) +++{ +++ const unsigned int x = _x * 2; +++ const unsigned int w = _w * 2; +++ const unsigned int mask = stride1 - 1; +++ +++#if PW == 1 && HAVE_SAND_ASM +++ if (_x == 0) { +++ ff_rpi_sand8_lines_to_planar_c8(dst_u, dst_stride_u, dst_v, dst_stride_v, +++ src, stride1, stride2, _x, y, _w, h); +++ return; +++ } +++#endif +++ +++ if ((x & ~mask) == ((x + w) & ~mask)) { +++ // All in one sand stripe +++ const uint8_t * p1 = src + (x & mask) + y * stride1 + (x & ~mask) * stride2; +++ for (unsigned int i = 0; i != h; ++i, dst_u += dst_stride_u, dst_v += dst_stride_v, p1 += stride1) { +++ pixel * du = (pixel *)dst_u; +++ pixel * dv = (pixel *)dst_v; +++ const pixel * p = (const pixel *)p1; +++ for (unsigned int k = 0; k < w; k += 2 * PW) { +++ *du++ = *p++; +++ *dv++ = *p++; +++ } +++ } +++ } +++ else +++ { +++ // Two+ stripe +++ const unsigned int sstride = stride1 * stride2; +++ const unsigned int sstride_p = (sstride - stride1) / PW; +++ +++ const uint8_t * p1 = src + (x & mask) + y * stride1 + (x & ~mask) * stride2; +++ const uint8_t * p2 = p1 + sstride - (x & mask); +++ const unsigned int w1 = stride1 - (x & mask); +++ const unsigned int w3 = (x + w) & mask; +++ const unsigned int w2 = w - (w1 + w3); +++ +++ for (unsigned int i = 0; i != h; ++i, dst_u += dst_stride_u, dst_v += dst_stride_v, p1 += stride1, p2 += stride1) { +++ unsigned int j; +++ const pixel * p = (const pixel *)p1; +++ pixel * du = (pixel *)dst_u; +++ pixel * dv = (pixel *)dst_v; +++ for (unsigned int k = 0; k < w1; k += 2 * PW) { +++ *du++ = *p++; +++ *dv++ = *p++; +++ } +++ for (j = 0, p = (const pixel *)p2; j < w2; j += stride1, p += sstride_p) { +++ for (unsigned int k = 0; k < stride1; k += 2 * PW) { +++ *du++ = *p++; +++ *dv++ = *p++; +++ } +++ } +++ for (unsigned int k = 0; k < w3; k += 2 * PW) { +++ *du++ = *p++; +++ *dv++ = *p++; +++ } +++ } +++ } +++} +++ +++void FUNC(av_rpi_planar_to_sand_c)(uint8_t * dst_c, +++ unsigned int stride1, unsigned int stride2, +++ const uint8_t * src_u, const unsigned int src_stride_u, +++ const uint8_t * src_v, const unsigned int src_stride_v, +++ unsigned int _x, unsigned int y, +++ unsigned int _w, unsigned int h) +++{ +++ const unsigned int x = _x * 2; +++ const unsigned int w = _w * 2; +++ const unsigned int mask = stride1 - 1; +++ if ((x & ~mask) == ((x + w) & ~mask)) { +++ // All in one sand stripe +++ uint8_t * p1 = dst_c + (x & mask) + y * stride1 + (x & ~mask) * stride2; +++ for (unsigned int i = 0; i != h; ++i, src_u += src_stride_u, src_v += src_stride_v, p1 += stride1) { +++ const pixel * su = (const pixel *)src_u; +++ const pixel * sv = (const pixel *)src_v; +++ pixel * p = (pixel *)p1; +++ for (unsigned int k = 0; k < w; k += 2 * PW) { +++ *p++ = *su++; +++ *p++ = *sv++; +++ } +++ } +++ } +++ else +++ { +++ // Two+ stripe +++ const unsigned int sstride = stride1 * stride2; +++ const unsigned int sstride_p = (sstride - stride1) / PW; +++ +++ const uint8_t * p1 = dst_c + (x & mask) + y * stride1 + (x & ~mask) * stride2; +++ const uint8_t * p2 = p1 + sstride - (x & mask); +++ const unsigned int w1 = stride1 - (x & mask); +++ const unsigned int w3 = (x + w) & mask; +++ const unsigned int w2 = w - (w1 + w3); +++ +++ for (unsigned int i = 0; i != h; ++i, src_u += 
src_stride_u, src_v += src_stride_v, p1 += stride1, p2 += stride1) { +++ unsigned int j; +++ const pixel * su = (const pixel *)src_u; +++ const pixel * sv = (const pixel *)src_v; +++ pixel * p = (pixel *)p1; +++ for (unsigned int k = 0; k < w1; k += 2 * PW) { +++ *p++ = *su++; +++ *p++ = *sv++; +++ } +++ for (j = 0, p = (pixel *)p2; j < w2; j += stride1, p += sstride_p) { +++ for (unsigned int k = 0; k < stride1; k += 2 * PW) { +++ *p++ = *su++; +++ *p++ = *sv++; +++ } +++ } +++ for (unsigned int k = 0; k < w3; k += 2 * PW) { +++ *p++ = *su++; +++ *p++ = *sv++; +++ } +++ } +++ } +++} +++ +++ +++#undef pixel +++#undef STRCAT +++#undef FUNC +++ ++diff --git a/libavutil/rpi_sand_fns.c b/libavutil/rpi_sand_fns.c ++new file mode 100644 ++index 0000000000..b6071e2928 ++--- /dev/null +++++ b/libavutil/rpi_sand_fns.c ++@@ -0,0 +1,445 @@ +++/* +++Copyright (c) 2018 Raspberry Pi (Trading) Ltd. +++All rights reserved. +++ +++Redistribution and use in source and binary forms, with or without +++modification, are permitted provided that the following conditions are met: +++ * Redistributions of source code must retain the above copyright +++ notice, this list of conditions and the following disclaimer. +++ * Redistributions in binary form must reproduce the above copyright +++ notice, this list of conditions and the following disclaimer in the +++ documentation and/or other materials provided with the distribution. +++ * Neither the name of the copyright holder nor the +++ names of its contributors may be used to endorse or promote products +++ derived from this software without specific prior written permission. +++ +++THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS" AND +++ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED +++WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE +++DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT HOLDER OR CONTRIBUTORS BE LIABLE FOR ANY +++DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES +++(INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; +++LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND +++ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT +++(INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS +++SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. 
+++ +++Authors: John Cox +++*/ +++ +++#include "config.h" +++#include +++#include +++#include "rpi_sand_fns.h" +++#include "avassert.h" +++#include "frame.h" +++ +++#if ARCH_ARM && HAVE_NEON +++#include "arm/rpi_sand_neon.h" +++#define HAVE_SAND_ASM 1 +++#elif ARCH_AARCH64 && HAVE_NEON +++#include "aarch64/rpi_sand_neon.h" +++#define HAVE_SAND_ASM 1 +++#else +++#define HAVE_SAND_ASM 0 +++#endif +++ +++#define PW 1 +++#include "rpi_sand_fn_pw.h" +++#undef PW +++ +++#define PW 2 +++#include "rpi_sand_fn_pw.h" +++#undef PW +++ +++#if 1 +++// Simple round +++static void cpy16_to_8(uint8_t * dst, const uint8_t * _src, unsigned int n, const unsigned int shr) +++{ +++ const unsigned int rnd = (1 << shr) >> 1; +++ const uint16_t * src = (const uint16_t *)_src; +++ +++ for (; n != 0; --n) { +++ *dst++ = (*src++ + rnd) >> shr; +++ } +++} +++#else +++// Dithered variation +++static void cpy16_to_8(uint8_t * dst, const uint8_t * _src, unsigned int n, const unsigned int shr) +++{ +++ unsigned int rnd = (1 << shr) >> 1; +++ const unsigned int mask = ((1 << shr) - 1); +++ const uint16_t * src = (const uint16_t *)_src; +++ +++ for (; n != 0; --n) { +++ rnd = *src++ + (rnd & mask); +++ *dst++ = rnd >> shr; +++ } +++} +++#endif +++ +++// Fetches a single patch - offscreen fixup not done here +++// w <= stride1 +++// unclipped +++// _x & _w in pixels, strides in bytes +++void av_rpi_sand30_to_planar_y16(uint8_t * dst, const unsigned int dst_stride, +++ const uint8_t * src, +++ unsigned int stride1, unsigned int stride2, +++ unsigned int _x, unsigned int y, +++ unsigned int _w, unsigned int h) +++{ +++ const unsigned int x0 = (_x / 3) * 4; // Byte offset of the word +++ const unsigned int xskip0 = _x - (x0 >> 2) * 3; +++ const unsigned int x1 = ((_x + _w) / 3) * 4; +++ const unsigned int xrem1 = _x + _w - (x1 >> 2) * 3; +++ const unsigned int mask = stride1 - 1; +++ const uint8_t * p0 = src + (x0 & mask) + y * stride1 + (x0 & ~mask) * stride2; +++ const unsigned int slice_inc = ((stride2 - 1) * stride1) >> 2; // RHS of a stripe to LHS of next in words +++ +++#if HAVE_SAND_ASM +++ if (_x == 0) { +++ ff_rpi_sand30_lines_to_planar_y16(dst, dst_stride, src, stride1, stride2, _x, y, _w, h); +++ return; +++ } +++#endif +++ +++ if (x0 == x1) { +++ // ******************* +++ // Partial single word xfer +++ return; +++ } +++ +++ for (unsigned int i = 0; i != h; ++i, dst += dst_stride, p0 += stride1) +++ { +++ unsigned int x = x0; +++ const uint32_t * p = (const uint32_t *)p0; +++ uint16_t * d = (uint16_t *)dst; +++ +++ if (xskip0 != 0) { +++ const uint32_t p3 = *p++; +++ +++ if (xskip0 == 1) +++ *d++ = (p3 >> 10) & 0x3ff; +++ *d++ = (p3 >> 20) & 0x3ff; +++ +++ if (((x += 4) & mask) == 0) +++ p += slice_inc; +++ } +++ +++ while (x != x1) { +++ const uint32_t p3 = *p++; +++ *d++ = p3 & 0x3ff; +++ *d++ = (p3 >> 10) & 0x3ff; +++ *d++ = (p3 >> 20) & 0x3ff; +++ +++ if (((x += 4) & mask) == 0) +++ p += slice_inc; +++ } +++ +++ if (xrem1 != 0) { +++ const uint32_t p3 = *p; +++ +++ *d++ = p3 & 0x3ff; +++ if (xrem1 == 2) +++ *d++ = (p3 >> 10) & 0x3ff; +++ } +++ } +++} +++ +++ +++void av_rpi_sand30_to_planar_c16(uint8_t * dst_u, const unsigned int dst_stride_u, +++ uint8_t * dst_v, const unsigned int dst_stride_v, +++ const uint8_t * src, +++ unsigned int stride1, unsigned int stride2, +++ unsigned int _x, unsigned int y, +++ unsigned int _w, unsigned int h) +++{ +++ const unsigned int x0 = (_x / 3) * 8; // Byte offset of the word +++ const unsigned int xskip0 = _x - (x0 >> 3) * 3; +++ const unsigned int x1 = ((_x + _w) / 3) * 
8; +++ const unsigned int xrem1 = _x + _w - (x1 >> 3) * 3; +++ const unsigned int mask = stride1 - 1; +++ const uint8_t * p0 = src + (x0 & mask) + y * stride1 + (x0 & ~mask) * stride2; +++ const unsigned int slice_inc = ((stride2 - 1) * stride1) >> 2; // RHS of a stripe to LHS of next in words +++ +++#if HAVE_SAND_ASM +++ if (_x == 0) { +++ ff_rpi_sand30_lines_to_planar_c16(dst_u, dst_stride_u, dst_v, dst_stride_v, +++ src, stride1, stride2, _x, y, _w, h); +++ return; +++ } +++#endif +++ +++ if (x0 == x1) { +++ // ******************* +++ // Partial single word xfer +++ return; +++ } +++ +++ for (unsigned int i = 0; i != h; ++i, dst_u += dst_stride_u, dst_v += dst_stride_v, p0 += stride1) +++ { +++ unsigned int x = x0; +++ const uint32_t * p = (const uint32_t *)p0; +++ uint16_t * du = (uint16_t *)dst_u; +++ uint16_t * dv = (uint16_t *)dst_v; +++ +++ if (xskip0 != 0) { +++ const uint32_t p3a = *p++; +++ const uint32_t p3b = *p++; +++ +++ if (xskip0 == 1) +++ { +++ *du++ = (p3a >> 20) & 0x3ff; +++ *dv++ = (p3b >> 0) & 0x3ff; +++ } +++ *du++ = (p3b >> 10) & 0x3ff; +++ *dv++ = (p3b >> 20) & 0x3ff; +++ +++ if (((x += 8) & mask) == 0) +++ p += slice_inc; +++ } +++ +++ while (x != x1) { +++ const uint32_t p3a = *p++; +++ const uint32_t p3b = *p++; +++ +++ *du++ = p3a & 0x3ff; +++ *dv++ = (p3a >> 10) & 0x3ff; +++ *du++ = (p3a >> 20) & 0x3ff; +++ *dv++ = p3b & 0x3ff; +++ *du++ = (p3b >> 10) & 0x3ff; +++ *dv++ = (p3b >> 20) & 0x3ff; +++ +++ if (((x += 8) & mask) == 0) +++ p += slice_inc; +++ } +++ +++ if (xrem1 != 0) { +++ const uint32_t p3a = *p++; +++ const uint32_t p3b = *p++; +++ +++ *du++ = p3a & 0x3ff; +++ *dv++ = (p3a >> 10) & 0x3ff; +++ if (xrem1 == 2) +++ { +++ *du++ = (p3a >> 20) & 0x3ff; +++ *dv++ = p3b & 0x3ff; +++ } +++ } +++ } +++} +++ +++// Fetches a single patch - offscreen fixup not done here +++// w <= stride1 +++// single lose bottom 2 bits truncation +++// _x & _w in pixels, strides in bytes +++void av_rpi_sand30_to_planar_y8(uint8_t * dst, const unsigned int dst_stride, +++ const uint8_t * src, +++ unsigned int stride1, unsigned int stride2, +++ unsigned int _x, unsigned int y, +++ unsigned int _w, unsigned int h) +++{ +++ const unsigned int x0 = (_x / 3) * 4; // Byte offset of the word +++ const unsigned int xskip0 = _x - (x0 >> 2) * 3; +++ const unsigned int x1 = ((_x + _w) / 3) * 4; +++ const unsigned int xrem1 = _x + _w - (x1 >> 2) * 3; +++ const unsigned int mask = stride1 - 1; +++ const uint8_t * p0 = src + (x0 & mask) + y * stride1 + (x0 & ~mask) * stride2; +++ const unsigned int slice_inc = ((stride2 - 1) * stride1) >> 2; // RHS of a stripe to LHS of next in words +++ +++#if HAVE_SAND_ASM +++ if (_x == 0) { +++ ff_rpi_sand30_lines_to_planar_y8(dst, dst_stride, src, stride1, stride2, _x, y, _w, h); +++ return; +++ } +++#endif +++ +++ if (x0 == x1) { +++ // ******************* +++ // Partial single word xfer +++ return; +++ } +++ +++ for (unsigned int i = 0; i != h; ++i, dst += dst_stride, p0 += stride1) +++ { +++ unsigned int x = x0; +++ const uint32_t * p = (const uint32_t *)p0; +++ uint8_t * d = dst; +++ +++ if (xskip0 != 0) { +++ const uint32_t p3 = *p++; +++ +++ if (xskip0 == 1) +++ *d++ = (p3 >> 12) & 0xff; +++ *d++ = (p3 >> 22) & 0xff; +++ +++ if (((x += 4) & mask) == 0) +++ p += slice_inc; +++ } +++ +++ while (x != x1) { +++ const uint32_t p3 = *p++; +++ *d++ = (p3 >> 2) & 0xff; +++ *d++ = (p3 >> 12) & 0xff; +++ *d++ = (p3 >> 22) & 0xff; +++ +++ if (((x += 4) & mask) == 0) +++ p += slice_inc; +++ } +++ +++ if (xrem1 != 0) { +++ const uint32_t p3 = *p; +++ +++ *d++ = 
(p3 >> 2) & 0xff; +++ if (xrem1 == 2) +++ *d++ = (p3 >> 12) & 0xff; +++ } +++ } +++} +++ +++ +++ +++// w/h in pixels +++void av_rpi_sand16_to_sand8(uint8_t * dst, const unsigned int dst_stride1, const unsigned int dst_stride2, +++ const uint8_t * src, const unsigned int src_stride1, const unsigned int src_stride2, +++ unsigned int w, unsigned int h, const unsigned int shr) +++{ +++ const unsigned int n = dst_stride1 / 2; +++ unsigned int j; +++ +++ // This is true for our current layouts +++ av_assert0(dst_stride1 == src_stride1); +++ +++ // As we have the same stride1 for src & dest and src is wider than dest +++ // then if we loop on src we can always write contiguously to dest +++ // We make no effort to copy an exact width - round up to nearest src stripe +++ // as we will always have storage in dest for that +++ +++#if ARCH_ARM && HAVE_NEON +++ if (shr == 3 && src_stride1 == 128) { +++ for (j = 0; j + n < w; j += dst_stride1) { +++ uint8_t * d = dst + j * dst_stride2; +++ const uint8_t * s1 = src + j * 2 * src_stride2; +++ const uint8_t * s2 = s1 + src_stride1 * src_stride2; +++ +++ ff_rpi_sand128b_stripe_to_8_10(d, s1, s2, h); +++ } +++ } +++ else +++#endif +++ { +++ for (j = 0; j + n < w; j += dst_stride1) { +++ uint8_t * d = dst + j * dst_stride2; +++ const uint8_t * s1 = src + j * 2 * src_stride2; +++ const uint8_t * s2 = s1 + src_stride1 * src_stride2; +++ +++ for (unsigned int i = 0; i != h; ++i, s1 += src_stride1, s2 += src_stride1, d += dst_stride1) { +++ cpy16_to_8(d, s1, n, shr); +++ cpy16_to_8(d + n, s2, n, shr); +++ } +++ } +++ } +++ +++ // Fix up a trailing dest half stripe +++ if (j < w) { +++ uint8_t * d = dst + j * dst_stride2; +++ const uint8_t * s1 = src + j * 2 * src_stride2; +++ +++ for (unsigned int i = 0; i != h; ++i, s1 += src_stride1, d += dst_stride1) { +++ cpy16_to_8(d, s1, n, shr); +++ } +++ } +++} +++ +++int av_rpi_sand_to_planar_frame(AVFrame * const dst, const AVFrame * const src) +++{ +++ const int w = av_frame_cropped_width(src); +++ const int h = av_frame_cropped_height(src); +++ const int x = src->crop_left; +++ const int y = src->crop_top; +++ +++ // We will crop as part of the conversion +++ dst->crop_top = 0; +++ dst->crop_left = 0; +++ dst->crop_bottom = 0; +++ dst->crop_right = 0; +++ +++ switch (src->format){ +++ case AV_PIX_FMT_SAND128: +++ case AV_PIX_FMT_RPI4_8: +++ switch (dst->format){ +++ case AV_PIX_FMT_YUV420P: +++ av_rpi_sand_to_planar_y8(dst->data[0], dst->linesize[0], +++ src->data[0], +++ av_rpi_sand_frame_stride1(src), av_rpi_sand_frame_stride2(src), +++ x, y, w, h); +++ av_rpi_sand_to_planar_c8(dst->data[1], dst->linesize[1], +++ dst->data[2], dst->linesize[2], +++ src->data[1], +++ av_rpi_sand_frame_stride1(src), av_rpi_sand_frame_stride2(src), +++ x/2, y/2, w/2, h/2); +++ break; +++ case AV_PIX_FMT_NV12: +++ av_rpi_sand_to_planar_y8(dst->data[0], dst->linesize[0], +++ src->data[0], +++ av_rpi_sand_frame_stride1(src), av_rpi_sand_frame_stride2(src), +++ x, y, w, h); +++ av_rpi_sand_to_planar_y8(dst->data[1], dst->linesize[1], +++ src->data[1], +++ av_rpi_sand_frame_stride1(src), av_rpi_sand_frame_stride2(src), +++ x/2, y/2, w, h/2); +++ break; +++ default: +++ return -1; +++ } +++ break; +++ case AV_PIX_FMT_SAND64_10: +++ switch (dst->format){ +++ case AV_PIX_FMT_YUV420P10: +++ av_rpi_sand_to_planar_y16(dst->data[0], dst->linesize[0], +++ src->data[0], +++ av_rpi_sand_frame_stride1(src), av_rpi_sand_frame_stride2(src), +++ x*2, y, w*2, h); +++ av_rpi_sand_to_planar_c16(dst->data[1], dst->linesize[1], +++ dst->data[2], 
dst->linesize[2], +++ src->data[1], +++ av_rpi_sand_frame_stride1(src), av_rpi_sand_frame_stride2(src), +++ x, y/2, w, h/2); +++ break; +++ default: +++ return -1; +++ } +++ break; +++ case AV_PIX_FMT_RPI4_10: +++ switch (dst->format){ +++ case AV_PIX_FMT_YUV420P10: +++ av_rpi_sand30_to_planar_y16(dst->data[0], dst->linesize[0], +++ src->data[0], +++ av_rpi_sand_frame_stride1(src), av_rpi_sand_frame_stride2(src), +++ x, y, w, h); +++ av_rpi_sand30_to_planar_c16(dst->data[1], dst->linesize[1], +++ dst->data[2], dst->linesize[2], +++ src->data[1], +++ av_rpi_sand_frame_stride1(src), av_rpi_sand_frame_stride2(src), +++ x/2, y/2, w/2, h/2); +++ break; +++ case AV_PIX_FMT_NV12: +++ av_rpi_sand30_to_planar_y8(dst->data[0], dst->linesize[0], +++ src->data[0], +++ av_rpi_sand_frame_stride1(src), av_rpi_sand_frame_stride2(src), +++ x, y, w, h); +++ av_rpi_sand30_to_planar_y8(dst->data[1], dst->linesize[1], +++ src->data[1], +++ av_rpi_sand_frame_stride1(src), av_rpi_sand_frame_stride2(src), +++ x/2, y/2, w, h/2); +++ break; +++ default: +++ return -1; +++ } +++ break; +++ default: +++ return -1; +++ } +++ +++ return av_frame_copy_props(dst, src); +++} ++diff --git a/libavutil/rpi_sand_fns.h b/libavutil/rpi_sand_fns.h ++new file mode 100644 ++index 0000000000..462ccb8abd ++--- /dev/null +++++ b/libavutil/rpi_sand_fns.h ++@@ -0,0 +1,188 @@ +++/* +++Copyright (c) 2018 Raspberry Pi (Trading) Ltd. +++All rights reserved. +++ +++Redistribution and use in source and binary forms, with or without +++modification, are permitted provided that the following conditions are met: +++ * Redistributions of source code must retain the above copyright +++ notice, this list of conditions and the following disclaimer. +++ * Redistributions in binary form must reproduce the above copyright +++ notice, this list of conditions and the following disclaimer in the +++ documentation and/or other materials provided with the distribution. +++ * Neither the name of the copyright holder nor the +++ names of its contributors may be used to endorse or promote products +++ derived from this software without specific prior written permission. +++ +++THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS" AND +++ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED +++WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE +++DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT HOLDER OR CONTRIBUTORS BE LIABLE FOR ANY +++DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES +++(INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; +++LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND +++ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT +++(INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS +++SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. 
+++ +++Authors: John Cox +++*/ +++ +++#ifndef AVUTIL_RPI_SAND_FNS +++#define AVUTIL_RPI_SAND_FNS +++ +++#include "libavutil/frame.h" +++ +++// For all these fns _x & _w are measured as coord * PW +++// For the C fns coords are in chroma pels (so luma / 2) +++// Strides are in bytes +++ +++void av_rpi_sand_to_planar_y8(uint8_t * dst, const unsigned int dst_stride, +++ const uint8_t * src, +++ unsigned int stride1, unsigned int stride2, +++ unsigned int _x, unsigned int y, +++ unsigned int _w, unsigned int h); +++void av_rpi_sand_to_planar_y16(uint8_t * dst, const unsigned int dst_stride, +++ const uint8_t * src, +++ unsigned int stride1, unsigned int stride2, +++ unsigned int _x, unsigned int y, +++ unsigned int _w, unsigned int h); +++ +++void av_rpi_sand_to_planar_c8(uint8_t * dst_u, const unsigned int dst_stride_u, +++ uint8_t * dst_v, const unsigned int dst_stride_v, +++ const uint8_t * src, +++ unsigned int stride1, unsigned int stride2, +++ unsigned int _x, unsigned int y, +++ unsigned int _w, unsigned int h); +++void av_rpi_sand_to_planar_c16(uint8_t * dst_u, const unsigned int dst_stride_u, +++ uint8_t * dst_v, const unsigned int dst_stride_v, +++ const uint8_t * src, +++ unsigned int stride1, unsigned int stride2, +++ unsigned int _x, unsigned int y, +++ unsigned int _w, unsigned int h); +++ +++void av_rpi_planar_to_sand_c8(uint8_t * dst_c, +++ unsigned int stride1, unsigned int stride2, +++ const uint8_t * src_u, const unsigned int src_stride_u, +++ const uint8_t * src_v, const unsigned int src_stride_v, +++ unsigned int _x, unsigned int y, +++ unsigned int _w, unsigned int h); +++void av_rpi_planar_to_sand_c16(uint8_t * dst_c, +++ unsigned int stride1, unsigned int stride2, +++ const uint8_t * src_u, const unsigned int src_stride_u, +++ const uint8_t * src_v, const unsigned int src_stride_v, +++ unsigned int _x, unsigned int y, +++ unsigned int _w, unsigned int h); +++ +++void av_rpi_sand30_to_planar_y16(uint8_t * dst, const unsigned int dst_stride, +++ const uint8_t * src, +++ unsigned int stride1, unsigned int stride2, +++ unsigned int _x, unsigned int y, +++ unsigned int _w, unsigned int h); +++void av_rpi_sand30_to_planar_c16(uint8_t * dst_u, const unsigned int dst_stride_u, +++ uint8_t * dst_v, const unsigned int dst_stride_v, +++ const uint8_t * src, +++ unsigned int stride1, unsigned int stride2, +++ unsigned int _x, unsigned int y, +++ unsigned int _w, unsigned int h); +++ +++void av_rpi_sand30_to_planar_y8(uint8_t * dst, const unsigned int dst_stride, +++ const uint8_t * src, +++ unsigned int stride1, unsigned int stride2, +++ unsigned int _x, unsigned int y, +++ unsigned int _w, unsigned int h); +++ +++// w/h in pixels +++void av_rpi_sand16_to_sand8(uint8_t * dst, const unsigned int dst_stride1, const unsigned int dst_stride2, +++ const uint8_t * src, const unsigned int src_stride1, const unsigned int src_stride2, +++ unsigned int w, unsigned int h, const unsigned int shr); +++ +++ +++// dst must contain required pixel format & allocated data buffers +++// Cropping on the src buffer will be honoured and dst crop will be set to zero +++int av_rpi_sand_to_planar_frame(AVFrame * const dst, const AVFrame * const src); +++ +++ +++static inline unsigned int av_rpi_sand_frame_stride1(const AVFrame * const frame) +++{ +++#ifdef RPI_ZC_SAND128_ONLY +++ // If we are sure we only only support 128 byte sand formats replace the +++ // var with a constant which should allow for better optimisation +++ return 128; +++#else +++ return frame->linesize[0]; +++#endif +++} +++ +++static 
inline unsigned int av_rpi_sand_frame_stride2(const AVFrame * const frame) +++{ +++ return frame->linesize[3]; +++} +++ +++ +++static inline int av_rpi_is_sand_format(const int format) +++{ +++ return (format >= AV_PIX_FMT_SAND128 && format <= AV_PIX_FMT_RPI4_10); +++} +++ +++static inline int av_rpi_is_sand_frame(const AVFrame * const frame) +++{ +++ return av_rpi_is_sand_format(frame->format); +++} +++ +++static inline int av_rpi_is_sand8_frame(const AVFrame * const frame) +++{ +++ return (frame->format == AV_PIX_FMT_SAND128 || frame->format == AV_PIX_FMT_RPI4_8); +++} +++ +++static inline int av_rpi_is_sand16_frame(const AVFrame * const frame) +++{ +++ return (frame->format >= AV_PIX_FMT_SAND64_10 && frame->format <= AV_PIX_FMT_SAND64_16); +++} +++ +++static inline int av_rpi_is_sand30_frame(const AVFrame * const frame) +++{ +++ return (frame->format == AV_PIX_FMT_RPI4_10); +++} +++ +++static inline int av_rpi_sand_frame_xshl(const AVFrame * const frame) +++{ +++ return av_rpi_is_sand8_frame(frame) ? 0 : 1; +++} +++ +++// If x is measured in bytes (not pixels) then this works for sand64_16 as +++// well as sand128 - but in the general case we work that out +++ +++static inline unsigned int av_rpi_sand_frame_off_y(const AVFrame * const frame, const unsigned int x_y, const unsigned int y) +++{ +++ const unsigned int stride1 = av_rpi_sand_frame_stride1(frame); +++ const unsigned int stride2 = av_rpi_sand_frame_stride2(frame); +++ const unsigned int x = x_y << av_rpi_sand_frame_xshl(frame); +++ const unsigned int x1 = x & (stride1 - 1); +++ const unsigned int x2 = x ^ x1; +++ +++ return x1 + stride1 * y + stride2 * x2; +++} +++ +++static inline unsigned int av_rpi_sand_frame_off_c(const AVFrame * const frame, const unsigned int x_c, const unsigned int y_c) +++{ +++ const unsigned int stride1 = av_rpi_sand_frame_stride1(frame); +++ const unsigned int stride2 = av_rpi_sand_frame_stride2(frame); +++ const unsigned int x = x_c << (av_rpi_sand_frame_xshl(frame) + 1); +++ const unsigned int x1 = x & (stride1 - 1); +++ const unsigned int x2 = x ^ x1; +++ +++ return x1 + stride1 * y_c + stride2 * x2; +++} +++ +++static inline uint8_t * av_rpi_sand_frame_pos_y(const AVFrame * const frame, const unsigned int x, const unsigned int y) +++{ +++ return frame->data[0] + av_rpi_sand_frame_off_y(frame, x, y); +++} +++ +++static inline uint8_t * av_rpi_sand_frame_pos_c(const AVFrame * const frame, const unsigned int x, const unsigned int y) +++{ +++ return frame->data[1] + av_rpi_sand_frame_off_c(frame, x, y); +++} +++ +++#endif +++ ++diff --git a/libswscale/aarch64/rgb2rgb.c b/libswscale/aarch64/rgb2rgb.c ++index a9bf6ff9e0..6a0e2dcc09 100644 ++--- a/libswscale/aarch64/rgb2rgb.c +++++ b/libswscale/aarch64/rgb2rgb.c ++@@ -30,6 +30,12 @@ ++ void ff_interleave_bytes_neon(const uint8_t *src1, const uint8_t *src2, ++ uint8_t *dest, int width, int height, ++ int src1Stride, int src2Stride, int dstStride); +++void ff_bgr24toyv12_aarch64(const uint8_t *src, uint8_t *ydst, uint8_t *udst, +++ uint8_t *vdst, int width, int height, int lumStride, +++ int chromStride, int srcStride, int32_t *rgb2yuv); +++void ff_rgb24toyv12_aarch64(const uint8_t *src, uint8_t *ydst, uint8_t *udst, +++ uint8_t *vdst, int width, int height, int lumStride, +++ int chromStride, int srcStride, int32_t *rgb2yuv); ++ ++ av_cold void rgb2rgb_init_aarch64(void) ++ { ++@@ -37,5 +43,7 @@ av_cold void rgb2rgb_init_aarch64(void) ++ ++ if (have_neon(cpu_flags)) { ++ interleaveBytes = ff_interleave_bytes_neon; +++ ff_rgb24toyv12 = 
ff_rgb24toyv12_aarch64; +++ ff_bgr24toyv12 = ff_bgr24toyv12_aarch64; ++ } ++ } ++diff --git a/libswscale/aarch64/rgb2rgb_neon.S b/libswscale/aarch64/rgb2rgb_neon.S ++index d81110ec57..476ca723a0 100644 ++--- a/libswscale/aarch64/rgb2rgb_neon.S +++++ b/libswscale/aarch64/rgb2rgb_neon.S ++@@ -77,3 +77,448 @@ function ff_interleave_bytes_neon, export=1 ++ 0: ++ ret ++ endfunc +++ +++// void ff_rgb24toyv12_aarch64( +++// const uint8_t *src, // x0 +++// uint8_t *ydst, // x1 +++// uint8_t *udst, // x2 +++// uint8_t *vdst, // x3 +++// int width, // w4 +++// int height, // w5 +++// int lumStride, // w6 +++// int chromStride, // w7 +++// int srcStr, // [sp, #0] +++// int32_t *rgb2yuv); // [sp, #8] +++ +++function ff_rgb24toyv12_aarch64, export=1 +++ ldr x15, [sp, #8] +++ ld1 {v3.s}[2], [x15], #4 +++ ld1 {v3.s}[1], [x15], #4 +++ ld1 {v3.s}[0], [x15], #4 +++ ld1 {v4.s}[2], [x15], #4 +++ ld1 {v4.s}[1], [x15], #4 +++ ld1 {v4.s}[0], [x15], #4 +++ ld1 {v5.s}[2], [x15], #4 +++ ld1 {v5.s}[1], [x15], #4 +++ ld1 {v5.s}[0], [x15] +++ b 99f +++endfunc +++ +++// void ff_bgr24toyv12_aarch64( +++// const uint8_t *src, // x0 +++// uint8_t *ydst, // x1 +++// uint8_t *udst, // x2 +++// uint8_t *vdst, // x3 +++// int width, // w4 +++// int height, // w5 +++// int lumStride, // w6 +++// int chromStride, // w7 +++// int srcStr, // [sp, #0] +++// int32_t *rgb2yuv); // [sp, #8] +++ +++// regs +++// v0-2 Src bytes - reused as chroma src +++// v3-5 Coeffs (packed very inefficiently - could be squashed) +++// v6 128b +++// v7 128h +++// v8-15 Reserved +++// v16-18 Lo Src expanded as H +++// v19 - +++// v20-22 Hi Src expanded as H +++// v23 - +++// v24 U out +++// v25 U tmp +++// v26 Y out +++// v27-29 Y tmp +++// v30 V out +++// v31 V tmp +++ +++// Assumes Little Endian in tail stores & conversion matrix +++ +++function ff_bgr24toyv12_aarch64, export=1 +++ ldr x15, [sp, #8] +++ ld3 {v3.s, v4.s, v5.s}[0], [x15], #12 +++ ld3 {v3.s, v4.s, v5.s}[1], [x15], #12 +++ ld3 {v3.s, v4.s, v5.s}[2], [x15] +++99: +++ ldr w14, [sp, #0] +++ movi v7.8b, #128 +++ uxtl v6.8h, v7.8b +++ // Ensure if nothing to do then we do nothing +++ cmp w4, #0 +++ b.le 90f +++ cmp w5, #0 +++ b.le 90f +++ // If w % 16 != 0 then -16 so we do main loop 1 fewer times with +++ // the remainder done in the tail +++ tst w4, #15 +++ b.eq 1f +++ sub w4, w4, #16 +++1: +++ +++// -------------------- Even line body - YUV +++11: +++ subs w9, w4, #0 +++ mov x10, x0 +++ mov x11, x1 +++ mov x12, x2 +++ mov x13, x3 +++ b.lt 12f +++ +++ ld3 {v0.16b, v1.16b, v2.16b}, [x10], #48 +++ subs w9, w9, #16 +++ b.le 13f +++ +++10: +++ uxtl v16.8h, v0.8b +++ uxtl v17.8h, v1.8b +++ uxtl v18.8h, v2.8b +++ +++ uxtl2 v20.8h, v0.16b +++ uxtl2 v21.8h, v1.16b +++ uxtl2 v22.8h, v2.16b +++ +++ bic v0.8h, #0xff, LSL #8 +++ bic v1.8h, #0xff, LSL #8 +++ bic v2.8h, #0xff, LSL #8 +++ +++ // Testing shows it is faster to stack the smull/smlal ops together +++ // rather than interleave them between channels and indeed even the +++ // shift/add sections seem happier not interleaved +++ +++ // Y0 +++ smull v26.4s, v16.4h, v3.h[0] +++ smlal v26.4s, v17.4h, v4.h[0] +++ smlal v26.4s, v18.4h, v5.h[0] +++ smull2 v27.4s, v16.8h, v3.h[0] +++ smlal2 v27.4s, v17.8h, v4.h[0] +++ smlal2 v27.4s, v18.8h, v5.h[0] +++ // Y1 +++ smull v28.4s, v20.4h, v3.h[0] +++ smlal v28.4s, v21.4h, v4.h[0] +++ smlal v28.4s, v22.4h, v5.h[0] +++ smull2 v29.4s, v20.8h, v3.h[0] +++ smlal2 v29.4s, v21.8h, v4.h[0] +++ smlal2 v29.4s, v22.8h, v5.h[0] +++ shrn v26.4h, v26.4s, #12 +++ shrn2 v26.8h, v27.4s, #12 +++ add v26.8h, v26.8h, v6.8h // 
+128 (>> 3 = 16) +++ sqrshrun v26.8b, v26.8h, #3 +++ shrn v28.4h, v28.4s, #12 +++ shrn2 v28.8h, v29.4s, #12 +++ add v28.8h, v28.8h, v6.8h +++ sqrshrun2 v26.16b, v28.8h, #3 +++ // Y0/Y1 +++ +++ // U +++ // Vector subscript *2 as we loaded into S but are only using H +++ smull v24.4s, v0.4h, v3.h[2] +++ smlal v24.4s, v1.4h, v4.h[2] +++ smlal v24.4s, v2.4h, v5.h[2] +++ smull2 v25.4s, v0.8h, v3.h[2] +++ smlal2 v25.4s, v1.8h, v4.h[2] +++ smlal2 v25.4s, v2.8h, v5.h[2] +++ +++ // V +++ smull v30.4s, v0.4h, v3.h[4] +++ smlal v30.4s, v1.4h, v4.h[4] +++ smlal v30.4s, v2.4h, v5.h[4] +++ smull2 v31.4s, v0.8h, v3.h[4] +++ smlal2 v31.4s, v1.8h, v4.h[4] +++ smlal2 v31.4s, v2.8h, v5.h[4] +++ +++ ld3 {v0.16b, v1.16b, v2.16b}, [x10], #48 +++ +++ shrn v24.4h, v24.4s, #14 +++ shrn2 v24.8h, v25.4s, #14 +++ sqrshrn v24.8b, v24.8h, #1 +++ add v24.8b, v24.8b, v7.8b // +128 +++ shrn v30.4h, v30.4s, #14 +++ shrn2 v30.8h, v31.4s, #14 +++ sqrshrn v30.8b, v30.8h, #1 +++ add v30.8b, v30.8b, v7.8b // +128 +++ +++ subs w9, w9, #16 +++ +++ st1 {v26.16b}, [x11], #16 +++ st1 {v24.8b}, [x12], #8 +++ st1 {v30.8b}, [x13], #8 +++ +++ b.gt 10b +++ +++// -------------------- Even line tail - YUV +++// If width % 16 == 0 then simply runs once with preloaded RGB +++// If other then deals with preload & then does remaining tail +++ +++13: +++ // Body is simple copy of main loop body minus preload +++ +++ uxtl v16.8h, v0.8b +++ uxtl v17.8h, v1.8b +++ uxtl v18.8h, v2.8b +++ +++ uxtl2 v20.8h, v0.16b +++ uxtl2 v21.8h, v1.16b +++ uxtl2 v22.8h, v2.16b +++ +++ bic v0.8h, #0xff, LSL #8 +++ bic v1.8h, #0xff, LSL #8 +++ bic v2.8h, #0xff, LSL #8 +++ +++ // Y0 +++ smull v26.4s, v16.4h, v3.h[0] +++ smlal v26.4s, v17.4h, v4.h[0] +++ smlal v26.4s, v18.4h, v5.h[0] +++ smull2 v27.4s, v16.8h, v3.h[0] +++ smlal2 v27.4s, v17.8h, v4.h[0] +++ smlal2 v27.4s, v18.8h, v5.h[0] +++ // Y1 +++ smull v28.4s, v20.4h, v3.h[0] +++ smlal v28.4s, v21.4h, v4.h[0] +++ smlal v28.4s, v22.4h, v5.h[0] +++ smull2 v29.4s, v20.8h, v3.h[0] +++ smlal2 v29.4s, v21.8h, v4.h[0] +++ smlal2 v29.4s, v22.8h, v5.h[0] +++ shrn v26.4h, v26.4s, #12 +++ shrn2 v26.8h, v27.4s, #12 +++ add v26.8h, v26.8h, v6.8h // +128 (>> 3 = 16) +++ sqrshrun v26.8b, v26.8h, #3 +++ shrn v28.4h, v28.4s, #12 +++ shrn2 v28.8h, v29.4s, #12 +++ add v28.8h, v28.8h, v6.8h +++ sqrshrun2 v26.16b, v28.8h, #3 +++ // Y0/Y1 +++ +++ // U +++ // Vector subscript *2 as we loaded into S but are only using H +++ smull v24.4s, v0.4h, v3.h[2] +++ smlal v24.4s, v1.4h, v4.h[2] +++ smlal v24.4s, v2.4h, v5.h[2] +++ smull2 v25.4s, v0.8h, v3.h[2] +++ smlal2 v25.4s, v1.8h, v4.h[2] +++ smlal2 v25.4s, v2.8h, v5.h[2] +++ +++ // V +++ smull v30.4s, v0.4h, v3.h[4] +++ smlal v30.4s, v1.4h, v4.h[4] +++ smlal v30.4s, v2.4h, v5.h[4] +++ smull2 v31.4s, v0.8h, v3.h[4] +++ smlal2 v31.4s, v1.8h, v4.h[4] +++ smlal2 v31.4s, v2.8h, v5.h[4] +++ +++ cmp w9, #-16 +++ +++ shrn v24.4h, v24.4s, #14 +++ shrn2 v24.8h, v25.4s, #14 +++ sqrshrn v24.8b, v24.8h, #1 +++ add v24.8b, v24.8b, v7.8b // +128 +++ shrn v30.4h, v30.4s, #14 +++ shrn2 v30.8h, v31.4s, #14 +++ sqrshrn v30.8b, v30.8h, #1 +++ add v30.8b, v30.8b, v7.8b // +128 +++ +++ // Here: +++ // w9 == 0 width % 16 == 0, tail done +++ // w9 > -16 1st tail done (16 pels), remainder still to go +++ // w9 == -16 shouldn't happen +++ // w9 > -32 2nd tail done +++ // w9 <= -32 shouldn't happen +++ +++ b.lt 2f +++ st1 {v26.16b}, [x11], #16 +++ st1 {v24.8b}, [x12], #8 +++ st1 {v30.8b}, [x13], #8 +++ cbz w9, 3f +++ +++12: +++ sub w9, w9, #16 +++ +++ tbz w9, #3, 1f +++ ld3 {v0.8b, v1.8b, v2.8b}, [x10], #24 +++1: tbz 
w9, #2, 1f +++ ld3 {v0.b, v1.b, v2.b}[8], [x10], #3 +++ ld3 {v0.b, v1.b, v2.b}[9], [x10], #3 +++ ld3 {v0.b, v1.b, v2.b}[10], [x10], #3 +++ ld3 {v0.b, v1.b, v2.b}[11], [x10], #3 +++1: tbz w9, #1, 1f +++ ld3 {v0.b, v1.b, v2.b}[12], [x10], #3 +++ ld3 {v0.b, v1.b, v2.b}[13], [x10], #3 +++1: tbz w9, #0, 13b +++ ld3 {v0.b, v1.b, v2.b}[14], [x10], #3 +++ b 13b +++ +++2: +++ tbz w9, #3, 1f +++ st1 {v26.8b}, [x11], #8 +++ st1 {v24.s}[0], [x12], #4 +++ st1 {v30.s}[0], [x13], #4 +++1: tbz w9, #2, 1f +++ st1 {v26.s}[2], [x11], #4 +++ st1 {v24.h}[2], [x12], #2 +++ st1 {v30.h}[2], [x13], #2 +++1: tbz w9, #1, 1f +++ st1 {v26.h}[6], [x11], #2 +++ st1 {v24.b}[6], [x12], #1 +++ st1 {v30.b}[6], [x13], #1 +++1: tbz w9, #0, 1f +++ st1 {v26.b}[14], [x11] +++ st1 {v24.b}[7], [x12] +++ st1 {v30.b}[7], [x13] +++1: +++3: +++ +++// -------------------- Odd line body - Y only +++ +++ subs w5, w5, #1 +++ b.eq 90f +++ +++ subs w9, w4, #0 +++ add x0, x0, w14, SXTX +++ add x1, x1, w6, SXTX +++ mov x10, x0 +++ mov x11, x1 +++ b.lt 12f +++ +++ ld3 {v0.16b, v1.16b, v2.16b}, [x10], #48 +++ subs w9, w9, #16 +++ b.le 13f +++ +++10: +++ uxtl v16.8h, v0.8b +++ uxtl v17.8h, v1.8b +++ uxtl v18.8h, v2.8b +++ +++ uxtl2 v20.8h, v0.16b +++ uxtl2 v21.8h, v1.16b +++ uxtl2 v22.8h, v2.16b +++ +++ // Testing shows it is faster to stack the smull/smlal ops together +++ // rather than interleave them between channels and indeed even the +++ // shift/add sections seem happier not interleaved +++ +++ // Y0 +++ smull v26.4s, v16.4h, v3.h[0] +++ smlal v26.4s, v17.4h, v4.h[0] +++ smlal v26.4s, v18.4h, v5.h[0] +++ smull2 v27.4s, v16.8h, v3.h[0] +++ smlal2 v27.4s, v17.8h, v4.h[0] +++ smlal2 v27.4s, v18.8h, v5.h[0] +++ // Y1 +++ smull v28.4s, v20.4h, v3.h[0] +++ smlal v28.4s, v21.4h, v4.h[0] +++ smlal v28.4s, v22.4h, v5.h[0] +++ smull2 v29.4s, v20.8h, v3.h[0] +++ smlal2 v29.4s, v21.8h, v4.h[0] +++ smlal2 v29.4s, v22.8h, v5.h[0] +++ +++ ld3 {v0.16b, v1.16b, v2.16b}, [x10], #48 +++ +++ shrn v26.4h, v26.4s, #12 +++ shrn2 v26.8h, v27.4s, #12 +++ add v26.8h, v26.8h, v6.8h // +128 (>> 3 = 16) +++ sqrshrun v26.8b, v26.8h, #3 +++ shrn v28.4h, v28.4s, #12 +++ shrn2 v28.8h, v29.4s, #12 +++ add v28.8h, v28.8h, v6.8h +++ sqrshrun2 v26.16b, v28.8h, #3 +++ // Y0/Y1 +++ +++ subs w9, w9, #16 +++ +++ st1 {v26.16b}, [x11], #16 +++ +++ b.gt 10b +++ +++// -------------------- Odd line tail - Y +++// If width % 16 == 0 then simply runs once with preloaded RGB +++// If other then deals with preload & then does remaining tail +++ +++13: +++ // Body is simple copy of main loop body minus preload +++ +++ uxtl v16.8h, v0.8b +++ uxtl v17.8h, v1.8b +++ uxtl v18.8h, v2.8b +++ +++ uxtl2 v20.8h, v0.16b +++ uxtl2 v21.8h, v1.16b +++ uxtl2 v22.8h, v2.16b +++ +++ // Y0 +++ smull v26.4s, v16.4h, v3.h[0] +++ smlal v26.4s, v17.4h, v4.h[0] +++ smlal v26.4s, v18.4h, v5.h[0] +++ smull2 v27.4s, v16.8h, v3.h[0] +++ smlal2 v27.4s, v17.8h, v4.h[0] +++ smlal2 v27.4s, v18.8h, v5.h[0] +++ // Y1 +++ smull v28.4s, v20.4h, v3.h[0] +++ smlal v28.4s, v21.4h, v4.h[0] +++ smlal v28.4s, v22.4h, v5.h[0] +++ smull2 v29.4s, v20.8h, v3.h[0] +++ smlal2 v29.4s, v21.8h, v4.h[0] +++ smlal2 v29.4s, v22.8h, v5.h[0] +++ +++ cmp w9, #-16 +++ +++ shrn v26.4h, v26.4s, #12 +++ shrn2 v26.8h, v27.4s, #12 +++ add v26.8h, v26.8h, v6.8h // +128 (>> 3 = 16) +++ sqrshrun v26.8b, v26.8h, #3 +++ shrn v28.4h, v28.4s, #12 +++ shrn2 v28.8h, v29.4s, #12 +++ add v28.8h, v28.8h, v6.8h +++ sqrshrun2 v26.16b, v28.8h, #3 +++ // Y0/Y1 +++ +++ // Here: +++ // w9 == 0 width % 16 == 0, tail done +++ // w9 > -16 1st tail done (16 pels), 
remainder still to go +++ // w9 == -16 shouldn't happen +++ // w9 > -32 2nd tail done +++ // w9 <= -32 shouldn't happen +++ +++ b.lt 2f +++ st1 {v26.16b}, [x11], #16 +++ cbz w9, 3f +++ +++12: +++ sub w9, w9, #16 +++ +++ tbz w9, #3, 1f +++ ld3 {v0.8b, v1.8b, v2.8b}, [x10], #24 +++1: tbz w9, #2, 1f +++ ld3 {v0.b, v1.b, v2.b}[8], [x10], #3 +++ ld3 {v0.b, v1.b, v2.b}[9], [x10], #3 +++ ld3 {v0.b, v1.b, v2.b}[10], [x10], #3 +++ ld3 {v0.b, v1.b, v2.b}[11], [x10], #3 +++1: tbz w9, #1, 1f +++ ld3 {v0.b, v1.b, v2.b}[12], [x10], #3 +++ ld3 {v0.b, v1.b, v2.b}[13], [x10], #3 +++1: tbz w9, #0, 13b +++ ld3 {v0.b, v1.b, v2.b}[14], [x10], #3 +++ b 13b +++ +++2: +++ tbz w9, #3, 1f +++ st1 {v26.8b}, [x11], #8 +++1: tbz w9, #2, 1f +++ st1 {v26.s}[2], [x11], #4 +++1: tbz w9, #1, 1f +++ st1 {v26.h}[6], [x11], #2 +++1: tbz w9, #0, 1f +++ st1 {v26.b}[14], [x11] +++1: +++3: +++ +++// ------------------- Loop to start +++ +++ add x0, x0, w14, SXTX +++ add x1, x1, w6, SXTX +++ add x2, x2, w7, SXTX +++ add x3, x3, w7, SXTX +++ subs w5, w5, #1 +++ b.gt 11b +++90: +++ ret +++endfunc ++diff --git a/libswscale/rgb2rgb.c b/libswscale/rgb2rgb.c ++index e98fdac8ea..c3b9079d2b 100644 ++--- a/libswscale/rgb2rgb.c +++++ b/libswscale/rgb2rgb.c ++@@ -83,6 +83,31 @@ void (*ff_rgb24toyv12)(const uint8_t *src, uint8_t *ydst, ++ int width, int height, ++ int lumStride, int chromStride, int srcStride, ++ int32_t *rgb2yuv); +++void (*ff_bgr24toyv12)(const uint8_t *src, uint8_t *ydst, +++ uint8_t *udst, uint8_t *vdst, +++ int width, int height, +++ int lumStride, int chromStride, int srcStride, +++ int32_t *rgb2yuv); +++void (*ff_rgbxtoyv12)(const uint8_t *src, uint8_t *ydst, +++ uint8_t *udst, uint8_t *vdst, +++ int width, int height, +++ int lumStride, int chromStride, int srcStride, +++ int32_t *rgb2yuv); +++void (*ff_bgrxtoyv12)(const uint8_t *src, uint8_t *ydst, +++ uint8_t *udst, uint8_t *vdst, +++ int width, int height, +++ int lumStride, int chromStride, int srcStride, +++ int32_t *rgb2yuv); +++void (*ff_xrgbtoyv12)(const uint8_t *src, uint8_t *ydst, +++ uint8_t *udst, uint8_t *vdst, +++ int width, int height, +++ int lumStride, int chromStride, int srcStride, +++ int32_t *rgb2yuv); +++void (*ff_xbgrtoyv12)(const uint8_t *src, uint8_t *ydst, +++ uint8_t *udst, uint8_t *vdst, +++ int width, int height, +++ int lumStride, int chromStride, int srcStride, +++ int32_t *rgb2yuv); ++ void (*planar2x)(const uint8_t *src, uint8_t *dst, int width, int height, ++ int srcStride, int dstStride); ++ void (*interleaveBytes)(const uint8_t *src1, const uint8_t *src2, uint8_t *dst, ++diff --git a/libswscale/rgb2rgb.h b/libswscale/rgb2rgb.h ++index f3951d523e..a0dd3ffb79 100644 ++--- a/libswscale/rgb2rgb.h +++++ b/libswscale/rgb2rgb.h ++@@ -79,6 +79,9 @@ void rgb12to15(const uint8_t *src, uint8_t *dst, int src_size); ++ void ff_rgb24toyv12_c(const uint8_t *src, uint8_t *ydst, uint8_t *udst, ++ uint8_t *vdst, int width, int height, int lumStride, ++ int chromStride, int srcStride, int32_t *rgb2yuv); +++void ff_bgr24toyv12_c(const uint8_t *src, uint8_t *ydst, uint8_t *udst, +++ uint8_t *vdst, int width, int height, int lumStride, +++ int chromStride, int srcStride, int32_t *rgb2yuv); ++ ++ /** ++ * Height should be a multiple of 2 and width should be a multiple of 16. 
++@@ -128,6 +131,26 @@ extern void (*ff_rgb24toyv12)(const uint8_t *src, uint8_t *ydst, uint8_t *udst, ++ int width, int height, ++ int lumStride, int chromStride, int srcStride, ++ int32_t *rgb2yuv); +++extern void (*ff_bgr24toyv12)(const uint8_t *src, uint8_t *ydst, uint8_t *udst, uint8_t *vdst, +++ int width, int height, +++ int lumStride, int chromStride, int srcStride, +++ int32_t *rgb2yuv); +++extern void (*ff_rgbxtoyv12)(const uint8_t *src, uint8_t *ydst, uint8_t *udst, uint8_t *vdst, +++ int width, int height, +++ int lumStride, int chromStride, int srcStride, +++ int32_t *rgb2yuv); +++extern void (*ff_bgrxtoyv12)(const uint8_t *src, uint8_t *ydst, uint8_t *udst, uint8_t *vdst, +++ int width, int height, +++ int lumStride, int chromStride, int srcStride, +++ int32_t *rgb2yuv); +++extern void (*ff_xrgbtoyv12)(const uint8_t *src, uint8_t *ydst, uint8_t *udst, uint8_t *vdst, +++ int width, int height, +++ int lumStride, int chromStride, int srcStride, +++ int32_t *rgb2yuv); +++extern void (*ff_xbgrtoyv12)(const uint8_t *src, uint8_t *ydst, uint8_t *udst, uint8_t *vdst, +++ int width, int height, +++ int lumStride, int chromStride, int srcStride, +++ int32_t *rgb2yuv); ++ extern void (*planar2x)(const uint8_t *src, uint8_t *dst, int width, int height, ++ int srcStride, int dstStride); ++ ++diff --git a/libswscale/rgb2rgb_template.c b/libswscale/rgb2rgb_template.c ++index 42c69801ba..e711589e1e 100644 ++--- a/libswscale/rgb2rgb_template.c +++++ b/libswscale/rgb2rgb_template.c ++@@ -646,13 +646,14 @@ static inline void uyvytoyv12_c(const uint8_t *src, uint8_t *ydst, ++ * others are ignored in the C version. ++ * FIXME: Write HQ version. ++ */ ++-void ff_rgb24toyv12_c(const uint8_t *src, uint8_t *ydst, uint8_t *udst, +++static void rgb24toyv12_x(const uint8_t *src, uint8_t *ydst, uint8_t *udst, ++ uint8_t *vdst, int width, int height, int lumStride, ++- int chromStride, int srcStride, int32_t *rgb2yuv) +++ int chromStride, int srcStride, int32_t *rgb2yuv, +++ const uint8_t x[9]) ++ { ++- int32_t ry = rgb2yuv[RY_IDX], gy = rgb2yuv[GY_IDX], by = rgb2yuv[BY_IDX]; ++- int32_t ru = rgb2yuv[RU_IDX], gu = rgb2yuv[GU_IDX], bu = rgb2yuv[BU_IDX]; ++- int32_t rv = rgb2yuv[RV_IDX], gv = rgb2yuv[GV_IDX], bv = rgb2yuv[BV_IDX]; +++ int32_t ry = rgb2yuv[x[0]], gy = rgb2yuv[x[1]], by = rgb2yuv[x[2]]; +++ int32_t ru = rgb2yuv[x[3]], gu = rgb2yuv[x[4]], bu = rgb2yuv[x[5]]; +++ int32_t rv = rgb2yuv[x[6]], gv = rgb2yuv[x[7]], bv = rgb2yuv[x[8]]; ++ int y; ++ const int chromWidth = width >> 1; ++ ++@@ -678,6 +679,19 @@ void ff_rgb24toyv12_c(const uint8_t *src, uint8_t *ydst, uint8_t *udst, ++ Y = ((ry * r + gy * g + by * b) >> RGB2YUV_SHIFT) + 16; ++ ydst[2 * i + 1] = Y; ++ } +++ if ((width & 1) != 0) { +++ unsigned int b = src[6 * i + 0]; +++ unsigned int g = src[6 * i + 1]; +++ unsigned int r = src[6 * i + 2]; +++ +++ unsigned int Y = ((ry * r + gy * g + by * b) >> RGB2YUV_SHIFT) + 16; +++ unsigned int V = ((rv * r + gv * g + bv * b) >> RGB2YUV_SHIFT) + 128; +++ unsigned int U = ((ru * r + gu * g + bu * b) >> RGB2YUV_SHIFT) + 128; +++ +++ udst[i] = U; +++ vdst[i] = V; +++ ydst[2 * i] = Y; +++ } ++ ydst += lumStride; ++ src += srcStride; ++ ++@@ -700,6 +714,125 @@ void ff_rgb24toyv12_c(const uint8_t *src, uint8_t *ydst, uint8_t *udst, ++ Y = ((ry * r + gy * g + by * b) >> RGB2YUV_SHIFT) + 16; ++ ydst[2 * i + 1] = Y; ++ } +++ if ((width & 1) != 0) { +++ unsigned int b = src[6 * i + 0]; +++ unsigned int g = src[6 * i + 1]; +++ unsigned int r = src[6 * i + 2]; +++ +++ unsigned int Y = ((ry * r + gy * g + by * b) 
>> RGB2YUV_SHIFT) + 16; +++ +++ ydst[2 * i] = Y; +++ } +++ udst += chromStride; +++ vdst += chromStride; +++ ydst += lumStride; +++ src += srcStride; +++ } +++} +++ +++static const uint8_t x_rgb[9] = { +++ RY_IDX, GY_IDX, BY_IDX, +++ RU_IDX, GU_IDX, BU_IDX, +++ RV_IDX, GV_IDX, BV_IDX, +++}; +++ +++static const uint8_t x_bgr[9] = { +++ BY_IDX, GY_IDX, RY_IDX, +++ BU_IDX, GU_IDX, RU_IDX, +++ BV_IDX, GV_IDX, RV_IDX, +++}; +++ +++void ff_rgb24toyv12_c(const uint8_t *src, uint8_t *ydst, uint8_t *udst, +++ uint8_t *vdst, int width, int height, int lumStride, +++ int chromStride, int srcStride, int32_t *rgb2yuv) +++{ +++ rgb24toyv12_x(src, ydst, udst, vdst, width, height, lumStride, chromStride, srcStride, rgb2yuv, x_rgb); +++} +++ +++void ff_bgr24toyv12_c(const uint8_t *src, uint8_t *ydst, uint8_t *udst, +++ uint8_t *vdst, int width, int height, int lumStride, +++ int chromStride, int srcStride, int32_t *rgb2yuv) +++{ +++ rgb24toyv12_x(src, ydst, udst, vdst, width, height, lumStride, chromStride, srcStride, rgb2yuv, x_bgr); +++} +++ +++static void rgbxtoyv12_x(const uint8_t *src, uint8_t *ydst, uint8_t *udst, +++ uint8_t *vdst, int width, int height, int lumStride, +++ int chromStride, int srcStride, int32_t *rgb2yuv, +++ const uint8_t x[9]) +++{ +++ int32_t ry = rgb2yuv[x[0]], gy = rgb2yuv[x[1]], by = rgb2yuv[x[2]]; +++ int32_t ru = rgb2yuv[x[3]], gu = rgb2yuv[x[4]], bu = rgb2yuv[x[5]]; +++ int32_t rv = rgb2yuv[x[6]], gv = rgb2yuv[x[7]], bv = rgb2yuv[x[8]]; +++ int y; +++ const int chromWidth = width >> 1; +++ +++ for (y = 0; y < height; y += 2) { +++ int i; +++ for (i = 0; i < chromWidth; i++) { +++ unsigned int b = src[8 * i + 2]; +++ unsigned int g = src[8 * i + 1]; +++ unsigned int r = src[8 * i + 0]; +++ +++ unsigned int Y = ((ry * r + gy * g + by * b) >> RGB2YUV_SHIFT) + 16; +++ unsigned int V = ((rv * r + gv * g + bv * b) >> RGB2YUV_SHIFT) + 128; +++ unsigned int U = ((ru * r + gu * g + bu * b) >> RGB2YUV_SHIFT) + 128; +++ +++ udst[i] = U; +++ vdst[i] = V; +++ ydst[2 * i] = Y; +++ +++ b = src[8 * i + 6]; +++ g = src[8 * i + 5]; +++ r = src[8 * i + 4]; +++ +++ Y = ((ry * r + gy * g + by * b) >> RGB2YUV_SHIFT) + 16; +++ ydst[2 * i + 1] = Y; +++ } +++ if ((width & 1) != 0) { +++ unsigned int b = src[8 * i + 2]; +++ unsigned int g = src[8 * i + 1]; +++ unsigned int r = src[8 * i + 0]; +++ +++ unsigned int Y = ((ry * r + gy * g + by * b) >> RGB2YUV_SHIFT) + 16; +++ unsigned int V = ((rv * r + gv * g + bv * b) >> RGB2YUV_SHIFT) + 128; +++ unsigned int U = ((ru * r + gu * g + bu * b) >> RGB2YUV_SHIFT) + 128; +++ +++ udst[i] = U; +++ vdst[i] = V; +++ ydst[2 * i] = Y; +++ } +++ ydst += lumStride; +++ src += srcStride; +++ +++ if (y+1 == height) +++ break; +++ +++ for (i = 0; i < chromWidth; i++) { +++ unsigned int b = src[8 * i + 2]; +++ unsigned int g = src[8 * i + 1]; +++ unsigned int r = src[8 * i + 0]; +++ +++ unsigned int Y = ((ry * r + gy * g + by * b) >> RGB2YUV_SHIFT) + 16; +++ +++ ydst[2 * i] = Y; +++ +++ b = src[8 * i + 6]; +++ g = src[8 * i + 5]; +++ r = src[8 * i + 4]; +++ +++ Y = ((ry * r + gy * g + by * b) >> RGB2YUV_SHIFT) + 16; +++ ydst[2 * i + 1] = Y; +++ } +++ if ((width & 1) != 0) { +++ unsigned int b = src[8 * i + 2]; +++ unsigned int g = src[8 * i + 1]; +++ unsigned int r = src[8 * i + 0]; +++ +++ unsigned int Y = ((ry * r + gy * g + by * b) >> RGB2YUV_SHIFT) + 16; +++ +++ ydst[2 * i] = Y; +++ } ++ udst += chromStride; ++ vdst += chromStride; ++ ydst += lumStride; ++@@ -707,6 +840,37 @@ void ff_rgb24toyv12_c(const uint8_t *src, uint8_t *ydst, uint8_t *udst, ++ } ++ } ++ 
+++static void ff_rgbxtoyv12_c(const uint8_t *src, uint8_t *ydst, uint8_t *udst, +++ uint8_t *vdst, int width, int height, int lumStride, +++ int chromStride, int srcStride, int32_t *rgb2yuv) +++{ +++ rgbxtoyv12_x(src, ydst, udst, vdst, width, height, lumStride, chromStride, srcStride, rgb2yuv, x_rgb); +++} +++ +++static void ff_bgrxtoyv12_c(const uint8_t *src, uint8_t *ydst, uint8_t *udst, +++ uint8_t *vdst, int width, int height, int lumStride, +++ int chromStride, int srcStride, int32_t *rgb2yuv) +++{ +++ rgbxtoyv12_x(src, ydst, udst, vdst, width, height, lumStride, chromStride, srcStride, rgb2yuv, x_bgr); +++} +++ +++// As the general code does no SIMD-like ops simply adding 1 to the src address +++// will fix the ignored alpha position +++static void ff_xrgbtoyv12_c(const uint8_t *src, uint8_t *ydst, uint8_t *udst, +++ uint8_t *vdst, int width, int height, int lumStride, +++ int chromStride, int srcStride, int32_t *rgb2yuv) +++{ +++ rgbxtoyv12_x(src + 1, ydst, udst, vdst, width, height, lumStride, chromStride, srcStride, rgb2yuv, x_rgb); +++} +++ +++static void ff_xbgrtoyv12_c(const uint8_t *src, uint8_t *ydst, uint8_t *udst, +++ uint8_t *vdst, int width, int height, int lumStride, +++ int chromStride, int srcStride, int32_t *rgb2yuv) +++{ +++ rgbxtoyv12_x(src + 1, ydst, udst, vdst, width, height, lumStride, chromStride, srcStride, rgb2yuv, x_bgr); +++} +++ +++ ++ static void interleaveBytes_c(const uint8_t *src1, const uint8_t *src2, ++ uint8_t *dest, int width, int height, ++ int src1Stride, int src2Stride, int dstStride) ++@@ -980,6 +1144,11 @@ static av_cold void rgb2rgb_init_c(void) ++ yuy2toyv12 = yuy2toyv12_c; ++ planar2x = planar2x_c; ++ ff_rgb24toyv12 = ff_rgb24toyv12_c; +++ ff_bgr24toyv12 = ff_bgr24toyv12_c; +++ ff_rgbxtoyv12 = ff_rgbxtoyv12_c; +++ ff_bgrxtoyv12 = ff_bgrxtoyv12_c; +++ ff_xrgbtoyv12 = ff_xrgbtoyv12_c; +++ ff_xbgrtoyv12 = ff_xbgrtoyv12_c; ++ interleaveBytes = interleaveBytes_c; ++ deinterleaveBytes = deinterleaveBytes_c; ++ vu9_to_vu12 = vu9_to_vu12_c; ++diff --git a/libswscale/swscale_unscaled.c b/libswscale/swscale_unscaled.c ++index 9af2e7ecc3..52469b2e4a 100644 ++--- a/libswscale/swscale_unscaled.c +++++ b/libswscale/swscale_unscaled.c ++@@ -1654,6 +1654,91 @@ static int bgr24ToYv12Wrapper(SwsContext *c, const uint8_t *src[], ++ return srcSliceH; ++ } ++ +++static int rgb24ToYv12Wrapper(SwsContext *c, const uint8_t *src[], +++ int srcStride[], int srcSliceY, int srcSliceH, +++ uint8_t *dst[], int dstStride[]) +++{ +++ ff_bgr24toyv12( +++ src[0], +++ dst[0] + srcSliceY * dstStride[0], +++ dst[1] + (srcSliceY >> 1) * dstStride[1], +++ dst[2] + (srcSliceY >> 1) * dstStride[2], +++ c->srcW, srcSliceH, +++ dstStride[0], dstStride[1], srcStride[0], +++ c->input_rgb2yuv_table); +++ if (dst[3]) +++ fillPlane(dst[3], dstStride[3], c->srcW, srcSliceH, srcSliceY, 255); +++ return srcSliceH; +++} +++ +++static int bgrxToYv12Wrapper(SwsContext *c, const uint8_t *src[], +++ int srcStride[], int srcSliceY, int srcSliceH, +++ uint8_t *dst[], int dstStride[]) +++{ +++ ff_bgrxtoyv12( +++ src[0], +++ dst[0] + srcSliceY * dstStride[0], +++ dst[1] + (srcSliceY >> 1) * dstStride[1], +++ dst[2] + (srcSliceY >> 1) * dstStride[2], +++ c->srcW, srcSliceH, +++ dstStride[0], dstStride[1], srcStride[0], +++ c->input_rgb2yuv_table); +++ if (dst[3]) +++ fillPlane(dst[3], dstStride[3], c->srcW, srcSliceH, srcSliceY, 255); +++ return srcSliceH; +++} +++ +++static int rgbxToYv12Wrapper(SwsContext *c, const uint8_t *src[], +++ int srcStride[], int srcSliceY, int srcSliceH, +++ uint8_t 
*dst[], int dstStride[]) +++{ +++ ff_rgbxtoyv12( +++ src[0], +++ dst[0] + srcSliceY * dstStride[0], +++ dst[1] + (srcSliceY >> 1) * dstStride[1], +++ dst[2] + (srcSliceY >> 1) * dstStride[2], +++ c->srcW, srcSliceH, +++ dstStride[0], dstStride[1], srcStride[0], +++ c->input_rgb2yuv_table); +++ if (dst[3]) +++ fillPlane(dst[3], dstStride[3], c->srcW, srcSliceH, srcSliceY, 255); +++ return srcSliceH; +++} +++ +++static int xbgrToYv12Wrapper(SwsContext *c, const uint8_t *src[], +++ int srcStride[], int srcSliceY, int srcSliceH, +++ uint8_t *dst[], int dstStride[]) +++{ +++ ff_xbgrtoyv12( +++ src[0], +++ dst[0] + srcSliceY * dstStride[0], +++ dst[1] + (srcSliceY >> 1) * dstStride[1], +++ dst[2] + (srcSliceY >> 1) * dstStride[2], +++ c->srcW, srcSliceH, +++ dstStride[0], dstStride[1], srcStride[0], +++ c->input_rgb2yuv_table); +++ if (dst[3]) +++ fillPlane(dst[3], dstStride[3], c->srcW, srcSliceH, srcSliceY, 255); +++ return srcSliceH; +++} +++ +++static int xrgbToYv12Wrapper(SwsContext *c, const uint8_t *src[], +++ int srcStride[], int srcSliceY, int srcSliceH, +++ uint8_t *dst[], int dstStride[]) +++{ +++ ff_xrgbtoyv12( +++ src[0], +++ dst[0] + srcSliceY * dstStride[0], +++ dst[1] + (srcSliceY >> 1) * dstStride[1], +++ dst[2] + (srcSliceY >> 1) * dstStride[2], +++ c->srcW, srcSliceH, +++ dstStride[0], dstStride[1], srcStride[0], +++ c->input_rgb2yuv_table); +++ if (dst[3]) +++ fillPlane(dst[3], dstStride[3], c->srcW, srcSliceH, srcSliceY, 255); +++ return srcSliceH; +++} +++ ++ static int yvu9ToYv12Wrapper(SwsContext *c, const uint8_t *src[], ++ int srcStride[], int srcSliceY, int srcSliceH, ++ uint8_t *dst[], int dstStride[]) ++@@ -1977,7 +2062,6 @@ void ff_get_unscaled_swscale(SwsContext *c) ++ const enum AVPixelFormat dstFormat = c->dstFormat; ++ const int flags = c->flags; ++ const int dstH = c->dstH; ++- const int dstW = c->dstW; ++ int needsDither; ++ ++ needsDither = isAnyRGB(dstFormat) && ++@@ -2035,8 +2119,34 @@ void ff_get_unscaled_swscale(SwsContext *c) ++ /* bgr24toYV12 */ ++ if (srcFormat == AV_PIX_FMT_BGR24 && ++ (dstFormat == AV_PIX_FMT_YUV420P || dstFormat == AV_PIX_FMT_YUVA420P) && ++- !(flags & SWS_ACCURATE_RND) && !(dstW&1)) +++ !(flags & SWS_ACCURATE_RND)) ++ c->convert_unscaled = bgr24ToYv12Wrapper; +++ /* rgb24toYV12 */ +++ if (srcFormat == AV_PIX_FMT_RGB24 && +++ (dstFormat == AV_PIX_FMT_YUV420P || dstFormat == AV_PIX_FMT_YUVA420P) && +++ !(flags & SWS_ACCURATE_RND)) +++ c->convert_unscaled = rgb24ToYv12Wrapper; +++ +++ /* bgrxtoYV12 */ +++ if (((srcFormat == AV_PIX_FMT_BGRA && dstFormat == AV_PIX_FMT_YUV420P) || +++ (srcFormat == AV_PIX_FMT_BGR0 && (dstFormat == AV_PIX_FMT_YUV420P || dstFormat == AV_PIX_FMT_YUVA420P))) && +++ !(flags & SWS_ACCURATE_RND)) +++ c->convert_unscaled = bgrxToYv12Wrapper; +++ /* rgbx24toYV12 */ +++ if (((srcFormat == AV_PIX_FMT_RGBA && dstFormat == AV_PIX_FMT_YUV420P) || +++ (srcFormat == AV_PIX_FMT_RGB0 && (dstFormat == AV_PIX_FMT_YUV420P || dstFormat == AV_PIX_FMT_YUVA420P))) && +++ !(flags & SWS_ACCURATE_RND)) +++ c->convert_unscaled = rgbxToYv12Wrapper; +++ /* xbgrtoYV12 */ +++ if (((srcFormat == AV_PIX_FMT_ABGR && dstFormat == AV_PIX_FMT_YUV420P) || +++ (srcFormat == AV_PIX_FMT_0BGR && (dstFormat == AV_PIX_FMT_YUV420P || dstFormat == AV_PIX_FMT_YUVA420P))) && +++ !(flags & SWS_ACCURATE_RND)) +++ c->convert_unscaled = xbgrToYv12Wrapper; +++ /* xrgb24toYV12 */ +++ if (((srcFormat == AV_PIX_FMT_ARGB && dstFormat == AV_PIX_FMT_YUV420P) || +++ (srcFormat == AV_PIX_FMT_0RGB && (dstFormat == AV_PIX_FMT_YUV420P || dstFormat == 
AV_PIX_FMT_YUVA420P))) && +++ !(flags & SWS_ACCURATE_RND)) +++ c->convert_unscaled = xrgbToYv12Wrapper; ++ ++ /* RGB/BGR -> RGB/BGR (no dither needed forms) */ ++ if (isAnyRGB(srcFormat) && isAnyRGB(dstFormat) && findRgbConvFn(c) ++diff --git a/libswscale/tests/swscale.c b/libswscale/tests/swscale.c ++index 6c38041ddb..12776ffec7 100644 ++--- a/libswscale/tests/swscale.c +++++ b/libswscale/tests/swscale.c ++@@ -23,6 +23,7 @@ ++ #include ++ #include ++ #include +++#include ++ ++ #undef HAVE_AV_CONFIG_H ++ #include "libavutil/cpu.h" ++@@ -78,6 +79,15 @@ struct Results { ++ uint32_t crc; ++ }; ++ +++static int time_rep = 0; +++ +++static uint64_t utime(void) +++{ +++ struct timespec ts; +++ clock_gettime(CLOCK_MONOTONIC, &ts); +++ return ts.tv_nsec / 1000 + (uint64_t)ts.tv_sec * 1000000; +++} +++ ++ // test by ref -> src -> dst -> out & compare out against ref ++ // ref & out are YV12 ++ static int doTest(const uint8_t * const ref[4], int refStride[4], int w, int h, ++@@ -174,7 +184,7 @@ static int doTest(const uint8_t * const ref[4], int refStride[4], int w, int h, ++ goto end; ++ } ++ ++- printf(" %s %dx%d -> %s %3dx%3d flags=%2d", +++ printf(" %s %4dx%4d -> %s %4dx%4d flags=%2d", ++ desc_src->name, srcW, srcH, ++ desc_dst->name, dstW, dstH, ++ flags); ++@@ -182,6 +192,17 @@ static int doTest(const uint8_t * const ref[4], int refStride[4], int w, int h, ++ ++ sws_scale(dstContext, (const uint8_t * const*)src, srcStride, 0, srcH, dst, dstStride); ++ +++ if (time_rep != 0) +++ { +++ const uint64_t now = utime(); +++ uint64_t done; +++ for (i = 1; i != time_rep; ++i) { +++ sws_scale(dstContext, (const uint8_t * const*)src, srcStride, 0, srcH, dst, dstStride); +++ } +++ done = utime(); +++ printf(" T=%7"PRId64"us ", done-now); +++ } +++ ++ for (i = 0; i < 4 && dstStride[i]; i++) ++ crc = av_crc(av_crc_get_table(AV_CRC_32_IEEE), crc, dst[i], ++ dstStride[i] * dstH); ++@@ -355,56 +376,78 @@ static int fileTest(const uint8_t * const ref[4], int refStride[4], ++ return 0; ++ } ++ ++-#define W 96 ++-#define H 96 ++- ++ int main(int argc, char **argv) ++ { +++ unsigned int W = 96; +++ unsigned int H = 96; +++ unsigned int W2; +++ unsigned int H2; +++ unsigned int S; ++ enum AVPixelFormat srcFormat = AV_PIX_FMT_NONE; ++ enum AVPixelFormat dstFormat = AV_PIX_FMT_NONE; ++- uint8_t *rgb_data = av_malloc(W * H * 4); ++- const uint8_t * const rgb_src[4] = { rgb_data, NULL, NULL, NULL }; ++- int rgb_stride[4] = { 4 * W, 0, 0, 0 }; ++- uint8_t *data = av_malloc(4 * W * H); ++- const uint8_t * const src[4] = { data, data + W * H, data + W * H * 2, data + W * H * 3 }; ++- int stride[4] = { W, W, W, W }; ++ int x, y; ++ struct SwsContext *sws; ++ AVLFG rand; ++ int res = -1; ++ int i; ++ FILE *fp = NULL; ++- ++- if (!rgb_data || !data) ++- return -1; +++ uint8_t *rgb_data; +++ uint8_t * rgb_src[4] = { NULL }; +++ int rgb_stride[4] = { 0 }; +++ uint8_t *data; +++ uint8_t * src[4] = { NULL }; +++ int stride[4] = { 0 }; ++ ++ for (i = 1; i < argc; i += 2) { +++ const char * const arg2 = argv[i+1]; +++ ++ if (argv[i][0] != '-' || i + 1 == argc) ++ goto bad_option; ++ if (!strcmp(argv[i], "-ref")) { ++- fp = fopen(argv[i + 1], "r"); +++ fp = fopen(arg2, "r"); ++ if (!fp) { ++- fprintf(stderr, "could not open '%s'\n", argv[i + 1]); +++ fprintf(stderr, "could not open '%s'\n", arg2); ++ goto error; ++ } ++ } else if (!strcmp(argv[i], "-cpuflags")) { ++ unsigned flags = av_get_cpu_flags(); ++- int ret = av_parse_cpu_caps(&flags, argv[i + 1]); +++ int ret = av_parse_cpu_caps(&flags, arg2); ++ if (ret < 0) { ++- 
fprintf(stderr, "invalid cpu flags %s\n", argv[i + 1]); +++ fprintf(stderr, "invalid cpu flags %s\n", arg2); ++ return ret; ++ } ++ av_force_cpu_flags(flags); ++ } else if (!strcmp(argv[i], "-src")) { ++- srcFormat = av_get_pix_fmt(argv[i + 1]); +++ srcFormat = av_get_pix_fmt(arg2); ++ if (srcFormat == AV_PIX_FMT_NONE) { ++- fprintf(stderr, "invalid pixel format %s\n", argv[i + 1]); +++ fprintf(stderr, "invalid pixel format %s\n", arg2); ++ return -1; ++ } ++ } else if (!strcmp(argv[i], "-dst")) { ++- dstFormat = av_get_pix_fmt(argv[i + 1]); +++ dstFormat = av_get_pix_fmt(arg2); ++ if (dstFormat == AV_PIX_FMT_NONE) { ++- fprintf(stderr, "invalid pixel format %s\n", argv[i + 1]); +++ fprintf(stderr, "invalid pixel format %s\n", arg2); +++ return -1; +++ } +++ } else if (!strcmp(argv[i], "-w")) { +++ char * p = NULL; +++ W = strtoul(arg2, &p, 0); +++ if (!W || *p) { +++ fprintf(stderr, "bad width %s\n", arg2); +++ return -1; +++ } +++ } else if (!strcmp(argv[i], "-h")) { +++ char * p = NULL; +++ H = strtoul(arg2, &p, 0); +++ if (!H || *p) { +++ fprintf(stderr, "bad height '%s'\n", arg2); +++ return -1; +++ } +++ } else if (!strcmp(argv[i], "-t")) { +++ char * p = NULL; +++ time_rep = (int)strtol(arg2, &p, 0); +++ if (*p) { +++ fprintf(stderr, "bad time repetitions '%s'\n", arg2); ++ return -1; ++ } ++ } else { ++@@ -414,15 +457,34 @@ bad_option: ++ } ++ } ++ ++- sws = sws_getContext(W / 12, H / 12, AV_PIX_FMT_RGB32, W, H, +++ S = (W + 15) & ~15; +++ rgb_data = av_mallocz(S * H * 4); +++ rgb_src[0] = rgb_data; +++ rgb_stride[0] = 4 * S; +++ data = av_mallocz(4 * S * H); +++ src[0] = data; +++ src[1] = data + S * H; +++ src[2] = data + S * H * 2; +++ src[3] = data + S * H * 3; +++ stride[0] = S; +++ stride[1] = S; +++ stride[2] = S; +++ stride[3] = S; +++ H2 = H < 96 ? 8 : H / 12; +++ W2 = W < 96 ? 8 : W / 12; +++ +++ if (!rgb_data || !data) +++ return -1; +++ +++ sws = sws_getContext(W2, H2, AV_PIX_FMT_RGB32, W, H, ++ AV_PIX_FMT_YUVA420P, SWS_BILINEAR, NULL, NULL, NULL); ++ ++ av_lfg_init(&rand, 1); ++ ++ for (y = 0; y < H; y++) ++ for (x = 0; x < W * 4; x++) ++- rgb_data[ x + y * 4 * W] = av_lfg_get(&rand); ++- res = sws_scale(sws, rgb_src, rgb_stride, 0, H / 12, (uint8_t * const *) src, stride); +++ rgb_data[ x + y * 4 * S] = av_lfg_get(&rand); +++ res = sws_scale(sws, (const uint8_t * const *)rgb_src, rgb_stride, 0, H2, (uint8_t * const *) src, stride); ++ if (res < 0 || res != H) { ++ res = -1; ++ goto error; ++@@ -431,10 +493,10 @@ bad_option: ++ av_free(rgb_data); ++ ++ if(fp) { ++- res = fileTest(src, stride, W, H, fp, srcFormat, dstFormat); +++ res = fileTest((const uint8_t * const *)src, stride, W, H, fp, srcFormat, dstFormat); ++ fclose(fp); ++ } else { ++- selfTest(src, stride, W, H, srcFormat, dstFormat); +++ selfTest((const uint8_t * const *)src, stride, W, H, srcFormat, dstFormat); ++ res = 0; ++ } ++ error: ++diff --git a/pi-util/BUILD.txt b/pi-util/BUILD.txt ++new file mode 100644 ++index 0000000000..2b62d660c0 ++--- /dev/null +++++ b/pi-util/BUILD.txt ++@@ -0,0 +1,67 @@ +++Building Pi FFmpeg +++================== +++ +++Current only building on a Pi is supported. 
+++This builds ffmpeg the way I've tested it +++ +++Get all dependencies - the current package dependencies are good enough +++ +++$ sudo apt-get build-dep ffmpeg +++ +++Configure using the pi-util/conf_native.sh script +++------------------------------------------------- +++ +++This sets the normal release options and creates an ouutput dir to build into +++The directory name will depend on system and options but will be under out/ +++ +++There are a few choices here +++ --mmal build including the legacy mmal-based decoders and zero-copy code +++ this requires appropriate libraries which currently will exist for +++ armv7 but not arm64 +++ --noshared +++ Build a static image rather than a shared library one. Static is +++ easier for testing as there is no need to worry about library +++ paths being confused and therefore running the wrong code, Shared +++ is what is needed, in most cases, when building for use by other +++ programs. +++ --usr Set install dir to /usr (i.e. system default) rather than in +++ /install +++ +++So for a static build +++--------------------- +++ +++$ pi-util/conf_native.sh --noshared +++ +++$ make -j8 -C out/ +++ +++You can now run ffmpeg directly from where it was built +++ +++For a shared build +++------------------ +++ +++There are two choices here +++ +++$ pi-util/conf_native.sh +++$ make -j8 -C out/ install +++ +++This sets the install prefix to /install and is probably what you +++want if you don't want to overwrite the system files. +++ +++You can now set LD_LIBRARY_PATH appropriately and run ffmpeg from where it was +++built. You can copy the contents of /install to /usr and that mostly +++works. The only downside is that paths in pkgconfig end up being set to the +++install directory in your build directory which may be less than ideal when +++building other packages. +++ +++The alternative if you just want to replace the system libs is: +++ +++$ pi-util/conf_native.sh --usr +++$ make -j8 -C out/ +++$ sudo pi-util/clean_usr_libs.sh +++$ sudo make -j8 -C out/ install +++ +++The clean_usr_libs.sh step wipes any existing libs & includes (for all +++architectures) from the system which helps avoid confusion when running other +++progs as you can be sure you're not running old code which is unfortunately +++easy to do otherwise. +++ ++diff --git a/pi-util/NOTES.txt b/pi-util/NOTES.txt ++new file mode 100644 ++index 0000000000..fcce72226a ++--- /dev/null +++++ b/pi-util/NOTES.txt ++@@ -0,0 +1,69 @@ +++Notes on the hevc_rpi decoder & associated support code +++------------------------------------------------------- +++ +++There are 3 main parts to the existing code: +++ +++1) The decoder - this is all in libavcodec as rpi_hevc*. +++ +++2) A few filters to deal with Sand frames and a small patch to +++automatically select the sand->i420 converter when required. +++ +++3) A kludge in ffmpeg.c to display the decoded video. This could & should +++be converted into a proper ffmpeg display module. +++ +++ +++Decoder +++------- +++ +++The decoder is a modified version of the existing ffmpeg hevc decoder. +++Generally it is ~100% faster than the existing ffmpeg hevc s/w decoder. +++More complex bitstreams can be up to ~200% faster but particularly easy +++streams can cut its advantage down to ~50%. This means that a Pi3+ can +++display nearly all 8-bit 1080p30 streams and with some overclocking it can +++display most lower bitrate 10-bit 1080p30 streams - this latter case is +++not helped by the requirement to downsample to 8-bit before display on a +++Pi. 
+++ +++It has had co-processor offload added for inter-pred and large block +++residual transform. Various parts have had optimized ARM NEON assembler +++added and the existing ARM asm sections have been profiled and +++re-optimized for A53. The main C code has been substantially reworked at +++its lower levels in an attempt to optimize it and minimize memory +++bandwidth. To some extent code paths that deal with frame types that it +++doesn't support have been pruned. +++ +++It outputs frames in Broadcom Sand format. This is a somewhat annoying +++layout that doesn't fit into ffmpegs standard frame descriptions. It has +++vertical stripes of 128 horizontal pixels (64 in 10 bit forms) with Y for +++the stripe followed by interleaved U & V, that is then followed by the Y +++for the next stripe, etc. The final stripe is always padded to +++stripe-width. This is used in an attempt to help with cache locality and +++cut down on the number of dram bank switches. It is annoying to use for +++inter-pred with conventional processing but the way the Pi QPU (which is +++used for inter-pred) works means that it has negligible downsides here and +++the improved memory performance exceeds the overhead of the increased +++complexity in the rest of the code. +++ +++Frames must be allocated out of GPU memory (as otherwise they can't be +++accessed by the co-processors). Utility functions (in rpi_zc.c) have been +++written to make this easier. As the frames are already in GPU memory they +++can be displayed by the Pi h/w without any further copying. +++ +++ +++Known non-features +++------------------ +++ +++Frame allocation should probably be done in some other way in order to fit +++into the standard framework better. +++ +++Sand frames are currently declared as software frames, there is an +++argument that they should be hardware frames but they aren't really. +++ +++There must be a better way of auto-selecting the hevc_rpi decoder over the +++normal s/w hevc decoder, but I became confused by the existing h/w +++acceleration framework and what I wanted to do didn't seem to fit in +++neatly. 
+++ +++Display should be a proper device rather than a kludge in ffmpeg.c +++ +++ ++diff --git a/pi-util/TESTMESA.txt b/pi-util/TESTMESA.txt ++new file mode 100644 ++index 0000000000..92bc13a3df ++--- /dev/null +++++ b/pi-util/TESTMESA.txt ++@@ -0,0 +1,82 @@ +++# Setup & Build instructions for testing Argon30 mesa support (on Pi4) +++ +++# These assume that the drm_mmal test for Sand8 has been built on this Pi +++# as build relies on many of the same files +++ +++# 1st get everything required to build ffmpeg +++# If sources aren't already enabled on your Pi then enable them +++sudo su +++sed "s/#deb-src/deb-src/" /etc/apt/sources.list > /tmp/sources.list +++sed "s/#deb-src/deb-src/" /etc/apt/sources.list.d/raspi.list > /tmp/raspi.list +++mv /tmp/sources.list /etc/apt/ +++mv /tmp/raspi.list /etc/apt/sources.list.d/ +++apt update +++ +++# Get dependancies +++sudo apt build-dep ffmpeg +++ +++sudo apt install meson libepoxy-dev libxcb-dri3-dev libxcb1-dev libx11-dev libx11-xcb-dev libdrm-dev +++ +++# Enable H265 V4L2 request decoder +++sudo su +++echo dtoverlay=rpivid-v4l2 >> /boot/config.txt +++# You may also want to add more CMA if you are going to try 4k videos +++# Change the dtoverlay=vc4-fkms-v3d line in config.txt to read +++# dtoverlay=vc4-fkms-v3d,cma-512 +++reboot +++# Check it has turned up +++ls -la /dev/video* +++# This should include video19 +++# crw-rw----+ 1 root video 81, 7 Aug 4 17:25 /dev/video19 +++ +++# Currently on the Pi the linux headers from the debian distro don't match +++# the kernel that we ship and we need to update them - hopefully this step +++# will be unneeded in the future +++sudo apt install git bc bison flex libssl-dev make +++git clone --depth=1 https://github.com/raspberrypi/linux --branch rpi-5.10.y +++cd linux +++KERNEL=kernel7l +++make bcm2711_defconfig +++make headers_install +++sudo cp -r usr/include/linux /usr/include +++cd .. +++ +++# Config - this builds a staticly linked ffmpeg which is easier for testing +++pi-util/conf_native.sh --noshared +++ +++# Build (this is a bit dull) +++# If you want to poke the source the libavdevice/egl_vout.c contains the +++# output code - +++cd out/armv7-static-rel +++ +++# Check that you have actually configured V4L2 request +++grep HEVC_V4L2REQUEST config.h +++# You are hoping for +++# #define CONFIG_HEVC_V4L2REQUEST_HWACCEL 1 +++# if you get 0 then the config has failed +++ +++make -j6 +++ +++# Grab test streams +++wget http://www.jell.yfish.us/media/jellyfish-3-mbps-hd-h264.mkv +++wget http://www.jell.yfish.us/media/jellyfish-3-mbps-hd-hevc.mkv +++wget http://www.jell.yfish.us/media/jellyfish-3-mbps-hd-hevc-10bit.mkv +++ +++# Test i420 output (works currently) +++./ffmpeg -no_cvt_hw -vcodec h264_v4l2m2m -i jellyfish-3-mbps-hd-h264.mkv -f vout_egl - +++ +++# Test Sand8 output - doesn't currently work but should once you have +++# Sand8 working in drm_mmal. I can't guarantee that this will work as +++# I can't test this path with a known working format, but the debug looks +++# good. 
If this doesn't work & drm_mmal does with sand8 then come back to me +++# The "show_all 1" forces vout to display every frame otherwise it drops any +++# frame that would cause it to block +++./ffmpeg -no_cvt_hw -hwaccel drm -vcodec hevc -i jellyfish-3-mbps-hd-hevc.mkv -show_all 1 -f vout_egl - +++ +++# Test Sand30 - doesn't currently work +++# (Beware that when FFmpeg errors out it often leaves your teminal window +++# in a state where you need to reset it) +++./ffmpeg -no_cvt_hw -hwaccel drm -vcodec hevc -i jellyfish-3-mbps-hd-hevc-10bit.mkv -f vout_egl - +++ +++ +++ ++diff --git a/pi-util/clean_usr_libs.sh b/pi-util/clean_usr_libs.sh ++new file mode 100755 ++index 0000000000..01bd6a6a22 ++--- /dev/null +++++ b/pi-util/clean_usr_libs.sh ++@@ -0,0 +1,42 @@ +++set -e +++U=/usr/include/arm-linux-gnueabihf +++rm -rf $U/libavcodec +++rm -rf $U/libavdevice +++rm -rf $U/libavfilter +++rm -rf $U/libavformat +++rm -rf $U/libavutil +++rm -rf $U/libswresample +++rm -rf $U/libswscale +++U=/usr/include/aarch64-linux-gnu +++rm -rf $U/libavcodec +++rm -rf $U/libavdevice +++rm -rf $U/libavfilter +++rm -rf $U/libavformat +++rm -rf $U/libavutil +++rm -rf $U/libswresample +++rm -rf $U/libswscale +++U=/usr/lib/arm-linux-gnueabihf +++rm -f $U/libavcodec.* +++rm -f $U/libavdevice.* +++rm -f $U/libavfilter.* +++rm -f $U/libavformat.* +++rm -f $U/libavutil.* +++rm -f $U/libswresample.* +++rm -f $U/libswscale.* +++U=/usr/lib/arm-linux-gnueabihf/neon/vfp +++rm -f $U/libavcodec.* +++rm -f $U/libavdevice.* +++rm -f $U/libavfilter.* +++rm -f $U/libavformat.* +++rm -f $U/libavutil.* +++rm -f $U/libswresample.* +++rm -f $U/libswscale.* +++U=/usr/lib/aarch64-linux-gnu +++rm -f $U/libavcodec.* +++rm -f $U/libavdevice.* +++rm -f $U/libavfilter.* +++rm -f $U/libavformat.* +++rm -f $U/libavutil.* +++rm -f $U/libswresample.* +++rm -f $U/libswscale.* +++ ++diff --git a/pi-util/conf_arm64_native.sh b/pi-util/conf_arm64_native.sh ++new file mode 100644 ++index 0000000000..9e3bbfa190 ++--- /dev/null +++++ b/pi-util/conf_arm64_native.sh ++@@ -0,0 +1,45 @@ +++echo "Configure for ARM64 native build" +++ +++#RPI_KEEPS="-save-temps=obj" +++ +++SHARED_LIBS="--enable-shared" +++if [ "$1" == "--noshared" ]; then +++ SHARED_LIBS="--disable-shared" +++ echo Static libs +++ OUT=out/arm64-static-rel +++else +++ echo Shared libs +++ OUT=out/arm64-shared-rel +++fi +++ +++mkdir -p $OUT +++cd $OUT +++ +++A=aarch64-linux-gnu +++USR_PREFIX=`pwd`/install +++LIB_PREFIX=$USR_PREFIX/lib/$A +++INC_PREFIX=$USR_PREFIX/include/$A +++ +++../../configure \ +++ --prefix=$USR_PREFIX\ +++ --libdir=$LIB_PREFIX\ +++ --incdir=$INC_PREFIX\ +++ --disable-stripping\ +++ --disable-thumb\ +++ --disable-mmal\ +++ --enable-sand\ +++ --enable-v4l2-request\ +++ --enable-libdrm\ +++ --enable-epoxy\ +++ --enable-libudev\ +++ --enable-vout-drm\ +++ --enable-vout-egl\ +++ $SHARED_LIBS\ +++ --extra-cflags="-ggdb" +++ +++# --enable-decoder=hevc_rpi\ +++# --enable-extra-warnings\ +++# --arch=armv71\ +++ +++# gcc option for getting asm listing +++# -Wa,-ahls ++diff --git a/pi-util/conf_h265.2016.csv b/pi-util/conf_h265.2016.csv ++new file mode 100644 ++index 0000000000..4efd5d1c67 ++--- /dev/null +++++ b/pi-util/conf_h265.2016.csv ++@@ -0,0 +1,195 @@ +++1,HEVC_v1/AMP_A_Samsung_7,AMP_A_Samsung_7.bin,AMP_A_Samsung_7.md5,8 +++1,HEVC_v1/AMP_B_Samsung_7,AMP_B_Samsung_7.bin,AMP_B_Samsung_7.md5,8 +++1,HEVC_v1/AMP_D_Hisilicon_3,AMP_D_Hisilicon.bit,AMP_D_Hisilicon_3.yuv.md5,8 +++1,HEVC_v1/AMP_E_Hisilicon_3,AMP_E_Hisilicon.bit,AMP_E_Hisilicon_3.yuv.md5,8 
+++1,HEVC_v1/AMP_F_Hisilicon_3,AMP_F_Hisilicon_3.bit,AMP_F_Hisilicon_3.yuv.md5,8 +++1,HEVC_v1/AMVP_A_MTK_4,AMVP_A_MTK_4.bit,AMVP_A_MTK_4.md5,8 +++1,HEVC_v1/AMVP_B_MTK_4,AMVP_B_MTK_4.bit,AMVP_B_MTK_4.md5,8 +++1,HEVC_v1/AMVP_C_Samsung_7,AMVP_C_Samsung_7.bin,AMVP_C_Samsung_7.md5,8 +++1,HEVC_v1/BUMPING_A_ericsson_1,BUMPING_A_ericsson_1.bit,BUMPING_A_ericsson_1.md5,8 +++1,HEVC_v1/CAINIT_A_SHARP_4,CAINIT_A_SHARP_4.bit,CAINIT_A_SHARP_4.md5,8 +++1,HEVC_v1/CAINIT_B_SHARP_4,CAINIT_B_SHARP_4.bit,CAINIT_B_SHARP_4.md5,8 +++1,HEVC_v1/CAINIT_C_SHARP_3,CAINIT_C_SHARP_3.bit,CAINIT_C_SHARP_3.md5,8 +++1,HEVC_v1/CAINIT_D_SHARP_3,CAINIT_D_SHARP_3.bit,CAINIT_D_SHARP_3.md5,8 +++1,HEVC_v1/CAINIT_E_SHARP_3,CAINIT_E_SHARP_3.bit,CAINIT_E_SHARP_3.md5,8 +++1,HEVC_v1/CAINIT_F_SHARP_3,CAINIT_F_SHARP_3.bit,CAINIT_F_SHARP_3.md5,8 +++1,HEVC_v1/CAINIT_G_SHARP_3,CAINIT_G_SHARP_3.bit,CAINIT_G_SHARP_3.md5,8 +++1,HEVC_v1/CAINIT_H_SHARP_3,CAINIT_H_SHARP_3.bit,CAINIT_H_SHARP_3.md5,8 +++1,HEVC_v1/CIP_A_Panasonic_3,CIP_A_Panasonic_3.bit,CIP_A_Panasonic_3_yuv.md5,8 +++1,HEVC_v1/cip_B_NEC_3,cip_B_NEC_3.bit,cip_B_NEC_3.md5,8 +++1,HEVC_v1/CIP_C_Panasonic_2,CIP_C_Panasonic_2.bit,CIP_C_Panasonic_2_yuv.md5,8 +++1,HEVC_v1/CONFWIN_A_Sony_1,CONFWIN_A_Sony_1.bit,CONFWIN_A_Sony_1.md5,8 +++1,HEVC_v1/DBLK_A_MAIN10_VIXS_4,DBLK_A_MAIN10_VIXS_4.bit,DBLK_A_MAIN10_VIXS_4.md5,10 +++1,HEVC_v1/DBLK_A_SONY_3,DBLK_A_SONY_3.bit,DBLK_A_SONY_3.bit.yuv.md5,8 +++1,HEVC_v1/DBLK_B_SONY_3,DBLK_B_SONY_3.bit,DBLK_B_SONY_3.bit.yuv.md5,8 +++1,HEVC_v1/DBLK_C_SONY_3,DBLK_C_SONY_3.bit,DBLK_C_SONY_3.bit.yuv.md5,8 +++1,HEVC_v1/DBLK_D_VIXS_2,DBLK_D_VIXS_2.bit,DBLK_D_VIXS_2_yuv.md5,8 +++1,HEVC_v1/DBLK_E_VIXS_2,DBLK_E_VIXS_2.bit,DBLK_E_VIXS_2_yuv.md5,8 +++1,HEVC_v1/DBLK_F_VIXS_2,DBLK_F_VIXS_2.bit,DBLK_F_VIXS_2_yuv.md5,8 +++1,HEVC_v1/DBLK_G_VIXS_2,DBLK_G_VIXS_2.bit,DBLK_G_VIXS_2_yuv.md5,8 +++1,HEVC_v1/DELTAQP_A_BRCM_4,DELTAQP_A_BRCM_4.bit,DELTAQP_A_BRCM_4_yuv.md5,8 +++1,HEVC_v1/DELTAQP_B_SONY_3,DELTAQP_B_SONY_3.bit,DELTAQP_B_SONY_3.bit.yuv.md5,8 +++1,HEVC_v1/DELTAQP_C_SONY_3,DELTAQP_C_SONY_3.bit,DELTAQP_C_SONY_3.bit.yuv.md5,8 +++1,HEVC_v1/DSLICE_A_HHI_5,DSLICE_A_HHI_5.bin,DSLICE_A_HHI_5.md5,8 +++1,HEVC_v1/DSLICE_B_HHI_5,DSLICE_B_HHI_5.bin,DSLICE_B_HHI_5.md5,8 +++1,HEVC_v1/DSLICE_C_HHI_5,DSLICE_C_HHI_5.bin,DSLICE_C_HHI_5.md5,8 +++1,HEVC_v1/ENTP_A_QUALCOMM_1,ENTP_A_Qualcomm_1.bit,ENTP_A_Qualcomm_1.md5,8 +++1,HEVC_v1/ENTP_B_Qualcomm_1,ENTP_B_Qualcomm_1.bit,ENTP_B_Qualcomm_1.md5,8 +++1,HEVC_v1/ENTP_C_Qualcomm_1,ENTP_C_Qualcomm_1.bit,ENTP_C_Qualcomm_1.md5,8 +++1,HEVC_v1/EXT_A_ericsson_4,EXT_A_ericsson_4.bit,EXT_A_ericsson_4.md5,8 +++1,HEVC_v1/FILLER_A_Sony_1,FILLER_A_Sony_1.bit,FILLER_A_Sony_1.md5,8 +++1,HEVC_v1/HRD_A_Fujitsu_3,HRD_A_Fujitsu_3.bin,HRD_A_Fujitsu_3.md5,8 +++1,HEVC_v1/INITQP_A_Sony_1,INITQP_A_Sony_1.bit,INITQP_A_Sony_1.md5,8 +++1,HEVC_v1/INITQP_B_Main10_Sony_1,INITQP_B_Main10_Sony_1.bit,INITQP_B_Main10_Sony_1.md5,10 +++1,HEVC_v1/ipcm_A_NEC_3,ipcm_A_NEC_3.bit,ipcm_A_NEC_3.md5,8 +++1,HEVC_v1/ipcm_B_NEC_3,ipcm_B_NEC_3.bit,ipcm_B_NEC_3.md5,8 +++1,HEVC_v1/ipcm_C_NEC_3,ipcm_C_NEC_3.bit,ipcm_C_NEC_3.md5,8 +++1,HEVC_v1/ipcm_D_NEC_3,ipcm_D_NEC_3.bit,ipcm_D_NEC_3.md5,8 +++1,HEVC_v1/ipcm_E_NEC_2,ipcm_E_NEC_2.bit,ipcm_E_NEC_2.md5,8 +++1,HEVC_v1/IPRED_A_docomo_2,IPRED_A_docomo_2.bit,IPRED_A_docomo_2.md5,8 +++1,HEVC_v1/IPRED_B_Nokia_3,IPRED_B_Nokia_3.bit,IPRED_B_Nokia_3_yuv.md5,8 +++1,HEVC_v1/IPRED_C_Mitsubishi_3,IPRED_C_Mitsubishi_3.bit,IPRED_C_Mitsubishi_3_yuv.md5,8 +++1,HEVC_v1/LS_A_Orange_2,LS_A_Orange_2.bit,LS_A_Orange_2_yuv.md5,8 
+++1,HEVC_v1/LS_B_Orange_4,LS_B_Orange_4.bit,LS_B_Orange_4_yuv.md5,8 +++1,HEVC_v1/LTRPSPS_A_Qualcomm_1,LTRPSPS_A_Qualcomm_1.bit,LTRPSPS_A_Qualcomm_1.md5,8 +++1,HEVC_v1/MAXBINS_A_TI_5,MAXBINS_A_TI_5.bit,MAXBINS_A_TI_5_yuv.md5,8 +++1,HEVC_v1/MAXBINS_B_TI_5,MAXBINS_B_TI_5.bit,MAXBINS_B_TI_5_yuv.md5,8 +++1,HEVC_v1/MAXBINS_C_TI_5,MAXBINS_C_TI_5.bit,MAXBINS_C_TI_5_yuv.md5,8 +++1,HEVC_v1/MERGE_A_TI_3,MERGE_A_TI_3.bit,MERGE_A_TI_3.md5,8 +++1,HEVC_v1/MERGE_B_TI_3,MERGE_B_TI_3.bit,MERGE_B_TI_3.md5,8 +++1,HEVC_v1/MERGE_C_TI_3,MERGE_C_TI_3.bit,MERGE_C_TI_3.md5,8 +++1,HEVC_v1/MERGE_D_TI_3,MERGE_D_TI_3.bit,MERGE_D_TI_3.md5,8 +++1,HEVC_v1/MERGE_E_TI_3,MERGE_E_TI_3.bit,MERGE_E_TI_3.md5,8 +++1,HEVC_v1/MERGE_F_MTK_4,MERGE_F_MTK_4.bit,MERGE_F_MTK_4.md5,8 +++1,HEVC_v1/MERGE_G_HHI_4,MERGE_G_HHI_4.bit,MERGE_G_HHI_4.md5,8 +++1,HEVC_v1/MVCLIP_A_qualcomm_3,MVCLIP_A_qualcomm_3.bit,MVCLIP_A_qualcomm_3.yuv.md5,8 +++1,HEVC_v1/MVDL1ZERO_A_docomo_4,MVDL1ZERO_A_docomo_4.bit,MVDL1ZERO_A_docomo_4.md5,8 +++1,HEVC_v1/MVEDGE_A_qualcomm_3,MVEDGE_A_qualcomm_3.bit,MVEDGE_A_qualcomm_3.yuv.md5,8 +++1,HEVC_v1/NoOutPrior_A_Qualcomm_1,NoOutPrior_A_Qualcomm_1.bit,NoOutPrior_A_Qualcomm_1.md5,8 +++1,HEVC_v1/NoOutPrior_B_Qualcomm_1,NoOutPrior_B_Qualcomm_1.bit,NoOutPrior_B_Qualcomm_1.md5,8 +++1,HEVC_v1/NUT_A_ericsson_5,NUT_A_ericsson_5.bit,NUT_A_ericsson_5.md5,8 +++1,HEVC_v1/OPFLAG_A_Qualcomm_1,OPFLAG_A_Qualcomm_1.bit,OPFLAG_A_Qualcomm_1.md5,8 +++1,HEVC_v1/OPFLAG_B_Qualcomm_1,OPFLAG_B_Qualcomm_1.bit,OPFLAG_B_Qualcomm_1.md5,8 +++1,HEVC_v1/OPFLAG_C_Qualcomm_1,OPFLAG_C_Qualcomm_1.bit,OPFLAG_C_Qualcomm_1.md5,8 +++1,HEVC_v1/PICSIZE_A_Bossen_1,PICSIZE_A_Bossen_1.bin,PICSIZE_A_Bossen_1.md5,8 +++1,HEVC_v1/PICSIZE_B_Bossen_1,PICSIZE_B_Bossen_1.bin,PICSIZE_B_Bossen_1.md5,8 +++1,HEVC_v1/PICSIZE_C_Bossen_1,PICSIZE_C_Bossen_1.bin,PICSIZE_C_Bossen_1.md5,8 +++1,HEVC_v1/PICSIZE_D_Bossen_1,PICSIZE_D_Bossen_1.bin,PICSIZE_D_Bossen_1.md5,8 +++1,HEVC_v1/PMERGE_A_TI_3,PMERGE_A_TI_3.bit,PMERGE_A_TI_3.md5,8 +++1,HEVC_v1/PMERGE_B_TI_3,PMERGE_B_TI_3.bit,PMERGE_B_TI_3.md5,8 +++1,HEVC_v1/PMERGE_C_TI_3,PMERGE_C_TI_3.bit,PMERGE_C_TI_3.md5,8 +++1,HEVC_v1/PMERGE_D_TI_3,PMERGE_D_TI_3.bit,PMERGE_D_TI_3.md5,8 +++1,HEVC_v1/PMERGE_E_TI_3,PMERGE_E_TI_3.bit,PMERGE_E_TI_3.md5,8 +++1,HEVC_v1/POC_A_Bossen_3,POC_A_Bossen_3.bin,POC_A_Bossen_3.md5,8 +++1,HEVC_v1/PPS_A_qualcomm_7,PPS_A_qualcomm_7.bit,PPS_A_qualcomm_7.yuv.md5,8 +++1,HEVC_v1/PS_B_VIDYO_3,PS_B_VIDYO_3.bit,PS_B_VIDYO_3_yuv.md5,8 +++1,HEVC_v1/RAP_A_docomo_6,RAP_A_docomo_6.bit,RAP_A_docomo_6.md5,8 +++1,HEVC_v1/RAP_B_Bossen_2,RAP_B_Bossen_2.bit,RAP_B_Bossen_2.md5,8 +++1,HEVC_v1/RPLM_A_qualcomm_4,RPLM_A_qualcomm_4.bit,RPLM_A_qualcomm_4.yuv.md5,8 +++1,HEVC_v1/RPLM_B_qualcomm_4,RPLM_B_qualcomm_4.bit,RPLM_B_qualcomm_4.yuv.md5,8 +++1,HEVC_v1/RPS_A_docomo_5,RPS_A_docomo_5.bit,RPS_A_docomo_5.md5,8 +++1,HEVC_v1/RPS_B_qualcomm_5,RPS_B_qualcomm_5.bit,RPS_B_qualcomm_5.yuv.md5,8 +++1,HEVC_v1/RPS_C_ericsson_5,RPS_C_ericsson_5.bit,RPS_C_ericsson_5.md5,8 +++1,HEVC_v1/RPS_D_ericsson_6,RPS_D_ericsson_6.bit,RPS_D_ericsson_6.md5,8 +++1,HEVC_v1/RPS_E_qualcomm_5,RPS_E_qualcomm_5.bit,RPS_E_qualcomm_5.yuv.md5,8 +++1,HEVC_v1/RPS_F_docomo_2,RPS_F_docomo_2.bit,RPS_F_docomo_2.md5,8 +++1,HEVC_v1/RQT_A_HHI_4,RQT_A_HHI_4.bit,RQT_A_HHI_4.md5,8 +++1,HEVC_v1/RQT_B_HHI_4,RQT_B_HHI_4.bit,RQT_B_HHI_4.md5,8 +++1,HEVC_v1/RQT_C_HHI_4,RQT_C_HHI_4.bit,RQT_C_HHI_4.md5,8 +++1,HEVC_v1/RQT_D_HHI_4,RQT_D_HHI_4.bit,RQT_D_HHI_4.md5,8 +++1,HEVC_v1/RQT_E_HHI_4,RQT_E_HHI_4.bit,RQT_E_HHI_4.md5,8 +++1,HEVC_v1/RQT_F_HHI_4,RQT_F_HHI_4.bit,RQT_F_HHI_4.md5,8 
+++1,HEVC_v1/RQT_G_HHI_4,RQT_G_HHI_4.bit,RQT_G_HHI_4.md5,8 +++1,HEVC_v1/SAO_A_MediaTek_4,SAO_A_MediaTek_4.bit,SAO_A_MediaTek_4.md5,8 +++1,HEVC_v1/SAO_B_MediaTek_5,SAO_B_MediaTek_5.bit,SAO_B_MediaTek_5.md5,8 +++1,HEVC_v1/SAO_C_Samsung_5,SAO_C_Samsung_5.bin,SAO_C_Samsung_5.md5,8 +++1,HEVC_v1/SAO_D_Samsung_5,SAO_D_Samsung_5.bin,SAO_D_Samsung_5.md5,8 +++1,HEVC_v1/SAO_E_Canon_4,SAO_E_Canon_4.bit,SAO_E_Canon_4.md5,8 +++1,HEVC_v1/SAO_F_Canon_3,SAO_F_Canon_3.bit,SAO_F_Canon_3.md5,8 +++1,HEVC_v1/SAO_G_Canon_3,SAO_G_Canon_3.bit,SAO_G_Canon_3.md5,8 +++1,HEVC_v1/SAO_H_Parabola_1,SAO_H_Parabola_1.bit,SAO_H_Parabola_1.md5,8 +++1,HEVC_v1/SAODBLK_A_MainConcept_4,SAODBLK_A_MainConcept_4.bin,SAODBLK_A_MainConcept_4_md5.txt,8 +++1,HEVC_v1/SAODBLK_B_MainConcept_4,SAODBLK_B_MainConcept_4.bin,SAODBLK_B_MainConcept_4_md5.txt,8 +++1,HEVC_v1/SDH_A_Orange_4,SDH_A_Orange_4.bit,SDH_A_Orange_4_yuv.md5,8 +++1,HEVC_v1/SLICES_A_Rovi_3,SLICES_A_Rovi_3.bin,SLICES_A_Rovi_3.md5,8 +++1,HEVC_v1/SLIST_A_Sony_5,SLIST_A_Sony_5.bin,SLIST_A_Sony_5_yuv.md5,8 +++1,HEVC_v1/SLIST_B_Sony_9,SLIST_B_Sony_9.bin,SLIST_B_Sony_9_yuv.md5,8 +++1,HEVC_v1/SLIST_C_Sony_4,SLIST_C_Sony_4.bin,SLIST_C_Sony_4_yuv.md5,8 +++1,HEVC_v1/SLIST_D_Sony_9,str.bin,SLIST_D_Sony_9_yuv.md5,8 +++1,HEVC_v1/SLPPLP_A_VIDYO_2,SLPPLP_A_VIDYO_2.bit,SLPPLP_A_VIDYO_2_yuv.md5,8 +++1,HEVC_v1/STRUCT_A_Samsung_7,STRUCT_A_Samsung_7.bin,STRUCT_A_Samsung_7.md5,8 +++1,HEVC_v1/STRUCT_B_Samsung_7,STRUCT_B_Samsung_7.bin,STRUCT_B_Samsung_7.md5,8 +++1,HEVC_v1/TILES_A_Cisco_2,TILES_A_Cisco_2.bin,TILES_A_Cisco_2_yuv.md5,8 +++1,HEVC_v1/TILES_B_Cisco_1,TILES_B_Cisco_1.bin,TILES_B_Cisco_1_yuv.md5,8 +++1,HEVC_v1/TMVP_A_MS_3,TMVP_A_MS_3.bit,TMVP_A_MS_3.yuv.md5,8 +++1,HEVC_v1/TSCL_A_VIDYO_5,TSCL_A_VIDYO_5.bit,TSCL_A_VIDYO_5_yuv.md5,8 +++1,HEVC_v1/TSCL_B_VIDYO_4,TSCL_B_VIDYO_4.bit,TSCL_B_VIDYO_4_yuv.md5,8 +++1,HEVC_v1/TSKIP_A_MS_3,TSKIP_A_MS_3.bit,TSKIP_A_MS_3.yuv.md5,8 +++3,HEVC_v1/TSUNEQBD_A_MAIN10_Technicolor_2,TSUNEQBD_A_MAIN10_Technicolor_2.bit,TSUNEQBD_A_MAIN10_Technicolor_2_yuv.md5, # unequal bit depth,10 +++1,HEVC_v1/TUSIZE_A_Samsung_1,TUSIZE_A_Samsung_1.bin,TUSIZE_A_Samsung_1.md5,8 +++1,HEVC_v1/VPSID_A_VIDYO_2,VPSID_A_VIDYO_2.bit,VPSID_A_VIDYO_2_yuv.md5,8 +++3,HEVC_v1/VPSSPSPPS_A_MainConcept_1,VPSSPSPPS_A_MainConcept_1.bin,VPSSPSPPS_A_MainConcept_1_md5.txt, # ???,8 +++1,HEVC_v1/WP_A_MAIN10_Toshiba_3,WP_A_MAIN10_Toshiba_3.bit,WP_A_MAIN10_Toshiba_3_yuv.md5,10 +++1,HEVC_v1/WP_A_Toshiba_3,WP_A_Toshiba_3.bit,WP_A_Toshiba_3_yuv.md5,8 +++1,HEVC_v1/WP_B_Toshiba_3,WP_B_Toshiba_3.bit,WP_B_Toshiba_3_yuv.md5,8 +++1,HEVC_v1/WP_MAIN10_B_Toshiba_3,WP_MAIN10_B_Toshiba_3.bit,WP_MAIN10_B_Toshiba_3_yuv.md5,10 +++1,HEVC_v1/WPP_A_ericsson_MAIN10_2,WPP_A_ericsson_MAIN10_2.bit,WPP_A_ericsson_MAIN10_yuv.md5,10 +++1,HEVC_v1/WPP_A_ericsson_MAIN_2,WPP_A_ericsson_MAIN_2.bit,WPP_A_ericsson_MAIN_2_yuv.md5,8 +++1,HEVC_v1/WPP_B_ericsson_MAIN10_2,WPP_B_ericsson_MAIN10_2.bit,WPP_B_ericsson_MAIN10_yuv.md5,10 +++1,HEVC_v1/WPP_B_ericsson_MAIN_2,WPP_B_ericsson_MAIN_2.bit,WPP_B_ericsson_MAIN_2_yuv.md5,8 +++1,HEVC_v1/WPP_C_ericsson_MAIN10_2,WPP_C_ericsson_MAIN10_2.bit,WPP_C_ericsson_MAIN10_yuv.md5,10 +++1,HEVC_v1/WPP_C_ericsson_MAIN_2,WPP_C_ericsson_MAIN_2.bit,WPP_C_ericsson_MAIN_2_yuv.md5,8 +++1,HEVC_v1/WPP_D_ericsson_MAIN10_2,WPP_D_ericsson_MAIN10_2.bit,WPP_D_ericsson_MAIN10_yuv.md5,10 +++1,HEVC_v1/WPP_D_ericsson_MAIN_2,WPP_D_ericsson_MAIN_2.bit,WPP_D_ericsson_MAIN_2_yuv.md5,8 +++1,HEVC_v1/WPP_E_ericsson_MAIN10_2,WPP_E_ericsson_MAIN10_2.bit,WPP_E_ericsson_MAIN10_yuv.md5,10 
+++1,HEVC_v1/WPP_E_ericsson_MAIN_2,WPP_E_ericsson_MAIN_2.bit,WPP_E_ericsson_MAIN_2_yuv.md5,8 +++1,HEVC_v1/WPP_F_ericsson_MAIN10_2,WPP_F_ericsson_MAIN10_2.bit,WPP_F_ericsson_MAIN10_yuv.md5,10 +++1,HEVC_v1/WPP_F_ericsson_MAIN_2,WPP_F_ericsson_MAIN_2.bit,WPP_F_ericsson_MAIN_2_yuv.md5,8 +++1,RExt/ADJUST_IPRED_ANGLE_A_RExt_Mitsubishi_2,ADJUST_IPRED_ANGLE_A_RExt_Mitsubishi_2.bit,ADJUST_IPRED_ANGLE_A_RExt_Mitsubishi_yuv_2.md5,0 +++0,RExt/Bitdepth_A_RExt_Sony_1,Bitdepth_A_RExt_Sony_1.bin,md5sum.txt,8 +++0,RExt/Bitdepth_B_RExt_Sony_1,Bitdepth_B_RExt_Sony_1.bin,md5sum.txt,8 +++0,RExt/CCP_10bit_RExt_QCOM,CCP_10bit_RExt_QCOM.bin,CCP_10bit_RExt_QCOM_md5sum.txt,10 +++0,RExt/CCP_12bit_RExt_QCOM,CCP_12bit_RExt_QCOM.bin,CCP_12bit_RExt_QCOM_md5sum.txt,8 +++0,RExt/CCP_8bit_RExt_QCOM,CCP_8bit_RExt_QCOM.bin,CCP_8bit_RExt_QCOM_md5sum.txt,8 +++1,RExt/ExplicitRdpcm_A_BBC_1,ExplicitRdpcm_A_BBC_1.bit,md5sum.txt,0 +++0,RExt/ExplicitRdpcm_B_BBC_2,ExplicitRdpcm_B_BBC_1.bit,md5sum.txt,8 +++0,RExt/EXTPREC_HIGHTHROUGHPUT_444_16_INTRA_10BIT_RExt_Sony_1,EXTPREC_HIGHTHROUGHPUT_444_16_INTRA_10BIT_RExt_Sony_1.bit,EXTPREC_HIGHTHROUGHPUT_444_16_INTRA_10BIT_RExt_Sony_1.md5,10 +++0,RExt/EXTPREC_HIGHTHROUGHPUT_444_16_INTRA_12BIT_RExt_Sony_1,EXTPREC_HIGHTHROUGHPUT_444_16_INTRA_12BIT_RExt_Sony_1.bit,EXTPREC_HIGHTHROUGHPUT_444_16_INTRA_12BIT_RExt_Sony_1.md5,8 +++0,RExt/EXTPREC_HIGHTHROUGHPUT_444_16_INTRA_16BIT_RExt_Sony_1,EXTPREC_HIGHTHROUGHPUT_444_16_INTRA_16BIT_RExt_Sony_1.bit,EXTPREC_HIGHTHROUGHPUT_444_16_INTRA_16BIT_RExt_Sony_1.md5,8 +++0,RExt/EXTPREC_HIGHTHROUGHPUT_444_16_INTRA_8BIT_RExt_Sony_1,EXTPREC_HIGHTHROUGHPUT_444_16_INTRA_8BIT_RExt_Sony_1.bit,EXTPREC_HIGHTHROUGHPUT_444_16_INTRA_8BIT_RExt_Sony_1.md5,8 +++0,RExt/EXTPREC_MAIN_444_16_INTRA_10BIT_RExt_Sony_1,EXTPREC_MAIN_444_16_INTRA_10BIT_RExt_Sony_1.bit,EXTPREC_MAIN_444_16_INTRA_10BIT_RExt_Sony_1.md5,10 +++0,RExt/EXTPREC_MAIN_444_16_INTRA_12BIT_RExt_Sony_1,EXTPREC_MAIN_444_16_INTRA_12BIT_RExt_Sony_1.bit,EXTPREC_MAIN_444_16_INTRA_12BIT_RExt_Sony_1.md5,8 +++0,RExt/EXTPREC_MAIN_444_16_INTRA_16BIT_RExt_Sony_1,EXTPREC_MAIN_444_16_INTRA_16BIT_RExt_Sony_1.bit,EXTPREC_MAIN_444_16_INTRA_16BIT_RExt_Sony_1.md5,8 +++0,RExt/EXTPREC_MAIN_444_16_INTRA_8BIT_RExt_Sony_1,EXTPREC_MAIN_444_16_INTRA_8BIT_RExt_Sony_1.bit,EXTPREC_MAIN_444_16_INTRA_8BIT_RExt_Sony_1.md5,8 +++1,RExt/GENERAL_10b_420_RExt_Sony_1,GENERAL_10b_420_RExt_Sony_1.bit,GENERAL_10b_420_RExt_Sony_1.md5,10 +++1,RExt/GENERAL_10b_422_RExt_Sony_1,GENERAL_10b_422_RExt_Sony_1.bit,GENERAL_10b_422_RExt_Sony_1.md5,0 +++1,RExt/GENERAL_10b_444_RExt_Sony_2,GENERAL_10b_444_RExt_Sony_2.bit,GENERAL_10b_444_RExt_Sony_2.md5,0 +++1,RExt/GENERAL_12b_400_RExt_Sony_1,GENERAL_12b_400_RExt_Sony_1.bit,GENERAL_12b_400_RExt_Sony_1.md5,0 +++1,RExt/GENERAL_12b_420_RExt_Sony_1,GENERAL_12b_420_RExt_Sony_1.bit,GENERAL_12b_420_RExt_Sony_1.md5,0 +++1,RExt/GENERAL_12b_422_RExt_Sony_1,GENERAL_12b_422_RExt_Sony_1.bit,GENERAL_12b_422_RExt_Sony_1.md5,0 +++1,RExt/GENERAL_12b_444_RExt_Sony_2,GENERAL_12b_444_RExt_Sony_2.bit,GENERAL_12b_444_RExt_Sony_2.md5,0 +++0,RExt/GENERAL_16b_400_RExt_Sony_1,GENERAL_16b_400_RExt_Sony_1.bit,GENERAL_16b_400_RExt_Sony_1.md5,0 +++0,RExt/GENERAL_16b_444_highThroughput_RExt_Sony_2,GENERAL_16b_444_highThroughput_RExt_Sony_2.bit,GENERAL_16b_444_highThroughput_RExt_Sony_2.md5,8 +++0,RExt/GENERAL_16b_444_RExt_Sony_2,GENERAL_16b_444_RExt_Sony_2.bit,GENERAL_16b_444_RExt_Sony_2.md5,8 +++1,RExt/GENERAL_8b_400_RExt_Sony_1,GENERAL_8b_400_RExt_Sony_1.bit,GENERAL_8b_400_RExt_Sony_1.md5,0 
+++1,RExt/GENERAL_8b_420_RExt_Sony_1,GENERAL_8b_420_RExt_Sony_1.bit,GENERAL_8b_420_RExt_Sony_1.md5,8 +++1,RExt/GENERAL_8b_444_RExt_Sony_2,GENERAL_8b_444_RExt_Sony_2.bit,GENERAL_8b_444_RExt_Sony_2.md5,0 +++1,RExt/IPCM_A_RExt_NEC_2,IPCM_A_RExt_NEC_2.bit,IPCM_A_RExt_NEC_2_yuv.md5,0 +++1,RExt/IPCM_B_RExt_NEC,IPCM_B_RExt_NEC.bit,IPCM_B_RExt_NEC_yuv.md5,0 +++1,RExt/Main_422_10_A_RExt_Sony_2,Main_422_10_A_RExt_Sony_2.bin,md5sum.txt,0 +++1,RExt/Main_422_10_B_RExt_Sony_2,Main_422_10_B_RExt_Sony_2.bin,md5sum.txt,0 +++1,RExt/PERSIST_RPARAM_A_RExt_Sony_3,PERSIST_RPARAM_A_RExt_Sony_3.bit,PERSIST_RPARAM_A_RExt_Sony_3.md5,0 +++1,RExt/QMATRIX_A_RExt_Sony_1,QMATRIX_A_RExt_Sony_1.bit,QMATRIX_A_RExt_Sony_1.md5,0 +++0,RExt/SAO_A_RExt_MediaTek_1,SAO_A_RExt_MediaTek_1.bit,SAO_A_RExt_MediaTek_1.md5, # Runs out of memory - could be fixed,8 +++0,RExt/TSCTX_10bit_I_RExt_SHARP_1,TSCTX_10bit_I_RExt_SHARP_1.bin,TSCTX_10bit_I_RExt_SHARP_1.md5,10 +++0,RExt/TSCTX_10bit_RExt_SHARP_1,TSCTX_10bit_RExt_SHARP_1.bin,TSCTX_10bit_RExt_SHARP_1.md5,10 +++0,RExt/TSCTX_12bit_I_RExt_SHARP_1,TSCTX_12bit_I_RExt_SHARP_1.bin,TSCTX_12bit_I_RExt_SHARP_1.md5,8 +++0,RExt/TSCTX_12bit_RExt_SHARP_1,TSCTX_12bit_RExt_SHARP_1.bin,TSCTX_12bit_RExt_SHARP_1.md5,8 +++0,RExt/TSCTX_8bit_I_RExt_SHARP_1,TSCTX_8bit_I_RExt_SHARP_1.bin,TSCTX_8bit_I_RExt_SHARP_1.md5,8 +++0,RExt/TSCTX_8bit_RExt_SHARP_1,TSCTX_8bit_RExt_SHARP_1.bin,TSCTX_8bit_RExt_SHARP_1.md5,8 +++0,RExt/WAVETILES_RExt_Sony_2,WAVETILES_RExt_Sony_2.bit,WAVETILES_RExt_Sony_2.md5,8 +++1,local/sao_cu16_mobile_344x280,sao_cu16_mobile_344x280.265,sao_cu16_mobile_344x280.md5,8 +++1,local/dblk_cu16_mobile_344x280,dblk_cu16_mobile_344x280.265,dblk_cu16_mobile_344x280.md5,8 +++1,local/dblksao_cu16_mobile_344x280,dblksao_cu16_mobile_344x280.265,dblksao_cu16_mobile_344x280.md5,8 +++1,local/dblk_pu32_horses_832x448,dblk_pu32_horses_832x448.265,dblk_pu32_horses_832x448.md5,8 +++1,local/intra_pred_21_laps,intra_pred_21_laps.265,intra_pred_21_laps.md5,8 ++diff --git a/pi-util/conf_h265.2016_HEVC_v1.csv b/pi-util/conf_h265.2016_HEVC_v1.csv ++new file mode 100644 ++index 0000000000..6082641271 ++--- /dev/null +++++ b/pi-util/conf_h265.2016_HEVC_v1.csv ++@@ -0,0 +1,147 @@ +++1,AMP_A_Samsung_7,AMP_A_Samsung_7.bin,AMP_A_Samsung_7.md5 +++1,AMP_B_Samsung_7,AMP_B_Samsung_7.bin,AMP_B_Samsung_7.md5 +++1,AMP_D_Hisilicon_3,AMP_D_Hisilicon.bit,AMP_D_Hisilicon_3.yuv.md5 +++1,AMP_E_Hisilicon_3,AMP_E_Hisilicon.bit,AMP_E_Hisilicon_3.yuv.md5 +++1,AMP_F_Hisilicon_3,AMP_F_Hisilicon_3.bit,AMP_F_Hisilicon_3.yuv.md5 +++1,AMVP_A_MTK_4,AMVP_A_MTK_4.bit,AMVP_A_MTK_4.md5 +++1,AMVP_B_MTK_4,AMVP_B_MTK_4.bit,AMVP_B_MTK_4.md5 +++1,AMVP_C_Samsung_7,AMVP_C_Samsung_7.bin,AMVP_C_Samsung_7.md5 +++1,BUMPING_A_ericsson_1,BUMPING_A_ericsson_1.bit,BUMPING_A_ericsson_1.md5 +++1,CAINIT_A_SHARP_4,CAINIT_A_SHARP_4.bit,CAINIT_A_SHARP_4.md5 +++1,CAINIT_B_SHARP_4,CAINIT_B_SHARP_4.bit,CAINIT_B_SHARP_4.md5 +++1,CAINIT_C_SHARP_3,CAINIT_C_SHARP_3.bit,CAINIT_C_SHARP_3.md5 +++1,CAINIT_D_SHARP_3,CAINIT_D_SHARP_3.bit,CAINIT_D_SHARP_3.md5 +++1,CAINIT_E_SHARP_3,CAINIT_E_SHARP_3.bit,CAINIT_E_SHARP_3.md5 +++1,CAINIT_F_SHARP_3,CAINIT_F_SHARP_3.bit,CAINIT_F_SHARP_3.md5 +++1,CAINIT_G_SHARP_3,CAINIT_G_SHARP_3.bit,CAINIT_G_SHARP_3.md5 +++1,CAINIT_H_SHARP_3,CAINIT_H_SHARP_3.bit,CAINIT_H_SHARP_3.md5 +++1,CIP_A_Panasonic_3,CIP_A_Panasonic_3.bit,CIP_A_Panasonic_3_yuv.md5 +++1,cip_B_NEC_3,cip_B_NEC_3.bit,cip_B_NEC_3.md5 +++1,CIP_C_Panasonic_2,CIP_C_Panasonic_2.bit,CIP_C_Panasonic_2_yuv.md5 +++1,CONFWIN_A_Sony_1,CONFWIN_A_Sony_1.bit,CONFWIN_A_Sony_1.md5 
+++1,DBLK_A_MAIN10_VIXS_4,DBLK_A_MAIN10_VIXS_4.bit,DBLK_A_MAIN10_VIXS_4.md5 +++1,DBLK_A_SONY_3,DBLK_A_SONY_3.bit,DBLK_A_SONY_3.bit.yuv.md5 +++1,DBLK_B_SONY_3,DBLK_B_SONY_3.bit,DBLK_B_SONY_3.bit.yuv.md5 +++1,DBLK_C_SONY_3,DBLK_C_SONY_3.bit,DBLK_C_SONY_3.bit.yuv.md5 +++1,DBLK_D_VIXS_2,DBLK_D_VIXS_2.bit,DBLK_D_VIXS_2_yuv.md5 +++1,DBLK_E_VIXS_2,DBLK_E_VIXS_2.bit,DBLK_E_VIXS_2_yuv.md5 +++1,DBLK_F_VIXS_2,DBLK_F_VIXS_2.bit,DBLK_F_VIXS_2_yuv.md5 +++1,DBLK_G_VIXS_2,DBLK_G_VIXS_2.bit,DBLK_G_VIXS_2_yuv.md5 +++1,DELTAQP_A_BRCM_4,DELTAQP_A_BRCM_4.bit,DELTAQP_A_BRCM_4_yuv.md5 +++1,DELTAQP_B_SONY_3,DELTAQP_B_SONY_3.bit,DELTAQP_B_SONY_3.bit.yuv.md5 +++1,DELTAQP_C_SONY_3,DELTAQP_C_SONY_3.bit,DELTAQP_C_SONY_3.bit.yuv.md5 +++1,DSLICE_A_HHI_5,DSLICE_A_HHI_5.bin,DSLICE_A_HHI_5.md5 +++1,DSLICE_B_HHI_5,DSLICE_B_HHI_5.bin,DSLICE_B_HHI_5.md5 +++1,DSLICE_C_HHI_5,DSLICE_C_HHI_5.bin,DSLICE_C_HHI_5.md5 +++1,ENTP_A_QUALCOMM_1,ENTP_A_Qualcomm_1.bit,ENTP_A_Qualcomm_1.md5 +++1,ENTP_B_Qualcomm_1,ENTP_B_Qualcomm_1.bit,ENTP_B_Qualcomm_1.md5 +++1,ENTP_C_Qualcomm_1,ENTP_C_Qualcomm_1.bit,ENTP_C_Qualcomm_1.md5 +++1,EXT_A_ericsson_4,EXT_A_ericsson_4.bit,EXT_A_ericsson_4.md5 +++1,FILLER_A_Sony_1,FILLER_A_Sony_1.bit,FILLER_A_Sony_1.md5 +++1,HRD_A_Fujitsu_3,HRD_A_Fujitsu_3.bin,HRD_A_Fujitsu_3.md5 +++1,INITQP_A_Sony_1,INITQP_A_Sony_1.bit,INITQP_A_Sony_1.md5 +++1,INITQP_B_Main10_Sony_1,INITQP_B_Main10_Sony_1.bit,INITQP_B_Main10_Sony_1.md5 +++1,ipcm_A_NEC_3,ipcm_A_NEC_3.bit,ipcm_A_NEC_3.md5 +++1,ipcm_B_NEC_3,ipcm_B_NEC_3.bit,ipcm_B_NEC_3.md5 +++1,ipcm_C_NEC_3,ipcm_C_NEC_3.bit,ipcm_C_NEC_3.md5 +++1,ipcm_D_NEC_3,ipcm_D_NEC_3.bit,ipcm_D_NEC_3.md5 +++1,ipcm_E_NEC_2,ipcm_E_NEC_2.bit,ipcm_E_NEC_2.md5 +++1,IPRED_A_docomo_2,IPRED_A_docomo_2.bit,IPRED_A_docomo_2.md5 +++1,IPRED_B_Nokia_3,IPRED_B_Nokia_3.bit,IPRED_B_Nokia_3_yuv.md5 +++1,IPRED_C_Mitsubishi_3,IPRED_C_Mitsubishi_3.bit,IPRED_C_Mitsubishi_3_yuv.md5 +++1,LS_A_Orange_2,LS_A_Orange_2.bit,LS_A_Orange_2_yuv.md5 +++1,LS_B_Orange_4,LS_B_Orange_4.bit,LS_B_Orange_4_yuv.md5 +++1,LTRPSPS_A_Qualcomm_1,LTRPSPS_A_Qualcomm_1.bit,LTRPSPS_A_Qualcomm_1.md5 +++1,MAXBINS_A_TI_5,MAXBINS_A_TI_5.bit,MAXBINS_A_TI_5_yuv.md5 +++1,MAXBINS_B_TI_5,MAXBINS_B_TI_5.bit,MAXBINS_B_TI_5_yuv.md5 +++1,MAXBINS_C_TI_5,MAXBINS_C_TI_5.bit,MAXBINS_C_TI_5_yuv.md5 +++1,MERGE_A_TI_3,MERGE_A_TI_3.bit,MERGE_A_TI_3.md5 +++1,MERGE_B_TI_3,MERGE_B_TI_3.bit,MERGE_B_TI_3.md5 +++1,MERGE_C_TI_3,MERGE_C_TI_3.bit,MERGE_C_TI_3.md5 +++1,MERGE_D_TI_3,MERGE_D_TI_3.bit,MERGE_D_TI_3.md5 +++1,MERGE_E_TI_3,MERGE_E_TI_3.bit,MERGE_E_TI_3.md5 +++1,MERGE_F_MTK_4,MERGE_F_MTK_4.bit,MERGE_F_MTK_4.md5 +++1,MERGE_G_HHI_4,MERGE_G_HHI_4.bit,MERGE_G_HHI_4.md5 +++1,MVCLIP_A_qualcomm_3,MVCLIP_A_qualcomm_3.bit,MVCLIP_A_qualcomm_3.yuv.md5 +++1,MVDL1ZERO_A_docomo_4,MVDL1ZERO_A_docomo_4.bit,MVDL1ZERO_A_docomo_4.md5 +++1,MVEDGE_A_qualcomm_3,MVEDGE_A_qualcomm_3.bit,MVEDGE_A_qualcomm_3.yuv.md5 +++1,NoOutPrior_A_Qualcomm_1,NoOutPrior_A_Qualcomm_1.bit,NoOutPrior_A_Qualcomm_1.md5 +++1,NoOutPrior_B_Qualcomm_1,NoOutPrior_B_Qualcomm_1.bit,NoOutPrior_B_Qualcomm_1.md5 +++1,NUT_A_ericsson_5,NUT_A_ericsson_5.bit,NUT_A_ericsson_5.md5 +++1,OPFLAG_A_Qualcomm_1,OPFLAG_A_Qualcomm_1.bit,OPFLAG_A_Qualcomm_1.md5 +++1,OPFLAG_B_Qualcomm_1,OPFLAG_B_Qualcomm_1.bit,OPFLAG_B_Qualcomm_1.md5 +++1,OPFLAG_C_Qualcomm_1,OPFLAG_C_Qualcomm_1.bit,OPFLAG_C_Qualcomm_1.md5 +++1,PICSIZE_A_Bossen_1,PICSIZE_A_Bossen_1.bin,PICSIZE_A_Bossen_1.md5 +++1,PICSIZE_B_Bossen_1,PICSIZE_B_Bossen_1.bin,PICSIZE_B_Bossen_1.md5 +++1,PICSIZE_C_Bossen_1,PICSIZE_C_Bossen_1.bin,PICSIZE_C_Bossen_1.md5 
+++1,PICSIZE_D_Bossen_1,PICSIZE_D_Bossen_1.bin,PICSIZE_D_Bossen_1.md5 +++1,PMERGE_A_TI_3,PMERGE_A_TI_3.bit,PMERGE_A_TI_3.md5 +++1,PMERGE_B_TI_3,PMERGE_B_TI_3.bit,PMERGE_B_TI_3.md5 +++1,PMERGE_C_TI_3,PMERGE_C_TI_3.bit,PMERGE_C_TI_3.md5 +++1,PMERGE_D_TI_3,PMERGE_D_TI_3.bit,PMERGE_D_TI_3.md5 +++1,PMERGE_E_TI_3,PMERGE_E_TI_3.bit,PMERGE_E_TI_3.md5 +++1,POC_A_Bossen_3,POC_A_Bossen_3.bin,POC_A_Bossen_3.md5 +++1,PPS_A_qualcomm_7,PPS_A_qualcomm_7.bit,PPS_A_qualcomm_7.yuv.md5 +++1,PS_B_VIDYO_3,PS_B_VIDYO_3.bit,PS_B_VIDYO_3_yuv.md5 +++1,RAP_A_docomo_6,RAP_A_docomo_6.bit,RAP_A_docomo_6.md5 +++1,RAP_B_Bossen_2,RAP_B_Bossen_2.bit,RAP_B_Bossen_2.md5 +++1,RPLM_A_qualcomm_4,RPLM_A_qualcomm_4.bit,RPLM_A_qualcomm_4.yuv.md5 +++1,RPLM_B_qualcomm_4,RPLM_B_qualcomm_4.bit,RPLM_B_qualcomm_4.yuv.md5 +++1,RPS_A_docomo_5,RPS_A_docomo_5.bit,RPS_A_docomo_5.md5 +++1,RPS_B_qualcomm_5,RPS_B_qualcomm_5.bit,RPS_B_qualcomm_5.yuv.md5 +++1,RPS_C_ericsson_5,RPS_C_ericsson_5.bit,RPS_C_ericsson_5.md5 +++1,RPS_D_ericsson_6,RPS_D_ericsson_6.bit,RPS_D_ericsson_6.md5 +++1,RPS_E_qualcomm_5,RPS_E_qualcomm_5.bit,RPS_E_qualcomm_5.yuv.md5 +++1,RPS_F_docomo_2,RPS_F_docomo_2.bit,RPS_F_docomo_2.md5 +++1,RQT_A_HHI_4,RQT_A_HHI_4.bit,RQT_A_HHI_4.md5 +++1,RQT_B_HHI_4,RQT_B_HHI_4.bit,RQT_B_HHI_4.md5 +++1,RQT_C_HHI_4,RQT_C_HHI_4.bit,RQT_C_HHI_4.md5 +++1,RQT_D_HHI_4,RQT_D_HHI_4.bit,RQT_D_HHI_4.md5 +++1,RQT_E_HHI_4,RQT_E_HHI_4.bit,RQT_E_HHI_4.md5 +++1,RQT_F_HHI_4,RQT_F_HHI_4.bit,RQT_F_HHI_4.md5 +++1,RQT_G_HHI_4,RQT_G_HHI_4.bit,RQT_G_HHI_4.md5 +++1,SAO_A_MediaTek_4,SAO_A_MediaTek_4.bit,SAO_A_MediaTek_4.md5 +++1,SAO_B_MediaTek_5,SAO_B_MediaTek_5.bit,SAO_B_MediaTek_5.md5 +++1,SAO_C_Samsung_5,SAO_C_Samsung_5.bin,SAO_C_Samsung_5.md5 +++1,SAO_D_Samsung_5,SAO_D_Samsung_5.bin,SAO_D_Samsung_5.md5 +++1,SAO_E_Canon_4,SAO_E_Canon_4.bit,SAO_E_Canon_4.md5 +++1,SAO_F_Canon_3,SAO_F_Canon_3.bit,SAO_F_Canon_3.md5 +++1,SAO_G_Canon_3,SAO_G_Canon_3.bit,SAO_G_Canon_3.md5 +++1,SAO_H_Parabola_1,SAO_H_Parabola_1.bit,SAO_H_Parabola_1.md5 +++2,SAODBLK_A_MainConcept_4,SAODBLK_A_MainConcept_4.bin,SAODBLK_A_MainConcept_4_md5.txt +++2,SAODBLK_B_MainConcept_4,SAODBLK_B_MainConcept_4.bin,SAODBLK_B_MainConcept_4_md5.txt +++1,SDH_A_Orange_4,SDH_A_Orange_4.bit,SDH_A_Orange_4_yuv.md5 +++1,SLICES_A_Rovi_3,SLICES_A_Rovi_3.bin,SLICES_A_Rovi_3.md5 +++1,SLIST_A_Sony_5,SLIST_A_Sony_5.bin,SLIST_A_Sony_5_yuv.md5 +++1,SLIST_B_Sony_9,SLIST_B_Sony_9.bin,SLIST_B_Sony_9_yuv.md5 +++1,SLIST_C_Sony_4,SLIST_C_Sony_4.bin,SLIST_C_Sony_4_yuv.md5 +++1,SLIST_D_Sony_9,str.bin,SLIST_D_Sony_9_yuv.md5 +++1,SLPPLP_A_VIDYO_2,SLPPLP_A_VIDYO_2.bit,SLPPLP_A_VIDYO_2_yuv.md5 +++1,STRUCT_A_Samsung_7,STRUCT_A_Samsung_7.bin,STRUCT_A_Samsung_7.md5 +++1,STRUCT_B_Samsung_7,STRUCT_B_Samsung_7.bin,STRUCT_B_Samsung_7.md5 +++1,TILES_A_Cisco_2,TILES_A_Cisco_2.bin,TILES_A_Cisco_2_yuv.md5 +++1,TILES_B_Cisco_1,TILES_B_Cisco_1.bin,TILES_B_Cisco_1_yuv.md5 +++1,TMVP_A_MS_3,TMVP_A_MS_3.bit,TMVP_A_MS_3.yuv.md5 +++1,TSCL_A_VIDYO_5,TSCL_A_VIDYO_5.bit,TSCL_A_VIDYO_5_yuv.md5 +++1,TSCL_B_VIDYO_4,TSCL_B_VIDYO_4.bit,TSCL_B_VIDYO_4_yuv.md5 +++1,TSKIP_A_MS_3,TSKIP_A_MS_3.bit,TSKIP_A_MS_3.yuv.md5 +++3,TSUNEQBD_A_MAIN10_Technicolor_2,TSUNEQBD_A_MAIN10_Technicolor_2.bit,TSUNEQBD_A_MAIN10_Technicolor_2_yuv.md5, # unequal bit depth +++1,TUSIZE_A_Samsung_1,TUSIZE_A_Samsung_1.bin,TUSIZE_A_Samsung_1.md5 +++1,VPSID_A_VIDYO_2,VPSID_A_VIDYO_2.bit,VPSID_A_VIDYO_2_yuv.md5 +++3,VPSSPSPPS_A_MainConcept_1,VPSSPSPPS_A_MainConcept_1.bin,VPSSPSPPS_A_MainConcept_1_md5.txt, # ??? 
+++1,WP_A_MAIN10_Toshiba_3,WP_A_MAIN10_Toshiba_3.bit,WP_A_MAIN10_Toshiba_3_yuv.md5 +++1,WP_A_Toshiba_3,WP_A_Toshiba_3.bit,WP_A_Toshiba_3_yuv.md5 +++1,WP_B_Toshiba_3,WP_B_Toshiba_3.bit,WP_B_Toshiba_3_yuv.md5 +++1,WP_MAIN10_B_Toshiba_3,WP_MAIN10_B_Toshiba_3.bit,WP_MAIN10_B_Toshiba_3_yuv.md5 +++1,WPP_A_ericsson_MAIN10_2,WPP_A_ericsson_MAIN10_2.bit,WPP_A_ericsson_MAIN10_yuv.md5 +++1,WPP_A_ericsson_MAIN_2,WPP_A_ericsson_MAIN_2.bit,WPP_A_ericsson_MAIN_2_yuv.md5 +++1,WPP_B_ericsson_MAIN10_2,WPP_B_ericsson_MAIN10_2.bit,WPP_B_ericsson_MAIN10_yuv.md5 +++1,WPP_B_ericsson_MAIN_2,WPP_B_ericsson_MAIN_2.bit,WPP_B_ericsson_MAIN_2_yuv.md5 +++1,WPP_C_ericsson_MAIN10_2,WPP_C_ericsson_MAIN10_2.bit,WPP_C_ericsson_MAIN10_yuv.md5 +++1,WPP_C_ericsson_MAIN_2,WPP_C_ericsson_MAIN_2.bit,WPP_C_ericsson_MAIN_2_yuv.md5 +++1,WPP_D_ericsson_MAIN10_2,WPP_D_ericsson_MAIN10_2.bit,WPP_D_ericsson_MAIN10_yuv.md5 +++1,WPP_D_ericsson_MAIN_2,WPP_D_ericsson_MAIN_2.bit,WPP_D_ericsson_MAIN_2_yuv.md5 +++1,WPP_E_ericsson_MAIN10_2,WPP_E_ericsson_MAIN10_2.bit,WPP_E_ericsson_MAIN10_yuv.md5 +++1,WPP_E_ericsson_MAIN_2,WPP_E_ericsson_MAIN_2.bit,WPP_E_ericsson_MAIN_2_yuv.md5 +++1,WPP_F_ericsson_MAIN10_2,WPP_F_ericsson_MAIN10_2.bit,WPP_F_ericsson_MAIN10_yuv.md5 +++1,WPP_F_ericsson_MAIN_2,WPP_F_ericsson_MAIN_2.bit,WPP_F_ericsson_MAIN_2_yuv.md5 ++diff --git a/pi-util/conf_h265.csv b/pi-util/conf_h265.csv ++new file mode 100644 ++index 0000000000..fc14f2a3c2 ++--- /dev/null +++++ b/pi-util/conf_h265.csv ++@@ -0,0 +1,144 @@ +++1,ADJUST_IPRED_ANGLE_A_RExt_Mitsubishi_1,ADJUST_IPRED_ANGLE_A_RExt_Mitsubishi_1.bit,ADJUST_IPRED_ANGLE_A_RExt_Mitsubishi_1.md5 +++1,AMP_A_Samsung_6,AMP_A_Samsung_6.bin,AMP_A_Samsung_6.md5 +++1,AMP_B_Samsung_6,AMP_B_Samsung_6.bin,AMP_B_Samsung_6.md5 +++1,AMP_D_Hisilicon_3,AMP_D_Hisilicon.bit,AMP_D_Hisilicon_3.yuv.md5 +++1,AMP_E_Hisilicon_3,AMP_E_Hisilicon.bit,AMP_E_Hisilicon_3.yuv.md5 +++1,AMP_F_Hisilicon_3,AMP_F_Hisilicon_3.bit,AMP_F_Hisilicon_3.yuv.md5 +++1,AMVP_A_MTK_4,AMVP_A_MTK_4.bit,AMVP_A_MTK_4.md5 +++1,AMVP_B_MTK_4,AMVP_B_MTK_4.bit,AMVP_B_MTK_4.md5 +++1,AMVP_C_Samsung_6,AMVP_C_Samsung_6.bin,AMVP_C_Samsung_6.md5 +++1,BUMPING_A_ericsson_1,BUMPING_A_ericsson_1.bit,BUMPING_A_ericsson_1.md5 +++1,CAINIT_A_SHARP_4,CAINIT_A_SHARP_4.bit,CAINIT_A_SHARP_4.md5 +++1,CAINIT_B_SHARP_4,CAINIT_B_SHARP_4.bit,CAINIT_B_SHARP_4.md5 +++1,CAINIT_C_SHARP_3,CAINIT_C_SHARP_3.bit,CAINIT_C_SHARP_3.md5 +++1,CAINIT_D_SHARP_3,CAINIT_D_SHARP_3.bit,CAINIT_D_SHARP_3.md5 +++1,CAINIT_E_SHARP_3,CAINIT_E_SHARP_3.bit,CAINIT_E_SHARP_3.md5 +++1,CAINIT_F_SHARP_3,CAINIT_F_SHARP_3.bit,CAINIT_F_SHARP_3.md5 +++1,CAINIT_G_SHARP_3,CAINIT_G_SHARP_3.bit,CAINIT_G_SHARP_3.md5 +++1,CAINIT_H_SHARP_3,CAINIT_H_SHARP_3.bit,CAINIT_H_SHARP_3.md5 +++1,CIP_A_Panasonic_3,CIP_A_Panasonic_3.bit,CIP_A_Panasonic_3_yuv.md5 +++1,cip_B_NEC_3,cip_B_NEC_3.bit,cip_B_NEC_3.md5 +++1,CIP_C_Panasonic_2,CIP_C_Panasonic_2.bit,CIP_C_Panasonic_2_yuv.md5 +++1,CONFWIN_A_Sony_1,CONFWIN_A_Sony_1.bit,CONFWIN_A_Sony_1.md5 +++1,DBLK_A_MAIN10_VIXS_3,DBLK_A_MAIN10_VIXS_3.bit,DBLK_A_MAIN10_VIXS_3.md5 +++1,DBLK_A_SONY_3,DBLK_A_SONY_3.bit,DBLK_A_SONY_3.bit.yuv.md5 +++1,DBLK_B_SONY_3,DBLK_B_SONY_3.bit,DBLK_B_SONY_3.bit.yuv.md5 +++1,DBLK_C_SONY_3,DBLK_C_SONY_3.bit,DBLK_C_SONY_3.bit.yuv.md5 +++1,DBLK_D_VIXS_2,DBLK_D_VIXS_2.bit,DBLK_D_VIXS_2_yuv.md5 +++1,DBLK_E_VIXS_2,DBLK_E_VIXS_2.bit,DBLK_E_VIXS_2_yuv.md5 +++1,DBLK_F_VIXS_2,DBLK_F_VIXS_2.bit,DBLK_F_VIXS_2_yuv.md5 +++1,DBLK_G_VIXS_2,DBLK_G_VIXS_2.bit,DBLK_G_VIXS_2_yuv.md5 +++1,DELTAQP_A_BRCM_4,DELTAQP_A_BRCM_4.bit,DELTAQP_A_BRCM_4_yuv.md5 
+++1,DELTAQP_B_SONY_3,DELTAQP_B_SONY_3.bit,DELTAQP_B_SONY_3.bit.yuv.md5 +++1,DELTAQP_C_SONY_3,DELTAQP_C_SONY_3.bit,DELTAQP_C_SONY_3.bit.yuv.md5 +++1,DSLICE_A_HHI_5,DSLICE_A_HHI_5.bin,DSLICE_A_HHI_5.md5 +++1,DSLICE_B_HHI_5,DSLICE_B_HHI_5.bin,DSLICE_B_HHI_5.md5 +++1,DSLICE_C_HHI_5,DSLICE_C_HHI_5.bin,DSLICE_C_HHI_5.md5 +++1,ENTP_A_QUALCOMM_1,ENTP_A_Qualcomm_1.bit,ENTP_A_Qualcomm_1.md5 +++1,ENTP_B_Qualcomm_1,ENTP_B_Qualcomm_1.bit,ENTP_B_Qualcomm_1.md5 +++1,ENTP_C_Qualcomm_1,ENTP_C_Qualcomm_1.bit,ENTP_C_Qualcomm_1.md5 +++1,EXT_A_ericsson_4,EXT_A_ericsson_4.bit,EXT_A_ericsson_4.md5 +++1,FILLER_A_Sony_1,FILLER_A_Sony_1.bit,FILLER_A_Sony_1.md5 +++1,HRD_A_Fujitsu_3,HRD_A_Fujitsu_3.bin,HRD_A_Fujitsu_3.md5 +++1,INITQP_A_Sony_1,INITQP_A_Sony_1.bit,INITQP_A_Sony_1.md5 +++1,INITQP_B_Main10_Sony_1,INITQP_B_Main10_Sony_1.bit,INITQP_B_Main10_Sony_1.md5 +++1,ipcm_A_NEC_3,ipcm_A_NEC_3.bit,ipcm_A_NEC_3.md5 +++1,ipcm_B_NEC_3,ipcm_B_NEC_3.bit,ipcm_B_NEC_3.md5 +++1,ipcm_C_NEC_3,ipcm_C_NEC_3.bit,ipcm_C_NEC_3.md5 +++1,ipcm_D_NEC_3,ipcm_D_NEC_3.bit,ipcm_D_NEC_3.md5 +++1,ipcm_E_NEC_2,ipcm_E_NEC_2.bit,ipcm_E_NEC_2.md5 +++1,IPRED_A_docomo_2,IPRED_A_docomo_2.bit,IPRED_A_docomo_2.md5 +++1,IPRED_B_Nokia_3,IPRED_B_Nokia_3.bit,IPRED_B_Nokia_3_yuv.md5 +++1,IPRED_C_Mitsubishi_3,IPRED_C_Mitsubishi_3.bit,IPRED_C_Mitsubishi_3_yuv.md5 +++1,LS_A_Orange_2,LS_A_Orange_2.bit,LS_A_Orange_2_yuv.md5 +++1,LS_B_Orange_4,LS_B_Orange_4.bit,LS_B_Orange_4_yuv.md5 +++1,LTRPSPS_A_Qualcomm_1,LTRPSPS_A_Qualcomm_1.bit,LTRPSPS_A_Qualcomm_1.md5 +++1,MAXBINS_A_TI_4,MAXBINS_A_TI_4.bit,MAXBINS_A_TI_4.md5 +++1,MAXBINS_B_TI_4,MAXBINS_B_TI_4.bit,MAXBINS_B_TI_4.md5 +++1,MAXBINS_C_TI_4,MAXBINS_C_TI_4.bit,MAXBINS_C_TI_4.md5 +++1,MERGE_A_TI_3,MERGE_A_TI_3.bit,MERGE_A_TI_3.md5 +++1,MERGE_B_TI_3,MERGE_B_TI_3.bit,MERGE_B_TI_3.md5 +++1,MERGE_C_TI_3,MERGE_C_TI_3.bit,MERGE_C_TI_3.md5 +++1,MERGE_D_TI_3,MERGE_D_TI_3.bit,MERGE_D_TI_3.md5 +++1,MERGE_E_TI_3,MERGE_E_TI_3.bit,MERGE_E_TI_3.md5 +++1,MERGE_F_MTK_4,MERGE_F_MTK_4.bit,MERGE_F_MTK_4.md5 +++1,MERGE_G_HHI_4,MERGE_G_HHI_4.bit,MERGE_G_HHI_4.md5 +++1,MVCLIP_A_qualcomm_3,MVCLIP_A_qualcomm_3.bit,MVCLIP_A_qualcomm_3.yuv.md5 +++1,MVDL1ZERO_A_docomo_4,MVDL1ZERO_A_docomo_4.bit,MVDL1ZERO_A_docomo_4.md5 +++1,MVEDGE_A_qualcomm_3,MVEDGE_A_qualcomm_3.bit,MVEDGE_A_qualcomm_3.yuv.md5 +++1,NoOutPrior_A_Qualcomm_1,NoOutPrior_A_Qualcomm_1.bit,NoOutPrior_A_Qualcomm_1.md5 +++1,NoOutPrior_B_Qualcomm_1,NoOutPrior_B_Qualcomm_1.bit,NoOutPrior_B_Qualcomm_1.md5 +++1,NUT_A_ericsson_5,NUT_A_ericsson_5.bit,NUT_A_ericsson_5.md5 +++1,OPFLAG_A_Qualcomm_1,OPFLAG_A_Qualcomm_1.bit,OPFLAG_A_Qualcomm_1.md5 +++1,OPFLAG_B_Qualcomm_1,OPFLAG_B_Qualcomm_1.bit,OPFLAG_B_Qualcomm_1.md5 +++1,OPFLAG_C_Qualcomm_1,OPFLAG_C_Qualcomm_1.bit,OPFLAG_C_Qualcomm_1.md5 +++1,PICSIZE_A_Bossen_1,PICSIZE_A_Bossen_1.bin,PICSIZE_A_Bossen_1.md5 +++1,PICSIZE_B_Bossen_1,PICSIZE_B_Bossen_1.bin,PICSIZE_B_Bossen_1.md5 +++1,PICSIZE_C_Bossen_1,PICSIZE_C_Bossen_1.bin,PICSIZE_C_Bossen_1.md5 +++1,PICSIZE_D_Bossen_1,PICSIZE_D_Bossen_1.bin,PICSIZE_D_Bossen_1.md5 +++1,PMERGE_A_TI_3,PMERGE_A_TI_3.bit,PMERGE_A_TI_3.md5 +++1,PMERGE_B_TI_3,PMERGE_B_TI_3.bit,PMERGE_B_TI_3.md5 +++1,PMERGE_C_TI_3,PMERGE_C_TI_3.bit,PMERGE_C_TI_3.md5 +++1,PMERGE_D_TI_3,PMERGE_D_TI_3.bit,PMERGE_D_TI_3.md5 +++1,PMERGE_E_TI_3,PMERGE_E_TI_3.bit,PMERGE_E_TI_3.md5 +++1,POC_A_Bossen_3,POC_A_Bossen_3.bin,POC_A_Bossen_3.md5 +++1,PPS_A_qualcomm_7,PPS_A_qualcomm_7.bit,PPS_A_qualcomm_7.yuv.md5 +++1,PS_B_VIDYO_3,PS_B_VIDYO_3.bit,PS_B_VIDYO_3_yuv.md5 +++1,RAP_A_docomo_6,RAP_A_docomo_6.bit,RAP_A_docomo_6.md5 
+++1,RAP_B_Bossen_2,RAP_B_Bossen_2.bit,RAP_B_Bossen_2.md5 +++1,RPLM_A_qualcomm_4,RPLM_A_qualcomm_4.bit,RPLM_A_qualcomm_4.yuv.md5 +++1,RPLM_B_qualcomm_4,RPLM_B_qualcomm_4.bit,RPLM_B_qualcomm_4.yuv.md5 +++1,RPS_A_docomo_5,RPS_A_docomo_5.bit,RPS_A_docomo_5.md5 +++1,RPS_B_qualcomm_5,RPS_B_qualcomm_5.bit,RPS_B_qualcomm_5.yuv.md5 +++1,RPS_C_ericsson_5,RPS_C_ericsson_5.bit,RPS_C_ericsson_5.md5 +++1,RPS_D_ericsson_6,RPS_D_ericsson_6.bit,RPS_D_ericsson_6.md5 +++1,RPS_E_qualcomm_5,RPS_E_qualcomm_5.bit,RPS_E_qualcomm_5.yuv.md5 +++1,RPS_F_docomo_2,RPS_F_docomo_2.bit,RPS_F_docomo_2.md5 +++1,RQT_A_HHI_4,RQT_A_HHI_4.bit,RQT_A_HHI_4.md5 +++1,RQT_B_HHI_4,RQT_B_HHI_4.bit,RQT_B_HHI_4.md5 +++1,RQT_C_HHI_4,RQT_C_HHI_4.bit,RQT_C_HHI_4.md5 +++1,RQT_D_HHI_4,RQT_D_HHI_4.bit,RQT_D_HHI_4.md5 +++1,RQT_E_HHI_4,RQT_E_HHI_4.bit,RQT_E_HHI_4.md5 +++1,RQT_F_HHI_4,RQT_F_HHI_4.bit,RQT_F_HHI_4.md5 +++1,RQT_G_HHI_4,RQT_G_HHI_4.bit,RQT_G_HHI_4.md5 +++1,SAO_A_MediaTek_4,SAO_A_MediaTek_4.bit,SAO_A_MediaTek_4.md5 +++1,SAO_B_MediaTek_5,SAO_B_MediaTek_5.bit,SAO_B_MediaTek_5.md5 +++1,SAO_C_Samsung_5,SAO_C_Samsung_5.bin,SAO_C_Samsung_5.md5 +++1,SAO_D_Samsung_5,SAO_D_Samsung_5.bin,SAO_D_Samsung_5.md5 +++1,SAO_E_Canon_4,SAO_E_Canon_4.bit,SAO_E_Canon_4.md5 +++1,SAO_F_Canon_3,SAO_F_Canon_3.bit,SAO_F_Canon_3.md5 +++1,SAO_G_Canon_3,SAO_G_Canon_3.bit,SAO_G_Canon_3.md5 +++1,SDH_A_Orange_4,SDH_A_Orange_4.bit,SDH_A_Orange_4_yuv.md5 +++1,SLICES_A_Rovi_3,SLICES_A_Rovi_3.bin,SLICES_A_Rovi_3.md5 +++1,SLIST_A_Sony_4,str.bin,SLIST_A_Sony_4_yuv.md5 +++1,SLIST_B_Sony_8,str.bin,SLIST_B_Sony_8_yuv.md5 +++1,SLIST_C_Sony_3,str.bin,SLIST_C_Sony_3_yuv.md5 +++1,SLIST_D_Sony_9,str.bin,SLIST_D_Sony_9_yuv.md5 +++1,SLPPLP_A_VIDYO_2,SLPPLP_A_VIDYO_2.bit,SLPPLP_A_VIDYO_2_yuv.md5 +++1,STRUCT_A_Samsung_6,STRUCT_A_Samsung_6.bin,STRUCT_A_Samsung_6.md5 +++1,STRUCT_B_Samsung_6,STRUCT_B_Samsung_6.bin,STRUCT_B_Samsung_6.md5 +++1,TILES_A_Cisco_2,TILES_A_Cisco_2.bin,TILES_A_Cisco_2_yuv.md5 +++1,TILES_B_Cisco_1,TILES_B_Cisco_1.bin,TILES_B_Cisco_1_yuv.md5 +++1,TMVP_A_MS_3,TMVP_A_MS_3.bit,TMVP_A_MS_3.yuv.md5 +++1,TSCL_A_VIDYO_5,TSCL_A_VIDYO_5.bit,TSCL_A_VIDYO_5_yuv.md5 +++1,TSCL_B_VIDYO_4,TSCL_B_VIDYO_4.bit,TSCL_B_VIDYO_4_yuv.md5 +++1,TSKIP_A_MS_3,TSKIP_A_MS_3.bit,TSKIP_A_MS_3.yuv.md5 +++0,TSUNEQBD_A_MAIN10_Technicolor_2,TSUNEQBD_A_MAIN10_Technicolor_2.bit,TSUNEQBD_A_MAIN10_Technicolor_2_yuv.md5, # Y/C bit depth unmatched +++1,TUSIZE_A_Samsung_1,TUSIZE_A_Samsung_1.bin,TUSIZE_A_Samsung_1.md5 +++1,VPSID_A_VIDYO_2,VPSID_A_VIDYO_2.bit,VPSID_A_VIDYO_2_yuv.md5 +++1,WP_A_MAIN10_Toshiba_3,WP_A_MAIN10_Toshiba_3.bit,WP_A_MAIN10_Toshiba_3_yuv.md5 +++1,WP_A_Toshiba_3,WP_A_Toshiba_3.bit,WP_A_Toshiba_3_yuv.md5 +++1,WP_B_Toshiba_3,WP_B_Toshiba_3.bit,WP_B_Toshiba_3_yuv.md5 +++1,WP_MAIN10_B_Toshiba_3,WP_MAIN10_B_Toshiba_3.bit,WP_MAIN10_B_Toshiba_3_yuv.md5 +++1,WPP_A_ericsson_MAIN10_2,WPP_A_ericsson_MAIN10_2.bit,WPP_A_ericsson_MAIN10_yuv.md5 +++1,WPP_A_ericsson_MAIN_2,WPP_A_ericsson_MAIN_2.bit,WPP_A_ericsson_MAIN_2_yuv.md5 +++1,WPP_B_ericsson_MAIN10_2,WPP_B_ericsson_MAIN10_2.bit,WPP_B_ericsson_MAIN10_yuv.md5 +++1,WPP_B_ericsson_MAIN_2,WPP_B_ericsson_MAIN_2.bit,WPP_B_ericsson_MAIN_2_yuv.md5 +++1,WPP_C_ericsson_MAIN10_2,WPP_C_ericsson_MAIN10_2.bit,WPP_C_ericsson_MAIN10_yuv.md5 +++1,WPP_C_ericsson_MAIN_2,WPP_C_ericsson_MAIN_2.bit,WPP_C_ericsson_MAIN_2_yuv.md5 +++1,WPP_D_ericsson_MAIN10_2,WPP_D_ericsson_MAIN10_2.bit,WPP_D_ericsson_MAIN10_yuv.md5 +++1,WPP_D_ericsson_MAIN_2,WPP_D_ericsson_MAIN_2.bit,WPP_D_ericsson_MAIN_2_yuv.md5 
+++1,WPP_E_ericsson_MAIN10_2,WPP_E_ericsson_MAIN10_2.bit,WPP_E_ericsson_MAIN10_yuv.md5 +++1,WPP_E_ericsson_MAIN_2,WPP_E_ericsson_MAIN_2.bit,WPP_E_ericsson_MAIN_2_yuv.md5 +++1,WPP_F_ericsson_MAIN10_2,WPP_F_ericsson_MAIN10_2.bit,WPP_F_ericsson_MAIN10_yuv.md5 +++1,WPP_F_ericsson_MAIN_2,WPP_F_ericsson_MAIN_2.bit,WPP_F_ericsson_MAIN_2_yuv.md5 ++diff --git a/pi-util/conf_native.sh b/pi-util/conf_native.sh ++new file mode 100755 ++index 0000000000..1dbbcf154a ++--- /dev/null +++++ b/pi-util/conf_native.sh ++@@ -0,0 +1,157 @@ +++echo "Configure for native build" +++ +++FFSRC=`pwd` +++MC=`dpkg --print-architecture` +++BUILDBASE=$FFSRC/out +++ +++#RPI_KEEPS="-save-temps=obj" +++RPI_KEEPS="" +++ +++NOSHARED= +++MMAL= +++USR_PREFIX= +++DO_MAKE= +++DO_INSTALL= +++INSTALL_SUDO= +++ +++while [ "$1" != "" ] ; do +++ case $1 in +++ --noshared) +++ NOSHARED=1 +++ ;; +++ --mmal) +++ MMAL=1 +++ ;; +++ --usr) +++ INSTALL_SUDO=1 +++ USR_PREFIX=/usr +++ ;; +++ --make) +++ DO_MAKE=1 +++ ;; +++ --install) +++ DO_MAKE=1 +++ DO_INSTALL=1 +++ ;; +++ *) +++ echo "Usage $0: [--noshared] [--mmal] [--usr]" +++ echo " noshared Build static libs and executable - good for testing" +++ echo " mmal Build mmal decoders" +++ echo " usr Set install prefix to /usr [default=/install]" +++ echo " make Make after configure" +++ echo " install Make & install after configure - does sudo on install if --usr" +++ exit 1 +++ ;; +++ esac +++ shift +++done +++ +++ +++MCOPTS= +++RPI_INCLUDES= +++RPI_LIBDIRS= +++RPI_DEFINES= +++RPI_EXTRALIBS= +++ +++# uname -m gives kernel type which may not have the same +++# 32/64bitness as userspace :-( getconf shoudl provide the answer +++# but use uname to check we are on the right processor +++MC=`uname -m` +++LB=`getconf LONG_BIT` +++if [ "$MC" == "armv7l" ] || [ "$MC" == "aarch64" ]; then +++ if [ "$LB" == "32" ]; then +++ echo "M/C armv7" +++ A=arm-linux-gnueabihf +++ B=armv7 +++ MCOPTS="--arch=armv6t2 --cpu=cortex-a7" +++ RPI_DEFINES=-mfpu=neon-vfpv4 +++ elif [ "$LB" == "64" ]; then +++ echo "M/C aarch64" +++ A=aarch64-linux-gnu +++ B=arm64 +++ else +++ echo "Unknown LONG_BIT name: $LB" +++ exit 1 +++ fi +++else +++ echo "Unknown machine name: $MC" +++ exit 1 +++fi +++ +++if [ $MMAL ]; then +++ RPI_OPT_VC=/opt/vc +++ RPI_INCLUDES="-I$RPI_OPT_VC/include -I$RPI_OPT_VC/include/interface/vcos/pthreads -I$RPI_OPT_VC/include/interface/vmcs_host/linux" +++ RPI_LIBDIRS="-L$RPI_OPT_VC/lib" +++ RPI_DEFINES="$RPI_DEFINES -D__VCCOREVER__=0x4000000" +++ RPI_EXTRALIBS="-Wl,--start-group -lbcm_host -lmmal -lmmal_util -lmmal_core -lvcos -lvcsm -lvchostif -lvchiq_arm -Wl,--end-group" +++ RPIOPTS="--enable-mmal" +++else +++ RPIOPTS="--disable-mmal" +++fi +++ +++C=`lsb_release -sc` +++V=`cat RELEASE` +++ +++SHARED_LIBS="--enable-shared" +++if [ $NOSHARED ]; then +++ SHARED_LIBS="--disable-shared" +++ OUT=$BUILDBASE/$B-$C-$V-static-rel +++ echo Static libs +++else +++ echo Shared libs +++ OUT=$BUILDBASE/$B-$C-$V-shared-rel +++fi +++ +++if [ ! $USR_PREFIX ]; then +++ USR_PREFIX=$OUT/install +++fi +++LIB_PREFIX=$USR_PREFIX/lib/$A +++INC_PREFIX=$USR_PREFIX/include/$A +++ +++echo Destination directory: $OUT +++mkdir -p $OUT +++# Nothing under here need worry git - including this .gitignore! 
+++echo "**" > $BUILDBASE/.gitignore +++cd $OUT +++ +++$FFSRC/configure \ +++ --prefix=$USR_PREFIX\ +++ --libdir=$LIB_PREFIX\ +++ --incdir=$INC_PREFIX\ +++ $MCOPTS\ +++ --disable-stripping\ +++ --disable-thumb\ +++ --enable-sand\ +++ --enable-v4l2-request\ +++ --enable-libdrm\ +++ --enable-vout-egl\ +++ --enable-vout-drm\ +++ --enable-gpl\ +++ $SHARED_LIBS\ +++ $RPIOPTS\ +++ --extra-cflags="-ggdb $RPI_KEEPS $RPI_DEFINES $RPI_INCLUDES"\ +++ --extra-cxxflags="$RPI_DEFINES $RPI_INCLUDES"\ +++ --extra-ldflags="$RPI_LIBDIRS"\ +++ --extra-libs="$RPI_EXTRALIBS"\ +++ --extra-version="rpi" +++ +++echo "Configured into $OUT" +++ +++if [ $DO_MAKE ]; then +++ echo "Making..." +++ make -j8 +++ echo "Made" +++fi +++if [ $DO_INSTALL ]; then +++ echo "Installing..." +++ if [ $INSTALL_SUDO ]; then +++ sudo make -j8 install +++ else +++ make -j8 install +++ fi +++ echo "Installed" +++fi +++ +++ +++# gcc option for getting asm listing +++# -Wa,-ahls ++diff --git a/pi-util/ffconf.py b/pi-util/ffconf.py ++new file mode 100755 ++index 0000000000..657568014e ++--- /dev/null +++++ b/pi-util/ffconf.py ++@@ -0,0 +1,215 @@ +++#!/usr/bin/env python3 +++ +++import string +++import os +++import subprocess +++import re +++import argparse +++import sys +++import csv +++from stat import * +++ +++CODEC_HEVC_RPI = 1 +++HWACCEL_RPI = 2 +++HWACCEL_DRM = 3 +++HWACCEL_VAAPI = 4 +++ +++def testone(fileroot, srcname, es_file, md5_file, pix, dectype, vcodec, ffmpeg_exec): +++ hwaccel = "" +++ if dectype == HWACCEL_RPI: +++ hwaccel = "rpi" +++ elif dectype == HWACCEL_DRM: +++ hwaccel = "drm" +++ elif dectype == HWACCEL_VAAPI: +++ hwaccel = "vaapi" +++ +++ pix_fmt = [] +++ if pix == "8": +++ pix_fmt = ["-pix_fmt", "yuv420p"] +++ elif pix == "10": +++ pix_fmt = ["-pix_fmt", "yuv420p10le"] +++ elif pix == "12": +++ pix_fmt = ["-pix_fmt", "yuv420p12le"] +++ +++ tmp_root = "/tmp" +++ +++ names = srcname.split('/') +++ while len(names) > 1: +++ tmp_root = os.path.join(tmp_root, names[0]) +++ del names[0] +++ name = names[0] +++ +++ if not os.path.exists(tmp_root): +++ os.makedirs(tmp_root) +++ +++ dec_file = os.path.join(tmp_root, name + ".dec.md5") +++ try: +++ os.remove(dec_file) +++ except: +++ pass +++ +++ flog = open(os.path.join(tmp_root, name + ".log"), "wt") +++ +++ ffargs = [ffmpeg_exec, "-flags", "unaligned", "-hwaccel", hwaccel, "-vcodec", "hevc", "-i", os.path.join(fileroot, es_file)] + pix_fmt + ["-f", "md5", dec_file] +++ +++ # Unaligned needed for cropping conformance +++ if hwaccel: +++ rstr = subprocess.call(ffargs, stdout=flog, stderr=subprocess.STDOUT) +++ else: +++ rstr = subprocess.call( +++ [ffmpeg_exec, "-flags", "unaligned", "-vcodec", vcodec, "-i", os.path.join(fileroot, es_file), "-f", "md5", dec_file], +++ stdout=flog, stderr=subprocess.STDOUT) +++ +++ try: +++ m1 = None +++ m2 = None +++ with open(os.path.join(fileroot, md5_file)) as f: +++ for line in f: +++ m1 = re.search("[0-9a-f]{32}", line.lower()) +++ if m1: +++ break +++ +++ with open(dec_file) as f: +++ m2 = re.search("[0-9a-f]{32}", f.readline()) +++ except: +++ pass +++ +++ if m1 and m2 and m1.group() == m2.group(): +++ print("Match: " + m1.group(), file=flog) +++ rv = 0 +++ elif not m1: +++ print("****** Cannot find m1", file=flog) +++ rv = 3 +++ elif not m2: +++ print("****** Cannot find m2", file=flog) +++ rv = 2 +++ else: +++ print("****** Mismatch: " + m1.group() + " != " + m2.group(), file=flog) +++ rv = 1 +++ flog.close() +++ return rv +++ +++def scandir(root): +++ aconf = [] +++ ents = os.listdir(root) +++ ents.sort(key=str.lower) +++ for 
name in ents: +++ test_path = os.path.join(root, name) +++ if S_ISDIR(os.stat(test_path).st_mode): +++ files = os.listdir(test_path) +++ es_file = "?" +++ md5_file = "?" +++ for f in files: +++ (base, ext) = os.path.splitext(f) +++ if base[0] == '.': +++ pass +++ elif ext == ".bit" or ext == ".bin": +++ es_file = f +++ elif ext == ".md5" or (ext == ".txt" and (base[-4:] == "_md5" or base[-6:] == "md5sum")): +++ if md5_file == "?": +++ md5_file = f +++ elif base[-3:] == "yuv": +++ md5_file = f +++ aconf.append((1, name, es_file, md5_file)) +++ return aconf +++ +++def runtest(name, tests): +++ if not tests: +++ return True +++ for t in tests: +++ if name[0:len(t)] == t or name.find("/" + t) != -1: +++ return True +++ return False +++ +++def doconf(csva, tests, test_root, vcodec, dectype, ffmpeg_exec): +++ unx_failures = [] +++ unx_success = [] +++ failures = 0 +++ successes = 0 +++ for a in csva: +++ exp_test = int(a[0]) +++ if (exp_test and runtest(a[1], tests)): +++ name = a[1] +++ print ("==== ", name, end="") +++ sys.stdout.flush() +++ +++ rv = testone(os.path.join(test_root, name), name, a[2], a[3], a[4], dectype=dectype, vcodec=vcodec, ffmpeg_exec=ffmpeg_exec) +++ if (rv == 0): +++ successes += 1 +++ else: +++ failures += 1 +++ +++ if (rv == 0): +++ if exp_test == 2: +++ print(": * OK *") +++ unx_success.append(name) +++ else: +++ print(": ok") +++ elif exp_test == 2 and rv == 1: +++ print(": fail") +++ elif exp_test == 3 and rv == 2: +++ # Call an expected "crash" an abort +++ print(": abort") +++ else: +++ unx_failures.append(name) +++ if rv == 1: +++ print(": * FAIL *") +++ elif (rv == 2) : +++ print(": * CRASH *") +++ elif (rv == 3) : +++ print(": * MD5 MISSING *") +++ else : +++ print(": * BANG *") +++ +++ if unx_failures or unx_success: +++ print("Unexpected Failures:", unx_failures) +++ print("Unexpected Success: ", unx_success) +++ else: +++ print("All tests normal:", successes, "ok,", failures, "failed") +++ +++ +++class ConfCSVDialect(csv.Dialect): +++ delimiter = ',' +++ doublequote = True +++ lineterminator = '\n' +++ quotechar='"' +++ quoting = csv.QUOTE_MINIMAL +++ skipinitialspace = True +++ strict = True +++ +++if __name__ == '__main__': +++ +++ argp = argparse.ArgumentParser(description="FFmpeg h265 conformance tester") +++ argp.add_argument("tests", nargs='*') +++ argp.add_argument("--pi4", action='store_true', help="Force pi4 cmd line") +++ argp.add_argument("--drm", action='store_true', help="Force v4l2 drm cmd line") +++ argp.add_argument("--vaapi", action='store_true', help="Force vaapi cmd line") +++ argp.add_argument("--test_root", default="/opt/conform/h265.2016", help="Root dir for test") +++ argp.add_argument("--csvgen", action='store_true', help="Generate CSV file for dir") +++ argp.add_argument("--csv", default="pi-util/conf_h265.2016.csv", help="CSV filename") +++ argp.add_argument("--vcodec", default="hevc_rpi", help="vcodec name to use") +++ argp.add_argument("--ffmpeg", default="./ffmpeg", help="ffmpeg exec name") +++ args = argp.parse_args() +++ +++ if args.csvgen: +++ csv.writer(sys.stdout).writerows(scandir(args.test_root)) +++ exit(0) +++ +++ with open(args.csv, 'rt') as csvfile: +++ csva = [a for a in csv.reader(csvfile, ConfCSVDialect())] +++ +++ dectype = CODEC_HEVC_RPI +++ if os.path.exists("/dev/rpivid-hevcmem"): +++ dectype = HWACCEL_RPI +++ if args.drm or os.path.exists("/sys/module/rpivid_hevc"): +++ dectype = HWACCEL_DRM +++ +++ if args.pi4: +++ dectype = HWACCEL_RPI +++ elif args.drm: +++ dectype = HWACCEL_DRM +++ elif args.vaapi: +++ 
dectype = HWACCEL_VAAPI +++ +++ doconf(csva, args.tests, args.test_root, args.vcodec, dectype, args.ffmpeg) +++ ++diff --git a/pi-util/ffperf.py b/pi-util/ffperf.py ++new file mode 100755 ++index 0000000000..65c5224cd8 ++--- /dev/null +++++ b/pi-util/ffperf.py ++@@ -0,0 +1,128 @@ +++#!/usr/bin/env python3 +++ +++import time +++import string +++import os +++import tempfile +++import subprocess +++import re +++import argparse +++import sys +++import csv +++from stat import * +++ +++class tstats: +++ close_threshold = 0.01 +++ +++ def __init__(self, stats_dict=None): +++ if stats_dict != None: +++ self.name = stats_dict["name"] +++ self.elapsed = float(stats_dict["elapsed"]) +++ self.user = float(stats_dict["user"]) +++ self.sys = float(stats_dict["sys"]) +++ +++ def times_str(self): +++ ctime = self.sys + self.user +++ return "time=%6.2f, cpu=%6.2f (%4.2f%%)" % (self.elapsed, ctime, (ctime * 100.0) / self.elapsed) +++ +++ def dict(self): +++ return {"name":self.name, "elapsed":self.elapsed, "user":self.user, "sys":self.sys} +++ +++ def is_close(self, other): +++ return abs(self.elapsed - other.elapsed) / self.elapsed < self.close_threshold +++ +++ def __lt__(self, other): +++ return self.elapsed < other.elapsed +++ def __gt__(self, other): +++ return self.elapsed > other.elapsed +++ +++ def time_file(name, prefix, ffmpeg="./ffmpeg"): +++ stats = tstats() +++ stats.name = name +++ start_time = time.clock_gettime(time.CLOCK_MONOTONIC); +++ cproc = subprocess.Popen([ffmpeg, "-no_cvt_hw", +++ "-vcodec", "hevc_rpi", +++ "-t", "30", "-i", prefix + name, +++ "-f", "vout_rpi", os.devnull], bufsize=-1, stdout=flog, stderr=flog); +++ pinfo = os.wait4(cproc.pid, 0) +++ end_time = time.clock_gettime(time.CLOCK_MONOTONIC); +++ stats.elapsed = end_time - start_time +++ stats.user = pinfo[2].ru_utime +++ stats.sys = pinfo[2].ru_stime +++ return stats +++ +++ +++def common_prefix(s1, s2): +++ for i in range(min(len(s1),len(s2))): +++ if s1[i] != s2[i]: +++ return s1[:i] +++ return s1[:i+1] +++ +++def main(): +++ global flog +++ +++ argp = argparse.ArgumentParser(description="FFmpeg performance tester", epilog=""" +++To blank the screen before starting use "xdg-screensaver activate" +++(For some reason this doesn't seem to work from within python). 
+++""") +++ +++ argp.add_argument("streams", nargs='*') +++ argp.add_argument("--csv_out", default="ffperf_out.csv", help="CSV output filename") +++ argp.add_argument("--csv_in", help="CSV input filename") +++ argp.add_argument("--prefix", help="Filename prefix (include terminal '/' if a directory).") +++ argp.add_argument("--repeat", default=3, type=int, help="Run repeat count") +++ argp.add_argument("--ffmpeg", default="./ffmpeg", help="FFmpeg executable") +++ +++ args = argp.parse_args() +++ +++ csv_out = csv.DictWriter(open(args.csv_out, 'w', newline=''), ["name", "elapsed", "user", "sys"]) +++ csv_out.writeheader() +++ +++ stats_in = {} +++ if args.csv_in != None: +++ with open(args.csv_in, 'r', newline='') as f_in: +++ stats_in = {x["name"]:tstats(x) for x in csv.DictReader(f_in)} +++ +++ flog = open(os.path.join(tempfile.gettempdir(), "ffperf.log"), "wt") +++ +++ streams = args.streams +++ if not streams: +++ if not stats_in: +++ print ("No source streams specified") +++ return 1 +++ prefix = "" if args.prefix == None else args.prefix +++ streams = [k for k in stats_in] +++ elif args.prefix != None: +++ prefix = args.prefix +++ else: +++ prefix = streams[0] +++ for f in streams[1:]: +++ prefix = common_prefix(prefix, f) +++ pp = prefix.rpartition(os.sep) +++ prefix = pp[0] + pp[1] +++ streams = [s[len(prefix):] for s in streams] +++ +++ for f in sorted(streams, key=lambda x : "~" * x.count(os.sep) + x.lower()): +++ print ("====", f) +++ +++ t0 = tstats({"name":f, "elapsed":999, "user":999, "sys":999}) +++ for i in range(args.repeat): +++ t = tstats.time_file(f, prefix, args.ffmpeg) +++ print ("...", t.times_str()) +++ if t0 > t: +++ t0 = t +++ +++ if t0.name in stats_in: +++ pstat = stats_in[t0.name] +++ print("---" if pstat.is_close(t0) else "<<<" if t0 < pstat else ">>>", pstat.times_str()) +++ +++ csv_out.writerow(t0.dict()) +++ +++ print () +++ +++ return 0 +++ +++ +++if __name__ == '__main__': +++ exit(main()) +++ ++diff --git a/pi-util/genpatch.sh b/pi-util/genpatch.sh ++new file mode 100755 ++index 0000000000..0948a68a7a ++--- /dev/null +++++ b/pi-util/genpatch.sh ++@@ -0,0 +1,35 @@ +++set -e +++ +++NOPATCH= +++if [ "$1" == "--notag" ]; then +++ shift +++ NOPATCH=1 +++fi +++ +++if [ "$1" == "" ]; then +++ echo Usage: $0 [--notag] \ +++ echo e.g.: $0 mmal_4 +++ exit 1 +++fi +++ +++VERSION=`cat RELEASE` +++if [ "$VERSION" == "" ]; then +++ echo Can\'t find version RELEASE +++ exit 1 +++fi +++ +++PATCHFILE=../ffmpeg-$VERSION-$1.patch +++ +++if [ $NOPATCH ]; then +++ echo Not tagged +++else +++ # Only continue if we are all comitted +++ git diff --name-status --exit-code +++ +++ PATCHTAG=pi/$VERSION/$1 +++ echo Tagging: $PATCHTAG +++ +++ git tag $PATCHTAG +++fi +++echo Generating patch: $PATCHFILE +++git diff n$VERSION -- > $PATCHFILE ++diff --git a/pi-util/make_array.py b/pi-util/make_array.py ++new file mode 100755 ++index 0000000000..67b22d2d51 ++--- /dev/null +++++ b/pi-util/make_array.py ++@@ -0,0 +1,23 @@ +++#!/usr/bin/env python +++ +++# Usage +++# make_array file.bin +++# Produces file.h with array of bytes. 
+++# +++import sys +++for file in sys.argv[1:]: +++ prefix,suffix = file.split('.') +++ assert suffix=='bin' +++ name=prefix.split('/')[-1] +++ print 'Converting',file +++ with open(prefix+'.h','wb') as out: +++ print >>out, 'static const unsigned char',name,'[] = {' +++ with open(file,'rb') as fd: +++ i = 0 +++ for byte in fd.read(): +++ print >>out, '0x%02x, ' % ord(byte), +++ i = i + 1 +++ if i % 8 == 0: +++ print >>out, ' // %04x' % (i - 8) +++ print >>out,'};' +++ ++diff --git a/pi-util/mkinst.sh b/pi-util/mkinst.sh ++new file mode 100755 ++index 0000000000..271a39e846 ++--- /dev/null +++++ b/pi-util/mkinst.sh ++@@ -0,0 +1,5 @@ +++set -e +++ +++make install +++ +++cp -r install/* ../vlc/sysroot/raspian_stretch_pi1-sysroot/usr ++diff --git a/pi-util/patkodi.sh b/pi-util/patkodi.sh ++new file mode 100644 ++index 0000000000..dcd05a606e ++--- /dev/null +++++ b/pi-util/patkodi.sh ++@@ -0,0 +1,9 @@ +++set -e +++KODIBASE=/home/jc/rpi/kodi/xbmc +++JOBS=-j20 +++make $JOBS +++git diff xbmc/release/4.3-kodi > $KODIBASE/tools/depends/target/ffmpeg/pfcd_hevc_optimisations.patch +++make -C $KODIBASE/tools/depends/target/ffmpeg $JOBS +++make -C $KODIBASE/build install +++ +++ ++diff --git a/pi-util/perfcmp.py b/pi-util/perfcmp.py ++new file mode 100755 ++index 0000000000..e44cfa0c3c ++--- /dev/null +++++ b/pi-util/perfcmp.py ++@@ -0,0 +1,101 @@ +++#!/usr/bin/env python3 +++ +++import time +++import string +++import os +++import tempfile +++import subprocess +++import re +++import argparse +++import sys +++import csv +++from stat import * +++ +++class tstats: +++ close_threshold = 0.01 +++ +++ def __init__(self, stats_dict=None): +++ if stats_dict != None: +++ self.name = stats_dict["name"] +++ self.elapsed = float(stats_dict["elapsed"]) +++ self.user = float(stats_dict["user"]) +++ self.sys = float(stats_dict["sys"]) +++ +++ def times_str(self): +++ ctime = self.sys + self.user +++ return "time=%6.2f, cpu=%6.2f (%4.2f%%)" % (self.elapsed, ctime, (ctime * 100.0) / self.elapsed) +++ +++ def dict(self): +++ return {"name":self.name, "elapsed":self.elapsed, "user":self.user, "sys":self.sys} +++ +++ def is_close(self, other): +++ return abs(self.elapsed - other.elapsed) / self.elapsed < self.close_threshold +++ +++ def __lt__(self, other): +++ return self.elapsed < other.elapsed +++ def __gt__(self, other): +++ return self.elapsed > other.elapsed +++ +++ def time_file(name, prefix): +++ stats = tstats() +++ stats.name = name +++ start_time = time.clock_gettime(time.CLOCK_MONOTONIC); +++ cproc = subprocess.Popen(["./ffmpeg", "-t", "30", "-i", prefix + name, +++ "-f", "null", os.devnull], bufsize=-1, stdout=flog, stderr=flog); +++ pinfo = os.wait4(cproc.pid, 0) +++ end_time = time.clock_gettime(time.CLOCK_MONOTONIC); +++ stats.elapsed = end_time - start_time +++ stats.user = pinfo[2].ru_utime +++ stats.sys = pinfo[2].ru_stime +++ return stats +++ +++ +++def common_prefix(s1, s2): +++ for i in range(min(len(s1),len(s2))): +++ if s1[i] != s2[i]: +++ return s1[:i] +++ return s1[:i+1] +++ +++def main(): +++ argp = argparse.ArgumentParser(description="FFmpeg performance compare") +++ +++ argp.add_argument("stream0", help="CSV to compare") +++ argp.add_argument("stream1", nargs='?', default="ffperf_out.csv", help="CSV to compare") +++ +++ args = argp.parse_args() +++ +++ with open(args.stream0, 'r', newline='') as f_in: +++ stats0 = {x["name"]:tstats(x) for x in csv.DictReader(f_in)} +++ with open(args.stream1, 'r', newline='') as f_in: +++ stats1 = {x["name"]:tstats(x) for x in csv.DictReader(f_in)} +++ +++ 
print (args.stream0, "<<-->>", args.stream1) +++ print () +++ +++ for f in sorted(stats0.keys() | stats1.keys(), key=lambda x : "~" * x.count(os.sep) + x.lower()): +++ if not (f in stats0) : +++ print (" XX :", f) +++ continue +++ if not (f in stats1) : +++ print (" XX :", f) +++ continue +++ +++ s0 = stats0[f] +++ s1 = stats1[f] +++ +++ pcent = ((s0.elapsed - s1.elapsed) / s0.elapsed) * 100.0 +++ thresh = 0.3 +++ tc = 6 +++ +++ nchar = min(tc - 1, int(abs(pcent) / thresh)) +++ cc = " -- " if nchar == 0 else "<" * nchar + " " * (tc - nchar) if pcent < 0 else " " * (tc - nchar) + ">" * nchar +++ +++ print ("%6.2f %s%6.2f (%+5.2f) : %s" % +++ (s0.elapsed, cc, s1.elapsed, pcent, f)) +++ +++ return 0 +++ +++ +++if __name__ == '__main__': +++ exit(main()) +++ ++diff --git a/pi-util/qem.sh b/pi-util/qem.sh ++new file mode 100755 ++index 0000000000..a4dbb6eacd ++--- /dev/null +++++ b/pi-util/qem.sh ++@@ -0,0 +1,9 @@ +++TARGET_DIR=../src/eupton_vc4dev_2012a/software/vc4/DEV/applications/tutorials/user_shader_example_tex +++QASM=python\ ../local/bin/qasm.py +++SRC_FILE=libavcodec/rpi_hevc_shader.qasm +++DST_BASE=shader +++ +++cp libavcodec/rpi_hevc_shader_cmd.h $TARGET_DIR +++$QASM -mc_c:$DST_BASE,$DST_BASE,$DST_BASE $SRC_FILE > $TARGET_DIR/$DST_BASE.c +++$QASM -mc_h:$DST_BASE,$DST_BASE,$DST_BASE $SRC_FILE > $TARGET_DIR/$DST_BASE.h +++ ++diff --git a/pi-util/testfilt.py b/pi-util/testfilt.py ++new file mode 100755 ++index 0000000000..b322dac0c2 ++--- /dev/null +++++ b/pi-util/testfilt.py ++@@ -0,0 +1,83 @@ +++#!/usr/bin/env python3 +++ +++import string +++import os +++import subprocess +++import re +++import argparse +++import sys +++import csv +++from stat import * +++ +++class validator: +++ def __init__(self): +++ self.ok = False +++ +++ def isok(self): +++ return self.ok +++ +++ def setok(self): +++ self.ok = True +++ +++class valid_regex(validator): +++ def __init__(self, regex): +++ super().__init__() +++ self.regex = re.compile(regex) +++ +++ def scanline(self, line): +++ if self.isok() or self.regex.search(line): +++ self.setok() +++ +++ +++def validate(validators, flog): +++ for line in flog: +++ for v in validators: +++ v.scanline(line) +++ +++ ok = True +++ for v in validators: +++ if not v.isok(): +++ ok = False +++ # complain +++ print("Test failed") +++ +++ if ok: +++ print("OK") +++ return ok +++ +++def runtest(name, ffmpeg, args, suffix, validators): +++ log_root = os.path.join("/tmp", "testfilt", name) +++ ofilename = os.path.join(log_root, name + suffix) +++ +++ if not os.path.exists(log_root): +++ os.makedirs(log_root) +++ +++ try: +++ os.remove(ofilename) +++ except: +++ pass +++ +++ flog = open(os.path.join(log_root, name + ".log"), "wb") +++ ffargs = [ffmpeg] + args + [ofilename] +++ +++ subprocess.call(ffargs, stdout=flog, stderr=subprocess.STDOUT, text=False) +++ flog.close +++ +++ flog = open(os.path.join(log_root, name + ".log"), "rt") +++ return validate(validators, flog) +++ +++def sayok(log_root, flog): +++ print("Woohoo") +++ return True +++ +++if __name__ == '__main__': +++ +++ argp = argparse.ArgumentParser(description="FFmpeg filter tester") +++ argp.add_argument("--ffmpeg", default="./ffmpeg", help="ffmpeg exec name") +++ args = argp.parse_args() +++ +++ runtest("ATest", args.ffmpeg, ["-v", "verbose", "-no_cvt_hw", "-an", "-c:v", "h264_v4l2m2m", "-i", +++ "/home/johncox/server/TestMedia/Sony/jellyfish-10-mbps-hd-h264.mkv", +++# "/home/jc/rpi/streams/jellyfish-3-mbps-hd-h264.mkv", +++ "-c:v", "h264_v4l2m2m", "-b:v", "2M"], ".mkv", +++ [valid_regex(r'Output stream 
#0:0 \(video\): 900 frames encoded; 900 packets muxed')]) ++diff --git a/pi-util/v3dusage.py b/pi-util/v3dusage.py ++new file mode 100755 ++index 0000000000..5935a11ca5 ++--- /dev/null +++++ b/pi-util/v3dusage.py ++@@ -0,0 +1,128 @@ +++#!/usr/bin/env python +++ +++import sys +++import argparse +++import re +++ +++def do_logparse(logname): +++ +++ rmatch = re.compile(r'^([0-9]+\.[0-9]{3}): (done )?((vpu0)|(vpu1)|(qpu1)) ([A-Z_]+) cb:([0-9a-f]+) ') +++ rqcycle = re.compile(r'^([0-9]+\.[0-9]{3}): v3d: QPU Total clock cycles for all QPUs doing vertex/coordinate shading +([0-9]+)$') +++ rqtscycle = re.compile(r'^([0-9]+\.[0-9]{3}): v3d: QPU Total clock cycles for all QPUs stalled waiting for TMUs +([0-9]+)$') +++ rl2hits = re.compile(r'^([0-9]+\.[0-9]{3}): v3d: L2C Total Level 2 cache ([a-z]+) +([0-9]+)$') +++ +++ ttotal = {'idle':0.0} +++ tstart = {} +++ qctotal = {} +++ qtstotal = {} +++ l2hits = {} +++ l2total = {} +++ time0 = None +++ idle_start = None +++ qpu_op_no = 0 +++ op_count = 0 +++ +++ with open(logname, "rt") as infile: +++ for line in infile: +++ match = rmatch.match(line) +++ if match: +++# print match.group(1), ":", match.group(2), ":", match.group(3), ":", match.group(7), ":" +++ time = float(match.group(1)) +++ unit = match.group(3) +++ opstart = not match.group(2) +++ optype = match.group(7) +++ hascb = match.group(8) != "0" +++ +++ if unit == 'qpu1': +++ unit = unit + "." + str(qpu_op_no) +++ if not opstart: +++ if hascb or optype == 'EXECUTE_SYNC': +++ qpu_op_no = 0 +++ else: +++ qpu_op_no += 1 +++ +++ # Ignore sync type +++ if optype == 'EXECUTE_SYNC': +++ continue +++ +++ if not time0: +++ time0 = time +++ +++ if opstart: +++ tstart[unit] = time; +++ elif unit in tstart: +++ op_count += 1 +++ if not unit in ttotal: +++ ttotal[unit] = 0.0 +++ ttotal[unit] += time - tstart[unit] +++ del tstart[unit] +++ +++ if not idle_start and not tstart: +++ idle_start = time +++ elif idle_start and tstart: +++ ttotal['idle'] += time - idle_start +++ idle_start = None +++ +++ match = rqcycle.match(line) +++ if match: +++ unit = "qpu1." + str(qpu_op_no) +++ if not unit in qctotal: +++ qctotal[unit] = 0 +++ qctotal[unit] += int(match.group(2)) +++ +++ match = rqtscycle.match(line) +++ if match: +++ unit = "qpu1." + str(qpu_op_no) +++ if not unit in qtstotal: +++ qtstotal[unit] = 0 +++ qtstotal[unit] += int(match.group(2)) +++ +++ match = rl2hits.match(line) +++ if match: +++ unit = "qpu1." 
+ str(qpu_op_no) +++ if not unit in l2total: +++ l2total[unit] = 0 +++ l2hits[unit] = 0 +++ l2total[unit] += int(match.group(3)) +++ if match.group(2) == "hits": +++ l2hits[unit] += int(match.group(3)) +++ +++ +++ if not time0: +++ print "No v3d profile records found" +++ else: +++ tlogged = time - time0 +++ +++ print "Logged time:", tlogged, " Op count:", op_count +++ for unit in sorted(ttotal): +++ print b'%6s: %10.3f %7.3f%%' % (unit, ttotal[unit], ttotal[unit] * 100.0 / tlogged) +++ print +++ for unit in sorted(qctotal): +++ if not unit in qtstotal: +++ qtstotal[unit] = 0; +++ print b'%6s: Qcycles: %10d, TMU stall: %10d (%7.3f%%)' % (unit, qctotal[unit], qtstotal[unit], (qtstotal[unit] * 100.0)/qctotal[unit]) +++ if unit in l2total: +++ print b' L2Total: %10d, hits: %10d (%7.3f%%)' % (l2total[unit], l2hits[unit], (l2hits[unit] * 100.0)/l2total[unit]) +++ +++ +++ +++if __name__ == '__main__': +++ argp = argparse.ArgumentParser( +++ formatter_class=argparse.RawDescriptionHelpFormatter, +++ description="QPU/VPU perf summary from VC logging", +++ epilog = """ +++Will also summarise TMU stalls if logging requests set in qpu noflush param +++in the profiled code. +++ +++Example use: +++ vcgencmd set_logging level=0xc0 +++ +++ sudo vcdbg log msg >& t.log +++ v3dusage.py t.log +++""") +++ +++ argp.add_argument("logfile") +++ args = argp.parse_args() +++ +++ do_logparse(args.logfile) +++ +diff --git a/tools/depends/target/ffmpeg/CMakeLists.txt b/tools/depends/target/ffmpeg/CMakeLists.txt +index 0bbc23540a..1bf9f53499 100644 +--- a/tools/depends/target/ffmpeg/CMakeLists.txt ++++ b/tools/depends/target/ffmpeg/CMakeLists.txt +@@ -40,6 +40,11 @@ list(APPEND ffmpeg_conf --disable-doc + --extra-version="Kodi" + ) + ++ string(CONCAT CMAKE_C_FLAGS ${CMAKE_C_FLAGS} " -I/opt/vc/include -I/opt/vc/include/interface/vcos/pthreads -I/opt/vc/include/interface/vmcs_host/linux") ++ string(CONCAT CMAKE_EXE_LINKER_FLAGS ${CMAKE_EXE_LINKER_FLAGS} " -L/opt/vc/lib") ++ string(CONCAT CMAKE_MODULE_LINKER_FLAGS ${CMAKE_MODULE_LINKER_FLAGS} " -L/opt/vc/lib") ++ list(APPEND ffmpeg_conf --enable-sand --enable-v4l2-request --enable-libdrm --enable-libudev --disable-hwaccel=h264_v4l2request --disable-hwaccel=mpeg2_v4l2request --disable-hwaccel=vp8_v4l2request) ++ + if(CMAKE_C_FLAGS) + list(APPEND ffmpeg_conf --extra-cflags=${CMAKE_C_FLAGS}) + endif() +-- +2.34.1 + + +From 13b7bc03c1be11acdc93a68aaabecb8b0483bb3c Mon Sep 17 00:00:00 2001 +From: popcornmix +Date: Thu, 14 Jan 2021 18:36:57 +0000 +Subject: [PATCH 05/24] DVDVideoCodecDRMPRIME: Discard corrupt frames + +ffmpeg/V4L2 decoder can set AV_FRAME_FLAG_CORRUPT if the frame failed to decode.# +Pass that onto VideoPlayer so the frame can skip renderer +--- + xbmc/cores/VideoPlayer/DVDCodecs/Video/DVDVideoCodecDRMPRIME.cpp | 1 + + 1 file changed, 1 insertion(+) + +diff --git a/xbmc/cores/VideoPlayer/DVDCodecs/Video/DVDVideoCodecDRMPRIME.cpp b/xbmc/cores/VideoPlayer/DVDCodecs/Video/DVDVideoCodecDRMPRIME.cpp +index eb2943bb8c..d8827e8296 100644 +--- a/xbmc/cores/VideoPlayer/DVDCodecs/Video/DVDVideoCodecDRMPRIME.cpp ++++ b/xbmc/cores/VideoPlayer/DVDCodecs/Video/DVDVideoCodecDRMPRIME.cpp +@@ -582,6 +582,7 @@ void CDVDVideoCodecDRMPRIME::SetPictureParams(VideoPicture* pVideoPicture) + + pVideoPicture->iRepeatPicture = 0; + pVideoPicture->iFlags = 0; ++ pVideoPicture->iFlags |= !(m_pFrame->flags & AV_FRAME_FLAG_CORRUPT) ? 0 : DVP_FLAG_DROPPED; + pVideoPicture->iFlags |= m_pFrame->interlaced_frame ? DVP_FLAG_INTERLACED : 0; + pVideoPicture->iFlags |= m_pFrame->top_field_first ? 
DVP_FLAG_TOP_FIELD_FIRST : 0; + pVideoPicture->iFlags |= m_pFrame->data[0] ? 0 : DVP_FLAG_DROPPED; +-- +2.34.1 + + +From 49ab54b99684f7a364b3b94b56c71b36625eb2fb Mon Sep 17 00:00:00 2001 +From: Dom Cobley +Date: Fri, 3 Dec 2021 16:00:50 +0000 +Subject: [PATCH 06/24] gbm: Set max bpc for high bit depth videos + +--- + .../HwDecRender/VideoLayerBridgeDRMPRIME.cpp | 15 +++++++++++++++ + 1 file changed, 15 insertions(+) + +diff --git a/xbmc/cores/VideoPlayer/VideoRenderers/HwDecRender/VideoLayerBridgeDRMPRIME.cpp b/xbmc/cores/VideoPlayer/VideoRenderers/HwDecRender/VideoLayerBridgeDRMPRIME.cpp +index 34d1ab6235..f1e73ee364 100644 +--- a/xbmc/cores/VideoPlayer/VideoRenderers/HwDecRender/VideoLayerBridgeDRMPRIME.cpp ++++ b/xbmc/cores/VideoPlayer/VideoRenderers/HwDecRender/VideoLayerBridgeDRMPRIME.cpp +@@ -34,6 +34,14 @@ void CVideoLayerBridgeDRMPRIME::Disable() + { + // disable video plane + auto plane = m_DRM->GetVideoPlane(); ++ auto connector = m_DRM->GetConnector(); ++ ++ // reset max bpc back to default of 8 ++ int bpc = 8; ++ bool result = m_DRM->AddProperty(connector, "max bpc", bpc); ++ CLog::Log(LOGDEBUG, "CVideoLayerBridgeDRMPRIME::{} - setting max bpc to {} ({})", ++ __FUNCTION__, bpc, result); ++ + m_DRM->AddProperty(plane, "FB_ID", 0); + m_DRM->AddProperty(plane, "CRTC_ID", 0); + +@@ -175,6 +183,13 @@ void CVideoLayerBridgeDRMPRIME::Configure(CVideoBufferDRMPRIME* buffer) + plane->GetPropertyValue("COLOR_RANGE", GetColorRange(picture)); + if (colorRange) + m_DRM->AddProperty(plane, "COLOR_RANGE", colorRange.value()); ++ ++ // set max bpc to allow the drm driver to choose a deep colour mode ++ int bpc = buffer->GetPicture().colorBits > 8 ? 12 : 8; ++ auto connector = m_DRM->GetConnector(); ++ bool result = m_DRM->AddProperty(connector, "max bpc", bpc); ++ CLog::Log(LOGDEBUG, "CVideoLayerBridgeDRMPRIME::{} - setting max bpc to {} ({})", __FUNCTION__, ++ bpc, result); + } + + void CVideoLayerBridgeDRMPRIME::SetVideoPlane(CVideoBufferDRMPRIME* buffer, const CRect& destRect) +-- +2.34.1 + + +From 413cf5370b1f14888c95565b05c54343a698dc0b Mon Sep 17 00:00:00 2001 +From: Jonas Karlman +Date: Sun, 20 Oct 2019 17:10:07 +0000 +Subject: [PATCH 07/24] WIP: DVDVideoCodecDRMPRIME: add support for filters + +--- + .../DVDCodecs/Video/DVDVideoCodecDRMPRIME.cpp | 62 +++++++++++++++++-- + .../DVDCodecs/Video/DVDVideoCodecDRMPRIME.h | 10 +++ + 2 files changed, 66 insertions(+), 6 deletions(-) + +diff --git a/xbmc/cores/VideoPlayer/DVDCodecs/Video/DVDVideoCodecDRMPRIME.cpp b/xbmc/cores/VideoPlayer/DVDCodecs/Video/DVDVideoCodecDRMPRIME.cpp +index d8827e8296..c8e1d28bc5 100644 +--- a/xbmc/cores/VideoPlayer/DVDCodecs/Video/DVDVideoCodecDRMPRIME.cpp ++++ b/xbmc/cores/VideoPlayer/DVDCodecs/Video/DVDVideoCodecDRMPRIME.cpp +@@ -28,6 +28,8 @@ + extern "C" + { + #include ++#include ++#include + #include + #include + #include +@@ -599,12 +601,30 @@ void CDVDVideoCodecDRMPRIME::SetPictureParams(VideoPicture* pVideoPicture) + pVideoPicture->dts = DVD_NOPTS_VALUE; + } + +-CDVDVideoCodec::VCReturn CDVDVideoCodecDRMPRIME::GetPicture(VideoPicture* pVideoPicture) ++CDVDVideoCodec::VCReturn CDVDVideoCodecDRMPRIME::ProcessFilterIn() + { +- if (m_codecControlFlags & DVD_CODEC_CTRL_DRAIN) +- Drain(); ++ if (!m_pFilterIn) ++ return VC_PICTURE; ++ ++ int ret = av_buffersrc_add_frame(m_pFilterIn, m_pFrame); ++ if (ret < 0) ++ { ++ char err[AV_ERROR_MAX_STRING_SIZE] = {}; ++ av_strerror(ret, err, AV_ERROR_MAX_STRING_SIZE); ++ CLog::Log(LOGERROR, "CDVDVideoCodecDRMPRIME::{} - buffersrc add frame failed: {} ({})", ++ __FUNCTION__, 
err, ret); ++ return VC_ERROR; ++ } + +- int ret = avcodec_receive_frame(m_pCodecContext, m_pFrame); ++ return ProcessFilterOut(); ++} ++ ++CDVDVideoCodec::VCReturn CDVDVideoCodecDRMPRIME::ProcessFilterOut() ++{ ++ if (!m_pFilterOut) ++ return VC_EOF; ++ ++ int ret = av_buffersink_get_frame(m_pFilterOut, m_pFrame); + if (ret == AVERROR(EAGAIN)) + return VC_BUFFER; + else if (ret == AVERROR_EOF) +@@ -621,11 +641,41 @@ CDVDVideoCodec::VCReturn CDVDVideoCodecDRMPRIME::GetPicture(VideoPicture* pVideo + { + char err[AV_ERROR_MAX_STRING_SIZE] = {}; + av_strerror(ret, err, AV_ERROR_MAX_STRING_SIZE); +- CLog::Log(LOGERROR, "CDVDVideoCodecDRMPRIME::{} - receive frame failed: {} ({})", __FUNCTION__, +- err, ret); ++ CLog::Log(LOGERROR, "CDVDVideoCodecDRMPRIME::{} - buffersink get frame failed: {} ({})", ++ __FUNCTION__, err, ret); + return VC_ERROR; + } + ++ return VC_PICTURE; ++} ++ ++CDVDVideoCodec::VCReturn CDVDVideoCodecDRMPRIME::GetPicture(VideoPicture* pVideoPicture) ++{ ++ if (m_codecControlFlags & DVD_CODEC_CTRL_DRAIN) ++ Drain(); ++ ++ auto result = ProcessFilterOut(); ++ if (result != VC_PICTURE) ++ { ++ int ret = avcodec_receive_frame(m_pCodecContext, m_pFrame); ++ if (ret == AVERROR(EAGAIN)) ++ return VC_BUFFER; ++ else if (ret == AVERROR_EOF) ++ return VC_EOF; ++ else if (ret) ++ { ++ char err[AV_ERROR_MAX_STRING_SIZE] = {}; ++ av_strerror(ret, err, AV_ERROR_MAX_STRING_SIZE); ++ CLog::Log(LOGERROR, "CDVDVideoCodecDRMPRIME::{} - receive frame failed: {} ({})", ++ __FUNCTION__, err, ret); ++ return VC_ERROR; ++ } ++ ++ result = ProcessFilterIn(); ++ if (result != VC_PICTURE) ++ return result; ++ } ++ + SetPictureParams(pVideoPicture); + + if (pVideoPicture->videoBuffer) +diff --git a/xbmc/cores/VideoPlayer/DVDCodecs/Video/DVDVideoCodecDRMPRIME.h b/xbmc/cores/VideoPlayer/DVDCodecs/Video/DVDVideoCodecDRMPRIME.h +index db49d165e7..b5cacf1a3c 100644 +--- a/xbmc/cores/VideoPlayer/DVDCodecs/Video/DVDVideoCodecDRMPRIME.h ++++ b/xbmc/cores/VideoPlayer/DVDCodecs/Video/DVDVideoCodecDRMPRIME.h +@@ -14,6 +14,11 @@ + + #include + ++extern "C" ++{ ++#include ++} ++ + class CDVDVideoCodecDRMPRIME : public CDVDVideoCodec + { + public: +@@ -35,6 +40,8 @@ protected: + void Drain(); + void SetPictureParams(VideoPicture* pVideoPicture); + void UpdateProcessInfo(struct AVCodecContext* avctx, const enum AVPixelFormat fmt); ++ CDVDVideoCodec::VCReturn ProcessFilterIn(); ++ CDVDVideoCodec::VCReturn ProcessFilterOut(); + static enum AVPixelFormat GetFormat(struct AVCodecContext* avctx, const enum AVPixelFormat* fmt); + static int GetBuffer(struct AVCodecContext* avctx, AVFrame* frame, int flags); + +@@ -44,5 +51,8 @@ protected: + double m_DAR = 1.0; + AVCodecContext* m_pCodecContext = nullptr; + AVFrame* m_pFrame = nullptr; ++ AVFilterGraph* m_pFilterGraph = nullptr; ++ AVFilterContext* m_pFilterIn = nullptr; ++ AVFilterContext* m_pFilterOut = nullptr; + std::shared_ptr m_videoBufferPool; + }; +-- +2.34.1 + + +From f84f77a9c1f0b8a3a24a9f3cd9dd9de1c8df8f66 Mon Sep 17 00:00:00 2001 +From: Jernej Skrabec +Date: Thu, 26 Dec 2019 11:01:51 +0100 +Subject: [PATCH 08/24] WIP: DRMPRIME deinterlace filter + +--- + .../DVDCodecs/Video/DVDVideoCodecDRMPRIME.cpp | 379 +++++++++++++++--- + .../DVDCodecs/Video/DVDVideoCodecDRMPRIME.h | 9 +- + 2 files changed, 328 insertions(+), 60 deletions(-) + +diff --git a/xbmc/cores/VideoPlayer/DVDCodecs/Video/DVDVideoCodecDRMPRIME.cpp b/xbmc/cores/VideoPlayer/DVDCodecs/Video/DVDVideoCodecDRMPRIME.cpp +index c8e1d28bc5..8476a3981d 100644 +--- 
a/xbmc/cores/VideoPlayer/DVDCodecs/Video/DVDVideoCodecDRMPRIME.cpp ++++ b/xbmc/cores/VideoPlayer/DVDCodecs/Video/DVDVideoCodecDRMPRIME.cpp +@@ -20,6 +20,7 @@ + #include "utils/CPUInfo.h" + #include "utils/StringUtils.h" + #include "utils/log.h" ++#include "utils/StringUtils.h" + + #if defined(HAVE_GBM) + #include "windowing/gbm/WinSystemGbm.h" +@@ -92,12 +93,15 @@ CDVDVideoCodecDRMPRIME::CDVDVideoCodecDRMPRIME(CProcessInfo& processInfo) + : CDVDVideoCodec(processInfo) + { + m_pFrame = av_frame_alloc(); ++ m_pFilterFrame = av_frame_alloc(); + m_videoBufferPool = std::make_shared(); + } + + CDVDVideoCodecDRMPRIME::~CDVDVideoCodecDRMPRIME() + { + av_frame_free(&m_pFrame); ++ av_frame_free(&m_pFilterFrame); ++ FilterClose(); + avcodec_free_context(&m_pCodecContext); + } + +@@ -379,8 +383,19 @@ bool CDVDVideoCodecDRMPRIME::Open(CDVDStreamInfo& hints, CDVDCodecOptions& optio + } + + UpdateProcessInfo(m_pCodecContext, m_pCodecContext->pix_fmt); +- m_processInfo.SetVideoDeintMethod("none"); ++ m_processInfo.SetVideoInterlaced(false); + m_processInfo.SetVideoDAR(hints.aspect); ++ m_processInfo.SetVideoDeintMethod("none"); ++ ++ FilterTest(); ++ ++ if (!m_deintFilterName.empty()) ++ { ++ std::list methods; ++ methods.push_back(EINTERLACEMETHOD::VS_INTERLACEMETHOD_DEINTERLACE); ++ m_processInfo.UpdateDeinterlacingMethods(methods); ++ m_processInfo.SetDeinterlacingMethodDefault(EINTERLACEMETHOD::VS_INTERLACEMETHOD_DEINTERLACE); ++ } + + return true; + } +@@ -456,6 +471,8 @@ void CDVDVideoCodecDRMPRIME::Reset() + return; + + Drain(); ++ m_filters.clear(); ++ FilterClose(); + + do + { +@@ -503,7 +520,7 @@ void CDVDVideoCodecDRMPRIME::Drain() + av_packet_free(&avpkt); + } + +-void CDVDVideoCodecDRMPRIME::SetPictureParams(VideoPicture* pVideoPicture) ++bool CDVDVideoCodecDRMPRIME::SetPictureParams(VideoPicture* pVideoPicture) + { + pVideoPicture->iWidth = m_pFrame->width; + pVideoPicture->iHeight = m_pFrame->height; +@@ -599,13 +616,238 @@ void CDVDVideoCodecDRMPRIME::SetPictureParams(VideoPicture* pVideoPicture) + ? 
DVD_NOPTS_VALUE + : static_cast(pts) * DVD_TIME_BASE / AV_TIME_BASE; + pVideoPicture->dts = DVD_NOPTS_VALUE; ++ ++ if (pVideoPicture->videoBuffer) ++ { ++ pVideoPicture->videoBuffer->Release(); ++ pVideoPicture->videoBuffer = nullptr; ++ } ++ ++ if (IsSupportedHwFormat(static_cast(m_pFrame->format))) ++ { ++ CVideoBufferDRMPRIMEFFmpeg* buffer = ++ dynamic_cast(m_videoBufferPool->Get()); ++ buffer->SetPictureParams(*pVideoPicture); ++ buffer->SetRef(m_pFrame); ++ pVideoPicture->videoBuffer = buffer; ++ } ++ else if (m_pFrame->opaque) ++ { ++ CVideoBufferDMA* buffer = static_cast(m_pFrame->opaque); ++ buffer->SetPictureParams(*pVideoPicture); ++ buffer->Acquire(); ++ buffer->SyncEnd(); ++ buffer->SetDimensions(m_pFrame->width, m_pFrame->height); ++ ++ pVideoPicture->videoBuffer = buffer; ++ av_frame_unref(m_pFrame); ++ } ++ ++ if (!pVideoPicture->videoBuffer) ++ { ++ CLog::Log(LOGERROR, "CDVDVideoCodecDRMPRIME::{} - videoBuffer:nullptr format:{}", __FUNCTION__, ++ av_get_pix_fmt_name(static_cast(m_pFrame->format))); ++ av_frame_unref(m_pFrame); ++ return false; ++ } ++ ++ return true; + } + +-CDVDVideoCodec::VCReturn CDVDVideoCodecDRMPRIME::ProcessFilterIn() ++void CDVDVideoCodecDRMPRIME::FilterTest() + { +- if (!m_pFilterIn) +- return VC_PICTURE; ++ const AVFilter* filter; ++ void* opaque{}; ++ ++ m_deintFilterName.clear(); ++ ++ while ((filter = av_filter_iterate(&opaque)) != nullptr) ++ { ++ std::string name(filter->name); ++ ++ if (name.find("deinterlace") != std::string::npos) ++ { ++ if (FilterOpen(name, true)) ++ { ++ m_deintFilterName = name; + ++ CLog::Log(LOGDEBUG, "CDVDVideoCodecDRMPRIME::{} - found deinterlacing filter {}", ++ __FUNCTION__, name); ++ ++ return; ++ } ++ } ++ } ++ ++ CLog::Log(LOGDEBUG, "CDVDVideoCodecDRMPRIME::{} - no deinterlacing filter found", ++ __FUNCTION__); ++} ++ ++bool CDVDVideoCodecDRMPRIME::FilterOpen(const std::string& filters, bool test) ++{ ++ int result; ++ ++ if (m_pFilterGraph) ++ FilterClose(); ++ ++ if (filters.empty()) ++ return true; ++ ++ if (!(m_pFilterGraph = avfilter_graph_alloc())) ++ { ++ CLog::Log(LOGERROR, "CDVDVideoCodecDRMPRIME::FilterOpen - unable to alloc filter graph"); ++ return false; ++ } ++ ++ const AVFilter* srcFilter = avfilter_get_by_name("buffer"); ++ const AVFilter* outFilter = avfilter_get_by_name("buffersink"); ++ enum AVPixelFormat pix_fmts[] = { AV_PIX_FMT_DRM_PRIME, AV_PIX_FMT_NONE }; ++ ++ std::string args = StringUtils::Format("video_size=%dx%d:pix_fmt=%d:time_base=%d/%d:" ++ "pixel_aspect=%d/%d:sws_param=flags=2", ++ m_pCodecContext->width, ++ m_pCodecContext->height, ++ m_pCodecContext->pix_fmt, ++ m_pCodecContext->time_base.num ? ++ m_pCodecContext->time_base.num : 1, ++ m_pCodecContext->time_base.num ? ++ m_pCodecContext->time_base.den : 1, ++ m_pCodecContext->sample_aspect_ratio.num != 0 ? ++ m_pCodecContext->sample_aspect_ratio.num : 1, ++ m_pCodecContext->sample_aspect_ratio.num != 0 ? 
++ m_pCodecContext->sample_aspect_ratio.den : 1); ++ ++ result = avfilter_graph_create_filter(&m_pFilterIn, srcFilter, "src", ++ args.c_str(), NULL, m_pFilterGraph); ++ if (result < 0) ++ { ++ char err[AV_ERROR_MAX_STRING_SIZE] = {}; ++ av_strerror(result, err, AV_ERROR_MAX_STRING_SIZE); ++ CLog::Log(LOGERROR, ++ "CDVDVideoCodecDRMPRIME::FilterOpen - avfilter_graph_create_filter: src: {} ({})", ++ err, result); ++ return false; ++ } ++ ++ AVBufferSrcParameters *par = av_buffersrc_parameters_alloc(); ++ if (!par) ++ { ++ CLog::Log(LOGERROR, "CDVDVideoCodecDRMPRIME::FilterOpen - unable to alloc buffersrc"); ++ return false; ++ } ++ ++ memset(par, 0, sizeof(*par)); ++ par->format = AV_PIX_FMT_NONE; ++ par->hw_frames_ctx = m_pCodecContext->hw_device_ctx; ++ ++ result = av_buffersrc_parameters_set(m_pFilterIn, par); ++ if (result < 0) ++ { ++ char err[AV_ERROR_MAX_STRING_SIZE] = {}; ++ av_strerror(result, err, AV_ERROR_MAX_STRING_SIZE); ++ CLog::Log(LOGERROR, ++ "CDVDVideoCodecDRMPRIME::FilterOpen - av_buffersrc_parameters_set: {} ({})", ++ err, result); ++ return false; ++ } ++ av_freep(&par); ++ ++ result = avfilter_graph_create_filter(&m_pFilterOut, outFilter, "out", ++ NULL, NULL, m_pFilterGraph); ++ if (result < 0) ++ { ++ char err[AV_ERROR_MAX_STRING_SIZE] = {}; ++ av_strerror(result, err, AV_ERROR_MAX_STRING_SIZE); ++ CLog::Log(LOGERROR, ++ "CDVDVideoCodecDRMPRIME::FilterOpen - avfilter_graph_create_filter: out: {} ({})", ++ err, result); ++ return false; ++ } ++ ++ result = av_opt_set_int_list(m_pFilterOut, "pix_fmts", &pix_fmts[0], ++ AV_PIX_FMT_NONE, AV_OPT_SEARCH_CHILDREN); ++ if (result < 0) ++ { ++ CLog::Log(LOGERROR, "CDVDVideoCodecDRMPRIME::FilterOpen - failed settings pix formats"); ++ return false; ++ } ++ ++ AVFilterInOut* outputs = avfilter_inout_alloc(); ++ AVFilterInOut* inputs = avfilter_inout_alloc(); ++ ++ outputs->name = av_strdup("in"); ++ outputs->filter_ctx = m_pFilterIn; ++ outputs->pad_idx = 0; ++ outputs->next = nullptr; ++ ++ inputs->name = av_strdup("out"); ++ inputs->filter_ctx = m_pFilterOut; ++ inputs->pad_idx = 0; ++ inputs->next = nullptr; ++ ++ result = avfilter_graph_parse_ptr(m_pFilterGraph, filters.c_str(), &inputs, &outputs, NULL); ++ avfilter_inout_free(&outputs); ++ avfilter_inout_free(&inputs); ++ ++ if (result < 0) ++ { ++ CLog::Log(LOGERROR, "CDVDVideoCodecDRMPRIME::FilterOpen - avfilter_graph_parse"); ++ return false; ++ } ++ ++ if ((result = avfilter_graph_config(m_pFilterGraph, nullptr)) < 0) ++ { ++ char err[AV_ERROR_MAX_STRING_SIZE] = {}; ++ av_strerror(result, err, AV_ERROR_MAX_STRING_SIZE); ++ CLog::Log(LOGERROR, "CDVDVideoCodecDRMPRIME::FilterOpen - avfilter_graph_config: {} ({})", ++ err, result); ++ return false; ++ } ++ ++ if (test) ++ { ++ FilterClose(); ++ return true; ++ } ++ ++ if (filters.find("deinterlace") != std::string::npos) ++ { ++ m_processInfo.SetVideoDeintMethod(filters); ++ } ++ else ++ { ++ m_processInfo.SetVideoDeintMethod("none"); ++ } ++ ++ if (CServiceBroker::GetLogging().CanLogComponent(LOGVIDEO)) ++ { ++ char* graphDump = avfilter_graph_dump(m_pFilterGraph, nullptr); ++ if (graphDump) ++ { ++ CLog::Log(LOGDEBUG, "CDVDVideoCodecDRMPRIME::FilterOpen - Final filter graph:\n%s", ++ graphDump); ++ av_freep(&graphDump); ++ } ++ } ++ ++ return true; ++} ++ ++void CDVDVideoCodecDRMPRIME::FilterClose() ++{ ++ if (m_pFilterGraph) ++ { ++ CLog::Log(LOGDEBUG, LOGVIDEO, "CDVDVideoCodecDRMPRIME::FilterClose - Freeing filter graph"); ++ avfilter_graph_free(&m_pFilterGraph); ++ ++ // Disposed by above code ++ m_pFilterIn = 
nullptr; ++ m_pFilterOut = nullptr; ++ } ++} ++ ++CDVDVideoCodec::VCReturn CDVDVideoCodecDRMPRIME::ProcessFilterIn() ++{ + int ret = av_buffersrc_add_frame(m_pFilterIn, m_pFrame); + if (ret < 0) + { +@@ -621,21 +863,14 @@ CDVDVideoCodec::VCReturn CDVDVideoCodecDRMPRIME::ProcessFilterIn() + + CDVDVideoCodec::VCReturn CDVDVideoCodecDRMPRIME::ProcessFilterOut() + { +- if (!m_pFilterOut) +- return VC_EOF; +- +- int ret = av_buffersink_get_frame(m_pFilterOut, m_pFrame); ++ int ret = av_buffersink_get_frame(m_pFilterOut, m_pFilterFrame); + if (ret == AVERROR(EAGAIN)) + return VC_BUFFER; + else if (ret == AVERROR_EOF) + { +- if (m_codecControlFlags & DVD_CODEC_CTRL_DRAIN) +- { +- CLog::Log(LOGDEBUG, "CDVDVideoCodecDRMPRIME::{} - flush buffers", __FUNCTION__); +- avcodec_flush_buffers(m_pCodecContext); +- SetCodecControl(m_codecControlFlags & ~DVD_CODEC_CTRL_DRAIN); +- } +- return VC_EOF; ++ ret = av_buffersink_get_frame(m_pFilterOut, m_pFilterFrame); ++ if (ret < 0) ++ return VC_BUFFER; + } + else if (ret) + { +@@ -646,71 +881,97 @@ CDVDVideoCodec::VCReturn CDVDVideoCodecDRMPRIME::ProcessFilterOut() + return VC_ERROR; + } + ++ av_frame_unref(m_pFrame); ++ av_frame_move_ref(m_pFrame, m_pFilterFrame); ++ + return VC_PICTURE; + } + ++std::string CDVDVideoCodecDRMPRIME::GetFilterChain(bool interlaced) ++{ ++ // ask codec to do deinterlacing if possible ++ EINTERLACEMETHOD mInt = m_processInfo.GetVideoSettings().m_InterlaceMethod; ++ std::string filterChain; ++ ++ if (!m_processInfo.Supports(mInt)) ++ mInt = m_processInfo.GetFallbackDeintMethod(); ++ ++ if (mInt != VS_INTERLACEMETHOD_NONE && interlaced && !m_deintFilterName.empty()) ++ filterChain += m_deintFilterName; ++ ++ return filterChain; ++} ++ + CDVDVideoCodec::VCReturn CDVDVideoCodecDRMPRIME::GetPicture(VideoPicture* pVideoPicture) + { + if (m_codecControlFlags & DVD_CODEC_CTRL_DRAIN) + Drain(); + +- auto result = ProcessFilterOut(); +- if (result != VC_PICTURE) ++ if (m_pFilterGraph) + { +- int ret = avcodec_receive_frame(m_pCodecContext, m_pFrame); +- if (ret == AVERROR(EAGAIN)) +- return VC_BUFFER; +- else if (ret == AVERROR_EOF) +- return VC_EOF; +- else if (ret) ++ auto ret = ProcessFilterOut(); ++ if (ret == VC_PICTURE) + { +- char err[AV_ERROR_MAX_STRING_SIZE] = {}; +- av_strerror(ret, err, AV_ERROR_MAX_STRING_SIZE); +- CLog::Log(LOGERROR, "CDVDVideoCodecDRMPRIME::{} - receive frame failed: {} ({})", +- __FUNCTION__, err, ret); +- return VC_ERROR; ++ if (!SetPictureParams(pVideoPicture)) ++ return VC_ERROR; ++ return VC_PICTURE; ++ } ++ else if (ret != VC_BUFFER) ++ { ++ return ret; + } +- +- result = ProcessFilterIn(); +- if (result != VC_PICTURE) +- return result; + } + +- SetPictureParams(pVideoPicture); +- +- if (pVideoPicture->videoBuffer) ++ int ret = avcodec_receive_frame(m_pCodecContext, m_pFrame); ++ if (ret == AVERROR(EAGAIN)) ++ return VC_BUFFER; ++ else if (ret == AVERROR_EOF) ++ return VC_EOF; ++ else if (ret) + { +- pVideoPicture->videoBuffer->Release(); +- pVideoPicture->videoBuffer = nullptr; ++ char err[AV_ERROR_MAX_STRING_SIZE] = {}; ++ av_strerror(ret, err, AV_ERROR_MAX_STRING_SIZE); ++ CLog::Log(LOGERROR, "CDVDVideoCodecDRMPRIME::{} - receive frame failed: {} ({})", ++ __FUNCTION__, err, ret); ++ return VC_ERROR; + } + +- if (IsSupportedHwFormat(static_cast(m_pFrame->format))) ++ if (!m_processInfo.GetVideoInterlaced() && m_pFrame->interlaced_frame) ++ m_processInfo.SetVideoInterlaced(true); ++ ++ std::string filterChain = GetFilterChain(m_pFrame->interlaced_frame); ++ if (!filterChain.empty()) + { +- 
CVideoBufferDRMPRIMEFFmpeg* buffer = +- dynamic_cast(m_videoBufferPool->Get()); +- buffer->SetPictureParams(*pVideoPicture); +- buffer->SetRef(m_pFrame); +- pVideoPicture->videoBuffer = buffer; ++ bool reopenFilter = false; ++ if (m_filters != filterChain) ++ reopenFilter = true; ++ ++ if (m_pFilterGraph && ++ (m_pFilterIn->outputs[0]->w != m_pCodecContext->width || ++ m_pFilterIn->outputs[0]->h != m_pCodecContext->height)) ++ reopenFilter = true; ++ ++ if (reopenFilter) ++ { ++ m_filters = filterChain; ++ if (!FilterOpen(filterChain, false)) ++ FilterClose(); ++ } ++ ++ if (m_pFilterGraph) ++ { ++ if (ProcessFilterIn() != VC_PICTURE) ++ return VC_NONE; ++ } + } +- else if (m_pFrame->opaque) ++ else + { +- CVideoBufferDMA* buffer = static_cast(m_pFrame->opaque); +- buffer->SetPictureParams(*pVideoPicture); +- buffer->Acquire(); +- buffer->SyncEnd(); +- buffer->SetDimensions(m_pFrame->width, m_pFrame->height); +- +- pVideoPicture->videoBuffer = buffer; +- av_frame_unref(m_pFrame); ++ m_filters.clear(); ++ FilterClose(); + } + +- if (!pVideoPicture->videoBuffer) +- { +- CLog::Log(LOGERROR, "CDVDVideoCodecDRMPRIME::{} - videoBuffer:nullptr format:{}", __FUNCTION__, +- av_get_pix_fmt_name(static_cast(m_pFrame->format))); +- av_frame_unref(m_pFrame); ++ if (!SetPictureParams(pVideoPicture)) + return VC_ERROR; +- } + + return VC_PICTURE; + } +diff --git a/xbmc/cores/VideoPlayer/DVDCodecs/Video/DVDVideoCodecDRMPRIME.h b/xbmc/cores/VideoPlayer/DVDCodecs/Video/DVDVideoCodecDRMPRIME.h +index b5cacf1a3c..fab3431d40 100644 +--- a/xbmc/cores/VideoPlayer/DVDCodecs/Video/DVDVideoCodecDRMPRIME.h ++++ b/xbmc/cores/VideoPlayer/DVDCodecs/Video/DVDVideoCodecDRMPRIME.h +@@ -38,19 +38,26 @@ public: + + protected: + void Drain(); +- void SetPictureParams(VideoPicture* pVideoPicture); ++ bool SetPictureParams(VideoPicture* pVideoPicture); + void UpdateProcessInfo(struct AVCodecContext* avctx, const enum AVPixelFormat fmt); + CDVDVideoCodec::VCReturn ProcessFilterIn(); + CDVDVideoCodec::VCReturn ProcessFilterOut(); + static enum AVPixelFormat GetFormat(struct AVCodecContext* avctx, const enum AVPixelFormat* fmt); + static int GetBuffer(struct AVCodecContext* avctx, AVFrame* frame, int flags); ++ bool FilterOpen(const std::string& filters, bool test); ++ void FilterClose(); ++ void FilterTest(); ++ std::string GetFilterChain(bool interlaced); + + std::string m_name; ++ std::string m_deintFilterName; ++ std::string m_filters; + int m_codecControlFlags = 0; + CDVDStreamInfo m_hints; + double m_DAR = 1.0; + AVCodecContext* m_pCodecContext = nullptr; + AVFrame* m_pFrame = nullptr; ++ AVFrame* m_pFilterFrame = nullptr; + AVFilterGraph* m_pFilterGraph = nullptr; + AVFilterContext* m_pFilterIn = nullptr; + AVFilterContext* m_pFilterOut = nullptr; +-- +2.34.1 + + +From 6328258a5eafd638962f1ae7ab69ee99d0a3fbcf Mon Sep 17 00:00:00 2001 +From: Dom Cobley +Date: Wed, 24 Nov 2021 20:22:41 +0000 +Subject: [PATCH 09/24] CDVDVideoCodecDRMPRIME: Fix Format calls and some + logging + +--- + .../VideoPlayer/DVDCodecs/Video/DVDVideoCodecDRMPRIME.cpp | 6 +++--- + 1 file changed, 3 insertions(+), 3 deletions(-) + +diff --git a/xbmc/cores/VideoPlayer/DVDCodecs/Video/DVDVideoCodecDRMPRIME.cpp b/xbmc/cores/VideoPlayer/DVDCodecs/Video/DVDVideoCodecDRMPRIME.cpp +index 8476a3981d..ef8819c72b 100644 +--- a/xbmc/cores/VideoPlayer/DVDCodecs/Video/DVDVideoCodecDRMPRIME.cpp ++++ b/xbmc/cores/VideoPlayer/DVDCodecs/Video/DVDVideoCodecDRMPRIME.cpp +@@ -703,8 +703,8 @@ bool CDVDVideoCodecDRMPRIME::FilterOpen(const std::string& filters, bool test) + 
const AVFilter* outFilter = avfilter_get_by_name("buffersink"); + enum AVPixelFormat pix_fmts[] = { AV_PIX_FMT_DRM_PRIME, AV_PIX_FMT_NONE }; + +- std::string args = StringUtils::Format("video_size=%dx%d:pix_fmt=%d:time_base=%d/%d:" +- "pixel_aspect=%d/%d:sws_param=flags=2", ++ std::string args = StringUtils::Format("video_size={}x{}:pix_fmt={}:time_base={}/{}:" ++ "pixel_aspect={}/{}:sws_param=flags=2", + m_pCodecContext->width, + m_pCodecContext->height, + m_pCodecContext->pix_fmt, +@@ -824,7 +824,7 @@ bool CDVDVideoCodecDRMPRIME::FilterOpen(const std::string& filters, bool test) + char* graphDump = avfilter_graph_dump(m_pFilterGraph, nullptr); + if (graphDump) + { +- CLog::Log(LOGDEBUG, "CDVDVideoCodecDRMPRIME::FilterOpen - Final filter graph:\n%s", ++ CLog::Log(LOGDEBUG, "CDVDVideoCodecDRMPRIME::FilterOpen - Final filter graph:\n{}", + graphDump); + av_freep(&graphDump); + } +-- +2.34.1 + + +From 9e3d889343073fda5fc3e02fbfc205eeb5bbc929 Mon Sep 17 00:00:00 2001 +From: popcornmix +Date: Fri, 27 Aug 2021 20:29:50 +0100 +Subject: [PATCH 10/24] DVDVideoCodecDRMPRIME: Avoid exception with + AV_PIX_FMT_NONE + +--- + .../cores/VideoPlayer/DVDCodecs/Video/DVDVideoCodecDRMPRIME.cpp | 2 +- + 1 file changed, 1 insertion(+), 1 deletion(-) + +diff --git a/xbmc/cores/VideoPlayer/DVDCodecs/Video/DVDVideoCodecDRMPRIME.cpp b/xbmc/cores/VideoPlayer/DVDCodecs/Video/DVDVideoCodecDRMPRIME.cpp +index ef8819c72b..98b0830488 100644 +--- a/xbmc/cores/VideoPlayer/DVDCodecs/Video/DVDVideoCodecDRMPRIME.cpp ++++ b/xbmc/cores/VideoPlayer/DVDCodecs/Video/DVDVideoCodecDRMPRIME.cpp +@@ -646,7 +646,7 @@ bool CDVDVideoCodecDRMPRIME::SetPictureParams(VideoPicture* pVideoPicture) + if (!pVideoPicture->videoBuffer) + { + CLog::Log(LOGERROR, "CDVDVideoCodecDRMPRIME::{} - videoBuffer:nullptr format:{}", __FUNCTION__, +- av_get_pix_fmt_name(static_cast(m_pFrame->format))); ++ m_pFrame->format == AV_PIX_FMT_NONE ? 
"AV_PIX_FMT_NONE" : av_get_pix_fmt_name(static_cast(m_pFrame->format))); + av_frame_unref(m_pFrame); + return false; + } +-- +2.34.1 + + +From b117d37af068b5958dcc5de2e3395c7664fa7077 Mon Sep 17 00:00:00 2001 +From: popcornmix +Date: Sat, 11 Sep 2021 14:03:05 +0100 +Subject: [PATCH 11/24] CDVDVideoCodecDRMPRIME: Also support YUV420 buffers + +CDVDVideoCodecDRMPRIME: Add support for deinterlace of sw decoded buffers + +Need to call SetDimensions earlier and store the drm descriptor in expected place +--- + .../DVDCodecs/Video/DVDVideoCodecDRMPRIME.cpp | 14 ++++++++++++-- + 1 file changed, 12 insertions(+), 2 deletions(-) + +diff --git a/xbmc/cores/VideoPlayer/DVDCodecs/Video/DVDVideoCodecDRMPRIME.cpp b/xbmc/cores/VideoPlayer/DVDCodecs/Video/DVDVideoCodecDRMPRIME.cpp +index 98b0830488..72064b8310 100644 +--- a/xbmc/cores/VideoPlayer/DVDCodecs/Video/DVDVideoCodecDRMPRIME.cpp ++++ b/xbmc/cores/VideoPlayer/DVDCodecs/Video/DVDVideoCodecDRMPRIME.cpp +@@ -623,7 +623,7 @@ bool CDVDVideoCodecDRMPRIME::SetPictureParams(VideoPicture* pVideoPicture) + pVideoPicture->videoBuffer = nullptr; + } + +- if (IsSupportedHwFormat(static_cast(m_pFrame->format))) ++ if (m_pFrame->format == AV_PIX_FMT_DRM_PRIME) + { + CVideoBufferDRMPRIMEFFmpeg* buffer = + dynamic_cast(m_videoBufferPool->Get()); +@@ -701,7 +701,7 @@ bool CDVDVideoCodecDRMPRIME::FilterOpen(const std::string& filters, bool test) + + const AVFilter* srcFilter = avfilter_get_by_name("buffer"); + const AVFilter* outFilter = avfilter_get_by_name("buffersink"); +- enum AVPixelFormat pix_fmts[] = { AV_PIX_FMT_DRM_PRIME, AV_PIX_FMT_NONE }; ++ enum AVPixelFormat pix_fmts[] = { AV_PIX_FMT_DRM_PRIME, AV_PIX_FMT_YUV420P, AV_PIX_FMT_NONE }; + + std::string args = StringUtils::Format("video_size={}x{}:pix_fmt={}:time_base={}/{}:" + "pixel_aspect={}/{}:sws_param=flags=2", +@@ -848,6 +848,16 @@ void CDVDVideoCodecDRMPRIME::FilterClose() + + CDVDVideoCodec::VCReturn CDVDVideoCodecDRMPRIME::ProcessFilterIn() + { ++ // sw decoded buffers need cache flush and for descripter to be set ++ if (!IsSupportedHwFormat(static_cast(m_pFrame->format)) && m_pFrame->opaque != nullptr) ++ { ++ CVideoBufferDMA* buffer = static_cast(m_pFrame->opaque); ++ buffer->SetDimensions(m_pFrame->width, m_pFrame->height); ++ buffer->SyncEnd(); ++ auto descriptor = buffer->GetDescriptor(); ++ m_pFrame->data[0] = reinterpret_cast(descriptor); ++ } ++ + int ret = av_buffersrc_add_frame(m_pFilterIn, m_pFrame); + if (ret < 0) + { +-- +2.34.1 + + +From b40ce61b487f4d2883da72de7cc650f12fcd47e8 Mon Sep 17 00:00:00 2001 +From: popcornmix +Date: Fri, 17 Sep 2021 15:23:16 +0100 +Subject: [PATCH 12/24] DVDVideoCodecDRMPRIME: Leave deinterlace filter active + on a progressive frame + +Interlaced content often has strange mixtures of interlace and progressive frames (e.g. IIPPPPIIPPPP) +and currently we can be creating and destroying the deinterlace filter graph almost every frame. + +If it's been created, then leave it active until end of file. 
The frames marked as progressive should +be just copied by deinterlace filter +--- + .../VideoPlayer/DVDCodecs/Video/DVDVideoCodecDRMPRIME.cpp | 4 ++++ + 1 file changed, 4 insertions(+) + +diff --git a/xbmc/cores/VideoPlayer/DVDCodecs/Video/DVDVideoCodecDRMPRIME.cpp b/xbmc/cores/VideoPlayer/DVDCodecs/Video/DVDVideoCodecDRMPRIME.cpp +index 72064b8310..76f9ad49cd 100644 +--- a/xbmc/cores/VideoPlayer/DVDCodecs/Video/DVDVideoCodecDRMPRIME.cpp ++++ b/xbmc/cores/VideoPlayer/DVDCodecs/Video/DVDVideoCodecDRMPRIME.cpp +@@ -906,6 +906,10 @@ std::string CDVDVideoCodecDRMPRIME::GetFilterChain(bool interlaced) + if (!m_processInfo.Supports(mInt)) + mInt = m_processInfo.GetFallbackDeintMethod(); + ++ // avoid disabling deinterlace graph for occasional progressive frames - they will be copied by deinterlace ++ if (!m_filters.empty()) ++ interlaced = true; ++ + if (mInt != VS_INTERLACEMETHOD_NONE && interlaced && !m_deintFilterName.empty()) + filterChain += m_deintFilterName; + +-- +2.34.1 + + +From 74ad14a451626fec56a25803e8ffa6f20fef4307 Mon Sep 17 00:00:00 2001 +From: Dom Cobley +Date: Tue, 30 Nov 2021 16:05:06 +0000 +Subject: [PATCH 13/24] SetVideoInterlaced: Set and unset deinterlace method + name reported + +--- + .../DVDCodecs/Video/DVDVideoCodecDRMPRIME.cpp | 10 ++-------- + 1 file changed, 2 insertions(+), 8 deletions(-) + +diff --git a/xbmc/cores/VideoPlayer/DVDCodecs/Video/DVDVideoCodecDRMPRIME.cpp b/xbmc/cores/VideoPlayer/DVDCodecs/Video/DVDVideoCodecDRMPRIME.cpp +index 76f9ad49cd..a49418bc0e 100644 +--- a/xbmc/cores/VideoPlayer/DVDCodecs/Video/DVDVideoCodecDRMPRIME.cpp ++++ b/xbmc/cores/VideoPlayer/DVDCodecs/Video/DVDVideoCodecDRMPRIME.cpp +@@ -810,14 +810,7 @@ bool CDVDVideoCodecDRMPRIME::FilterOpen(const std::string& filters, bool test) + return true; + } + +- if (filters.find("deinterlace") != std::string::npos) +- { +- m_processInfo.SetVideoDeintMethod(filters); +- } +- else +- { +- m_processInfo.SetVideoDeintMethod("none"); +- } ++ m_processInfo.SetVideoDeintMethod(filters); + + if (CServiceBroker::GetLogging().CanLogComponent(LOGVIDEO)) + { +@@ -835,6 +828,7 @@ bool CDVDVideoCodecDRMPRIME::FilterOpen(const std::string& filters, bool test) + + void CDVDVideoCodecDRMPRIME::FilterClose() + { ++ m_processInfo.SetVideoDeintMethod("none"); + if (m_pFilterGraph) + { + CLog::Log(LOGDEBUG, LOGVIDEO, "CDVDVideoCodecDRMPRIME::FilterClose - Freeing filter graph"); +-- +2.34.1 + + +From 6285a4d1465c6dc193d49a1752b7a9b6e90c3686 Mon Sep 17 00:00:00 2001 +From: Dom Cobley +Date: Wed, 24 Nov 2021 20:21:28 +0000 +Subject: [PATCH 14/24] DVDVideoCodecDRMPRIME: Close deinterlace filter on + error + +Otherwise we crash later with an invalid m_pFilterGraph pointer +--- + .../VideoPlayer/DVDCodecs/Video/DVDVideoCodecDRMPRIME.cpp | 7 +++++++ + 1 file changed, 7 insertions(+) + +diff --git a/xbmc/cores/VideoPlayer/DVDCodecs/Video/DVDVideoCodecDRMPRIME.cpp b/xbmc/cores/VideoPlayer/DVDCodecs/Video/DVDVideoCodecDRMPRIME.cpp +index a49418bc0e..fa7c4b28e5 100644 +--- a/xbmc/cores/VideoPlayer/DVDCodecs/Video/DVDVideoCodecDRMPRIME.cpp ++++ b/xbmc/cores/VideoPlayer/DVDCodecs/Video/DVDVideoCodecDRMPRIME.cpp +@@ -726,6 +726,7 @@ bool CDVDVideoCodecDRMPRIME::FilterOpen(const std::string& filters, bool test) + CLog::Log(LOGERROR, + "CDVDVideoCodecDRMPRIME::FilterOpen - avfilter_graph_create_filter: src: {} ({})", + err, result); ++ FilterClose(); + return false; + } + +@@ -733,6 +734,7 @@ bool CDVDVideoCodecDRMPRIME::FilterOpen(const std::string& filters, bool test) + if (!par) + { + CLog::Log(LOGERROR, 
"CDVDVideoCodecDRMPRIME::FilterOpen - unable to alloc buffersrc"); ++ FilterClose(); + return false; + } + +@@ -748,6 +750,7 @@ bool CDVDVideoCodecDRMPRIME::FilterOpen(const std::string& filters, bool test) + CLog::Log(LOGERROR, + "CDVDVideoCodecDRMPRIME::FilterOpen - av_buffersrc_parameters_set: {} ({})", + err, result); ++ FilterClose(); + return false; + } + av_freep(&par); +@@ -761,6 +764,7 @@ bool CDVDVideoCodecDRMPRIME::FilterOpen(const std::string& filters, bool test) + CLog::Log(LOGERROR, + "CDVDVideoCodecDRMPRIME::FilterOpen - avfilter_graph_create_filter: out: {} ({})", + err, result); ++ FilterClose(); + return false; + } + +@@ -769,6 +773,7 @@ bool CDVDVideoCodecDRMPRIME::FilterOpen(const std::string& filters, bool test) + if (result < 0) + { + CLog::Log(LOGERROR, "CDVDVideoCodecDRMPRIME::FilterOpen - failed settings pix formats"); ++ FilterClose(); + return false; + } + +@@ -792,6 +797,7 @@ bool CDVDVideoCodecDRMPRIME::FilterOpen(const std::string& filters, bool test) + if (result < 0) + { + CLog::Log(LOGERROR, "CDVDVideoCodecDRMPRIME::FilterOpen - avfilter_graph_parse"); ++ FilterClose(); + return false; + } + +@@ -801,6 +807,7 @@ bool CDVDVideoCodecDRMPRIME::FilterOpen(const std::string& filters, bool test) + av_strerror(result, err, AV_ERROR_MAX_STRING_SIZE); + CLog::Log(LOGERROR, "CDVDVideoCodecDRMPRIME::FilterOpen - avfilter_graph_config: {} ({})", + err, result); ++ FilterClose(); + return false; + } + +-- +2.34.1 + + +From 9b2aa82a0a69fe6c06249a43496256f29c87b19e Mon Sep 17 00:00:00 2001 +From: Dom Cobley +Date: Wed, 18 Jan 2023 16:41:00 +0000 +Subject: [PATCH 15/24] CDVDVideoCodecDRMPRIME: Adjust av formats to match + recent ffmpeg changes + +--- + .../VideoPlayer/DVDCodecs/Video/DVDVideoCodecDRMPRIME.cpp | 8 +++++--- + 1 file changed, 5 insertions(+), 3 deletions(-) + +diff --git a/xbmc/cores/VideoPlayer/DVDCodecs/Video/DVDVideoCodecDRMPRIME.cpp b/xbmc/cores/VideoPlayer/DVDCodecs/Video/DVDVideoCodecDRMPRIME.cpp +index fa7c4b28e5..ced0ebac8d 100644 +--- a/xbmc/cores/VideoPlayer/DVDCodecs/Video/DVDVideoCodecDRMPRIME.cpp ++++ b/xbmc/cores/VideoPlayer/DVDCodecs/Video/DVDVideoCodecDRMPRIME.cpp +@@ -355,6 +355,7 @@ bool CDVDVideoCodecDRMPRIME::Open(CDVDStreamInfo& hints, CDVDCodecOptions& optio + m_pCodecContext->bits_per_coded_sample = hints.bitsperpixel; + m_pCodecContext->time_base.num = 1; + m_pCodecContext->time_base.den = DVD_TIME_BASE; ++ m_pCodecContext->thread_safe_callbacks = 1; + m_pCodecContext->thread_count = CServiceBroker::GetCPUInfo()->GetCPUCount(); + + if (hints.extradata) +@@ -701,13 +702,13 @@ bool CDVDVideoCodecDRMPRIME::FilterOpen(const std::string& filters, bool test) + + const AVFilter* srcFilter = avfilter_get_by_name("buffer"); + const AVFilter* outFilter = avfilter_get_by_name("buffersink"); +- enum AVPixelFormat pix_fmts[] = { AV_PIX_FMT_DRM_PRIME, AV_PIX_FMT_YUV420P, AV_PIX_FMT_NONE }; ++ enum AVPixelFormat pix_fmts[] = { AV_PIX_FMT_DRM_PRIME, AV_PIX_FMT_NONE }; + + std::string args = StringUtils::Format("video_size={}x{}:pix_fmt={}:time_base={}/{}:" +- "pixel_aspect={}/{}:sws_param=flags=2", ++ "pixel_aspect={}/{}", + m_pCodecContext->width, + m_pCodecContext->height, +- m_pCodecContext->pix_fmt, ++ AV_PIX_FMT_DRM_PRIME, + m_pCodecContext->time_base.num ? + m_pCodecContext->time_base.num : 1, + m_pCodecContext->time_base.num ? 
+@@ -859,6 +860,7 @@ CDVDVideoCodec::VCReturn CDVDVideoCodecDRMPRIME::ProcessFilterIn() + m_pFrame->data[0] = reinterpret_cast(descriptor); + } + ++ m_pFrame->format = AV_PIX_FMT_DRM_PRIME; + int ret = av_buffersrc_add_frame(m_pFilterIn, m_pFrame); + if (ret < 0) + { +-- +2.34.1 + + +From 3524b47a3153011d6c5afddddac3c280b7f37c8a Mon Sep 17 00:00:00 2001 +From: Dom Cobley +Date: Mon, 6 Feb 2023 15:19:51 +0000 +Subject: [PATCH 16/24] DVDVideoCodecDRMPRIME: Add support for arbitrary output + pixel formats + +This enables any ffmpeg pixel formats to be supported by DRMPRIME decoder +by creating a scale ffmpeg filter to convert it to a supported format. + +This allows formats like h264 Hi10P and hevc 12-bit 444 to be software decoded, +converted and displayed through DRM. + +This will be a cheaper path than disabling DRMPRIME, which is also +software decode, convert, but then needs convert to texture and display through GL. + +And it happens automatically without requiring user video settings +--- + .../DVDCodecs/Video/DVDVideoCodecDRMPRIME.cpp | 124 +++++++++++------- + .../DVDCodecs/Video/DVDVideoCodecDRMPRIME.h | 3 +- + 2 files changed, 77 insertions(+), 50 deletions(-) + +diff --git a/xbmc/cores/VideoPlayer/DVDCodecs/Video/DVDVideoCodecDRMPRIME.cpp b/xbmc/cores/VideoPlayer/DVDCodecs/Video/DVDVideoCodecDRMPRIME.cpp +index ced0ebac8d..62fc0cf822 100644 +--- a/xbmc/cores/VideoPlayer/DVDCodecs/Video/DVDVideoCodecDRMPRIME.cpp ++++ b/xbmc/cores/VideoPlayer/DVDCodecs/Video/DVDVideoCodecDRMPRIME.cpp +@@ -219,7 +219,7 @@ enum AVPixelFormat CDVDVideoCodecDRMPRIME::GetFormat(struct AVCodecContext* avct + { + for (int n = 0; fmt[n] != AV_PIX_FMT_NONE; n++) + { +- if (IsSupportedHwFormat(fmt[n]) || IsSupportedSwFormat(fmt[n])) ++ //if (IsSupportedHwFormat(fmt[n]) || IsSupportedSwFormat(fmt[n])) + { + CDVDVideoCodecDRMPRIME* ctx = static_cast(avctx->opaque); + ctx->UpdateProcessInfo(avctx, fmt[n]); +@@ -240,7 +240,8 @@ enum AVPixelFormat CDVDVideoCodecDRMPRIME::GetFormat(struct AVCodecContext* avct + + int CDVDVideoCodecDRMPRIME::GetBuffer(struct AVCodecContext* avctx, AVFrame* frame, int flags) + { +- if (IsSupportedSwFormat(static_cast(frame->format))) ++ AVPixelFormat pix_fmt = static_cast(frame->format); ++ if (IsSupportedSwFormat(pix_fmt)) + { + int width = frame->width; + int height = frame->height; +@@ -248,7 +249,7 @@ int CDVDVideoCodecDRMPRIME::GetBuffer(struct AVCodecContext* avctx, AVFrame* fra + AlignedSize(avctx, width, height); + + int size; +- switch (avctx->pix_fmt) ++ switch (pix_fmt) + { + case AV_PIX_FMT_YUV420P: + case AV_PIX_FMT_YUVJ420P: +@@ -268,13 +269,12 @@ int CDVDVideoCodecDRMPRIME::GetBuffer(struct AVCodecContext* avctx, AVFrame* fra + + CDVDVideoCodecDRMPRIME* ctx = static_cast(avctx->opaque); + auto buffer = dynamic_cast( +- ctx->m_processInfo.GetVideoBufferManager().Get(avctx->pix_fmt, size, nullptr)); ++ ctx->m_processInfo.GetVideoBufferManager().Get(pix_fmt, size, nullptr)); + if (!buffer) + return -1; + +- frame->opaque = static_cast(buffer); + frame->opaque_ref = +- av_buffer_create(nullptr, 0, ReleaseBuffer, frame->opaque, AV_BUFFER_FLAG_READONLY); ++ av_buffer_create(nullptr, 0, ReleaseBuffer, static_cast(buffer), AV_BUFFER_FLAG_READONLY); + + buffer->Export(frame, width, height); + buffer->SyncStart(); +@@ -632,9 +632,9 @@ bool CDVDVideoCodecDRMPRIME::SetPictureParams(VideoPicture* pVideoPicture) + buffer->SetRef(m_pFrame); + pVideoPicture->videoBuffer = buffer; + } +- else if (m_pFrame->opaque) ++ else if (IsSupportedSwFormat(static_cast(m_pFrame->format))) + { +- 
CVideoBufferDMA* buffer = static_cast(m_pFrame->opaque); ++ CVideoBufferDMA* buffer = static_cast(av_buffer_get_opaque(m_pFrame->buf[0])); + buffer->SetPictureParams(*pVideoPicture); + buffer->Acquire(); + buffer->SyncEnd(); +@@ -668,13 +668,13 @@ void CDVDVideoCodecDRMPRIME::FilterTest() + + if (name.find("deinterlace") != std::string::npos) + { +- if (FilterOpen(name, true)) ++ bool ret = FilterOpen(name, false, true); ++ FilterClose(); ++ if (ret) + { + m_deintFilterName = name; +- + CLog::Log(LOGDEBUG, "CDVDVideoCodecDRMPRIME::{} - found deinterlacing filter {}", + __FUNCTION__, name); +- + return; + } + } +@@ -684,14 +684,31 @@ void CDVDVideoCodecDRMPRIME::FilterTest() + __FUNCTION__); + } + +-bool CDVDVideoCodecDRMPRIME::FilterOpen(const std::string& filters, bool test) ++AVFrame *CDVDVideoCodecDRMPRIME::alloc_filter_frame(AVFilterContext * ctx, void * v, int w, int h) ++{ ++ int result; ++ CDVDVideoCodecDRMPRIME* me = static_cast(v); ++ AVFrame *frame = av_frame_alloc(); ++ frame->width = w; ++ frame->height = h; ++ frame->format = AV_PIX_FMT_YUV420P; ++ ++ if ((result = CDVDVideoCodecDRMPRIME::GetBuffer(me->m_pCodecContext, frame, 0)) < 0) ++ { ++ CLog::Log(LOGERROR, "CDVDVideoCodecDRMPRIME::alloc_filter_frame - failed to GetBuffer ({})", result); ++ return nullptr; ++ } ++ return frame; ++} ++ ++bool CDVDVideoCodecDRMPRIME::FilterOpen(const std::string& filters, bool scale, bool test) + { + int result; + + if (m_pFilterGraph) + FilterClose(); + +- if (filters.empty()) ++ if (filters.empty() && !scale) + return true; + + if (!(m_pFilterGraph = avfilter_graph_alloc())) +@@ -702,13 +719,13 @@ bool CDVDVideoCodecDRMPRIME::FilterOpen(const std::string& filters, bool test) + + const AVFilter* srcFilter = avfilter_get_by_name("buffer"); + const AVFilter* outFilter = avfilter_get_by_name("buffersink"); +- enum AVPixelFormat pix_fmts[] = { AV_PIX_FMT_DRM_PRIME, AV_PIX_FMT_NONE }; ++ enum AVPixelFormat pix_fmts[] = { scale ? AV_PIX_FMT_YUV420P : AV_PIX_FMT_DRM_PRIME, AV_PIX_FMT_NONE }; + + std::string args = StringUtils::Format("video_size={}x{}:pix_fmt={}:time_base={}/{}:" + "pixel_aspect={}/{}", + m_pCodecContext->width, + m_pCodecContext->height, +- AV_PIX_FMT_DRM_PRIME, ++ scale ? m_pCodecContext->pix_fmt : AV_PIX_FMT_DRM_PRIME, + m_pCodecContext->time_base.num ? + m_pCodecContext->time_base.num : 1, + m_pCodecContext->time_base.num ? 
+@@ -727,7 +744,6 @@ bool CDVDVideoCodecDRMPRIME::FilterOpen(const std::string& filters, bool test) + CLog::Log(LOGERROR, + "CDVDVideoCodecDRMPRIME::FilterOpen - avfilter_graph_create_filter: src: {} ({})", + err, result); +- FilterClose(); + return false; + } + +@@ -735,7 +751,6 @@ bool CDVDVideoCodecDRMPRIME::FilterOpen(const std::string& filters, bool test) + if (!par) + { + CLog::Log(LOGERROR, "CDVDVideoCodecDRMPRIME::FilterOpen - unable to alloc buffersrc"); +- FilterClose(); + return false; + } + +@@ -751,7 +766,6 @@ bool CDVDVideoCodecDRMPRIME::FilterOpen(const std::string& filters, bool test) + CLog::Log(LOGERROR, + "CDVDVideoCodecDRMPRIME::FilterOpen - av_buffersrc_parameters_set: {} ({})", + err, result); +- FilterClose(); + return false; + } + av_freep(&par); +@@ -765,7 +779,6 @@ bool CDVDVideoCodecDRMPRIME::FilterOpen(const std::string& filters, bool test) + CLog::Log(LOGERROR, + "CDVDVideoCodecDRMPRIME::FilterOpen - avfilter_graph_create_filter: out: {} ({})", + err, result); +- FilterClose(); + return false; + } + +@@ -774,32 +787,46 @@ bool CDVDVideoCodecDRMPRIME::FilterOpen(const std::string& filters, bool test) + if (result < 0) + { + CLog::Log(LOGERROR, "CDVDVideoCodecDRMPRIME::FilterOpen - failed settings pix formats"); +- FilterClose(); + return false; + } + +- AVFilterInOut* outputs = avfilter_inout_alloc(); +- AVFilterInOut* inputs = avfilter_inout_alloc(); ++ if (!filters.empty()) ++ { ++ AVFilterInOut* outputs = avfilter_inout_alloc(); ++ AVFilterInOut* inputs = avfilter_inout_alloc(); + +- outputs->name = av_strdup("in"); +- outputs->filter_ctx = m_pFilterIn; +- outputs->pad_idx = 0; +- outputs->next = nullptr; ++ outputs->name = av_strdup("in"); ++ outputs->filter_ctx = m_pFilterIn; ++ outputs->pad_idx = 0; ++ outputs->next = nullptr; + +- inputs->name = av_strdup("out"); +- inputs->filter_ctx = m_pFilterOut; +- inputs->pad_idx = 0; +- inputs->next = nullptr; ++ inputs->name = av_strdup("out"); ++ inputs->filter_ctx = m_pFilterOut; ++ inputs->pad_idx = 0; ++ inputs->next = nullptr; + +- result = avfilter_graph_parse_ptr(m_pFilterGraph, filters.c_str(), &inputs, &outputs, NULL); +- avfilter_inout_free(&outputs); +- avfilter_inout_free(&inputs); ++ result = avfilter_graph_parse_ptr(m_pFilterGraph, filters.c_str(), &inputs, &outputs, NULL); ++ avfilter_inout_free(&outputs); ++ avfilter_inout_free(&inputs); + +- if (result < 0) ++ if (result < 0) ++ { ++ CLog::Log(LOGERROR, "CDVDVideoCodecDRMPRIME::FilterOpen - avfilter_graph_parse"); ++ return false; ++ } ++ } ++ else + { +- CLog::Log(LOGERROR, "CDVDVideoCodecDRMPRIME::FilterOpen - avfilter_graph_parse"); +- FilterClose(); +- return false; ++ if ((result = av_buffersink_set_alloc_video_frame(m_pFilterOut, alloc_filter_frame, static_cast(this))) < 0) ++ { ++ CLog::Log(LOGERROR, "CDVDVideoCodecDRMPRIME::FilterOpen - av_buffersink_set_alloc_video_frame = {}", result); ++ return result; ++ } ++ if ((result = avfilter_link(m_pFilterIn, 0, m_pFilterOut, 0)) < 0) ++ { ++ CLog::Log(LOGERROR, "CDVDVideoCodecDRMPRIME::FilterOpen - avfilter_link"); ++ return false; ++ } + } + + if ((result = avfilter_graph_config(m_pFilterGraph, nullptr)) < 0) +@@ -808,15 +835,11 @@ bool CDVDVideoCodecDRMPRIME::FilterOpen(const std::string& filters, bool test) + av_strerror(result, err, AV_ERROR_MAX_STRING_SIZE); + CLog::Log(LOGERROR, "CDVDVideoCodecDRMPRIME::FilterOpen - avfilter_graph_config: {} ({})", + err, result); +- FilterClose(); + return false; + } + + if (test) +- { +- FilterClose(); + return true; +- } + + 
m_processInfo.SetVideoDeintMethod(filters); + +@@ -851,16 +874,16 @@ void CDVDVideoCodecDRMPRIME::FilterClose() + CDVDVideoCodec::VCReturn CDVDVideoCodecDRMPRIME::ProcessFilterIn() + { + // sw decoded buffers need cache flush and for descripter to be set +- if (!IsSupportedHwFormat(static_cast(m_pFrame->format)) && m_pFrame->opaque != nullptr) ++ if (!IsSupportedHwFormat(static_cast(m_pFrame->format)) && IsSupportedSwFormat(static_cast(m_pFrame->format))) + { +- CVideoBufferDMA* buffer = static_cast(m_pFrame->opaque); ++ CVideoBufferDMA* buffer = static_cast(av_buffer_get_opaque(m_pFrame->buf[0])); + buffer->SetDimensions(m_pFrame->width, m_pFrame->height); + buffer->SyncEnd(); + auto descriptor = buffer->GetDescriptor(); + m_pFrame->data[0] = reinterpret_cast(descriptor); ++ m_pFrame->format = AV_PIX_FMT_DRM_PRIME; + } + +- m_pFrame->format = AV_PIX_FMT_DRM_PRIME; + int ret = av_buffersrc_add_frame(m_pFilterIn, m_pFrame); + if (ret < 0) + { +@@ -953,25 +976,28 @@ CDVDVideoCodec::VCReturn CDVDVideoCodecDRMPRIME::GetPicture(VideoPicture* pVideo + return VC_ERROR; + } + ++ // we need to scale if the buffer isn't in DRM_PRIME format ++ bool need_scale = !IsSupportedSwFormat(static_cast(m_pFrame->format)) && !IsSupportedHwFormat(static_cast(m_pFrame->format)); ++ + if (!m_processInfo.GetVideoInterlaced() && m_pFrame->interlaced_frame) + m_processInfo.SetVideoInterlaced(true); + + std::string filterChain = GetFilterChain(m_pFrame->interlaced_frame); +- if (!filterChain.empty()) ++ if (!filterChain.empty() || need_scale) + { + bool reopenFilter = false; + if (m_filters != filterChain) + reopenFilter = true; + + if (m_pFilterGraph && +- (m_pFilterIn->outputs[0]->w != m_pCodecContext->width || +- m_pFilterIn->outputs[0]->h != m_pCodecContext->height)) ++ (m_pFilterIn->outputs[0]->w != m_pFrame->width || ++ m_pFilterIn->outputs[0]->h != m_pFrame->height)) + reopenFilter = true; + +- if (reopenFilter) ++ if (reopenFilter || (need_scale && m_pFilterGraph == nullptr)) + { + m_filters = filterChain; +- if (!FilterOpen(filterChain, false)) ++ if (!FilterOpen(filterChain, need_scale, false)) + FilterClose(); + } + +diff --git a/xbmc/cores/VideoPlayer/DVDCodecs/Video/DVDVideoCodecDRMPRIME.h b/xbmc/cores/VideoPlayer/DVDCodecs/Video/DVDVideoCodecDRMPRIME.h +index fab3431d40..bb88fde1f9 100644 +--- a/xbmc/cores/VideoPlayer/DVDCodecs/Video/DVDVideoCodecDRMPRIME.h ++++ b/xbmc/cores/VideoPlayer/DVDCodecs/Video/DVDVideoCodecDRMPRIME.h +@@ -44,7 +44,8 @@ protected: + CDVDVideoCodec::VCReturn ProcessFilterOut(); + static enum AVPixelFormat GetFormat(struct AVCodecContext* avctx, const enum AVPixelFormat* fmt); + static int GetBuffer(struct AVCodecContext* avctx, AVFrame* frame, int flags); +- bool FilterOpen(const std::string& filters, bool test); ++ static AVFrame *alloc_filter_frame(AVFilterContext * ctx, void * v, int w, int h); ++ bool FilterOpen(const std::string& filters, bool scale, bool test); + void FilterClose(); + void FilterTest(); + std::string GetFilterChain(bool interlaced); +-- +2.34.1 + + +From 9d7c4cd5305a52b7806029860b40d79348475cdf Mon Sep 17 00:00:00 2001 +From: Dom Cobley +Date: Fri, 14 Apr 2023 19:59:42 +0100 +Subject: [PATCH 17/24] DVDVideoCodecDRMPRIME: Remove obsolete + thread_safe_callbacks + +--- + xbmc/cores/VideoPlayer/DVDCodecs/Video/DVDVideoCodecDRMPRIME.cpp | 1 - + 1 file changed, 1 deletion(-) + +diff --git a/xbmc/cores/VideoPlayer/DVDCodecs/Video/DVDVideoCodecDRMPRIME.cpp b/xbmc/cores/VideoPlayer/DVDCodecs/Video/DVDVideoCodecDRMPRIME.cpp +index 62fc0cf822..3ed59af9f7 100644 +--- 
a/xbmc/cores/VideoPlayer/DVDCodecs/Video/DVDVideoCodecDRMPRIME.cpp ++++ b/xbmc/cores/VideoPlayer/DVDCodecs/Video/DVDVideoCodecDRMPRIME.cpp +@@ -355,7 +355,6 @@ bool CDVDVideoCodecDRMPRIME::Open(CDVDStreamInfo& hints, CDVDCodecOptions& optio + m_pCodecContext->bits_per_coded_sample = hints.bitsperpixel; + m_pCodecContext->time_base.num = 1; + m_pCodecContext->time_base.den = DVD_TIME_BASE; +- m_pCodecContext->thread_safe_callbacks = 1; + m_pCodecContext->thread_count = CServiceBroker::GetCPUInfo()->GetCPUCount(); + + if (hints.extradata) +-- +2.34.1 + + +From 0c73ce0ada72ec08efbb4b77e7401d91e498f56d Mon Sep 17 00:00:00 2001 +From: Dom Cobley +Date: Mon, 15 May 2023 12:50:16 +0100 +Subject: [PATCH 18/24] DVDVideoCodecDRMPRIME: Fix missing flush after eof + +--- + .../VideoPlayer/DVDCodecs/Video/DVDVideoCodecDRMPRIME.cpp | 8 ++++++++ + 1 file changed, 8 insertions(+) + +diff --git a/xbmc/cores/VideoPlayer/DVDCodecs/Video/DVDVideoCodecDRMPRIME.cpp b/xbmc/cores/VideoPlayer/DVDCodecs/Video/DVDVideoCodecDRMPRIME.cpp +index 3ed59af9f7..c9ea5d52d5 100644 +--- a/xbmc/cores/VideoPlayer/DVDCodecs/Video/DVDVideoCodecDRMPRIME.cpp ++++ b/xbmc/cores/VideoPlayer/DVDCodecs/Video/DVDVideoCodecDRMPRIME.cpp +@@ -965,7 +965,15 @@ CDVDVideoCodec::VCReturn CDVDVideoCodecDRMPRIME::GetPicture(VideoPicture* pVideo + if (ret == AVERROR(EAGAIN)) + return VC_BUFFER; + else if (ret == AVERROR_EOF) ++ { ++ if (m_codecControlFlags & DVD_CODEC_CTRL_DRAIN) ++ { ++ CLog::Log(LOGDEBUG, "CDVDVideoCodecDRMPRIME::{} - flush buffers", __FUNCTION__); ++ avcodec_flush_buffers(m_pCodecContext); ++ SetCodecControl(m_codecControlFlags & ~DVD_CODEC_CTRL_DRAIN); ++ } + return VC_EOF; ++ } + else if (ret) + { + char err[AV_ERROR_MAX_STRING_SIZE] = {}; +-- +2.34.1 + + +From 647cfba9f3f8feb7b4b9b31a7a235e8d7fbb066c Mon Sep 17 00:00:00 2001 +From: Dom Cobley +Date: Wed, 31 May 2023 19:40:37 +0100 +Subject: [PATCH 19/24] DVDVideoCodecDRMPRIME: Clear m_pFilterGraph + +--- + xbmc/cores/VideoPlayer/DVDCodecs/Video/DVDVideoCodecDRMPRIME.cpp | 1 + + 1 file changed, 1 insertion(+) + +diff --git a/xbmc/cores/VideoPlayer/DVDCodecs/Video/DVDVideoCodecDRMPRIME.cpp b/xbmc/cores/VideoPlayer/DVDCodecs/Video/DVDVideoCodecDRMPRIME.cpp +index c9ea5d52d5..a3eecf0aed 100644 +--- a/xbmc/cores/VideoPlayer/DVDCodecs/Video/DVDVideoCodecDRMPRIME.cpp ++++ b/xbmc/cores/VideoPlayer/DVDCodecs/Video/DVDVideoCodecDRMPRIME.cpp +@@ -867,6 +867,7 @@ void CDVDVideoCodecDRMPRIME::FilterClose() + // Disposed by above code + m_pFilterIn = nullptr; + m_pFilterOut = nullptr; ++ m_pFilterGraph = nullptr; + } + } + +-- +2.34.1 + + +From bf72fb426a6f1d6f2903d3f0fc825f3b69c2eea4 Mon Sep 17 00:00:00 2001 +From: Dom Cobley +Date: Fri, 2 Jun 2023 11:34:22 +0100 +Subject: [PATCH 20/24] DVDVideoCodecDRMPRIME: Move FilterTest from open to + first frame returned + +The pixel format is not accurate until the first frame is returned +and it may (later) influence the choice of deinterlacers available. 
+--- + .../DVDCodecs/Video/DVDVideoCodecDRMPRIME.cpp | 24 ++++++++++++------- + .../DVDCodecs/Video/DVDVideoCodecDRMPRIME.h | 1 + + 2 files changed, 16 insertions(+), 9 deletions(-) + +diff --git a/xbmc/cores/VideoPlayer/DVDCodecs/Video/DVDVideoCodecDRMPRIME.cpp b/xbmc/cores/VideoPlayer/DVDCodecs/Video/DVDVideoCodecDRMPRIME.cpp +index a3eecf0aed..c2d1e496e0 100644 +--- a/xbmc/cores/VideoPlayer/DVDCodecs/Video/DVDVideoCodecDRMPRIME.cpp ++++ b/xbmc/cores/VideoPlayer/DVDCodecs/Video/DVDVideoCodecDRMPRIME.cpp +@@ -387,15 +387,7 @@ bool CDVDVideoCodecDRMPRIME::Open(CDVDStreamInfo& hints, CDVDCodecOptions& optio + m_processInfo.SetVideoDAR(hints.aspect); + m_processInfo.SetVideoDeintMethod("none"); + +- FilterTest(); +- +- if (!m_deintFilterName.empty()) +- { +- std::list methods; +- methods.push_back(EINTERLACEMETHOD::VS_INTERLACEMETHOD_DEINTERLACE); +- m_processInfo.UpdateDeinterlacingMethods(methods); +- m_processInfo.SetDeinterlacingMethodDefault(EINTERLACEMETHOD::VS_INTERLACEMETHOD_DEINTERLACE); +- } ++ m_checkedDeinterlace = false; + + return true; + } +@@ -984,6 +976,20 @@ CDVDVideoCodec::VCReturn CDVDVideoCodecDRMPRIME::GetPicture(VideoPicture* pVideo + return VC_ERROR; + } + ++ if (!m_checkedDeinterlace) ++ { ++ FilterTest(); ++ ++ if (!m_deintFilterName.empty()) ++ { ++ std::list methods; ++ methods.push_back(EINTERLACEMETHOD::VS_INTERLACEMETHOD_DEINTERLACE); ++ m_processInfo.UpdateDeinterlacingMethods(methods); ++ m_processInfo.SetDeinterlacingMethodDefault(EINTERLACEMETHOD::VS_INTERLACEMETHOD_DEINTERLACE); ++ } ++ m_checkedDeinterlace = true; ++ } ++ + // we need to scale if the buffer isn't in DRM_PRIME format + bool need_scale = !IsSupportedSwFormat(static_cast(m_pFrame->format)) && !IsSupportedHwFormat(static_cast(m_pFrame->format)); + +diff --git a/xbmc/cores/VideoPlayer/DVDCodecs/Video/DVDVideoCodecDRMPRIME.h b/xbmc/cores/VideoPlayer/DVDCodecs/Video/DVDVideoCodecDRMPRIME.h +index bb88fde1f9..df17f89b96 100644 +--- a/xbmc/cores/VideoPlayer/DVDCodecs/Video/DVDVideoCodecDRMPRIME.h ++++ b/xbmc/cores/VideoPlayer/DVDCodecs/Video/DVDVideoCodecDRMPRIME.h +@@ -56,6 +56,7 @@ protected: + int m_codecControlFlags = 0; + CDVDStreamInfo m_hints; + double m_DAR = 1.0; ++ bool m_checkedDeinterlace = false; + AVCodecContext* m_pCodecContext = nullptr; + AVFrame* m_pFrame = nullptr; + AVFrame* m_pFilterFrame = nullptr; +-- +2.34.1 + + +From d1ca2d8b7bf6bcf3abe5dbffabb0d03f432925ed Mon Sep 17 00:00:00 2001 +From: Dom Cobley +Date: Wed, 31 May 2023 14:19:20 +0100 +Subject: [PATCH 21/24] DVDVideoCodecDRMPRIME: Rework filtering code to handle + sw deinterlace + +--- + .../DVDCodecs/Video/DVDVideoCodecDRMPRIME.cpp | 134 +++++++++--------- + .../DVDCodecs/Video/DVDVideoCodecDRMPRIME.h | 4 +- + 2 files changed, 68 insertions(+), 70 deletions(-) + +diff --git a/xbmc/cores/VideoPlayer/DVDCodecs/Video/DVDVideoCodecDRMPRIME.cpp b/xbmc/cores/VideoPlayer/DVDCodecs/Video/DVDVideoCodecDRMPRIME.cpp +index c2d1e496e0..521a4c174b 100644 +--- a/xbmc/cores/VideoPlayer/DVDCodecs/Video/DVDVideoCodecDRMPRIME.cpp ++++ b/xbmc/cores/VideoPlayer/DVDCodecs/Video/DVDVideoCodecDRMPRIME.cpp +@@ -207,11 +207,7 @@ static const AVCodec* FindDecoder(CDVDStreamInfo& hints) + return codec; + } + +- codec = avcodec_find_decoder(hints.codec); +- if (codec && (codec->capabilities & AV_CODEC_CAP_DR1) == AV_CODEC_CAP_DR1) +- return codec; +- +- return nullptr; ++ return avcodec_find_decoder(hints.codec); + } + + enum AVPixelFormat CDVDVideoCodecDRMPRIME::GetFormat(struct AVCodecContext* avctx, +@@ -646,27 +642,33 @@ bool 
CDVDVideoCodecDRMPRIME::SetPictureParams(VideoPicture* pVideoPicture) + return true; + } + +-void CDVDVideoCodecDRMPRIME::FilterTest() ++void CDVDVideoCodecDRMPRIME::FilterTest(AVPixelFormat pix_fmt) + { +- const AVFilter* filter; +- void* opaque{}; +- + m_deintFilterName.clear(); + +- while ((filter = av_filter_iterate(&opaque)) != nullptr) ++ // look twice, first for DRM_PRIME support, then for actual pixel format ++ for (int i=0; i < 2; i++) + { +- std::string name(filter->name); ++ const AVFilter* filter; ++ void* opaque{}; + +- if (name.find("deinterlace") != std::string::npos) ++ while ((filter = av_filter_iterate(&opaque)) != nullptr) + { +- bool ret = FilterOpen(name, false, true); +- FilterClose(); +- if (ret) ++ std::string name(filter->name); ++ ++ if (name.find(i == 0 ? "deinterlace" : "bwdif") != std::string::npos) + { +- m_deintFilterName = name; +- CLog::Log(LOGDEBUG, "CDVDVideoCodecDRMPRIME::{} - found deinterlacing filter {}", +- __FUNCTION__, name); +- return; ++ bool ret = FilterOpen(name, pix_fmt, true); ++ FilterClose(); ++ if (ret) ++ { ++ m_deintFilterName = name; ++ if (name == "bwdif" || name == "yadif") ++ m_deintFilterName += "=1:-1:1"; ++ CLog::Log(LOGDEBUG, "CDVDVideoCodecDRMPRIME::{} - found deinterlacing filter {}", ++ __FUNCTION__, name); ++ return; ++ } + } + } + } +@@ -692,14 +694,17 @@ AVFrame *CDVDVideoCodecDRMPRIME::alloc_filter_frame(AVFilterContext * ctx, void + return frame; + } + +-bool CDVDVideoCodecDRMPRIME::FilterOpen(const std::string& filters, bool scale, bool test) ++bool CDVDVideoCodecDRMPRIME::FilterOpen(const std::string& filters, AVPixelFormat pix_fmt, bool test) + { + int result; + ++ if (filters.find("deinterlace") != std::string::npos && pix_fmt == AV_PIX_FMT_YUV420P) ++ pix_fmt = AV_PIX_FMT_DRM_PRIME; ++ + if (m_pFilterGraph) + FilterClose(); + +- if (filters.empty() && !scale) ++ if (filters.empty()) + return true; + + if (!(m_pFilterGraph = avfilter_graph_alloc())) +@@ -710,13 +715,12 @@ bool CDVDVideoCodecDRMPRIME::FilterOpen(const std::string& filters, bool scale, + + const AVFilter* srcFilter = avfilter_get_by_name("buffer"); + const AVFilter* outFilter = avfilter_get_by_name("buffersink"); +- enum AVPixelFormat pix_fmts[] = { scale ? AV_PIX_FMT_YUV420P : AV_PIX_FMT_DRM_PRIME, AV_PIX_FMT_NONE }; + + std::string args = StringUtils::Format("video_size={}x{}:pix_fmt={}:time_base={}/{}:" + "pixel_aspect={}/{}", + m_pCodecContext->width, + m_pCodecContext->height, +- scale ? m_pCodecContext->pix_fmt : AV_PIX_FMT_DRM_PRIME, ++ pix_fmt, + m_pCodecContext->time_base.num ? + m_pCodecContext->time_base.num : 1, + m_pCodecContext->time_base.num ? 
+@@ -773,6 +777,7 @@ bool CDVDVideoCodecDRMPRIME::FilterOpen(const std::string& filters, bool scale, + return false; + } + ++ enum AVPixelFormat pix_fmts[] = { AV_PIX_FMT_DRM_PRIME, AV_PIX_FMT_YUV420P, AV_PIX_FMT_NONE }; + result = av_opt_set_int_list(m_pFilterOut, "pix_fmts", &pix_fmts[0], + AV_PIX_FMT_NONE, AV_OPT_SEARCH_CHILDREN); + if (result < 0) +@@ -781,43 +786,32 @@ bool CDVDVideoCodecDRMPRIME::FilterOpen(const std::string& filters, bool scale, + return false; + } + +- if (!filters.empty()) ++ if ((result = av_buffersink_set_alloc_video_frame(m_pFilterOut, alloc_filter_frame, static_cast(this))) < 0) + { +- AVFilterInOut* outputs = avfilter_inout_alloc(); +- AVFilterInOut* inputs = avfilter_inout_alloc(); ++ CLog::Log(LOGERROR, "CDVDVideoCodecDRMPRIME::FilterOpen - av_buffersink_set_alloc_video_frame = {}", result); ++ return result; ++ } ++ AVFilterInOut* outputs = avfilter_inout_alloc(); ++ AVFilterInOut* inputs = avfilter_inout_alloc(); + +- outputs->name = av_strdup("in"); +- outputs->filter_ctx = m_pFilterIn; +- outputs->pad_idx = 0; +- outputs->next = nullptr; ++ outputs->name = av_strdup("in"); ++ outputs->filter_ctx = m_pFilterIn; ++ outputs->pad_idx = 0; ++ outputs->next = nullptr; + +- inputs->name = av_strdup("out"); +- inputs->filter_ctx = m_pFilterOut; +- inputs->pad_idx = 0; +- inputs->next = nullptr; ++ inputs->name = av_strdup("out"); ++ inputs->filter_ctx = m_pFilterOut; ++ inputs->pad_idx = 0; ++ inputs->next = nullptr; + +- result = avfilter_graph_parse_ptr(m_pFilterGraph, filters.c_str(), &inputs, &outputs, NULL); +- avfilter_inout_free(&outputs); +- avfilter_inout_free(&inputs); ++ result = avfilter_graph_parse_ptr(m_pFilterGraph, filters.c_str(), &inputs, &outputs, NULL); ++ avfilter_inout_free(&outputs); ++ avfilter_inout_free(&inputs); + +- if (result < 0) +- { +- CLog::Log(LOGERROR, "CDVDVideoCodecDRMPRIME::FilterOpen - avfilter_graph_parse"); +- return false; +- } +- } +- else ++ if (result < 0) + { +- if ((result = av_buffersink_set_alloc_video_frame(m_pFilterOut, alloc_filter_frame, static_cast(this))) < 0) +- { +- CLog::Log(LOGERROR, "CDVDVideoCodecDRMPRIME::FilterOpen - av_buffersink_set_alloc_video_frame = {}", result); +- return result; +- } +- if ((result = avfilter_link(m_pFilterIn, 0, m_pFilterOut, 0)) < 0) +- { +- CLog::Log(LOGERROR, "CDVDVideoCodecDRMPRIME::FilterOpen - avfilter_link"); +- return false; +- } ++ CLog::Log(LOGERROR, "CDVDVideoCodecDRMPRIME::FilterOpen - avfilter_graph_parse"); ++ return false; + } + + if ((result = avfilter_graph_config(m_pFilterGraph, nullptr)) < 0) +@@ -832,8 +826,6 @@ bool CDVDVideoCodecDRMPRIME::FilterOpen(const std::string& filters, bool scale, + if (test) + return true; + +- m_processInfo.SetVideoDeintMethod(filters); +- + if (CServiceBroker::GetLogging().CanLogComponent(LOGVIDEO)) + { + char* graphDump = avfilter_graph_dump(m_pFilterGraph, nullptr); +@@ -865,8 +857,8 @@ void CDVDVideoCodecDRMPRIME::FilterClose() + + CDVDVideoCodec::VCReturn CDVDVideoCodecDRMPRIME::ProcessFilterIn() + { +- // sw decoded buffers need cache flush and for descripter to be set +- if (!IsSupportedHwFormat(static_cast(m_pFrame->format)) && IsSupportedSwFormat(static_cast(m_pFrame->format))) ++ // sw decoded buffers submitted to hw decoder need cache flush and for descripter to be set ++ if (m_pFrame->format != AV_PIX_FMT_DRM_PRIME && m_pFilterGraph && m_pFilterIn->outputs[0]->format == AV_PIX_FMT_DRM_PRIME) + { + CVideoBufferDMA* buffer = static_cast(av_buffer_get_opaque(m_pFrame->buf[0])); + buffer->SetDimensions(m_pFrame->width, 
m_pFrame->height); +@@ -976,9 +968,10 @@ CDVDVideoCodec::VCReturn CDVDVideoCodecDRMPRIME::GetPicture(VideoPicture* pVideo + return VC_ERROR; + } + ++ AVPixelFormat pix_fmt = static_cast(m_pFrame->format); + if (!m_checkedDeinterlace) + { +- FilterTest(); ++ FilterTest(pix_fmt); + + if (!m_deintFilterName.empty()) + { +@@ -990,28 +983,33 @@ CDVDVideoCodec::VCReturn CDVDVideoCodecDRMPRIME::GetPicture(VideoPicture* pVideo + m_checkedDeinterlace = true; + } + +- // we need to scale if the buffer isn't in DRM_PRIME format +- bool need_scale = !IsSupportedSwFormat(static_cast(m_pFrame->format)) && !IsSupportedHwFormat(static_cast(m_pFrame->format)); +- + if (!m_processInfo.GetVideoInterlaced() && m_pFrame->interlaced_frame) + m_processInfo.SetVideoInterlaced(true); + + std::string filterChain = GetFilterChain(m_pFrame->interlaced_frame); +- if (!filterChain.empty() || need_scale) ++ ++ // we need to scale if the buffer isn't in DRM_PRIME format ++ if (!IsSupportedSwFormat(pix_fmt) && !IsSupportedHwFormat(pix_fmt)) ++ filterChain = "scale"; ++ // we need to copy if the buffer wasn't allocated by us ++ else if (!IsSupportedHwFormat(pix_fmt) && !(m_pCodecContext->codec->capabilities & AV_CODEC_CAP_DR1)) ++ filterChain = "copy"; ++ ++ if (!filterChain.empty()) + { +- bool reopenFilter = false; +- if (m_filters != filterChain) +- reopenFilter = true; ++ bool reopenFilter = m_filters != filterChain; + + if (m_pFilterGraph && + (m_pFilterIn->outputs[0]->w != m_pFrame->width || + m_pFilterIn->outputs[0]->h != m_pFrame->height)) + reopenFilter = true; + +- if (reopenFilter || (need_scale && m_pFilterGraph == nullptr)) ++ if (reopenFilter) + { + m_filters = filterChain; +- if (!FilterOpen(filterChain, need_scale, false)) ++ m_processInfo.SetVideoDeintMethod(m_filters); ++ ++ if (!FilterOpen(filterChain, pix_fmt, false)) + FilterClose(); + } + +diff --git a/xbmc/cores/VideoPlayer/DVDCodecs/Video/DVDVideoCodecDRMPRIME.h b/xbmc/cores/VideoPlayer/DVDCodecs/Video/DVDVideoCodecDRMPRIME.h +index df17f89b96..55675c3c2e 100644 +--- a/xbmc/cores/VideoPlayer/DVDCodecs/Video/DVDVideoCodecDRMPRIME.h ++++ b/xbmc/cores/VideoPlayer/DVDCodecs/Video/DVDVideoCodecDRMPRIME.h +@@ -45,9 +45,9 @@ protected: + static enum AVPixelFormat GetFormat(struct AVCodecContext* avctx, const enum AVPixelFormat* fmt); + static int GetBuffer(struct AVCodecContext* avctx, AVFrame* frame, int flags); + static AVFrame *alloc_filter_frame(AVFilterContext * ctx, void * v, int w, int h); +- bool FilterOpen(const std::string& filters, bool scale, bool test); ++ bool FilterOpen(const std::string& filters, AVPixelFormat pix_fmt, bool test); + void FilterClose(); +- void FilterTest(); ++ void FilterTest(AVPixelFormat pix_fmt); + std::string GetFilterChain(bool interlaced); + + std::string m_name; +-- +2.34.1 + + +From 5c5a019635595b296c0f52783e59a8fb85ee9694 Mon Sep 17 00:00:00 2001 +From: Dom Cobley +Date: Tue, 20 Jun 2023 15:13:09 +0100 +Subject: [PATCH 22/24] CDVDVideoCodecDRMPRIME: Support decoding to DRMPRIME + with sw deinterlace + +We can map a YUV style DRM_PRIME buffer back to AV_PIX_FMT_YUV420P +to allow subsquent sw deinterlace +--- + .../DVDCodecs/Video/DVDVideoCodecDRMPRIME.cpp | 22 +++++++++++++++++++ + 1 file changed, 22 insertions(+) + +diff --git a/xbmc/cores/VideoPlayer/DVDCodecs/Video/DVDVideoCodecDRMPRIME.cpp b/xbmc/cores/VideoPlayer/DVDCodecs/Video/DVDVideoCodecDRMPRIME.cpp +index 521a4c174b..326d33e8a0 100644 +--- a/xbmc/cores/VideoPlayer/DVDCodecs/Video/DVDVideoCodecDRMPRIME.cpp ++++ 
b/xbmc/cores/VideoPlayer/DVDCodecs/Video/DVDVideoCodecDRMPRIME.cpp +@@ -701,6 +701,9 @@ bool CDVDVideoCodecDRMPRIME::FilterOpen(const std::string& filters, AVPixelForma + if (filters.find("deinterlace") != std::string::npos && pix_fmt == AV_PIX_FMT_YUV420P) + pix_fmt = AV_PIX_FMT_DRM_PRIME; + ++ if (filters.find("bwdif") != std::string::npos && pix_fmt == AV_PIX_FMT_DRM_PRIME) ++ pix_fmt = AV_PIX_FMT_YUV420P; ++ + if (m_pFilterGraph) + FilterClose(); + +@@ -867,6 +870,25 @@ CDVDVideoCodec::VCReturn CDVDVideoCodecDRMPRIME::ProcessFilterIn() + m_pFrame->data[0] = reinterpret_cast(descriptor); + m_pFrame->format = AV_PIX_FMT_DRM_PRIME; + } ++ // hw decoded buffers submitted to sw decoder need mapping of planes for cpu to access ++ else if (m_pFrame->format == AV_PIX_FMT_DRM_PRIME && m_pFilterGraph && m_pFilterIn->outputs[0]->format == AV_PIX_FMT_YUV420P) ++ { ++ AVFrame *frame = av_frame_alloc(); ++ frame->width = m_pFrame->width; ++ frame->height = m_pFrame->height; ++ frame->format = AV_PIX_FMT_YUV420P; ++ int ret = av_hwframe_map(frame, m_pFrame, (int)AV_HWFRAME_MAP_READ); ++ if (ret < 0) ++ { ++ char err[AV_ERROR_MAX_STRING_SIZE] = {}; ++ av_strerror(ret, err, AV_ERROR_MAX_STRING_SIZE); ++ CLog::Log(LOGERROR, "CDVDVideoCodecDRMPRIME::{} - av_hwframe_map failed: {} ({})", ++ __FUNCTION__, err, ret); ++ return VC_ERROR; ++ } ++ av_frame_unref(m_pFrame); ++ av_frame_move_ref(m_pFrame, frame); ++ } + + int ret = av_buffersrc_add_frame(m_pFilterIn, m_pFrame); + if (ret < 0) +-- +2.34.1 + + +From 56510bc7aac6c7b4a88bb952fbfabbbbe30df455 Mon Sep 17 00:00:00 2001 +From: Dom Cobley +Date: Tue, 20 Jun 2023 15:14:02 +0100 +Subject: [PATCH 23/24] DVDVideoCodecDRMPRIME: Request v4l2 buffers be + allocated through cache + +This is an optional request, but will improve performance of sw deinterlace +if supported. +--- + .../VideoPlayer/DVDCodecs/Video/DVDVideoCodecDRMPRIME.cpp | 4 ++++ + 1 file changed, 4 insertions(+) + +diff --git a/xbmc/cores/VideoPlayer/DVDCodecs/Video/DVDVideoCodecDRMPRIME.cpp b/xbmc/cores/VideoPlayer/DVDCodecs/Video/DVDVideoCodecDRMPRIME.cpp +index 326d33e8a0..8c0d37bf59 100644 +--- a/xbmc/cores/VideoPlayer/DVDCodecs/Video/DVDVideoCodecDRMPRIME.cpp ++++ b/xbmc/cores/VideoPlayer/DVDCodecs/Video/DVDVideoCodecDRMPRIME.cpp +@@ -367,6 +367,10 @@ bool CDVDVideoCodecDRMPRIME::Open(CDVDStreamInfo& hints, CDVDCodecOptions& optio + for (auto&& option : options.m_keys) + av_opt_set(m_pCodecContext, option.m_name.c_str(), option.m_value.c_str(), 0); + ++ // this requests v4l2 buffers are allocated through cache. It will work if this is not supported, ++ // but subsequent operations like deinterlace may be less efficient ++ av_opt_set(m_pCodecContext->priv_data, "dmabuf_alloc", "cma", 0); ++ + if (avcodec_open2(m_pCodecContext, pCodec, nullptr) < 0) + { + CLog::Log(LOGINFO, "CDVDVideoCodecDRMPRIME::{} - unable to open codec", __FUNCTION__); +-- +2.34.1 + + +From a77c833ea78ab14259b298e60b86205a98099f87 Mon Sep 17 00:00:00 2001 +From: Dom Cobley +Date: Wed, 21 Jun 2023 13:16:01 +0100 +Subject: [PATCH 24/24] DVDVideoCodecDRMPRIME: Add setting to enable hw + deinterlace + +HW deinterlace has lower cpu, but may have higher quality, +so allow user to choose appropriate setting. 
+--- + .../resource.language.en_gb/resources/strings.po | 11 +++++++++++ + system/settings/linux.xml | 12 ++++++++++++ + .../DVDCodecs/Video/DVDVideoCodecDRMPRIME.cpp | 16 +++++++++++++++- + xbmc/settings/Settings.h | 1 + + 4 files changed, 39 insertions(+), 1 deletion(-) + +diff --git a/addons/resource.language.en_gb/resources/strings.po b/addons/resource.language.en_gb/resources/strings.po +index 9fb3dcb2b0..c2d0f4352a 100644 +--- a/addons/resource.language.en_gb/resources/strings.po ++++ b/addons/resource.language.en_gb/resources/strings.po +@@ -7363,6 +7363,11 @@ msgctxt "#13438" + msgid "Allow hardware acceleration with DRM PRIME" + msgstr "" + ++#: system/settings/settings.xml ++msgctxt "#13500" ++msgid "Allow hardware deinterlace with DRM PRIME" ++msgstr "" ++ + #: system/settings/settings.xml + msgctxt "#13439" + msgid "Allow hardware acceleration - MediaCodec" +@@ -19550,6 +19555,12 @@ msgctxt "#36172" + msgid "Enable PRIME decoding of video files" + msgstr "" + ++#. Description of setting with label #13500 "Allow hardware deinterlace - PRIME" ++#: system/settings/settings.xml ++msgctxt "#36290" ++msgid "Enable PRIME hardware deinterlace of video files" ++msgstr "" ++ + #. Description of setting with label #14109 "Short date format" + #: system/settings/settings.xml + msgctxt "#36173" +diff --git a/system/settings/linux.xml b/system/settings/linux.xml +index 89b91db23b..4cdb0982af 100644 +--- a/system/settings/linux.xml ++++ b/system/settings/linux.xml +@@ -180,6 +180,18 @@ + true + + ++ ++ HAS_GLES ++ false ++ ++ ++ true ++ ++ ++ 3 ++ true ++ ++ + + HAS_GLES + false +diff --git a/xbmc/cores/VideoPlayer/DVDCodecs/Video/DVDVideoCodecDRMPRIME.cpp b/xbmc/cores/VideoPlayer/DVDCodecs/Video/DVDVideoCodecDRMPRIME.cpp +index 8c0d37bf59..141f08d4fb 100644 +--- a/xbmc/cores/VideoPlayer/DVDCodecs/Video/DVDVideoCodecDRMPRIME.cpp ++++ b/xbmc/cores/VideoPlayer/DVDCodecs/Video/DVDVideoCodecDRMPRIME.cpp +@@ -41,6 +41,7 @@ namespace + { + + constexpr const char* SETTING_VIDEOPLAYER_USEPRIMEDECODERFORHW{"videoplayer.useprimedecoderforhw"}; ++constexpr const char* SETTING_VIDEOPLAYER_ALLOWHWDEINTERLACE{"videoplayer.primeallowhwdeinterlace"}; + + static void ReleaseBuffer(void* opaque, uint8_t* data) + { +@@ -149,6 +150,15 @@ void CDVDVideoCodecDRMPRIME::Register() + + setting->SetVisible(true); + ++ setting = settings->GetSetting(SETTING_VIDEOPLAYER_ALLOWHWDEINTERLACE); ++ if (!setting) ++ { ++ CLog::Log(LOGERROR, "Failed to load setting for: {}", SETTING_VIDEOPLAYER_ALLOWHWDEINTERLACE); ++ return; ++ } ++ ++ setting->SetVisible(true); ++ + CDVDFactoryCodec::RegisterHWVideoCodec("drm_prime", CDVDVideoCodecDRMPRIME::Create); + } + +@@ -651,7 +661,11 @@ void CDVDVideoCodecDRMPRIME::FilterTest(AVPixelFormat pix_fmt) + m_deintFilterName.clear(); + + // look twice, first for DRM_PRIME support, then for actual pixel format +- for (int i=0; i < 2; i++) ++ ++ bool hw = CServiceBroker::GetSettingsComponent()->GetSettings()->GetBool( ++ SETTING_VIDEOPLAYER_ALLOWHWDEINTERLACE); ++ ++ for (int i = hw ? 
0 : 1; i < 2; i++) + { + const AVFilter* filter; + void* opaque{}; +diff --git a/xbmc/settings/Settings.h b/xbmc/settings/Settings.h +index bfc5e6072c..5e7ea6ff9e 100644 +--- a/xbmc/settings/Settings.h ++++ b/xbmc/settings/Settings.h +@@ -123,6 +123,7 @@ public: + static constexpr auto SETTING_VIDEOPLAYER_USEMEDIACODEC = "videoplayer.usemediacodec"; + static constexpr auto SETTING_VIDEOPLAYER_USEMEDIACODECSURFACE = + "videoplayer.usemediacodecsurface"; ++ static constexpr auto SETTING_VIDEOPLAYER_ALLOWHWDEINTERLACE = "videoplayer.primeallowhwdeinterlace"; + static constexpr auto SETTING_VIDEOPLAYER_USEVDPAU = "videoplayer.usevdpau"; + static constexpr auto SETTING_VIDEOPLAYER_USEVDPAUMIXER = "videoplayer.usevdpaumixer"; + static constexpr auto SETTING_VIDEOPLAYER_USEVDPAUMPEG2 = "videoplayer.usevdpaumpeg2"; +-- +2.34.1 + diff --git a/hosts/raspberry-pi5/raspberry-pi5.nix b/hosts/raspberry-pi5/raspberry-pi5.nix index 0909be3..9387774 100644 --- a/hosts/raspberry-pi5/raspberry-pi5.nix +++ b/hosts/raspberry-pi5/raspberry-pi5.nix @@ -6,13 +6,6 @@ inherit (lib.lists) flatten forEach; inherit (lib.modules) mkForce; in { - nixpkgs.overlays = [ - (_final: prev: { - makeModulesClosure = x: - prev.makeModulesClosure (x // {allowMissing = true;}); - }) - ]; - imports = flatten (with tree; [ users.root users.chaos diff --git a/outputs.nix b/outputs.nix index 4843c92..31b899d 100644 --- a/outputs.nix +++ b/outputs.nix @@ -51,6 +51,8 @@ in inherit (pkgs) kitty-terminfo; inherit (pkgs) linux_rpi5 raspberrypifw raspberrypiWirelessFirmware raspberrypi-utils; inherit (pkgs) widevine-aarch64-4k widevine-aarch64-16k; + inherit (pkgs) ffmpeg-pi; + inherit (inputs.home-manager-unstable.packages."${system}") home-manager; }; }