linux/drivers/gpu/drm/v3d/v3d_sched.c
Linus Torvalds 0c86b42439 drm for 6.15-rc1
uapi:
 - add mediatek tiled fourcc
 - add support for notifying userspace on device wedged
 
 new driver:
 - appletbdrm: support for Apple Touchbar displays on m1/m2
 - nova-core: skeleton rust driver to develop nova inside off
 
 firmware:
 - add some rust firmware pieces
 
 rust:
 - add 'LocalModule' type alias
 
 component:
 - add helper to query bound status
 
 fbdev:
 - fbtft: remove access to page->index
 
 media:
 - cec: tda998x: import driver from drm
 
 dma-buf:
 - add fast path for single fence merging
 
 tests:
 - fix lockdep warnings
 
 atomic:
 - allow full modeset on connector changes
 - clarify semantics of allow_modeset and drm_atomic_helper_check
 - async-flip: support on arbitary planes
 - writeback: fix UAF
 - Document atomic-state history
 
 format-helper:
 - support ARGB8888 to ARGB4444 conversions
 
 buddy:
 - fix multi-root cleanup
 
 ci:
 - update IGT
 
 dp:
 - support extended wake timeout
 - mst: fix RAD to string conversion
 - increase DPCD eDP control CAP size to 5 bytes
 - add DPCD eDP v1.5 definition
 - add helpers for LTTPR transparent mode
 
 panic:
 - encode QR code according to Fido 2.2
 
 scheduler:
 - add parameter struct for init
 - improve job peek/pop operations
 - optimise drm_sched_job struct layout
 
 ttm:
 - refactor pool allocation
 - add helpers for TTM shrinker
 
 panel-orientation:
 - add a bunch of new quirks
 
 panel:
 - convert panels to multi-style functions
 - edp: Add support for B140UAN04.4, BOE NV140FHM-NZ, CSW MNB601LS1-3,
   LG LP079QX1-SP0V, MNE007QS3-7, STA 116QHD024002, Starry 116KHD024006,
   Lenovo T14s Gen6 Snapdragon
 - himax-hx83102: Add support for CSOT PNA957QT1-1, Kingdisplay
   kd110n11-51ie, Starry 2082109qfh040022-50e
 - visionox-r66451: use multi-style MIPI-DSI functions
 - raydium-rm67200: Add driver for Raydium RM67200
 - simple: Add support for BOE AV123Z7M-N17, BOE AV123Z7M-N17
 - sony-td4353-jdi: Use MIPI-DSI multi-func interface
 - summit: Add driver for Apple Summit display panel
 - visionox-rm692e5: Add driver for Visionox RM692E5
 
 bridge:
 - pass full atomic state to various callbacks
 - adv7511: Report correct capabilities
 - it6505: Fix HDCP V compare
 - snd65dsi86: fix device IDs
 - nwl-dsi: set bridge type
 - ti-sn65si83: add error recovery and set bridge type
 - synopsys: add HDMI audio support
 
 xe:
 - support device-wedged event
 - add mmap support for PCI memory barrier
 - perf pmu integration and expose per-engien activity
 - add EU stall sampling support
 - GPU SVM and Xe SVM implementation
 - use TTM shrinker
 - add survivability mode to allow the driver to do
   firmware updates in critical failure states
 - PXP HWDRM support for MTL and LNL
 - expose package/vram temps over hwmon
 - enable DP tunneling
 - drop mmio_ext abstraction
 - Reject BO evcition if BO is bound to current VM
 - Xe suballocator improvements
 - re-use display vmas when possible
 - add GuC Buffer Cache abstraction
 - PCI ID update for Panther Lake and Battlemage
 - Enable SRIOV for Panther Lake
 - Refactor VRAM manager location
 
 i915:
 - enable extends wake timeout
 - support device-wedged event
 - Enable DP 128b/132b SST DSC
 - FBC dirty rectangle support for display version 30+
 - convert i915/xe to drm client setup
 - Compute HDMI PLLS for rates not in fixed tables
 - Allow DSB usage when PSR is enabled on LNL+
 - Enable panel replay without full modeset
 - Enable async flips with compressed buffers on ICL+
 - support luminance based brightness via DPCD for eDP
 - enable VRR enable/disable without full modeset
 - allow GuC SLPC default strategies on MTL+ for performance
 - lots of display refactoring in move to struct intel_display
 
 amdgpu:
 - add device wedged event
 - support async page flips on overlay planes
 - enable broadcast RGB drm property
 - add info ioctl for virt mode
 - OEM i2c support for RGB lights
 - GC 11.5.2 + 11.5.3 support
 - SDMA 6.1.3 support
 - NBIO 7.9.1 + 7.11.2 support
 - MMHUB 1.8.1 + 3.3.2 support
 - DCN 3.6.0 support
 - Add dynamic workload profile switching for GC 10-12
 - support larger VBIOS sizes
 - Mark gttsize parameters as deprecated
 - Initial JPEG queue resset support
 
 amdkfd:
 - add KFD per process flags for setting precision
 - sync pasid values between KGD and KFD
 - improve GTT/VRAM handling for APUs
 - fix user queue validation on GC7/8
 - SDMA queue reset support
 
 raedeon:
 - rs400 hyperz fix
 
 i2c:
 - td998x: drop platform_data, split driver into media and bridge
 
 ast:
 - transmitter chip detection refactoring
 - vbios display mode refactoring
 - astdp: fix connection status and filter unsupported modes
 - cursor handling refactoring
 
 imagination:
 - check job dependencies with sched helper
 
 ivpu:
 - improve command queue handling
 - use workqueue for IRQ handling
 - add support HW fault injection
 - locking fixes
 
 mgag200:
 - add support for G200eH5
 
 msm:
 - dpu: add concurrent writeback support for DPU 10.x+
 - use LTTPR helpers
 - GPU:
   - Fix obscure GMU suspend failure
   - Expose syncobj timeline support
   - Extend GPU devcoredump with pagetable info
   - a623 support
   - Fix a6xx gen1/gen2 indexed-register blocks in gpu snapshot / devcoredump
 - Display:
   - Add cpu-cfg interconnect paths on SM8560 and SM8650
   - Introduce KMS OMMU fault handler, causing devcoredump snapshot
   - Fixed error pointer dereference in msm_kms_init_aspace()
 - DPU:
   - Fix mode_changing handling
   - Add writeback support on SM6150 (QCS615)
   - Fix DSC programming in 1:1:1 topology
   - Reworked hardware resource allocation, moving it to the CRTC code
   - Enabled support for Concurrent WriteBack (CWB) on SM8650
   - Enabled CDM blocks on all relevant platforms
   - Reworked debugfs interface for BW/clocks debugging
   - Clear perf params before calculating bw
   - Support YUV formats on writeback
   - Fixed double inclusion
   - Fixed writeback in YUV formats when using cloned output, Dropped
     wb2_formats_rgb
   - Corrected dpu_crtc_check_mode_changed and struct dpu_encoder_virt
     kerneldocs
   - Fixed uninitialized variable in dpu_crtc_kickoff_clone_mode()
 - DSI:
   - DSC-related fixes
   - Rework clock programming
 - DSI PHY:
   - Fix 7nm (and lower) PHY programming
   - Add proper DT schema definitions for DSI PHY clocks
 - HDMI:
   - Rework the driver, enabling the use of the HDMI Connector framework
 - Bindings:
   - Added eDP PHY on SA8775P
 
 nouveau:
 - move drm_slave_encoder interface into driver
 - nvkm: refactor GSP RPC
 - use LTTPR helpers
 
 mediatek:
 - HDMI fixup and refinement
 - add MT8188 dsc compatible
 - MT8365 SoC support
 
 panthor:
 - Expose sizes of intenral BOs via fdinfo
 - Fix race between reset and suspend
 - Improve locking
 
 qaic:
 - Add support for AIC200
 
 renesas:
 - Fix limits in DT bindings
 
 rockchip:
 - support rk3562-mali
 - rk3576: Add HDMI support
 - vop2: Add new display modes on RK3588 HDMI0 up to 4K
 - Don't change HDMI reference clock rate
 - Fix DT bindings
 - analogix_dp: add eDP support
 - fix shutodnw
 
 solomon:
 - Set SPI device table to silence warnings
 - Fix pixel and scanline encoding
 
 v3d:
 - handle clock
 
 vc4:
 - Use drm_exec
 - Use dma-resv for wait-BO ioctl
 - Remove seqno infrastructure
 
 virtgpu:
 - Support partial mappings of GEM objects
 - Reserve VGA resources during initialization
 - Fix UAF in virtgpu_dma_buf_free_obj()
 - Add panic support
 
 vkms:
 - Switch to a managed modesetting pipeline
 - Add support for ARGB8888
 - fix UAf
 
 xlnx:
 - Set correct DMA segment size
 - use mutex guards
 - Fix error handling
 - Fix docs
 -----BEGIN PGP SIGNATURE-----
 
 iQIzBAABCAAdFiEEEKbZHaGwW9KfbeusDHTzWXnEhr4FAmfmA2cACgkQDHTzWXnE
 hr7lKQ/+I7gmln3ka8FyKnpwG5KusDpxz3OzgUKHpzOTkXL1+vPMt0vjCKRJKE3D
 zfpUTWNbwlVN0krqmUGyFIeCt8wmevBF6HvQ+GsWbiWltUj3xIlnkV0TVH84XTUo
 0evrXNG9K8sLjMKjrf7yrGL53Ayoaq9IO9wtOws+FCgtykAsMR/IWLrYLpj21ZQ6
 Oclhq5Cz21WRoQpzySR23s3DLi4LHri26RGKbGNh2PzxYwyP/euGW6O+ncEduNmg
 vQLgUfptaM/EubJFG6jxDWZJ2ChIAUUxQuhZwt7DKqRsYIcJKcfDSUzqL95t6SYU
 zewlYmeslvusoesCeTJzHaUj34yqJGsvjFPsHFUEvAy8BVncsqS40D6mhrRMo5nD
 chnSJu+IpDOEEqcdbb1J73zKLw6X3GROM8qUQEThJBD2yTsOdw9d0HXekMDMBXSi
 NayKvXfsBV19rI74FYnRCzHt8BVVANh5qJNnR5RcnPZ2KzHQbV0JFOA9YhxE8vFU
 GWkFWKlpAQUQ+yoTy1kuO9dcUxLIC7QseMeS5BYhcJBMEV78xQuRYRxgsL8YS4Yg
 rIhcb3mZwMFj7jBAqfpLKWiI+GTup+P9vcz7Bvm5iIf8gEhZLUTwqBeAYXQkcWd4
 i1AqDuFR0//sAgODoeU2sg1Q3yTL/i/DhcwvCIPgcDZ/x4Eg868=
 =oK/N
 -----END PGP SIGNATURE-----

Merge tag 'drm-next-2025-03-28' of https://gitlab.freedesktop.org/drm/kernel

Pull drm updates from Dave Airlie:
 "Outside of drm there are some rust patches from Danilo who maintains
  that area in here, and some pieces for drm header check tests.

  The major things in here are a new driver supporting the touchbar
  displays on M1/M2, the nova-core stub driver which is just the vehicle
  for adding rust abstractions and start developing a real driver inside
  of.

  xe adds support for SVM with a non-driver specific SVM core
  abstraction that will hopefully be useful for other drivers, along
  with support for shrinking for TTM devices. I'm sure xe and AMD
  support new devices, but the pipeline depth on these things is hard to
  know what they end up being in the marketplace!

  uapi:
   - add mediatek tiled fourcc
   - add support for notifying userspace on device wedged

  new driver:
   - appletbdrm: support for Apple Touchbar displays on m1/m2
   - nova-core: skeleton rust driver to develop nova inside off

  firmware:
   - add some rust firmware pieces

  rust:
   - add 'LocalModule' type alias

  component:
   - add helper to query bound status

  fbdev:
   - fbtft: remove access to page->index

  media:
   - cec: tda998x: import driver from drm

  dma-buf:
   - add fast path for single fence merging

  tests:
   - fix lockdep warnings

  atomic:
   - allow full modeset on connector changes
   - clarify semantics of allow_modeset and drm_atomic_helper_check
   - async-flip: support on arbitary planes
   - writeback: fix UAF
   - Document atomic-state history

  format-helper:
   - support ARGB8888 to ARGB4444 conversions

  buddy:
   - fix multi-root cleanup

  ci:
   - update IGT

  dp:
   - support extended wake timeout
   - mst: fix RAD to string conversion
   - increase DPCD eDP control CAP size to 5 bytes
   - add DPCD eDP v1.5 definition
   - add helpers for LTTPR transparent mode

  panic:
   - encode QR code according to Fido 2.2

  scheduler:
   - add parameter struct for init
   - improve job peek/pop operations
   - optimise drm_sched_job struct layout

  ttm:
   - refactor pool allocation
   - add helpers for TTM shrinker

  panel-orientation:
   - add a bunch of new quirks

  panel:
   - convert panels to multi-style functions
   - edp: Add support for B140UAN04.4, BOE NV140FHM-NZ, CSW MNB601LS1-3,
     LG LP079QX1-SP0V, MNE007QS3-7, STA 116QHD024002, Starry
     116KHD024006, Lenovo T14s Gen6 Snapdragon
   - himax-hx83102: Add support for CSOT PNA957QT1-1, Kingdisplay
     kd110n11-51ie, Starry 2082109qfh040022-50e
   - visionox-r66451: use multi-style MIPI-DSI functions
   - raydium-rm67200: Add driver for Raydium RM67200
   - simple: Add support for BOE AV123Z7M-N17, BOE AV123Z7M-N17
   - sony-td4353-jdi: Use MIPI-DSI multi-func interface
   - summit: Add driver for Apple Summit display panel
   - visionox-rm692e5: Add driver for Visionox RM692E5

  bridge:
   - pass full atomic state to various callbacks
   - adv7511: Report correct capabilities
   - it6505: Fix HDCP V compare
   - snd65dsi86: fix device IDs
   - nwl-dsi: set bridge type
   - ti-sn65si83: add error recovery and set bridge type
   - synopsys: add HDMI audio support

  xe:
   - support device-wedged event
   - add mmap support for PCI memory barrier
   - perf pmu integration and expose per-engien activity
   - add EU stall sampling support
   - GPU SVM and Xe SVM implementation
   - use TTM shrinker
   - add survivability mode to allow the driver to do firmware updates
     in critical failure states
   - PXP HWDRM support for MTL and LNL
   - expose package/vram temps over hwmon
   - enable DP tunneling
   - drop mmio_ext abstraction
   - Reject BO evcition if BO is bound to current VM
   - Xe suballocator improvements
   - re-use display vmas when possible
   - add GuC Buffer Cache abstraction
   - PCI ID update for Panther Lake and Battlemage
   - Enable SRIOV for Panther Lake
   - Refactor VRAM manager location

  i915:
   - enable extends wake timeout
   - support device-wedged event
   - Enable DP 128b/132b SST DSC
   - FBC dirty rectangle support for display version 30+
   - convert i915/xe to drm client setup
   - Compute HDMI PLLS for rates not in fixed tables
   - Allow DSB usage when PSR is enabled on LNL+
   - Enable panel replay without full modeset
   - Enable async flips with compressed buffers on ICL+
   - support luminance based brightness via DPCD for eDP
   - enable VRR enable/disable without full modeset
   - allow GuC SLPC default strategies on MTL+ for performance
   - lots of display refactoring in move to struct intel_display

  amdgpu:
   - add device wedged event
   - support async page flips on overlay planes
   - enable broadcast RGB drm property
   - add info ioctl for virt mode
   - OEM i2c support for RGB lights
   - GC 11.5.2 + 11.5.3 support
   - SDMA 6.1.3 support
   - NBIO 7.9.1 + 7.11.2 support
   - MMHUB 1.8.1 + 3.3.2 support
   - DCN 3.6.0 support
   - Add dynamic workload profile switching for GC 10-12
   - support larger VBIOS sizes
   - Mark gttsize parameters as deprecated
   - Initial JPEG queue resset support

  amdkfd:
   - add KFD per process flags for setting precision
   - sync pasid values between KGD and KFD
   - improve GTT/VRAM handling for APUs
   - fix user queue validation on GC7/8
   - SDMA queue reset support

  raedeon:
   - rs400 hyperz fix

  i2c:
   - td998x: drop platform_data, split driver into media and bridge

  ast:
   - transmitter chip detection refactoring
   - vbios display mode refactoring
   - astdp: fix connection status and filter unsupported modes
   - cursor handling refactoring

  imagination:
   - check job dependencies with sched helper

  ivpu:
   - improve command queue handling
   - use workqueue for IRQ handling
   - add support HW fault injection
   - locking fixes

  mgag200:
   - add support for G200eH5

  msm:
   - dpu: add concurrent writeback support for DPU 10.x+
   - use LTTPR helpers
   - GPU:
     - Fix obscure GMU suspend failure
     - Expose syncobj timeline support
     - Extend GPU devcoredump with pagetable info
     - a623 support
     - Fix a6xx gen1/gen2 indexed-register blocks in gpu snapshot /
       devcoredump
   - Display:
     - Add cpu-cfg interconnect paths on SM8560 and SM8650
     - Introduce KMS OMMU fault handler, causing devcoredump snapshot
     - Fixed error pointer dereference in msm_kms_init_aspace()
   - DPU:
     - Fix mode_changing handling
     - Add writeback support on SM6150 (QCS615)
     - Fix DSC programming in 1:1:1 topology
     - Reworked hardware resource allocation, moving it to the CRTC code
     - Enabled support for Concurrent WriteBack (CWB) on SM8650
     - Enabled CDM blocks on all relevant platforms
     - Reworked debugfs interface for BW/clocks debugging
     - Clear perf params before calculating bw
     - Support YUV formats on writeback
     - Fixed double inclusion
     - Fixed writeback in YUV formats when using cloned output, Dropped
       wb2_formats_rgb
     - Corrected dpu_crtc_check_mode_changed and struct dpu_encoder_virt
       kerneldocs
     - Fixed uninitialized variable in dpu_crtc_kickoff_clone_mode()
   - DSI:
     - DSC-related fixes
     - Rework clock programming
   - DSI PHY:
     - Fix 7nm (and lower) PHY programming
     - Add proper DT schema definitions for DSI PHY clocks
   - HDMI:
     - Rework the driver, enabling the use of the HDMI Connector
       framework
   - Bindings:
     - Added eDP PHY on SA8775P

  nouveau:
   - move drm_slave_encoder interface into driver
   - nvkm: refactor GSP RPC
   - use LTTPR helpers

  mediatek:
   - HDMI fixup and refinement
   - add MT8188 dsc compatible
   - MT8365 SoC support

  panthor:
   - Expose sizes of intenral BOs via fdinfo
   - Fix race between reset and suspend
   - Improve locking

  qaic:
   - Add support for AIC200

  renesas:
   - Fix limits in DT bindings

  rockchip:
   - support rk3562-mali
   - rk3576: Add HDMI support
   - vop2: Add new display modes on RK3588 HDMI0 up to 4K
   - Don't change HDMI reference clock rate
   - Fix DT bindings
   - analogix_dp: add eDP support
   - fix shutodnw

  solomon:
   - Set SPI device table to silence warnings
   - Fix pixel and scanline encoding

  v3d:
   - handle clock

  vc4:
   - Use drm_exec
   - Use dma-resv for wait-BO ioctl
   - Remove seqno infrastructure

  virtgpu:
   - Support partial mappings of GEM objects
   - Reserve VGA resources during initialization
   - Fix UAF in virtgpu_dma_buf_free_obj()
   - Add panic support

  vkms:
   - Switch to a managed modesetting pipeline
   - Add support for ARGB8888
   - fix UAf

  xlnx:
   - Set correct DMA segment size
   - use mutex guards
   - Fix error handling
   - Fix docs"

* tag 'drm-next-2025-03-28' of https://gitlab.freedesktop.org/drm/kernel: (1762 commits)
  drm/amd/pm: Update feature list for smu_v13_0_6
  drm/amdgpu: Add parameter documentation for amdgpu_sync_fence
  drm/amdgpu/discovery: optionally use fw based ip discovery
  drm/amdgpu/discovery: use specific ip_discovery.bin for legacy asics
  drm/amdgpu/discovery: check ip_discovery fw file available
  drm/amd/pm: Remove unnecessay UQ10 to UINT conversion
  drm/amd/pm: Remove unnecessay UQ10 to UINT conversion
  drm/amdgpu/sdma_v4_4_2: update VM flush implementation for SDMA
  drm/amdgpu: Optimize VM invalidation engine allocation and synchronize GPU TLB flush
  drm/amd/amdgpu: Increase max rings to enable SDMA page ring
  drm/amdgpu: Decode deferred error type in gfx aca bank parser
  drm/amdgpu/gfx11: Add Cleaner Shader Support for GFX11.5 GPUs
  drm/amdgpu/mes: clean up SDMA HQD loop
  drm/amdgpu/mes: enable compute pipes across all MEC
  drm/amdgpu/mes: drop MES 10.x leftovers
  drm/amdgpu/mes: optimize compute loop handling
  drm/amdgpu/sdma: guilty tracking is per instance
  drm/amdgpu/sdma: fix engine reset handling
  drm/amdgpu: remove invalid usage of sched.ready
  drm/amdgpu: add cleaner shader trace point
  ...
2025-03-28 17:44:52 -07:00

908 lines
24 KiB
C

// SPDX-License-Identifier: GPL-2.0+
/* Copyright (C) 2018 Broadcom */
/**
* DOC: Broadcom V3D scheduling
*
* The shared DRM GPU scheduler is used to coordinate submitting jobs
* to the hardware. Each DRM fd (roughly a client process) gets its
* own scheduler entity, which will process jobs in order. The GPU
* scheduler will schedule the clients with a FIFO scheduling algorithm.
*
* For simplicity, and in order to keep latency low for interactive
* jobs when bulk background jobs are queued up, we submit a new job
* to the HW only when it has completed the last one, instead of
* filling up the CT[01]Q FIFOs with jobs. Similarly, we use
* `drm_sched_job_add_dependency()` to manage the dependency between bin
* and render, instead of having the clients submit jobs using the HW's
* semaphores to interlock between them.
*/
#include <linux/sched/clock.h>
#include <linux/kthread.h>
#include <drm/drm_syncobj.h>
#include "v3d_drv.h"
#include "v3d_regs.h"
#include "v3d_trace.h"
#define V3D_CSD_CFG012_WG_COUNT_SHIFT 16
static struct v3d_job *
to_v3d_job(struct drm_sched_job *sched_job)
{
return container_of(sched_job, struct v3d_job, base);
}
static struct v3d_bin_job *
to_bin_job(struct drm_sched_job *sched_job)
{
return container_of(sched_job, struct v3d_bin_job, base.base);
}
static struct v3d_render_job *
to_render_job(struct drm_sched_job *sched_job)
{
return container_of(sched_job, struct v3d_render_job, base.base);
}
static struct v3d_tfu_job *
to_tfu_job(struct drm_sched_job *sched_job)
{
return container_of(sched_job, struct v3d_tfu_job, base.base);
}
static struct v3d_csd_job *
to_csd_job(struct drm_sched_job *sched_job)
{
return container_of(sched_job, struct v3d_csd_job, base.base);
}
static struct v3d_cpu_job *
to_cpu_job(struct drm_sched_job *sched_job)
{
return container_of(sched_job, struct v3d_cpu_job, base.base);
}
static void
v3d_sched_job_free(struct drm_sched_job *sched_job)
{
struct v3d_job *job = to_v3d_job(sched_job);
v3d_job_cleanup(job);
}
void
v3d_timestamp_query_info_free(struct v3d_timestamp_query_info *query_info,
unsigned int count)
{
if (query_info->queries) {
unsigned int i;
for (i = 0; i < count; i++)
drm_syncobj_put(query_info->queries[i].syncobj);
kvfree(query_info->queries);
}
}
void
v3d_performance_query_info_free(struct v3d_performance_query_info *query_info,
unsigned int count)
{
if (query_info->queries) {
unsigned int i;
for (i = 0; i < count; i++) {
drm_syncobj_put(query_info->queries[i].syncobj);
kvfree(query_info->queries[i].kperfmon_ids);
}
kvfree(query_info->queries);
}
}
static void
v3d_cpu_job_free(struct drm_sched_job *sched_job)
{
struct v3d_cpu_job *job = to_cpu_job(sched_job);
v3d_timestamp_query_info_free(&job->timestamp_query,
job->timestamp_query.count);
v3d_performance_query_info_free(&job->performance_query,
job->performance_query.count);
v3d_job_cleanup(&job->base);
}
static void
v3d_switch_perfmon(struct v3d_dev *v3d, struct v3d_job *job)
{
struct v3d_perfmon *perfmon = v3d->global_perfmon;
if (!perfmon)
perfmon = job->perfmon;
if (perfmon == v3d->active_perfmon)
return;
if (perfmon != v3d->active_perfmon)
v3d_perfmon_stop(v3d, v3d->active_perfmon, true);
if (perfmon && v3d->active_perfmon != perfmon)
v3d_perfmon_start(v3d, perfmon);
}
static void
v3d_job_start_stats(struct v3d_job *job, enum v3d_queue queue)
{
struct v3d_dev *v3d = job->v3d;
struct v3d_file_priv *file = job->file->driver_priv;
struct v3d_stats *global_stats = &v3d->queue[queue].stats;
struct v3d_stats *local_stats = &file->stats[queue];
u64 now = local_clock();
unsigned long flags;
/*
* We only need to disable local interrupts to appease lockdep who
* otherwise would think v3d_job_start_stats vs v3d_stats_update has an
* unsafe in-irq vs no-irq-off usage problem. This is a false positive
* because all the locks are per queue and stats type, and all jobs are
* completely one at a time serialised. More specifically:
*
* 1. Locks for GPU queues are updated from interrupt handlers under a
* spin lock and started here with preemption disabled.
*
* 2. Locks for CPU queues are updated from the worker with preemption
* disabled and equally started here with preemption disabled.
*
* Therefore both are consistent.
*
* 3. Because next job can only be queued after the previous one has
* been signaled, and locks are per queue, there is also no scope for
* the start part to race with the update part.
*/
if (IS_ENABLED(CONFIG_LOCKDEP))
local_irq_save(flags);
else
preempt_disable();
write_seqcount_begin(&local_stats->lock);
local_stats->start_ns = now;
write_seqcount_end(&local_stats->lock);
write_seqcount_begin(&global_stats->lock);
global_stats->start_ns = now;
write_seqcount_end(&global_stats->lock);
if (IS_ENABLED(CONFIG_LOCKDEP))
local_irq_restore(flags);
else
preempt_enable();
}
static void
v3d_stats_update(struct v3d_stats *stats, u64 now)
{
write_seqcount_begin(&stats->lock);
stats->enabled_ns += now - stats->start_ns;
stats->jobs_completed++;
stats->start_ns = 0;
write_seqcount_end(&stats->lock);
}
void
v3d_job_update_stats(struct v3d_job *job, enum v3d_queue queue)
{
struct v3d_dev *v3d = job->v3d;
struct v3d_file_priv *file = job->file->driver_priv;
struct v3d_stats *global_stats = &v3d->queue[queue].stats;
struct v3d_stats *local_stats = &file->stats[queue];
u64 now = local_clock();
unsigned long flags;
/* See comment in v3d_job_start_stats() */
if (IS_ENABLED(CONFIG_LOCKDEP))
local_irq_save(flags);
else
preempt_disable();
v3d_stats_update(local_stats, now);
v3d_stats_update(global_stats, now);
if (IS_ENABLED(CONFIG_LOCKDEP))
local_irq_restore(flags);
else
preempt_enable();
}
static struct dma_fence *v3d_bin_job_run(struct drm_sched_job *sched_job)
{
struct v3d_bin_job *job = to_bin_job(sched_job);
struct v3d_dev *v3d = job->base.v3d;
struct drm_device *dev = &v3d->drm;
struct dma_fence *fence;
unsigned long irqflags;
if (unlikely(job->base.base.s_fence->finished.error)) {
spin_lock_irqsave(&v3d->job_lock, irqflags);
v3d->bin_job = NULL;
spin_unlock_irqrestore(&v3d->job_lock, irqflags);
return NULL;
}
/* Lock required around bin_job update vs
* v3d_overflow_mem_work().
*/
spin_lock_irqsave(&v3d->job_lock, irqflags);
v3d->bin_job = job;
/* Clear out the overflow allocation, so we don't
* reuse the overflow attached to a previous job.
*/
V3D_CORE_WRITE(0, V3D_PTB_BPOS, 0);
spin_unlock_irqrestore(&v3d->job_lock, irqflags);
v3d_invalidate_caches(v3d);
fence = v3d_fence_create(v3d, V3D_BIN);
if (IS_ERR(fence))
return NULL;
if (job->base.irq_fence)
dma_fence_put(job->base.irq_fence);
job->base.irq_fence = dma_fence_get(fence);
trace_v3d_submit_cl(dev, false, to_v3d_fence(fence)->seqno,
job->start, job->end);
v3d_job_start_stats(&job->base, V3D_BIN);
v3d_switch_perfmon(v3d, &job->base);
/* Set the current and end address of the control list.
* Writing the end register is what starts the job.
*/
if (job->qma) {
V3D_CORE_WRITE(0, V3D_CLE_CT0QMA, job->qma);
V3D_CORE_WRITE(0, V3D_CLE_CT0QMS, job->qms);
}
if (job->qts) {
V3D_CORE_WRITE(0, V3D_CLE_CT0QTS,
V3D_CLE_CT0QTS_ENABLE |
job->qts);
}
V3D_CORE_WRITE(0, V3D_CLE_CT0QBA, job->start);
V3D_CORE_WRITE(0, V3D_CLE_CT0QEA, job->end);
return fence;
}
static struct dma_fence *v3d_render_job_run(struct drm_sched_job *sched_job)
{
struct v3d_render_job *job = to_render_job(sched_job);
struct v3d_dev *v3d = job->base.v3d;
struct drm_device *dev = &v3d->drm;
struct dma_fence *fence;
if (unlikely(job->base.base.s_fence->finished.error)) {
v3d->render_job = NULL;
return NULL;
}
v3d->render_job = job;
/* Can we avoid this flush? We need to be careful of
* scheduling, though -- imagine job0 rendering to texture and
* job1 reading, and them being executed as bin0, bin1,
* render0, render1, so that render1's flush at bin time
* wasn't enough.
*/
v3d_invalidate_caches(v3d);
fence = v3d_fence_create(v3d, V3D_RENDER);
if (IS_ERR(fence))
return NULL;
if (job->base.irq_fence)
dma_fence_put(job->base.irq_fence);
job->base.irq_fence = dma_fence_get(fence);
trace_v3d_submit_cl(dev, true, to_v3d_fence(fence)->seqno,
job->start, job->end);
v3d_job_start_stats(&job->base, V3D_RENDER);
v3d_switch_perfmon(v3d, &job->base);
/* XXX: Set the QCFG */
/* Set the current and end address of the control list.
* Writing the end register is what starts the job.
*/
V3D_CORE_WRITE(0, V3D_CLE_CT1QBA, job->start);
V3D_CORE_WRITE(0, V3D_CLE_CT1QEA, job->end);
return fence;
}
static struct dma_fence *
v3d_tfu_job_run(struct drm_sched_job *sched_job)
{
struct v3d_tfu_job *job = to_tfu_job(sched_job);
struct v3d_dev *v3d = job->base.v3d;
struct drm_device *dev = &v3d->drm;
struct dma_fence *fence;
if (unlikely(job->base.base.s_fence->finished.error)) {
v3d->tfu_job = NULL;
return NULL;
}
v3d->tfu_job = job;
fence = v3d_fence_create(v3d, V3D_TFU);
if (IS_ERR(fence))
return NULL;
if (job->base.irq_fence)
dma_fence_put(job->base.irq_fence);
job->base.irq_fence = dma_fence_get(fence);
trace_v3d_submit_tfu(dev, to_v3d_fence(fence)->seqno);
v3d_job_start_stats(&job->base, V3D_TFU);
V3D_WRITE(V3D_TFU_IIA(v3d->ver), job->args.iia);
V3D_WRITE(V3D_TFU_IIS(v3d->ver), job->args.iis);
V3D_WRITE(V3D_TFU_ICA(v3d->ver), job->args.ica);
V3D_WRITE(V3D_TFU_IUA(v3d->ver), job->args.iua);
V3D_WRITE(V3D_TFU_IOA(v3d->ver), job->args.ioa);
if (v3d->ver >= 71)
V3D_WRITE(V3D_V7_TFU_IOC, job->args.v71.ioc);
V3D_WRITE(V3D_TFU_IOS(v3d->ver), job->args.ios);
V3D_WRITE(V3D_TFU_COEF0(v3d->ver), job->args.coef[0]);
if (v3d->ver >= 71 || (job->args.coef[0] & V3D_TFU_COEF0_USECOEF)) {
V3D_WRITE(V3D_TFU_COEF1(v3d->ver), job->args.coef[1]);
V3D_WRITE(V3D_TFU_COEF2(v3d->ver), job->args.coef[2]);
V3D_WRITE(V3D_TFU_COEF3(v3d->ver), job->args.coef[3]);
}
/* ICFG kicks off the job. */
V3D_WRITE(V3D_TFU_ICFG(v3d->ver), job->args.icfg | V3D_TFU_ICFG_IOC);
return fence;
}
static struct dma_fence *
v3d_csd_job_run(struct drm_sched_job *sched_job)
{
struct v3d_csd_job *job = to_csd_job(sched_job);
struct v3d_dev *v3d = job->base.v3d;
struct drm_device *dev = &v3d->drm;
struct dma_fence *fence;
int i, csd_cfg0_reg;
if (unlikely(job->base.base.s_fence->finished.error)) {
v3d->csd_job = NULL;
return NULL;
}
v3d->csd_job = job;
v3d_invalidate_caches(v3d);
fence = v3d_fence_create(v3d, V3D_CSD);
if (IS_ERR(fence))
return NULL;
if (job->base.irq_fence)
dma_fence_put(job->base.irq_fence);
job->base.irq_fence = dma_fence_get(fence);
trace_v3d_submit_csd(dev, to_v3d_fence(fence)->seqno);
v3d_job_start_stats(&job->base, V3D_CSD);
v3d_switch_perfmon(v3d, &job->base);
csd_cfg0_reg = V3D_CSD_QUEUED_CFG0(v3d->ver);
for (i = 1; i <= 6; i++)
V3D_CORE_WRITE(0, csd_cfg0_reg + 4 * i, job->args.cfg[i]);
/* Although V3D 7.1 has an eighth configuration register, we are not
* using it. Therefore, make sure it remains unused.
*
* XXX: Set the CFG7 register
*/
if (v3d->ver >= 71)
V3D_CORE_WRITE(0, V3D_V7_CSD_QUEUED_CFG7, 0);
/* CFG0 write kicks off the job. */
V3D_CORE_WRITE(0, csd_cfg0_reg, job->args.cfg[0]);
return fence;
}
static void
v3d_rewrite_csd_job_wg_counts_from_indirect(struct v3d_cpu_job *job)
{
struct v3d_indirect_csd_info *indirect_csd = &job->indirect_csd;
struct v3d_bo *bo = to_v3d_bo(job->base.bo[0]);
struct v3d_bo *indirect = to_v3d_bo(indirect_csd->indirect);
struct drm_v3d_submit_csd *args = &indirect_csd->job->args;
u32 *wg_counts;
v3d_get_bo_vaddr(bo);
v3d_get_bo_vaddr(indirect);
wg_counts = (uint32_t *)(bo->vaddr + indirect_csd->offset);
if (wg_counts[0] == 0 || wg_counts[1] == 0 || wg_counts[2] == 0)
return;
args->cfg[0] = wg_counts[0] << V3D_CSD_CFG012_WG_COUNT_SHIFT;
args->cfg[1] = wg_counts[1] << V3D_CSD_CFG012_WG_COUNT_SHIFT;
args->cfg[2] = wg_counts[2] << V3D_CSD_CFG012_WG_COUNT_SHIFT;
args->cfg[4] = DIV_ROUND_UP(indirect_csd->wg_size, 16) *
(wg_counts[0] * wg_counts[1] * wg_counts[2]) - 1;
for (int i = 0; i < 3; i++) {
/* 0xffffffff indicates that the uniform rewrite is not needed */
if (indirect_csd->wg_uniform_offsets[i] != 0xffffffff) {
u32 uniform_idx = indirect_csd->wg_uniform_offsets[i];
((uint32_t *)indirect->vaddr)[uniform_idx] = wg_counts[i];
}
}
v3d_put_bo_vaddr(indirect);
v3d_put_bo_vaddr(bo);
}
static void
v3d_timestamp_query(struct v3d_cpu_job *job)
{
struct v3d_timestamp_query_info *timestamp_query = &job->timestamp_query;
struct v3d_bo *bo = to_v3d_bo(job->base.bo[0]);
u8 *value_addr;
v3d_get_bo_vaddr(bo);
for (int i = 0; i < timestamp_query->count; i++) {
value_addr = ((u8 *)bo->vaddr) + timestamp_query->queries[i].offset;
*((u64 *)value_addr) = i == 0 ? ktime_get_ns() : 0ull;
drm_syncobj_replace_fence(timestamp_query->queries[i].syncobj,
job->base.done_fence);
}
v3d_put_bo_vaddr(bo);
}
static void
v3d_reset_timestamp_queries(struct v3d_cpu_job *job)
{
struct v3d_timestamp_query_info *timestamp_query = &job->timestamp_query;
struct v3d_timestamp_query *queries = timestamp_query->queries;
struct v3d_bo *bo = to_v3d_bo(job->base.bo[0]);
u8 *value_addr;
v3d_get_bo_vaddr(bo);
for (int i = 0; i < timestamp_query->count; i++) {
value_addr = ((u8 *)bo->vaddr) + queries[i].offset;
*((u64 *)value_addr) = 0;
drm_syncobj_replace_fence(queries[i].syncobj, NULL);
}
v3d_put_bo_vaddr(bo);
}
static void write_to_buffer_32(u32 *dst, unsigned int idx, u32 value)
{
dst[idx] = value;
}
static void write_to_buffer_64(u64 *dst, unsigned int idx, u64 value)
{
dst[idx] = value;
}
static void
write_to_buffer(void *dst, unsigned int idx, bool do_64bit, u64 value)
{
if (do_64bit)
write_to_buffer_64(dst, idx, value);
else
write_to_buffer_32(dst, idx, value);
}
static void
v3d_copy_query_results(struct v3d_cpu_job *job)
{
struct v3d_timestamp_query_info *timestamp_query = &job->timestamp_query;
struct v3d_timestamp_query *queries = timestamp_query->queries;
struct v3d_bo *bo = to_v3d_bo(job->base.bo[0]);
struct v3d_bo *timestamp = to_v3d_bo(job->base.bo[1]);
struct v3d_copy_query_results_info *copy = &job->copy;
struct dma_fence *fence;
u8 *query_addr;
bool available, write_result;
u8 *data;
int i;
v3d_get_bo_vaddr(bo);
v3d_get_bo_vaddr(timestamp);
data = ((u8 *)bo->vaddr) + copy->offset;
for (i = 0; i < timestamp_query->count; i++) {
fence = drm_syncobj_fence_get(queries[i].syncobj);
available = fence ? dma_fence_is_signaled(fence) : false;
write_result = available || copy->do_partial;
if (write_result) {
query_addr = ((u8 *)timestamp->vaddr) + queries[i].offset;
write_to_buffer(data, 0, copy->do_64bit, *((u64 *)query_addr));
}
if (copy->availability_bit)
write_to_buffer(data, 1, copy->do_64bit, available ? 1u : 0u);
data += copy->stride;
dma_fence_put(fence);
}
v3d_put_bo_vaddr(timestamp);
v3d_put_bo_vaddr(bo);
}
static void
v3d_reset_performance_queries(struct v3d_cpu_job *job)
{
struct v3d_performance_query_info *performance_query = &job->performance_query;
struct v3d_file_priv *v3d_priv = job->base.file->driver_priv;
struct v3d_dev *v3d = job->base.v3d;
struct v3d_perfmon *perfmon;
for (int i = 0; i < performance_query->count; i++) {
for (int j = 0; j < performance_query->nperfmons; j++) {
perfmon = v3d_perfmon_find(v3d_priv,
performance_query->queries[i].kperfmon_ids[j]);
if (!perfmon) {
DRM_DEBUG("Failed to find perfmon.");
continue;
}
v3d_perfmon_stop(v3d, perfmon, false);
memset(perfmon->values, 0, perfmon->ncounters * sizeof(u64));
v3d_perfmon_put(perfmon);
}
drm_syncobj_replace_fence(performance_query->queries[i].syncobj, NULL);
}
}
static void
v3d_write_performance_query_result(struct v3d_cpu_job *job, void *data,
unsigned int query)
{
struct v3d_performance_query_info *performance_query =
&job->performance_query;
struct v3d_file_priv *v3d_priv = job->base.file->driver_priv;
struct v3d_performance_query *perf_query =
&performance_query->queries[query];
struct v3d_dev *v3d = job->base.v3d;
unsigned int i, j, offset;
for (i = 0, offset = 0;
i < performance_query->nperfmons;
i++, offset += DRM_V3D_MAX_PERF_COUNTERS) {
struct v3d_perfmon *perfmon;
perfmon = v3d_perfmon_find(v3d_priv,
perf_query->kperfmon_ids[i]);
if (!perfmon) {
DRM_DEBUG("Failed to find perfmon.");
continue;
}
v3d_perfmon_stop(v3d, perfmon, true);
if (job->copy.do_64bit) {
for (j = 0; j < perfmon->ncounters; j++)
write_to_buffer_64(data, offset + j,
perfmon->values[j]);
} else {
for (j = 0; j < perfmon->ncounters; j++)
write_to_buffer_32(data, offset + j,
perfmon->values[j]);
}
v3d_perfmon_put(perfmon);
}
}
static void
v3d_copy_performance_query(struct v3d_cpu_job *job)
{
struct v3d_performance_query_info *performance_query = &job->performance_query;
struct v3d_copy_query_results_info *copy = &job->copy;
struct v3d_bo *bo = to_v3d_bo(job->base.bo[0]);
struct dma_fence *fence;
bool available, write_result;
u8 *data;
v3d_get_bo_vaddr(bo);
data = ((u8 *)bo->vaddr) + copy->offset;
for (int i = 0; i < performance_query->count; i++) {
fence = drm_syncobj_fence_get(performance_query->queries[i].syncobj);
available = fence ? dma_fence_is_signaled(fence) : false;
write_result = available || copy->do_partial;
if (write_result)
v3d_write_performance_query_result(job, data, i);
if (copy->availability_bit)
write_to_buffer(data, performance_query->ncounters,
copy->do_64bit, available ? 1u : 0u);
data += copy->stride;
dma_fence_put(fence);
}
v3d_put_bo_vaddr(bo);
}
static const v3d_cpu_job_fn cpu_job_function[] = {
[V3D_CPU_JOB_TYPE_INDIRECT_CSD] = v3d_rewrite_csd_job_wg_counts_from_indirect,
[V3D_CPU_JOB_TYPE_TIMESTAMP_QUERY] = v3d_timestamp_query,
[V3D_CPU_JOB_TYPE_RESET_TIMESTAMP_QUERY] = v3d_reset_timestamp_queries,
[V3D_CPU_JOB_TYPE_COPY_TIMESTAMP_QUERY] = v3d_copy_query_results,
[V3D_CPU_JOB_TYPE_RESET_PERFORMANCE_QUERY] = v3d_reset_performance_queries,
[V3D_CPU_JOB_TYPE_COPY_PERFORMANCE_QUERY] = v3d_copy_performance_query,
};
static struct dma_fence *
v3d_cpu_job_run(struct drm_sched_job *sched_job)
{
struct v3d_cpu_job *job = to_cpu_job(sched_job);
struct v3d_dev *v3d = job->base.v3d;
if (job->job_type >= ARRAY_SIZE(cpu_job_function)) {
DRM_DEBUG_DRIVER("Unknown CPU job: %d\n", job->job_type);
return NULL;
}
v3d_job_start_stats(&job->base, V3D_CPU);
trace_v3d_cpu_job_begin(&v3d->drm, job->job_type);
cpu_job_function[job->job_type](job);
trace_v3d_cpu_job_end(&v3d->drm, job->job_type);
v3d_job_update_stats(&job->base, V3D_CPU);
return NULL;
}
static struct dma_fence *
v3d_cache_clean_job_run(struct drm_sched_job *sched_job)
{
struct v3d_job *job = to_v3d_job(sched_job);
struct v3d_dev *v3d = job->v3d;
v3d_job_start_stats(job, V3D_CACHE_CLEAN);
v3d_clean_caches(v3d);
v3d_job_update_stats(job, V3D_CACHE_CLEAN);
return NULL;
}
static enum drm_gpu_sched_stat
v3d_gpu_reset_for_timeout(struct v3d_dev *v3d, struct drm_sched_job *sched_job)
{
enum v3d_queue q;
mutex_lock(&v3d->reset_lock);
/* block scheduler */
for (q = 0; q < V3D_MAX_QUEUES; q++)
drm_sched_stop(&v3d->queue[q].sched, sched_job);
if (sched_job)
drm_sched_increase_karma(sched_job);
/* get the GPU back into the init state */
v3d_reset(v3d);
for (q = 0; q < V3D_MAX_QUEUES; q++)
drm_sched_resubmit_jobs(&v3d->queue[q].sched);
/* Unblock schedulers and restart their jobs. */
for (q = 0; q < V3D_MAX_QUEUES; q++) {
drm_sched_start(&v3d->queue[q].sched, 0);
}
mutex_unlock(&v3d->reset_lock);
return DRM_GPU_SCHED_STAT_NOMINAL;
}
/* If the current address or return address have changed, then the GPU
* has probably made progress and we should delay the reset. This
* could fail if the GPU got in an infinite loop in the CL, but that
* is pretty unlikely outside of an i-g-t testcase.
*/
static enum drm_gpu_sched_stat
v3d_cl_job_timedout(struct drm_sched_job *sched_job, enum v3d_queue q,
u32 *timedout_ctca, u32 *timedout_ctra)
{
struct v3d_job *job = to_v3d_job(sched_job);
struct v3d_dev *v3d = job->v3d;
u32 ctca = V3D_CORE_READ(0, V3D_CLE_CTNCA(q));
u32 ctra = V3D_CORE_READ(0, V3D_CLE_CTNRA(q));
if (*timedout_ctca != ctca || *timedout_ctra != ctra) {
*timedout_ctca = ctca;
*timedout_ctra = ctra;
return DRM_GPU_SCHED_STAT_NOMINAL;
}
return v3d_gpu_reset_for_timeout(v3d, sched_job);
}
static enum drm_gpu_sched_stat
v3d_bin_job_timedout(struct drm_sched_job *sched_job)
{
struct v3d_bin_job *job = to_bin_job(sched_job);
return v3d_cl_job_timedout(sched_job, V3D_BIN,
&job->timedout_ctca, &job->timedout_ctra);
}
static enum drm_gpu_sched_stat
v3d_render_job_timedout(struct drm_sched_job *sched_job)
{
struct v3d_render_job *job = to_render_job(sched_job);
return v3d_cl_job_timedout(sched_job, V3D_RENDER,
&job->timedout_ctca, &job->timedout_ctra);
}
static enum drm_gpu_sched_stat
v3d_generic_job_timedout(struct drm_sched_job *sched_job)
{
struct v3d_job *job = to_v3d_job(sched_job);
return v3d_gpu_reset_for_timeout(job->v3d, sched_job);
}
static enum drm_gpu_sched_stat
v3d_csd_job_timedout(struct drm_sched_job *sched_job)
{
struct v3d_csd_job *job = to_csd_job(sched_job);
struct v3d_dev *v3d = job->base.v3d;
u32 batches = V3D_CORE_READ(0, V3D_CSD_CURRENT_CFG4(v3d->ver));
/* If we've made progress, skip reset and let the timer get
* rearmed.
*/
if (job->timedout_batches != batches) {
job->timedout_batches = batches;
return DRM_GPU_SCHED_STAT_NOMINAL;
}
return v3d_gpu_reset_for_timeout(v3d, sched_job);
}
static const struct drm_sched_backend_ops v3d_bin_sched_ops = {
.run_job = v3d_bin_job_run,
.timedout_job = v3d_bin_job_timedout,
.free_job = v3d_sched_job_free,
};
static const struct drm_sched_backend_ops v3d_render_sched_ops = {
.run_job = v3d_render_job_run,
.timedout_job = v3d_render_job_timedout,
.free_job = v3d_sched_job_free,
};
static const struct drm_sched_backend_ops v3d_tfu_sched_ops = {
.run_job = v3d_tfu_job_run,
.timedout_job = v3d_generic_job_timedout,
.free_job = v3d_sched_job_free,
};
static const struct drm_sched_backend_ops v3d_csd_sched_ops = {
.run_job = v3d_csd_job_run,
.timedout_job = v3d_csd_job_timedout,
.free_job = v3d_sched_job_free
};
static const struct drm_sched_backend_ops v3d_cache_clean_sched_ops = {
.run_job = v3d_cache_clean_job_run,
.timedout_job = v3d_generic_job_timedout,
.free_job = v3d_sched_job_free
};
static const struct drm_sched_backend_ops v3d_cpu_sched_ops = {
.run_job = v3d_cpu_job_run,
.timedout_job = v3d_generic_job_timedout,
.free_job = v3d_cpu_job_free
};
static int
v3d_queue_sched_init(struct v3d_dev *v3d, const struct drm_sched_backend_ops *ops,
enum v3d_queue queue, const char *name)
{
struct drm_sched_init_args args = {
.num_rqs = DRM_SCHED_PRIORITY_COUNT,
.credit_limit = 1,
.timeout = msecs_to_jiffies(500),
.dev = v3d->drm.dev,
};
args.ops = ops;
args.name = name;
return drm_sched_init(&v3d->queue[queue].sched, &args);
}
int
v3d_sched_init(struct v3d_dev *v3d)
{
int ret;
ret = v3d_queue_sched_init(v3d, &v3d_bin_sched_ops, V3D_BIN, "v3d_bin");
if (ret)
return ret;
ret = v3d_queue_sched_init(v3d, &v3d_render_sched_ops, V3D_RENDER,
"v3d_render");
if (ret)
goto fail;
ret = v3d_queue_sched_init(v3d, &v3d_tfu_sched_ops, V3D_TFU, "v3d_tfu");
if (ret)
goto fail;
if (v3d_has_csd(v3d)) {
ret = v3d_queue_sched_init(v3d, &v3d_csd_sched_ops, V3D_CSD,
"v3d_csd");
if (ret)
goto fail;
ret = v3d_queue_sched_init(v3d, &v3d_cache_clean_sched_ops,
V3D_CACHE_CLEAN, "v3d_cache_clean");
if (ret)
goto fail;
}
ret = v3d_queue_sched_init(v3d, &v3d_cpu_sched_ops, V3D_CPU, "v3d_cpu");
if (ret)
goto fail;
return 0;
fail:
v3d_sched_fini(v3d);
return ret;
}
void
v3d_sched_fini(struct v3d_dev *v3d)
{
enum v3d_queue q;
for (q = 0; q < V3D_MAX_QUEUES; q++) {
if (v3d->queue[q].sched.ready)
drm_sched_fini(&v3d->queue[q].sched);
}
}