Merge tag 'drm-xe-next-fixes-2025-03-12' of https://gitlab.freedesktop.org/drm/xe/kernel into drm-next

Core Changes:
- Fix kernel-doc for gpusvm (Lucas)

Driver Changes:
- Drop duplicated pc_start call (Rodrigo)
- Drop sentinels from rtp (Lucas)
- Fix MOCS debugfs missing forcewake (Tvrtko)
- Ring flush invalidation (Tvrtko)
- Fix type for width alignment (Tvrtko)

Signed-off-by: Dave Airlie <airlied@redhat.com>
From: Lucas De Marchi <lucas.demarchi@intel.com>
Link: https://patchwork.freedesktop.org/patch/msgid/fsztfqcddrarwjlxjwm2k4wvc6u5vntceh6b7nsnxjmwzgtunj@sbkshjow65rf
commit 5da39dce1f
@@ -67,14 +67,19 @@ Agreed upon design principles
Overview of baseline design
===========================

Baseline design is simple as possible to get a working basline in which can be
built upon.

.. kernel-doc:: drivers/gpu/drm/xe/drm_gpusvm.c
.. kernel-doc:: drivers/gpu/drm/drm_gpusvm.c
   :doc: Overview

.. kernel-doc:: drivers/gpu/drm/drm_gpusvm.c
   :doc: Locking
   :doc: Migrataion

.. kernel-doc:: drivers/gpu/drm/drm_gpusvm.c
   :doc: Migration

.. kernel-doc:: drivers/gpu/drm/drm_gpusvm.c
   :doc: Partial Unmapping of Ranges

.. kernel-doc:: drivers/gpu/drm/drm_gpusvm.c
   :doc: Examples

Possible future design features
@@ -23,37 +23,42 @@
* DOC: Overview
*
* GPU Shared Virtual Memory (GPU SVM) layer for the Direct Rendering Manager (DRM)
*
* The GPU SVM layer is a component of the DRM framework designed to manage shared
* virtual memory between the CPU and GPU. It enables efficient data exchange and
* processing for GPU-accelerated applications by allowing memory sharing and
* is a component of the DRM framework designed to manage shared virtual memory
* between the CPU and GPU. It enables efficient data exchange and processing
* for GPU-accelerated applications by allowing memory sharing and
* synchronization between the CPU's and GPU's virtual address spaces.
*
* Key GPU SVM Components:
* - Notifiers: Used for tracking memory intervals and notifying the
* GPU of changes, notifiers are sized based on a GPU SVM
* initialization parameter, with a recommendation of 512M or
* larger. They maintain a Red-BlacK tree and a list of ranges that
* fall within the notifier interval. Notifiers are tracked within
* a GPU SVM Red-BlacK tree and list and are dynamically inserted
* or removed as ranges within the interval are created or
* destroyed.
* - Ranges: Represent memory ranges mapped in a DRM device and managed
* by GPU SVM. They are sized based on an array of chunk sizes, which
* is a GPU SVM initialization parameter, and the CPU address space.
* Upon GPU fault, the largest aligned chunk that fits within the
* faulting CPU address space is chosen for the range size. Ranges are
* expected to be dynamically allocated on GPU fault and removed on an
* MMU notifier UNMAP event. As mentioned above, ranges are tracked in
* a notifier's Red-Black tree.
* - Operations: Define the interface for driver-specific GPU SVM operations
* such as range allocation, notifier allocation, and
* invalidations.
* - Device Memory Allocations: Embedded structure containing enough information
* for GPU SVM to migrate to / from device memory.
* - Device Memory Operations: Define the interface for driver-specific device
* memory operations release memory, populate pfns,
* and copy to / from device memory.
*
* - Notifiers:
* Used for tracking memory intervals and notifying the GPU of changes,
* notifiers are sized based on a GPU SVM initialization parameter, with a
* recommendation of 512M or larger. They maintain a Red-BlacK tree and a
* list of ranges that fall within the notifier interval. Notifiers are
* tracked within a GPU SVM Red-BlacK tree and list and are dynamically
* inserted or removed as ranges within the interval are created or
* destroyed.
* - Ranges:
* Represent memory ranges mapped in a DRM device and managed by GPU SVM.
* They are sized based on an array of chunk sizes, which is a GPU SVM
* initialization parameter, and the CPU address space. Upon GPU fault,
* the largest aligned chunk that fits within the faulting CPU address
* space is chosen for the range size. Ranges are expected to be
* dynamically allocated on GPU fault and removed on an MMU notifier UNMAP
* event. As mentioned above, ranges are tracked in a notifier's Red-Black
* tree.
*
* - Operations:
* Define the interface for driver-specific GPU SVM operations such as
* range allocation, notifier allocation, and invalidations.
*
* - Device Memory Allocations:
* Embedded structure containing enough information for GPU SVM to migrate
* to / from device memory.
*
* - Device Memory Operations:
* Define the interface for driver-specific device memory operations
* release memory, populate pfns, and copy to / from device memory.
*
* This layer provides interfaces for allocating, mapping, migrating, and
* releasing memory ranges between the CPU and GPU. It handles all core memory
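As an illustration of the chunk-size selection described in the Ranges bullet above, here is a minimal standalone sketch (not part of this commit and not the drm_gpusvm implementation; the function name and the chunk-array layout are assumptions):

#include <stddef.h>
#include <stdint.h>

/*
 * Pick the largest aligned chunk that still fits inside the faulting
 * CPU VMA. chunk_sizes[] is assumed sorted from largest to smallest,
 * each entry a power of two, mirroring the "array of chunk sizes"
 * initialization parameter described above.
 */
static uint64_t pick_range_size(uint64_t fault_addr,
                                uint64_t vma_start, uint64_t vma_end,
                                const uint64_t *chunk_sizes, size_t n_chunks)
{
        size_t i;

        for (i = 0; i < n_chunks; i++) {
                uint64_t size = chunk_sizes[i];
                uint64_t start = fault_addr & ~(size - 1); /* align down */

                if (start >= vma_start && start + size <= vma_end)
                        return size; /* largest aligned chunk that fits */
        }

        return chunk_sizes[n_chunks - 1]; /* fall back to the smallest chunk */
}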
@@ -63,14 +68,18 @@
* below.
*
* Expected Driver Components:
* - GPU page fault handler: Used to create ranges and notifiers based on the
* fault address, optionally migrate the range to
* device memory, and create GPU bindings.
* - Garbage collector: Used to unmap and destroy GPU bindings for ranges.
* Ranges are expected to be added to the garbage collector
* upon a MMU_NOTIFY_UNMAP event in notifier callback.
* - Notifier callback: Used to invalidate and DMA unmap GPU bindings for
* ranges.
*
* - GPU page fault handler:
* Used to create ranges and notifiers based on the fault address,
* optionally migrate the range to device memory, and create GPU bindings.
*
* - Garbage collector:
* Used to unmap and destroy GPU bindings for ranges. Ranges are expected
* to be added to the garbage collector upon a MMU_NOTIFY_UNMAP event in
* notifier callback.
*
* - Notifier callback:
* Used to invalidate and DMA unmap GPU bindings for ranges.
*/

/**
@@ -83,9 +92,9 @@
* range RB tree and list, as well as the range's DMA mappings and sequence
* number. GPU SVM manages all necessary locking and unlocking operations,
* except for the recheck range's pages being valid
* (drm_gpusvm_range_pages_valid) when the driver is committing GPU bindings. This
* lock corresponds to the 'driver->update' lock mentioned in the HMM
* documentation (TODO: Link). Future revisions may transition from a GPU SVM
* (drm_gpusvm_range_pages_valid) when the driver is committing GPU bindings.
* This lock corresponds to the ``driver->update`` lock mentioned in
* Documentation/mm/hmm.rst. Future revisions may transition from a GPU SVM
* global lock to a per-notifier lock if finer-grained locking is deemed
* necessary.
*
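To make the locking contract above concrete, here is a hedged sketch of a driver-side bind path (not part of this commit; the lock, the helper names and the exact drm_gpusvm_range_pages_valid() signature are assumptions based on the text above):

/* Hypothetical driver commit path honouring the 'driver->update' lock. */
static int example_commit_bindings(struct example_vm *vm,
                                   struct drm_gpusvm *gpusvm,
                                   struct drm_gpusvm_range *range)
{
        int err = -EAGAIN;

        mutex_lock(&vm->update_lock); /* the driver->update lock from hmm.rst */

        /* Pages may have been invalidated since they were grabbed; GPU SVM
         * leaves exactly this recheck to the driver.
         */
        if (drm_gpusvm_range_pages_valid(gpusvm, range))
                err = example_program_page_tables(vm, range); /* hypothetical */

        mutex_unlock(&vm->update_lock);

        return err; /* caller retries the GPU fault on -EAGAIN */
}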
@@ -102,11 +111,11 @@
* DOC: Migration
*
* The migration support is quite simple, allowing migration between RAM and
* device memory at the range granularity. For example, GPU SVM currently does not
* support mixing RAM and device memory pages within a range. This means that upon GPU
* fault, the entire range can be migrated to device memory, and upon CPU fault, the
* entire range is migrated to RAM. Mixed RAM and device memory storage within a range
* could be added in the future if required.
* device memory at the range granularity. For example, GPU SVM currently does
* not support mixing RAM and device memory pages within a range. This means
* that upon GPU fault, the entire range can be migrated to device memory, and
* upon CPU fault, the entire range is migrated to RAM. Mixed RAM and device
* memory storage within a range could be added in the future if required.
*
* The reasoning for only supporting range granularity is as follows: it
* simplifies the implementation, and range sizes are driver-defined and should
@@ -119,11 +128,11 @@
* Partial unmapping of ranges (e.g., 1M out of 2M is unmapped by CPU resulting
* in MMU_NOTIFY_UNMAP event) presents several challenges, with the main one
* being that a subset of the range still has CPU and GPU mappings. If the
* backing store for the range is in device memory, a subset of the backing store has
* references. One option would be to split the range and device memory backing store,
* but the implementation for this would be quite complicated. Given that
* partial unmappings are rare and driver-defined range sizes are relatively
* small, GPU SVM does not support splitting of ranges.
* backing store for the range is in device memory, a subset of the backing
* store has references. One option would be to split the range and device
* memory backing store, but the implementation for this would be quite
* complicated. Given that partial unmappings are rare and driver-defined range
* sizes are relatively small, GPU SVM does not support splitting of ranges.
*
* With no support for range splitting, upon partial unmapping of a range, the
* driver is expected to invalidate and destroy the entire range. If the range
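As an illustration of the policy above (not part of the diff; every name here is hypothetical and the body is only a sketch), a notifier callback that retires whole ranges on MMU_NOTIFY_UNMAP instead of trying to split them:

/* Sketch only: on partial unmap, invalidate and queue the whole range. */
void example_invalidation(struct drm_gpusvm *gpusvm,
                          struct drm_gpusvm_notifier *notifier,
                          const struct mmu_notifier_range *mmu_range)
{
        struct example_vm *vm = example_vm_from_gpusvm(gpusvm); /* hypothetical */
        struct drm_gpusvm_range *range;

        /* Walk the ranges of this notifier overlapping
         * [mmu_range->start, mmu_range->end) with whatever iterator the
         * driver has available, then for each one:
         */
        for_each_overlapping_range(range, notifier, mmu_range) {   /* hypothetical */
                example_unbind_range(vm, range);                   /* hypothetical */

                if (mmu_range->event == MMU_NOTIFY_UNMAP)
                        /* No range splitting: drop the entire range and let
                         * the next GPU fault recreate what is still mapped.
                         */
                        example_garbage_collector_add(vm, range);  /* hypothetical */
        }
}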
@@ -144,6 +153,8 @@
*
* 1) GPU page fault handler
*
* .. code-block:: c
*
* int driver_bind_range(struct drm_gpusvm *gpusvm, struct drm_gpusvm_range *range)
* {
* int err = 0;
@@ -208,7 +219,9 @@
* return err;
* }
*
* 2) Garbage Collector.
* 2) Garbage Collector
*
* .. code-block:: c
*
* void __driver_garbage_collector(struct drm_gpusvm *gpusvm,
* struct drm_gpusvm_range *range)
@@ -231,7 +244,9 @@
* __driver_garbage_collector(gpusvm, range);
* }
*
* 3) Notifier callback.
* 3) Notifier callback
*
* .. code-block:: c
*
* void driver_invalidation(struct drm_gpusvm *gpusvm,
* struct drm_gpusvm_notifier *notifier,
@@ -499,7 +514,7 @@ drm_gpusvm_notifier_invalidate(struct mmu_interval_notifier *mni,
return true;
}

/**
/*
* drm_gpusvm_notifier_ops - MMU interval notifier operations for GPU SVM
*/
static const struct mmu_interval_notifier_ops drm_gpusvm_notifier_ops = {
@@ -2055,7 +2070,6 @@ err_out:

/**
* drm_gpusvm_range_evict - Evict GPU SVM range
* @pagemap: Pointer to the GPU SVM structure
* @range: Pointer to the GPU SVM range to be removed
*
* This function evicts the specified GPU SVM range. This function will not
@@ -2146,8 +2160,8 @@ static vm_fault_t drm_gpusvm_migrate_to_ram(struct vm_fault *vmf)
return err ? VM_FAULT_SIGBUS : 0;
}

/**
* drm_gpusvm_pagemap_ops() - Device page map operations for GPU SVM
/*
* drm_gpusvm_pagemap_ops - Device page map operations for GPU SVM
*/
static const struct dev_pagemap_ops drm_gpusvm_pagemap_ops = {
.page_free = drm_gpusvm_page_free,
@@ -82,7 +82,7 @@ write_dpt_remapped(struct xe_bo *bo, struct iosys_map *map, u32 *dpt_ofs,
static int __xe_pin_fb_vma_dpt(const struct intel_framebuffer *fb,
const struct i915_gtt_view *view,
struct i915_vma *vma,
u64 physical_alignment)
unsigned int alignment)
{
struct xe_device *xe = to_xe_device(fb->base.dev);
struct xe_tile *tile0 = xe_device_get_root_tile(xe);
@@ -108,7 +108,7 @@ static int __xe_pin_fb_vma_dpt(const struct intel_framebuffer *fb,
XE_BO_FLAG_VRAM0 |
XE_BO_FLAG_GGTT |
XE_BO_FLAG_PAGETABLE,
physical_alignment);
alignment);
else
dpt = xe_bo_create_pin_map_at_aligned(xe, tile0, NULL,
dpt_size, ~0ull,
@@ -116,7 +116,7 @@ static int __xe_pin_fb_vma_dpt(const struct intel_framebuffer *fb,
XE_BO_FLAG_STOLEN |
XE_BO_FLAG_GGTT |
XE_BO_FLAG_PAGETABLE,
physical_alignment);
alignment);
if (IS_ERR(dpt))
dpt = xe_bo_create_pin_map_at_aligned(xe, tile0, NULL,
dpt_size, ~0ull,
@@ -124,7 +124,7 @@ static int __xe_pin_fb_vma_dpt(const struct intel_framebuffer *fb,
XE_BO_FLAG_SYSTEM |
XE_BO_FLAG_GGTT |
XE_BO_FLAG_PAGETABLE,
physical_alignment);
alignment);
if (IS_ERR(dpt))
return PTR_ERR(dpt);

@@ -194,7 +194,7 @@ write_ggtt_rotated(struct xe_bo *bo, struct xe_ggtt *ggtt, u32 *ggtt_ofs, u32 bo
static int __xe_pin_fb_vma_ggtt(const struct intel_framebuffer *fb,
const struct i915_gtt_view *view,
struct i915_vma *vma,
u64 physical_alignment)
unsigned int alignment)
{
struct drm_gem_object *obj = intel_fb_bo(&fb->base);
struct xe_bo *bo = gem_to_xe_bo(obj);
@@ -277,7 +277,7 @@ out:

static struct i915_vma *__xe_pin_fb_vma(const struct intel_framebuffer *fb,
const struct i915_gtt_view *view,
u64 physical_alignment)
unsigned int alignment)
{
struct drm_device *dev = fb->base.dev;
struct xe_device *xe = to_xe_device(dev);
@@ -327,9 +327,9 @@ static struct i915_vma *__xe_pin_fb_vma(const struct intel_framebuffer *fb,

vma->bo = bo;
if (intel_fb_uses_dpt(&fb->base))
ret = __xe_pin_fb_vma_dpt(fb, view, vma, physical_alignment);
ret = __xe_pin_fb_vma_dpt(fb, view, vma, alignment);
else
ret = __xe_pin_fb_vma_ggtt(fb, view, vma, physical_alignment);
ret = __xe_pin_fb_vma_ggtt(fb, view, vma, alignment);
if (ret)
goto err_unpin;

@@ -422,7 +422,7 @@ int intel_plane_pin_fb(struct intel_plane_state *new_plane_state,
struct i915_vma *vma;
struct intel_framebuffer *intel_fb = to_intel_framebuffer(fb);
struct intel_plane *plane = to_intel_plane(new_plane_state->uapi.plane);
u64 phys_alignment = plane->min_alignment(plane, fb, 0);
unsigned int alignment = plane->min_alignment(plane, fb, 0);

if (reuse_vma(new_plane_state, old_plane_state))
return 0;
@@ -430,7 +430,7 @@ int intel_plane_pin_fb(struct intel_plane_state *new_plane_state,
/* We reject creating !SCANOUT fb's, so this is weird.. */
drm_WARN_ON(bo->ttm.base.dev, !(bo->flags & XE_BO_FLAG_SCANOUT));

vma = __xe_pin_fb_vma(intel_fb, &new_plane_state->view.gtt, phys_alignment);
vma = __xe_pin_fb_vma(intel_fb, &new_plane_state->view.gtt, alignment);

if (IS_ERR(vma))
return PTR_ERR(vma);
@@ -320,7 +320,7 @@ static void xe_rtp_process_to_sr_tests(struct kunit *test)
count_rtp_entries++;

xe_rtp_process_ctx_enable_active_tracking(&ctx, &active, count_rtp_entries);
xe_rtp_process_to_sr(&ctx, param->entries, reg_sr);
xe_rtp_process_to_sr(&ctx, param->entries, count_rtp_entries, reg_sr);

xa_for_each(&reg_sr->xa, idx, sre) {
if (idx == param->expected_reg.addr)
@@ -1496,14 +1496,6 @@ void xe_guc_stop(struct xe_guc *guc)

int xe_guc_start(struct xe_guc *guc)
{
if (!IS_SRIOV_VF(guc_to_xe(guc))) {
int err;

err = xe_guc_pc_start(&guc->pc);
xe_gt_WARN(guc_to_gt(guc), err, "Failed to start GuC PC: %pe\n",
ERR_PTR(err));
}

return xe_guc_submit_start(guc);
}

@@ -400,10 +400,9 @@ xe_hw_engine_setup_default_lrc_state(struct xe_hw_engine *hwe)
PREEMPT_GPGPU_THREAD_GROUP_LEVEL)),
XE_RTP_ENTRY_FLAG(FOREACH_ENGINE)
},
{}
};

xe_rtp_process_to_sr(&ctx, lrc_setup, &hwe->reg_lrc);
xe_rtp_process_to_sr(&ctx, lrc_setup, ARRAY_SIZE(lrc_setup), &hwe->reg_lrc);
}

static void
@@ -459,10 +458,9 @@ hw_engine_setup_default_state(struct xe_hw_engine *hwe)
XE_RTP_ACTIONS(SET(CSFE_CHICKEN1(0), CS_PRIORITY_MEM_READ,
XE_RTP_ACTION_FLAG(ENGINE_BASE)))
},
{}
};

xe_rtp_process_to_sr(&ctx, engine_entries, &hwe->reg_sr);
xe_rtp_process_to_sr(&ctx, engine_entries, ARRAY_SIZE(engine_entries), &hwe->reg_sr);
}

static const struct engine_info *find_engine_info(enum xe_engine_class class, int instance)

@@ -781,7 +781,9 @@ void xe_mocs_dump(struct xe_gt *gt, struct drm_printer *p)
flags = get_mocs_settings(xe, &table);

xe_pm_runtime_get_noresume(xe);
fw_ref = xe_force_wake_get(gt_to_fw(gt), XE_FW_GT);
fw_ref = xe_force_wake_get(gt_to_fw(gt),
flags & HAS_LNCF_MOCS ?
XE_FORCEWAKE_ALL : XE_FW_GT);
if (!fw_ref)
goto err_fw;
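The hunk above wakes XE_FORCEWAKE_ALL instead of only XE_FW_GT when the MOCS table also carries LNCF entries, presumably because those registers need more than the GT domain powered for the debugfs dump. A condensed sketch of the acquire/release pattern (not part of this commit; the helper name and the xe_force_wake_put() counterpart are assumptions):

/* Hypothetical condensed form of the pattern used above. */
static void example_dump_mocs(struct xe_gt *gt, struct drm_printer *p,
                              unsigned int flags)
{
        unsigned int fw_ref;

        fw_ref = xe_force_wake_get(gt_to_fw(gt),
                                   flags & HAS_LNCF_MOCS ?
                                   XE_FORCEWAKE_ALL : XE_FW_GT);
        if (!fw_ref)
                return; /* nothing woke up, bail out */

        /* ... read and print the MOCS registers here ... */

        xe_force_wake_put(gt_to_fw(gt), fw_ref); /* assumed counterpart */
}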
@@ -88,7 +88,6 @@ static const struct xe_rtp_entry_sr register_whitelist[] = {
RING_FORCE_TO_NONPRIV_ACCESS_RD |
RING_FORCE_TO_NONPRIV_RANGE_4))
},
{}
};

static void whitelist_apply_to_hwe(struct xe_hw_engine *hwe)
@@ -137,7 +136,8 @@ void xe_reg_whitelist_process_engine(struct xe_hw_engine *hwe)
{
struct xe_rtp_process_ctx ctx = XE_RTP_PROCESS_CTX_INITIALIZER(hwe);

xe_rtp_process_to_sr(&ctx, register_whitelist, &hwe->reg_whitelist);
xe_rtp_process_to_sr(&ctx, register_whitelist, ARRAY_SIZE(register_whitelist),
&hwe->reg_whitelist);
whitelist_apply_to_hwe(hwe);
}
@@ -90,11 +90,10 @@ static int emit_flush_dw(u32 *dw, int i)
return i;
}

static int emit_flush_imm_ggtt(u32 addr, u32 value, bool invalidate_tlb,
u32 *dw, int i)
static int emit_flush_imm_ggtt(u32 addr, u32 value, u32 flags, u32 *dw, int i)
{
dw[i++] = MI_FLUSH_DW | MI_FLUSH_DW_OP_STOREDW | MI_FLUSH_IMM_DW |
(invalidate_tlb ? MI_INVALIDATE_TLB : 0);
flags;
dw[i++] = addr | MI_FLUSH_DW_USE_GTT;
dw[i++] = 0;
dw[i++] = value;
@@ -111,16 +110,13 @@ static int emit_bb_start(u64 batch_addr, u32 ppgtt_flag, u32 *dw, int i)
return i;
}

static int emit_flush_invalidate(u32 flag, u32 *dw, int i)
static int emit_flush_invalidate(u32 *dw, int i)
{
dw[i] = MI_FLUSH_DW;
dw[i] |= flag;
dw[i++] |= MI_INVALIDATE_TLB | MI_FLUSH_DW_OP_STOREDW | MI_FLUSH_IMM_DW |
MI_FLUSH_DW_STORE_INDEX;

dw[i++] = LRC_PPHWSP_FLUSH_INVAL_SCRATCH_ADDR | MI_FLUSH_DW_USE_GTT;
dw[i++] = MI_FLUSH_DW | MI_INVALIDATE_TLB | MI_FLUSH_DW_OP_STOREDW |
MI_FLUSH_IMM_DW | MI_FLUSH_DW_STORE_INDEX;
dw[i++] = LRC_PPHWSP_FLUSH_INVAL_SCRATCH_ADDR;
dw[i++] = 0;
dw[i++] = 0;
dw[i++] = ~0U;

return i;
}
@@ -257,7 +253,7 @@ static void __emit_job_gen12_simple(struct xe_sched_job *job, struct xe_lrc *lrc
if (job->ring_ops_flush_tlb) {
dw[i++] = preparser_disable(true);
i = emit_flush_imm_ggtt(xe_lrc_start_seqno_ggtt_addr(lrc),
seqno, true, dw, i);
seqno, MI_INVALIDATE_TLB, dw, i);
dw[i++] = preparser_disable(false);
} else {
i = emit_store_imm_ggtt(xe_lrc_start_seqno_ggtt_addr(lrc),
@@ -273,7 +269,7 @@ static void __emit_job_gen12_simple(struct xe_sched_job *job, struct xe_lrc *lrc
dw, i);
}

i = emit_flush_imm_ggtt(xe_lrc_seqno_ggtt_addr(lrc), seqno, false, dw, i);
i = emit_flush_imm_ggtt(xe_lrc_seqno_ggtt_addr(lrc), seqno, 0, dw, i);

i = emit_user_interrupt(dw, i);

@@ -319,7 +315,7 @@ static void __emit_job_gen12_video(struct xe_sched_job *job, struct xe_lrc *lrc,

if (job->ring_ops_flush_tlb)
i = emit_flush_imm_ggtt(xe_lrc_start_seqno_ggtt_addr(lrc),
seqno, true, dw, i);
seqno, MI_INVALIDATE_TLB, dw, i);

dw[i++] = preparser_disable(false);

@@ -336,7 +332,7 @@ static void __emit_job_gen12_video(struct xe_sched_job *job, struct xe_lrc *lrc,
dw, i);
}

i = emit_flush_imm_ggtt(xe_lrc_seqno_ggtt_addr(lrc), seqno, false, dw, i);
i = emit_flush_imm_ggtt(xe_lrc_seqno_ggtt_addr(lrc), seqno, 0, dw, i);

i = emit_user_interrupt(dw, i);

@@ -413,7 +409,7 @@ static void emit_migration_job_gen12(struct xe_sched_job *job,
if (!IS_SRIOV_VF(gt_to_xe(job->q->gt))) {
/* XXX: Do we need this? Leaving for now. */
dw[i++] = preparser_disable(true);
i = emit_flush_invalidate(0, dw, i);
i = emit_flush_invalidate(dw, i);
dw[i++] = preparser_disable(false);
}
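The hunks above replace emit_flush_imm_ggtt()'s invalidate_tlb bool with a raw flags word, so call sites pass MI_INVALIDATE_TLB (or 0) directly and additional MI_FLUSH_DW bits can be ORed in later without another signature change. A standalone model of that refactor (the numeric values are illustrative, not the real Xe MI_* encodings):

#include <stdint.h>
#include <stdio.h>

#define MI_FLUSH_DW             (1u << 31) /* illustrative value */
#define MI_FLUSH_DW_OP_STOREDW  (1u << 14) /* illustrative value */
#define MI_FLUSH_IMM_DW         (1u << 10) /* illustrative value */
#define MI_INVALIDATE_TLB       (1u << 18) /* illustrative value */

/* Old shape: one bool for the single optional behaviour. */
static int emit_flush_old(uint32_t *dw, int i, int invalidate_tlb)
{
        dw[i++] = MI_FLUSH_DW | MI_FLUSH_DW_OP_STOREDW | MI_FLUSH_IMM_DW |
                  (invalidate_tlb ? MI_INVALIDATE_TLB : 0);
        return i;
}

/* New shape: callers OR in whatever extra MI bits they need. */
static int emit_flush_new(uint32_t *dw, int i, uint32_t flags)
{
        dw[i++] = MI_FLUSH_DW | MI_FLUSH_DW_OP_STOREDW | MI_FLUSH_IMM_DW | flags;
        return i;
}

int main(void)
{
        uint32_t ring[4];
        int i = 0;

        i = emit_flush_old(ring, i, 1);
        i = emit_flush_new(ring, i, MI_INVALIDATE_TLB); /* same dword as above */
        i = emit_flush_new(ring, i, 0);                 /* plain flush */

        printf("%08x %08x %08x (%d dwords)\n", (unsigned)ring[0],
               (unsigned)ring[1], (unsigned)ring[2], i);
        return 0;
}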
@@ -237,6 +237,7 @@ static void rtp_mark_active(struct xe_device *xe,
* the save-restore argument.
* @ctx: The context for processing the table, with one of device, gt or hwe
* @entries: Table with RTP definitions
* @n_entries: Number of entries to process, usually ARRAY_SIZE(entries)
* @sr: Save-restore struct where matching rules execute the action. This can be
* viewed as the "coalesced view" of multiple the tables. The bits for each
* register set are expected not to collide with previously added entries
@@ -247,6 +248,7 @@ static void rtp_mark_active(struct xe_device *xe,
*/
void xe_rtp_process_to_sr(struct xe_rtp_process_ctx *ctx,
const struct xe_rtp_entry_sr *entries,
size_t n_entries,
struct xe_reg_sr *sr)
{
const struct xe_rtp_entry_sr *entry;
@@ -259,7 +261,9 @@ void xe_rtp_process_to_sr(struct xe_rtp_process_ctx *ctx,
if (IS_SRIOV_VF(xe))
return;

for (entry = entries; entry && entry->name; entry++) {
xe_assert(xe, entries);

for (entry = entries; entry - entries < n_entries; entry++) {
bool match = false;

if (entry->flags & XE_RTP_ENTRY_FLAG_FOREACH_ENGINE) {

@@ -430,7 +430,7 @@ void xe_rtp_process_ctx_enable_active_tracking(struct xe_rtp_process_ctx *ctx,

void xe_rtp_process_to_sr(struct xe_rtp_process_ctx *ctx,
const struct xe_rtp_entry_sr *entries,
struct xe_reg_sr *sr);
size_t n_entries, struct xe_reg_sr *sr);

void xe_rtp_process(struct xe_rtp_process_ctx *ctx,
const struct xe_rtp_entry *entries);
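With the new n_entries parameter shown above, RTP tables no longer need a terminating {} sentinel; callers pass ARRAY_SIZE() instead, mirroring what xe_rtp_process_ctx_enable_active_tracking() already takes. A hypothetical caller, using only identifiers that appear in this diff (the table itself is a made-up example):

/* Hypothetical driver table; entry contents elided. */
static const struct xe_rtp_entry_sr example_entries[] = {
        { /* rules and actions for the first entry */ },
        { /* rules and actions for the second entry */ },
        /* no terminating {} sentinel needed any more */
};

static void example_process(struct xe_hw_engine *hwe)
{
        struct xe_rtp_process_ctx ctx = XE_RTP_PROCESS_CTX_INITIALIZER(hwe);

        /* Old call: xe_rtp_process_to_sr(&ctx, example_entries, &hwe->reg_sr);
         * New call: the entry count is passed explicitly.
         */
        xe_rtp_process_to_sr(&ctx, example_entries, ARRAY_SIZE(example_entries),
                             &hwe->reg_sr);
}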
@@ -85,8 +85,6 @@ static const struct xe_rtp_entry_sr gt_tunings[] = {
XE_RTP_RULES(MEDIA_VERSION(2000)),
XE_RTP_ACTIONS(SET(XE2LPM_SCRATCH3_LBCF, RWFLUSHALLEN))
},

{}
};

static const struct xe_rtp_entry_sr engine_tunings[] = {
@@ -100,7 +98,6 @@ static const struct xe_rtp_entry_sr engine_tunings[] = {
ENGINE_CLASS(RENDER)),
XE_RTP_ACTIONS(SET(SAMPLER_MODE, INDIRECT_STATE_BASE_ADDR_OVERRIDE))
},
{}
};

static const struct xe_rtp_entry_sr lrc_tunings[] = {
@@ -138,8 +135,6 @@ static const struct xe_rtp_entry_sr lrc_tunings[] = {
XE_RTP_ACTIONS(FIELD_SET(FF_MODE, VS_HIT_MAX_VALUE_MASK,
REG_FIELD_PREP(VS_HIT_MAX_VALUE_MASK, 0x3f)))
},

{}
};

/**
@@ -180,7 +175,7 @@ void xe_tuning_process_gt(struct xe_gt *gt)
xe_rtp_process_ctx_enable_active_tracking(&ctx,
gt->tuning_active.gt,
ARRAY_SIZE(gt_tunings));
xe_rtp_process_to_sr(&ctx, gt_tunings, &gt->reg_sr);
xe_rtp_process_to_sr(&ctx, gt_tunings, ARRAY_SIZE(gt_tunings), &gt->reg_sr);
}
EXPORT_SYMBOL_IF_KUNIT(xe_tuning_process_gt);

@@ -191,7 +186,8 @@ void xe_tuning_process_engine(struct xe_hw_engine *hwe)
xe_rtp_process_ctx_enable_active_tracking(&ctx,
hwe->gt->tuning_active.engine,
ARRAY_SIZE(engine_tunings));
xe_rtp_process_to_sr(&ctx, engine_tunings, &hwe->reg_sr);
xe_rtp_process_to_sr(&ctx, engine_tunings, ARRAY_SIZE(engine_tunings),
&hwe->reg_sr);
}
EXPORT_SYMBOL_IF_KUNIT(xe_tuning_process_engine);

@@ -210,7 +206,7 @@ void xe_tuning_process_lrc(struct xe_hw_engine *hwe)
xe_rtp_process_ctx_enable_active_tracking(&ctx,
hwe->gt->tuning_active.lrc,
ARRAY_SIZE(lrc_tunings));
xe_rtp_process_to_sr(&ctx, lrc_tunings, &hwe->reg_lrc);
xe_rtp_process_to_sr(&ctx, lrc_tunings, ARRAY_SIZE(lrc_tunings), &hwe->reg_lrc);
}

void xe_tuning_dump(struct xe_gt *gt, struct drm_printer *p)

@@ -279,8 +279,6 @@ static const struct xe_rtp_entry_sr gt_was[] = {
XE_RTP_ACTIONS(SET(VDBOX_CGCTL3F10(0), RAMDFTUNIT_CLKGATE_DIS)),
XE_RTP_ENTRY_FLAG(FOREACH_ENGINE),
},

{}
};

static const struct xe_rtp_entry_sr engine_was[] = {
@@ -624,8 +622,6 @@ static const struct xe_rtp_entry_sr engine_was[] = {
FUNC(xe_rtp_match_first_render_or_compute)),
XE_RTP_ACTIONS(SET(TDL_TSL_CHICKEN, RES_CHK_SPR_DIS))
},

{}
};

static const struct xe_rtp_entry_sr lrc_was[] = {
@@ -825,8 +821,6 @@ static const struct xe_rtp_entry_sr lrc_was[] = {
DIS_PARTIAL_AUTOSTRIP |
DIS_AUTOSTRIP))
},

{}
};

static __maybe_unused const struct xe_rtp_entry oob_was[] = {
@@ -868,7 +862,7 @@ void xe_wa_process_gt(struct xe_gt *gt)

xe_rtp_process_ctx_enable_active_tracking(&ctx, gt->wa_active.gt,
ARRAY_SIZE(gt_was));
xe_rtp_process_to_sr(&ctx, gt_was, &gt->reg_sr);
xe_rtp_process_to_sr(&ctx, gt_was, ARRAY_SIZE(gt_was), &gt->reg_sr);
}
EXPORT_SYMBOL_IF_KUNIT(xe_wa_process_gt);

@@ -886,7 +880,7 @@ void xe_wa_process_engine(struct xe_hw_engine *hwe)

xe_rtp_process_ctx_enable_active_tracking(&ctx, hwe->gt->wa_active.engine,
ARRAY_SIZE(engine_was));
xe_rtp_process_to_sr(&ctx, engine_was, &hwe->reg_sr);
xe_rtp_process_to_sr(&ctx, engine_was, ARRAY_SIZE(engine_was), &hwe->reg_sr);
}

/**
@@ -903,7 +897,7 @@ void xe_wa_process_lrc(struct xe_hw_engine *hwe)

xe_rtp_process_ctx_enable_active_tracking(&ctx, hwe->gt->wa_active.lrc,
ARRAY_SIZE(lrc_was));
xe_rtp_process_to_sr(&ctx, lrc_was, &hwe->reg_lrc);
xe_rtp_process_to_sr(&ctx, lrc_was, ARRAY_SIZE(lrc_was), &hwe->reg_lrc);
}

/**