Intel-XE Archive on lore.kernel.org
 help / color / mirror / Atom feed
From: "Wang, X" <x.wang@intel.com>
To: Matthew Auld <matthew.auld@intel.com>, <intel-xe@lists.freedesktop.org>
Cc: Matt Roper <matthew.d.roper@intel.com>
Subject: Re: [PATCH v3] drm/xe: Allow compressible surfaces to be 1-way coherent
Date: Wed, 7 Jan 2026 17:20:23 -0800	[thread overview]
Message-ID: <dd8191c9-1444-45bd-b557-a3e8df67cb49@intel.com> (raw)
In-Reply-To: <38e7e2b5-bf30-4353-b97b-272e6ac3bbc1@intel.com>



On 1/7/2026 01:45, Matthew Auld wrote:
>> ...cut...
>>       /*
>> -     * Compression implies coh_none, therefore we know for sure that WB
>> -     * memory can't currently use compression, which is likely one 
>> of the
>> -     * common cases.
>> -     * Additionally, userspace may explicitly request no compression 
>> via the
>> -     * DRM_XE_GEM_CREATE_FLAG_NO_COMPRESSION flag, which should also 
>> disable
>> -     * CCS usage.
>> +     * For WB (Write-Back) CPU caching mode, check if compression is
>> +     * supported through any available PAT index. If not, FlatCCS
>> +     * can't be used.
>>        */
>> -    if (bo->cpu_caching == DRM_XE_GEM_CPU_CACHING_WB ||
>> -        bo->flags & XE_BO_FLAG_NO_COMPRESSION)
>> -        return false;
>> +    if (bo->cpu_caching == DRM_XE_GEM_CPU_CACHING_WB) {
>> +        bool wb_comp_supported = false;
>> +
>> +        /*
>> +         * Compression for WB caching was introduced in
>> +         * GRAPHICS_VER 30 (Xe2). Earlier versions do not
>> +         * support it.
>> +         */
>> +        if (GRAPHICS_VER(xe) < 30)
>> +            return false;
>> +
>> +        for (int i = 0; i < xe->pat.n_entries; i++) {
>> +            if (!xe->pat.table[i].valid)
>> +                continue;
>> +            if (xe_pat_index_get_comp_en(xe, i) &&
>> +                xe_pat_index_get_coh_mode(xe, i) != XE_COH_NONE) {
>> +                wb_comp_supported = true;
>> +                break;
>> +            }
>> +        }
>> +
>> +        if (!wb_comp_supported)
>> +            return false;
>> +    }
>
> Would it be cleaner to make this a feature flag instead of checking 
> this every time, if you want to avoid the version check? 
> info.wb_comp_supported?
>
Since we know the newly added PAT entry will definitely be used later in the
code, we might as well add a new cache type, XE_CACHE_WB_COMPRESSION. That
way, we can determine the return value by checking whether a valid
WB-with-compression PAT index exists.

Xin
>>         return true;
>>   }
>> diff --git a/drivers/gpu/drm/xe/xe_gt.c b/drivers/gpu/drm/xe/xe_gt.c
>> index 313ce83ab0e5..04dbf995a18b 100644
>> --- a/drivers/gpu/drm/xe/xe_gt.c
>> +++ b/drivers/gpu/drm/xe/xe_gt.c
>> @@ -140,6 +140,36 @@ static void xe_gt_disable_host_l2_vram(struct 
>> xe_gt *gt)
>>       xe_gt_mcr_multicast_write(gt, XE2_GAMREQSTRM_CTRL, reg);
>>   }
>>   +static void xe_gt_enable_comp_1wcoh(struct xe_gt *gt)
>> +{
>> +    struct xe_device *xe = gt_to_xe(gt);
>> +    unsigned int fw_ref;
>> +    u32 reg;
>> +
>> +    if (IS_SRIOV_VF(xe))
>> +        return;
>> +
>> +    if (GRAPHICS_VER(xe) >= 30 && xe->info.has_flat_ccs) {
>> +        fw_ref = xe_force_wake_get(gt_to_fw(gt), XE_FW_GT);
>> +        if (!fw_ref)
>> +            return;
>> +
>> +        reg = xe_gt_mcr_unicast_read_any(gt, XE2_GAMREQSTRM_CTRL);
>> +        reg |= EN_CMP_1WCOH;
>> +        xe_gt_mcr_multicast_write(gt, XE2_GAMREQSTRM_CTRL, reg);
>> +
>> +        if (xe_gt_is_media_type(gt)) {
>> +            xe_mmio_rmw32(&gt->mmio, XE2_GAMWALK_CTRL_MEDIA, 0, 
>> EN_CMP_1WCOH_GW);
>> +        } else {
>> +            reg = xe_gt_mcr_unicast_read_any(gt, XE2_GAMWALK_CTRL_3D);
>> +            reg |= EN_CMP_1WCOH_GW;
>> +            xe_gt_mcr_multicast_write(gt, XE2_GAMWALK_CTRL_3D, reg);
>> +        }
>> +
>> +        xe_force_wake_put(gt_to_fw(gt), fw_ref);
>> +    }
>> +}
>> +
>>   static void gt_reset_worker(struct work_struct *w);
>>     static int emit_job_sync(struct xe_exec_queue *q, struct xe_bb *bb,
>> @@ -466,6 +496,7 @@ static int gt_init_with_gt_forcewake(struct xe_gt 
>> *gt)
>>       xe_gt_topology_init(gt);
>>       xe_gt_mcr_init(gt);
>>       xe_gt_enable_host_l2_vram(gt);
>> +    xe_gt_enable_comp_1wcoh(gt);
>>         if (xe_gt_is_main_type(gt)) {
>>           err = xe_ggtt_init(gt_to_tile(gt)->mem.ggtt);
>> @@ -745,6 +776,7 @@ static int do_gt_restart(struct xe_gt *gt)
>>       xe_pat_init(gt);
>>         xe_gt_enable_host_l2_vram(gt);
>> +    xe_gt_enable_comp_1wcoh(gt);
>>         xe_gt_mcr_set_implicit_defaults(gt);
>>       xe_reg_sr_apply_mmio(&gt->reg_sr, gt);
>> diff --git a/drivers/gpu/drm/xe/xe_pat.c b/drivers/gpu/drm/xe/xe_pat.c
>> index 2c3375e0250b..440a9013dc04 100644
>> --- a/drivers/gpu/drm/xe/xe_pat.c
>> +++ b/drivers/gpu/drm/xe/xe_pat.c
>> @@ -132,9 +132,10 @@ static const struct xe_pat_table_entry 
>> xelpg_pat_table[] = {
>>    * in the table.
>>    *
>>    * Note: There is an implicit assumption in the driver that 
>> compression and
>> - * coh_1way+ are mutually exclusive. If this is ever not true then 
>> userptr
>> - * and imported dma-buf from external device will have uncleared ccs 
>> state. See
>> - * also xe_bo_needs_ccs_pages().
>> + * coh_1way+ are mutually exclusive for platforms prior to Xe3. 
>> Starting
>> + * with Xe3, compression can be combined with coherency. If using 
>> compression
>> + * with coherency, userptr and imported dma-buf from external device 
>> will
>> + * have uncleared ccs state. See also xe_bo_needs_ccs_pages().
>>    */
>>   #define XE2_PAT(no_promote, comp_en, l3clos, l3_policy, l4_policy, 
>> __coh_mode) \
>>       { \
>> @@ -144,8 +145,7 @@ static const struct xe_pat_table_entry 
>> xelpg_pat_table[] = {
>>               REG_FIELD_PREP(XE2_L3_POLICY, l3_policy) | \
>>               REG_FIELD_PREP(XE2_L4_POLICY, l4_policy) | \
>>               REG_FIELD_PREP(XE2_COH_MODE, __coh_mode), \
>> -        .coh_mode = (BUILD_BUG_ON_ZERO(__coh_mode && comp_en) || 
>> __coh_mode) ? \
>> -            XE_COH_AT_LEAST_1WAY : XE_COH_NONE, \
>> +        .coh_mode = __coh_mode ? XE_COH_AT_LEAST_1WAY : XE_COH_NONE, \
>>           .valid = 1 \
>>       }
>>   @@ -181,6 +181,38 @@ static const struct xe_pat_table_entry 
>> xe2_pat_table[] = {
>>       [31] = XE2_PAT( 0, 0, 3, 0, 3, 3 ),
>>   };
>>   +static const struct xe_pat_table_entry xe3_lpg_pat_table[] = {
>> +    [ 0] = XE2_PAT( 0, 0, 0, 0, 3, 0 ),
>> +    [ 1] = XE2_PAT( 0, 0, 0, 0, 3, 2 ),
>> +    [ 2] = XE2_PAT( 0, 0, 0, 0, 3, 3 ),
>> +    [ 3] = XE2_PAT( 0, 0, 0, 3, 3, 0 ),
>> +    [ 4] = XE2_PAT( 0, 0, 0, 3, 0, 2 ),
>> +    [ 5] = XE2_PAT( 0, 0, 0, 3, 3, 2 ),
>> +    [ 6] = XE2_PAT( 1, 0, 0, 1, 3, 0 ),
>> +    [ 7] = XE2_PAT( 0, 0, 0, 3, 0, 3 ),
>> +    [ 8] = XE2_PAT( 0, 0, 0, 3, 0, 0 ),
>> +    [ 9] = XE2_PAT( 0, 1, 0, 0, 3, 0 ),
>> +    [10] = XE2_PAT( 0, 1, 0, 3, 0, 0 ),
>> +    [11] = XE2_PAT( 1, 1, 0, 1, 3, 0 ),
>> +    [12] = XE2_PAT( 0, 1, 0, 3, 3, 0 ),
>> +    [13] = XE2_PAT( 0, 0, 0, 0, 0, 0 ),
>> +    [14] = XE2_PAT( 0, 1, 0, 0, 0, 0 ),
>> +    [15] = XE2_PAT( 1, 1, 0, 1, 1, 0 ),
>> +    [16] = XE2_PAT( 0, 1, 0, 0, 3, 2 ),
>> +    /* 17..19 are reserved; leave set to all 0's */
>> +    [20] = XE2_PAT( 0, 0, 1, 0, 3, 0 ),
>> +    [21] = XE2_PAT( 0, 1, 1, 0, 3, 0 ),
>> +    [22] = XE2_PAT( 0, 0, 1, 0, 3, 2 ),
>> +    [23] = XE2_PAT( 0, 0, 1, 0, 3, 3 ),
>> +    [24] = XE2_PAT( 0, 0, 2, 0, 3, 0 ),
>> +    [25] = XE2_PAT( 0, 1, 2, 0, 3, 0 ),
>> +    [26] = XE2_PAT( 0, 0, 2, 0, 3, 2 ),
>> +    [27] = XE2_PAT( 0, 0, 2, 0, 3, 3 ),
>> +    [28] = XE2_PAT( 0, 0, 3, 0, 3, 0 ),
>> +    [29] = XE2_PAT( 0, 1, 3, 0, 3, 0 ),
>> +    [30] = XE2_PAT( 0, 0, 3, 0, 3, 2 ),
>> +    [31] = XE2_PAT( 0, 0, 3, 0, 3, 3 ),
>> +};
>>   /* Special PAT values programmed outside the main table */
>>   static const struct xe_pat_table_entry xe2_pat_ats = XE2_PAT( 0, 0, 
>> 0, 0, 3, 3 );
>>   static const struct xe_pat_table_entry xe2_pat_pta = XE2_PAT( 0, 0, 
>> 0, 0, 3, 0 );
>> @@ -501,7 +533,10 @@ void xe_pat_init_early(struct xe_device *xe)
>>           xe->pat.idx[XE_CACHE_WB] = 2;
>>       } else if (GRAPHICS_VER(xe) == 30 || GRAPHICS_VER(xe) == 20) {
>>           xe->pat.ops = &xe2_pat_ops;
>> -        xe->pat.table = xe2_pat_table;
>> +        if (GRAPHICS_VER(xe) == 30)
>> +            xe->pat.table = xe3_lpg_pat_table;
>> +        else
>> +            xe->pat.table = xe2_pat_table;
>>           xe->pat.pat_ats = &xe2_pat_ats;
>>           if (IS_DGFX(xe))
>>               xe->pat.pat_pta = &xe2_pat_pta;
>> diff --git a/drivers/gpu/drm/xe/xe_vm.c b/drivers/gpu/drm/xe/xe_vm.c
>> index a07d8b53de66..481ee7763b09 100644
>> --- a/drivers/gpu/drm/xe/xe_vm.c
>> +++ b/drivers/gpu/drm/xe/xe_vm.c
>> @@ -3405,6 +3405,7 @@ static int vm_bind_ioctl_check_args(struct 
>> xe_device *xe, struct xe_vm *vm,
>>               DRM_XE_VM_BIND_FLAG_CPU_ADDR_MIRROR;
>>           u16 pat_index = (*bind_ops)[i].pat_index;
>>           u16 coh_mode;
>> +        bool comp_en;
>>             if (XE_IOCTL_DBG(xe, is_cpu_addr_mirror &&
>>                    (!xe_vm_in_fault_mode(vm) ||
>> @@ -3421,6 +3422,7 @@ static int vm_bind_ioctl_check_args(struct 
>> xe_device *xe, struct xe_vm *vm,
>>           pat_index = array_index_nospec(pat_index, xe->pat.n_entries);
>>           (*bind_ops)[i].pat_index = pat_index;
>>           coh_mode = xe_pat_index_get_coh_mode(xe, pat_index);
>> +        comp_en = xe_pat_index_get_comp_en(xe, pat_index);
>>           if (XE_IOCTL_DBG(xe, !coh_mode)) { /* hw reserved */
>>               err = -EINVAL;
>>               goto free_bind_ops;
>> @@ -3451,6 +3453,8 @@ static int vm_bind_ioctl_check_args(struct 
>> xe_device *xe, struct xe_vm *vm,
>>                    op == DRM_XE_VM_BIND_OP_MAP_USERPTR) ||
>>               XE_IOCTL_DBG(xe, coh_mode == XE_COH_NONE &&
>>                    op == DRM_XE_VM_BIND_OP_MAP_USERPTR) ||
>> +            XE_IOCTL_DBG(xe, comp_en &&
>> +                 op == DRM_XE_VM_BIND_OP_MAP_USERPTR) ||
>>               XE_IOCTL_DBG(xe, op == DRM_XE_VM_BIND_OP_MAP_USERPTR &&
>>                    !IS_ENABLED(CONFIG_DRM_GPUSVM)) ||
>>               XE_IOCTL_DBG(xe, obj &&
>> @@ -3529,6 +3533,7 @@ static int xe_vm_bind_ioctl_validate_bo(struct 
>> xe_device *xe, struct xe_bo *bo,
>>                       u16 pat_index, u32 op, u32 bind_flags)
>>   {
>>       u16 coh_mode;
>> +    bool comp_en;
>>         if (XE_IOCTL_DBG(xe, (bo->flags & XE_BO_FLAG_NO_COMPRESSION) &&
>>                xe_pat_index_get_comp_en(xe, pat_index)))
>> @@ -3574,6 +3579,14 @@ static int xe_vm_bind_ioctl_validate_bo(struct 
>> xe_device *xe, struct xe_bo *bo,
>>           return -EINVAL;
>>       }
>>   +    /*
>> +     * Ensures that imported buffer objects (dma-bufs) are not mapped
>> +     * with a PAT index that enables compression.
>> +     */
>> +    comp_en = xe_pat_index_get_comp_en(xe, pat_index);
>> +    if (XE_IOCTL_DBG(xe, bo->ttm.base.import_attach && comp_en))
>> +        return -EINVAL;
>> +
>>       /* If a BO is protected it can only be mapped if the key is 
>> still valid */
>>       if ((bind_flags & DRM_XE_VM_BIND_FLAG_CHECK_PXP) && 
>> xe_bo_is_protected(bo) &&
>>           op != DRM_XE_VM_BIND_OP_UNMAP && op != 
>> DRM_XE_VM_BIND_OP_UNMAP_ALL)
>


  reply	other threads:[~2026-01-08  1:20 UTC|newest]

Thread overview: 8+ messages / expand[flat|nested]  mbox.gz  Atom feed  top
2026-01-06 18:55 [PATCH v3] drm/xe: Allow compressible surfaces to be 1-way coherent Xin Wang
2026-01-06 19:01 ` ✗ CI.checkpatch: warning for drm/xe: Allow compressible surfaces to be 1-way coherent (rev4) Patchwork
2026-01-06 19:02 ` ✓ CI.KUnit: success " Patchwork
2026-01-06 19:45 ` ✓ Xe.CI.BAT: " Patchwork
2026-01-06 21:22 ` ✗ Xe.CI.Full: failure " Patchwork
2026-01-07  9:45 ` [PATCH v3] drm/xe: Allow compressible surfaces to be 1-way coherent Matthew Auld
2026-01-08  1:20   ` Wang, X [this message]
  -- strict thread matches above, loose matches on Subject: below --
2025-11-04 19:17 [PATCH] " Xin Wang
2026-01-06 18:40 ` [PATCH v3] " Xin Wang

Reply instructions:

You may reply publicly to this message via plain-text email
using any one of the following methods:

* Save the following mbox file, import it into your mail client,
  and reply-to-all from there: mbox

  Avoid top-posting and favor interleaved quoting:
  https://en.wikipedia.org/wiki/Posting_style#Interleaved_style

* Reply using the --to, --cc, and --in-reply-to
  switches of git-send-email(1):

  git send-email \
    --in-reply-to=dd8191c9-1444-45bd-b557-a3e8df67cb49@intel.com \
    --to=x.wang@intel.com \
    --cc=intel-xe@lists.freedesktop.org \
    --cc=matthew.auld@intel.com \
    --cc=matthew.d.roper@intel.com \
    /path/to/YOUR_REPLY

  https://kernel.org/pub/software/scm/git/docs/git-send-email.html

* If your mail client supports setting the In-Reply-To header
  via mailto: links, try the mailto: link
Be sure your reply has a Subject: header at the top and a blank line before the message body.
This is a public inbox, see mirroring instructions
for how to clone and mirror all data and code used for this inbox