On 08-03-2024 14:25, Riana Tauro wrote: > Lmem init check should be done only after pcode initialization > status is complete. Move lmem init check after pcode status > check. Also wait for a short while after pcode status check > to allow completion of the task. > > Failing to do so, can lead to aborting the module load > leaving the system unusable. Wait until the lmem initialization > is complete within a timeout (60s) or till the user aborts. > > Signed-off-by: Riana Tauro > --- > drivers/gpu/drm/xe/xe_device.c | 53 +++++++++++++++++++++++++++++++++- > drivers/gpu/drm/xe/xe_mmio.c | 29 ------------------- > 2 files changed, 52 insertions(+), 30 deletions(-) > > diff --git a/drivers/gpu/drm/xe/xe_device.c b/drivers/gpu/drm/xe/xe_device.c > index 83dd60f68566..4806e7806be5 100644 > --- a/drivers/gpu/drm/xe/xe_device.c > +++ b/drivers/gpu/drm/xe/xe_device.c > @@ -413,12 +413,59 @@ static int xe_set_dma_info(struct xe_device *xe) > return err; > } > > +static int verify_lmem_ready(struct xe_gt *gt) > +{ > + return xe_mmio_read32(gt, GU_CNTL) & LMEM_INIT; > +} > + > +static int wait_for_lmem_ready(struct xe_device *xe) > +{ > + struct xe_gt *gt = xe_root_mmio_gt(xe); > + unsigned long timeout, start; > + > + if (!IS_DGFX(xe)) > + return 0; > + > + if (IS_SRIOV_VF(xe)) > + return 0; > + /* > + * The boot firmware initializes local memory and assesses its health. > + * If memory training fails, the punit will have been instructed to > + * keep the GT powered down; we won't be able to communicate with it > + * and we should not continue with driver initialization. > + */ > + if (verify_lmem_ready(gt)) > + return 0; > + > + drm_dbg(&xe->drm, "Waiting for lmem initialisation\n"); > + > + start = jiffies; > + timeout = start + msecs_to_jiffies(60 * 1000); /* 60 sec! */ > + > + do { > + if (signal_pending(current)) > + return -EINTR; > + > + if (time_after(jiffies, timeout)) > + return -EPROBE_DEFER; How about adding reason for deferred probing? 
For example: "lmem initialization timed out"? Reviewed-by: Himal Prasad Ghimiray > + > + msleep(20); > + > + } while (!verify_lmem_ready(gt)); > + > + drm_dbg(&xe->drm, "lmem ready after %ums", > + jiffies_to_msecs(jiffies - start)); > + > + return 0; > +} > + > /** > * xe_device_probe_early: Device early probe > * @xe: xe device instance > * > * Initialize MMIO resources that don't require any > - * knowledge about tile count. Also initialize pcode > + * knowledge about tile count. Also initialize pcode and > + * check vram initialization on root tile. > * > * Return: 0 on success, error code on failure > */ > @@ -438,6 +485,10 @@ int xe_device_probe_early(struct xe_device *xe) > if (err) > return err; > > + err = wait_for_lmem_ready(xe); > + if (err) > + return err; > + > return 0; > } > > diff --git a/drivers/gpu/drm/xe/xe_mmio.c b/drivers/gpu/drm/xe/xe_mmio.c > index 7ba2477452d7..7fc0c5453b21 100644 > --- a/drivers/gpu/drm/xe/xe_mmio.c > +++ b/drivers/gpu/drm/xe/xe_mmio.c > @@ -360,30 +360,6 @@ static void mmio_fini(struct drm_device *drm, void *arg) > iounmap(xe->mem.vram.mapping); > } > > -static int xe_verify_lmem_ready(struct xe_device *xe) > -{ > - struct xe_gt *gt = xe_root_mmio_gt(xe); > - > - if (!IS_DGFX(xe)) > - return 0; > - > - if (IS_SRIOV_VF(xe)) > - return 0; > - > - /* > - * The boot firmware initializes local memory and assesses its health. > - * If memory training fails, the punit will have been instructed to > - * keep the GT powered down; we won't be able to communicate with it > - * and we should not continue with driver initialization. 
> - */ > - if (!(xe_mmio_read32(gt, GU_CNTL) & LMEM_INIT)) { > - drm_err(&xe->drm, "VRAM not initialized by firmware\n"); > - return -ENODEV; > - } > - > - return 0; > -} > - > int xe_mmio_init(struct xe_device *xe) > { > struct pci_dev *pdev = to_pci_dev(xe->drm.dev); > @@ -407,16 +383,11 @@ int xe_mmio_init(struct xe_device *xe) > int xe_mmio_root_tile_init(struct xe_device *xe) > { > struct xe_tile *root_tile = xe_device_get_root_tile(xe); > - int err; > > /* Setup first tile; other tiles (if present) will be setup later. */ > root_tile->mmio.size = SZ_16M; > root_tile->mmio.regs = xe->mmio.regs; > > - err = xe_verify_lmem_ready(xe); > - if (err) > - return err; > - > return 0; > } >