On 2/20/2015 4:41 PM, Mika Kuoppala wrote: > Michel Thierry writes: > >> From: Ben Widawsky >> >> Instead of implementing the full tracking + dynamic allocation, this >> patch does a bit less than half of the work, by tracking and warning on >> unexpected conditions. The tracking itself follows which PTEs within a >> page table are currently being used for objects. The next patch will >> modify this to actually allocate the page tables only when necessary. >> >> With the current patch there isn't much in the way of making a gen >> agnostic range allocation function. However, in the next patch we'll add >> more specificity which makes having separate functions a bit easier to >> manage. >> >> One important change introduced here is that DMA mappings are >> created/destroyed at the same page directories/tables are >> allocated/deallocated. >> >> Notice that aliasing PPGTT is not managed here. The patch which actually >> begins dynamic allocation/teardown explains the reasoning for this. >> >> v2: s/pdp.page_directory/pdp.page_directorys >> Make a scratch page allocation helper >> >> v3: Rebase and expand commit message. >> >> v4: Allocate required pagetables only when it is needed, _bind_to_vm >> instead of bind_vma (Daniel). >> >> v5: Rebased to remove the unnecessary noise in the diff, also: >> - PDE mask is GEN agnostic, renamed GEN6_PDE_MASK to I915_PDE_MASK. >> - Removed unnecessary checks in gen6_alloc_va_range. >> - Changed map/unmap_px_single macros to use dma functions directly and >> be part of a static inline function instead. >> - Moved drm_device plumbing through page tables operation to its own >> patch. >> - Moved allocate/teardown_va_range calls until they are fully >> implemented (in subsequent patch). >> - Merged pt and scratch_pt unmap_and_free path. >> - Moved scratch page allocator helper to the patch that will use it. >> >> v6: Reduce complexity by not tearing down pagetables dynamically, the >> same can be achieved while freeing empty vms. 
(Daniel) >> >> Cc: Daniel Vetter >> Signed-off-by: Ben Widawsky >> Signed-off-by: Michel Thierry (v3+) >> --- >> drivers/gpu/drm/i915/i915_gem_gtt.c | 191 +++++++++++++++++++++++++----------- >> drivers/gpu/drm/i915/i915_gem_gtt.h | 75 ++++++++++++++ >> 2 files changed, 206 insertions(+), 60 deletions(-) >> >> diff --git a/drivers/gpu/drm/i915/i915_gem_gtt.c b/drivers/gpu/drm/i915/i915_gem_gtt.c >> index e2bcd10..760585e 100644 >> --- a/drivers/gpu/drm/i915/i915_gem_gtt.c >> +++ b/drivers/gpu/drm/i915/i915_gem_gtt.c >> @@ -274,29 +274,88 @@ static gen6_gtt_pte_t iris_pte_encode(dma_addr_t addr, >> return pte; >> } >> >> -static void unmap_and_free_pt(struct i915_page_table_entry *pt, struct drm_device *dev) >> +#define i915_dma_unmap_single(px, dev) \ >> + __i915_dma_unmap_single((px)->daddr, dev) >> + >> +static inline void __i915_dma_unmap_single(dma_addr_t daddr, >> + struct drm_device *dev) >> +{ >> + struct device *device = &dev->pdev->dev; >> + >> + dma_unmap_page(device, daddr, 4096, PCI_DMA_BIDIRECTIONAL); >> +} >> + >> +/** >> + * i915_dma_map_px_single() - Create a dma mapping for a page table/dir/etc. >> + * @px: Page table/dir/etc to get a DMA map for >> + * @dev: drm device >> + * >> + * Page table allocations are unified across all gens. They always require a >> + * single 4k allocation, as well as a DMA mapping. If we keep the structs >> + * symmetric here, the simple macro covers us for every page table type. >> + * >> + * Return: 0 if success. >> + */ >> +#define i915_dma_map_px_single(px, dev) \ >> + i915_dma_map_page_single((px)->page, (dev), &(px)->daddr) >> + > If this is symmetrical to i915_dma_unmap_single() is the _px_ needed? 
> >> +static inline int i915_dma_map_page_single(struct page *page, >> + struct drm_device *dev, >> + dma_addr_t *daddr) >> +{ >> + struct device *device = &dev->pdev->dev; >> + >> + *daddr = dma_map_page(device, page, 0, 4096, PCI_DMA_BIDIRECTIONAL); >> + return dma_mapping_error(device, *daddr); >> +} >> + >> +static void unmap_and_free_pt(struct i915_page_table_entry *pt, >> + struct drm_device *dev) >> { >> if (WARN_ON(!pt->page)) >> return; >> + >> + i915_dma_unmap_single(pt, dev); >> __free_page(pt->page); >> + kfree(pt->used_ptes); >> kfree(pt); >> } >> >> static struct i915_page_table_entry *alloc_pt_single(struct drm_device *dev) >> { >> struct i915_page_table_entry *pt; >> + const size_t count = INTEL_INFO(dev)->gen >= 8 ? >> + GEN8_PTES_PER_PAGE : I915_PPGTT_PT_ENTRIES; >> + int ret = -ENOMEM; >> >> pt = kzalloc(sizeof(*pt), GFP_KERNEL); >> if (!pt) >> return ERR_PTR(-ENOMEM); >> >> + pt->used_ptes = kcalloc(BITS_TO_LONGS(count), sizeof(*pt->used_ptes), >> + GFP_KERNEL); >> + >> + if (!pt->used_ptes) >> + goto fail_bitmap; >> + >> pt->page = alloc_page(GFP_KERNEL | __GFP_ZERO); >> - if (!pt->page) { >> - kfree(pt); >> - return ERR_PTR(-ENOMEM); >> - } >> + if (!pt->page) >> + goto fail_page; >> + >> + ret = i915_dma_map_px_single(pt, dev); >> + if (ret) >> + goto fail_dma; >> >> return pt; >> + >> +fail_dma: >> + __free_page(pt->page); >> +fail_page: >> + kfree(pt->used_ptes); >> +fail_bitmap: >> + kfree(pt); >> + >> + return ERR_PTR(ret); >> } >> >> /** >> @@ -836,26 +895,36 @@ static void gen6_dump_ppgtt(struct i915_hw_ppgtt *ppgtt, struct seq_file *m) >> } >> } >> >> -static void gen6_write_pdes(struct i915_hw_ppgtt *ppgtt) >> +/* Write pde (index) from the page directory @pd to the page table @pt */ >> +static void gen6_write_pdes(struct i915_page_directory_entry *pd, > For me it seems that you will write only one pde entry so s/pdes/pde ? 
> >> + const int pde, struct i915_page_table_entry *pt) >> { >> - struct drm_i915_private *dev_priv = ppgtt->base.dev->dev_private; >> - gen6_gtt_pte_t __iomem *pd_addr; >> - uint32_t pd_entry; >> - int i; >> + struct i915_hw_ppgtt *ppgtt = >> + container_of(pd, struct i915_hw_ppgtt, pd); >> + u32 pd_entry; >> >> - WARN_ON(ppgtt->pd.pd_offset & 0x3f); >> - pd_addr = (gen6_gtt_pte_t __iomem*)dev_priv->gtt.gsm + >> - ppgtt->pd.pd_offset / sizeof(gen6_gtt_pte_t); >> - for (i = 0; i < ppgtt->num_pd_entries; i++) { >> - dma_addr_t pt_addr; >> + pd_entry = GEN6_PDE_ADDR_ENCODE(pt->daddr); >> + pd_entry |= GEN6_PDE_VALID; >> >> - pt_addr = ppgtt->pd.page_tables[i]->daddr; >> - pd_entry = GEN6_PDE_ADDR_ENCODE(pt_addr); >> - pd_entry |= GEN6_PDE_VALID; >> + writel(pd_entry, ppgtt->pd_addr + pde); >> >> - writel(pd_entry, pd_addr + i); >> - } >> - readl(pd_addr); >> + /* XXX: Caller needs to make sure the write completes if necessary */ >> +} > Move this comment on top of the function and lift the XXX: > >> + >> +/* Write all the page tables found in the ppgtt structure to incrementing page >> + * directories. */ >> +static void gen6_write_page_range(struct drm_i915_private *dev_priv, >> + struct i915_page_directory_entry *pd, uint32_t start, uint32_t length) >> +{ >> + struct i915_page_table_entry *pt; >> + uint32_t pde, temp; >> + >> + gen6_for_each_pde(pt, pd, start, length, temp, pde) >> + gen6_write_pdes(pd, pde, pt); >> + >> + /* Make sure write is complete before other code can use this page >> + * table. 
Also require for WC mapped PTEs */ >> + readl(dev_priv->gtt.gsm); >> } >> >> static uint32_t get_pd_offset(struct i915_hw_ppgtt *ppgtt) >> @@ -1071,6 +1140,28 @@ static void gen6_ppgtt_unmap_pages(struct i915_hw_ppgtt *ppgtt) >> 4096, PCI_DMA_BIDIRECTIONAL); >> } >> >> +static int gen6_alloc_va_range(struct i915_address_space *vm, >> + uint64_t start, uint64_t length) >> +{ >> + struct i915_hw_ppgtt *ppgtt = >> + container_of(vm, struct i915_hw_ppgtt, base); >> + struct i915_page_table_entry *pt; >> + uint32_t pde, temp; >> + >> + gen6_for_each_pde(pt, &ppgtt->pd, start, length, temp, pde) { >> + DECLARE_BITMAP(tmp_bitmap, I915_PPGTT_PT_ENTRIES); >> + >> + bitmap_zero(tmp_bitmap, I915_PPGTT_PT_ENTRIES); >> + bitmap_set(tmp_bitmap, gen6_pte_index(start), >> + gen6_pte_count(start, length)); >> + >> + bitmap_or(pt->used_ptes, pt->used_ptes, tmp_bitmap, >> + I915_PPGTT_PT_ENTRIES); >> + } >> + >> + return 0; >> +} >> + >> static void gen6_ppgtt_free(struct i915_hw_ppgtt *ppgtt) >> { >> int i; >> @@ -1117,20 +1208,24 @@ alloc: >> 0, dev_priv->gtt.base.total, >> 0); >> if (ret) >> - return ret; >> + goto err_out; >> >> retried = true; >> goto alloc; >> } >> >> if (ret) >> - return ret; >> + goto err_out; >> + >> >> if (ppgtt->node.start < dev_priv->gtt.mappable_end) >> DRM_DEBUG("Forced to use aperture for PDEs\n"); >> >> ppgtt->num_pd_entries = GEN6_PPGTT_PD_ENTRIES; >> return 0; >> + >> +err_out: >> + return ret; >> } >> >> static int gen6_ppgtt_alloc(struct i915_hw_ppgtt *ppgtt) >> @@ -1152,30 +1247,6 @@ static int gen6_ppgtt_alloc(struct i915_hw_ppgtt *ppgtt) >> return 0; >> } >> >> -static int gen6_ppgtt_setup_page_tables(struct i915_hw_ppgtt *ppgtt) >> -{ >> - struct drm_device *dev = ppgtt->base.dev; >> - int i; >> - >> - for (i = 0; i < ppgtt->num_pd_entries; i++) { >> - struct page *page; >> - dma_addr_t pt_addr; >> - >> - page = ppgtt->pd.page_tables[i]->page; >> - pt_addr = pci_map_page(dev->pdev, page, 0, 4096, >> - PCI_DMA_BIDIRECTIONAL); >> - >> - if 
(pci_dma_mapping_error(dev->pdev, pt_addr)) { >> - gen6_ppgtt_unmap_pages(ppgtt); >> - return -EIO; >> - } >> - >> - ppgtt->pd.page_tables[i]->daddr = pt_addr; >> - } >> - >> - return 0; >> -} >> - >> static int gen6_ppgtt_init(struct i915_hw_ppgtt *ppgtt) >> { >> struct drm_device *dev = ppgtt->base.dev; >> @@ -1196,12 +1267,7 @@ static int gen6_ppgtt_init(struct i915_hw_ppgtt *ppgtt) >> if (ret) >> return ret; >> >> - ret = gen6_ppgtt_setup_page_tables(ppgtt); >> - if (ret) { >> - gen6_ppgtt_free(ppgtt); >> - return ret; >> - } >> - >> + ppgtt->base.allocate_va_range = gen6_alloc_va_range; >> ppgtt->base.clear_range = gen6_ppgtt_clear_range; >> ppgtt->base.insert_entries = gen6_ppgtt_insert_entries; >> ppgtt->base.cleanup = gen6_ppgtt_cleanup; >> @@ -1212,13 +1278,17 @@ static int gen6_ppgtt_init(struct i915_hw_ppgtt *ppgtt) >> ppgtt->pd.pd_offset = >> ppgtt->node.start / PAGE_SIZE * sizeof(gen6_gtt_pte_t); >> >> + ppgtt->pd_addr = (gen6_gtt_pte_t __iomem *)dev_priv->gtt.gsm + >> + ppgtt->pd.pd_offset / sizeof(gen6_gtt_pte_t); >> + >> ppgtt->base.clear_range(&ppgtt->base, 0, ppgtt->base.total, true); >> >> + gen6_write_page_range(dev_priv, &ppgtt->pd, 0, ppgtt->base.total); >> + >> DRM_DEBUG_DRIVER("Allocated pde space (%ldM) at GTT entry: %lx\n", >> ppgtt->node.size >> 20, >> ppgtt->node.start / PAGE_SIZE); >> >> - gen6_write_pdes(ppgtt); >> DRM_DEBUG("Adding PPGTT at offset %x\n", >> ppgtt->pd.pd_offset << 10); >> >> @@ -1491,13 +1561,14 @@ void i915_gem_restore_gtt_mappings(struct drm_device *dev) >> >> list_for_each_entry(vm, &dev_priv->vm_list, global_link) { >> /* TODO: Perhaps it shouldn't be gen6 specific */ >> - if (i915_is_ggtt(vm)) { >> - if (dev_priv->mm.aliasing_ppgtt) >> - gen6_write_pdes(dev_priv->mm.aliasing_ppgtt); >> - continue; >> - } >> >> - gen6_write_pdes(container_of(vm, struct i915_hw_ppgtt, base)); >> + struct i915_hw_ppgtt *ppgtt = >> + container_of(vm, struct i915_hw_ppgtt, base); >> + >> + if (i915_is_ggtt(vm)) >> + ppgtt = 
dev_priv->mm.aliasing_ppgtt; > If we have ggtt but aliasing is not enabled we get NULL here and oops > over in the next function? > > -Mika > I applied the changes in v7. Thanks for the review. -Michel