From mboxrd@z Thu Jan 1 00:00:00 1970 From: Konrad Rzeszutek Wilk Subject: Re: Dom0 physical networking/swiotlb/something issue in 3.7-rc1 Date: Fri, 9 Nov 2012 08:48:29 -0500 Message-ID: <20121109134829.GB30634@phenom.dumpdata.com> References: <1350037688.14806.93.camel@zakaz.uk.xensource.com> <509CD50302000078000A76E4@nat28.tlf.novell.com> <1352452573.21048.35.camel@dagon.hellion.org.uk> <509CDDBA02000078000A772C@nat28.tlf.novell.com> <509CEAB402000078000A7772@nat28.tlf.novell.com> <509CFA7B02000078000A778C@nat28.tlf.novell.com> Mime-Version: 1.0 Content-Type: multipart/mixed; boundary="5mCyUwZo2JvN/JJP" Return-path: Content-Disposition: inline In-Reply-To: <509CFA7B02000078000A778C@nat28.tlf.novell.com> List-Unsubscribe: , List-Post: List-Help: List-Subscribe: , Sender: xen-devel-bounces@lists.xen.org Errors-To: xen-devel-bounces@lists.xen.org To: Jan Beulich Cc: Ian Campbell , xen-devel List-Id: xen-devel@lists.xenproject.org --5mCyUwZo2JvN/JJP Content-Type: text/plain; charset=us-ascii Content-Disposition: inline On Fri, Nov 09, 2012 at 11:43:39AM +0000, Jan Beulich wrote: > >>> On 09.11.12 at 11:36, "Jan Beulich" wrote: > > In the forward ported kernels, those two checks are however > > accompanied by range_needs_mapping() (aka > > range_straddles_page_boundary()) checks, which ought to > > take care of this. There is brokenness there with the invocations > > of gnttab_dma_map_page(), but only if the initial offset is at > > least PAGE_SIZE - will have to check whether that occurs. > > And indeed, fixing this also makes the problem go away when > the allocation order doesn't get forced to zero. So presumably > there's also only that one problem I had pointed out in pv-ops. The pvops one has this in the map-page variant (xen_swiotlb_map_page): 351 if (dma_capable(dev, dev_addr, size) && 352 !range_straddles_page_boundary(phys, size) && !swiotlb_force) 353 return dev_addr; and in the sg variant: 494 if (swiotlb_force || 495 !dma_capable(hwdev, dev_addr, sg->length) || 496 range_straddles_page_boundary(paddr, sg->length)) { 497 void *map = swiotlb_tbl_map_single(hwdev, So I think that check is OK. There is no gnttab_dma_map_page call - so that can't be the issue. I did play with this a bit and wrote this little driver (see attached) that forces allocation of large pages and it worked as expected on Xen-SWIOTLB. But while doing this I found that the 'skge' driver is busted - it does not even work on baremetal if you do 'iommu=soft swiotlb=force'. Since Xen-SWIOTLB would occasionaly use the bounce-buffer - and with greater than 0-page order - the bug in skge became more obvious. I hadn't narrowed down where the issue is with skge. > > Jan --5mCyUwZo2JvN/JJP Content-Type: text/plain; charset=us-ascii Content-Disposition: attachment; filename="dma_test.c" #include #include #include #include #include #include #include #define DMA_TEST "0.1" MODULE_AUTHOR("Konrad Rzeszutek Wilk "); MODULE_DESCRIPTION("dma_test"); MODULE_LICENSE("GPL"); MODULE_VERSION(DMA_TEST); static struct bus_type fallback_bus_type = { .name = "fallback_bus:", }; static void fake_release(struct device *dev) { /* No kfree as the device was allocated on stack. */ } struct args { int len; enum dma_data_direction dir; }; #define MAGIC_DEVICE 0xffffffdd #define MAGIC_CPU 0xffffffcc static int dma_test_thread(void *arg) { struct page *page; dma_addr_t dma_addr = 0; struct device fake = { .coherent_dma_mask = DMA_BIT_MASK(32), .bus = &fallback_bus_type, .release = fake_release, }; gfp_t gfp = __GFP_COMP | __GFP_NOWARN | GFP_ATOMIC; int ret; int i; void *addr; struct page *p; struct args *args = (struct args *)arg; int dir = args->dir; int len = args->len; dev_set_name(&fake, "%s", dir == DMA_TO_DEVICE ? "to_dev" : "to_cpu"); fake.dma_mask = &fake.coherent_dma_mask; ret = device_register(&fake); if (ret) goto out; do { unsigned long prev_mfn = 0; bool bus_and_dma_same; page = alloc_pages(gfp, get_order(len)); p = page; /* Check that the bus addresses are contingous. */ for (i = 0; i < len / PAGE_SIZE; i++, p++) { unsigned long pfn, mfn; addr = page_address(p); pfn = PFN_DOWN(virt_to_phys(addr)); if (xen_domain()) mfn = pfn_to_mfn(pfn); else mfn = pfn; if (i != 0) { if (prev_mfn + 1 != mfn) dev_warn(&fake, "va: %lx (pfn:%lx, mfn:%lx) w.r.t prev mfn: %lx!\n", (unsigned long)addr, pfn, mfn, prev_mfn); } prev_mfn = mfn; } dma_addr = dma_map_page(&fake, page, 0 /* no offset */, len, dir); /* Note, dma_addr is the physical address ! */ if (dma_mapping_error(&fake, dma_addr)) { dev_warn(&fake, "DMA %lx for %lx is not right\n", (unsigned long)dma_addr, (unsigned long)page_address(page)); __free_pages(page, get_order(len)); page = NULL; } bus_and_dma_same = false; if (page) { unsigned long phys; unsigned long pfn, mfn, bus_addr_mfn; unsigned long bus_addr = 0; p = page; for (i = 0; i < len / PAGE_SIZE; i++, p++) { void *bus_va; addr = page_address(p); phys = virt_to_phys(addr); pfn = PFN_DOWN(phys); bus_va = (void *)(dma_addr + (i * PAGE_SIZE)); if (xen_domain()) { void * tmp; /* Find the bus frame for the physical frame*/ mfn = pfn_to_mfn(pfn); /* and .. voodoo time! */ bus_addr_mfn = PFN_DOWN(dma_addr + (i * PAGE_SIZE)); bus_addr = PFN_PHYS(mfn_to_pfn(bus_addr_mfn)); tmp = __va(bus_addr); bus_va = mfn_to_virt(bus_addr_mfn); WARN(bus_va != tmp, "Expected %lx (%lx+%d*PAGE_SIZE), got: %lx (pfn: %lx, mfn: %lx)!\n", (unsigned long)bus_va, (unsigned long)dma_addr, i, (unsigned long)tmp, PFN_DOWN(bus_addr), bus_addr_mfn); } else { mfn = pfn; bus_addr = (unsigned long)bus_va; /* Assume DMA addr == physical addr */ bus_addr_mfn = PFN_DOWN(bus_addr); bus_va = __va(PFN_PHYS(bus_addr_mfn)); } dev_info(&fake, "%lx (pfn:%lx, bus frame: %lx) %s %lx (addr: %lx, frame: %lx)\n", (unsigned long)addr, pfn, mfn, dir == DMA_TO_DEVICE ? "=>" : "<=", (unsigned long)bus_va, bus_addr, bus_addr_mfn); if (!virt_addr_valid(bus_va)) break; if (!virt_addr_valid(addr)) break; /* CPU */ memset(addr, 0xCC, PAGE_SIZE); /* Device */ memset(bus_va, 0xDD, PAGE_SIZE); if (addr == bus_va) bus_and_dma_same = true; } } set_current_state(TASK_INTERRUPTIBLE); schedule_timeout_interruptible(5*HZ); if (!page) continue; p = page; for (i = 0; i < len / PAGE_SIZE; i++, p++) { if (bus_and_dma_same) continue; addr = page_address(p); if (((char *)addr)[0] != MAGIC_CPU) dev_warn(&fake, "%lx with DMA (%lx) has %x (expected %lx)\n", (unsigned long)addr, (unsigned long)(dma_addr + (i * PAGE_SIZE)), ((char *)addr)[0], (unsigned long)MAGIC_CPU); } /* sync the page */ dma_sync_single_for_cpu(&fake, dma_addr, len, dir); p = page; for (i = 0; i < len / PAGE_SIZE; i++, p++) { unsigned long check_val = MAGIC_DEVICE; addr = page_address(p); if (dir == DMA_TO_DEVICE) check_val = MAGIC_CPU; if (dir == DMA_FROM_DEVICE) check_val = MAGIC_DEVICE; dev_info(&fake, "%lx with DMA (%lx) has %x (expected %lx)\n", (unsigned long)addr, (unsigned long)(dma_addr + (i * PAGE_SIZE)), ((char *)addr)[0], check_val); } dma_unmap_page(&fake, dma_addr, len, dir); dma_addr = 0; __free_pages(page, get_order(len)); page = NULL; } while (!kthread_should_stop()); if (dma_addr) dma_unmap_page(&fake, dma_addr, len, dir); if (page) __free_pages(page, get_order(len)); put_device(&fake); device_unregister(&fake); out: return 0; } static struct task_struct *t[2]; static struct args a[2]; static int __init dma_test_init(void) { int ret; /* No point doing this without SWIOTLB */ if (!swiotlb_nr_tbl()) return -ENODEV; ret = bus_register(&fallback_bus_type); if (ret) return ret; a[0].dir = DMA_TO_DEVICE; a[0].len = 32768; t[0] = kthread_run(dma_test_thread, &a[0], "dma_test_dev"); a[1].len = 16384; a[1].dir = DMA_FROM_DEVICE; t[1] = kthread_run(dma_test_thread, &a[1], "dma_test_cpu"); return 0; } static void __exit dma_test_exit(void) { if (t[0]) kthread_stop(t[0]); if (t[1]) kthread_stop(t[1]); bus_unregister(&fallback_bus_type); } module_init(dma_test_init); module_exit(dma_test_exit); --5mCyUwZo2JvN/JJP Content-Type: text/plain; charset="us-ascii" MIME-Version: 1.0 Content-Transfer-Encoding: 7bit Content-Disposition: inline _______________________________________________ Xen-devel mailing list Xen-devel@lists.xen.org http://lists.xen.org/xen-devel --5mCyUwZo2JvN/JJP--