From: "Jérôme Glisse" <jglisse@redhat.com>
To: akpm@linux-foundation.org, linux-kernel@vger.kernel.org,
linux-mm@kvack.org
Cc: "Linus Torvalds" <torvalds@linux-foundation.org>,
joro@8bytes.org, "Mel Gorman" <mgorman@suse.de>,
"H. Peter Anvin" <hpa@zytor.com>,
"Peter Zijlstra" <peterz@infradead.org>,
"Andrea Arcangeli" <aarcange@redhat.com>,
"Johannes Weiner" <jweiner@redhat.com>,
"Larry Woodman" <lwoodman@redhat.com>,
"Rik van Riel" <riel@redhat.com>,
"Dave Airlie" <airlied@redhat.com>,
"Brendan Conoboy" <blc@redhat.com>,
"Joe Donohue" <jdonohue@redhat.com>,
"Christophe Harle" <charle@nvidia.com>,
"Duncan Poole" <dpoole@nvidia.com>,
"Sherry Cheung" <SCheung@nvidia.com>,
"Subhash Gutti" <sgutti@nvidia.com>,
"John Hubbard" <jhubbard@nvidia.com>,
"Mark Hairgrove" <mhairgrove@nvidia.com>,
"Lucien Dunning" <ldunning@nvidia.com>,
"Cameron Buschardt" <cabuschardt@nvidia.com>,
"Arvind Gopalakrishnan" <arvindg@nvidia.com>,
"Haggai Eran" <haggaie@mellanox.com>,
"Shachar Raindel" <raindel@mellanox.com>,
"Liran Liss" <liranl@mellanox.com>,
"Roland Dreier" <roland@purestorage.com>,
"Ben Sander" <ben.sander@amd.com>,
"Greg Stoner" <Greg.Stoner@amd.com>,
"John Bridgman" <John.Bridgman@amd.com>,
"Michael Mantor" <Michael.Mantor@amd.com>,
"Paul Blinzer" <Paul.Blinzer@amd.com>,
"Leonid Shamis" <Leonid.Shamis@amd.com>,
"Laurent Morichetti" <Laurent.Morichetti@amd.com>,
"Alexander Deucher" <Alexander.Deucher@amd.com>,
"Jérôme Glisse" <jglisse@redhat.com>,
"Jatin Kumar" <jakumar@nvidia.com>
Subject: [PATCH v12 29/29] HMM: add mirror fault support for system to device memory migration v3.
Date: Tue, 8 Mar 2016 15:43:22 -0500 [thread overview]
Message-ID: <1457469802-11850-30-git-send-email-jglisse@redhat.com> (raw)
In-Reply-To: <1457469802-11850-1-git-send-email-jglisse@redhat.com>
Migration to device memory is done as a special kind of device mirror
fault. Memory migration is always initiated by the device driver and never
by HMM (unless it is a migration back to system memory).
Changed since v1:
- Adapt to HMM page table changes.
Changed since v2:
- Fix error code path for migration, calling mm_hmm_migrate_cleanup()
is wrong.
Signed-off-by: Jérôme Glisse <jglisse@redhat.com>
Signed-off-by: Sherry Cheung <SCheung@nvidia.com>
Signed-off-by: Subhash Gutti <sgutti@nvidia.com>
Signed-off-by: Mark Hairgrove <mhairgrove@nvidia.com>
Signed-off-by: John Hubbard <jhubbard@nvidia.com>
Signed-off-by: Jatin Kumar <jakumar@nvidia.com>
---
mm/hmm.c | 170 +++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++
1 file changed, 170 insertions(+)
diff --git a/mm/hmm.c b/mm/hmm.c
index 38943a7..41637a3 100644
--- a/mm/hmm.c
+++ b/mm/hmm.c
@@ -53,6 +53,10 @@ static int hmm_mirror_migrate_back(struct hmm_mirror *mirror,
dma_addr_t *dst,
unsigned long start,
unsigned long end);
+static int hmm_mirror_migrate(struct hmm_mirror *mirror,
+ struct hmm_event *event,
+ struct vm_area_struct *vma,
+ struct hmm_pt_iter *iter);
static inline int hmm_mirror_update(struct hmm_mirror *mirror,
struct hmm_event *event,
struct page *page);
@@ -101,6 +105,12 @@ static inline int hmm_event_init(struct hmm_event *event,
return 0;
}
+/* hmm_event_npages() - number of pages spanned by an event range.
+ *
+ * @event: The event whose [start, end) range is measured.
+ * Returns: Page count once start is rounded down and end is rounded up to a
+ *          page boundary.
+ */
+static inline unsigned long hmm_event_npages(const struct hmm_event *event)
+{
+	/* Byte length of the page-aligned range covering the event. */
+	const unsigned long span = PAGE_ALIGN(event->end) -
+				   (event->start & PAGE_MASK);
+
+	return span >> PAGE_SHIFT;
+}
+
/* hmm - core HMM functions.
*
@@ -1255,6 +1265,9 @@ retry:
}
switch (event->etype) {
+ case HMM_COPY_TO_DEVICE:
+ ret = hmm_mirror_migrate(mirror, event, vma, &iter);
+ break;
case HMM_DEVICE_WFAULT:
if (!(vma->vm_flags & VM_WRITE)) {
ret = -EFAULT;
@@ -1392,6 +1405,163 @@ static int hmm_mirror_migrate_back(struct hmm_mirror *mirror,
return ret ? ret : r;
}
+/* hmm_mirror_migrate() - migrate a range of system memory to device memory.
+ *
+ * @mirror: The mirror struct on whose behalf the migration happens.
+ * @event: The event describing the range (start, end) to migrate.
+ * @vma: The vma covering the range.
+ * @iter: Iterator over the mirror page table.
+ * Returns: 0 on success, -EINVAL if the vma is not private anonymous memory
+ *          or the range is too big for one shot, -ENOMEM on allocation
+ *          failure, otherwise the error reported by mm_hmm_migrate(), the
+ *          DMA mapping helper or the device copy_to_device() callback.
+ *
+ * The CPU page table entries for the range are first saved and replaced
+ * through mm_hmm_migrate(), every other mirror is then invalidated, the
+ * mirror page table is populated from the saved entries and finally the
+ * device driver is asked to perform the copy.
+ */
+static int hmm_mirror_migrate(struct hmm_mirror *mirror,
+			      struct hmm_event *event,
+			      struct vm_area_struct *vma,
+			      struct hmm_pt_iter *iter)
+{
+	struct hmm_device *device = mirror->device;
+	struct hmm *hmm = mirror->hmm;
+	struct hmm_event invalidate;
+	unsigned long addr, npages;
+	struct hmm_mirror *tmp;
+	dma_addr_t *dst;
+	pte_t *save_pte;
+	int r = 0, ret;
+
+	/* Only allow migration of private anonymous memory. */
+	if (vma->vm_ops || unlikely(vma->vm_flags & (VM_PFNMAP|VM_MIXEDMAP)))
+		return -EINVAL;
+
+	/*
+	 * TODO More advanced loop for splitting migration into several chunks.
+	 * For now limit the amount that can be migrated in one shot. Also we
+	 * would need to see if we need rescheduling if this is happening as
+	 * part of system call to the device driver.
+	 */
+	npages = hmm_event_npages(event);
+	/* Both temporary arrays must each fit inside a single page. */
+	if (npages * max(sizeof(*dst), sizeof(*save_pte)) > PAGE_SIZE)
+		return -EINVAL;
+	dst = kcalloc(npages, sizeof(*dst), GFP_KERNEL);
+	if (dst == NULL)
+		return -ENOMEM;
+	save_pte = kcalloc(npages, sizeof(*save_pte), GFP_KERNEL);
+	if (save_pte == NULL) {
+		ret = -ENOMEM;
+		goto out;
+	}
+
+	/*
+	 * On any failure here (including -EAGAIN) skip out_cleanup: calling
+	 * mm_hmm_migrate_cleanup() on this error path is wrong.
+	 */
+	ret = mm_hmm_migrate(hmm->mm, vma, save_pte, &event->backoff,
+			     &hmm->mmu_notifier, event->start, event->end);
+	if (ret)
+		goto out;
+
+	/*
+	 * Now invalidate for all other device, note that they can not race
+	 * with us as the CPU page table is full of special entry.
+	 */
+	hmm_event_init(&invalidate, mirror->hmm, event->start,
+		       event->end, HMM_MIGRATE);
+again:
+	down_read(&hmm->rwsem);
+	hlist_for_each_entry(tmp, &hmm->mirrors, mlist) {
+		if (tmp == mirror)
+			continue;
+		if (hmm_mirror_update(tmp, &invalidate, NULL)) {
+			/*
+			 * A mirror that fails to invalidate is killed. The
+			 * rwsem is dropped first, so the list walk has to be
+			 * restarted from the beginning.
+			 */
+			hmm_mirror_ref(tmp);
+			up_read(&hmm->rwsem);
+			hmm_mirror_kill(tmp);
+			hmm_mirror_unref(&tmp);
+			goto again;
+		}
+	}
+	up_read(&hmm->rwsem);
+
+	/*
+	 * Populate the mirror page table with saved entry and also mark entry
+	 * that can be migrated.
+	 */
+	for (addr = event->start; addr < event->end;) {
+		unsigned long i, idx, next = event->end, cnt;
+		dma_addr_t *hmm_pte;
+
+		hmm_pte = hmm_pt_iter_populate(iter, addr, &next);
+		if (!hmm_pte) {
+			ret = -ENOMEM;
+			goto out_cleanup;
+		}
+
+		/* cnt indexes this chunk, idx indexes the whole event range. */
+		cnt = (next - addr) >> PAGE_SHIFT;
+		idx = (addr - event->start) >> PAGE_SHIFT;
+		hmm_pt_iter_directory_lock(iter);
+		for (i = 0; i < cnt; i++, idx++) {
+			hmm_pte_clear_select(&hmm_pte[i]);
+			if (!pte_present(save_pte[idx]))
+				continue;
+			hmm_pte_set_select(&hmm_pte[i]);
+			/* This can not be a valid device entry here. */
+			VM_BUG_ON(hmm_pte_test_valid_dev(&hmm_pte[i]));
+			if (hmm_pte_test_valid_dma(&hmm_pte[i]))
+				continue;
+
+			if (hmm_pte_test_valid_pfn(&hmm_pte[i]))
+				continue;
+
+			/* Entry was empty, fill it from the saved CPU pte. */
+			hmm_pt_iter_directory_ref(iter);
+			hmm_pte[i] = hmm_pte_from_pfn(pte_pfn(save_pte[idx]));
+			if (pte_write(save_pte[idx]))
+				hmm_pte_set_write(&hmm_pte[i]);
+			hmm_pte_set_select(&hmm_pte[i]);
+		}
+		hmm_pt_iter_directory_unlock(iter);
+
+		if (device->dev) {
+			spinlock_t *lock;
+
+			lock = hmm_pt_iter_directory_lock_ptr(iter);
+			ret = hmm_mirror_dma_map_range(mirror, hmm_pte,
+						       lock, cnt);
+			/*
+			 * Keep going only for entry that have been mapped.
+			 *
+			 * The mirror entries are what must be inspected here:
+			 * dst[] is still all zero at this point and is indexed
+			 * relative to the whole event range, so testing it
+			 * would never deselect anything.
+			 *
+			 * NOTE(review): a non zero ret is not acted upon and
+			 * may be overwritten by a later chunk, or returned
+			 * even though the copy below went ahead - confirm
+			 * this is the intended error reporting.
+			 */
+			if (ret) {
+				for (i = 0; i < cnt; ++i) {
+					if (!hmm_pte_test_select(&hmm_pte[i]))
+						continue;
+					if (hmm_pte_test_valid_dma(&hmm_pte[i]))
+						continue;
+					hmm_pte_clear_select(&hmm_pte[i]);
+				}
+			}
+		}
+		addr = next;
+	}
+
+	/* Now Waldo we can do the copy. */
+	r = device->ops->copy_to_device(mirror, event, vma, dst,
+					event->start, event->end);
+
+	/* Update mirror page table with successfully migrated entry. */
+	for (addr = event->start; addr < event->end;) {
+		unsigned long i, idx, next = event->end, cnt;
+		dma_addr_t *hmm_pte;
+
+		hmm_pte = hmm_pt_iter_walk(iter, &addr, &next);
+		/*
+		 * NOTE(review): presumably hmm_pt_iter_walk() advances addr
+		 * when it returns NULL, otherwise this would never terminate
+		 * - confirm against the iterator implementation.
+		 */
+		if (!hmm_pte)
+			continue;
+		cnt = (next - addr) >> PAGE_SHIFT;
+		idx = (addr - event->start) >> PAGE_SHIFT;
+		hmm_pt_iter_directory_lock(iter);
+		for (i = 0; i < cnt; i++, idx++) {
+			/* Only entries the driver marked as valid device. */
+			if (!hmm_pte_test_valid_dev(&dst[idx]))
+				continue;
+
+			VM_BUG_ON(!hmm_pte_test_select(&hmm_pte[i]));
+			hmm_pte[i] = dst[idx];
+		}
+		hmm_pt_iter_directory_unlock(iter);
+		addr = next;
+	}
+
+out_cleanup:
+	mm_hmm_migrate_cleanup(hmm->mm, vma, save_pte, dst,
+			       event->start, event->end);
+out:
+	kfree(save_pte);
+	kfree(dst);
+	/* An error from the setup steps takes precedence over the copy's. */
+	return ret ? ret : r;
+}
+
/* hmm_mirror_range_discard() - discard a range of address.
*
* @mirror: The mirror struct.
--
2.4.3
--
To unsubscribe, send a message with 'unsubscribe linux-mm' in
the body to majordomo@kvack.org. For more info on Linux MM,
see: http://www.linux-mm.org/ .
Don't email: <a href=mailto:"dont@kvack.org"> email@kvack.org </a>
next prev parent reply other threads:[~2016-03-08 19:48 UTC|newest]
Thread overview: 41+ messages / expand[flat|nested] mbox.gz Atom feed top
2016-03-08 20:42 HMM (Heterogeneous Memory Management) Jérôme Glisse
2016-03-08 20:42 ` [PATCH v12 01/29] mmu_notifier: add event information to address invalidation v9 Jérôme Glisse
2016-03-08 20:42 ` [PATCH v12 02/29] mmu_notifier: keep track of active invalidation ranges v5 Jérôme Glisse
2016-03-08 20:42 ` [PATCH v12 03/29] mmu_notifier: pass page pointer to mmu_notifier_invalidate_page() v2 Jérôme Glisse
2016-03-08 20:42 ` [PATCH v12 04/29] mmu_notifier: allow range invalidation to exclude a specific mmu_notifier Jérôme Glisse
2016-03-08 20:42 ` [PATCH v12 05/29] HMM: introduce heterogeneous memory management v5 Jérôme Glisse
2016-03-08 20:42 ` [PATCH v12 06/29] HMM: add HMM page table v4 Jérôme Glisse
2016-03-08 20:43 ` [PATCH v12 07/29] HMM: add per mirror " Jérôme Glisse
2016-03-29 22:58 ` John Hubbard
2016-03-08 20:43 ` [PATCH v12 08/29] HMM: add device page fault support v6 Jérôme Glisse
2016-03-23 6:52 ` Aneesh Kumar K.V
2016-03-23 10:09 ` Jerome Glisse
2016-03-23 10:29 ` Aneesh Kumar K.V
2016-03-23 11:25 ` Jerome Glisse
2016-03-08 20:43 ` [PATCH v12 09/29] HMM: add mm page table iterator helpers Jérôme Glisse
2016-03-08 20:43 ` [PATCH v12 10/29] HMM: use CPU page table during invalidation Jérôme Glisse
2016-03-08 20:43 ` [PATCH v12 11/29] HMM: add discard range helper (to clear and free resources for a range) Jérôme Glisse
2016-03-08 20:43 ` [PATCH v12 12/29] HMM: add dirty range helper (toggle dirty bit inside mirror page table) v2 Jérôme Glisse
2016-03-08 20:43 ` [PATCH v12 13/29] HMM: DMA map memory on behalf of device driver v2 Jérôme Glisse
2016-03-08 20:43 ` [PATCH v12 14/29] HMM: Add support for hugetlb Jérôme Glisse
2016-03-08 20:43 ` [PATCH v12 15/29] HMM: add documentation explaining HMM internals and how to use it Jérôme Glisse
2016-03-08 20:43 ` [PATCH v12 16/29] fork: pass the dst vma to copy_page_range() and its sub-functions Jérôme Glisse
2016-03-08 20:43 ` [PATCH v12 17/29] HMM: add special swap filetype for memory migrated to device v2 Jérôme Glisse
2016-03-08 20:43 ` [PATCH v12 18/29] HMM: add new HMM page table flag (valid device memory) Jérôme Glisse
2016-03-08 20:43 ` [PATCH v12 19/29] HMM: add new HMM page table flag (select flag) Jérôme Glisse
2016-03-08 20:43 ` [PATCH v12 20/29] HMM: handle HMM device page table entry on mirror page table fault and update Jérôme Glisse
2016-03-08 20:43 ` [PATCH v12 21/29] HMM: mm add helper to update page table when migrating memory back v2 Jérôme Glisse
2016-03-21 11:27 ` Aneesh Kumar K.V
2016-03-21 12:02 ` Jerome Glisse
2016-03-21 13:48 ` Aneesh Kumar K.V
2016-03-21 14:30 ` Jerome Glisse
2016-03-08 20:43 ` [PATCH v12 22/29] HMM: mm add helper to update page table when migrating memory v3 Jérôme Glisse
2016-03-21 14:24 ` Aneesh Kumar K.V
2016-03-08 20:43 ` [PATCH v12 23/29] HMM: new callback for copying memory from and to device memory v2 Jérôme Glisse
2016-03-08 20:43 ` [PATCH v12 24/29] HMM: allow to get pointer to spinlock protecting a directory Jérôme Glisse
2016-03-08 20:43 ` [PATCH v12 25/29] HMM: split DMA mapping function in two Jérôme Glisse
2016-03-08 20:43 ` [PATCH v12 26/29] HMM: add helpers for migration back to system memory v3 Jérôme Glisse
2016-03-08 20:43 ` [PATCH v12 27/29] HMM: fork copy migrated memory into system memory for child process Jérôme Glisse
2016-03-08 20:43 ` [PATCH v12 28/29] HMM: CPU page fault on migrated memory Jérôme Glisse
2016-03-08 20:43 ` Jérôme Glisse [this message]
2016-03-08 22:02 ` HMM (Heterogeneous Memory Management) John Hubbard
Reply instructions:
You may reply publicly to this message via plain-text email
using any one of the following methods:
* Save the following mbox file, import it into your mail client,
and reply-to-all from there: mbox
Avoid top-posting and favor interleaved quoting:
https://en.wikipedia.org/wiki/Posting_style#Interleaved_style
* Reply using the --to, --cc, and --in-reply-to
switches of git-send-email(1):
git send-email \
--in-reply-to=1457469802-11850-30-git-send-email-jglisse@redhat.com \
--to=jglisse@redhat.com \
--cc=Alexander.Deucher@amd.com \
--cc=Greg.Stoner@amd.com \
--cc=John.Bridgman@amd.com \
--cc=Laurent.Morichetti@amd.com \
--cc=Leonid.Shamis@amd.com \
--cc=Michael.Mantor@amd.com \
--cc=Paul.Blinzer@amd.com \
--cc=SCheung@nvidia.com \
--cc=aarcange@redhat.com \
--cc=airlied@redhat.com \
--cc=akpm@linux-foundation.org \
--cc=arvindg@nvidia.com \
--cc=ben.sander@amd.com \
--cc=blc@redhat.com \
--cc=cabuschardt@nvidia.com \
--cc=charle@nvidia.com \
--cc=dpoole@nvidia.com \
--cc=haggaie@mellanox.com \
--cc=hpa@zytor.com \
--cc=jakumar@nvidia.com \
--cc=jdonohue@redhat.com \
--cc=jhubbard@nvidia.com \
--cc=joro@8bytes.org \
--cc=jweiner@redhat.com \
--cc=ldunning@nvidia.com \
--cc=linux-kernel@vger.kernel.org \
--cc=linux-mm@kvack.org \
--cc=liranl@mellanox.com \
--cc=lwoodman@redhat.com \
--cc=mgorman@suse.de \
--cc=mhairgrove@nvidia.com \
--cc=peterz@infradead.org \
--cc=raindel@mellanox.com \
--cc=riel@redhat.com \
--cc=roland@purestorage.com \
--cc=sgutti@nvidia.com \
--cc=torvalds@linux-foundation.org \
/path/to/YOUR_REPLY
https://kernel.org/pub/software/scm/git/docs/git-send-email.html
* If your mail client supports setting the In-Reply-To header
via mailto: links, try the mailto: link
Be sure your reply has a Subject: header at the top and a blank line
before the message body.
This is a public inbox, see mirroring instructions
for how to clone and mirror all data and code used for this inbox;
as well as URLs for NNTP newsgroup(s).