linux-mm.kvack.org archive mirror
 help / color / mirror / Atom feed
From: "Jérôme Glisse" <jglisse@redhat.com>
To: akpm@linux-foundation.org, linux-kernel@vger.kernel.org,
	linux-mm@kvack.org
Cc: "Linus Torvalds" <torvalds@linux-foundation.org>,
	joro@8bytes.org, "Mel Gorman" <mgorman@suse.de>,
	"H. Peter Anvin" <hpa@zytor.com>,
	"Peter Zijlstra" <peterz@infradead.org>,
	"Andrea Arcangeli" <aarcange@redhat.com>,
	"Johannes Weiner" <jweiner@redhat.com>,
	"Larry Woodman" <lwoodman@redhat.com>,
	"Rik van Riel" <riel@redhat.com>,
	"Dave Airlie" <airlied@redhat.com>,
	"Brendan Conoboy" <blc@redhat.com>,
	"Joe Donohue" <jdonohue@redhat.com>,
	"Christophe Harle" <charle@nvidia.com>,
	"Duncan Poole" <dpoole@nvidia.com>,
	"Sherry Cheung" <SCheung@nvidia.com>,
	"Subhash Gutti" <sgutti@nvidia.com>,
	"John Hubbard" <jhubbard@nvidia.com>,
	"Mark Hairgrove" <mhairgrove@nvidia.com>,
	"Lucien Dunning" <ldunning@nvidia.com>,
	"Cameron Buschardt" <cabuschardt@nvidia.com>,
	"Arvind Gopalakrishnan" <arvindg@nvidia.com>,
	"Haggai Eran" <haggaie@mellanox.com>,
	"Shachar Raindel" <raindel@mellanox.com>,
	"Liran Liss" <liranl@mellanox.com>,
	"Roland Dreier" <roland@purestorage.com>,
	"Ben Sander" <ben.sander@amd.com>,
	"Greg Stoner" <Greg.Stoner@amd.com>,
	"John Bridgman" <John.Bridgman@amd.com>,
	"Michael Mantor" <Michael.Mantor@amd.com>,
	"Paul Blinzer" <Paul.Blinzer@amd.com>,
	"Leonid Shamis" <Leonid.Shamis@amd.com>,
	"Laurent Morichetti" <Laurent.Morichetti@amd.com>,
	"Alexander Deucher" <Alexander.Deucher@amd.com>,
	"Jérôme Glisse" <jglisse@redhat.com>,
	"Jatin Kumar" <jakumar@nvidia.com>
Subject: [PATCH v12 26/29] HMM: add helpers for migration back to system memory v3.
Date: Tue,  8 Mar 2016 15:43:19 -0500	[thread overview]
Message-ID: <1457469802-11850-27-git-send-email-jglisse@redhat.com> (raw)
In-Reply-To: <1457469802-11850-1-git-send-email-jglisse@redhat.com>

This patch adds all the necessary functions and helpers for migration
from device memory back to system memory. There are 3 different
cases that would use that code:
  - CPU page fault
  - fork
  - device driver request

Note that this patch uses regular memory accounting, which means that
migration can fail as a result of memory cgroup resource exhaustion.
Later patches will modify memcg to allow remote memory to be kept
accounted as regular memory, thus removing this point of failure.

Changed since v1:
  - Fixed logic in dma unmap code path on migration error.

Changed since v2:
  - Adapt to HMM page table changes.
  - Fix bug in migration failure code path.

Signed-off-by: Jérôme Glisse <jglisse@redhat.com>
Signed-off-by: Sherry Cheung <SCheung@nvidia.com>
Signed-off-by: Subhash Gutti <sgutti@nvidia.com>
Signed-off-by: Mark Hairgrove <mhairgrove@nvidia.com>
Signed-off-by: John Hubbard <jhubbard@nvidia.com>
Signed-off-by: Jatin Kumar <jakumar@nvidia.com>
---
 mm/hmm.c | 151 +++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++
 1 file changed, 151 insertions(+)

diff --git a/mm/hmm.c b/mm/hmm.c
index 07f1ab6..435e376 100644
--- a/mm/hmm.c
+++ b/mm/hmm.c
@@ -47,6 +47,12 @@
 
 static struct mmu_notifier_ops hmm_notifier_ops;
 static void hmm_mirror_kill(struct hmm_mirror *mirror);
+static int hmm_mirror_migrate_back(struct hmm_mirror *mirror,
+				   struct hmm_event *event,
+				   pte_t *new_pte,
+				   dma_addr_t *dst,
+				   unsigned long start,
+				   unsigned long end);
 static inline int hmm_mirror_update(struct hmm_mirror *mirror,
 				    struct hmm_event *event,
 				    struct page *page);
@@ -421,6 +427,46 @@ static struct mmu_notifier_ops hmm_notifier_ops = {
 };
 
 
+static int hmm_migrate_back(struct hmm *hmm,
+			    struct hmm_event *event,
+			    struct mm_struct *mm,
+			    struct vm_area_struct *vma,
+			    pte_t *new_pte,
+			    dma_addr_t *dst,
+			    unsigned long start,
+			    unsigned long end)
+{
+	struct hmm_mirror *mirror;
+	int r, ret;
+
+	/*
+	 * Keep going when this fails: the error is kept in ret, but there
+	 * might still be valid pages that the mirrors below can migrate.
+	 */
+	ret = mm_hmm_migrate_back(mm, vma, new_pte, start, end);
+
+again:
+	down_read(&hmm->rwsem);
+	hlist_for_each_entry(mirror, &hmm->mirrors, mlist) {
+		r = hmm_mirror_migrate_back(mirror, event, new_pte,
+					    dst, start, end);
+		if (r) {
+			ret = ret ? ret : r; /* keep the first error */
+			mirror = hmm_mirror_ref(mirror);
+			BUG_ON(!mirror);
+			up_read(&hmm->rwsem);
+			hmm_mirror_kill(mirror);
+			hmm_mirror_unref(&mirror);
+			goto again; /* rwsem was dropped: rescan the list */
+		}
+	}
+	up_read(&hmm->rwsem);
+
+	mm_hmm_migrate_back_cleanup(mm, vma, new_pte, dst, start, end);
+
+	return ret;
+}
+
 int hmm_handle_cpu_fault(struct mm_struct *mm,
 			struct vm_area_struct *vma,
 			pmd_t *pmdp, unsigned long addr,
@@ -1153,6 +1199,111 @@ out:
 }
 EXPORT_SYMBOL(hmm_mirror_fault);
 
+/* hmm_mirror_migrate_back() - copy back device memory of one mirror.
+ *
+ * @mirror: The mirror struct.
+ * @event: The event, handed to the copy_from_device() callback.
+ * @new_pte: Array of pte_t, one per page of the range.
+ * @dst: Array of HMM entries, one per page, indexed from @start.
+ * @start: Start address of the range (inclusive).
+ * @end: End address of the range (exclusive).
+ * Returns: DMA mapping error if any, else the copy_from_device() result.
+ */
+static int hmm_mirror_migrate_back(struct hmm_mirror *mirror,
+				   struct hmm_event *event,
+				   pte_t *new_pte,
+				   dma_addr_t *dst,
+				   unsigned long start,
+				   unsigned long end)
+{
+	unsigned long addr, i, npages = (end - start) >> PAGE_SHIFT;
+	struct hmm_device *device = mirror->device;
+	struct device *dev = mirror->device->dev;
+	struct hmm_pt_iter iter;
+	int r, ret = 0;
+
+	hmm_pt_iter_init(&iter, &mirror->pt);
+	for (addr = start, i = 0; addr < end; addr += PAGE_SIZE, ++i) {
+		unsigned long next = end;
+		dma_addr_t *hmm_pte;
+
+		hmm_pte_clear_select(&dst[i]);
+
+		if (!pte_present(new_pte[i]))
+			continue;
+		hmm_pte = hmm_pt_iter_lookup(&iter, addr, &next);
+		if (!hmm_pte || !hmm_pte_test_valid_dev(hmm_pte))
+			continue;
+
+		dst[i] = hmm_pte_from_pfn(pte_pfn(new_pte[i]));
+		hmm_pte_set_select(&dst[i]);
+		hmm_pte_set_write(&dst[i]);
+	}
+
+	if (dev)
+		ret = hmm_mirror_dma_map_range(mirror, dst, NULL, npages);
+	/* On mapping error drop the selected entries that were not mapped. */
+	for (i = 0; ret && i < npages; ++i) {
+		if (hmm_pte_test_select(&dst[i]) &&
+		    !hmm_pte_test_valid_dma(&dst[i]))
+			dst[i] = 0;
+	}
+
+	r = device->ops->copy_from_device(mirror, event, dst, start, end);
+
+	/* Update mirror page table with the successfully migrated entries. */
+	for (addr = start; addr < end;) {
+		unsigned long idx, next = end, count;
+		dma_addr_t *hmm_pte;
+
+		hmm_pte = hmm_pt_iter_walk(&iter, &addr, &next);
+		if (!hmm_pte)
+			continue;
+		/* dst is indexed from start, as in the loops above. */
+		idx = (addr - start) >> PAGE_SHIFT;
+		count = (next - addr) >> PAGE_SHIFT;
+		hmm_pt_iter_directory_lock(&iter);
+		for (i = 0; i < count; i++, idx++) {
+			if (!hmm_pte_test_valid_pfn(&dst[idx]) &&
+			    !hmm_pte_test_valid_dma(&dst[idx])) {
+				if (hmm_pte_test_valid_dev(&hmm_pte[i])) {
+					hmm_pte[i] = 0;
+					hmm_pt_iter_directory_unref(&iter);
+				}
+				continue;
+			}
+
+			VM_BUG_ON(!hmm_pte_test_select(&dst[idx]));
+			VM_BUG_ON(!hmm_pte_test_valid_dev(&hmm_pte[i]));
+			hmm_pte[i] = dst[idx];
+		}
+		hmm_pt_iter_directory_unlock(&iter);
+
+		/* DMA unmap the entries that failed to migrate. */
+		if (dev) {
+			idx = (addr - start) >> PAGE_SHIFT;
+			for (i = 0; i < count; i++, idx++) {
+				/*
+				 * A failed entry has its valid bit clear while
+				 * its select bit is still set.
+				 */
+				if (!hmm_pte_test_select(&dst[idx]) ||
+				    hmm_pte_test_valid_dma(&dst[idx]))
+					continue;
+				hmm_pte_set_valid_dma(&dst[idx]);
+				dma_unmap_page(dev, hmm_pte_dma_addr(dst[idx]),
+					       PAGE_SIZE, DMA_BIDIRECTIONAL);
+				dst[idx] = 0;
+			}
+		}
+
+		addr = next;
+	}
+	hmm_pt_iter_fini(&iter);
+
+	return ret ? ret : r;
+}
+
 /* hmm_mirror_range_discard() - discard a range of address.
  *
  * @mirror: The mirror struct.
-- 
2.4.3

--
To unsubscribe, send a message with 'unsubscribe linux-mm' in
the body to majordomo@kvack.org.  For more info on Linux MM,
see: http://www.linux-mm.org/ .
Don't email: <a href=mailto:"dont@kvack.org"> email@kvack.org </a>

  parent reply	other threads:[~2016-03-08 19:47 UTC|newest]

Thread overview: 41+ messages / expand[flat|nested]  mbox.gz  Atom feed  top
2016-03-08 20:42 HMM (Heterogeneous Memory Management) Jérôme Glisse
2016-03-08 20:42 ` [PATCH v12 01/29] mmu_notifier: add event information to address invalidation v9 Jérôme Glisse
2016-03-08 20:42 ` [PATCH v12 02/29] mmu_notifier: keep track of active invalidation ranges v5 Jérôme Glisse
2016-03-08 20:42 ` [PATCH v12 03/29] mmu_notifier: pass page pointer to mmu_notifier_invalidate_page() v2 Jérôme Glisse
2016-03-08 20:42 ` [PATCH v12 04/29] mmu_notifier: allow range invalidation to exclude a specific mmu_notifier Jérôme Glisse
2016-03-08 20:42 ` [PATCH v12 05/29] HMM: introduce heterogeneous memory management v5 Jérôme Glisse
2016-03-08 20:42 ` [PATCH v12 06/29] HMM: add HMM page table v4 Jérôme Glisse
2016-03-08 20:43 ` [PATCH v12 07/29] HMM: add per mirror " Jérôme Glisse
2016-03-29 22:58   ` John Hubbard
2016-03-08 20:43 ` [PATCH v12 08/29] HMM: add device page fault support v6 Jérôme Glisse
2016-03-23  6:52   ` Aneesh Kumar K.V
2016-03-23 10:09     ` Jerome Glisse
2016-03-23 10:29       ` Aneesh Kumar K.V
2016-03-23 11:25         ` Jerome Glisse
2016-03-08 20:43 ` [PATCH v12 09/29] HMM: add mm page table iterator helpers Jérôme Glisse
2016-03-08 20:43 ` [PATCH v12 10/29] HMM: use CPU page table during invalidation Jérôme Glisse
2016-03-08 20:43 ` [PATCH v12 11/29] HMM: add discard range helper (to clear and free resources for a range) Jérôme Glisse
2016-03-08 20:43 ` [PATCH v12 12/29] HMM: add dirty range helper (toggle dirty bit inside mirror page table) v2 Jérôme Glisse
2016-03-08 20:43 ` [PATCH v12 13/29] HMM: DMA map memory on behalf of device driver v2 Jérôme Glisse
2016-03-08 20:43 ` [PATCH v12 14/29] HMM: Add support for hugetlb Jérôme Glisse
2016-03-08 20:43 ` [PATCH v12 15/29] HMM: add documentation explaining HMM internals and how to use it Jérôme Glisse
2016-03-08 20:43 ` [PATCH v12 16/29] fork: pass the dst vma to copy_page_range() and its sub-functions Jérôme Glisse
2016-03-08 20:43 ` [PATCH v12 17/29] HMM: add special swap filetype for memory migrated to device v2 Jérôme Glisse
2016-03-08 20:43 ` [PATCH v12 18/29] HMM: add new HMM page table flag (valid device memory) Jérôme Glisse
2016-03-08 20:43 ` [PATCH v12 19/29] HMM: add new HMM page table flag (select flag) Jérôme Glisse
2016-03-08 20:43 ` [PATCH v12 20/29] HMM: handle HMM device page table entry on mirror page table fault and update Jérôme Glisse
2016-03-08 20:43 ` [PATCH v12 21/29] HMM: mm add helper to update page table when migrating memory back v2 Jérôme Glisse
2016-03-21 11:27   ` Aneesh Kumar K.V
2016-03-21 12:02     ` Jerome Glisse
2016-03-21 13:48       ` Aneesh Kumar K.V
2016-03-21 14:30         ` Jerome Glisse
2016-03-08 20:43 ` [PATCH v12 22/29] HMM: mm add helper to update page table when migrating memory v3 Jérôme Glisse
2016-03-21 14:24   ` Aneesh Kumar K.V
2016-03-08 20:43 ` [PATCH v12 23/29] HMM: new callback for copying memory from and to device memory v2 Jérôme Glisse
2016-03-08 20:43 ` [PATCH v12 24/29] HMM: allow to get pointer to spinlock protecting a directory Jérôme Glisse
2016-03-08 20:43 ` [PATCH v12 25/29] HMM: split DMA mapping function in two Jérôme Glisse
2016-03-08 20:43 ` Jérôme Glisse [this message]
2016-03-08 20:43 ` [PATCH v12 27/29] HMM: fork copy migrated memory into system memory for child process Jérôme Glisse
2016-03-08 20:43 ` [PATCH v12 28/29] HMM: CPU page fault on migrated memory Jérôme Glisse
2016-03-08 20:43 ` [PATCH v12 29/29] HMM: add mirror fault support for system to device memory migration v3 Jérôme Glisse
2016-03-08 22:02 ` HMM (Heterogeneous Memory Management) John Hubbard

Reply instructions:

You may reply publicly to this message via plain-text email
using any one of the following methods:

* Save the following mbox file, import it into your mail client,
  and reply-to-all from there: mbox

  Avoid top-posting and favor interleaved quoting:
  https://en.wikipedia.org/wiki/Posting_style#Interleaved_style

* Reply using the --to, --cc, and --in-reply-to
  switches of git-send-email(1):

  git send-email \
    --in-reply-to=1457469802-11850-27-git-send-email-jglisse@redhat.com \
    --to=jglisse@redhat.com \
    --cc=Alexander.Deucher@amd.com \
    --cc=Greg.Stoner@amd.com \
    --cc=John.Bridgman@amd.com \
    --cc=Laurent.Morichetti@amd.com \
    --cc=Leonid.Shamis@amd.com \
    --cc=Michael.Mantor@amd.com \
    --cc=Paul.Blinzer@amd.com \
    --cc=SCheung@nvidia.com \
    --cc=aarcange@redhat.com \
    --cc=airlied@redhat.com \
    --cc=akpm@linux-foundation.org \
    --cc=arvindg@nvidia.com \
    --cc=ben.sander@amd.com \
    --cc=blc@redhat.com \
    --cc=cabuschardt@nvidia.com \
    --cc=charle@nvidia.com \
    --cc=dpoole@nvidia.com \
    --cc=haggaie@mellanox.com \
    --cc=hpa@zytor.com \
    --cc=jakumar@nvidia.com \
    --cc=jdonohue@redhat.com \
    --cc=jhubbard@nvidia.com \
    --cc=joro@8bytes.org \
    --cc=jweiner@redhat.com \
    --cc=ldunning@nvidia.com \
    --cc=linux-kernel@vger.kernel.org \
    --cc=linux-mm@kvack.org \
    --cc=liranl@mellanox.com \
    --cc=lwoodman@redhat.com \
    --cc=mgorman@suse.de \
    --cc=mhairgrove@nvidia.com \
    --cc=peterz@infradead.org \
    --cc=raindel@mellanox.com \
    --cc=riel@redhat.com \
    --cc=roland@purestorage.com \
    --cc=sgutti@nvidia.com \
    --cc=torvalds@linux-foundation.org \
    /path/to/YOUR_REPLY

  https://kernel.org/pub/software/scm/git/docs/git-send-email.html

* If your mail client supports setting the In-Reply-To header
  via mailto: links, try the mailto: link
Be sure your reply has a Subject: header at the top and a blank line before the message body.
This is a public inbox, see mirroring instructions
for how to clone and mirror all data and code used for this inbox;
as well as URLs for NNTP newsgroup(s).