Netdev List
 help / color / mirror / Atom feed
* [PATCH v2 34/34] fs/binfmt_elf: convert put_page() to put_user_page*()
From: john.hubbard @ 2019-08-04 22:49 UTC (permalink / raw)
  To: Andrew Morton
  Cc: Christoph Hellwig, Dan Williams, Dave Chinner, Dave Hansen,
	Ira Weiny, Jan Kara, Jason Gunthorpe, Jérôme Glisse,
	LKML, amd-gfx, ceph-devel, devel, devel, dri-devel, intel-gfx,
	kvm, linux-arm-kernel, linux-block, linux-crypto, linux-fbdev,
	linux-fsdevel, linux-media, linux-mm, linux-nfs, linux-rdma,
	linux-rpi-kernel, linux-xfs, netdev, rds-devel, sparclinux, x86,
	xen-devel, John Hubbard
In-Reply-To: <20190804224915.28669-1-jhubbard@nvidia.com>

From: Ira Weiny <ira.weiny@intel.com>

For pages that were retained via get_user_pages*(), release those pages
via the new put_user_page*() routines, instead of via put_page() or
release_pages().

This is part a tree-wide conversion, as described in commit fc1d8e7cca2d
("mm: introduce put_user_page*(), placeholder versions").

get_dump_page calls get_user_page so put_user_page must be used
to match.

Signed-off-by: Ira Weiny <ira.weiny@intel.com>
Signed-off-by: John Hubbard <jhubbard@nvidia.com>
---
 fs/binfmt_elf.c       | 2 +-
 fs/binfmt_elf_fdpic.c | 2 +-
 2 files changed, 2 insertions(+), 2 deletions(-)

diff --git a/fs/binfmt_elf.c b/fs/binfmt_elf.c
index d4e11b2e04f6..92e4a5ca99d8 100644
--- a/fs/binfmt_elf.c
+++ b/fs/binfmt_elf.c
@@ -2377,7 +2377,7 @@ static int elf_core_dump(struct coredump_params *cprm)
 				void *kaddr = kmap(page);
 				stop = !dump_emit(cprm, kaddr, PAGE_SIZE);
 				kunmap(page);
-				put_page(page);
+				put_user_page(page);
 			} else
 				stop = !dump_skip(cprm, PAGE_SIZE);
 			if (stop)
diff --git a/fs/binfmt_elf_fdpic.c b/fs/binfmt_elf_fdpic.c
index d86ebd0dcc3d..321724b3be22 100644
--- a/fs/binfmt_elf_fdpic.c
+++ b/fs/binfmt_elf_fdpic.c
@@ -1511,7 +1511,7 @@ static bool elf_fdpic_dump_segments(struct coredump_params *cprm)
 				void *kaddr = kmap(page);
 				res = dump_emit(cprm, kaddr, PAGE_SIZE);
 				kunmap(page);
-				put_page(page);
+				put_user_page(page);
 			} else {
 				res = dump_skip(cprm, PAGE_SIZE);
 			}
-- 
2.22.0


^ permalink raw reply related

* [PATCH v2 31/34] fs/nfs: convert put_page() to put_user_page*()
From: john.hubbard @ 2019-08-04 22:49 UTC (permalink / raw)
  To: Andrew Morton
  Cc: Christoph Hellwig, Dan Williams, Dave Chinner, Dave Hansen,
	Ira Weiny, Jan Kara, Jason Gunthorpe, Jérôme Glisse,
	LKML, amd-gfx, ceph-devel, devel, devel, dri-devel, intel-gfx,
	kvm, linux-arm-kernel, linux-block, linux-crypto, linux-fbdev,
	linux-fsdevel, linux-media, linux-mm, linux-nfs, linux-rdma,
	linux-rpi-kernel, linux-xfs, netdev, rds-devel, sparclinux, x86,
	xen-devel, John Hubbard, Calum Mackay, Trond Myklebust,
	Anna Schumaker
In-Reply-To: <20190804224915.28669-1-jhubbard@nvidia.com>

From: John Hubbard <jhubbard@nvidia.com>

For pages that were retained via get_user_pages*(), release those pages
via the new put_user_page*() routines, instead of via put_page() or
release_pages().

This is part a tree-wide conversion, as described in commit fc1d8e7cca2d
("mm: introduce put_user_page*(), placeholder versions").

Cc: Calum Mackay <calum.mackay@oracle.com>
Cc: Trond Myklebust <trond.myklebust@hammerspace.com>
Cc: Anna Schumaker <anna.schumaker@netapp.com>
Cc: linux-nfs@vger.kernel.org
Signed-off-by: John Hubbard <jhubbard@nvidia.com>
---
 fs/nfs/direct.c | 11 ++---------
 1 file changed, 2 insertions(+), 9 deletions(-)

diff --git a/fs/nfs/direct.c b/fs/nfs/direct.c
index 0cb442406168..c0c1b9f2c069 100644
--- a/fs/nfs/direct.c
+++ b/fs/nfs/direct.c
@@ -276,13 +276,6 @@ ssize_t nfs_direct_IO(struct kiocb *iocb, struct iov_iter *iter)
 	return nfs_file_direct_write(iocb, iter);
 }
 
-static void nfs_direct_release_pages(struct page **pages, unsigned int npages)
-{
-	unsigned int i;
-	for (i = 0; i < npages; i++)
-		put_page(pages[i]);
-}
-
 void nfs_init_cinfo_from_dreq(struct nfs_commit_info *cinfo,
 			      struct nfs_direct_req *dreq)
 {
@@ -512,7 +505,7 @@ static ssize_t nfs_direct_read_schedule_iovec(struct nfs_direct_req *dreq,
 			pos += req_len;
 			dreq->bytes_left -= req_len;
 		}
-		nfs_direct_release_pages(pagevec, npages);
+		put_user_pages(pagevec, npages);
 		kvfree(pagevec);
 		if (result < 0)
 			break;
@@ -935,7 +928,7 @@ static ssize_t nfs_direct_write_schedule_iovec(struct nfs_direct_req *dreq,
 			pos += req_len;
 			dreq->bytes_left -= req_len;
 		}
-		nfs_direct_release_pages(pagevec, npages);
+		put_user_pages(pagevec, npages);
 		kvfree(pagevec);
 		if (result < 0)
 			break;
-- 
2.22.0


^ permalink raw reply related

* [PATCH v2 33/34] kernel/events/core.c: convert put_page() to put_user_page*()
From: john.hubbard @ 2019-08-04 22:49 UTC (permalink / raw)
  To: Andrew Morton
  Cc: Christoph Hellwig, Dan Williams, Dave Chinner, Dave Hansen,
	Ira Weiny, Jan Kara, Jason Gunthorpe, Jérôme Glisse,
	LKML, amd-gfx, ceph-devel, devel, devel, dri-devel, intel-gfx,
	kvm, linux-arm-kernel, linux-block, linux-crypto, linux-fbdev,
	linux-fsdevel, linux-media, linux-mm, linux-nfs, linux-rdma,
	linux-rpi-kernel, linux-xfs, netdev, rds-devel, sparclinux, x86,
	xen-devel, John Hubbard, Peter Zijlstra, Ingo Molnar,
	Arnaldo Carvalho de Melo, Alexander Shishkin, Jiri Olsa,
	Namhyung Kim
In-Reply-To: <20190804224915.28669-1-jhubbard@nvidia.com>

From: John Hubbard <jhubbard@nvidia.com>

For pages that were retained via get_user_pages*(), release those pages
via the new put_user_page*() routines, instead of via put_page() or
release_pages().

This is part a tree-wide conversion, as described in commit fc1d8e7cca2d
("mm: introduce put_user_page*(), placeholder versions").

Cc: Peter Zijlstra <peterz@infradead.org>
Cc: Ingo Molnar <mingo@redhat.com>
Cc: Arnaldo Carvalho de Melo <acme@kernel.org>
Cc: Alexander Shishkin <alexander.shishkin@linux.intel.com>
Cc: Jiri Olsa <jolsa@redhat.com>
Cc: Namhyung Kim <namhyung@kernel.org>
Signed-off-by: John Hubbard <jhubbard@nvidia.com>
---
 kernel/events/core.c | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/kernel/events/core.c b/kernel/events/core.c
index 0463c1151bae..7be52bbbfe87 100644
--- a/kernel/events/core.c
+++ b/kernel/events/core.c
@@ -6426,7 +6426,7 @@ static u64 perf_virt_to_phys(u64 virt)
 			phys_addr = page_to_phys(p) + virt % PAGE_SIZE;
 
 		if (p)
-			put_page(p);
+			put_user_page(p);
 	}
 
 	return phys_addr;
-- 
2.22.0


^ permalink raw reply related

* [PATCH v2 24/34] futex: convert put_page() to put_user_page*()
From: john.hubbard @ 2019-08-04 22:49 UTC (permalink / raw)
  To: Andrew Morton
  Cc: Christoph Hellwig, Dan Williams, Dave Chinner, Dave Hansen,
	Ira Weiny, Jan Kara, Jason Gunthorpe, Jérôme Glisse,
	LKML, amd-gfx, ceph-devel, devel, devel, dri-devel, intel-gfx,
	kvm, linux-arm-kernel, linux-block, linux-crypto, linux-fbdev,
	linux-fsdevel, linux-media, linux-mm, linux-nfs, linux-rdma,
	linux-rpi-kernel, linux-xfs, netdev, rds-devel, sparclinux, x86,
	xen-devel, John Hubbard, Thomas Gleixner, Ingo Molnar,
	Peter Zijlstra, Darren Hart
In-Reply-To: <20190804224915.28669-1-jhubbard@nvidia.com>

From: John Hubbard <jhubbard@nvidia.com>

For pages that were retained via get_user_pages*(), release those pages
via the new put_user_page*() routines, instead of via put_page() or
release_pages().

This is part a tree-wide conversion, as described in commit fc1d8e7cca2d
("mm: introduce put_user_page*(), placeholder versions").

Cc: Thomas Gleixner <tglx@linutronix.de>
Cc: Ingo Molnar <mingo@redhat.com>
Cc: Peter Zijlstra <peterz@infradead.org>
Cc: Darren Hart <dvhart@infradead.org>
Signed-off-by: John Hubbard <jhubbard@nvidia.com>
---
 kernel/futex.c | 10 +++++-----
 1 file changed, 5 insertions(+), 5 deletions(-)

diff --git a/kernel/futex.c b/kernel/futex.c
index 6d50728ef2e7..4b4cae58ec57 100644
--- a/kernel/futex.c
+++ b/kernel/futex.c
@@ -623,7 +623,7 @@ get_futex_key(u32 __user *uaddr, int fshared, union futex_key *key, enum futex_a
 		lock_page(page);
 		shmem_swizzled = PageSwapCache(page) || page->mapping;
 		unlock_page(page);
-		put_page(page);
+		put_user_page(page);
 
 		if (shmem_swizzled)
 			goto again;
@@ -675,7 +675,7 @@ get_futex_key(u32 __user *uaddr, int fshared, union futex_key *key, enum futex_a
 
 		if (READ_ONCE(page->mapping) != mapping) {
 			rcu_read_unlock();
-			put_page(page);
+			put_user_page(page);
 
 			goto again;
 		}
@@ -683,7 +683,7 @@ get_futex_key(u32 __user *uaddr, int fshared, union futex_key *key, enum futex_a
 		inode = READ_ONCE(mapping->host);
 		if (!inode) {
 			rcu_read_unlock();
-			put_page(page);
+			put_user_page(page);
 
 			goto again;
 		}
@@ -702,7 +702,7 @@ get_futex_key(u32 __user *uaddr, int fshared, union futex_key *key, enum futex_a
 		 */
 		if (!atomic_inc_not_zero(&inode->i_count)) {
 			rcu_read_unlock();
-			put_page(page);
+			put_user_page(page);
 
 			goto again;
 		}
@@ -723,7 +723,7 @@ get_futex_key(u32 __user *uaddr, int fshared, union futex_key *key, enum futex_a
 	}
 
 out:
-	put_page(page);
+	put_user_page(page);
 	return err;
 }
 
-- 
2.22.0


^ permalink raw reply related

* [PATCH v2 22/34] orangefs: convert put_page() to put_user_page*()
From: john.hubbard @ 2019-08-04 22:49 UTC (permalink / raw)
  To: Andrew Morton
  Cc: Christoph Hellwig, Dan Williams, Dave Chinner, Dave Hansen,
	Ira Weiny, Jan Kara, Jason Gunthorpe, Jérôme Glisse,
	LKML, amd-gfx, ceph-devel, devel, devel, dri-devel, intel-gfx,
	kvm, linux-arm-kernel, linux-block, linux-crypto, linux-fbdev,
	linux-fsdevel, linux-media, linux-mm, linux-nfs, linux-rdma,
	linux-rpi-kernel, linux-xfs, netdev, rds-devel, sparclinux, x86,
	xen-devel, John Hubbard, Mike Marshall, Martin Brandenburg
In-Reply-To: <20190804224915.28669-1-jhubbard@nvidia.com>

From: John Hubbard <jhubbard@nvidia.com>

For pages that were retained via get_user_pages*(), release those pages
via the new put_user_page*() routines, instead of via put_page() or
release_pages().

This is part a tree-wide conversion, as described in commit fc1d8e7cca2d
("mm: introduce put_user_page*(), placeholder versions").

Cc: Mike Marshall <hubcap@omnibond.com>
Cc: Martin Brandenburg <martin@omnibond.com>
Cc: devel@lists.orangefs.org
Signed-off-by: John Hubbard <jhubbard@nvidia.com>
---
 fs/orangefs/orangefs-bufmap.c | 7 ++-----
 1 file changed, 2 insertions(+), 5 deletions(-)

diff --git a/fs/orangefs/orangefs-bufmap.c b/fs/orangefs/orangefs-bufmap.c
index 2bb916d68576..f2f33a16d604 100644
--- a/fs/orangefs/orangefs-bufmap.c
+++ b/fs/orangefs/orangefs-bufmap.c
@@ -168,10 +168,7 @@ static DEFINE_SPINLOCK(orangefs_bufmap_lock);
 static void
 orangefs_bufmap_unmap(struct orangefs_bufmap *bufmap)
 {
-	int i;
-
-	for (i = 0; i < bufmap->page_count; i++)
-		put_page(bufmap->page_array[i]);
+	put_user_pages(bufmap->page_array, bufmap->page_count);
 }
 
 static void
@@ -280,7 +277,7 @@ orangefs_bufmap_map(struct orangefs_bufmap *bufmap,
 
 		for (i = 0; i < ret; i++) {
 			SetPageError(bufmap->page_array[i]);
-			put_page(bufmap->page_array[i]);
+			put_user_page(bufmap->page_array[i]);
 		}
 		return -ENOMEM;
 	}
-- 
2.22.0


^ permalink raw reply related

* [PATCH v2 16/34] drivers/tee: convert put_page() to put_user_page*()
From: john.hubbard @ 2019-08-04 22:48 UTC (permalink / raw)
  To: Andrew Morton
  Cc: Christoph Hellwig, Dan Williams, Dave Chinner, Dave Hansen,
	Ira Weiny, Jan Kara, Jason Gunthorpe, Jérôme Glisse,
	LKML, amd-gfx, ceph-devel, devel, devel, dri-devel, intel-gfx,
	kvm, linux-arm-kernel, linux-block, linux-crypto, linux-fbdev,
	linux-fsdevel, linux-media, linux-mm, linux-nfs, linux-rdma,
	linux-rpi-kernel, linux-xfs, netdev, rds-devel, sparclinux, x86,
	xen-devel, John Hubbard, Jens Wiklander
In-Reply-To: <20190804224915.28669-1-jhubbard@nvidia.com>

From: John Hubbard <jhubbard@nvidia.com>

For pages that were retained via get_user_pages*(), release those pages
via the new put_user_page*() routines, instead of via put_page() or
release_pages().

This is part a tree-wide conversion, as described in commit fc1d8e7cca2d
("mm: introduce put_user_page*(), placeholder versions").

Acked-by: Jens Wiklander <jens.wiklander@linaro.org>
Signed-off-by: John Hubbard <jhubbard@nvidia.com>
---
 drivers/tee/tee_shm.c | 10 ++--------
 1 file changed, 2 insertions(+), 8 deletions(-)

diff --git a/drivers/tee/tee_shm.c b/drivers/tee/tee_shm.c
index 2da026fd12c9..c967d0420b67 100644
--- a/drivers/tee/tee_shm.c
+++ b/drivers/tee/tee_shm.c
@@ -31,16 +31,13 @@ static void tee_shm_release(struct tee_shm *shm)
 
 		poolm->ops->free(poolm, shm);
 	} else if (shm->flags & TEE_SHM_REGISTER) {
-		size_t n;
 		int rc = teedev->desc->ops->shm_unregister(shm->ctx, shm);
 
 		if (rc)
 			dev_err(teedev->dev.parent,
 				"unregister shm %p failed: %d", shm, rc);
 
-		for (n = 0; n < shm->num_pages; n++)
-			put_page(shm->pages[n]);
-
+		put_user_pages(shm->pages, shm->num_pages);
 		kfree(shm->pages);
 	}
 
@@ -313,16 +310,13 @@ struct tee_shm *tee_shm_register(struct tee_context *ctx, unsigned long addr,
 	return shm;
 err:
 	if (shm) {
-		size_t n;
-
 		if (shm->id >= 0) {
 			mutex_lock(&teedev->mutex);
 			idr_remove(&teedev->idr, shm->id);
 			mutex_unlock(&teedev->mutex);
 		}
 		if (shm->pages) {
-			for (n = 0; n < shm->num_pages; n++)
-				put_page(shm->pages[n]);
+			put_user_pages(shm->pages, shm->num_pages);
 			kfree(shm->pages);
 		}
 	}
-- 
2.22.0


^ permalink raw reply related

* [PATCH v2 05/34] drm/etnaviv: convert release_pages() to put_user_pages()
From: john.hubbard @ 2019-08-04 22:48 UTC (permalink / raw)
  To: Andrew Morton
  Cc: Christoph Hellwig, Dan Williams, Dave Chinner, Dave Hansen,
	Ira Weiny, Jan Kara, Jason Gunthorpe, Jérôme Glisse,
	LKML, amd-gfx, ceph-devel, devel, devel, dri-devel, intel-gfx,
	kvm, linux-arm-kernel, linux-block, linux-crypto, linux-fbdev,
	linux-fsdevel, linux-media, linux-mm, linux-nfs, linux-rdma,
	linux-rpi-kernel, linux-xfs, netdev, rds-devel, sparclinux, x86,
	xen-devel, John Hubbard, Joerg Roedel, Paolo Bonzini,
	Radim Krčmář, Thomas Gleixner, Ingo Molnar,
	Borislav Petkov, H . Peter Anvin
In-Reply-To: <20190804224915.28669-1-jhubbard@nvidia.com>

From: John Hubbard <jhubbard@nvidia.com>

For pages that were retained via get_user_pages*(), release those pages
via the new put_user_page*() routines, instead of via put_page() or
release_pages().

This is part a tree-wide conversion, as described in commit fc1d8e7cca2d
("mm: introduce put_user_page*(), placeholder versions").

Cc: Joerg Roedel <joro@8bytes.org>
Cc: Paolo Bonzini <pbonzini@redhat.com>
Cc: Radim Krčmář <rkrcmar@redhat.com>
Cc: Thomas Gleixner <tglx@linutronix.de>
Cc: Ingo Molnar <mingo@redhat.com>
Cc: Borislav Petkov <bp@alien8.de>
Cc: H. Peter Anvin <hpa@zytor.com>
Cc: x86@kernel.org
Cc: kvm@vger.kernel.org
Signed-off-by: John Hubbard <jhubbard@nvidia.com>
---
 drivers/gpu/drm/etnaviv/etnaviv_gem.c | 4 ++--
 1 file changed, 2 insertions(+), 2 deletions(-)

diff --git a/drivers/gpu/drm/etnaviv/etnaviv_gem.c b/drivers/gpu/drm/etnaviv/etnaviv_gem.c
index e8778ebb72e6..a0144a5ee325 100644
--- a/drivers/gpu/drm/etnaviv/etnaviv_gem.c
+++ b/drivers/gpu/drm/etnaviv/etnaviv_gem.c
@@ -686,7 +686,7 @@ static int etnaviv_gem_userptr_get_pages(struct etnaviv_gem_object *etnaviv_obj)
 		ret = get_user_pages_fast(ptr, num_pages,
 					  !userptr->ro ? FOLL_WRITE : 0, pages);
 		if (ret < 0) {
-			release_pages(pvec, pinned);
+			put_user_pages(pvec, pinned);
 			kvfree(pvec);
 			return ret;
 		}
@@ -710,7 +710,7 @@ static void etnaviv_gem_userptr_release(struct etnaviv_gem_object *etnaviv_obj)
 	if (etnaviv_obj->pages) {
 		int npages = etnaviv_obj->base.size >> PAGE_SHIFT;
 
-		release_pages(etnaviv_obj->pages, npages);
+		put_user_pages(etnaviv_obj->pages, npages);
 		kvfree(etnaviv_obj->pages);
 	}
 }
-- 
2.22.0


^ permalink raw reply related

* [PATCH v2 03/34] net/ceph: convert put_page() to put_user_page*()
From: john.hubbard @ 2019-08-04 22:48 UTC (permalink / raw)
  To: Andrew Morton
  Cc: Christoph Hellwig, Dan Williams, Dave Chinner, Dave Hansen,
	Ira Weiny, Jan Kara, Jason Gunthorpe, Jérôme Glisse,
	LKML, amd-gfx, ceph-devel, devel, devel, dri-devel, intel-gfx,
	kvm, linux-arm-kernel, linux-block, linux-crypto, linux-fbdev,
	linux-fsdevel, linux-media, linux-mm, linux-nfs, linux-rdma,
	linux-rpi-kernel, linux-xfs, netdev, rds-devel, sparclinux, x86,
	xen-devel, John Hubbard, Jeff Layton, Ilya Dryomov, Sage Weil,
	David S . Miller
In-Reply-To: <20190804224915.28669-1-jhubbard@nvidia.com>

From: John Hubbard <jhubbard@nvidia.com>

For pages that were retained via get_user_pages*(), release those pages
via the new put_user_page*() routines, instead of via put_page() or
release_pages().

This is part a tree-wide conversion, as described in commit fc1d8e7cca2d
("mm: introduce put_user_page*(), placeholder versions").

Acked-by: Jeff Layton <jlayton@kernel.org>

Cc: Ilya Dryomov <idryomov@gmail.com>
Cc: Sage Weil <sage@redhat.com>
Cc: David S. Miller <davem@davemloft.net>
Cc: ceph-devel@vger.kernel.org
Cc: netdev@vger.kernel.org
Signed-off-by: John Hubbard <jhubbard@nvidia.com>
---
 net/ceph/pagevec.c | 8 +-------
 1 file changed, 1 insertion(+), 7 deletions(-)

diff --git a/net/ceph/pagevec.c b/net/ceph/pagevec.c
index 64305e7056a1..c88fff2ab9bd 100644
--- a/net/ceph/pagevec.c
+++ b/net/ceph/pagevec.c
@@ -12,13 +12,7 @@
 
 void ceph_put_page_vector(struct page **pages, int num_pages, bool dirty)
 {
-	int i;
-
-	for (i = 0; i < num_pages; i++) {
-		if (dirty)
-			set_page_dirty_lock(pages[i]);
-		put_page(pages[i]);
-	}
+	put_user_pages_dirty_lock(pages, num_pages, dirty);
 	kvfree(pages);
 }
 EXPORT_SYMBOL(ceph_put_page_vector);
-- 
2.22.0


^ permalink raw reply related

* [PATCH v2 02/34] net/rds: convert put_page() to put_user_page*()
From: john.hubbard @ 2019-08-04 22:48 UTC (permalink / raw)
  To: Andrew Morton
  Cc: Christoph Hellwig, Dan Williams, Dave Chinner, Dave Hansen,
	Ira Weiny, Jan Kara, Jason Gunthorpe, Jérôme Glisse,
	LKML, amd-gfx, ceph-devel, devel, devel, dri-devel, intel-gfx,
	kvm, linux-arm-kernel, linux-block, linux-crypto, linux-fbdev,
	linux-fsdevel, linux-media, linux-mm, linux-nfs, linux-rdma,
	linux-rpi-kernel, linux-xfs, netdev, rds-devel, sparclinux, x86,
	xen-devel, John Hubbard, Santosh Shilimkar, David S . Miller
In-Reply-To: <20190804224915.28669-1-jhubbard@nvidia.com>

From: John Hubbard <jhubbard@nvidia.com>

For pages that were retained via get_user_pages*(), release those pages
via the new put_user_page*() routines, instead of via put_page() or
release_pages().

This is part a tree-wide conversion, as described in commit fc1d8e7cca2d
("mm: introduce put_user_page*(), placeholder versions").

Cc: Santosh Shilimkar <santosh.shilimkar@oracle.com>
Cc: David S. Miller <davem@davemloft.net>
Cc: netdev@vger.kernel.org
Cc: linux-rdma@vger.kernel.org
Cc: rds-devel@oss.oracle.com
Signed-off-by: John Hubbard <jhubbard@nvidia.com>
---
 net/rds/info.c    |  5 ++---
 net/rds/message.c |  2 +-
 net/rds/rdma.c    | 15 +++++++--------
 3 files changed, 10 insertions(+), 12 deletions(-)

diff --git a/net/rds/info.c b/net/rds/info.c
index 03f6fd56d237..ca6af2889adf 100644
--- a/net/rds/info.c
+++ b/net/rds/info.c
@@ -162,7 +162,6 @@ int rds_info_getsockopt(struct socket *sock, int optname, char __user *optval,
 	struct rds_info_lengths lens;
 	unsigned long nr_pages = 0;
 	unsigned long start;
-	unsigned long i;
 	rds_info_func func;
 	struct page **pages = NULL;
 	int ret;
@@ -235,8 +234,8 @@ int rds_info_getsockopt(struct socket *sock, int optname, char __user *optval,
 		ret = -EFAULT;
 
 out:
-	for (i = 0; pages && i < nr_pages; i++)
-		put_page(pages[i]);
+	if (pages)
+		put_user_pages(pages, nr_pages);
 	kfree(pages);
 
 	return ret;
diff --git a/net/rds/message.c b/net/rds/message.c
index 50f13f1d4ae0..d7b0d266c437 100644
--- a/net/rds/message.c
+++ b/net/rds/message.c
@@ -404,7 +404,7 @@ static int rds_message_zcopy_from_user(struct rds_message *rm, struct iov_iter *
 			int i;
 
 			for (i = 0; i < rm->data.op_nents; i++)
-				put_page(sg_page(&rm->data.op_sg[i]));
+				put_user_page(sg_page(&rm->data.op_sg[i]));
 			mmp = &rm->data.op_mmp_znotifier->z_mmp;
 			mm_unaccount_pinned_pages(mmp);
 			ret = -EFAULT;
diff --git a/net/rds/rdma.c b/net/rds/rdma.c
index 916f5ec373d8..6762e8696b99 100644
--- a/net/rds/rdma.c
+++ b/net/rds/rdma.c
@@ -162,8 +162,7 @@ static int rds_pin_pages(unsigned long user_addr, unsigned int nr_pages,
 				  pages);
 
 	if (ret >= 0 && ret < nr_pages) {
-		while (ret--)
-			put_page(pages[ret]);
+		put_user_pages(pages, ret);
 		ret = -EFAULT;
 	}
 
@@ -276,7 +275,7 @@ static int __rds_rdma_map(struct rds_sock *rs, struct rds_get_mr_args *args,
 
 	if (IS_ERR(trans_private)) {
 		for (i = 0 ; i < nents; i++)
-			put_page(sg_page(&sg[i]));
+			put_user_page(sg_page(&sg[i]));
 		kfree(sg);
 		ret = PTR_ERR(trans_private);
 		goto out;
@@ -464,9 +463,10 @@ void rds_rdma_free_op(struct rm_rdma_op *ro)
 		 * to local memory */
 		if (!ro->op_write) {
 			WARN_ON(!page->mapping && irqs_disabled());
-			set_page_dirty(page);
+			put_user_pages_dirty_lock(&page, 1, true);
+		} else {
+			put_user_page(page);
 		}
-		put_page(page);
 	}
 
 	kfree(ro->op_notifier);
@@ -481,8 +481,7 @@ void rds_atomic_free_op(struct rm_atomic_op *ao)
 	/* Mark page dirty if it was possibly modified, which
 	 * is the case for a RDMA_READ which copies from remote
 	 * to local memory */
-	set_page_dirty(page);
-	put_page(page);
+	put_user_pages_dirty_lock(&page, 1, true);
 
 	kfree(ao->op_notifier);
 	ao->op_notifier = NULL;
@@ -867,7 +866,7 @@ int rds_cmsg_atomic(struct rds_sock *rs, struct rds_message *rm,
 	return ret;
 err:
 	if (page)
-		put_page(page);
+		put_user_page(page);
 	rm->atomic.op_active = 0;
 	kfree(rm->atomic.op_notifier);
 
-- 
2.22.0


^ permalink raw reply related

* [PATCH v2 01/34] mm/gup: add make_dirty arg to put_user_pages_dirty_lock()
From: john.hubbard @ 2019-08-04 22:48 UTC (permalink / raw)
  To: Andrew Morton
  Cc: Christoph Hellwig, Dan Williams, Dave Chinner, Dave Hansen,
	Ira Weiny, Jan Kara, Jason Gunthorpe, Jérôme Glisse,
	LKML, amd-gfx, ceph-devel, devel, devel, dri-devel, intel-gfx,
	kvm, linux-arm-kernel, linux-block, linux-crypto, linux-fbdev,
	linux-fsdevel, linux-media, linux-mm, linux-nfs, linux-rdma,
	linux-rpi-kernel, linux-xfs, netdev, rds-devel, sparclinux, x86,
	xen-devel, John Hubbard, Christoph Hellwig, Matthew Wilcox
In-Reply-To: <20190804224915.28669-1-jhubbard@nvidia.com>

From: John Hubbard <jhubbard@nvidia.com>

Provide a more capable variation of put_user_pages_dirty_lock(),
and delete put_user_pages_dirty(). This is based on the
following:

1. Lots of call sites become simpler if a bool is passed
into put_user_page*(), instead of making the call site
choose which put_user_page*() variant to call.

2. Christoph Hellwig's observation that set_page_dirty_lock()
is usually correct, and set_page_dirty() is usually a
bug, or at least questionable, within a put_user_page*()
calling chain.

This leads to the following API choices:

    * put_user_pages_dirty_lock(page, npages, make_dirty)

    * There is no put_user_pages_dirty(). You have to
      hand code that, in the rare case that it's
      required.

Reviewed-by: Christoph Hellwig <hch@lst.de>
Cc: Matthew Wilcox <willy@infradead.org>
Cc: Jan Kara <jack@suse.cz>
Cc: Ira Weiny <ira.weiny@intel.com>
Cc: Jason Gunthorpe <jgg@ziepe.ca>
Signed-off-by: John Hubbard <jhubbard@nvidia.com>
---
 drivers/infiniband/core/umem.c             |   5 +-
 drivers/infiniband/hw/hfi1/user_pages.c    |   5 +-
 drivers/infiniband/hw/qib/qib_user_pages.c |  13 +--
 drivers/infiniband/hw/usnic/usnic_uiom.c   |   5 +-
 drivers/infiniband/sw/siw/siw_mem.c        |  19 +---
 include/linux/mm.h                         |   5 +-
 mm/gup.c                                   | 115 +++++++++------------
 7 files changed, 61 insertions(+), 106 deletions(-)

diff --git a/drivers/infiniband/core/umem.c b/drivers/infiniband/core/umem.c
index 08da840ed7ee..965cf9dea71a 100644
--- a/drivers/infiniband/core/umem.c
+++ b/drivers/infiniband/core/umem.c
@@ -54,10 +54,7 @@ static void __ib_umem_release(struct ib_device *dev, struct ib_umem *umem, int d
 
 	for_each_sg_page(umem->sg_head.sgl, &sg_iter, umem->sg_nents, 0) {
 		page = sg_page_iter_page(&sg_iter);
-		if (umem->writable && dirty)
-			put_user_pages_dirty_lock(&page, 1);
-		else
-			put_user_page(page);
+		put_user_pages_dirty_lock(&page, 1, umem->writable && dirty);
 	}
 
 	sg_free_table(&umem->sg_head);
diff --git a/drivers/infiniband/hw/hfi1/user_pages.c b/drivers/infiniband/hw/hfi1/user_pages.c
index b89a9b9aef7a..469acb961fbd 100644
--- a/drivers/infiniband/hw/hfi1/user_pages.c
+++ b/drivers/infiniband/hw/hfi1/user_pages.c
@@ -118,10 +118,7 @@ int hfi1_acquire_user_pages(struct mm_struct *mm, unsigned long vaddr, size_t np
 void hfi1_release_user_pages(struct mm_struct *mm, struct page **p,
 			     size_t npages, bool dirty)
 {
-	if (dirty)
-		put_user_pages_dirty_lock(p, npages);
-	else
-		put_user_pages(p, npages);
+	put_user_pages_dirty_lock(p, npages, dirty);
 
 	if (mm) { /* during close after signal, mm can be NULL */
 		atomic64_sub(npages, &mm->pinned_vm);
diff --git a/drivers/infiniband/hw/qib/qib_user_pages.c b/drivers/infiniband/hw/qib/qib_user_pages.c
index bfbfbb7e0ff4..26c1fb8d45cc 100644
--- a/drivers/infiniband/hw/qib/qib_user_pages.c
+++ b/drivers/infiniband/hw/qib/qib_user_pages.c
@@ -37,15 +37,6 @@
 
 #include "qib.h"
 
-static void __qib_release_user_pages(struct page **p, size_t num_pages,
-				     int dirty)
-{
-	if (dirty)
-		put_user_pages_dirty_lock(p, num_pages);
-	else
-		put_user_pages(p, num_pages);
-}
-
 /**
  * qib_map_page - a safety wrapper around pci_map_page()
  *
@@ -124,7 +115,7 @@ int qib_get_user_pages(unsigned long start_page, size_t num_pages,
 
 	return 0;
 bail_release:
-	__qib_release_user_pages(p, got, 0);
+	put_user_pages_dirty_lock(p, got, false);
 bail:
 	atomic64_sub(num_pages, &current->mm->pinned_vm);
 	return ret;
@@ -132,7 +123,7 @@ int qib_get_user_pages(unsigned long start_page, size_t num_pages,
 
 void qib_release_user_pages(struct page **p, size_t num_pages)
 {
-	__qib_release_user_pages(p, num_pages, 1);
+	put_user_pages_dirty_lock(p, num_pages, true);
 
 	/* during close after signal, mm can be NULL */
 	if (current->mm)
diff --git a/drivers/infiniband/hw/usnic/usnic_uiom.c b/drivers/infiniband/hw/usnic/usnic_uiom.c
index 0b0237d41613..62e6ffa9ad78 100644
--- a/drivers/infiniband/hw/usnic/usnic_uiom.c
+++ b/drivers/infiniband/hw/usnic/usnic_uiom.c
@@ -75,10 +75,7 @@ static void usnic_uiom_put_pages(struct list_head *chunk_list, int dirty)
 		for_each_sg(chunk->page_list, sg, chunk->nents, i) {
 			page = sg_page(sg);
 			pa = sg_phys(sg);
-			if (dirty)
-				put_user_pages_dirty_lock(&page, 1);
-			else
-				put_user_page(page);
+			put_user_pages_dirty_lock(&page, 1, dirty);
 			usnic_dbg("pa: %pa\n", &pa);
 		}
 		kfree(chunk);
diff --git a/drivers/infiniband/sw/siw/siw_mem.c b/drivers/infiniband/sw/siw/siw_mem.c
index 67171c82b0c4..1e197753bf2f 100644
--- a/drivers/infiniband/sw/siw/siw_mem.c
+++ b/drivers/infiniband/sw/siw/siw_mem.c
@@ -60,20 +60,6 @@ struct siw_mem *siw_mem_id2obj(struct siw_device *sdev, int stag_index)
 	return NULL;
 }
 
-static void siw_free_plist(struct siw_page_chunk *chunk, int num_pages,
-			   bool dirty)
-{
-	struct page **p = chunk->plist;
-
-	while (num_pages--) {
-		if (!PageDirty(*p) && dirty)
-			put_user_pages_dirty_lock(p, 1);
-		else
-			put_user_page(*p);
-		p++;
-	}
-}
-
 void siw_umem_release(struct siw_umem *umem, bool dirty)
 {
 	struct mm_struct *mm_s = umem->owning_mm;
@@ -82,8 +68,9 @@ void siw_umem_release(struct siw_umem *umem, bool dirty)
 	for (i = 0; num_pages; i++) {
 		int to_free = min_t(int, PAGES_PER_CHUNK, num_pages);
 
-		siw_free_plist(&umem->page_chunk[i], to_free,
-			       umem->writable && dirty);
+		put_user_pages_dirty_lock(umem->page_chunk[i].plist,
+					  to_free,
+					  umem->writable && dirty);
 		kfree(umem->page_chunk[i].plist);
 		num_pages -= to_free;
 	}
diff --git a/include/linux/mm.h b/include/linux/mm.h
index 0334ca97c584..9759b6a24420 100644
--- a/include/linux/mm.h
+++ b/include/linux/mm.h
@@ -1057,8 +1057,9 @@ static inline void put_user_page(struct page *page)
 	put_page(page);
 }
 
-void put_user_pages_dirty(struct page **pages, unsigned long npages);
-void put_user_pages_dirty_lock(struct page **pages, unsigned long npages);
+void put_user_pages_dirty_lock(struct page **pages, unsigned long npages,
+			       bool make_dirty);
+
 void put_user_pages(struct page **pages, unsigned long npages);
 
 #if defined(CONFIG_SPARSEMEM) && !defined(CONFIG_SPARSEMEM_VMEMMAP)
diff --git a/mm/gup.c b/mm/gup.c
index 98f13ab37bac..7fefd7ab02c4 100644
--- a/mm/gup.c
+++ b/mm/gup.c
@@ -29,85 +29,70 @@ struct follow_page_context {
 	unsigned int page_mask;
 };
 
-typedef int (*set_dirty_func_t)(struct page *page);
-
-static void __put_user_pages_dirty(struct page **pages,
-				   unsigned long npages,
-				   set_dirty_func_t sdf)
-{
-	unsigned long index;
-
-	for (index = 0; index < npages; index++) {
-		struct page *page = compound_head(pages[index]);
-
-		/*
-		 * Checking PageDirty at this point may race with
-		 * clear_page_dirty_for_io(), but that's OK. Two key cases:
-		 *
-		 * 1) This code sees the page as already dirty, so it skips
-		 * the call to sdf(). That could happen because
-		 * clear_page_dirty_for_io() called page_mkclean(),
-		 * followed by set_page_dirty(). However, now the page is
-		 * going to get written back, which meets the original
-		 * intention of setting it dirty, so all is well:
-		 * clear_page_dirty_for_io() goes on to call
-		 * TestClearPageDirty(), and write the page back.
-		 *
-		 * 2) This code sees the page as clean, so it calls sdf().
-		 * The page stays dirty, despite being written back, so it
-		 * gets written back again in the next writeback cycle.
-		 * This is harmless.
-		 */
-		if (!PageDirty(page))
-			sdf(page);
-
-		put_user_page(page);
-	}
-}
-
 /**
- * put_user_pages_dirty() - release and dirty an array of gup-pinned pages
- * @pages:  array of pages to be marked dirty and released.
+ * put_user_pages_dirty_lock() - release and optionally dirty gup-pinned pages
+ * @pages:  array of pages to be maybe marked dirty, and definitely released.
  * @npages: number of pages in the @pages array.
+ * @make_dirty: whether to mark the pages dirty
  *
  * "gup-pinned page" refers to a page that has had one of the get_user_pages()
  * variants called on that page.
  *
  * For each page in the @pages array, make that page (or its head page, if a
- * compound page) dirty, if it was previously listed as clean. Then, release
- * the page using put_user_page().
+ * compound page) dirty, if @make_dirty is true, and if the page was previously
+ * listed as clean. In any case, releases all pages using put_user_page(),
+ * possibly via put_user_pages(), for the non-dirty case.
  *
  * Please see the put_user_page() documentation for details.
  *
- * set_page_dirty(), which does not lock the page, is used here.
- * Therefore, it is the caller's responsibility to ensure that this is
- * safe. If not, then put_user_pages_dirty_lock() should be called instead.
+ * set_page_dirty_lock() is used internally. If instead, set_page_dirty() is
+ * required, then the caller should a) verify that this is really correct,
+ * because _lock() is usually required, and b) hand code it:
+ * set_page_dirty_lock(), put_user_page().
  *
  */
-void put_user_pages_dirty(struct page **pages, unsigned long npages)
+void put_user_pages_dirty_lock(struct page **pages, unsigned long npages,
+			       bool make_dirty)
 {
-	__put_user_pages_dirty(pages, npages, set_page_dirty);
-}
-EXPORT_SYMBOL(put_user_pages_dirty);
+	unsigned long index;
 
-/**
- * put_user_pages_dirty_lock() - release and dirty an array of gup-pinned pages
- * @pages:  array of pages to be marked dirty and released.
- * @npages: number of pages in the @pages array.
- *
- * For each page in the @pages array, make that page (or its head page, if a
- * compound page) dirty, if it was previously listed as clean. Then, release
- * the page using put_user_page().
- *
- * Please see the put_user_page() documentation for details.
- *
- * This is just like put_user_pages_dirty(), except that it invokes
- * set_page_dirty_lock(), instead of set_page_dirty().
- *
- */
-void put_user_pages_dirty_lock(struct page **pages, unsigned long npages)
-{
-	__put_user_pages_dirty(pages, npages, set_page_dirty_lock);
+	/*
+	 * TODO: this can be optimized for huge pages: if a series of pages is
+	 * physically contiguous and part of the same compound page, then a
+	 * single operation to the head page should suffice.
+	 */
+
+	if (!make_dirty) {
+		put_user_pages(pages, npages);
+		return;
+	}
+
+	for (index = 0; index < npages; index++) {
+		struct page *page = compound_head(pages[index]);
+		/*
+		 * Checking PageDirty at this point may race with
+		 * clear_page_dirty_for_io(), but that's OK. Two key
+		 * cases:
+		 *
+		 * 1) This code sees the page as already dirty, so it
+		 * skips the call to set_page_dirty(). That could happen
+		 * because clear_page_dirty_for_io() called
+		 * page_mkclean(), followed by set_page_dirty().
+		 * However, now the page is going to get written back,
+		 * which meets the original intention of setting it
+		 * dirty, so all is well: clear_page_dirty_for_io() goes
+		 * on to call TestClearPageDirty(), and write the page
+		 * back.
+		 *
+		 * 2) This code sees the page as clean, so it calls
+		 * set_page_dirty(). The page stays dirty, despite being
+		 * written back, so it gets written back again in the
+		 * next writeback cycle. This is harmless.
+		 */
+		if (!PageDirty(page))
+			set_page_dirty_lock(page);
+		put_user_page(page);
+	}
 }
 EXPORT_SYMBOL(put_user_pages_dirty_lock);
 
-- 
2.22.0


^ permalink raw reply related

* [PATCH v2 00/34] put_user_pages(): miscellaneous call sites
From: john.hubbard @ 2019-08-04 22:48 UTC (permalink / raw)
  To: Andrew Morton
  Cc: Christoph Hellwig, Dan Williams, Dave Chinner, Dave Hansen,
	Ira Weiny, Jan Kara, Jason Gunthorpe, Jérôme Glisse,
	LKML, amd-gfx, ceph-devel, devel, devel, dri-devel, intel-gfx,
	kvm, linux-arm-kernel, linux-block, linux-crypto, linux-fbdev,
	linux-fsdevel, linux-media, linux-mm, linux-nfs, linux-rdma,
	linux-rpi-kernel, linux-xfs, netdev, rds-devel, sparclinux, x86,
	xen-devel, John Hubbard

From: John Hubbard <jhubbard@nvidia.com>

Changes since v1:

* 9 out of 34 patches have been reviewed or ack'd or changed:
    * Picked up Keith's Reviewed-by for patch 26 (gup_benchmark).
    * Picked up ACKs for patches 3, 10, 15, 16 (ceph, genwqe,
      staging/vc04_services, drivers/tee).

* Patch 6 (i915): adjusted drivers/gpu/drm/i915/gem/i915_gem_userptr.c to
  match the latest linux.git: the code has already been fixed in linux.git,
  as of the latest -rc, to do a set_page_dirty_lock(), instead of
  set_page_dirty(). So all that it needs now is a conversion to
  put_user_page(). I've done that in a way (avoiding the changed API call)
  that allows patch 6 to go up via either Andrew's -mm tree, or the drm
  tree, just in case. See that patch's comments for slightly more detail.

* Patch 20 (xen): applied Juergen's recommended fix, and speculatively
  (pending his approval) added his Signed-off-by (also noted in the patch
  comments).

* Improved patch 31 (NFS) as recommended by Calum Mackay.

* Includes the latest version of patch 1. (Patch 1 has been separately
  reposted [3], with those updates. And it's included here in order to
  make this series apply directly to linux.git, as noted in the original
  cover letter below.)

Cover letter from v1:

These are best characterized as miscellaneous conversions: many (not all)
call sites that don't involve biovec or iov_iter, nor mm/. It also leaves
out a few call sites that require some more work. These are mostly pretty
simple ones.

It's probably best to send all of these via Andrew's -mm tree, assuming
that there are no significant merge conflicts with ongoing work in other
trees (which I doubt, given that these are small changes).

These patches apply to the latest linux.git. Patch #1 is also already in
Andrew's tree, but given the broad non-linux-mm Cc list, I thought it
would be more convenient to just include that patch here, so that people
can use linux.git as the base--even though these are probably destined
for linux-mm.

This is part a tree-wide conversion, as described in commit fc1d8e7cca2d
("mm: introduce put_user_page*(), placeholder versions"). That commit
has an extensive description of the problem and the planned steps to
solve it, but the highlites are:

1) Provide put_user_page*() routines, intended to be used
for releasing pages that were pinned via get_user_pages*().

2) Convert all of the call sites for get_user_pages*(), to
invoke put_user_page*(), instead of put_page(). This involves dozens of
call sites, and will take some time.

3) After (2) is complete, use get_user_pages*() and put_user_page*() to
implement tracking of these pages. This tracking will be separate from
the existing struct page refcounting.

4) Use the tracking and identification of these pages, to implement
special handling (especially in writeback paths) when the pages are
backed by a filesystem.

And a few references, also from that commit:

[1] https://lwn.net/Articles/774411/ : "DMA and get_user_pages()"
[2] https://lwn.net/Articles/753027/ : "The Trouble with get_user_pages()"

[3] "mm/gup: add make_dirty arg to put_user_pages_dirty_lock()"
    https://lore.kernel.org/r/20190804214042.4564-1-jhubbard@nvidia.com

Ira Weiny (1):
  fs/binfmt_elf: convert put_page() to put_user_page*()

John Hubbard (33):
  mm/gup: add make_dirty arg to put_user_pages_dirty_lock()
  net/rds: convert put_page() to put_user_page*()
  net/ceph: convert put_page() to put_user_page*()
  x86/kvm: convert put_page() to put_user_page*()
  drm/etnaviv: convert release_pages() to put_user_pages()
  drm/i915: convert put_page() to put_user_page*()
  drm/radeon: convert put_page() to put_user_page*()
  media/ivtv: convert put_page() to put_user_page*()
  media/v4l2-core/mm: convert put_page() to put_user_page*()
  genwqe: convert put_page() to put_user_page*()
  scif: convert put_page() to put_user_page*()
  vmci: convert put_page() to put_user_page*()
  rapidio: convert put_page() to put_user_page*()
  oradax: convert put_page() to put_user_page*()
  staging/vc04_services: convert put_page() to put_user_page*()
  drivers/tee: convert put_page() to put_user_page*()
  vfio: convert put_page() to put_user_page*()
  fbdev/pvr2fb: convert put_page() to put_user_page*()
  fsl_hypervisor: convert put_page() to put_user_page*()
  fs/exec.c: convert put_page() to put_user_page*()
  xen: convert put_page() to put_user_page*()
  orangefs: convert put_page() to put_user_page*()
  uprobes: convert put_page() to put_user_page*()
  futex: convert put_page() to put_user_page*()
  mm/frame_vector.c: convert put_page() to put_user_page*()
  mm/gup_benchmark.c: convert put_page() to put_user_page*()
  mm/memory.c: convert put_page() to put_user_page*()
  mm/madvise.c: convert put_page() to put_user_page*()
  mm/process_vm_access.c: convert put_page() to put_user_page*()
  crypt: convert put_page() to put_user_page*()
  fs/nfs: convert put_page() to put_user_page*()
  goldfish_pipe: convert put_page() to put_user_page*()
  kernel/events/core.c: convert put_page() to put_user_page*()

 arch/x86/kvm/svm.c                            |   4 +-
 crypto/af_alg.c                               |   7 +-
 drivers/gpu/drm/etnaviv/etnaviv_gem.c         |   4 +-
 drivers/gpu/drm/i915/gem/i915_gem_userptr.c   |   6 +-
 drivers/gpu/drm/radeon/radeon_ttm.c           |   2 +-
 drivers/infiniband/core/umem.c                |   5 +-
 drivers/infiniband/hw/hfi1/user_pages.c       |   5 +-
 drivers/infiniband/hw/qib/qib_user_pages.c    |  13 +-
 drivers/infiniband/hw/usnic/usnic_uiom.c      |   5 +-
 drivers/infiniband/sw/siw/siw_mem.c           |  19 +--
 drivers/media/pci/ivtv/ivtv-udma.c            |  14 +--
 drivers/media/pci/ivtv/ivtv-yuv.c             |  11 +-
 drivers/media/v4l2-core/videobuf-dma-sg.c     |   3 +-
 drivers/misc/genwqe/card_utils.c              |  17 +--
 drivers/misc/mic/scif/scif_rma.c              |  17 ++-
 drivers/misc/vmw_vmci/vmci_context.c          |   2 +-
 drivers/misc/vmw_vmci/vmci_queue_pair.c       |  11 +-
 drivers/platform/goldfish/goldfish_pipe.c     |   9 +-
 drivers/rapidio/devices/rio_mport_cdev.c      |   9 +-
 drivers/sbus/char/oradax.c                    |   2 +-
 .../interface/vchiq_arm/vchiq_2835_arm.c      |  10 +-
 drivers/tee/tee_shm.c                         |  10 +-
 drivers/vfio/vfio_iommu_type1.c               |   8 +-
 drivers/video/fbdev/pvr2fb.c                  |   3 +-
 drivers/virt/fsl_hypervisor.c                 |   7 +-
 drivers/xen/privcmd.c                         |  32 ++---
 fs/binfmt_elf.c                               |   2 +-
 fs/binfmt_elf_fdpic.c                         |   2 +-
 fs/exec.c                                     |   2 +-
 fs/nfs/direct.c                               |  11 +-
 fs/orangefs/orangefs-bufmap.c                 |   7 +-
 include/linux/mm.h                            |   5 +-
 kernel/events/core.c                          |   2 +-
 kernel/events/uprobes.c                       |   6 +-
 kernel/futex.c                                |  10 +-
 mm/frame_vector.c                             |   4 +-
 mm/gup.c                                      | 115 ++++++++----------
 mm/gup_benchmark.c                            |   2 +-
 mm/madvise.c                                  |   2 +-
 mm/memory.c                                   |   2 +-
 mm/process_vm_access.c                        |  18 +--
 net/ceph/pagevec.c                            |   8 +-
 net/rds/info.c                                |   5 +-
 net/rds/message.c                             |   2 +-
 net/rds/rdma.c                                |  15 ++-
 virt/kvm/kvm_main.c                           |   4 +-
 46 files changed, 164 insertions(+), 295 deletions(-)

-- 
2.22.0


^ permalink raw reply

* [PATCH net 5/5] net: dsa: sja1105: Fix memory leak on meta state machine error path
From: Vladimir Oltean @ 2019-08-04 22:38 UTC (permalink / raw)
  To: f.fainelli, vivien.didelot, andrew, davem; +Cc: netdev, Vladimir Oltean
In-Reply-To: <20190804223848.31676-1-olteanv@gmail.com>

When RX timestamping is enabled and two link-local (non-meta) frames are
received in a row, this constitutes an error.

The tagger is always caching the last link-local frame, in an attempt to
merge it with the meta follow-up frame when that arrives. To recover
from the above error condition, the initial cached link-local frame is
dropped and the second frame in a row is cached (in expectance of the
second meta frame).

However, when dropping the initial link-local frame, its backing memory
was being leaked.

Fixes: f3097be21bf1 ("net: dsa: sja1105: Add a state machine for RX timestamping")
Signed-off-by: Vladimir Oltean <olteanv@gmail.com>
---
 net/dsa/tag_sja1105.c | 1 +
 1 file changed, 1 insertion(+)

diff --git a/net/dsa/tag_sja1105.c b/net/dsa/tag_sja1105.c
index 8fa8dda8a15b..47ee88163a9d 100644
--- a/net/dsa/tag_sja1105.c
+++ b/net/dsa/tag_sja1105.c
@@ -165,6 +165,7 @@ static struct sk_buff
 					    "Expected meta frame, is %12llx "
 					    "in the DSA master multicast filter?\n",
 					    SJA1105_META_DMAC);
+			kfree_skb(sp->data->stampable_skb);
 		}
 
 		/* Hold a reference to avoid dsa_switch_rcv
-- 
2.17.1


^ permalink raw reply related

* [PATCH net 1/5] net: dsa: sja1105: Fix broken learning with vlan_filtering disabled
From: Vladimir Oltean @ 2019-08-04 22:38 UTC (permalink / raw)
  To: f.fainelli, vivien.didelot, andrew, davem
  Cc: netdev, Vladimir Oltean, Georg Waibel
In-Reply-To: <20190804223848.31676-1-olteanv@gmail.com>

When put under a bridge with vlan_filtering 0, the SJA1105 ports will
flood all traffic as if learning was broken. This is because learning
interferes with the rx_vid's configured by dsa_8021q as unique pvid's.

So learning technically still *does* work, it's just that the learnt
entries never get matched due to their unique VLAN ID.

The setting that saves the day is Shared VLAN Learning, which on this
switch family works exactly as desired: VLAN tagging still works
(untagged traffic gets the correct pvid) and FDB entries are still
populated with the correct contents including VID. Also, a frame cannot
violate the forwarding domain restrictions enforced by its classified
VLAN. It is just that the VID is ignored when looking up the FDB for
taking a forwarding decision (selecting the egress port).

This patch activates SVL, and the result is that frames with a learnt
DMAC are no longer flooded in the scenario described above.

Now exactly *because* SVL works as desired, we have to revisit some
earlier patches:

- It is no longer necessary to manipulate the VID of the 'bridge fdb
  {add,del}' command when vlan_filtering is off. This is because now,
  SVL is enabled for that case, so the actual VID does not matter*.

- It is still desirable to hide dsa_8021q VID's in the FDB dump
  callback. But right now the dump callback should no longer hide
  duplicates (one per each front panel port's pvid, plus one for the
  VLAN that the CPU port is going to tag a TX frame with), because there
  shouldn't be any (the switch will match a single FDB entry no matter
  its VID anyway).

* Not really... It's no longer necessary to transform a 'bridge fdb add'
  into 5 fdb add operations, but the user might still add a fdb entry with
  any vid, and all of them would appear as duplicates in 'bridge fdb
  show'. So force a 'bridge fdb add' to insert the VID of 0**, so that we
  can prune the duplicates at insertion time.

** The VID of 0 is better than 1 because it is always guaranteed to be
   in the ports' hardware filter. DSA also avoids putting the VID inside
   the netlink response message towards the bridge driver when we return
   this particular VID, which makes it suitable for FDB entries learnt
   with vlan_filtering off.

Fixes: 227d07a07ef1 ("net: dsa: sja1105: Add support for traffic through standalone ports")
Signed-off-by: Vladimir Oltean <olteanv@gmail.com>
Signed-off-by: Georg Waibel <georg.waibel@sensor-technik.de>
---
 drivers/net/dsa/sja1105/sja1105_main.c | 121 +++++++++++--------------
 1 file changed, 55 insertions(+), 66 deletions(-)

diff --git a/drivers/net/dsa/sja1105/sja1105_main.c b/drivers/net/dsa/sja1105/sja1105_main.c
index 32bf3a7cc3b6..dc6ab834f0cc 100644
--- a/drivers/net/dsa/sja1105/sja1105_main.c
+++ b/drivers/net/dsa/sja1105/sja1105_main.c
@@ -218,7 +218,7 @@ static int sja1105_init_l2_lookup_params(struct sja1105_private *priv)
 		/* This selects between Independent VLAN Learning (IVL) and
 		 * Shared VLAN Learning (SVL)
 		 */
-		.shared_learn = false,
+		.shared_learn = true,
 		/* Don't discard management traffic based on ENFPORT -
 		 * we don't perform SMAC port enforcement anyway, so
 		 * what we are setting here doesn't matter.
@@ -1089,8 +1089,13 @@ int sja1105pqrs_fdb_add(struct dsa_switch *ds, int port,
 	l2_lookup.vlanid = vid;
 	l2_lookup.iotag = SJA1105_S_TAG;
 	l2_lookup.mask_macaddr = GENMASK_ULL(ETH_ALEN * 8 - 1, 0);
-	l2_lookup.mask_vlanid = VLAN_VID_MASK;
-	l2_lookup.mask_iotag = BIT(0);
+	if (dsa_port_is_vlan_filtering(&ds->ports[port])) {
+		l2_lookup.mask_vlanid = VLAN_VID_MASK;
+		l2_lookup.mask_iotag = BIT(0);
+	} else {
+		l2_lookup.mask_vlanid = 0;
+		l2_lookup.mask_iotag = 0;
+	}
 	l2_lookup.destports = BIT(port);
 
 	rc = sja1105_dynamic_config_read(priv, BLK_IDX_L2_LOOKUP,
@@ -1147,8 +1152,13 @@ int sja1105pqrs_fdb_del(struct dsa_switch *ds, int port,
 	l2_lookup.vlanid = vid;
 	l2_lookup.iotag = SJA1105_S_TAG;
 	l2_lookup.mask_macaddr = GENMASK_ULL(ETH_ALEN * 8 - 1, 0);
-	l2_lookup.mask_vlanid = VLAN_VID_MASK;
-	l2_lookup.mask_iotag = BIT(0);
+	if (dsa_port_is_vlan_filtering(&ds->ports[port])) {
+		l2_lookup.mask_vlanid = VLAN_VID_MASK;
+		l2_lookup.mask_iotag = BIT(0);
+	} else {
+		l2_lookup.mask_vlanid = 0;
+		l2_lookup.mask_iotag = 0;
+	}
 	l2_lookup.destports = BIT(port);
 
 	rc = sja1105_dynamic_config_read(priv, BLK_IDX_L2_LOOKUP,
@@ -1178,60 +1188,31 @@ static int sja1105_fdb_add(struct dsa_switch *ds, int port,
 			   const unsigned char *addr, u16 vid)
 {
 	struct sja1105_private *priv = ds->priv;
-	u16 rx_vid, tx_vid;
-	int rc, i;
 
-	if (dsa_port_is_vlan_filtering(&ds->ports[port]))
-		return priv->info->fdb_add_cmd(ds, port, addr, vid);
-
-	/* Since we make use of VLANs even when the bridge core doesn't tell us
-	 * to, translate these FDB entries into the correct dsa_8021q ones.
-	 * The basic idea (also repeats for removal below) is:
-	 * - Each of the other front-panel ports needs to be able to forward a
-	 *   pvid-tagged (aka tagged with their rx_vid) frame that matches this
-	 *   DMAC.
-	 * - The CPU port (aka the tx_vid of this port) needs to be able to
-	 *   send a frame matching this DMAC to the specified port.
-	 * For a better picture see net/dsa/tag_8021q.c.
+	/* dsa_8021q is in effect when the bridge's vlan_filtering isn't,
+	 * so the switch still does some VLAN processing internally.
+	 * But Shared VLAN Learning (SVL) is also active, and it will take
+	 * care of autonomous forwarding between the unique pvid's of each
+	 * port.  Here we just make sure that users can't add duplicate FDB
+	 * entries when in this mode - the actual VID doesn't matter except
+	 * for what gets printed in 'bridge fdb show'.  In the case of zero,
+	 * no VID gets printed at all.
 	 */
-	for (i = 0; i < SJA1105_NUM_PORTS; i++) {
-		if (i == port)
-			continue;
-		if (i == dsa_upstream_port(priv->ds, port))
-			continue;
+	if (!dsa_port_is_vlan_filtering(&ds->ports[port]))
+		vid = 0;
 
-		rx_vid = dsa_8021q_rx_vid(ds, i);
-		rc = priv->info->fdb_add_cmd(ds, port, addr, rx_vid);
-		if (rc < 0)
-			return rc;
-	}
-	tx_vid = dsa_8021q_tx_vid(ds, port);
-	return priv->info->fdb_add_cmd(ds, port, addr, tx_vid);
+	return priv->info->fdb_add_cmd(ds, port, addr, vid);
 }
 
 static int sja1105_fdb_del(struct dsa_switch *ds, int port,
 			   const unsigned char *addr, u16 vid)
 {
 	struct sja1105_private *priv = ds->priv;
-	u16 rx_vid, tx_vid;
-	int rc, i;
 
-	if (dsa_port_is_vlan_filtering(&ds->ports[port]))
-		return priv->info->fdb_del_cmd(ds, port, addr, vid);
+	if (!dsa_port_is_vlan_filtering(&ds->ports[port]))
+		vid = 0;
 
-	for (i = 0; i < SJA1105_NUM_PORTS; i++) {
-		if (i == port)
-			continue;
-		if (i == dsa_upstream_port(priv->ds, port))
-			continue;
-
-		rx_vid = dsa_8021q_rx_vid(ds, i);
-		rc = priv->info->fdb_del_cmd(ds, port, addr, rx_vid);
-		if (rc < 0)
-			return rc;
-	}
-	tx_vid = dsa_8021q_tx_vid(ds, port);
-	return priv->info->fdb_del_cmd(ds, port, addr, tx_vid);
+	return priv->info->fdb_del_cmd(ds, port, addr, vid);
 }
 
 static int sja1105_fdb_dump(struct dsa_switch *ds, int port,
@@ -1285,24 +1266,9 @@ static int sja1105_fdb_dump(struct dsa_switch *ds, int port,
 			l2_lookup.lockeds = (match >= 0);
 		}
 
-		/* We need to hide the dsa_8021q VLANs from the user. This
-		 * basically means hiding the duplicates and only showing
-		 * the pvid that is supposed to be active in standalone and
-		 * non-vlan_filtering modes (aka 1).
-		 * - For statically added FDB entries (bridge fdb add), we
-		 *   can convert the TX VID (coming from the CPU port) into the
-		 *   pvid and ignore the RX VIDs of the other ports.
-		 * - For dynamically learned FDB entries, a single entry with
-		 *   no duplicates is learned - that which has the real port's
-		 *   pvid, aka RX VID.
-		 */
-		if (!dsa_port_is_vlan_filtering(&ds->ports[port])) {
-			if (l2_lookup.vlanid == tx_vid ||
-			    l2_lookup.vlanid == rx_vid)
-				l2_lookup.vlanid = 1;
-			else
-				continue;
-		}
+		/* We need to hide the dsa_8021q VLANs from the user. */
+		if (!dsa_port_is_vlan_filtering(&ds->ports[port]))
+			l2_lookup.vlanid = 0;
 		cb(macaddr, l2_lookup.vlanid, l2_lookup.lockeds, data);
 	}
 	return 0;
@@ -1594,6 +1560,7 @@ static int sja1105_vlan_prepare(struct dsa_switch *ds, int port,
  */
 static int sja1105_vlan_filtering(struct dsa_switch *ds, int port, bool enabled)
 {
+	struct sja1105_l2_lookup_params_entry *l2_lookup_params;
 	struct sja1105_general_params_entry *general_params;
 	struct sja1105_private *priv = ds->priv;
 	struct sja1105_table *table;
@@ -1622,6 +1589,28 @@ static int sja1105_vlan_filtering(struct dsa_switch *ds, int port, bool enabled)
 	general_params->incl_srcpt1 = enabled;
 	general_params->incl_srcpt0 = enabled;
 
+	/* VLAN filtering => independent VLAN learning.
+	 * No VLAN filtering => shared VLAN learning.
+	 *
+	 * In shared VLAN learning mode, untagged traffic still gets
+	 * pvid-tagged, and the FDB table gets populated with entries
+	 * containing the "real" (pvid or from VLAN tag) VLAN ID.
+	 * However the switch performs a masked L2 lookup in the FDB,
+	 * effectively only looking up a frame's DMAC (and not VID) for the
+	 * forwarding decision.
+	 *
+	 * This is extremely convenient for us, because in modes with
+	 * vlan_filtering=0, dsa_8021q actually installs unique pvid's into
+	 * each front panel port. This is good for identification but breaks
+	 * learning badly - the VID of the learnt FDB entry is unique, aka
+	 * no frames coming from any other port are going to have it. So
+	 * for forwarding purposes, this is as though learning was broken
+	 * (all frames get flooded).
+	 */
+	table = &priv->static_config.tables[BLK_IDX_L2_LOOKUP_PARAMS];
+	l2_lookup_params = table->entries;
+	l2_lookup_params->shared_learn = !enabled;
+
 	rc = sja1105_static_config_reload(priv);
 	if (rc)
 		dev_err(ds->dev, "Failed to change VLAN Ethertype\n");
-- 
2.17.1


^ permalink raw reply related

* [PATCH net 3/5] net: dsa: sja1105: Really fix panic on unregistering PTP clock
From: Vladimir Oltean @ 2019-08-04 22:38 UTC (permalink / raw)
  To: f.fainelli, vivien.didelot, andrew, davem; +Cc: netdev, Vladimir Oltean
In-Reply-To: <20190804223848.31676-1-olteanv@gmail.com>

The IS_ERR_OR_NULL(priv->clock) check inside
sja1105_ptp_clock_unregister() is preventing cancel_delayed_work_sync
from actually being run.

Additionally, sja1105_ptp_clock_unregister() does not actually get run,
when placed in sja1105_remove(). The DSA switch gets torn down, but the
sja1105 module does not get unregistered. So sja1105_ptp_clock_unregister
needs to be moved to sja1105_teardown, to be symmetrical with
sja1105_ptp_clock_register which is called from the DSA sja1105_setup.

It is strange to fix a "fixes" patch, but the probe failure can only be
seen when the attached PHY does not respond to MDIO (issue which I can't
pinpoint the reason to) and it goes away after I power-cycle the board.
This time the patch was validated on a failing board, and the kernel
panic from the fixed commit's message can no longer be seen.

Fixes: 29dd908d355f ("net: dsa: sja1105: Cancel PTP delayed work on unregister")
Signed-off-by: Vladimir Oltean <olteanv@gmail.com>
---
 drivers/net/dsa/sja1105/sja1105_main.c | 4 ++--
 drivers/net/dsa/sja1105/sja1105_ptp.c  | 7 +++----
 2 files changed, 5 insertions(+), 6 deletions(-)

diff --git a/drivers/net/dsa/sja1105/sja1105_main.c b/drivers/net/dsa/sja1105/sja1105_main.c
index fd036bf0a819..13c22f51d476 100644
--- a/drivers/net/dsa/sja1105/sja1105_main.c
+++ b/drivers/net/dsa/sja1105/sja1105_main.c
@@ -1725,6 +1725,8 @@ static void sja1105_teardown(struct dsa_switch *ds)
 
 	cancel_work_sync(&priv->tagger_data.rxtstamp_work);
 	skb_queue_purge(&priv->tagger_data.skb_rxtstamp_queue);
+	sja1105_ptp_clock_unregister(priv);
+	sja1105_static_config_free(&priv->static_config);
 }
 
 static int sja1105_mgmt_xmit(struct dsa_switch *ds, int port, int slot,
@@ -2182,9 +2184,7 @@ static int sja1105_remove(struct spi_device *spi)
 {
 	struct sja1105_private *priv = spi_get_drvdata(spi);
 
-	sja1105_ptp_clock_unregister(priv);
 	dsa_unregister_switch(priv->ds);
-	sja1105_static_config_free(&priv->static_config);
 	return 0;
 }
 
diff --git a/drivers/net/dsa/sja1105/sja1105_ptp.c b/drivers/net/dsa/sja1105/sja1105_ptp.c
index d19cfdf681af..d8e8dd59f3d1 100644
--- a/drivers/net/dsa/sja1105/sja1105_ptp.c
+++ b/drivers/net/dsa/sja1105/sja1105_ptp.c
@@ -369,16 +369,15 @@ int sja1105_ptp_clock_register(struct sja1105_private *priv)
 		.mult = SJA1105_CC_MULT,
 	};
 	mutex_init(&priv->ptp_lock);
-	INIT_DELAYED_WORK(&priv->refresh_work, sja1105_ptp_overflow_check);
-
-	schedule_delayed_work(&priv->refresh_work, SJA1105_REFRESH_INTERVAL);
-
 	priv->ptp_caps = sja1105_ptp_caps;
 
 	priv->clock = ptp_clock_register(&priv->ptp_caps, ds->dev);
 	if (IS_ERR_OR_NULL(priv->clock))
 		return PTR_ERR(priv->clock);
 
+	INIT_DELAYED_WORK(&priv->refresh_work, sja1105_ptp_overflow_check);
+	schedule_delayed_work(&priv->refresh_work, SJA1105_REFRESH_INTERVAL);
+
 	return sja1105_ptp_reset(priv);
 }
 
-- 
2.17.1


^ permalink raw reply related

* [PATCH net 4/5] net: dsa: sja1105: Fix memory leak on meta state machine normal path
From: Vladimir Oltean @ 2019-08-04 22:38 UTC (permalink / raw)
  To: f.fainelli, vivien.didelot, andrew, davem; +Cc: netdev, Vladimir Oltean
In-Reply-To: <20190804223848.31676-1-olteanv@gmail.com>

After a meta frame is received, it is associated with the cached
sp->data->stampable_skb from the DSA tagger private structure.

Cached means its refcount is incremented with skb_get() in order for
dsa_switch_rcv() to not free it when the tagger .rcv returns NULL.

The mistake is that skb_unref() is not the correct function to use. It
will correctly decrement the refcount (which will go back to zero) but
the skb memory will not be freed.  That is the job of kfree_skb(), which
also calls skb_unref().

But it turns out that freeing the cached stampable_skb is in fact not
necessary.  It is still a perfectly valid skb, and now it is even
annotated with the partial RX timestamp.  So remove the skb_copy()
altogether and simply pass the stampable_skb with a refcount of 1
(incremented by us, decremented by dsa_switch_rcv) up the stack.

Fixes: f3097be21bf1 ("net: dsa: sja1105: Add a state machine for RX timestamping")
Signed-off-by: Vladimir Oltean <olteanv@gmail.com>
---
 net/dsa/tag_sja1105.c | 11 +----------
 1 file changed, 1 insertion(+), 10 deletions(-)

diff --git a/net/dsa/tag_sja1105.c b/net/dsa/tag_sja1105.c
index 26363d72d25b..8fa8dda8a15b 100644
--- a/net/dsa/tag_sja1105.c
+++ b/net/dsa/tag_sja1105.c
@@ -211,17 +211,8 @@ static struct sk_buff
 		 * for further processing up the network stack.
 		 */
 		kfree_skb(skb);
-
-		skb = skb_copy(stampable_skb, GFP_ATOMIC);
-		if (!skb) {
-			dev_err_ratelimited(dp->ds->dev,
-					    "Failed to copy stampable skb\n");
-			spin_unlock(&sp->data->meta_lock);
-			return NULL;
-		}
+		skb = stampable_skb;
 		sja1105_transfer_meta(skb, meta);
-		/* The cached copy will be freed now */
-		skb_unref(stampable_skb);
 
 		spin_unlock(&sp->data->meta_lock);
 	}
-- 
2.17.1


^ permalink raw reply related

* [PATCH net 2/5] net: dsa: sja1105: Use the LOCKEDS bit for SJA1105 E/T as well
From: Vladimir Oltean @ 2019-08-04 22:38 UTC (permalink / raw)
  To: f.fainelli, vivien.didelot, andrew, davem; +Cc: netdev, Vladimir Oltean
In-Reply-To: <20190804223848.31676-1-olteanv@gmail.com>

It looks like the FDB dump taken from first-generation switches also
contains information on whether entries are static or not. So use that
instead of searching through the driver's tables.

Fixes: d763778224ea ("net: dsa: sja1105: Implement is_static for FDB entries on E/T")
Signed-off-by: Vladimir Oltean <olteanv@gmail.com>
---
 drivers/net/dsa/sja1105/sja1105_dynamic_config.c | 14 +++++++++++++-
 drivers/net/dsa/sja1105/sja1105_main.c           | 15 ---------------
 2 files changed, 13 insertions(+), 16 deletions(-)

diff --git a/drivers/net/dsa/sja1105/sja1105_dynamic_config.c b/drivers/net/dsa/sja1105/sja1105_dynamic_config.c
index 6bfb1696a6f2..9988c9d18567 100644
--- a/drivers/net/dsa/sja1105/sja1105_dynamic_config.c
+++ b/drivers/net/dsa/sja1105/sja1105_dynamic_config.c
@@ -277,6 +277,18 @@ sja1105et_l2_lookup_cmd_packing(void *buf, struct sja1105_dyn_cmd *cmd,
 			SJA1105ET_SIZE_L2_LOOKUP_ENTRY, op);
 }
 
+static size_t sja1105et_dyn_l2_lookup_entry_packing(void *buf, void *entry_ptr,
+						    enum packing_op op)
+{
+	struct sja1105_l2_lookup_entry *entry = entry_ptr;
+	u8 *cmd = buf + SJA1105ET_SIZE_L2_LOOKUP_ENTRY;
+	const int size = SJA1105_SIZE_DYN_CMD;
+
+	sja1105_packing(cmd, &entry->lockeds, 28, 28, size, op);
+
+	return sja1105et_l2_lookup_entry_packing(buf, entry_ptr, op);
+}
+
 static void
 sja1105et_mgmt_route_cmd_packing(void *buf, struct sja1105_dyn_cmd *cmd,
 				 enum packing_op op)
@@ -477,7 +489,7 @@ sja1105et_general_params_entry_packing(void *buf, void *entry_ptr,
 /* SJA1105E/T: First generation */
 struct sja1105_dynamic_table_ops sja1105et_dyn_ops[BLK_IDX_MAX_DYN] = {
 	[BLK_IDX_L2_LOOKUP] = {
-		.entry_packing = sja1105et_l2_lookup_entry_packing,
+		.entry_packing = sja1105et_dyn_l2_lookup_entry_packing,
 		.cmd_packing = sja1105et_l2_lookup_cmd_packing,
 		.access = (OP_READ | OP_WRITE | OP_DEL),
 		.max_entry_count = SJA1105_MAX_L2_LOOKUP_COUNT,
diff --git a/drivers/net/dsa/sja1105/sja1105_main.c b/drivers/net/dsa/sja1105/sja1105_main.c
index dc6ab834f0cc..fd036bf0a819 100644
--- a/drivers/net/dsa/sja1105/sja1105_main.c
+++ b/drivers/net/dsa/sja1105/sja1105_main.c
@@ -1251,21 +1251,6 @@ static int sja1105_fdb_dump(struct dsa_switch *ds, int port,
 			continue;
 		u64_to_ether_addr(l2_lookup.macaddr, macaddr);
 
-		/* On SJA1105 E/T, the switch doesn't implement the LOCKEDS
-		 * bit, so it doesn't tell us whether a FDB entry is static
-		 * or not.
-		 * But, of course, we can find out - we're the ones who added
-		 * it in the first place.
-		 */
-		if (priv->info->device_id == SJA1105E_DEVICE_ID ||
-		    priv->info->device_id == SJA1105T_DEVICE_ID) {
-			int match;
-
-			match = sja1105_find_static_fdb_entry(priv, port,
-							      &l2_lookup);
-			l2_lookup.lockeds = (match >= 0);
-		}
-
 		/* We need to hide the dsa_8021q VLANs from the user. */
 		if (!dsa_port_is_vlan_filtering(&ds->ports[port]))
 			l2_lookup.vlanid = 0;
-- 
2.17.1


^ permalink raw reply related

* [PATCH net 0/5] Fixes for SJA1105 DSA: FDBs, Learning and PTP
From: Vladimir Oltean @ 2019-08-04 22:38 UTC (permalink / raw)
  To: f.fainelli, vivien.didelot, andrew, davem; +Cc: netdev, Vladimir Oltean

This is an assortment of functional fixes for the sja1105 switch driver
targeted for the "net" tree (although they apply on net-next just as
well).

Patch 1/5 ("net: dsa: sja1105: Fix broken learning with vlan_filtering
disabled") repairs a breakage introduced in the early development stages
of the driver: support for traffic from the CPU has broken "normal"
frame forwarding (based on DMAC) - there is connectivity through the
switch only because all frames are flooded.
I debated whether this patch qualifies as a fix, since it puts the
switch into a mode it has never operated in before (aka SVL). But
"normal" forwarding did use to work before the "Traffic support for
SJA1105 DSA driver" patchset, and arguably this patch should have been
part of that.
Also, it would be strange for this feature to be broken in the 5.2 LTS.

Patch 2/5 ("net: dsa: sja1105: Use the LOCKEDS bit for SJA1105 E/T as
well") is a simplification of a previous FDB-related patch that is
currently in the 5.3 rc's.

Patches 3/5 - 5/5 fix various crashes found while running linuxptp over the
switch ports for extended periods of time, or in conjunction with other
error conditions. The fixed-up commits were all introduced in 5.2.

Vladimir Oltean (5):
  net: dsa: sja1105: Fix broken learning with vlan_filtering disabled
  net: dsa: sja1105: Use the LOCKEDS bit for SJA1105 E/T as well
  net: dsa: sja1105: Really fix panic on unregistering PTP clock
  net: dsa: sja1105: Fix memory leak on meta state machine normal path
  net: dsa: sja1105: Fix memory leak on meta state machine error path

 .../net/dsa/sja1105/sja1105_dynamic_config.c  |  14 +-
 drivers/net/dsa/sja1105/sja1105_main.c        | 140 +++++++-----------
 drivers/net/dsa/sja1105/sja1105_ptp.c         |   7 +-
 net/dsa/tag_sja1105.c                         |  12 +-
 4 files changed, 75 insertions(+), 98 deletions(-)

-- 
2.17.1


^ permalink raw reply

* Re: [PATCH v2 bpf-next 1/4] bpf: unprivileged BPF access via /dev/bpf
From: Andy Lutomirski @ 2019-08-04 22:16 UTC (permalink / raw)
  To: Song Liu
  Cc: Andy Lutomirski, Kees Cook, Networking, bpf, Alexei Starovoitov,
	Daniel Borkmann, Kernel Team, Lorenz Bauer, Jann Horn, Greg KH,
	Linux API, LSM List
In-Reply-To: <5A2FCD7E-7F54-41E5-BFAE-BB9494E74F2D@fb.com>

On Fri, Aug 2, 2019 at 12:22 AM Song Liu <songliubraving@fb.com> wrote:
>
> Hi Andy,
>
 >> I actually agree CAP_BPF_ADMIN makes sense. The hard part is to make
> >> existing tools (setcap, getcap, etc.) and libraries aware of the new CAP.
> >
> > It's been done before -- it's not that hard.  IMO the main tricky bit
> > would be try be somewhat careful about defining exactly what
> > CAP_BPF_ADMIN does.
>
> Agreed. I think defining CAP_BPF_ADMIN could be a good topic for the
> Plumbers conference.
>
> OTOH, I don't think we have to wait for CAP_BPF_ADMIN to allow daemons
> like systemd to do sys_bpf() without root.

I don't understand the use case here.  Are you talking about systemd
--user?  As far as I know, a user is expected to be able to fully
control their systemd --user process, so giving it unrestricted bpf
access is very close to giving it superuser access, and this doesn't
sound like a good idea.  I think that, if systemd --user needs bpf(),
it either needs real unprivileged bpf() or it needs a privileged
helper (SUID or a daemon) to intermediate this access.

>
> >
> >>> I don't see why you need to invent a whole new mechanism for this.
> >>> The entire cgroup ecosystem outside bpf() does just fine using the
> >>> write permission on files in cgroupfs to control access.  Why can't
> >>> bpf() do the same thing?
> >>
> >> It is easier to use write permission for BPF_PROG_ATTACH. But it is
> >> not easy to do the same for other bpf commands: BPF_PROG_LOAD and
> >> BPF_MAP_*. A lot of these commands don't have target concept. Maybe
> >> we should have target concept for all these commands. But that is a
> >> much bigger project. OTOH, "all or nothing" model allows all these
> >> commands at once.
> >
> > For BPF_PROG_LOAD, I admit I've never understood why permission is
> > required at all.  I think that CAP_SYS_ADMIN or similar should be
> > needed to get is_priv in the verifier, but I think that should mainly
> > be useful for tracing, and that requires lots of privilege anyway.
> > BPF_MAP_* is probably the trickiest part.  One solution would be some
> > kind of bpffs, but I'm sure other solutions are possible.
>
> Improving permission management of cgroup_bpf is another good topic to
> discuss. However, it is also an overkill for current use case.
>

I looked at the code some more, and I don't think this is so hard
after all.  As I understand it, all of the map..by_id stuff is, to
some extent, deprecated in favor of persistent maps.  As I see it, the
map..by_id calls should require privilege forever, although I can
imagine ways to scope that privilege to a namespace if the maps
themselves were to be scoped to a namespace.

Instead, unprivileged tools would use the persistent map interface
roughly like this:

$ bpftool map create /sys/fs/bpf/my_dir/filename type hash key 8 value
8 entries 64 name mapname

This would require that the caller have either CAP_DAC_OVERRIDE or
that the caller have permission to create files in /sys/fs/bpf/my_dir
(using the same rules as for any filesystem), and the resulting map
would end up owned by the creating user and have mode 0600 (or maybe
0666, or maybe a new bpf_attr parameter) modified by umask.  Then all
the various capable() checks that are currently involved in accessing
a persistent map would instead check FMODE_READ or FMODE_WRITE on the
map file as appropriate.

Half of this stuff already works.  I just set my system up like this:

$ ls -l /sys/fs/bpf
total 0
drwxr-xr-x. 3 luto luto 0 Aug  4 15:10 luto

$ mkdir /sys/fs/bpf/luto/test

$ ls -l /sys/fs/bpf/luto
total 0
drwxrwxr-x. 2 luto luto 0 Aug  4 15:10 test

I bet that making the bpf() syscalls work appropriately in this
context without privilege would only be a couple of hours of work.
The hard work, creating bpffs and making it function, is already done
:)

P.S. The docs for bpftool create are less than fantastic.  The
complete lack of any error message at all when the syscall returns
-EACCES is also not fantastic.

^ permalink raw reply

* RE: Slowness forming TIPC cluster with explicit node addresses
From: Jon Maloy @ 2019-08-04 21:53 UTC (permalink / raw)
  To: Chris Packham, tipc-discussion@lists.sourceforge.net
  Cc: netdev@vger.kernel.org, linux-kernel@vger.kernel.org
In-Reply-To: <1564722689.4914.27.camel@alliedtelesis.co.nz>



> -----Original Message-----
> From: netdev-owner@vger.kernel.org <netdev-owner@vger.kernel.org> On
> Behalf Of Chris Packham
> Sent: 2-Aug-19 01:11
> To: Jon Maloy <jon.maloy@ericsson.com>; tipc-
> discussion@lists.sourceforge.net
> Cc: netdev@vger.kernel.org; linux-kernel@vger.kernel.org
> Subject: Re: Slowness forming TIPC cluster with explicit node addresses
> 
> On Mon, 2019-07-29 at 09:04 +1200, Chris Packham wrote:
> > On Fri, 2019-07-26 at 13:31 +0000, Jon Maloy wrote:
> > >
> > >
> > > >
> > > >
> > > > -----Original Message-----
> > > > From: netdev-owner@vger.kernel.org <netdev-
> owner@vger.kernel.org>
> > > > On Behalf Of Chris Packham
> > > > Sent: 25-Jul-19 19:37
> > > > To: tipc-discussion@lists.sourceforge.net
> > > > Cc: netdev@vger.kernel.org; linux-kernel@vger.kernel.org
> > > > Subject: Slowness forming TIPC cluster with explicit node
> > > > addresses
> > > >
> > > > Hi,
> > > >
> > > > I'm having problems forming a TIPC cluster between 2 nodes.
> > > >
> > > > This is the basic steps I'm going through on each node.
> > > >
> > > > modprobe tipc
> > > > ip link set eth2 up
> > > > tipc node set addr 1.1.5 # or 1.1.6 tipc bearer enable media eth
> > > > dev eth0
> > > eth2, I assume...
> > >
> > Yes sorry I keep switching between between Ethernet ports for testing
> > so I hand edited the email.
> >
> > >
> > > >
> > > >
> > > >
> > > > Then to confirm if the cluster is formed I use tipc link list
> > > >
> > > > [root@node-5 ~]# tipc link list
> > > > broadcast-link: up
> > > > ...
> > > >
> > > > Looking at tcpdump the two nodes are sending packets
> > > >
> > > > 22:30:05.782320 TIPC v2.0 1.1.5 > 0.0.0, headerlength 60 bytes,
> > > > MessageSize
> > > > 76 bytes, Neighbor Detection Protocol internal, messageType Link
> > > > request
> > > > 22:30:05.863555 TIPC v2.0 1.1.6 > 0.0.0, headerlength 60 bytes,
> > > > MessageSize
> > > > 76 bytes, Neighbor Detection Protocol internal, messageType Link
> > > > request
> > > >
> > > > Eventually (after a few minutes) the link does come up
> > > >
> > > > [root@node-6 ~]# tipc link list
> > > > broadcast-link: up
> > > > 1001006:eth2-1001005:eth2: up
> > > >
> > > > [root@node-5 ~]# tipc link list
> > > > broadcast-link: up
> > > > 1001005:eth2-1001006:eth2: up
> > > >
> > > > When I remove the "tipc node set addr" things seem to kick into
> > > > life straight away
> > > >
> > > > [root@node-5 ~]# tipc link list
> > > > broadcast-link: up
> > > > 0050b61bd2aa:eth2-0050b61e6dfa:eth2: up
> > > >
> > > > So there appears to be some difference in behaviour between having
> > > > an explicit node address and using the default. Unfortunately our
> > > > application relies on setting the node addresses.
> > > I do this many times a day, without any problems. If there would be
> > > any time difference, I would expect the 'auto configurable' version
> > > to be slower, because it involves a DAD step.
> > > Are you sure you don't have any other nodes running in your system?
> > >
> > > ///jon
> > >
> > Nope the two nodes are connected back to back. Does the number of
> > Ethernet interfaces make a difference? As you can see I've got 3 on
> > each node. One is completely disconnected, one is for booting over
> > TFTP
> >  (only used by U-boot) and the other is the USB Ethernet I'm using for
> > testing.
> >
> 
> So I can still reproduce this on nodes that only have one network interface and
> are the only things connected.
> 
> I did find one thing that helps
> 
> diff --git a/net/tipc/discover.c b/net/tipc/discover.c index
> c138d68e8a69..49921dad404a 100644
> --- a/net/tipc/discover.c
> +++ b/net/tipc/discover.c
> @@ -358,10 +358,10 @@ int tipc_disc_create(struct net *net, struct
> tipc_bearer *b,
>         tipc_disc_init_msg(net, d->skb, DSC_REQ_MSG, b);
> 
>         /* Do we need an address trial period first ? */
> -       if (!tipc_own_addr(net)) {
> +//     if (!tipc_own_addr(net)) {
>                 tn->addr_trial_end = jiffies + msecs_to_jiffies(1000);
>                 msg_set_type(buf_msg(d->skb), DSC_TRIAL_MSG);
> -       }
> +//     }
>         memcpy(&d->dest, dest, sizeof(*dest));
>         d->net = net;
>         d->bearer_id = b->identity;
> 
> I think because with pre-configured addresses the duplicate address detection
> is skipped the shorter init phase is skipped. Would is make sense to
> unconditionally do the trial step? Or is there some better way to get things to
> transition with pre-assigned addresses.

I am on vacation until the end of next-week, so I can't give you any good analysis right now.
To do the trial step doesn’t make much sense to me, -it would only delay the setup unnecessarily (but with only 1 second).
Can you check the initial value of addr_trial_end when there a pre-configured address?

///jon


^ permalink raw reply

* [PATCH v6 0/3] mm/gup: add make_dirty arg to put_user_pages_dirty_lock()
From: john.hubbard @ 2019-08-04 21:40 UTC (permalink / raw)
  To: Andrew Morton
  Cc: Alexander Viro, Björn Töpel, Boaz Harrosh,
	Christoph Hellwig, Daniel Vetter, Dan Williams, Dave Chinner,
	David Airlie, David S . Miller, Ilya Dryomov, Jan Kara,
	Jason Gunthorpe, Jens Axboe, Jérôme Glisse,
	Johannes Thumshirn, Magnus Karlsson, Matthew Wilcox,
	Miklos Szeredi, Ming Lei, Sage Weil, Santosh Shilimkar, Yan Zheng,
	netdev, dri-devel, linux-mm, linux-rdma, bpf, LKML, John Hubbard

From: John Hubbard <jhubbard@nvidia.com>

Changes since v5:

* Patch #1: Fixed a bug that I introduced in v4:
  drivers/infiniband/sw/siw/siw_mem.c needs to refer to
  umem->page_chunk[i].plist, rather than umem->page_chunk[i].

Changes since v4:

* Christophe Hellwig's review applied: deleted siw_free_plist() and
  __qib_release_user_pages(), now that put_user_pages_dirty_lock() does
  what those routines were doing.

* Applied Bjorn's ACK for net/xdp, and Christophe's Reviewed-by for patch
  #1.

Changes since v3:

* Fixed an unused variable warning in siw_mem.c

Changes since v2:

* Critical bug fix: remove a stray "break;" from the new routine.

Changes since v1:

* Instead of providing __put_user_pages(), add an argument to
  put_user_pages_dirty_lock(), and delete put_user_pages_dirty().
  This is based on the following points:

    1. Lots of call sites become simpler if a bool is passed
    into put_user_page*(), instead of making the call site
    choose which put_user_page*() variant to call.

    2. Christoph Hellwig's observation that set_page_dirty_lock()
    is usually correct, and set_page_dirty() is usually a
    bug, or at least questionable, within a put_user_page*()
    calling chain.

* Added the Infiniband driver back to the patch series, because it is
  a caller of put_user_pages_dirty_lock().

Unchanged parts from the v1 cover letter (except for the diffstat):

Notes about the remaining patches to come:

There are about 50+ patches in my tree [2], and I'll be sending out the
remaining ones in a few more groups:

    * The block/bio related changes (Jerome mostly wrote those, but I've
      had to move stuff around extensively, and add a little code)

    * mm/ changes

    * other subsystem patches

    * an RFC that shows the current state of the tracking patch set. That
      can only be applied after all call sites are converted, but it's
      good to get an early look at it.

This is part a tree-wide conversion, as described in commit fc1d8e7cca2d
("mm: introduce put_user_page*(), placeholder versions").

John Hubbard (3):
  mm/gup: add make_dirty arg to put_user_pages_dirty_lock()
  drivers/gpu/drm/via: convert put_page() to put_user_page*()
  net/xdp: convert put_page() to put_user_page*()

 drivers/gpu/drm/via/via_dmablit.c          |  10 +-
 drivers/infiniband/core/umem.c             |   5 +-
 drivers/infiniband/hw/hfi1/user_pages.c    |   5 +-
 drivers/infiniband/hw/qib/qib_user_pages.c |  13 +--
 drivers/infiniband/hw/usnic/usnic_uiom.c   |   5 +-
 drivers/infiniband/sw/siw/siw_mem.c        |  19 +---
 include/linux/mm.h                         |   5 +-
 mm/gup.c                                   | 115 +++++++++------------
 net/xdp/xdp_umem.c                         |   9 +-
 9 files changed, 64 insertions(+), 122 deletions(-)

-- 
2.22.0


^ permalink raw reply

* [PATCH v6 1/3] mm/gup: add make_dirty arg to put_user_pages_dirty_lock()
From: john.hubbard @ 2019-08-04 21:40 UTC (permalink / raw)
  To: Andrew Morton
  Cc: Alexander Viro, Björn Töpel, Boaz Harrosh,
	Christoph Hellwig, Daniel Vetter, Dan Williams, Dave Chinner,
	David Airlie, David S . Miller, Ilya Dryomov, Jan Kara,
	Jason Gunthorpe, Jens Axboe, Jérôme Glisse,
	Johannes Thumshirn, Magnus Karlsson, Matthew Wilcox,
	Miklos Szeredi, Ming Lei, Sage Weil, Santosh Shilimkar, Yan Zheng,
	netdev, dri-devel, linux-mm, linux-rdma, bpf, LKML, John Hubbard,
	Ira Weiny
In-Reply-To: <20190804214042.4564-1-jhubbard@nvidia.com>

From: John Hubbard <jhubbard@nvidia.com>

Provide a more capable variation of put_user_pages_dirty_lock(),
and delete put_user_pages_dirty(). This is based on the
following:

1. Lots of call sites become simpler if a bool is passed
into put_user_page*(), instead of making the call site
choose which put_user_page*() variant to call.

2. Christoph Hellwig's observation that set_page_dirty_lock()
is usually correct, and set_page_dirty() is usually a
bug, or at least questionable, within a put_user_page*()
calling chain.

This leads to the following API choices:

    * put_user_pages_dirty_lock(page, npages, make_dirty)

    * There is no put_user_pages_dirty(). You have to
      hand code that, in the rare case that it's
      required.

Reviewed-by: Christoph Hellwig <hch@lst.de>
Cc: Matthew Wilcox <willy@infradead.org>
Cc: Jan Kara <jack@suse.cz>
Cc: Ira Weiny <ira.weiny@intel.com>
Cc: Jason Gunthorpe <jgg@ziepe.ca>
Signed-off-by: John Hubbard <jhubbard@nvidia.com>
---
 drivers/infiniband/core/umem.c             |   5 +-
 drivers/infiniband/hw/hfi1/user_pages.c    |   5 +-
 drivers/infiniband/hw/qib/qib_user_pages.c |  13 +--
 drivers/infiniband/hw/usnic/usnic_uiom.c   |   5 +-
 drivers/infiniband/sw/siw/siw_mem.c        |  19 +---
 include/linux/mm.h                         |   5 +-
 mm/gup.c                                   | 115 +++++++++------------
 7 files changed, 61 insertions(+), 106 deletions(-)

diff --git a/drivers/infiniband/core/umem.c b/drivers/infiniband/core/umem.c
index 08da840ed7ee..965cf9dea71a 100644
--- a/drivers/infiniband/core/umem.c
+++ b/drivers/infiniband/core/umem.c
@@ -54,10 +54,7 @@ static void __ib_umem_release(struct ib_device *dev, struct ib_umem *umem, int d
 
 	for_each_sg_page(umem->sg_head.sgl, &sg_iter, umem->sg_nents, 0) {
 		page = sg_page_iter_page(&sg_iter);
-		if (umem->writable && dirty)
-			put_user_pages_dirty_lock(&page, 1);
-		else
-			put_user_page(page);
+		put_user_pages_dirty_lock(&page, 1, umem->writable && dirty);
 	}
 
 	sg_free_table(&umem->sg_head);
diff --git a/drivers/infiniband/hw/hfi1/user_pages.c b/drivers/infiniband/hw/hfi1/user_pages.c
index b89a9b9aef7a..469acb961fbd 100644
--- a/drivers/infiniband/hw/hfi1/user_pages.c
+++ b/drivers/infiniband/hw/hfi1/user_pages.c
@@ -118,10 +118,7 @@ int hfi1_acquire_user_pages(struct mm_struct *mm, unsigned long vaddr, size_t np
 void hfi1_release_user_pages(struct mm_struct *mm, struct page **p,
 			     size_t npages, bool dirty)
 {
-	if (dirty)
-		put_user_pages_dirty_lock(p, npages);
-	else
-		put_user_pages(p, npages);
+	put_user_pages_dirty_lock(p, npages, dirty);
 
 	if (mm) { /* during close after signal, mm can be NULL */
 		atomic64_sub(npages, &mm->pinned_vm);
diff --git a/drivers/infiniband/hw/qib/qib_user_pages.c b/drivers/infiniband/hw/qib/qib_user_pages.c
index bfbfbb7e0ff4..26c1fb8d45cc 100644
--- a/drivers/infiniband/hw/qib/qib_user_pages.c
+++ b/drivers/infiniband/hw/qib/qib_user_pages.c
@@ -37,15 +37,6 @@
 
 #include "qib.h"
 
-static void __qib_release_user_pages(struct page **p, size_t num_pages,
-				     int dirty)
-{
-	if (dirty)
-		put_user_pages_dirty_lock(p, num_pages);
-	else
-		put_user_pages(p, num_pages);
-}
-
 /**
  * qib_map_page - a safety wrapper around pci_map_page()
  *
@@ -124,7 +115,7 @@ int qib_get_user_pages(unsigned long start_page, size_t num_pages,
 
 	return 0;
 bail_release:
-	__qib_release_user_pages(p, got, 0);
+	put_user_pages_dirty_lock(p, got, false);
 bail:
 	atomic64_sub(num_pages, &current->mm->pinned_vm);
 	return ret;
@@ -132,7 +123,7 @@ int qib_get_user_pages(unsigned long start_page, size_t num_pages,
 
 void qib_release_user_pages(struct page **p, size_t num_pages)
 {
-	__qib_release_user_pages(p, num_pages, 1);
+	put_user_pages_dirty_lock(p, num_pages, true);
 
 	/* during close after signal, mm can be NULL */
 	if (current->mm)
diff --git a/drivers/infiniband/hw/usnic/usnic_uiom.c b/drivers/infiniband/hw/usnic/usnic_uiom.c
index 0b0237d41613..62e6ffa9ad78 100644
--- a/drivers/infiniband/hw/usnic/usnic_uiom.c
+++ b/drivers/infiniband/hw/usnic/usnic_uiom.c
@@ -75,10 +75,7 @@ static void usnic_uiom_put_pages(struct list_head *chunk_list, int dirty)
 		for_each_sg(chunk->page_list, sg, chunk->nents, i) {
 			page = sg_page(sg);
 			pa = sg_phys(sg);
-			if (dirty)
-				put_user_pages_dirty_lock(&page, 1);
-			else
-				put_user_page(page);
+			put_user_pages_dirty_lock(&page, 1, dirty);
 			usnic_dbg("pa: %pa\n", &pa);
 		}
 		kfree(chunk);
diff --git a/drivers/infiniband/sw/siw/siw_mem.c b/drivers/infiniband/sw/siw/siw_mem.c
index 67171c82b0c4..1e197753bf2f 100644
--- a/drivers/infiniband/sw/siw/siw_mem.c
+++ b/drivers/infiniband/sw/siw/siw_mem.c
@@ -60,20 +60,6 @@ struct siw_mem *siw_mem_id2obj(struct siw_device *sdev, int stag_index)
 	return NULL;
 }
 
-static void siw_free_plist(struct siw_page_chunk *chunk, int num_pages,
-			   bool dirty)
-{
-	struct page **p = chunk->plist;
-
-	while (num_pages--) {
-		if (!PageDirty(*p) && dirty)
-			put_user_pages_dirty_lock(p, 1);
-		else
-			put_user_page(*p);
-		p++;
-	}
-}
-
 void siw_umem_release(struct siw_umem *umem, bool dirty)
 {
 	struct mm_struct *mm_s = umem->owning_mm;
@@ -82,8 +68,9 @@ void siw_umem_release(struct siw_umem *umem, bool dirty)
 	for (i = 0; num_pages; i++) {
 		int to_free = min_t(int, PAGES_PER_CHUNK, num_pages);
 
-		siw_free_plist(&umem->page_chunk[i], to_free,
-			       umem->writable && dirty);
+		put_user_pages_dirty_lock(umem->page_chunk[i].plist,
+					  to_free,
+					  umem->writable && dirty);
 		kfree(umem->page_chunk[i].plist);
 		num_pages -= to_free;
 	}
diff --git a/include/linux/mm.h b/include/linux/mm.h
index 0334ca97c584..9759b6a24420 100644
--- a/include/linux/mm.h
+++ b/include/linux/mm.h
@@ -1057,8 +1057,9 @@ static inline void put_user_page(struct page *page)
 	put_page(page);
 }
 
-void put_user_pages_dirty(struct page **pages, unsigned long npages);
-void put_user_pages_dirty_lock(struct page **pages, unsigned long npages);
+void put_user_pages_dirty_lock(struct page **pages, unsigned long npages,
+			       bool make_dirty);
+
 void put_user_pages(struct page **pages, unsigned long npages);
 
 #if defined(CONFIG_SPARSEMEM) && !defined(CONFIG_SPARSEMEM_VMEMMAP)
diff --git a/mm/gup.c b/mm/gup.c
index 98f13ab37bac..7fefd7ab02c4 100644
--- a/mm/gup.c
+++ b/mm/gup.c
@@ -29,85 +29,70 @@ struct follow_page_context {
 	unsigned int page_mask;
 };
 
-typedef int (*set_dirty_func_t)(struct page *page);
-
-static void __put_user_pages_dirty(struct page **pages,
-				   unsigned long npages,
-				   set_dirty_func_t sdf)
-{
-	unsigned long index;
-
-	for (index = 0; index < npages; index++) {
-		struct page *page = compound_head(pages[index]);
-
-		/*
-		 * Checking PageDirty at this point may race with
-		 * clear_page_dirty_for_io(), but that's OK. Two key cases:
-		 *
-		 * 1) This code sees the page as already dirty, so it skips
-		 * the call to sdf(). That could happen because
-		 * clear_page_dirty_for_io() called page_mkclean(),
-		 * followed by set_page_dirty(). However, now the page is
-		 * going to get written back, which meets the original
-		 * intention of setting it dirty, so all is well:
-		 * clear_page_dirty_for_io() goes on to call
-		 * TestClearPageDirty(), and write the page back.
-		 *
-		 * 2) This code sees the page as clean, so it calls sdf().
-		 * The page stays dirty, despite being written back, so it
-		 * gets written back again in the next writeback cycle.
-		 * This is harmless.
-		 */
-		if (!PageDirty(page))
-			sdf(page);
-
-		put_user_page(page);
-	}
-}
-
 /**
- * put_user_pages_dirty() - release and dirty an array of gup-pinned pages
- * @pages:  array of pages to be marked dirty and released.
+ * put_user_pages_dirty_lock() - release and optionally dirty gup-pinned pages
+ * @pages:  array of pages to be maybe marked dirty, and definitely released.
  * @npages: number of pages in the @pages array.
+ * @make_dirty: whether to mark the pages dirty
  *
  * "gup-pinned page" refers to a page that has had one of the get_user_pages()
  * variants called on that page.
  *
  * For each page in the @pages array, make that page (or its head page, if a
- * compound page) dirty, if it was previously listed as clean. Then, release
- * the page using put_user_page().
+ * compound page) dirty, if @make_dirty is true, and if the page was previously
+ * listed as clean. In any case, releases all pages using put_user_page(),
+ * possibly via put_user_pages(), for the non-dirty case.
  *
  * Please see the put_user_page() documentation for details.
  *
- * set_page_dirty(), which does not lock the page, is used here.
- * Therefore, it is the caller's responsibility to ensure that this is
- * safe. If not, then put_user_pages_dirty_lock() should be called instead.
+ * set_page_dirty_lock() is used internally. If instead, set_page_dirty() is
+ * required, then the caller should a) verify that this is really correct,
+ * because _lock() is usually required, and b) hand code it:
+ * set_page_dirty_lock(), put_user_page().
  *
  */
-void put_user_pages_dirty(struct page **pages, unsigned long npages)
+void put_user_pages_dirty_lock(struct page **pages, unsigned long npages,
+			       bool make_dirty)
 {
-	__put_user_pages_dirty(pages, npages, set_page_dirty);
-}
-EXPORT_SYMBOL(put_user_pages_dirty);
+	unsigned long index;
 
-/**
- * put_user_pages_dirty_lock() - release and dirty an array of gup-pinned pages
- * @pages:  array of pages to be marked dirty and released.
- * @npages: number of pages in the @pages array.
- *
- * For each page in the @pages array, make that page (or its head page, if a
- * compound page) dirty, if it was previously listed as clean. Then, release
- * the page using put_user_page().
- *
- * Please see the put_user_page() documentation for details.
- *
- * This is just like put_user_pages_dirty(), except that it invokes
- * set_page_dirty_lock(), instead of set_page_dirty().
- *
- */
-void put_user_pages_dirty_lock(struct page **pages, unsigned long npages)
-{
-	__put_user_pages_dirty(pages, npages, set_page_dirty_lock);
+	/*
+	 * TODO: this can be optimized for huge pages: if a series of pages is
+	 * physically contiguous and part of the same compound page, then a
+	 * single operation to the head page should suffice.
+	 */
+
+	if (!make_dirty) {
+		put_user_pages(pages, npages);
+		return;
+	}
+
+	for (index = 0; index < npages; index++) {
+		struct page *page = compound_head(pages[index]);
+		/*
+		 * Checking PageDirty at this point may race with
+		 * clear_page_dirty_for_io(), but that's OK. Two key
+		 * cases:
+		 *
+		 * 1) This code sees the page as already dirty, so it
+		 * skips the call to set_page_dirty(). That could happen
+		 * because clear_page_dirty_for_io() called
+		 * page_mkclean(), followed by set_page_dirty().
+		 * However, now the page is going to get written back,
+		 * which meets the original intention of setting it
+		 * dirty, so all is well: clear_page_dirty_for_io() goes
+		 * on to call TestClearPageDirty(), and write the page
+		 * back.
+		 *
+		 * 2) This code sees the page as clean, so it calls
+		 * set_page_dirty(). The page stays dirty, despite being
+		 * written back, so it gets written back again in the
+		 * next writeback cycle. This is harmless.
+		 */
+		if (!PageDirty(page))
+			set_page_dirty_lock(page);
+		put_user_page(page);
+	}
 }
 EXPORT_SYMBOL(put_user_pages_dirty_lock);
 
-- 
2.22.0


^ permalink raw reply related

* [PATCH v6 3/3] net/xdp: convert put_page() to put_user_page*()
From: john.hubbard @ 2019-08-04 21:40 UTC (permalink / raw)
  To: Andrew Morton
  Cc: Alexander Viro, Björn Töpel, Boaz Harrosh,
	Christoph Hellwig, Daniel Vetter, Dan Williams, Dave Chinner,
	David Airlie, David S . Miller, Ilya Dryomov, Jan Kara,
	Jason Gunthorpe, Jens Axboe, Jérôme Glisse,
	Johannes Thumshirn, Magnus Karlsson, Matthew Wilcox,
	Miklos Szeredi, Ming Lei, Sage Weil, Santosh Shilimkar, Yan Zheng,
	netdev, dri-devel, linux-mm, linux-rdma, bpf, LKML, John Hubbard
In-Reply-To: <20190804214042.4564-1-jhubbard@nvidia.com>

From: John Hubbard <jhubbard@nvidia.com>

For pages that were retained via get_user_pages*(), release those pages
via the new put_user_page*() routines, instead of via put_page() or
release_pages().

This is part a tree-wide conversion, as described in commit fc1d8e7cca2d
("mm: introduce put_user_page*(), placeholder versions").

Acked-by: Björn Töpel <bjorn.topel@intel.com>
Cc: Magnus Karlsson <magnus.karlsson@intel.com>
Cc: David S. Miller <davem@davemloft.net>
Cc: netdev@vger.kernel.org
Signed-off-by: John Hubbard <jhubbard@nvidia.com>
---
 net/xdp/xdp_umem.c | 9 +--------
 1 file changed, 1 insertion(+), 8 deletions(-)

diff --git a/net/xdp/xdp_umem.c b/net/xdp/xdp_umem.c
index 83de74ca729a..17c4b3d3dc34 100644
--- a/net/xdp/xdp_umem.c
+++ b/net/xdp/xdp_umem.c
@@ -166,14 +166,7 @@ void xdp_umem_clear_dev(struct xdp_umem *umem)
 
 static void xdp_umem_unpin_pages(struct xdp_umem *umem)
 {
-	unsigned int i;
-
-	for (i = 0; i < umem->npgs; i++) {
-		struct page *page = umem->pgs[i];
-
-		set_page_dirty_lock(page);
-		put_page(page);
-	}
+	put_user_pages_dirty_lock(umem->pgs, umem->npgs, true);
 
 	kfree(umem->pgs);
 	umem->pgs = NULL;
-- 
2.22.0


^ permalink raw reply related

* [PATCH v6 2/3] drivers/gpu/drm/via: convert put_page() to put_user_page*()
From: john.hubbard @ 2019-08-04 21:40 UTC (permalink / raw)
  To: Andrew Morton
  Cc: Alexander Viro, Björn Töpel, Boaz Harrosh,
	Christoph Hellwig, Daniel Vetter, Dan Williams, Dave Chinner,
	David Airlie, David S . Miller, Ilya Dryomov, Jan Kara,
	Jason Gunthorpe, Jens Axboe, Jérôme Glisse,
	Johannes Thumshirn, Magnus Karlsson, Matthew Wilcox,
	Miklos Szeredi, Ming Lei, Sage Weil, Santosh Shilimkar, Yan Zheng,
	netdev, dri-devel, linux-mm, linux-rdma, bpf, LKML, John Hubbard
In-Reply-To: <20190804214042.4564-1-jhubbard@nvidia.com>

From: John Hubbard <jhubbard@nvidia.com>

For pages that were retained via get_user_pages*(), release those pages
via the new put_user_page*() routines, instead of via put_page() or
release_pages().

This is part a tree-wide conversion, as described in commit fc1d8e7cca2d
("mm: introduce put_user_page*(), placeholder versions").

Also reverse the order of a comparison, in order to placate
checkpatch.pl.

Cc: David Airlie <airlied@linux.ie>
Cc: Daniel Vetter <daniel@ffwll.ch>
Cc: dri-devel@lists.freedesktop.org
Signed-off-by: John Hubbard <jhubbard@nvidia.com>
---
 drivers/gpu/drm/via/via_dmablit.c | 10 ++--------
 1 file changed, 2 insertions(+), 8 deletions(-)

diff --git a/drivers/gpu/drm/via/via_dmablit.c b/drivers/gpu/drm/via/via_dmablit.c
index 062067438f1d..b5b5bf0ba65e 100644
--- a/drivers/gpu/drm/via/via_dmablit.c
+++ b/drivers/gpu/drm/via/via_dmablit.c
@@ -171,7 +171,6 @@ via_map_blit_for_device(struct pci_dev *pdev,
 static void
 via_free_sg_info(struct pci_dev *pdev, drm_via_sg_info_t *vsg)
 {
-	struct page *page;
 	int i;
 
 	switch (vsg->state) {
@@ -186,13 +185,8 @@ via_free_sg_info(struct pci_dev *pdev, drm_via_sg_info_t *vsg)
 		kfree(vsg->desc_pages);
 		/* fall through */
 	case dr_via_pages_locked:
-		for (i = 0; i < vsg->num_pages; ++i) {
-			if (NULL != (page = vsg->pages[i])) {
-				if (!PageReserved(page) && (DMA_FROM_DEVICE == vsg->direction))
-					SetPageDirty(page);
-				put_page(page);
-			}
-		}
+		put_user_pages_dirty_lock(vsg->pages, vsg->num_pages,
+					  (vsg->direction == DMA_FROM_DEVICE));
 		/* fall through */
 	case dr_via_pages_alloc:
 		vfree(vsg->pages);
-- 
2.22.0


^ permalink raw reply related

* Re: [PATCH 1/1] bpf: introduce new helper udp_flow_src_port
From: Farid Zakaria @ 2019-08-04 20:43 UTC (permalink / raw)
  To: Y Song; +Cc: Alexei Starovoitov, Daniel Borkmann, netdev, bpf
In-Reply-To: <CACCo2jmcYAfY8zHJiT7NCb-Ct7Wguk9XHRc8QmZa7V3eJy0WTg@mail.gmail.com>

* re-sending as I've sent previously as HTML ... sorry *

First off, thank you for taking the time to review this patch.

It's not clear to me the backport you'd like for libbpf, I have been following
the documentation outlined in
https://www.kernel.org/doc/html/latest/bpf/index.html
I will hold off on changes to this patch as you've asked for it going forward.

This patch is inspired by a MPLSoUDP (https://tools.ietf.org/html/rfc7510)]
kernel module that was ported to eBPF -- our implementation is slightly
modified for our custom use case.

The Linux kernel provides a single abstraction for the src port for
UDP tunneling
via udp_flow_src_port. If it's improved eBPF filters would benefit if
the call is the same.

Exposing this function to eBPF programs would maintain feature parity
with other kernel
tunneling implementations.

Farid Zakaria

Farid Zakaria



On Sun, Aug 4, 2019 at 1:41 PM Farid Zakaria <farid.m.zakaria@gmail.com> wrote:
>
> First off, thank you for taking the time to review this patch.
>
> It's not clear to me the backport you'd like for libbpf, I have been following
> the documentation outlined in https://www.kernel.org/doc/html/latest/bpf/index.html
> I will hold off on changes to this patch as you've asked for it going forward.
>
> This patch is inspired by a MPLSoUDP (https://tools.ietf.org/html/rfc7510)]
> kernel module that was ported to eBPF -- our implementation is slightly
> modified for our custom use case.
>
> The Linux kernel provides a single abstraction for the src port for UDP tunneling
> via udp_flow_src_port. If it's improved eBPF filters would benefit if the call is the same.
>
> Exposing this function to eBPF programs would maintain feature parity with other kernel
> tunneling implementations.
>
> Cheers,
> Farid Zakaria
>
>
>
> On Sat, Aug 3, 2019 at 11:52 PM Y Song <ys114321@gmail.com> wrote:
>>
>> On Sat, Aug 3, 2019 at 8:29 PM Farid Zakaria <farid.m.zakaria@gmail.com> wrote:
>> >
>> > Foo over UDP uses UDP encapsulation to add additional entropy
>> > into the packets so that they get beter distribution across EMCP
>> > routes.
>> >
>> > Expose udp_flow_src_port as a bpf helper so that tunnel filters
>> > can benefit from the helper.
>> >
>> > Signed-off-by: Farid Zakaria <farid.m.zakaria@gmail.com>
>> > ---
>> >  include/uapi/linux/bpf.h                      | 21 +++++++--
>> >  net/core/filter.c                             | 20 ++++++++
>> >  tools/include/uapi/linux/bpf.h                | 21 +++++++--
>> >  tools/testing/selftests/bpf/bpf_helpers.h     |  2 +
>> >  .../bpf/prog_tests/udp_flow_src_port.c        | 28 +++++++++++
>> >  .../bpf/progs/test_udp_flow_src_port_kern.c   | 47 +++++++++++++++++++
>> >  6 files changed, 131 insertions(+), 8 deletions(-)
>> >  create mode 100644 tools/testing/selftests/bpf/prog_tests/udp_flow_src_port.c
>> >  create mode 100644 tools/testing/selftests/bpf/progs/test_udp_flow_src_port_kern.c
>>
>> First, for each review, backport and sync with libbpf repo, in the future,
>> could you break the patch to two patches?
>>    1. kernel changes (net/core/filter.c, include/uapi/linux/bpf.h)
>>    2. tools/include/uapi/linux/bpf.h
>>    3. tools/testing/ changes
>>
>> Second, could you explain why existing __sk_buff->hash not enough?
>> there are corner cases where if __sk_buff->hash is 0 and the kernel did some
>> additional hashing, but maybe you can approximate in bpf program?
>> For case, min >= max, I suppose you can get min/max port values
>> from the user space for a particular net device and then calculate
>> the hash in the bpf program?
>> What I want to know if how much accuracy you will lose if you just
>> use __sk_buff->hash and do approximation in bpf program.
>>
>> >
>> > diff --git a/include/uapi/linux/bpf.h b/include/uapi/linux/bpf.h
>> > index 4393bd4b2419..90e814153dec 100644
>> > --- a/include/uapi/linux/bpf.h
>> > +++ b/include/uapi/linux/bpf.h
>> > @@ -2545,9 +2545,21 @@ union bpf_attr {
>> >   *             *th* points to the start of the TCP header, while *th_len*
>> >   *             contains **sizeof**\ (**struct tcphdr**).
>> >   *
>> > - *     Return
>> > - *             0 if *iph* and *th* are a valid SYN cookie ACK, or a negative
>> > - *             error otherwise.
>> > + *  Return
>> > + *      0 if *iph* and *th* are a valid SYN cookie ACK, or a negative
>> > + *      error otherwise.
>> > + *
>> > + * int bpf_udp_flow_src_port(struct sk_buff *skb, int min, int max, int use_eth)
>> > + *  Description
>> > + *      It's common to implement tunnelling inside a UDP protocol to provide
>> > + *      additional randomness to the packet. The destination port of the UDP
>> > + *      header indicates the inner packet type whereas the source port is used
>> > + *      for additional entropy.
>> > + *
>> > + *  Return
>> > + *      An obfuscated hash of the packet that falls within the
>> > + *      min & max port range.
>> > + *      If min >= max, the default port range is used
>> >   *
>> >   * int bpf_sysctl_get_name(struct bpf_sysctl *ctx, char *buf, size_t buf_len, u64 flags)
>> >   *     Description
>> > @@ -2853,7 +2865,8 @@ union bpf_attr {
>> >         FN(sk_storage_get),             \
>> >         FN(sk_storage_delete),          \
>> >         FN(send_signal),                \
>> > -       FN(tcp_gen_syncookie),
>> > +       FN(tcp_gen_syncookie),  \
>> > +       FN(udp_flow_src_port),
>> >
>> >  /* integer value in 'imm' field of BPF_CALL instruction selects which helper
>> >   * function eBPF program intends to call
>> > diff --git a/net/core/filter.c b/net/core/filter.c
>> > index 5a2707918629..fdf0ebb8c2c8 100644
>> > --- a/net/core/filter.c
>> > +++ b/net/core/filter.c
>> > @@ -2341,6 +2341,24 @@ static const struct bpf_func_proto bpf_msg_pull_data_proto = {
>> >         .arg4_type      = ARG_ANYTHING,
>> >  };
>> >
>> > +BPF_CALL_4(bpf_udp_flow_src_port, struct sk_buff *, skb, int, min,
>> > +          int, max, int, use_eth)
>> > +{
>> > +       struct net *net = dev_net(skb->dev);
>> > +
>> > +       return udp_flow_src_port(net, skb, min, max, use_eth);
>> > +}
>> > +
>> [...]

^ permalink raw reply

* Re: [PATCH net] net: dsa: mv88e6xxx: drop adjust_link to enabled phylink
From: Vladimir Oltean @ 2019-08-04 19:37 UTC (permalink / raw)
  To: Hubert Feurstein
  Cc: netdev, lkml, Andrew Lunn, Vivien Didelot, Florian Fainelli,
	David S. Miller, Ioana Ciornei
In-Reply-To: <20190731154239.19270-1-h.feurstein@gmail.com>

On Wed, 31 Jul 2019 at 18:43, Hubert Feurstein <h.feurstein@gmail.com> wrote:
>
> We have to drop the adjust_link callback in order to finally migrate to
> phylink.
>
> Otherwise we get the following warning during startup:
>   "mv88e6xxx 2188000.ethernet-1:10: Using legacy PHYLIB callbacks. Please
>    migrate to PHYLINK!"
>
> The warning is generated in the function dsa_port_link_register_of in
> dsa/port.c:
>
>   int dsa_port_link_register_of(struct dsa_port *dp)
>   {
>         struct dsa_switch *ds = dp->ds;
>
>         if (!ds->ops->adjust_link)
>                 return dsa_port_phylink_register(dp);
>
>         dev_warn(ds->dev,
>                  "Using legacy PHYLIB callbacks. Please migrate to PHYLINK!\n");
>         [...]
>   }
>
> Signed-off-by: Hubert Feurstein <h.feurstein@gmail.com>
> ---

Reviewed-by: Vladimir Oltean <olteanv@gmail.com>

>  drivers/net/dsa/mv88e6xxx/chip.c | 26 --------------------------
>  1 file changed, 26 deletions(-)
>
> diff --git a/drivers/net/dsa/mv88e6xxx/chip.c b/drivers/net/dsa/mv88e6xxx/chip.c
> index 366f70bfe055..37e8babd035f 100644
> --- a/drivers/net/dsa/mv88e6xxx/chip.c
> +++ b/drivers/net/dsa/mv88e6xxx/chip.c
> @@ -27,7 +27,6 @@
>  #include <linux/platform_data/mv88e6xxx.h>
>  #include <linux/netdevice.h>
>  #include <linux/gpio/consumer.h>
> -#include <linux/phy.h>
>  #include <linux/phylink.h>
>  #include <net/dsa.h>
>
> @@ -482,30 +481,6 @@ static int mv88e6xxx_phy_is_internal(struct dsa_switch *ds, int port)
>         return port < chip->info->num_internal_phys;
>  }
>
> -/* We expect the switch to perform auto negotiation if there is a real
> - * phy. However, in the case of a fixed link phy, we force the port
> - * settings from the fixed link settings.
> - */
> -static void mv88e6xxx_adjust_link(struct dsa_switch *ds, int port,
> -                                 struct phy_device *phydev)
> -{
> -       struct mv88e6xxx_chip *chip = ds->priv;
> -       int err;
> -
> -       if (!phy_is_pseudo_fixed_link(phydev) &&
> -           mv88e6xxx_phy_is_internal(ds, port))
> -               return;
> -
> -       mv88e6xxx_reg_lock(chip);
> -       err = mv88e6xxx_port_setup_mac(chip, port, phydev->link, phydev->speed,
> -                                      phydev->duplex, phydev->pause,
> -                                      phydev->interface);
> -       mv88e6xxx_reg_unlock(chip);
> -
> -       if (err && err != -EOPNOTSUPP)
> -               dev_err(ds->dev, "p%d: failed to configure MAC\n", port);
> -}
> -
>  static void mv88e6065_phylink_validate(struct mv88e6xxx_chip *chip, int port,
>                                        unsigned long *mask,
>                                        struct phylink_link_state *state)
> @@ -4755,7 +4730,6 @@ static int mv88e6xxx_port_egress_floods(struct dsa_switch *ds, int port,
>  static const struct dsa_switch_ops mv88e6xxx_switch_ops = {
>         .get_tag_protocol       = mv88e6xxx_get_tag_protocol,
>         .setup                  = mv88e6xxx_setup,
> -       .adjust_link            = mv88e6xxx_adjust_link,
>         .phylink_validate       = mv88e6xxx_validate,
>         .phylink_mac_link_state = mv88e6xxx_link_state,
>         .phylink_mac_config     = mv88e6xxx_mac_config,
> --
> 2.22.0
>

^ permalink raw reply


This is a public inbox, see mirroring instructions
for how to clone and mirror all data and code used for this inbox