netdev.vger.kernel.org archive mirror
 help / color / mirror / Atom feed
From: Willem de Bruijn <willemb@google.com>
To: netdev@vger.kernel.org
Cc: mst@redhat.com, jasowang@redhat.com,
	Willem de Bruijn <willemb@google.com>
Subject: [PATCH net-next RFC 09/10] sock: sendmsg zerocopy ulimit
Date: Thu, 20 Aug 2015 10:36:48 -0400	[thread overview]
Message-ID: <1440081408-12302-10-git-send-email-willemb@google.com> (raw)
In-Reply-To: <1440081408-12302-1-git-send-email-willemb@google.com>

From: Willem de Bruijn <willemb@google.com>

Bound the number of pages that a userspace process may pin.

Account pinned pages to the locked page count (`ulimit -l`) of the
caller and fail beyond the administrator-controlled threshold, similar
to InfiniBand.

Use an atomic variable to avoid having to take mmap_sem. Taking the
lock is expensive and, because it may sleep, would require scheduling
a worker on destruction: ubuf_info are often destroyed in atomic
context.

The current mm_struct.pinned_vm_ is a hack. A non-RFC patchset would
convert unsigned long pinned_vm_ and all its callers (infiniband) to
atomic_long_t.

Signed-off-by: Willem de Bruijn <willemb@google.com>
---
 include/linux/mm_types.h |  1 +
 include/linux/skbuff.h   |  5 +++++
 net/core/skbuff.c        | 46 ++++++++++++++++++++++++++++++++++++++++++++++
 3 files changed, 52 insertions(+)

diff --git a/include/linux/mm_types.h b/include/linux/mm_types.h
index 0038ac7..dc6e12a 100644
--- a/include/linux/mm_types.h
+++ b/include/linux/mm_types.h
@@ -402,6 +402,7 @@ struct mm_struct {
 	unsigned long total_vm;		/* Total pages mapped */
 	unsigned long locked_vm;	/* Pages that have PG_mlocked set */
 	unsigned long pinned_vm;	/* Refcount permanently increased */
+	atomic_t pinned_vm_;
 	unsigned long shared_vm;	/* Shared pages (files) */
 	unsigned long exec_vm;		/* VM_EXEC & ~VM_WRITE */
 	unsigned long stack_vm;		/* VM_GROWSUP/DOWN */
diff --git a/include/linux/skbuff.h b/include/linux/skbuff.h
index c1ea855..95a9f75 100644
--- a/include/linux/skbuff.h
+++ b/include/linux/skbuff.h
@@ -331,6 +331,11 @@ struct ubuf_info {
 		};
 	};
 	atomic_t refcnt;
+
+	struct mmpin {	/* per-ubuf_info record of pages charged against an mm */
+		struct mm_struct *mm;	/* mm that was charged; NULL until the first successful charge */
+		int num_pg;	/* running total of pages charged, subtracted again on unaccount */
+	} mmp;
 };
 
 #define skb_uarg(SKB)	((struct ubuf_info *)(skb_shinfo(SKB)->destructor_arg))
diff --git a/net/core/skbuff.c b/net/core/skbuff.c
index 4ae60ee..3742968 100644
--- a/net/core/skbuff.c
+++ b/net/core/skbuff.c
@@ -840,6 +840,42 @@ struct sk_buff *skb_morph(struct sk_buff *dst, struct sk_buff *src)
 }
 EXPORT_SYMBOL_GPL(skb_morph);
 
+static int mm_account_pinned_pages(struct mmpin *mmp, size_t size)	/* charge pages for size bytes to pinned_vm_; 0 on success, -ENOMEM over the limit */
+{
+	unsigned long max_pg, num_pg, new_pg, old_pg;
+	struct mm_struct *mm;
+
+	if (capable(CAP_IPC_LOCK) || !size)	/* nothing is charged for CAP_IPC_LOCK callers or for empty requests */
+		return 0;
+
+	num_pg = (size >> PAGE_SHIFT) + 2;	/* worst case: presumably allows for a buffer that starts and ends mid-page */
+	max_pg = rlimit(RLIMIT_MEMLOCK) >> PAGE_SHIFT;	/* RLIMIT_MEMLOCK expressed in pages */
+	mm = mmp->mm ? : current->mm;	/* reuse the mm already charged, else the caller's; NOTE(review): current->mm is not NULL-checked here */
+
+	do {	/* lockless update: retry if pinned_vm_ changed between the read and the cmpxchg */
+		old_pg = atomic_read(&mm->pinned_vm_);	/* NOTE(review): pinned_vm_ is atomic_t while the locals are unsigned long */
+		new_pg = old_pg + num_pg;
+		if (new_pg > max_pg)
+			return -ENOMEM;
+	} while (atomic_cmpxchg(&mm->pinned_vm_, old_pg, new_pg) != old_pg);
+
+	if (!mmp->mm) {	/* first successful charge: remember the mm and bump its mm_count */
+		mmp->mm = mm;
+		atomic_inc(&mm->mm_count);
+	}
+
+	mmp->num_pg += num_pg;	/* running total, subtracted again in mm_unaccount_pinned_pages() */
+	return 0;
+}
+
+static void mm_unaccount_pinned_pages(struct mmpin *mmp)	/* give back every page recorded in mmp, if any were charged */
+{
+	if (mmp->mm) {	/* mm is only set once mm_account_pinned_pages() has charged pages */
+		atomic_sub(mmp->num_pg, &mmp->mm->pinned_vm_);
+		mmdrop(mmp->mm);	/* pairs with the mm_count increment taken on the first charge */
+	}
+}
+
 /* must only be called from process context */
 struct ubuf_info *sock_zerocopy_alloc(struct sock *sk, size_t size)
 {
@@ -852,6 +888,12 @@ struct ubuf_info *sock_zerocopy_alloc(struct sock *sk, size_t size)
 
 	BUILD_BUG_ON(sizeof(*uarg) > sizeof(skb->cb));
 	uarg = (void *)skb->cb;
+	uarg->mmp.mm = NULL;
+
+	if (mm_account_pinned_pages(&uarg->mmp, size)) {
+		kfree_skb(skb);
+		return NULL;
+	}
 
 	uarg->callback = sock_zerocopy_callback;
 	uarg->id = ((u16)atomic_inc_return(&sk->sk_zckey)) - 1;
@@ -880,6 +922,8 @@ struct ubuf_info *sock_zerocopy_realloc(struct sock *sk, size_t size,
 
 		next = atomic_read(&sk->sk_zckey);
 		if ((u16)(uarg->id + uarg->len) == next) {
+			if (mm_account_pinned_pages(&uarg->mmp, size))
+				return NULL;
 			uarg->len++;
 			atomic_set(&sk->sk_zckey, ++next);
 			return uarg;
@@ -946,6 +990,8 @@ EXPORT_SYMBOL_GPL(sock_zerocopy_callback);
 void sock_zerocopy_put(struct ubuf_info *uarg)
 {
 	if (uarg && atomic_dec_and_test(&uarg->refcnt)) {
+		mm_unaccount_pinned_pages(&uarg->mmp);
+
 		/* if !len, there was only 1 call, and it was aborted */
 		if (uarg->callback && uarg->len)
 			uarg->callback(uarg, true);
-- 
2.5.0.276.gf5e568e

  parent reply	other threads:[~2015-08-20 14:36 UTC|newest]

Thread overview: 13+ messages / expand[flat|nested]  mbox.gz  Atom feed  top
2015-08-20 14:36 [PATCH net-next RFC 00/10] socket sendmsg MSG_ZEROCOPY Willem de Bruijn
2015-08-20 14:36 ` [PATCH net-next RFC 01/10] sock: skb_copy_ubufs support for compound pages Willem de Bruijn
2015-08-20 14:36 ` [PATCH net-next RFC 02/10] sock: add sendmsg zerocopy Willem de Bruijn
2015-08-20 14:36 ` [PATCH net-next RFC 03/10] sock: enable " Willem de Bruijn
2015-08-20 14:36 ` [PATCH net-next RFC 04/10] sock: sendmsg zerocopy notification coalescing Willem de Bruijn
2015-08-20 14:36 ` [PATCH net-next RFC 05/10] tcp: enable sendmsg zerocopy Willem de Bruijn
2015-08-20 14:36 ` [PATCH net-next RFC 06/10] udp: " Willem de Bruijn
2015-08-20 14:36 ` [PATCH net-next RFC 07/10] raw: enable sendmsg zerocopy with hdrincl Willem de Bruijn
2015-08-20 14:36 ` [PATCH net-next RFC 08/10] packet: enable sendmsg zerocopy Willem de Bruijn
2015-08-20 14:36 ` Willem de Bruijn [this message]
2015-08-20 22:56 ` [PATCH net-next RFC 00/10] socket sendmsg MSG_ZEROCOPY David Miller
2015-08-21  2:49   ` Willem de Bruijn
2015-08-21  5:17     ` David Miller

Reply instructions:

You may reply publicly to this message via plain-text email
using any one of the following methods:

* Save the following mbox file, import it into your mail client,
  and reply-to-all from there: mbox

  Avoid top-posting and favor interleaved quoting:
  https://en.wikipedia.org/wiki/Posting_style#Interleaved_style

* Reply using the --to, --cc, and --in-reply-to
  switches of git-send-email(1):

  git send-email \
    --in-reply-to=1440081408-12302-10-git-send-email-willemb@google.com \
    --to=willemb@google.com \
    --cc=jasowang@redhat.com \
    --cc=mst@redhat.com \
    --cc=netdev@vger.kernel.org \
    /path/to/YOUR_REPLY

  https://kernel.org/pub/software/scm/git/docs/git-send-email.html

* If your mail client supports setting the In-Reply-To header
  via mailto: links, try the mailto: link

Be sure your reply has a Subject: header at the top and a blank line
before the message body.
This is a public inbox, see mirroring instructions
for how to clone and mirror all data and code used for this inbox;
as well as URLs for NNTP newsgroup(s).