[PATCH 3/3] io_uring: place ring SQ/CQ arrays under memcg memory limits

All of lore.kernel.org
 help / color / mirror / Atom feed

From: Jens Axboe <axboe@kernel.dk>
To: io-uring@vger.kernel.org
Cc: Jens Axboe <axboe@kernel.dk>
Subject: [PATCH 3/3] io_uring: place ring SQ/CQ arrays under memcg memory limits
Date: Wed, 10 Feb 2021 08:16:04 -0700	[thread overview]
Message-ID: <20210210151604.498311-4-axboe@kernel.dk> (raw)
In-Reply-To: <20210210151604.498311-1-axboe@kernel.dk>

Instead of imposing rlimit memlock limits for the rings themselves,
ensure that we account them properly under memcg with __GFP_ACCOUNT.
We retain rlimit memlock for registered buffers, this is just for the
ring arrays themselves.

Signed-off-by: Jens Axboe <axboe@kernel.dk>
---
 fs/io_uring.c | 85 ++++++---------------------------------------------
 1 file changed, 10 insertions(+), 75 deletions(-)

diff --git a/fs/io_uring.c b/fs/io_uring.c
index bffed6aa5722..7a1e4ecf5f94 100644
--- a/fs/io_uring.c
+++ b/fs/io_uring.c
@@ -1017,11 +1017,6 @@ static const struct io_op_def io_op_defs[] = {
 	},
 };
 
-enum io_mem_account {
-	ACCT_LOCKED,
-	ACCT_PINNED,
-};
-
 static void io_uring_try_cancel_requests(struct io_ring_ctx *ctx,
 					 struct task_struct *task,
 					 struct files_struct *files);
@@ -8355,25 +8350,16 @@ static inline int __io_account_mem(struct user_struct *user,
 	return 0;
 }
 
-static void io_unaccount_mem(struct io_ring_ctx *ctx, unsigned long nr_pages,
-			     enum io_mem_account acct)
+static void io_unaccount_mem(struct io_ring_ctx *ctx, unsigned long nr_pages)
 {
 	if (ctx->limit_mem)
 		__io_unaccount_mem(ctx->user, nr_pages);
 
-	if (ctx->mm_account) {
-		if (acct == ACCT_LOCKED) {
-			mmap_write_lock(ctx->mm_account);
-			ctx->mm_account->locked_vm -= nr_pages;
-			mmap_write_unlock(ctx->mm_account);
-		}else if (acct == ACCT_PINNED) {
-			atomic64_sub(nr_pages, &ctx->mm_account->pinned_vm);
-		}
-	}
+	if (ctx->mm_account)
+		atomic64_sub(nr_pages, &ctx->mm_account->pinned_vm);
 }
 
-static int io_account_mem(struct io_ring_ctx *ctx, unsigned long nr_pages,
-			  enum io_mem_account acct)
+static int io_account_mem(struct io_ring_ctx *ctx, unsigned long nr_pages)
 {
 	int ret;
 
@@ -8383,15 +8369,8 @@ static int io_account_mem(struct io_ring_ctx *ctx, unsigned long nr_pages,
 			return ret;
 	}
 
-	if (ctx->mm_account) {
-		if (acct == ACCT_LOCKED) {
-			mmap_write_lock(ctx->mm_account);
-			ctx->mm_account->locked_vm += nr_pages;
-			mmap_write_unlock(ctx->mm_account);
-		} else if (acct == ACCT_PINNED) {
-			atomic64_add(nr_pages, &ctx->mm_account->pinned_vm);
-		}
-	}
+	if (ctx->mm_account)
+		atomic64_add(nr_pages, &ctx->mm_account->pinned_vm);
 
 	return 0;
 }
@@ -8411,7 +8390,7 @@ static void io_mem_free(void *ptr)
 static void *io_mem_alloc(size_t size)
 {
 	gfp_t gfp_flags = GFP_KERNEL | __GFP_ZERO | __GFP_NOWARN | __GFP_COMP |
-				__GFP_NORETRY;
+				__GFP_NORETRY | __GFP_ACCOUNT;
 
 	return (void *) __get_free_pages(gfp_flags, get_order(size));
 }
@@ -8445,18 +8424,6 @@ static unsigned long rings_size(unsigned sq_entries, unsigned cq_entries,
 	return off;
 }
 
-static unsigned long ring_pages(unsigned sq_entries, unsigned cq_entries)
-{
-	size_t pages;
-
-	pages = (size_t)1 << get_order(
-		rings_size(sq_entries, cq_entries, NULL));
-	pages += (size_t)1 << get_order(
-		array_size(sizeof(struct io_uring_sqe), sq_entries));
-
-	return pages;
-}
-
 static int io_sqe_buffers_unregister(struct io_ring_ctx *ctx)
 {
 	int i, j;
@@ -8471,7 +8438,7 @@ static int io_sqe_buffers_unregister(struct io_ring_ctx *ctx)
 			unpin_user_page(imu->bvec[j].bv_page);
 
 		if (imu->acct_pages)
-			io_unaccount_mem(ctx, imu->acct_pages, ACCT_PINNED);
+			io_unaccount_mem(ctx, imu->acct_pages);
 		kvfree(imu->bvec);
 		imu->nr_bvecs = 0;
 	}
@@ -8569,7 +8536,7 @@ static int io_buffer_account_pin(struct io_ring_ctx *ctx, struct page **pages,
 	if (!imu->acct_pages)
 		return 0;
 
-	ret = io_account_mem(ctx, imu->acct_pages, ACCT_PINNED);
+	ret = io_account_mem(ctx, imu->acct_pages);
 	if (ret)
 		imu->acct_pages = 0;
 	return ret;
@@ -8949,14 +8916,6 @@ static void io_ring_ctx_wait_and_kill(struct io_ring_ctx *ctx)
 	/* if we failed setting up the ctx, we might not have any rings */
 	io_iopoll_try_reap_events(ctx);
 
-	/*
-	 * Do this upfront, so we won't have a grace period where the ring
-	 * is closed but resources aren't reaped yet. This can cause
-	 * spurious failure in setting up a new ring.
-	 */
-	io_unaccount_mem(ctx, ring_pages(ctx->sq_entries, ctx->cq_entries),
-			 ACCT_LOCKED);
-
 	INIT_WORK(&ctx->exit_work, io_ring_exit_work);
 	/*
 	 * Use system_unbound_wq to avoid spawning tons of event kworkers
@@ -9780,7 +9739,6 @@ static int io_uring_create(unsigned entries, struct io_uring_params *p,
 	struct user_struct *user = NULL;
 	struct io_ring_ctx *ctx;
 	struct file *file;
-	bool limit_mem;
 	int ret;
 
 	if (!entries)
@@ -9821,26 +9779,14 @@ static int io_uring_create(unsigned entries, struct io_uring_params *p,
 	}
 
 	user = get_uid(current_user());
-	limit_mem = !capable(CAP_IPC_LOCK);
-
-	if (limit_mem) {
-		ret = __io_account_mem(user,
-				ring_pages(p->sq_entries, p->cq_entries));
-		if (ret) {
-			free_uid(user);
-			return ret;
-		}
-	}
 
 	ctx = io_ring_ctx_alloc(p);
 	if (!ctx) {
-		if (limit_mem)
-			__io_unaccount_mem(user, ring_pages(p->sq_entries,
-								p->cq_entries));
 		free_uid(user);
 		return -ENOMEM;
 	}
 	ctx->compat = in_compat_syscall();
+	ctx->limit_mem = !capable(CAP_IPC_LOCK);
 	ctx->user = user;
 	ctx->creds = get_current_cred();
 #ifdef CONFIG_AUDIT
@@ -9876,17 +9822,6 @@ static int io_uring_create(unsigned entries, struct io_uring_params *p,
 		goto err;
 	}
 #endif
-
-	/*
-	 * Account memory _before_ installing the file descriptor. Once
-	 * the descriptor is installed, it can get closed at any time. Also
-	 * do this before hitting the general error path, as ring freeing
-	 * will un-account as well.
-	 */
-	io_account_mem(ctx, ring_pages(p->sq_entries, p->cq_entries),
-		       ACCT_LOCKED);
-	ctx->limit_mem = limit_mem;
-
 	ret = io_allocate_scq_urings(ctx, p);
 	if (ret)
 		goto err;
-- 
2.30.0

     prev parent reply	other threads:[~2021-02-10 15:17 UTC|newest]

Thread overview: 4+ messages / expand[flat|nested]  mbox.gz  Atom feed  top
2021-02-10 15:16 [PATCHES 0/3] Mem accounting and IRQ req cache Jens Axboe
2021-02-10 15:16 ` [PATCH 1/3] io_uring: enable req cache for IRQ driven IO Jens Axboe
2021-02-10 15:16 ` [PATCH 2/3] io_uring: enable kmemcg account for io_uring requests Jens Axboe
2021-02-10 15:16 ` Jens Axboe [this message]

find likely ancestor, descendant, or conflicting patches for this message:
( dfblob:bffed6aa572 dfblob:7a1e4ecf5f9 )
 OR (
bs:"[PATCH 3/3] io_uring: place ring SQ/CQ arrays under memcg memory limits" )
	(help)

Reply instructions:

You may reply publicly to this message via plain-text email
using any one of the following methods:

* Save the following mbox file, import it into your mail client,
  and reply-to-all from there: mbox

  Avoid top-posting and favor interleaved quoting:
  https://en.wikipedia.org/wiki/Posting_style#Interleaved_style

* Reply using the --to, --cc, and --in-reply-to
  switches of git-send-email(1):

  git send-email \
    --in-reply-to=20210210151604.498311-4-axboe@kernel.dk \
    --to=axboe@kernel.dk \
    --cc=io-uring@vger.kernel.org \
    /path/to/YOUR_REPLY

  https://kernel.org/pub/software/scm/git/docs/git-send-email.html

* If your mail client supports setting the In-Reply-To header
  via mailto: links, try the mailto: link

Be sure your reply has a Subject: header at the top and a blank line before the message body.

This is an external index of several public inboxes,
see mirroring instructions on how to clone and mirror
all data and code used by this external index.