All of lore.kernel.org
 help / color / mirror / Atom feed
From: Ming Lei <ming.lei@redhat.com>
To: Jens Axboe <axboe@kernel.dk>
Cc: linux-block@vger.kernel.org, Ming Lei <ming.lei@redhat.com>,
	Hannes Reinecke <hare@suse.de>,
	Bart Van Assche <bvanassche@acm.org>,
	John Garry <john.garry@huawei.com>,
	Christoph Hellwig <hch@lst.de>
Subject: [PATCH 5/5] blk-mq: check and shrink freed request pool page
Date: Fri, 21 Aug 2020 02:03:35 +0800	[thread overview]
Message-ID: <20200820180335.3109216-6-ming.lei@redhat.com> (raw)
In-Reply-To: <20200820180335.3109216-1-ming.lei@redhat.com>

Request pool pages may take up a fair amount of space, and each request
queue may hold at most one unused request pool, so the memory waste can
be large when there are lots of request queues.

Schedule a delayed work item to check whether tags->rqs[] may still refer
to a freed request pool page. If no request in tags->rqs[] refers to the
freed request pool page, release the page right away. Otherwise,
reschedule the delayed work to run again after 10 seconds to check and
release the remaining pages.

Signed-off-by: Ming Lei <ming.lei@redhat.com>
Cc: Hannes Reinecke <hare@suse.de>
Cc: Bart Van Assche <bvanassche@acm.org>
Cc: John Garry <john.garry@huawei.com>
Cc: Christoph Hellwig <hch@lst.de>
---
 block/blk-mq.c         | 55 ++++++++++++++++++++++++++++++++++++++++++
 include/linux/blk-mq.h |  1 +
 2 files changed, 56 insertions(+)

diff --git a/block/blk-mq.c b/block/blk-mq.c
index c644f5cb1549..2865920086ea 100644
--- a/block/blk-mq.c
+++ b/block/blk-mq.c
@@ -2365,11 +2365,63 @@ static void blk_mq_release_rqs_page(struct page *page)
 	__free_pages(page, blk_mq_rqs_page_order(page));
 }
 
+#define SHRINK_RQS_PAGE_DELAY   (10 * HZ)
+
 static void blk_mq_free_rqs_page(struct blk_mq_tag_set *set, struct page *page)
 {
 	spin_lock(&set->free_page_list_lock);
 	list_add_tail(&page->lru, &set->free_page_list);
 	spin_unlock(&set->free_page_list_lock);
+
+	schedule_delayed_work(&set->rqs_page_shrink, SHRINK_RQS_PAGE_DELAY);
+}
+
+static bool blk_mq_can_shrink_rqs_page(struct blk_mq_tag_set *set,
+               struct page *pg)
+{
+	unsigned hctx_idx = blk_mq_rqs_page_hctx_idx(pg);
+	struct blk_mq_tags *tags = set->tags[hctx_idx];
+	unsigned long start = (unsigned long)page_address(pg);
+	unsigned long end = start + order_to_size(blk_mq_rqs_page_order(pg));
+	int i;
+
+	for (i = 0; i < set->queue_depth; i++) {
+		unsigned long rq_addr = (unsigned long)tags->rqs[i];
+		if (rq_addr >= start && rq_addr < end)
+			return false;
+	}
+	return true;
+}
+
+static void blk_mq_rqs_page_shrink_work(struct work_struct *work)
+{
+	struct blk_mq_tag_set *set =
+		container_of(work, struct blk_mq_tag_set, rqs_page_shrink.work);
+	LIST_HEAD(pg_list);
+	struct page *page, *tmp;
+	bool resched;
+
+	spin_lock(&set->free_page_list_lock);
+	list_splice_init(&set->free_page_list, &pg_list);
+	spin_unlock(&set->free_page_list_lock);
+
+	mutex_lock(&set->tag_list_lock);
+	list_for_each_entry_safe(page, tmp, &pg_list, lru) {
+		if (blk_mq_can_shrink_rqs_page(set, page)) {
+			list_del_init(&page->lru);
+			blk_mq_release_rqs_page(page);
+		}
+	}
+	mutex_unlock(&set->tag_list_lock);
+
+	spin_lock(&set->free_page_list_lock);
+	list_splice_init(&pg_list, &set->free_page_list);
+	resched = !list_empty(&set->free_page_list);
+	spin_unlock(&set->free_page_list_lock);
+
+	if (resched)
+		schedule_delayed_work(&set->rqs_page_shrink,
+				SHRINK_RQS_PAGE_DELAY);
 }
 
 static void blk_mq_release_all_rqs_page(struct blk_mq_tag_set *set)
@@ -2377,6 +2429,8 @@ static void blk_mq_release_all_rqs_page(struct blk_mq_tag_set *set)
 	struct page *page;
 	LIST_HEAD(pg_list);
 
+	cancel_delayed_work_sync(&set->rqs_page_shrink);
+
 	spin_lock(&set->free_page_list_lock);
 	list_splice_init(&set->free_page_list, &pg_list);
 	spin_unlock(&set->free_page_list_lock);
@@ -3527,6 +3581,7 @@ int blk_mq_alloc_tag_set(struct blk_mq_tag_set *set)
 
 	spin_lock_init(&set->free_page_list_lock);
 	INIT_LIST_HEAD(&set->free_page_list);
+	INIT_DELAYED_WORK(&set->rqs_page_shrink, blk_mq_rqs_page_shrink_work);
 
 	ret = blk_mq_alloc_map_and_requests(set);
 	if (ret)
diff --git a/include/linux/blk-mq.h b/include/linux/blk-mq.h
index 4c2b135dbbe1..b2adf99dbbef 100644
--- a/include/linux/blk-mq.h
+++ b/include/linux/blk-mq.h
@@ -250,6 +250,7 @@ struct blk_mq_tag_set {
 
 	spinlock_t		free_page_list_lock;
 	struct list_head	free_page_list;
+	struct delayed_work     rqs_page_shrink;
 };
 
 /**
-- 
2.25.2


  parent reply	other threads:[~2020-08-20 18:04 UTC|newest]

Thread overview: 13+ messages / expand[flat|nested]  mbox.gz  Atom feed  top
2020-08-20 18:03 [PATCH 0/5] blk-mq: fix use-after-free on stale request Ming Lei
2020-08-20 18:03 ` [PATCH 1/5] blk-mq: define max_order for allocating rqs pages as macro Ming Lei
2020-08-20 18:03 ` [PATCH 2/5] blk-mq: add helper of blk_mq_get_hw_queue_node Ming Lei
2020-08-25  8:55   ` John Garry
2020-08-20 18:03 ` [PATCH 3/5] blk-mq: add helpers for allocating/freeing pages of request pool Ming Lei
2020-08-20 18:03 ` [PATCH 4/5] blk-mq: cache freed request pool pages Ming Lei
2020-08-20 18:03 ` Ming Lei [this message]
2020-08-20 20:30 ` [PATCH 0/5] blk-mq: fix use-after-free on stale request Bart Van Assche
2020-08-21  2:49   ` Ming Lei
2020-08-26 12:03     ` John Garry
2020-08-26 12:24       ` Ming Lei
2020-08-26 12:34         ` Ming Lei
2020-08-26 12:56           ` John Garry

Reply instructions:

You may reply publicly to this message via plain-text email
using any one of the following methods:

* Save the following mbox file, import it into your mail client,
  and reply-to-all from there: mbox

  Avoid top-posting and favor interleaved quoting:
  https://en.wikipedia.org/wiki/Posting_style#Interleaved_style

* Reply using the --to, --cc, and --in-reply-to
  switches of git-send-email(1):

  git send-email \
    --in-reply-to=20200820180335.3109216-6-ming.lei@redhat.com \
    --to=ming.lei@redhat.com \
    --cc=axboe@kernel.dk \
    --cc=bvanassche@acm.org \
    --cc=hare@suse.de \
    --cc=hch@lst.de \
    --cc=john.garry@huawei.com \
    --cc=linux-block@vger.kernel.org \
    /path/to/YOUR_REPLY

  https://kernel.org/pub/software/scm/git/docs/git-send-email.html

* If your mail client supports setting the In-Reply-To header
  via mailto: links, try the mailto: link
Be sure your reply has a Subject: header at the top and a blank line before the message body.
This is an external index of several public inboxes,
see mirroring instructions on how to clone and mirror
all data and code used by this external index.