All of lore.kernel.org
 help / color / mirror / Atom feed
* [PATCH for-5.9 0/3] rw iovec copy cleanup
@ 2020-07-13 19:59 Pavel Begunkov
  2020-07-13 19:59 ` [PATCH 1/3] io_uring: simplify io_req_map_rw() Pavel Begunkov
                   ` (3 more replies)
  0 siblings, 4 replies; 10+ messages in thread
From: Pavel Begunkov @ 2020-07-13 19:59 UTC (permalink / raw)
  To: Jens Axboe, io-uring

Small cleanups related to preparing ->iter and ->iovec
of ->io to async. The last patch is needed for the "completion data"
series.

Pavel Begunkov (3):
  io_uring: simplify io_req_map_rw()
  io_uring: add a helper for async rw iovec prep
  io_uring: follow **iovec idiom in io_import_iovec

 fs/io_uring.c | 78 ++++++++++++++++++++++++---------------------------
 1 file changed, 37 insertions(+), 41 deletions(-)

-- 
2.24.0


^ permalink raw reply	[flat|nested] 10+ messages in thread

* [PATCH 1/3] io_uring: simplify io_req_map_rw()
  2020-07-13 19:59 [PATCH for-5.9 0/3] rw iovec copy cleanup Pavel Begunkov
@ 2020-07-13 19:59 ` Pavel Begunkov
  2020-07-13 19:59 ` [PATCH 2/3] io_uring: add a helper for async rw iovec prep Pavel Begunkov
                   ` (2 subsequent siblings)
  3 siblings, 0 replies; 10+ messages in thread
From: Pavel Begunkov @ 2020-07-13 19:59 UTC (permalink / raw)
  To: Jens Axboe, io-uring

Don't deref req->io->rw every time, but put it in a local variable. This
looks prettier, generates less instructions and doesn't break alias
analysis.

Signed-off-by: Pavel Begunkov <asml.silence@gmail.com>
---
 fs/io_uring.c | 16 +++++++++-------
 1 file changed, 9 insertions(+), 7 deletions(-)

diff --git a/fs/io_uring.c b/fs/io_uring.c
index 6eae2fb469f9..d9c10070dcba 100644
--- a/fs/io_uring.c
+++ b/fs/io_uring.c
@@ -2827,15 +2827,17 @@ static void io_req_map_rw(struct io_kiocb *req, ssize_t io_size,
 			  struct iovec *iovec, struct iovec *fast_iov,
 			  struct iov_iter *iter)
 {
-	req->io->rw.nr_segs = iter->nr_segs;
-	req->io->rw.size = io_size;
-	req->io->rw.iov = iovec;
-	if (!req->io->rw.iov) {
-		req->io->rw.iov = req->io->rw.fast_iov;
-		if (req->io->rw.iov != fast_iov)
-			memcpy(req->io->rw.iov, fast_iov,
+	struct io_async_rw *rw = &req->io->rw;
+
+	rw->nr_segs = iter->nr_segs;
+	rw->size = io_size;
+	if (!iovec) {
+		rw->iov = rw->fast_iov;
+		if (rw->iov != fast_iov)
+			memcpy(rw->iov, fast_iov,
 			       sizeof(struct iovec) * iter->nr_segs);
 	} else {
+		rw->iov = iovec;
 		req->flags |= REQ_F_NEED_CLEANUP;
 	}
 }
-- 
2.24.0


^ permalink raw reply related	[flat|nested] 10+ messages in thread

* [PATCH 2/3] io_uring: add a helper for async rw iovec prep
  2020-07-13 19:59 [PATCH for-5.9 0/3] rw iovec copy cleanup Pavel Begunkov
  2020-07-13 19:59 ` [PATCH 1/3] io_uring: simplify io_req_map_rw() Pavel Begunkov
@ 2020-07-13 19:59 ` Pavel Begunkov
  2020-07-13 19:59 ` [PATCH 3/3] io_uring: follow **iovec idiom in io_import_iovec Pavel Begunkov
  2020-07-13 21:09 ` [PATCH for-5.9 0/3] rw iovec copy cleanup Jens Axboe
  3 siblings, 0 replies; 10+ messages in thread
From: Pavel Begunkov @ 2020-07-13 19:59 UTC (permalink / raw)
  To: Jens Axboe, io-uring

Preparing reads/writes for async is a bit tricky. Extract a helper to
not repeat it twice.

Signed-off-by: Pavel Begunkov <asml.silence@gmail.com>
---
 fs/io_uring.c | 46 ++++++++++++++++++++--------------------------
 1 file changed, 20 insertions(+), 26 deletions(-)

diff --git a/fs/io_uring.c b/fs/io_uring.c
index d9c10070dcba..217dbb6563e7 100644
--- a/fs/io_uring.c
+++ b/fs/io_uring.c
@@ -2871,11 +2871,27 @@ static int io_setup_async_rw(struct io_kiocb *req, ssize_t io_size,
 	return 0;
 }
 
+static inline int io_rw_prep_async(struct io_kiocb *req, int rw,
+				   bool force_nonblock)
+{
+	struct io_async_ctx *io = req->io;
+	struct iov_iter iter;
+	ssize_t ret;
+
+	io->rw.iov = io->rw.fast_iov;
+	req->io = NULL;
+	ret = io_import_iovec(rw, req, &io->rw.iov, &iter, !force_nonblock);
+	req->io = io;
+	if (unlikely(ret < 0))
+		return ret;
+
+	io_req_map_rw(req, ret, io->rw.iov, io->rw.fast_iov, &iter);
+	return 0;
+}
+
 static int io_read_prep(struct io_kiocb *req, const struct io_uring_sqe *sqe,
 			bool force_nonblock)
 {
-	struct io_async_ctx *io;
-	struct iov_iter iter;
 	ssize_t ret;
 
 	ret = io_prep_rw(req, sqe, force_nonblock);
@@ -2888,17 +2904,7 @@ static int io_read_prep(struct io_kiocb *req, const struct io_uring_sqe *sqe,
 	/* either don't need iovec imported or already have it */
 	if (!req->io || req->flags & REQ_F_NEED_CLEANUP)
 		return 0;
-
-	io = req->io;
-	io->rw.iov = io->rw.fast_iov;
-	req->io = NULL;
-	ret = io_import_iovec(READ, req, &io->rw.iov, &iter, !force_nonblock);
-	req->io = io;
-	if (ret < 0)
-		return ret;
-
-	io_req_map_rw(req, ret, io->rw.iov, io->rw.fast_iov, &iter);
-	return 0;
+	return io_rw_prep_async(req, READ, force_nonblock);
 }
 
 static int io_async_buf_func(struct wait_queue_entry *wait, unsigned mode,
@@ -3042,8 +3048,6 @@ static int io_read(struct io_kiocb *req, bool force_nonblock,
 static int io_write_prep(struct io_kiocb *req, const struct io_uring_sqe *sqe,
 			 bool force_nonblock)
 {
-	struct io_async_ctx *io;
-	struct iov_iter iter;
 	ssize_t ret;
 
 	ret = io_prep_rw(req, sqe, force_nonblock);
@@ -3058,17 +3062,7 @@ static int io_write_prep(struct io_kiocb *req, const struct io_uring_sqe *sqe,
 	/* either don't need iovec imported or already have it */
 	if (!req->io || req->flags & REQ_F_NEED_CLEANUP)
 		return 0;
-
-	io = req->io;
-	io->rw.iov = io->rw.fast_iov;
-	req->io = NULL;
-	ret = io_import_iovec(WRITE, req, &io->rw.iov, &iter, !force_nonblock);
-	req->io = io;
-	if (ret < 0)
-		return ret;
-
-	io_req_map_rw(req, ret, io->rw.iov, io->rw.fast_iov, &iter);
-	return 0;
+	return io_rw_prep_async(req, WRITE, force_nonblock);
 }
 
 static int io_write(struct io_kiocb *req, bool force_nonblock,
-- 
2.24.0


^ permalink raw reply related	[flat|nested] 10+ messages in thread

* [PATCH 3/3] io_uring: follow **iovec idiom in io_import_iovec
  2020-07-13 19:59 [PATCH for-5.9 0/3] rw iovec copy cleanup Pavel Begunkov
  2020-07-13 19:59 ` [PATCH 1/3] io_uring: simplify io_req_map_rw() Pavel Begunkov
  2020-07-13 19:59 ` [PATCH 2/3] io_uring: add a helper for async rw iovec prep Pavel Begunkov
@ 2020-07-13 19:59 ` Pavel Begunkov
  2020-07-13 21:09   ` Jens Axboe
  2020-07-13 21:09 ` [PATCH for-5.9 0/3] rw iovec copy cleanup Jens Axboe
  3 siblings, 1 reply; 10+ messages in thread
From: Pavel Begunkov @ 2020-07-13 19:59 UTC (permalink / raw)
  To: Jens Axboe, io-uring

As for import_iovec(), return !=NULL iovec from io_import_iovec() only
when it should be freed, that includes returning NULL when iovec is
already in req->io, because it shoulb be deallocated by other means,
e.g. inside op handler. After io_setup_async_rw() local iovec to ->io,
just mark it NULL, to follow the idea in io_{read,write} as well.

That's easier to follow, and especially useful if we want to reuse
per-op space for completion data.

Signed-off-by: Pavel Begunkov <asml.silence@gmail.com>
---
 fs/io_uring.c | 16 ++++++++--------
 1 file changed, 8 insertions(+), 8 deletions(-)

diff --git a/fs/io_uring.c b/fs/io_uring.c
index 217dbb6563e7..0b9c0333d8c0 100644
--- a/fs/io_uring.c
+++ b/fs/io_uring.c
@@ -2739,10 +2739,8 @@ static ssize_t io_import_iovec(int rw, struct io_kiocb *req,
 	if (req->io) {
 		struct io_async_rw *iorw = &req->io->rw;
 
-		*iovec = iorw->iov;
-		iov_iter_init(iter, rw, *iovec, iorw->nr_segs, iorw->size);
-		if (iorw->iov == iorw->fast_iov)
-			*iovec = NULL;
+		iov_iter_init(iter, rw, iorw->iov, iorw->nr_segs, iorw->size);
+		*iovec = NULL;
 		return iorw->size;
 	}
 
@@ -3025,6 +3023,8 @@ static int io_read(struct io_kiocb *req, bool force_nonblock,
 						inline_vecs, &iter);
 			if (ret)
 				goto out_free;
+			/* it's copied and will be cleaned with ->io */
+			iovec = NULL;
 			/* if we can retry, do so with the callbacks armed */
 			if (io_rw_should_retry(req)) {
 				ret2 = io_iter_do_read(req, &iter);
@@ -3040,8 +3040,7 @@ static int io_read(struct io_kiocb *req, bool force_nonblock,
 		}
 	}
 out_free:
-	if (!(req->flags & REQ_F_NEED_CLEANUP))
-		kfree(iovec);
+	kfree(iovec);
 	return ret;
 }
 
@@ -3142,12 +3141,13 @@ static int io_write(struct io_kiocb *req, bool force_nonblock,
 						inline_vecs, &iter);
 			if (ret)
 				goto out_free;
+			/* it's copied and will be cleaned with ->io */
+			iovec = NULL;
 			return -EAGAIN;
 		}
 	}
 out_free:
-	if (!(req->flags & REQ_F_NEED_CLEANUP))
-		kfree(iovec);
+	kfree(iovec);
 	return ret;
 }
 
-- 
2.24.0


^ permalink raw reply related	[flat|nested] 10+ messages in thread

* Re: [PATCH 3/3] io_uring: follow **iovec idiom in io_import_iovec
  2020-07-13 19:59 ` [PATCH 3/3] io_uring: follow **iovec idiom in io_import_iovec Pavel Begunkov
@ 2020-07-13 21:09   ` Jens Axboe
  2020-07-13 21:12     ` Pavel Begunkov
  0 siblings, 1 reply; 10+ messages in thread
From: Jens Axboe @ 2020-07-13 21:09 UTC (permalink / raw)
  To: Pavel Begunkov, io-uring

On 7/13/20 1:59 PM, Pavel Begunkov wrote:
> @@ -3040,8 +3040,7 @@ static int io_read(struct io_kiocb *req, bool force_nonblock,
>  		}
>  	}
>  out_free:
> -	if (!(req->flags & REQ_F_NEED_CLEANUP))
> -		kfree(iovec);
> +	kfree(iovec);
>  	return ret;
>  }

Faster to do:

if (iovec)
	kfree(iovec)

to avoid a stupid call. Kind of crazy, but I just verified with this one
as well that it's worth about 1.3% CPU in my stress test.

Apart from that, looks good, I just folded in that change.

-- 
Jens Axboe


^ permalink raw reply	[flat|nested] 10+ messages in thread

* Re: [PATCH for-5.9 0/3] rw iovec copy cleanup
  2020-07-13 19:59 [PATCH for-5.9 0/3] rw iovec copy cleanup Pavel Begunkov
                   ` (2 preceding siblings ...)
  2020-07-13 19:59 ` [PATCH 3/3] io_uring: follow **iovec idiom in io_import_iovec Pavel Begunkov
@ 2020-07-13 21:09 ` Jens Axboe
  3 siblings, 0 replies; 10+ messages in thread
From: Jens Axboe @ 2020-07-13 21:09 UTC (permalink / raw)
  To: Pavel Begunkov, io-uring

On 7/13/20 1:59 PM, Pavel Begunkov wrote:
> Small cleanups related to preparing ->iter and ->iovec
> of ->io to async. The last patch is needed for the "completion data"
> series.

Looks good to me, applied.

-- 
Jens Axboe


^ permalink raw reply	[flat|nested] 10+ messages in thread

* Re: [PATCH 3/3] io_uring: follow **iovec idiom in io_import_iovec
  2020-07-13 21:09   ` Jens Axboe
@ 2020-07-13 21:12     ` Pavel Begunkov
  2020-07-13 21:16       ` Jens Axboe
  0 siblings, 1 reply; 10+ messages in thread
From: Pavel Begunkov @ 2020-07-13 21:12 UTC (permalink / raw)
  To: Jens Axboe, io-uring

On 14/07/2020 00:09, Jens Axboe wrote:
> On 7/13/20 1:59 PM, Pavel Begunkov wrote:
>> @@ -3040,8 +3040,7 @@ static int io_read(struct io_kiocb *req, bool force_nonblock,
>>  		}
>>  	}
>>  out_free:
>> -	if (!(req->flags & REQ_F_NEED_CLEANUP))
>> -		kfree(iovec);
>> +	kfree(iovec);
>>  	return ret;
>>  }
> 
> Faster to do:
> 
> if (iovec)
> 	kfree(iovec)
> 
> to avoid a stupid call. Kind of crazy, but I just verified with this one
> as well that it's worth about 1.3% CPU in my stress test.

That looks crazy indeed

> Apart from that, looks good, I just folded in that change.

Great, thanks

-- 
Pavel Begunkov

^ permalink raw reply	[flat|nested] 10+ messages in thread

* Re: [PATCH 3/3] io_uring: follow **iovec idiom in io_import_iovec
  2020-07-13 21:12     ` Pavel Begunkov
@ 2020-07-13 21:16       ` Jens Axboe
  2020-07-13 21:18         ` Pavel Begunkov
  0 siblings, 1 reply; 10+ messages in thread
From: Jens Axboe @ 2020-07-13 21:16 UTC (permalink / raw)
  To: Pavel Begunkov, io-uring

On 7/13/20 3:12 PM, Pavel Begunkov wrote:
> On 14/07/2020 00:09, Jens Axboe wrote:
>> On 7/13/20 1:59 PM, Pavel Begunkov wrote:
>>> @@ -3040,8 +3040,7 @@ static int io_read(struct io_kiocb *req, bool force_nonblock,
>>>  		}
>>>  	}
>>>  out_free:
>>> -	if (!(req->flags & REQ_F_NEED_CLEANUP))
>>> -		kfree(iovec);
>>> +	kfree(iovec);
>>>  	return ret;
>>>  }
>>
>> Faster to do:
>>
>> if (iovec)
>> 	kfree(iovec)
>>
>> to avoid a stupid call. Kind of crazy, but I just verified with this one
>> as well that it's worth about 1.3% CPU in my stress test.
> 
> That looks crazy indeed

I suspect what needs to happen is that kfree should be something ala:

static inline void kfree(void *ptr)
{
	if (ptr)
		__kfree(ptr);
}

to avoid silly games like this. Needs to touch all three slab
allocators, though definitely in the trivial category.

-- 
Jens Axboe


^ permalink raw reply	[flat|nested] 10+ messages in thread

* Re: [PATCH 3/3] io_uring: follow **iovec idiom in io_import_iovec
  2020-07-13 21:16       ` Jens Axboe
@ 2020-07-13 21:18         ` Pavel Begunkov
  2020-07-13 21:26           ` Jens Axboe
  0 siblings, 1 reply; 10+ messages in thread
From: Pavel Begunkov @ 2020-07-13 21:18 UTC (permalink / raw)
  To: Jens Axboe, io-uring

On 14/07/2020 00:16, Jens Axboe wrote:
> On 7/13/20 3:12 PM, Pavel Begunkov wrote:
>> On 14/07/2020 00:09, Jens Axboe wrote:
>>> On 7/13/20 1:59 PM, Pavel Begunkov wrote:
>>>> @@ -3040,8 +3040,7 @@ static int io_read(struct io_kiocb *req, bool force_nonblock,
>>>>  		}
>>>>  	}
>>>>  out_free:
>>>> -	if (!(req->flags & REQ_F_NEED_CLEANUP))
>>>> -		kfree(iovec);
>>>> +	kfree(iovec);
>>>>  	return ret;
>>>>  }
>>>
>>> Faster to do:
>>>
>>> if (iovec)
>>> 	kfree(iovec)
>>>
>>> to avoid a stupid call. Kind of crazy, but I just verified with this one
>>> as well that it's worth about 1.3% CPU in my stress test.
>>
>> That looks crazy indeed
> 
> I suspect what needs to happen is that kfree should be something ala:
> 
> static inline void kfree(void *ptr)
> {
> 	if (ptr)
> 		__kfree(ptr);
> }
> 
> to avoid silly games like this. Needs to touch all three slab
> allocators, though definitely in the trivial category.

Just thought the same, but not sure it's too common to have kfree(NULL).

The drop is probably because of extra call + cold jumps with unlikely().

void kfree() {
	trace_kfree(_RET_IP_, objp);

	if (unlikely(ZERO_OR_NULL_PTR(objp)))
		return;
}

-- 
Pavel Begunkov

^ permalink raw reply	[flat|nested] 10+ messages in thread

* Re: [PATCH 3/3] io_uring: follow **iovec idiom in io_import_iovec
  2020-07-13 21:18         ` Pavel Begunkov
@ 2020-07-13 21:26           ` Jens Axboe
  0 siblings, 0 replies; 10+ messages in thread
From: Jens Axboe @ 2020-07-13 21:26 UTC (permalink / raw)
  To: Pavel Begunkov, io-uring

On 7/13/20 3:18 PM, Pavel Begunkov wrote:
> On 14/07/2020 00:16, Jens Axboe wrote:
>> On 7/13/20 3:12 PM, Pavel Begunkov wrote:
>>> On 14/07/2020 00:09, Jens Axboe wrote:
>>>> On 7/13/20 1:59 PM, Pavel Begunkov wrote:
>>>>> @@ -3040,8 +3040,7 @@ static int io_read(struct io_kiocb *req, bool force_nonblock,
>>>>>  		}
>>>>>  	}
>>>>>  out_free:
>>>>> -	if (!(req->flags & REQ_F_NEED_CLEANUP))
>>>>> -		kfree(iovec);
>>>>> +	kfree(iovec);
>>>>>  	return ret;
>>>>>  }
>>>>
>>>> Faster to do:
>>>>
>>>> if (iovec)
>>>> 	kfree(iovec)
>>>>
>>>> to avoid a stupid call. Kind of crazy, but I just verified with this one
>>>> as well that it's worth about 1.3% CPU in my stress test.
>>>
>>> That looks crazy indeed
>>
>> I suspect what needs to happen is that kfree should be something ala:
>>
>> static inline void kfree(void *ptr)
>> {
>> 	if (ptr)
>> 		__kfree(ptr);
>> }
>>
>> to avoid silly games like this. Needs to touch all three slab
>> allocators, though definitely in the trivial category.
> 
> Just thought the same, but not sure it's too common to have kfree(NULL).

Right, except the io_read/io_write path it'll be 100% common unless you
have more than the inline number of segments.

I see the same thing for eg the slab should_failslab() call, which isn't
inlined even if the kconfig isn't enabled. And for should_fail_bio()
as well. Those two add up to another ~1% or so of pointless overhead.

> The drop is probably because of extra call + cold jumps with unlikely().
> 
> void kfree() {
> 	trace_kfree(_RET_IP_, objp);
> 
> 	if (unlikely(ZERO_OR_NULL_PTR(objp)))
> 		return;
> }

Must be, since the kfree() one adds up to more than the two above ones
that are just the call itself.

-- 
Jens Axboe


^ permalink raw reply	[flat|nested] 10+ messages in thread

end of thread, other threads:[~2020-07-13 21:26 UTC | newest]

Thread overview: 10+ messages (download: mbox.gz follow: Atom feed
-- links below jump to the message on this page --
2020-07-13 19:59 [PATCH for-5.9 0/3] rw iovec copy cleanup Pavel Begunkov
2020-07-13 19:59 ` [PATCH 1/3] io_uring: simplify io_req_map_rw() Pavel Begunkov
2020-07-13 19:59 ` [PATCH 2/3] io_uring: add a helper for async rw iovec prep Pavel Begunkov
2020-07-13 19:59 ` [PATCH 3/3] io_uring: follow **iovec idiom in io_import_iovec Pavel Begunkov
2020-07-13 21:09   ` Jens Axboe
2020-07-13 21:12     ` Pavel Begunkov
2020-07-13 21:16       ` Jens Axboe
2020-07-13 21:18         ` Pavel Begunkov
2020-07-13 21:26           ` Jens Axboe
2020-07-13 21:09 ` [PATCH for-5.9 0/3] rw iovec copy cleanup Jens Axboe

This is an external index of several public inboxes,
see mirroring instructions on how to clone and mirror
all data and code used by this external index.