All of lore.kernel.org
 help / color / mirror / Atom feed
From: <cl@rock-chips.com>
To: heiko@sntech.de
Cc: juri.lelli@redhat.com, mark.rutland@arm.com,
	geert+renesas@glider.be, peterz@infradead.org,
	catalin.marinas@arm.com, bsegall@google.com, will@kernel.org,
	mpe@ellerman.id.au, linux@armlinux.org.uk,
	dietmar.eggemann@arm.com, ben.dooks@codethink.co.uk,
	mgorman@suse.de, Liang Chen <cl@rock-chips.com>,
	huangtao@rock-chips.com, keescook@chromium.org,
	anshuman.khandual@arm.com, rostedt@goodmis.org,
	tglx@linutronix.de, surenb@google.com, mingo@redhat.com,
	allison@lohutok.net, linux-arm-kernel@lists.infradead.org,
	wad@chromium.org, gregkh@linuxfoundation.org,
	linux-kernel@vger.kernel.org, luto@amacapital.net,
	george_davis@mentor.com, sudeep.holla@arm.com,
	akpm@linux-foundation.org, info@metux.net,
	kstewart@linuxfoundation.org
Subject: [PATCH v1 1/1] sched/fair: do not preempt current task if it is going to call schedule()
Date: Thu,  5 Mar 2020 16:16:11 +0800	[thread overview]
Message-ID: <20200305081611.29323-2-cl@rock-chips.com> (raw)
In-Reply-To: <20200305081611.29323-1-cl@rock-chips.com>

From: Liang Chen <cl@rock-chips.com>

when we create a kthread with ktrhead_create_on_cpu(),the child thread
entry is ktread.c:ktrhead() which will be preempted by the parent after
call complete(done) while schedule() is not called yet,then the parent
will call wait_task_inactive(child) but the child is still on the runqueue,
so the parent will schedule_hrtimeout() for 1 jiffy,it will waste a lot of
time,especially on startup.

  parent                             child
ktrhead_create_on_cpu()
  wait_fo_completion(&done) -----> ktread.c:ktrhead()
                             |----- complete(done);--wakeup and preempted by parent
 kthread_bind() <------------|  |-> schedule();--dequeue here
  wait_task_inactive(child)     |
   schedule_hrtimeout(1 jiffy) -|

So we hope the child just wakeup parent but not preempted by parent, and the
child is going to call schedule() soon,then the parent will not call
schedule_hrtimeout(1 jiffy) as the child is already dequeue.

The same issue for ktrhead_park()&&kthread_parkme().
This patch can save 120ms on rk312x startup with CONFIG_HZ=300.

Signed-off-by: Liang Chen <cl@rock-chips.com>
---
 arch/arm/include/asm/thread_info.h   |  1 +
 arch/arm64/include/asm/thread_info.h |  1 +
 include/linux/sched.h                | 15 +++++++++++++++
 kernel/kthread.c                     |  4 ++++
 kernel/sched/fair.c                  |  4 ++++
 5 files changed, 25 insertions(+)

diff --git a/arch/arm/include/asm/thread_info.h b/arch/arm/include/asm/thread_info.h
index 0d0d5178e2c3..51802991ba1f 100644
--- a/arch/arm/include/asm/thread_info.h
+++ b/arch/arm/include/asm/thread_info.h
@@ -145,6 +145,7 @@ extern int vfp_restore_user_hwstate(struct user_vfp *,
 #define TIF_USING_IWMMXT	17
 #define TIF_MEMDIE		18	/* is terminating due to OOM killer */
 #define TIF_RESTORE_SIGMASK	20
+#define TIF_GOING_TO_SCHED	27	/* task is going to call schedule() */
 
 #define _TIF_SIGPENDING		(1 << TIF_SIGPENDING)
 #define _TIF_NEED_RESCHED	(1 << TIF_NEED_RESCHED)
diff --git a/arch/arm64/include/asm/thread_info.h b/arch/arm64/include/asm/thread_info.h
index f0cec4160136..332786f11dc3 100644
--- a/arch/arm64/include/asm/thread_info.h
+++ b/arch/arm64/include/asm/thread_info.h
@@ -78,6 +78,7 @@ void arch_release_task_struct(struct task_struct *tsk);
 #define TIF_SVE_VL_INHERIT	24	/* Inherit sve_vl_onexec across exec */
 #define TIF_SSBD		25	/* Wants SSB mitigation */
 #define TIF_TAGGED_ADDR		26	/* Allow tagged user addresses */
+#define TIF_GOING_TO_SCHED	27	/* task is going to call schedule() */
 
 #define _TIF_SIGPENDING		(1 << TIF_SIGPENDING)
 #define _TIF_NEED_RESCHED	(1 << TIF_NEED_RESCHED)
diff --git a/include/linux/sched.h b/include/linux/sched.h
index 04278493bf15..cb9058d2cf0b 100644
--- a/include/linux/sched.h
+++ b/include/linux/sched.h
@@ -1768,6 +1768,21 @@ static inline int test_tsk_need_resched(struct task_struct *tsk)
 	return unlikely(test_tsk_thread_flag(tsk,TIF_NEED_RESCHED));
 }
 
+static inline void set_tsk_going_to_sched(struct task_struct *tsk)
+{
+	set_tsk_thread_flag(tsk, TIF_GOING_TO_SCHED);
+}
+
+static inline void clear_tsk_going_to_sched(struct task_struct *tsk)
+{
+	clear_tsk_thread_flag(tsk, TIF_GOING_TO_SCHED);
+}
+
+static inline int test_tsk_going_to_sched(struct task_struct *tsk)
+{
+	return unlikely(test_tsk_thread_flag(tsk, TIF_GOING_TO_SCHED));
+}
+
 /*
  * cond_resched() and cond_resched_lock(): latency reduction via
  * explicit rescheduling in places that are safe. The return
diff --git a/kernel/kthread.c b/kernel/kthread.c
index b262f47046ca..8a4e4c9cdc22 100644
--- a/kernel/kthread.c
+++ b/kernel/kthread.c
@@ -199,8 +199,10 @@ static void __kthread_parkme(struct kthread *self)
 		if (!test_bit(KTHREAD_SHOULD_PARK, &self->flags))
 			break;
 
+		set_tsk_going_to_sched(current);
 		complete(&self->parked);
 		schedule();
+		clear_tsk_going_to_sched(current);
 	}
 	__set_current_state(TASK_RUNNING);
 }
@@ -245,8 +247,10 @@ static int kthread(void *_create)
 	/* OK, tell user we're spawned, wait for stop or wakeup */
 	__set_current_state(TASK_UNINTERRUPTIBLE);
 	create->result = current;
+	set_tsk_going_to_sched(current);
 	complete(done);
 	schedule();
+	clear_tsk_going_to_sched(current);
 
 	ret = -EINTR;
 	if (!test_bit(KTHREAD_SHOULD_STOP, &self->flags)) {
diff --git a/kernel/sched/fair.c b/kernel/sched/fair.c
index 3c8a379c357e..28a308743bf8 100644
--- a/kernel/sched/fair.c
+++ b/kernel/sched/fair.c
@@ -4330,6 +4330,8 @@ entity_tick(struct cfs_rq *cfs_rq, struct sched_entity *curr, int queued)
 			hrtimer_active(&rq_of(cfs_rq)->hrtick_timer))
 		return;
 #endif
+	if (test_tsk_going_to_sched(rq_of(cfs_rq)->curr))
+		return;
 
 	if (cfs_rq->nr_running > 1)
 		check_preempt_tick(cfs_rq, curr);
@@ -6633,6 +6635,8 @@ static void check_preempt_wakeup(struct rq *rq, struct task_struct *p, int wake_
 	 */
 	if (test_tsk_need_resched(curr))
 		return;
+	if (test_tsk_going_to_sched(curr))
+		return;
 
 	/* Idle tasks are by definition preempted by non-idle tasks. */
 	if (unlikely(task_has_idle_policy(curr)) &&
-- 
2.17.1




_______________________________________________
linux-arm-kernel mailing list
linux-arm-kernel@lists.infradead.org
http://lists.infradead.org/mailman/listinfo/linux-arm-kernel

WARNING: multiple messages have this Message-ID (diff)
From: <cl@rock-chips.com>
To: heiko@sntech.de
Cc: mingo@redhat.com, peterz@infradead.org, juri.lelli@redhat.com,
	vincent.guittot@linaro.org, dietmar.eggemann@arm.com,
	rostedt@goodmis.org, bsegall@google.com, mgorman@suse.de,
	akpm@linux-foundation.org, tglx@linutronix.de,
	mpe@ellerman.id.au, surenb@google.com, ben.dooks@codethink.co.uk,
	anshuman.khandual@arm.com, catalin.marinas@arm.com,
	will@kernel.org, keescook@chromium.org, luto@amacapital.net,
	wad@chromium.org, mark.rutland@arm.com, geert+renesas@glider.be,
	george_davis@mentor.com, sudeep.holla@arm.com,
	linux@armlinux.org.uk, gregkh@linuxfoundation.org,
	info@metux.net, kstewart@linuxfoundation.org,
	allison@lohutok.net, linux-arm-kernel@lists.infradead.org,
	linux-kernel@vger.kernel.org, huangtao@rock-chips.com,
	Liang Chen <cl@rock-chips.com>
Subject: [PATCH v1 1/1] sched/fair: do not preempt current task if it is going to call schedule()
Date: Thu,  5 Mar 2020 16:16:11 +0800	[thread overview]
Message-ID: <20200305081611.29323-2-cl@rock-chips.com> (raw)
In-Reply-To: <20200305081611.29323-1-cl@rock-chips.com>

From: Liang Chen <cl@rock-chips.com>

when we create a kthread with ktrhead_create_on_cpu(),the child thread
entry is ktread.c:ktrhead() which will be preempted by the parent after
call complete(done) while schedule() is not called yet,then the parent
will call wait_task_inactive(child) but the child is still on the runqueue,
so the parent will schedule_hrtimeout() for 1 jiffy,it will waste a lot of
time,especially on startup.

  parent                             child
ktrhead_create_on_cpu()
  wait_fo_completion(&done) -----> ktread.c:ktrhead()
                             |----- complete(done);--wakeup and preempted by parent
 kthread_bind() <------------|  |-> schedule();--dequeue here
  wait_task_inactive(child)     |
   schedule_hrtimeout(1 jiffy) -|

So we hope the child just wakeup parent but not preempted by parent, and the
child is going to call schedule() soon,then the parent will not call
schedule_hrtimeout(1 jiffy) as the child is already dequeue.

The same issue for ktrhead_park()&&kthread_parkme().
This patch can save 120ms on rk312x startup with CONFIG_HZ=300.

Signed-off-by: Liang Chen <cl@rock-chips.com>
---
 arch/arm/include/asm/thread_info.h   |  1 +
 arch/arm64/include/asm/thread_info.h |  1 +
 include/linux/sched.h                | 15 +++++++++++++++
 kernel/kthread.c                     |  4 ++++
 kernel/sched/fair.c                  |  4 ++++
 5 files changed, 25 insertions(+)

diff --git a/arch/arm/include/asm/thread_info.h b/arch/arm/include/asm/thread_info.h
index 0d0d5178e2c3..51802991ba1f 100644
--- a/arch/arm/include/asm/thread_info.h
+++ b/arch/arm/include/asm/thread_info.h
@@ -145,6 +145,7 @@ extern int vfp_restore_user_hwstate(struct user_vfp *,
 #define TIF_USING_IWMMXT	17
 #define TIF_MEMDIE		18	/* is terminating due to OOM killer */
 #define TIF_RESTORE_SIGMASK	20
+#define TIF_GOING_TO_SCHED	27	/* task is going to call schedule() */
 
 #define _TIF_SIGPENDING		(1 << TIF_SIGPENDING)
 #define _TIF_NEED_RESCHED	(1 << TIF_NEED_RESCHED)
diff --git a/arch/arm64/include/asm/thread_info.h b/arch/arm64/include/asm/thread_info.h
index f0cec4160136..332786f11dc3 100644
--- a/arch/arm64/include/asm/thread_info.h
+++ b/arch/arm64/include/asm/thread_info.h
@@ -78,6 +78,7 @@ void arch_release_task_struct(struct task_struct *tsk);
 #define TIF_SVE_VL_INHERIT	24	/* Inherit sve_vl_onexec across exec */
 #define TIF_SSBD		25	/* Wants SSB mitigation */
 #define TIF_TAGGED_ADDR		26	/* Allow tagged user addresses */
+#define TIF_GOING_TO_SCHED	27	/* task is going to call schedule() */
 
 #define _TIF_SIGPENDING		(1 << TIF_SIGPENDING)
 #define _TIF_NEED_RESCHED	(1 << TIF_NEED_RESCHED)
diff --git a/include/linux/sched.h b/include/linux/sched.h
index 04278493bf15..cb9058d2cf0b 100644
--- a/include/linux/sched.h
+++ b/include/linux/sched.h
@@ -1768,6 +1768,21 @@ static inline int test_tsk_need_resched(struct task_struct *tsk)
 	return unlikely(test_tsk_thread_flag(tsk,TIF_NEED_RESCHED));
 }
 
+static inline void set_tsk_going_to_sched(struct task_struct *tsk)
+{
+	set_tsk_thread_flag(tsk, TIF_GOING_TO_SCHED);
+}
+
+static inline void clear_tsk_going_to_sched(struct task_struct *tsk)
+{
+	clear_tsk_thread_flag(tsk, TIF_GOING_TO_SCHED);
+}
+
+static inline int test_tsk_going_to_sched(struct task_struct *tsk)
+{
+	return unlikely(test_tsk_thread_flag(tsk, TIF_GOING_TO_SCHED));
+}
+
 /*
  * cond_resched() and cond_resched_lock(): latency reduction via
  * explicit rescheduling in places that are safe. The return
diff --git a/kernel/kthread.c b/kernel/kthread.c
index b262f47046ca..8a4e4c9cdc22 100644
--- a/kernel/kthread.c
+++ b/kernel/kthread.c
@@ -199,8 +199,10 @@ static void __kthread_parkme(struct kthread *self)
 		if (!test_bit(KTHREAD_SHOULD_PARK, &self->flags))
 			break;
 
+		set_tsk_going_to_sched(current);
 		complete(&self->parked);
 		schedule();
+		clear_tsk_going_to_sched(current);
 	}
 	__set_current_state(TASK_RUNNING);
 }
@@ -245,8 +247,10 @@ static int kthread(void *_create)
 	/* OK, tell user we're spawned, wait for stop or wakeup */
 	__set_current_state(TASK_UNINTERRUPTIBLE);
 	create->result = current;
+	set_tsk_going_to_sched(current);
 	complete(done);
 	schedule();
+	clear_tsk_going_to_sched(current);
 
 	ret = -EINTR;
 	if (!test_bit(KTHREAD_SHOULD_STOP, &self->flags)) {
diff --git a/kernel/sched/fair.c b/kernel/sched/fair.c
index 3c8a379c357e..28a308743bf8 100644
--- a/kernel/sched/fair.c
+++ b/kernel/sched/fair.c
@@ -4330,6 +4330,8 @@ entity_tick(struct cfs_rq *cfs_rq, struct sched_entity *curr, int queued)
 			hrtimer_active(&rq_of(cfs_rq)->hrtick_timer))
 		return;
 #endif
+	if (test_tsk_going_to_sched(rq_of(cfs_rq)->curr))
+		return;
 
 	if (cfs_rq->nr_running > 1)
 		check_preempt_tick(cfs_rq, curr);
@@ -6633,6 +6635,8 @@ static void check_preempt_wakeup(struct rq *rq, struct task_struct *p, int wake_
 	 */
 	if (test_tsk_need_resched(curr))
 		return;
+	if (test_tsk_going_to_sched(curr))
+		return;
 
 	/* Idle tasks are by definition preempted by non-idle tasks. */
 	if (unlikely(task_has_idle_policy(curr)) &&
-- 
2.17.1




  reply	other threads:[~2020-03-05  8:17 UTC|newest]

Thread overview: 28+ messages / expand[flat|nested]  mbox.gz  Atom feed  top
2020-03-05  8:16 [PATCH v1 0/1] wait_task_inactive() spend too much time on system startup cl
2020-03-05  8:16 ` cl
2020-03-05  8:16 ` cl [this message]
2020-03-05  8:16   ` [PATCH v1 1/1] sched/fair: do not preempt current task if it is going to call schedule() cl
2020-03-05  8:43   ` Kees Cook
2020-03-05  8:43     ` Kees Cook
2020-03-05  9:58   ` Peter Zijlstra
2020-03-05  9:58     ` Peter Zijlstra
2020-03-05 17:30     ` Kees Cook
2020-03-05 17:30       ` Kees Cook
2020-03-05 13:22   ` kbuild test robot
2020-03-05 13:22     ` kbuild test robot
2020-03-05 13:22     ` kbuild test robot
2020-03-05 13:33   ` kbuild test robot
2020-03-05 13:33     ` kbuild test robot
2020-03-05 13:33     ` kbuild test robot
2020-03-05 13:33   ` kbuild test robot
2020-03-05 13:33     ` kbuild test robot
2020-03-05 13:33     ` kbuild test robot
2020-03-05 13:59   ` kbuild test robot
2020-03-05 13:59     ` kbuild test robot
2020-03-05 13:59     ` kbuild test robot
2020-03-05 14:38   ` kbuild test robot
2020-03-05 14:38     ` kbuild test robot
2020-03-05 14:38     ` kbuild test robot
2020-03-05 15:14     ` [kbuild-all] " Li, Philip
2020-03-05 15:14       ` Li, Philip
2020-03-05 15:14       ` Li, Philip

Reply instructions:

You may reply publicly to this message via plain-text email
using any one of the following methods:

* Save the following mbox file, import it into your mail client,
  and reply-to-all from there: mbox

  Avoid top-posting and favor interleaved quoting:
  https://en.wikipedia.org/wiki/Posting_style#Interleaved_style

* Reply using the --to, --cc, and --in-reply-to
  switches of git-send-email(1):

  git send-email \
    --in-reply-to=20200305081611.29323-2-cl@rock-chips.com \
    --to=cl@rock-chips.com \
    --cc=akpm@linux-foundation.org \
    --cc=allison@lohutok.net \
    --cc=anshuman.khandual@arm.com \
    --cc=ben.dooks@codethink.co.uk \
    --cc=bsegall@google.com \
    --cc=catalin.marinas@arm.com \
    --cc=dietmar.eggemann@arm.com \
    --cc=geert+renesas@glider.be \
    --cc=george_davis@mentor.com \
    --cc=gregkh@linuxfoundation.org \
    --cc=heiko@sntech.de \
    --cc=huangtao@rock-chips.com \
    --cc=info@metux.net \
    --cc=juri.lelli@redhat.com \
    --cc=keescook@chromium.org \
    --cc=kstewart@linuxfoundation.org \
    --cc=linux-arm-kernel@lists.infradead.org \
    --cc=linux-kernel@vger.kernel.org \
    --cc=linux@armlinux.org.uk \
    --cc=luto@amacapital.net \
    --cc=mark.rutland@arm.com \
    --cc=mgorman@suse.de \
    --cc=mingo@redhat.com \
    --cc=mpe@ellerman.id.au \
    --cc=peterz@infradead.org \
    --cc=rostedt@goodmis.org \
    --cc=sudeep.holla@arm.com \
    --cc=surenb@google.com \
    --cc=tglx@linutronix.de \
    --cc=wad@chromium.org \
    --cc=will@kernel.org \
    /path/to/YOUR_REPLY

  https://kernel.org/pub/software/scm/git/docs/git-send-email.html

* If your mail client supports setting the In-Reply-To header
  via mailto: links, try the mailto: link
Be sure your reply has a Subject: header at the top and a blank line before the message body.
This is an external index of several public inboxes,
see mirroring instructions on how to clone and mirror
all data and code used by this external index.