* [Patch] export sched_setscheduler() for kernel module use
@ 2004-11-15 18:35 Dean Nelson
2004-11-15 18:58 ` Chris Wright
0 siblings, 1 reply; 16+ messages in thread
From: Dean Nelson @ 2004-11-15 18:35 UTC (permalink / raw)
To: akpm; +Cc: linux-kernel
This patch exports sched_setscheduler() so that it can be used by a kernel
module to set a kthread's scheduling policy and associated parameters.
Signed-off-by: Dean Nelson <dcn@sgi.com>
Index: linux-2.6/include/linux/sched.h
===================================================================
--- linux-2.6.orig/include/linux/sched.h 2004-11-12 09:40:26.000000000 -0600
+++ linux-2.6/include/linux/sched.h 2004-11-15 06:49:02.000000000 -0600
@@ -727,6 +727,7 @@
extern int task_nice(const task_t *p);
extern int task_curr(const task_t *p);
extern int idle_cpu(int cpu);
+extern int sched_setscheduler(pid_t, int, struct sched_param *);
void yield(void);
Index: linux-2.6/kernel/sched.c
===================================================================
--- linux-2.6.orig/kernel/sched.c 2004-11-12 09:40:26.000000000 -0600
+++ linux-2.6/kernel/sched.c 2004-11-15 06:48:58.000000000 -0600
@@ -2938,7 +2938,7 @@
*/
rq = task_rq_lock(p, &flags);
/*
- * The RT priorities are set via setscheduler(), but we still
+ * The RT priorities are set via sched_setscheduler(), but we still
* allow the 'normal' nice value to be set - but as expected
* it wont have any effect on scheduling until the task is
* not SCHED_NORMAL:
@@ -3072,26 +3072,22 @@
p->prio = p->static_prio;
}
-/*
- * setscheduler - change the scheduling policy and/or RT priority of a thread.
+/**
+ * sched_setscheduler - change the scheduling policy and/or RT priority of
+ * a thread.
+ * @pid: the pid in question.
+ * @policy: new policy.
+ * @param: structure containing the new RT priority.
*/
-static int setscheduler(pid_t pid, int policy, struct sched_param __user *param)
+int sched_setscheduler(pid_t pid, int policy, struct sched_param *param)
{
- struct sched_param lp;
- int retval = -EINVAL;
+ int retval;
int oldprio, oldpolicy = -1;
prio_array_t *array;
unsigned long flags;
runqueue_t *rq;
task_t *p;
- if (!param || pid < 0)
- goto out_nounlock;
-
- retval = -EFAULT;
- if (copy_from_user(&lp, param, sizeof(struct sched_param)))
- goto out_nounlock;
-
/*
* We play safe to avoid deadlocks.
*/
@@ -3117,9 +3113,10 @@
* 1..MAX_USER_RT_PRIO-1, valid priority for SCHED_NORMAL is 0.
*/
retval = -EINVAL;
- if (lp.sched_priority < 0 || lp.sched_priority > MAX_USER_RT_PRIO-1)
+ if (param->sched_priority < 0 ||
+ param->sched_priority > MAX_USER_RT_PRIO-1)
goto out_unlock;
- if ((policy == SCHED_NORMAL) != (lp.sched_priority == 0))
+ if ((policy == SCHED_NORMAL) != (param->sched_priority == 0))
goto out_unlock;
retval = -EPERM;
@@ -3130,7 +3127,7 @@
!capable(CAP_SYS_NICE))
goto out_unlock;
- retval = security_task_setscheduler(p, policy, &lp);
+ retval = security_task_setscheduler(p, policy, param);
if (retval)
goto out_unlock;
/*
@@ -3149,7 +3146,7 @@
deactivate_task(p, task_rq(p));
retval = 0;
oldprio = p->prio;
- __setscheduler(p, policy, lp.sched_priority);
+ __setscheduler(p, policy, param->sched_priority);
if (array) {
__activate_task(p, task_rq(p));
/*
@@ -3166,20 +3163,33 @@
task_rq_unlock(rq, &flags);
out_unlock:
read_unlock_irq(&tasklist_lock);
-out_nounlock:
return retval;
}
+EXPORT_SYMBOL_GPL(sched_setscheduler);
+
+int do_sched_setscheduler(pid_t pid, int policy, struct sched_param __user *param)
+{
+ struct sched_param lparam;
+
+ if (!param || pid < 0)
+ return -EINVAL;
+
+ if (copy_from_user(&lparam, param, sizeof(struct sched_param)))
+ return -EFAULT;
+
+ return sched_setscheduler(pid, policy, &lparam);
+}
/**
* sys_sched_setscheduler - set/change the scheduler policy and RT priority
* @pid: the pid in question.
- * @policy: new policy
+ * @policy: new policy.
* @param: structure containing the new RT priority.
*/
asmlinkage long sys_sched_setscheduler(pid_t pid, int policy,
struct sched_param __user *param)
{
- return setscheduler(pid, policy, param);
+ return do_sched_setscheduler(pid, policy, param);
}
/**
@@ -3189,7 +3199,7 @@
*/
asmlinkage long sys_sched_setparam(pid_t pid, struct sched_param __user *param)
{
- return setscheduler(pid, -1, param);
+ return do_sched_setscheduler(pid, -1, param);
}
/**
^ permalink raw reply [flat|nested] 16+ messages in thread
* Re: [Patch] export sched_setscheduler() for kernel module use
2004-11-15 18:35 [Patch] " Dean Nelson
@ 2004-11-15 18:58 ` Chris Wright
2004-11-15 20:33 ` Dean Nelson
0 siblings, 1 reply; 16+ messages in thread
From: Chris Wright @ 2004-11-15 18:58 UTC (permalink / raw)
To: Dean Nelson; +Cc: akpm, linux-kernel
* Dean Nelson (dcn@sgi.com) wrote:
> +int do_sched_setscheduler(pid_t pid, int policy, struct sched_param __user *param)
this should be static.
thanks,
-chris
^ permalink raw reply [flat|nested] 16+ messages in thread
* Re: [Patch] export sched_setscheduler() for kernel module use
2004-11-15 18:58 ` Chris Wright
@ 2004-11-15 20:33 ` Dean Nelson
2004-11-15 20:41 ` Jan Engelhardt
` (2 more replies)
0 siblings, 3 replies; 16+ messages in thread
From: Dean Nelson @ 2004-11-15 20:33 UTC (permalink / raw)
To: Chris Wright; +Cc: akpm, linux-kernel
On Mon, Nov 15, 2004 at 10:58:01AM -0800, Chris Wright wrote:
> * Dean Nelson (dcn@sgi.com) wrote:
> > +int do_sched_setscheduler(pid_t pid, int policy, struct sched_param __user *param)
>
> this should be static.
You're right. I made another change in that one now passes the task_struct
pointer to sched_setscheduler() instead of the pid. This requires that
the caller of sched_setscheduler() hold the tasklist_lock. The new patch
for people's feedback follows.
Thanks,
Dean
Index: linux-2.6/include/linux/sched.h
===================================================================
--- linux-2.6.orig/include/linux/sched.h 2004-11-12 09:40:26.000000000 -0600
+++ linux-2.6/include/linux/sched.h 2004-11-15 13:43:48.000000000 -0600
@@ -727,6 +727,7 @@
extern int task_nice(const task_t *p);
extern int task_curr(const task_t *p);
extern int idle_cpu(int cpu);
+extern int sched_setscheduler(struct task_struct *, int, struct sched_param *);
void yield(void);
Index: linux-2.6/kernel/sched.c
===================================================================
--- linux-2.6.orig/kernel/sched.c 2004-11-12 09:40:26.000000000 -0600
+++ linux-2.6/kernel/sched.c 2004-11-15 14:22:45.000000000 -0600
@@ -2938,7 +2938,7 @@
*/
rq = task_rq_lock(p, &flags);
/*
- * The RT priorities are set via setscheduler(), but we still
+ * The RT priorities are set via sched_setscheduler(), but we still
* allow the 'normal' nice value to be set - but as expected
* it wont have any effect on scheduling until the task is
* not SCHED_NORMAL:
@@ -3072,67 +3072,50 @@
p->prio = p->static_prio;
}
-/*
- * setscheduler - change the scheduling policy and/or RT priority of a thread.
+/**
+ * sched_setscheduler - change the scheduling policy and/or RT priority of
+ * a thread.
+ * @p: the task in question.
+ * @policy: new policy.
+ * @param: structure containing the new RT priority.
+ *
+ * The caller of this function must be holding the tasklist_lock.
*/
-static int setscheduler(pid_t pid, int policy, struct sched_param __user *param)
+int sched_setscheduler(struct task_struct *p, int policy, struct sched_param *param)
{
- struct sched_param lp;
- int retval = -EINVAL;
+ int retval;
int oldprio, oldpolicy = -1;
prio_array_t *array;
unsigned long flags;
runqueue_t *rq;
- task_t *p;
-
- if (!param || pid < 0)
- goto out_nounlock;
-
- retval = -EFAULT;
- if (copy_from_user(&lp, param, sizeof(struct sched_param)))
- goto out_nounlock;
-
- /*
- * We play safe to avoid deadlocks.
- */
- read_lock_irq(&tasklist_lock);
-
- p = find_process_by_pid(pid);
- retval = -ESRCH;
- if (!p)
- goto out_unlock;
recheck:
/* double check policy once rq lock held */
if (policy < 0)
policy = oldpolicy = p->policy;
- else {
- retval = -EINVAL;
- if (policy != SCHED_FIFO && policy != SCHED_RR &&
+ else if (policy != SCHED_FIFO && policy != SCHED_RR &&
policy != SCHED_NORMAL)
- goto out_unlock;
- }
+ return -EINVAL;
/*
* Valid priorities for SCHED_FIFO and SCHED_RR are
* 1..MAX_USER_RT_PRIO-1, valid priority for SCHED_NORMAL is 0.
*/
- retval = -EINVAL;
- if (lp.sched_priority < 0 || lp.sched_priority > MAX_USER_RT_PRIO-1)
- goto out_unlock;
- if ((policy == SCHED_NORMAL) != (lp.sched_priority == 0))
- goto out_unlock;
+ if (param->sched_priority < 0 ||
+ param->sched_priority > MAX_USER_RT_PRIO-1)
+ return -EINVAL;
+ if ((policy == SCHED_NORMAL) != (param->sched_priority == 0))
+ return -EINVAL;
- retval = -EPERM;
if ((policy == SCHED_FIFO || policy == SCHED_RR) &&
!capable(CAP_SYS_NICE))
- goto out_unlock;
+ return -EPERM;
if ((current->euid != p->euid) && (current->euid != p->uid) &&
!capable(CAP_SYS_NICE))
- goto out_unlock;
+ return -EPERM;
- retval = security_task_setscheduler(p, policy, &lp);
+ retval = security_task_setscheduler(p, policy, param);
if (retval)
- goto out_unlock;
+ return retval;
/*
* To be able to change p->policy safely, the apropriate
* runqueue lock must be held.
@@ -3147,9 +3130,8 @@
array = p->array;
if (array)
deactivate_task(p, task_rq(p));
- retval = 0;
oldprio = p->prio;
- __setscheduler(p, policy, lp.sched_priority);
+ __setscheduler(p, policy, param->sched_priority);
if (array) {
__activate_task(p, task_rq(p));
/*
@@ -3164,22 +3146,41 @@
resched_task(rq->curr);
}
task_rq_unlock(rq, &flags);
-out_unlock:
+ return 0;
+}
+EXPORT_SYMBOL_GPL(sched_setscheduler);
+
+static int do_sched_setscheduler(pid_t pid, int policy, struct sched_param __user *param)
+{
+ int retval;
+ struct sched_param lparam;
+ struct task_struct *p;
+
+ if (!param || pid < 0)
+ return -EINVAL;
+ if (copy_from_user(&lparam, param, sizeof(struct sched_param)))
+ return -EFAULT;
+ read_lock_irq(&tasklist_lock);
+ p = find_process_by_pid(pid);
+ if (!p) {
+ read_unlock_irq(&tasklist_lock);
+ return -ESRCH;
+ }
+ retval = sched_setscheduler(p, policy, &lparam);
read_unlock_irq(&tasklist_lock);
-out_nounlock:
return retval;
}
/**
* sys_sched_setscheduler - set/change the scheduler policy and RT priority
* @pid: the pid in question.
- * @policy: new policy
+ * @policy: new policy.
* @param: structure containing the new RT priority.
*/
asmlinkage long sys_sched_setscheduler(pid_t pid, int policy,
struct sched_param __user *param)
{
- return setscheduler(pid, policy, param);
+ return do_sched_setscheduler(pid, policy, param);
}
/**
@@ -3189,7 +3190,7 @@
*/
asmlinkage long sys_sched_setparam(pid_t pid, struct sched_param __user *param)
{
- return setscheduler(pid, -1, param);
+ return do_sched_setscheduler(pid, -1, param);
}
/**
^ permalink raw reply [flat|nested] 16+ messages in thread
* Re: [Patch] export sched_setscheduler() for kernel module use
2004-11-15 20:33 ` Dean Nelson
@ 2004-11-15 20:41 ` Jan Engelhardt
2004-11-15 21:03 ` Dean Nelson
2004-11-15 21:27 ` Chris Wright
2004-11-16 10:48 ` Ingo Molnar
2 siblings, 1 reply; 16+ messages in thread
From: Jan Engelhardt @ 2004-11-15 20:41 UTC (permalink / raw)
To: Dean Nelson; +Cc: Chris Wright, akpm, linux-kernel
>> * Dean Nelson (dcn@sgi.com) wrote:
>> > +int do_sched_setscheduler(pid_t pid, int policy, struct sched_param __user *param)
>>
>> this should be static.
>
>You're right. I made another change in that one now passes the task_struct
>pointer to sched_setscheduler() instead of the pid. This requires that
>the caller of sched_setscheduler() hold the tasklist_lock. The new patch
>for people's feedback follows.
Hi,
can you elaborate a little why passing the task struct/pid is better/worse,
respectively?
Jan Engelhardt
--
Gesellschaft für Wissenschaftliche Datenverarbeitung
Am Fassberg, 37077 Göttingen, www.gwdg.de
^ permalink raw reply [flat|nested] 16+ messages in thread
* Re: [Patch] export sched_setscheduler() for kernel module use
2004-11-15 20:41 ` Jan Engelhardt
@ 2004-11-15 21:03 ` Dean Nelson
0 siblings, 0 replies; 16+ messages in thread
From: Dean Nelson @ 2004-11-15 21:03 UTC (permalink / raw)
To: Jan Engelhardt; +Cc: akpm, linux-kernel
On Mon, Nov 15, 2004 at 09:41:00PM +0100, Jan Engelhardt wrote:
> >> * Dean Nelson (dcn@sgi.com) wrote:
> >> > +int do_sched_setscheduler(pid_t pid, int policy, struct sched_param __user *param)
> >>
> >> this should be static.
> >
> >You're right. I made another change in that one now passes the task_struct
> >pointer to sched_setscheduler() instead of the pid. This requires that
> >the caller of sched_setscheduler() hold the tasklist_lock. The new patch
> >for people's feedback follows.
>
> Hi,
>
> can you elaborate a little why passing the task struct/pid is better/worse,
> respectively?
The idea of passing the task_struct was to avoid the potential cost
of having to search for the task again if one was working with the
task_struct already. We saw in a particular benchmark that had thousands
of processes and a kernel module that called find_process_by_pid() for
each of them as it performed its services on their behalf, that the
cost was quite high. I believe the benchmark was sped up some 16x by
eliminating the need to map a pid to a task structure (i.e., a
task_struct pointers were saved with the pid in this module's
internal tables).
In my particular case (XPC module) this is not an issue because I'm
dealing with the current task, so if I were to pass 0 for the pid,
find_process_by_pid() would return immediately with the current
task_struct pointer.
With passing the task_struct pointer the caller has to be holding
the tasklist_lock, which may be it's main detraction. If one only
has the pid, they can call find_task_by_pid_type() themselves,
since it is an exported symbol.
I'm quite open to go with whatever the community prefers on this
issue.
Thanks,
Dean
^ permalink raw reply [flat|nested] 16+ messages in thread
* Re: [Patch] export sched_setscheduler() for kernel module use
2004-11-15 20:33 ` Dean Nelson
2004-11-15 20:41 ` Jan Engelhardt
@ 2004-11-15 21:27 ` Chris Wright
2004-12-09 14:36 ` Dean Nelson
2004-11-16 10:48 ` Ingo Molnar
2 siblings, 1 reply; 16+ messages in thread
From: Chris Wright @ 2004-11-15 21:27 UTC (permalink / raw)
To: Dean Nelson; +Cc: Chris Wright, akpm, linux-kernel
* Dean Nelson (dcn@sgi.com) wrote:
> On Mon, Nov 15, 2004 at 10:58:01AM -0800, Chris Wright wrote:
> > * Dean Nelson (dcn@sgi.com) wrote:
> > > +int do_sched_setscheduler(pid_t pid, int policy, struct sched_param __user *param)
> >
> > this should be static.
>
> You're right. I made another change in that one now passes the task_struct
> pointer to sched_setscheduler() instead of the pid. This requires that
> the caller of sched_setscheduler() hold the tasklist_lock. The new patch
> for people's feedback follows.
This now means callers of sched_setscheduler hold tasklist_lock, also
with irq off. I think it's safer to let the core function do that.
It's a touchy area that's ripe for deadlock.
thanks,
-chris
--
Linux Security Modules http://lsm.immunix.org http://lsm.bkbits.net
^ permalink raw reply [flat|nested] 16+ messages in thread
* Re: [Patch] export sched_setscheduler() for kernel module use
2004-11-15 20:33 ` Dean Nelson
2004-11-15 20:41 ` Jan Engelhardt
2004-11-15 21:27 ` Chris Wright
@ 2004-11-16 10:48 ` Ingo Molnar
2004-11-16 20:18 ` Dean Nelson
2 siblings, 1 reply; 16+ messages in thread
From: Ingo Molnar @ 2004-11-16 10:48 UTC (permalink / raw)
To: Dean Nelson; +Cc: Chris Wright, akpm, linux-kernel
* Dean Nelson <dcn@sgi.com> wrote:
> On Mon, Nov 15, 2004 at 10:58:01AM -0800, Chris Wright wrote:
> > * Dean Nelson (dcn@sgi.com) wrote:
> > > +int do_sched_setscheduler(pid_t pid, int policy, struct sched_param __user *param)
> >
> > this should be static.
>
> You're right. I made another change in that one now passes the
> task_struct pointer to sched_setscheduler() instead of the pid. This
> requires that the caller of sched_setscheduler() hold the
> tasklist_lock. The new patch for people's feedback follows.
could you make sched_setscheduler() also include a parameter for the
nice value, so that ->static_prio could be set at the same time too
(which would have relevance if SCHED_OTHER is used)? This would make it
a generic kernel-internal API to change all the priority parameters.
Looks good otherwise.
Ingo
^ permalink raw reply [flat|nested] 16+ messages in thread
* Re: [Patch] export sched_setscheduler() for kernel module use
2004-11-16 10:48 ` Ingo Molnar
@ 2004-11-16 20:18 ` Dean Nelson
2004-11-16 22:36 ` Ingo Molnar
0 siblings, 1 reply; 16+ messages in thread
From: Dean Nelson @ 2004-11-16 20:18 UTC (permalink / raw)
To: Ingo Molnar; +Cc: Chris Wright, akpm, linux-kernel
On Tue, Nov 16, 2004 at 11:48:21AM +0100, Ingo Molnar wrote:
>
> * Dean Nelson <dcn@sgi.com> wrote:
>
> > On Mon, Nov 15, 2004 at 10:58:01AM -0800, Chris Wright wrote:
> > > * Dean Nelson (dcn@sgi.com) wrote:
> > > > +int do_sched_setscheduler(pid_t pid, int policy, struct sched_param __user *param)
> > >
> > > this should be static.
> >
> > You're right. I made another change in that one now passes the
> > task_struct pointer to sched_setscheduler() instead of the pid. This
> > requires that the caller of sched_setscheduler() hold the
> > tasklist_lock. The new patch for people's feedback follows.
>
> could you make sched_setscheduler() also include a parameter for the
> nice value, so that ->static_prio could be set at the same time too
> (which would have relevance if SCHED_OTHER is used)? This would make it
> a generic kernel-internal API to change all the priority parameters.
> Looks good otherwise.
Yeah, I can do that. I'll probably be getting back to you with a
question or two, if what you're after isn't obvious once I start
making the changes for the nice parameter.
Thanks,
Dean
^ permalink raw reply [flat|nested] 16+ messages in thread
* Re: [Patch] export sched_setscheduler() for kernel module use
2004-11-16 22:36 ` Ingo Molnar
@ 2004-11-16 22:01 ` Chris Friesen
2004-11-16 23:05 ` Ingo Molnar
2004-12-08 20:34 ` Dean Nelson
1 sibling, 1 reply; 16+ messages in thread
From: Chris Friesen @ 2004-11-16 22:01 UTC (permalink / raw)
To: Ingo Molnar; +Cc: Dean Nelson, Chris Wright, akpm, linux-kernel
Ingo Molnar wrote:
> Using the linear priority has the
> advantage of not having to pass any policy value - priorities between 0
> and 99 implicitly mean SCHED_FIFO, priorities above that would mean
> SCHED_NORMAL, a pretty natural and compact interface.
Just curious--why FIFO and not RR?
Chris
^ permalink raw reply [flat|nested] 16+ messages in thread
* Re: [Patch] export sched_setscheduler() for kernel module use
2004-11-16 20:18 ` Dean Nelson
@ 2004-11-16 22:36 ` Ingo Molnar
2004-11-16 22:01 ` Chris Friesen
2004-12-08 20:34 ` Dean Nelson
0 siblings, 2 replies; 16+ messages in thread
From: Ingo Molnar @ 2004-11-16 22:36 UTC (permalink / raw)
To: Dean Nelson; +Cc: Chris Wright, akpm, linux-kernel
* Dean Nelson <dcn@sgi.com> wrote:
> > could you make sched_setscheduler() also include a parameter for the
> > nice value, so that ->static_prio could be set at the same time too
> > (which would have relevance if SCHED_OTHER is used)? This would make it
> > a generic kernel-internal API to change all the priority parameters.
> > Looks good otherwise.
>
> Yeah, I can do that. I'll probably be getting back to you with a
> question or two, if what you're after isn't obvious once I start
> making the changes for the nice parameter.
another potential API would be to use the linear priority range that the
scheduler has internally, from 0 (RT prio 99) to 140 (nice +19). I'm not
sure which solution is the better one. Using the linear priority has the
advantage of not having to pass any policy value - priorities between 0
and 99 implicitly mean SCHED_FIFO, priorities above that would mean
SCHED_NORMAL, a pretty natural and compact interface.
Ingo
^ permalink raw reply [flat|nested] 16+ messages in thread
* Re: [Patch] export sched_setscheduler() for kernel module use
2004-11-16 22:01 ` Chris Friesen
@ 2004-11-16 23:05 ` Ingo Molnar
0 siblings, 0 replies; 16+ messages in thread
From: Ingo Molnar @ 2004-11-16 23:05 UTC (permalink / raw)
To: Chris Friesen; +Cc: Dean Nelson, Chris Wright, akpm, linux-kernel
* Chris Friesen <cfriesen@nortelnetworks.com> wrote:
> Ingo Molnar wrote:
> >Using the linear priority has the
> >advantage of not having to pass any policy value - priorities between 0
> >and 99 implicitly mean SCHED_FIFO, priorities above that would mean
> >SCHED_NORMAL, a pretty natural and compact interface.
>
> Just curious--why FIFO and not RR?
it shouldnt matter - if it matters, i.e. if any of these threads uses up
significant amount of CPU time then they should probably _not_ have a RT
priority to begin with.
Ingo
^ permalink raw reply [flat|nested] 16+ messages in thread
* Re: [Patch] export sched_setscheduler() for kernel module use
2004-11-16 22:36 ` Ingo Molnar
2004-11-16 22:01 ` Chris Friesen
@ 2004-12-08 20:34 ` Dean Nelson
2004-12-09 12:46 ` Ingo Molnar
1 sibling, 1 reply; 16+ messages in thread
From: Dean Nelson @ 2004-12-08 20:34 UTC (permalink / raw)
To: Ingo Molnar; +Cc: chrisw, akpm, linux-kernel
On Tue, Nov 16, 2004 at 11:36:08PM +0100, Ingo Molnar wrote:
>
> * Dean Nelson <dcn@sgi.com> wrote:
>
> > > could you make sched_setscheduler() also include a parameter for the
> > > nice value, so that ->static_prio could be set at the same time too
> > > (which would have relevance if SCHED_OTHER is used)? This would make it
> > > a generic kernel-internal API to change all the priority parameters.
> > > Looks good otherwise.
> >
> > Yeah, I can do that. I'll probably be getting back to you with a
> > question or two, if what you're after isn't obvious once I start
> > making the changes for the nice parameter.
>
> another potential API would be to use the linear priority range that the
> scheduler has internally, from 0 (RT prio 99) to 140 (nice +19). I'm not
> sure which solution is the better one. Using the linear priority has the
> advantage of not having to pass any policy value - priorities between 0
> and 99 implicitly mean SCHED_FIFO, priorities above that would mean
> SCHED_NORMAL, a pretty natural and compact interface.
I realize that I don't know where you are ultimately headed with your
ideas for scheduling changes, but as things are it doesn't make sense
to me to drop the SCHED_RR scheduling policy. There may be existing
users who depend on the preemptive nature of this policy. It seems too
much of a risk to eliminate this policy at this time.
Regarding the nice argument itself, it strikes me that it needs to be an
optional argument in the sense that the caller should be able to indicate
that they're not passing a nice value. To simply pass the task's current
nice value has a window of vulnerability in that after fetching the current
nice value via TASK_NICE(p) but before doing anything with it, another
thread could change the task's nice value to something else, then we
end up setting it back to what it was. Would it be better to have callers
pass the nice value as NICE_TO_PRIO(nice), with sched_setscheduler()
treating zero as no nice value arg was passed, and ensuring if non-zero
that '-20 <= PRIO_TO_NICE(nice_prio) <= 19' was true? Or do you simply
want to ignore the window and always pass?
If the nice value arg was passed, would we want to call
security_task_setnice() as sys_nice() does? Also, set_user_nice()
allows the realtime tasks to set the nice value (p->static_prio =
NICE_TO_PRIO(nice)) but it doesn't take effect until the policy is
changed to SCHED_NORMAL. I'm assuming we wouldn't support this in
sched_setscheduler() since you want to tie the nice value arg to
the SCHED_NORMAL policy only.
I'm assuming that we need to allow for the struct sched_param pointer
to be NULL, if the caller of sched_scheduler() only wanted to set the
nice value?
Having asked all of these questions, I must say that I'm not clear on
the value of adding the nice argument to sched_setscheduler().
Who do you envision calling sched_setscheduler() with it set? Will it
be replacing set_user_nice() at some point?
I'm still willing to add a nice argument if you feel it's appropriate
(and I can get answers to my many questions :-)). But Would it be
acceptable to you to separate this work into two parts? The first
part would be along the lines of breaking up setscheduler() into
do_sched_setscheduler() and sched_setscheduler(), and exporting
sched_setscheduler() like in my proposed patch. And the second part
would be to add your nice argument to sched_setscheduler(). I'm
thinking it may take some fiddling to arrive at a proper nice arg
patch, and I really need to get the exporting of sched_setscheduler()
patch accepted in a timely fashion.
Thanks,
Dean
^ permalink raw reply [flat|nested] 16+ messages in thread
* Re: [Patch] export sched_setscheduler() for kernel module use
2004-12-08 20:34 ` Dean Nelson
@ 2004-12-09 12:46 ` Ingo Molnar
0 siblings, 0 replies; 16+ messages in thread
From: Ingo Molnar @ 2004-12-09 12:46 UTC (permalink / raw)
To: Dean Nelson; +Cc: chrisw, akpm, linux-kernel
* Dean Nelson <dcn@sgi.com> wrote:
> > another potential API would be to use the linear priority range that the
> > scheduler has internally, from 0 (RT prio 99) to 140 (nice +19). I'm not
> > sure which solution is the better one. Using the linear priority has the
> > advantage of not having to pass any policy value - priorities between 0
> > and 99 implicitly mean SCHED_FIFO, priorities above that would mean
> > SCHED_NORMAL, a pretty natural and compact interface.
>
> I realize that I don't know where you are ultimately headed with your
> ideas for scheduling changes, but as things are it doesn't make sense
> to me to drop the SCHED_RR scheduling policy. There may be existing
> users who depend on the preemptive nature of this policy. It seems too
> much of a risk to eliminate this policy at this time.
agreed ... that's the weak point. Oh well. So this leaves your original
patch. If someone wants to change the nice value it can be done
separately. I.e. roughly the same API for kernelspace as for userspace.
Ingo
^ permalink raw reply [flat|nested] 16+ messages in thread
* Re: [Patch] export sched_setscheduler() for kernel module use
2004-11-15 21:27 ` Chris Wright
@ 2004-12-09 14:36 ` Dean Nelson
0 siblings, 0 replies; 16+ messages in thread
From: Dean Nelson @ 2004-12-09 14:36 UTC (permalink / raw)
To: Chris Wright; +Cc: mingo, akpm, linux-kernel
On Mon, Nov 15, 2004 at 01:27:49PM -0800, Chris Wright wrote:
> * Dean Nelson (dcn@sgi.com) wrote:
> > On Mon, Nov 15, 2004 at 10:58:01AM -0800, Chris Wright wrote:
> > > * Dean Nelson (dcn@sgi.com) wrote:
> > > > +int do_sched_setscheduler(pid_t pid, int policy, struct sched_param __user *param)
> > >
> > > this should be static.
> >
> > You're right. I made another change in that one now passes the task_struct
> > pointer to sched_setscheduler() instead of the pid. This requires that
> > the caller of sched_setscheduler() hold the tasklist_lock. The new patch
> > for people's feedback follows.
>
> This now means callers of sched_setscheduler hold tasklist_lock, also
> with irq off. I think it's safer to let the core function do that.
> It's a touchy area that's ripe for deadlock.
After some further investigation, I think I was mistaken in saying that
the caller of sched_setscheduler() must hold the tasklist_lock.
If you look at the example of sys_setpriority() and sys_nice(), both
of which call set_user_nice(), the first one does so via set_one_prio()
while holding the tasklist_lock, the second one does so while not
holding the tasklist_lock. The difference seems to be whether the caller
was operating against a task_struct located by way of pid or uid (like
calling find_task_by_pid()), which is the case for sys_setpriority(),
whereas sys_nice() operates against the current task_struct.
Now my proposed sched_setscheduler() is very similar to set_user_nice().
And sys_sched_setscheduler()/do_sched_setscheduler() is very similar to
sys_setpriority()/set_one_prio(). And the kernel module (XPC) that I'm
attempting to get accepted by the community would be analagous to
sys_nice() in that its call to sched_setscheduler() would be against
the current task.
So if there is a problem with my proposed patch in regards to the
tasklist_lock, then it would seem to me that there is a problem
with the exiting sys_setpriority(), set_one_prio(), sys_nice(),
set_user_nice() code.
Or am I missing something?
Thanks,
Dean
^ permalink raw reply [flat|nested] 16+ messages in thread
* [PATCH] export sched_setscheduler() for kernel module use
@ 2004-12-13 20:14 Dean Nelson
2004-12-15 9:49 ` Ingo Molnar
0 siblings, 1 reply; 16+ messages in thread
From: Dean Nelson @ 2004-12-13 20:14 UTC (permalink / raw)
To: mingo; +Cc: linux-kernel
This patch exports sched_setscheduler() so that it can be used by a kernel
module to set a kthread's scheduling policy and associated parameters.
Signed-off-by: Dean Nelson <dcn@sgi.com>
Index: linux-2.6/include/linux/sched.h
===================================================================
--- linux-2.6.orig/include/linux/sched.h 2004-12-09 09:08:24.551332932 -0600
+++ linux-2.6/include/linux/sched.h 2004-12-09 09:08:32.286956855 -0600
@@ -740,6 +740,7 @@
extern int task_nice(const task_t *p);
extern int task_curr(const task_t *p);
extern int idle_cpu(int cpu);
+extern int sched_setscheduler(struct task_struct *, int, struct sched_param *);
void yield(void);
Index: linux-2.6/kernel/sched.c
===================================================================
--- linux-2.6.orig/kernel/sched.c 2004-12-09 09:08:24.552309405 -0600
+++ linux-2.6/kernel/sched.c 2004-12-09 09:12:28.183308108 -0600
@@ -2955,7 +2955,7 @@
*/
rq = task_rq_lock(p, &flags);
/*
- * The RT priorities are set via setscheduler(), but we still
+ * The RT priorities are set via sched_setscheduler(), but we still
* allow the 'normal' nice value to be set - but as expected
* it wont have any effect on scheduling until the task is
* not SCHED_NORMAL:
@@ -3087,67 +3087,48 @@
p->prio = p->static_prio;
}
-/*
- * setscheduler - change the scheduling policy and/or RT priority of a thread.
+/**
+ * sched_setscheduler - change the scheduling policy and/or RT priority of
+ * a thread.
+ * @p: the task in question.
+ * @policy: new policy.
+ * @param: structure containing the new RT priority.
*/
-static int setscheduler(pid_t pid, int policy, struct sched_param __user *param)
+int sched_setscheduler(struct task_struct *p, int policy, struct sched_param *param)
{
- struct sched_param lp;
- int retval = -EINVAL;
+ int retval;
int oldprio, oldpolicy = -1;
prio_array_t *array;
unsigned long flags;
runqueue_t *rq;
- task_t *p;
-
- if (!param || pid < 0)
- goto out_nounlock;
-
- retval = -EFAULT;
- if (copy_from_user(&lp, param, sizeof(struct sched_param)))
- goto out_nounlock;
- /*
- * We play safe to avoid deadlocks.
- */
- read_lock_irq(&tasklist_lock);
-
- p = find_process_by_pid(pid);
-
- retval = -ESRCH;
- if (!p)
- goto out_unlock;
recheck:
/* double check policy once rq lock held */
if (policy < 0)
policy = oldpolicy = p->policy;
- else {
- retval = -EINVAL;
- if (policy != SCHED_FIFO && policy != SCHED_RR &&
+ else if (policy != SCHED_FIFO && policy != SCHED_RR &&
policy != SCHED_NORMAL)
- goto out_unlock;
- }
+ return -EINVAL;
/*
* Valid priorities for SCHED_FIFO and SCHED_RR are
* 1..MAX_USER_RT_PRIO-1, valid priority for SCHED_NORMAL is 0.
*/
- retval = -EINVAL;
- if (lp.sched_priority < 0 || lp.sched_priority > MAX_USER_RT_PRIO-1)
- goto out_unlock;
- if ((policy == SCHED_NORMAL) != (lp.sched_priority == 0))
- goto out_unlock;
+ if (param->sched_priority < 0 ||
+ param->sched_priority > MAX_USER_RT_PRIO-1)
+ return -EINVAL;
+ if ((policy == SCHED_NORMAL) != (param->sched_priority == 0))
+ return -EINVAL;
- retval = -EPERM;
if ((policy == SCHED_FIFO || policy == SCHED_RR) &&
!capable(CAP_SYS_NICE))
- goto out_unlock;
+ return -EPERM;
if ((current->euid != p->euid) && (current->euid != p->uid) &&
!capable(CAP_SYS_NICE))
- goto out_unlock;
+ return -EPERM;
- retval = security_task_setscheduler(p, policy, &lp);
+ retval = security_task_setscheduler(p, policy, param);
if (retval)
- goto out_unlock;
+ return retval;
/*
* To be able to change p->policy safely, the apropriate
* runqueue lock must be held.
@@ -3162,9 +3143,8 @@
array = p->array;
if (array)
deactivate_task(p, task_rq(p));
- retval = 0;
oldprio = p->prio;
- __setscheduler(p, policy, lp.sched_priority);
+ __setscheduler(p, policy, param->sched_priority);
if (array) {
__activate_task(p, task_rq(p));
/*
@@ -3179,22 +3159,41 @@
resched_task(rq->curr);
}
task_rq_unlock(rq, &flags);
-out_unlock:
+ return 0;
+}
+EXPORT_SYMBOL_GPL(sched_setscheduler);
+
+static int do_sched_setscheduler(pid_t pid, int policy, struct sched_param __user *param)
+{
+ int retval;
+ struct sched_param lparam;
+ struct task_struct *p;
+
+ if (!param || pid < 0)
+ return -EINVAL;
+ if (copy_from_user(&lparam, param, sizeof(struct sched_param)))
+ return -EFAULT;
+ read_lock_irq(&tasklist_lock);
+ p = find_process_by_pid(pid);
+ if (!p) {
+ read_unlock_irq(&tasklist_lock);
+ return -ESRCH;
+ }
+ retval = sched_setscheduler(p, policy, &lparam);
read_unlock_irq(&tasklist_lock);
-out_nounlock:
return retval;
}
/**
* sys_sched_setscheduler - set/change the scheduler policy and RT priority
* @pid: the pid in question.
- * @policy: new policy
+ * @policy: new policy.
* @param: structure containing the new RT priority.
*/
asmlinkage long sys_sched_setscheduler(pid_t pid, int policy,
struct sched_param __user *param)
{
- return setscheduler(pid, policy, param);
+ return do_sched_setscheduler(pid, policy, param);
}
/**
@@ -3204,7 +3203,7 @@
*/
asmlinkage long sys_sched_setparam(pid_t pid, struct sched_param __user *param)
{
- return setscheduler(pid, -1, param);
+ return do_sched_setscheduler(pid, -1, param);
}
/**
^ permalink raw reply [flat|nested] 16+ messages in thread
* Re: [PATCH] export sched_setscheduler() for kernel module use
2004-12-13 20:14 [PATCH] export sched_setscheduler() for kernel module use Dean Nelson
@ 2004-12-15 9:49 ` Ingo Molnar
0 siblings, 0 replies; 16+ messages in thread
From: Ingo Molnar @ 2004-12-15 9:49 UTC (permalink / raw)
To: Dean Nelson; +Cc: linux-kernel, Andrew Morton
* Dean Nelson <dcn@sgi.com> wrote:
> This patch exports sched_setscheduler() so that it can be used by a kernel
> module to set a kthread's scheduling policy and associated parameters.
>
> Signed-off-by: Dean Nelson <dcn@sgi.com>
looks good for post-2.6.10.
Acked-by: Ingo Molnar <mingo@elte.hu>
Ingo
^ permalink raw reply [flat|nested] 16+ messages in thread
end of thread, other threads:[~2004-12-15 9:49 UTC | newest]
Thread overview: 16+ messages (download: mbox.gz follow: Atom feed
-- links below jump to the message on this page --
2004-12-13 20:14 [PATCH] export sched_setscheduler() for kernel module use Dean Nelson
2004-12-15 9:49 ` Ingo Molnar
-- strict thread matches above, loose matches on Subject: below --
2004-11-15 18:35 [Patch] " Dean Nelson
2004-11-15 18:58 ` Chris Wright
2004-11-15 20:33 ` Dean Nelson
2004-11-15 20:41 ` Jan Engelhardt
2004-11-15 21:03 ` Dean Nelson
2004-11-15 21:27 ` Chris Wright
2004-12-09 14:36 ` Dean Nelson
2004-11-16 10:48 ` Ingo Molnar
2004-11-16 20:18 ` Dean Nelson
2004-11-16 22:36 ` Ingo Molnar
2004-11-16 22:01 ` Chris Friesen
2004-11-16 23:05 ` Ingo Molnar
2004-12-08 20:34 ` Dean Nelson
2004-12-09 12:46 ` Ingo Molnar
This is an external index of several public inboxes,
see mirroring instructions on how to clone and mirror
all data and code used by this external index.