> +/*
> + * Init/Free related code
> + */
> +static int
> +rt_init(struct scheduler *ops)
> +{
> + struct rt_private *prv = xzalloc(struct rt_private);
> +
> + if ( prv == NULL )
> + return -ENOMEM;

Newline in here.

Changed, thanks!

> + ops->sched_data = prv;

Is it safe to set ops->sched_data with a half constructed rt_private? I
suspect this wants to be the very last (non-debug) action in this function.

I think it should be fine. I double checked the _init function in sched_credit2.c. It has the similar operation: first assign prv to ops->sched_data and then set the value of prv.

Of course, I can switch them. But I'm not sure if that really matter. :-)

> +static void *
> +rt_alloc_domdata(const struct scheduler *ops, struct domain *dom)
> +{
> + unsigned long flags;
> + struct rt_dom *sdom;
> + struct rt_private * prv = RT_PRIV(ops);
> +
> + printtime();
> + printk("dom=%d\n", dom->domain_id);
> +
> + sdom = xzalloc(struct rt_dom);
> + if ( sdom == NULL )
> + {
> + printk("%s, xzalloc failed\n", __func__);
> + return NULL;
> + }
> +
> + INIT_LIST_HEAD(&sdom->vcpu);
> + INIT_LIST_HEAD(&sdom->sdom_elem);
> + sdom->dom = dom;
> +
> + /* spinlock here to insert the dom */
> + spin_lock_irqsave(&prv->lock, flags);
> + list_add_tail(&sdom->sdom_elem, &(prv->sdom));
> + spin_unlock_irqrestore(&prv->lock, flags);
> +
> + return (void *)sdom;

Bogus cast.

I think we have to cast it to void * because the definition of this function asks the return type to be void *. In addition, the credit2 scheduler also did the same cast in this _alloc_domdata function. So I guess this should be fine?

> +/*
> + * set/get each vcpu info of each domain
> + */
> +static int
> +rt_dom_cntl(
> + const struct scheduler *ops,
> + struct domain *d,
> + struct xen_domctl_scheduler_op *op)
> +{
> + xen_domctl_sched_rt_params_t *local_sched;
> + struct rt_dom * const sdom = RT_DOM(d);
> + struct list_head *iter;
> + int vcpu_index = 0;
> + int rc = -EINVAL;
> +
> + switch ( op->cmd )
> + {
> + case XEN_DOMCTL_SCHEDOP_getnumvcpus:
> + op->u.rt.nr_vcpus = 0;
> + list_for_each( iter, &sdom->vcpu )
> + vcpu_index++;
> + op->u.rt.nr_vcpus = vcpu_index;
> + rc = 0;
> + break;
> + case XEN_DOMCTL_SCHEDOP_getinfo:
> + /* for debug use, whenever adjust Dom0 parameter, do global dump */
> + if ( d->domain_id == 0 )
> + rt_dump(ops);
> +
> + op->u.rt.nr_vcpus = 0;
> + list_for_each( iter, &sdom->vcpu )
> + vcpu_index++;
> + op->u.rt.nr_vcpus = vcpu_index;
> + local_sched = xzalloc_array(xen_domctl_sched_rt_params_t, vcpu_index);
> + vcpu_index = 0;
> + list_for_each( iter, &sdom->vcpu )
> + {
> + struct rt_vcpu * svc = list_entry(iter, struct rt_vcpu, sdom_elem);
> +
> + local_sched[vcpu_index].budget = svc->budget;
> + local_sched[vcpu_index].period = svc->period;
> + local_sched[vcpu_index].index = vcpu_index;
> + vcpu_index++;
> + }
> + copy_to_guest(op->u.rt.vcpu, local_sched, vcpu_index);

This will clobber guest heap if vcpu_index is greater than the allocated
space.

This is a good point! I will pass the size of the array to the kernel and check that the number of the array's elements is not smaller than the number of vcpus.

You also unconditionally leak local_sched, but there is no need
for an allocation in the first place.

I will add the xfree() after copy_to_guest().

I have a question: how can I avoid allocating the local_sched?

> + rc = 0;
> + break;
> + case XEN_DOMCTL_SCHEDOP_putinfo:
> + list_for_each( iter, &sdom->vcpu )
> + {
> + struct rt_vcpu * svc = list_entry(iter, struct rt_vcpu, sdom_elem);
> +
> + /* adjust per VCPU parameter */
> + if ( op->u.rt.vcpu_index == svc->vcpu->vcpu_id )
> + {
> + vcpu_index = op->u.rt.vcpu_index;
> +
> + if ( vcpu_index < 0 )
> + printk("XEN_DOMCTL_SCHEDOP_putinfo: vcpu_index=%d\n",
> + vcpu_index);
> + else
> + printk("XEN_DOMCTL_SCHEDOP_putinfo: "
> + "vcpu_index=%d, period=%"PRId64", budget=%"PRId64"\n",
> + vcpu_index, op->u.rt.period, op->u.rt.budget);
> +
> + svc->period = op->u.rt.period;
> + svc->budget = op->u.rt.budget;
> +
> + break;
> + }
> + }
> + rc = 0;
> + break;
> + }
> +
> + return rc;
> +}
> +
> +static struct rt_private _rt_priv;
> +
> +const struct scheduler sched_rt_def = {
> + .name = "SMP RT Scheduler",
> + .opt_name = "rt",

Should these names reflect RT_DS as opposed to simply RT?

DS (Deferrable Server) is just one kind of server mechanisms for global Earliest Deadline First scheduling. We can add other server mechanisms in the same file sched_rt.c to extend this Real Time scheduler. But we don't want to change/affect user's interface when we add more server mechanisms.

The .opt_name will affect the user's interface when user choose the rt scheduler, If we change it to rt_ds, we will have to change it to rt again when we have more server mechanisms implemented. Then users will have to change their configuration (i.e., the command line value sched=) to the new name rt. Because this could potentially affect users' interface, I think it's better to use rt here. What do you think?

> + .sched_id = XEN_SCHEDULER_RT_DS,
> + .sched_data = &_rt_priv,
> +
> + .dump_cpu_state = rt_dump_pcpu,
> + .dump_settings = rt_dump,
> + .init = rt_init,
> + .deinit = rt_deinit,
> + .alloc_pdata = rt_alloc_pdata,
> + .free_pdata = rt_free_pdata,
> + .alloc_domdata = rt_alloc_domdata,
> + .free_domdata = rt_free_domdata,
> + .init_domain = rt_dom_init,
> + .destroy_domain = rt_dom_destroy,
> + .alloc_vdata = rt_alloc_vdata,
> + .free_vdata = rt_free_vdata,
> + .insert_vcpu = rt_vcpu_insert,
> + .remove_vcpu = rt_vcpu_remove,
> +
> + .adjust = rt_dom_cntl,
> +
> + .pick_cpu = rt_cpu_pick,
> + .do_schedule = rt_schedule,
> + .sleep = rt_vcpu_sleep,
> + .wake = rt_vcpu_wake,
> + .context_saved = rt_context_saved,
> +};
> diff --git a/xen/common/schedule.c b/xen/common/schedule.c
> index e9eb0bc..f2df400 100644
> --- a/xen/common/schedule.c
> +++ b/xen/common/schedule.c
> @@ -68,6 +68,7 @@ static const struct scheduler *schedulers[] = {
> &sched_sedf_def,
> &sched_credit_def,
> &sched_credit2_def,
> + &sched_rt_def,
> &sched_arinc653_def,
> };
>
> @@ -1092,7 +1093,8 @@ long sched_adjust(struct domain *d, struct xen_domctl_scheduler_op *op)
>
> if ( (op->sched_id != DOM2OP(d)->sched_id) ||
> ((op->cmd != XEN_DOMCTL_SCHEDOP_putinfo) &&
> - (op->cmd != XEN_DOMCTL_SCHEDOP_getinfo)) )
> + (op->cmd != XEN_DOMCTL_SCHEDOP_getinfo) &&
> + (op->cmd != XEN_DOMCTL_SCHEDOP_getnumvcpus)) )
> return -EINVAL;
>
> /* NB: the pluggable scheduler code needs to take care
> diff --git a/xen/include/public/domctl.h b/xen/include/public/domctl.h
> index 5b11bbf..8d4b973 100644
> --- a/xen/include/public/domctl.h
> +++ b/xen/include/public/domctl.h
> @@ -339,6 +339,18 @@ struct xen_domctl_max_vcpus {
> typedef struct xen_domctl_max_vcpus xen_domctl_max_vcpus_t;
> DEFINE_XEN_GUEST_HANDLE(xen_domctl_max_vcpus_t);
>
> +/*
> + * This structure is used to pass to rt scheduler from a
> + * privileged domain to Xen
> + */
> +struct xen_domctl_sched_rt_params {
> + /* get vcpus' info */
> + int64_t period; /* s_time_t type */
> + int64_t budget;
> + int index;

Index is clearly an unsigned quantity. For alignment and compatibility,
uint64_t would make the most sense. Alternatively, uint32_t and an
explicit uint32_t pad field.

Agree. I have changed it to uint16_t because the vcpu_index is uint16_t in the tool stack.

Thank you very much for your comments and suggestions! Looking forward to your reply! ;-)

Best,

Meng