diff -r e2f36d066b7b tools/libxc/Makefile --- a/tools/libxc/Makefile Mon Dec 22 13:48:40 2008 +0000 +++ b/tools/libxc/Makefile Thu Jan 08 01:32:01 2009 -0500 @@ -17,6 +17,7 @@ CTRL_SRCS-y += xc_private.c CTRL_SRCS-y += xc_sedf.c CTRL_SRCS-y += xc_csched.c +CTRL_SRCS-y += xc_sdp.c CTRL_SRCS-y += xc_tbuf.c CTRL_SRCS-y += xc_pm.c CTRL_SRCS-y += xc_cpu_hotplug.c diff -r e2f36d066b7b tools/libxc/xc_sdp.c --- /dev/null Thu Jan 01 00:00:00 1970 +0000 +++ b/tools/libxc/xc_sdp.c Thu Jan 08 01:32:01 2009 -0500 @@ -0,0 +1,53 @@ +/**************************************************************************** + * (C) 2009 -- Zhiyuan Shao -- Huazhong Univers. of Sci.&Tech. PRC + **************************************************************************** + * + * File: xc_sdp.c + * Author: Zhiyuan Shao + * + * Description: XC Interface to the SDP scheduler + * + */ +#include "xc_private.h" +/* Set the SDP priority of domain domid with a XEN_DOMCTL_SCHEDOP_putinfo scheduler_op; returns whatever do_domctl() returns. */ +int +xc_sdp_domain_set( + int xc_handle, + uint32_t domid, + uint16_t priority ) +{ + DECLARE_DOMCTL; + struct xen_domctl_sched_sdp *p = &domctl.u.scheduler_op.u.sdp; + + domctl.cmd = XEN_DOMCTL_scheduler_op; + domctl.domain = (domid_t) domid; + domctl.u.scheduler_op.sched_id = XEN_SCHEDULER_SDP; + domctl.u.scheduler_op.cmd = XEN_DOMCTL_SCHEDOP_putinfo; + + p->pri = priority; + + return do_domctl(xc_handle, &domctl); +} +/* Query the SDP priority of domain domid with XEN_DOMCTL_SCHEDOP_getinfo; *priority is written only when do_domctl() returns 0. Returns do_domctl()'s result. */ +int +xc_sdp_domain_get( + int xc_handle, + uint32_t domid, + uint16_t *priority) +{ + DECLARE_DOMCTL; + int ret; + struct xen_domctl_sched_sdp *p = &domctl.u.scheduler_op.u.sdp; + + domctl.cmd = XEN_DOMCTL_scheduler_op; + domctl.domain = (domid_t) domid; + domctl.u.scheduler_op.sched_id = XEN_SCHEDULER_SDP; + domctl.u.scheduler_op.cmd = XEN_DOMCTL_SCHEDOP_getinfo; + + ret = do_domctl(xc_handle, &domctl); + if ( ret == 0 ) + *priority = p->pri; + + return ret; +} + diff -r e2f36d066b7b tools/libxc/xc_sedf.c --- a/tools/libxc/xc_sedf.c Mon Dec 22 13:48:40 2008 +0000 +++ b/tools/libxc/xc_sedf.c Thu Jan 08 01:32:01 2009 -0500 @@ -60,5 +60,6 @@ *latency = p->latency; *extratime = 
p->extratime; *weight = p->weight; + return ret; } diff -r e2f36d066b7b tools/libxc/xenctrl.h --- a/tools/libxc/xenctrl.h Mon Dec 22 13:48:40 2008 +0000 +++ b/tools/libxc/xenctrl.h Thu Jan 08 01:32:01 2009 -0500 @@ -447,6 +447,14 @@ int xc_sched_credit_domain_get(int xc_handle, uint32_t domid, struct xen_domctl_sched_credit *sdom); + +/* SDP scheduler: set, respectively read back, the priority of domain domid. */ +int xc_sdp_domain_set( int xc_handle, + uint32_t domid, + uint16_t priority ); +int xc_sdp_domain_get( int xc_handle, + uint32_t domid, + uint16_t *priority ); /** * This function sends a trigger to a domain. diff -r e2f36d066b7b tools/python/xen/lowlevel/xc/xc.c --- a/tools/python/xen/lowlevel/xc/xc.c Mon Dec 22 13:48:40 2008 +0000 +++ b/tools/python/xen/lowlevel/xc/xc.c Thu Jan 08 01:32:01 2009 -0500 @@ -1180,7 +1180,7 @@ uint16_t extratime, weight; static char *kwd_list[] = { "domid", "period", "slice", "latency", "extratime", "weight",NULL }; - + if( !PyArg_ParseTupleAndKeywords(args, kwds, "iLLLhh", kwd_list, &domid, &period, &slice, &latency, &extratime, &weight) ) @@ -1213,6 +1213,41 @@ "latency", latency, "extratime", extratime, "weight", weight); +} +/* Python wrapper around xc_sdp_domain_set(): parses (domid, priority), returns the shared `zero` object on success and pyxc_error_to_exception() when libxc reports failure. */ +static PyObject *pyxc_sched_sdp_domain_set(XcObject *self, + PyObject *args, + PyObject *kwds) +{ + static char *kwd_list[] = { "domid", "priority", NULL }; + uint32_t domid; + uint16_t priority; + + if( !PyArg_ParseTupleAndKeywords(args, kwds, "ih", kwd_list, + &domid, &priority) ) + return NULL; + + if ( xc_sdp_domain_set(self->xc_handle, domid, priority) != 0 ) + return pyxc_error_to_exception(); + + Py_INCREF(zero); + return zero; +} +/* Python wrapper around xc_sdp_domain_get(): returns a dict with the keys "domid" and "priority", or pyxc_error_to_exception() when libxc reports failure. */ +static PyObject *pyxc_sched_sdp_domain_get(XcObject *self, PyObject *args) +{ + uint32_t domid; + uint16_t priority; + + if( !PyArg_ParseTuple(args, "i", &domid) ) + return NULL; + + if ( xc_sdp_domain_get(self->xc_handle, domid, &priority) != 0 ) + return pyxc_error_to_exception(); + + return Py_BuildValue("{s:i,s:H}", + "domid", domid, + "priority", priority ); } static PyObject *pyxc_shadow_control(PyObject *self, @@ 
-1736,6 +1771,25 @@ "Returns: [dict]\n" " weight [short]: domain's scheduling weight\n"}, + { "sched_sdp_domain_set", + (PyCFunction)pyxc_sched_sdp_domain_set, + METH_VARARGS | METH_KEYWORDS, "\n" + "Set the scheduling parameters for a domain when running with the\n" + "Simple Dynamic Priority scheduler.\n" + " domid [int]: domain id to set\n" + " priority [short]: domain's priority\n" + "Returns: [int] 0 on success; -1 on error.\n" }, + + { "sched_sdp_domain_get", + (PyCFunction)pyxc_sched_sdp_domain_get, + METH_VARARGS, "\n" + "Get the scheduling parameters for a domain when running with the\n" + "Simple Dynamic Priority scheduler.\n" + " domid [int]: domain id to query\n" + "Returns: [dict]\n" + " domid [int]: domain id\n" + " priority [short]: domain's priority\n"}, + { "evtchn_alloc_unbound", (PyCFunction)pyxc_evtchn_alloc_unbound, METH_VARARGS | METH_KEYWORDS, "\n" @@ -2051,6 +2105,7 @@ /* Expose some libxc constants to Python */ PyModule_AddIntConstant(m, "XEN_SCHEDULER_SEDF", XEN_SCHEDULER_SEDF); PyModule_AddIntConstant(m, "XEN_SCHEDULER_CREDIT", XEN_SCHEDULER_CREDIT); + PyModule_AddIntConstant(m, "XEN_SCHEDULER_SDP", XEN_SCHEDULER_SDP); } diff -r e2f36d066b7b tools/python/xen/xend/XendConfig.py --- a/tools/python/xen/xend/XendConfig.py Mon Dec 22 13:48:40 2008 +0000 +++ b/tools/python/xen/xend/XendConfig.py Thu Jan 08 01:32:02 2009 -0500 @@ -589,6 +589,8 @@ int(sxp.child_value(sxp_cfg, "cpu_weight", 256)) cfg["vcpus_params"]["cap"] = \ int(sxp.child_value(sxp_cfg, "cpu_cap", 0)) + cfg["vcpus_params"]["priority"] = \ + int(sxp.child_value(sxp_cfg, "cpu_priority", 80)) # Only extract options we know about. extract_keys = LEGACY_UNSUPPORTED_BY_XENAPI_CFG + \ diff -r e2f36d066b7b tools/python/xen/xend/XendDomain.py --- a/tools/python/xen/xend/XendDomain.py Mon Dec 22 13:48:40 2008 +0000 +++ b/tools/python/xen/xend/XendDomain.py Thu Jan 08 01:32:02 2009 -0500 @@ -1452,6 +1452,8 @@ @type domid: int or string. 
@rtype: 0 """ + + + dominfo = self.domain_lookup_nr(domid) if not dominfo: raise XendInvalidDomain(str(domid)) @@ -1482,6 +1484,44 @@ ['latency', sedf_info['latency']], ['extratime', sedf_info['extratime']], ['weight', sedf_info['weight']]] + + except Exception, ex: + raise XendError(str(ex)) + + def domain_cpu_sdp_set(self, domid, priority): + """Set SDP scheduler parameters for a domain. + + @param domid: Domain ID or Name + @type domid: int or string. + @rtype: 0 + """ + dominfo = self.domain_lookup_nr(domid) + if not dominfo: + raise XendInvalidDomain(str(domid)) + try: + return xc.sched_sdp_domain_set(dominfo.getDomid(), priority) # name as registered in xc.c's method table + except Exception, ex: + raise XendError(str(ex)) + + def domain_cpu_sdp_get(self, domid): + """Get SDP scheduler parameters for a domain. + + @param domid: Domain ID or Name + @type domid: int or string. + @rtype: SXP object + @return: The parameters for sdp scheduler for a domain. + """ + + + dominfo = self.domain_lookup_nr(domid) + if not dominfo: + raise XendInvalidDomain(str(domid)) + try: + sdp_info = xc.sched_sdp_domain_get(dominfo.getDomid()) + # return sxpr + return ['sdp', + ['domid', sdp_info['domid']], + ['priority', sdp_info['priority']]] except Exception, ex: raise XendError(str(ex)) @@ -1529,6 +1569,60 @@ try: return xc.shadow_mem_control(dominfo.getDomid(), mb=mb) except Exception, ex: + raise XendError(str(ex)) + + def domain_sched_sdp_get(self, domid): + """Get sdp scheduler parameters for a domain. + + @param domid: Domain ID or Name + @type domid: int or string. 
+ @rtype: dict with keys 'priority' + @return: sdp scheduler parameters + """ + dominfo = self.domain_lookup_nr(domid) + if not dominfo: + raise XendInvalidDomain(str(domid)) + + if dominfo._stateGet() in (DOM_STATE_RUNNING, DOM_STATE_PAUSED): + try: + return xc.sched_sdp_domain_get(dominfo.getDomid()) + except Exception, ex: + raise XendError(str(ex)) + else: + return {'priority' : dominfo.getPri()} # NOTE(review): getPri() is not added by this patch -- confirm XendDomainInfo provides it + + def domain_sched_sdp_set(self, domid, priority = None): + """Set sdp scheduler parameters for a domain. + + @param domid: Domain ID or Name + @type domid: int or string. + @type priority: int + @rtype: 0 + """ + set_priority = False + dominfo = self.domain_lookup_nr(domid) + if not dominfo: + raise XendInvalidDomain(str(domid)) + try: + if priority is None: + priority = 80 + elif priority < 0 or priority > 100: + raise XendError("priority is out of range") + else: + set_priority = True + + assert type(priority) == int + + rc = 0 + if dominfo._stateGet() in (DOM_STATE_RUNNING, DOM_STATE_PAUSED): + rc = xc.sched_sdp_domain_set(dominfo.getDomid(), priority) + if rc == 0: + if set_priority: + dominfo.setPri(priority) # NOTE(review): setPri() is not added by this patch -- confirm XendDomainInfo provides it + self.managed_config_save(dominfo) + return rc + except Exception, ex: + log.exception(ex) raise XendError(str(ex)) def domain_sched_credit_get(self, domid): diff -r e2f36d066b7b tools/python/xen/xend/XendNode.py --- a/tools/python/xen/xend/XendNode.py Mon Dec 22 13:48:40 2008 +0000 +++ b/tools/python/xen/xend/XendNode.py Thu Jan 08 01:32:02 2009 -0500 @@ -555,6 +555,8 @@ return 'sedf' elif sched_id == xen.lowlevel.xc.XEN_SCHEDULER_CREDIT: return 'credit' + elif sched_id == xen.lowlevel.xc.XEN_SCHEDULER_SDP: + return 'sdp' else: return 'unknown' @@ -714,6 +716,8 @@ return 'sedf' elif sched_id == xen.lowlevel.xc.XEN_SCHEDULER_CREDIT: return 'credit' + elif sched_id == xen.lowlevel.xc.XEN_SCHEDULER_SDP: + return 'sdp' else: return 'unknown' diff -r e2f36d066b7b tools/python/xen/xend/server/SrvDomain.py --- a/tools/python/xen/xend/server/SrvDomain.py Mon Dec 
22 13:48:40 2008 +0000 +++ b/tools/python/xen/xend/server/SrvDomain.py Thu Jan 08 01:32:02 2009 -0500 @@ -136,6 +136,8 @@ def op_cpu_sedf_set(self, _, req): + + + fn = FormFn(self.xd.domain_cpu_sedf_set, [['dom', 'int'], ['period', 'int'], @@ -145,7 +147,23 @@ ['weight', 'int']]) val = fn(req.args, {'dom': self.dom.domid}) return val - + + def op_cpu_sdp_get(self, _, req): + # Forward to XendDomain.domain_cpu_sdp_get for this domain. + + fn = FormFn(self.xd.domain_cpu_sdp_get, + [['dom', 'int']]) + val = fn(req.args, {'dom': self.dom.domid}) + return val + + + def op_cpu_sdp_set(self, _, req): + fn = FormFn(self.xd.domain_cpu_sdp_set, + [['dom', 'int'], + ['priority', 'int']]) + val = fn(req.args, {'dom': self.dom.domid}) + return val + def op_domain_sched_credit_get(self, _, req): fn = FormFn(self.xd.domain_sched_credit_get, [['dom', 'str']]) diff -r e2f36d066b7b tools/python/xen/xm/main.py --- a/tools/python/xen/xm/main.py Mon Dec 22 13:48:40 2008 +0000 +++ b/tools/python/xen/xm/main.py Thu Jan 08 01:32:02 2009 -0500 @@ -152,6 +152,7 @@ 'sched-sedf' : (' [options]', 'Get/set EDF parameters.'), 'sched-credit': ('[-d [-w[=WEIGHT]|-c[=CAP]]]', 'Get/set credit scheduler parameters.'), + 'sched-sdp' : (' [options]', 'Get/set SDP parameters.'), 'sysrq' : (' ', 'Send a sysrq to a domain.'), 'debug-keys' : ('', 'Send debug keys to Xen.'), 'trigger' : (' []', @@ -235,6 +236,10 @@ 'Flag (0 or 1) controls if domain can run in extra time.'), ('-w [FLOAT]', '--weight[=FLOAT]', 'CPU Period/slice (do not set with --period/--slice)'), + ), + 'sched-sdp': ( + ('-d DOMAIN', '--domain=DOMAIN', 'Domain to modify'), # NOTE(review): xm_sched_sdp takes the domain positionally and its getopt spec has no -d -- confirm + ('-p PRIORITY', '--priority=PRIORITY', 'Relative Priority[0,100](int)'), ), 'sched-credit': ( ('-d DOMAIN', '--domain=DOMAIN', 'Domain to modify'), @@ -360,6 +365,7 @@ scheduler_commands = [ "sched-credit", "sched-sedf", + "sched-sdp", ] device_commands = [ @@ -955,6 +961,15 @@ 'latency' : get_info('latency', int, -1), 'extratime': get_info('extratime', int, -1), 
'weight' : get_info('weight', int, -1), + } + +def parse_sdp_info(info): + def get_info(n, t, d): + return t(sxp.child_value(info, n, d)) + + return { + 'domid' : get_info('domid', int, -1), + 'priority' : get_info('priority', int, -1), } def domid_match(domid, info): @@ -1544,7 +1559,6 @@ print '%-33s %4s %-4s %-4s %-7s %-5s %-6s' % \ ('Name','ID','Period(ms)', 'Slice(ms)', 'Lat(ms)', 'Extra','Weight') - for d in doms: # fetch current values so as not to clobber them try: @@ -1571,6 +1585,76 @@ # not setting values, display info else: print_sedf(sedf_info) + +def xm_sched_sdp(args): + """Get/Set options for the SDP scheduler.""" + xenapi_unsupported() + def print_sdp(info): + """Print one table row: name, domid, priority.""" + print( ("%(name)-32s %(domid)5d %(priority)5d") % info) + + check_sched_type('sdp') + + # we want to just display current info if no parameters are passed + if len(args) == 0: + domid = None + else: + # we expect at least a domain id (name or number) + # and at most the domain plus '-p PRIORITY' (3 words) + arg_check(args, "sched-sdp", 1, 3) + domid = args[0] + # drop domid from args since get_opt doesn't recognize it + args = args[1:] + + opts = {} + try: + (options, params) = getopt.gnu_getopt(args, 'p:', ['priority=']) + except getopt.GetoptError, opterr: + err(opterr) + usage('sched-sdp') + + # collect the requested priority (converted to int before it is sent) + for (k, v) in options: + if k in ['-p', '--priority']: + opts['priority'] = v + + doms = filter(lambda x : domid_match(domid, x), + [parse_doms_info(dom) + for dom in getDomains(None, 'running')]) + if domid is not None and doms == []: + err("Domain '%s' does not exist." % domid) + usage('sched-sdp') + + # print header if we aren't setting any parameters + if len(opts.keys()) == 0: + print '%-33s %4s %-4s' % \ + ('Name','ID','Priority') + + for d in doms: + # fetch current values so as not to clobber them + try: + sdp_raw = server.xend.domain.cpu_sdp_get(d['domid']) + except xmlrpclib.Fault: + # domain does not support sched-sdp? 
+ print "seems platfrom does not support sched-sdp" + sdp_raw = {} + + sdp_info = parse_sdp_info(sdp_raw) + sdp_info['name'] = d['name'] + # update values in case of call to set + if len(opts.keys()) > 0: + for k in opts.keys(): + sdp_info[k]=opts[k] + + # send the update, converting user input + v = map(int, [sdp_info['priority']]) + rv = server.xend.domain.cpu_sdp_set(d['domid'], *v) + if int(rv) != 0: + err("Failed to set sdp parameters (rv=%d)."%(rv)) + + # not setting values, display info + else: + print_sdp(sdp_info) def xm_sched_credit(args): """Get/Set options for Credit Scheduler.""" @@ -2825,6 +2909,7 @@ # scheduler "sched-sedf": xm_sched_sedf, "sched-credit": xm_sched_credit, + "sched-sdp": xm_sched_sdp, # block "block-attach": xm_block_attach, "block-detach": xm_block_detach, diff -r e2f36d066b7b xen/common/Makefile --- a/xen/common/Makefile Mon Dec 22 13:48:40 2008 +0000 +++ b/xen/common/Makefile Thu Jan 08 01:32:02 2009 -0500 @@ -13,6 +13,7 @@ obj-y += rangeset.o obj-y += sched_credit.o obj-y += sched_sedf.o +obj-y += sched_sdp.o obj-y += schedule.o obj-y += shutdown.o obj-y += softirq.o diff -r e2f36d066b7b xen/common/sched_sdp.c --- /dev/null Thu Jan 01 00:00:00 1970 +0000 +++ b/xen/common/sched_sdp.c Thu Jan 08 01:32:02 2009 -0500 @@ -0,0 +1,736 @@ +/**************************************************************************** + * (C) 2008-2009 Zhiyuan Shao, Huazhong University of Sci.&Tech. 
PRC + **************************************************************************** + * + * File: common/sched_sdp.c + * Author: Zhiyuan Shao + * + * Description: Scheduler for Client Virtualization + */ + +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include + +/* + Macros used by the SDP scheduler +*/ +#define SDP_MSECS_PER_TICK 10 +#define SDP_MSECS_PER_TSLICE 10 + +#define SDP_PCPU(_c) \ + ((struct sdp_pcpu *)per_cpu(schedule_data, _c).sched_priv) +#define SDP_DOM(_dom) ((struct sdp_dom *) (_dom)->sched_priv) +#define SDP_VCPU(_vcpu) ((struct sdp_vcpu *) (_vcpu)->sched_priv) + +#define SDP_PRI_IDLE 0 +#define SDP_BONUS_IDLE (-1024) + +#define SDP_PRI_DEFAULT 80 +#define SDP_BONUS_DEFAULT 0 + +#define RUNQ(_cpu) (&(SDP_PCPU(_cpu)->runq)) + + +/* + * Physical CPU + */ +struct sdp_pcpu { + struct list_head runq; /* runnable vcpus of this cpu, sorted by bonus */ + uint32_t runq_sort_last; /* not referenced by any live code in this file */ + struct timer ticker; /* only used by the commented-out tick code */ + unsigned int tick; /* only used by the commented-out tick code */ +}; + +/* + * Domain + */ +struct sdp_dom { + struct list_head active_vcpu; /* this domain's sdp_vcpu entries */ + struct list_head active_sdom_elem; /* link in sdp_priv.active_sdom */ + struct domain *dom; + uint16_t pri; /* priority handed to every vcpu of the domain */ + uint16_t active_vcpu_count; +}; + +/* + * Virtual CPU + */ +//The sdp_vcpu structure carries a bonus field in addition to pri. +// The intention is as follows: +// when a vcpu is woken up it is granted a bonus of +// MILLISECS(sdp_vcpu->pri - SDP_PRI_DEFAULT), or zero if pri is not above the default. +// The runqueue is sorted by this bonus field, not by the priority field! +// When the vcpu is scheduled out, the time it has just run is subtracted from +// its bonus until the bonus is back at zero. 
+ +//rules of wake up: +// if the newly woken vcpu has lower (or equal) priority than current, it will NOT preempt; +// if the newly woken vcpu has higher priority than current, it preempts +struct sdp_vcpu { + struct list_head runq_elem; + struct list_head active_vcpu_elem; + struct sdp_dom *sdom; + struct vcpu *vcpu; + uint16_t flags; + uint16_t pri; + s32 bonus; +}; + +/* + * System-wide private data + */ +struct sdp_private { + spinlock_t lock; + struct list_head active_sdom; + uint32_t ncpus; + unsigned int master; + cpumask_t idlers; + //if resort_before_sched_needed = 1, the next sdp_schedule() resorts its runq and sets it back to 0. + // NOTE(review): the flag is global while runqs are per-cpu -- confirm the right runq gets resorted + short resort_before_sched_needed; +}; + +/* + * Global variables + */ +static struct sdp_private sdp_priv; + + +/* ====================================== Utility routines =================================== */ +static inline int +__cycle_cpu(int cpu, const cpumask_t *mask) +{ + int nxt = next_cpu(cpu, *mask); + if (nxt == NR_CPUS) + nxt = first_cpu(*mask); + return nxt; +} + +static inline int +__vcpu_on_runq(struct sdp_vcpu *svc) +{ + return !list_empty(&svc->runq_elem); +} + +static inline struct sdp_vcpu * +__runq_elem(struct list_head *elem) +{ + return list_entry(elem, struct sdp_vcpu, runq_elem); +} +/* Insert svc into cpu's runq in front of the first entry with a strictly smaller bonus (FIFO among equal bonuses). */ +static inline void +__runq_insert(unsigned int cpu, struct sdp_vcpu *svc) +{ + const struct list_head * const runq = RUNQ(cpu); + struct sdp_vcpu * iter_svc; + struct list_head *iter; + + BUG_ON( __vcpu_on_runq(svc) ); + BUG_ON( cpu != svc->vcpu->processor ); + + list_for_each( iter, runq ) + { + iter_svc = __runq_elem(iter); + if ( svc->bonus > iter_svc->bonus ) + break; + } + + list_add_tail(&svc->runq_elem, iter); +} + +static inline void +__runq_remove(struct sdp_vcpu *svc) +{ + BUG_ON( !__vcpu_on_runq(svc) ); + list_del_init(&svc->runq_elem); +} +/* Raise SCHEDULE_SOFTIRQ on cpu when new has strictly higher priority than the vcpu currently running there. */ +static inline void +__runq_tickle(unsigned int cpu, struct sdp_vcpu *new) +{ + struct sdp_vcpu * const cur = + SDP_VCPU(per_cpu(schedule_data, cpu).curr); + cpumask_t mask; + + ASSERT(cur); + 
cpus_clear(mask); + + /* If strictly higher priority than current VCPU, signal the CPU */ + if ( new->pri > cur->pri ) + { +// printk( "ready to call scheduling procedure on cpu:%d\n", cpu ); + cpu_set(cpu, mask); + } +// else{ +// printk( "but at cpu %d new->pri<=cur->pri, new->pri=%d, new->domid=%d, new->vcpuid=%d\n", +// cpu, new->pri, new->vcpu->domain->domain_id, new->vcpu->vcpu_id ); +// printk( "cur->pri=%d, cur->domid=%d, cur->vcpuid=%d\n", +// cur->pri, cur->vcpu->domain->domain_id, cur->vcpu->vcpu_id ); +// } + + /* Send scheduler interrupts to designated CPUs */ + if ( !cpus_empty(mask) ) + cpumask_raise_softirq(mask, SCHEDULE_SOFTIRQ); +} +/* +static void +sdp_tick(void *_cpu) +{ + unsigned int cpu = (unsigned long)_cpu; + struct sdp_pcpu *spc = SDP_PCPU(cpu); + + spc->tick++; + + set_timer(&spc->ticker, NOW() + MILLISECS(SDP_MSECS_PER_TICK)); +} +*/ +static int +sdp_pcpu_init( int cpu ) +{ + struct sdp_pcpu *spc; + unsigned long flags; + +// printk( "sdp_pcpu_init is called, cpu = %d.\n", cpu ); + + /* Allocate per-PCPU info */ + spc = xmalloc(struct sdp_pcpu); + if ( spc == NULL ) return -ENOMEM; + + spin_lock_irqsave(&sdp_priv.lock, flags); + + /* Initialize/update system-wide config */ + if ( sdp_priv.ncpus <= cpu ) + sdp_priv.ncpus = cpu + 1; + if ( sdp_priv.master >= sdp_priv.ncpus ) + sdp_priv.master = cpu; + +// init_timer(&spc->ticker, sdp_tick, (void *)(unsigned long)cpu, cpu); + INIT_LIST_HEAD(&spc->runq); + per_cpu(schedule_data, cpu).sched_priv = spc; + + /* Start off idling... 
*/ + BUG_ON(!is_idle_vcpu(per_cpu(schedule_data, cpu).curr)); + cpu_set(cpu, sdp_priv.idlers); + + spin_unlock_irqrestore(&sdp_priv.lock, flags); + +// printk( "sdp_pcpu_init returned.\n" ); + + return 0; +} + +static void +sdp_dump_vcpu(struct sdp_vcpu *svc) +{ + struct sdp_dom * const sdom = svc->sdom; + + printk("[%i.%i] pri=%i flags=%x cpu=%i bonus=%d", + svc->vcpu->domain->domain_id, + svc->vcpu->vcpu_id, + svc->pri, + svc->flags, + svc->vcpu->processor, + svc->bonus ); + + if ( sdom ) + { + printk(" priority=%d", sdom->pri ); + } + + printk("\n"); +} + +//resort the runq of cpu by the bonus field (largest bonus first), moving +// out-of-order entries toward the head one adjacent swap per pass +static void +sdp_runq_resort( unsigned int cpu ) +{ + struct sdp_pcpu * const spc = SDP_PCPU(cpu); + struct list_head *runq, *elem, *follow; + struct sdp_vcpu *svc_elem, *svc_follow; + int i, swapped; + + runq = &spc->runq; + elem = runq->next; + + for( i=0; i<10; i++){ + elem = runq->next; //restart every pass at the head; at most 10 passes + swapped = 0; + while ( elem != runq ) + { + follow = elem; + elem = elem->next; + if (elem == runq ) break; + + //compare the two svc + svc_elem = __runq_elem(elem); + svc_follow = __runq_elem(follow); + + if( svc_elem->bonus > svc_follow->bonus ){ + //swap the two + // elem has the larger bonus but is queued behind follow, 
+ // so unlink it and re-insert it directly in front of follow. + // One swap per pass: the next pass restarts from the head, so a + // freshly boosted vcpu keeps moving toward the front. + list_del( elem ); + list_add_tail( elem, follow ); + swapped = 1; + break; + } + } + + if ( swapped == 0 ) break; + } +// if( i>0 ) +// printk( "===============sdp_runq_resort: sorted for %d times===\n", i ); +} + +/* ====================================== Exposed routines =================================== */ +static int +sdp_dom_init(struct domain *dom) +{ + struct sdp_dom *sdom; + +// printk( "sdp_dom_init is called\n" ); + + if ( is_idle_domain(dom) ) + return 0; + + sdom = xmalloc(struct sdp_dom); + if ( sdom == NULL ) return -ENOMEM; + + /* Initialize */ + INIT_LIST_HEAD(&sdom->active_vcpu); + sdom->active_vcpu_count = 0; + INIT_LIST_HEAD(&sdom->active_sdom_elem); + sdom->dom = dom; + sdom->pri = SDP_PRI_DEFAULT; //set a default value for later change + dom->sched_priv = sdom; + + //join the scheduling anyway, add to sdp_priv + // actually, it does not matter if it join sdp_priv, but it may works + // if later, we find a global adjustment mechanism is needed. + list_add(&sdom->active_sdom_elem, &sdp_priv.active_sdom); + + return 0; +} + +static void +sdp_dom_destroy(struct domain *dom) +{ + struct sdp_dom *sdom = SDP_DOM(dom); //Note, sdom==NULL for IDLE domain! + if ( sdom == NULL ) return; //sdp_dom_init() allocated nothing for such a domain + list_del( &sdom->active_sdom_elem ); + xfree(sdom); +} + +static int +sdp_vcpu_init(struct vcpu *vc) +{ + struct domain * const dom = vc->domain; + struct sdp_dom *sdom = SDP_DOM(dom); //Note, sdom==NULL for IDLE domain! 
+ struct sdp_vcpu *svc; + +// printk( "sdp_vcpu_init is called, processor:%d\n", vc->processor ); +// printk( "domid:%d vcpuid:%d\n", dom->domain_id, vc->vcpu_id ); + + /* Allocate per-PCPU info first, so that failing here leaves nothing to undo */ + if ( unlikely(!SDP_PCPU(vc->processor)) ) + if ( sdp_pcpu_init(vc->processor) != 0 ) + return -1; + + /* Allocate per-VCPU info */ + svc = xmalloc(struct sdp_vcpu); + if ( svc == NULL ) return -ENOMEM; + + INIT_LIST_HEAD(&svc->runq_elem); + INIT_LIST_HEAD(&svc->active_vcpu_elem); + svc->sdom = sdom; + svc->vcpu = vc; + svc->pri = is_idle_domain(dom) ? SDP_PRI_IDLE : sdom->pri; + vc->sched_priv = svc; + svc->flags = 0; //never assigned anywhere else, yet printed by sdp_dump_vcpu() + //join the sdom now. it is useful, since the priority value may change + // if domctl hypercall is made later. however, as idle domain has no + // such sdom, leave it + if ( svc->pri != SDP_PRI_IDLE ){ + svc->bonus = SDP_BONUS_DEFAULT; + list_add(&svc->active_vcpu_elem, &sdom->active_vcpu); + sdom->active_vcpu_count++; + //not actived, but also increase it by one. + // later, we should change it a better name, such as vcpu_count + }else + svc->bonus = SDP_BONUS_IDLE; + + return 0; +} + +static void +sdp_vcpu_destroy(struct vcpu *vc) +{ + struct sdp_vcpu * const svc = SDP_VCPU(vc); + struct sdp_dom * const sdom = svc->sdom; + unsigned long flags; + +// printk( "sdp_vcpu_destroy is called\n" ); + + BUG_ON( sdom == NULL ); + BUG_ON( !list_empty(&svc->runq_elem) ); + + spin_lock_irqsave(&sdp_priv.lock, flags); + + if ( !list_empty(&svc->active_vcpu_elem) ) + { + list_del_init( &svc->active_vcpu_elem ); + sdom->active_vcpu_count--; //undo the increment done in sdp_vcpu_init() + } + + spin_unlock_irqrestore(&sdp_priv.lock, flags); + + xfree(svc); + +} + +static void +sdp_vcpu_sleep(struct vcpu *vc) +{ + struct sdp_vcpu * const svc = SDP_VCPU(vc); + +// printk( "sdp_vcpu_sleep is called\n" ); + + BUG_ON( is_idle_vcpu(vc) ); + + if ( per_cpu(schedule_data, vc->processor).curr == vc ) + cpu_raise_softirq(vc->processor, SCHEDULE_SOFTIRQ); + else if ( __vcpu_on_runq(svc) ) + 
__runq_remove(svc); +} + +//rules of wake up: +// if the newly woken vcpu has lower (or equal) priority than current, it will NOT preempt; +// if the newly woken vcpu has higher priority than current, it preempts +static void +sdp_vcpu_wake(struct vcpu *vc) +{ + struct sdp_vcpu * const svc = SDP_VCPU(vc); + const unsigned int cpu = vc->processor; + unsigned long flags; //irqsave: the caller's interrupt state must survive the unlock (TODO confirm the caller in schedule.c enters with IRQs off) +// printk( "sdp_vcpu_wake is called, woken vcpu id:%d domainid:%d, wake point:%d\n", +// vc->vcpu_id, vc->domain->domain_id, cpu ); + + BUG_ON( is_idle_vcpu(vc) ); + + if ( unlikely(per_cpu(schedule_data, cpu).curr == vc) ) + { + spin_lock_irqsave(&sdp_priv.lock, flags); + if( svc->pri > SDP_PRI_DEFAULT ) + svc->bonus = MILLISECS( svc->pri - SDP_PRI_DEFAULT ); + else + svc->bonus = 0; + spin_unlock_irqrestore(&sdp_priv.lock, flags); + //no need to preempt since it is running + return; + } + if ( unlikely(__vcpu_on_runq(svc)) ) + { + spin_lock_irqsave(&sdp_priv.lock, flags); + if( svc->pri > SDP_PRI_DEFAULT ){ + svc->bonus = MILLISECS( svc->pri - SDP_PRI_DEFAULT ); + sdp_priv.resort_before_sched_needed = 1; +// printk( "wake on runq, at %d PCPU, domid:%d vcpuid:%d, resort needed.\n", +// cpu, vc->domain->domain_id, vc->vcpu_id ); + }else + svc->bonus = 0; + spin_unlock_irqrestore(&sdp_priv.lock, flags); + + if( svc->pri > SDP_VCPU(per_cpu(schedule_data, cpu).curr)->pri ) + __runq_tickle(cpu, svc); + //need to consider preemption + return; + } + + if( svc->pri > SDP_PRI_DEFAULT ) + svc->bonus = MILLISECS( svc->pri - SDP_PRI_DEFAULT ); + else + svc->bonus = 0; + + /* Put the VCPU on the runq and tickle CPUs */ + __runq_insert(cpu, svc); + + if( svc->pri > SDP_VCPU(per_cpu(schedule_data, cpu).curr)->pri ) + __runq_tickle(cpu, svc); +} +/* getinfo: report the domain's priority. putinfo: validate and store a new priority and copy it to every vcpu of the domain. */ +static int +sdp_dom_cntl( + struct domain *d, + struct xen_domctl_scheduler_op *op) +{ + struct sdp_dom * const sdom = SDP_DOM(d); + unsigned long flags; + + struct list_head *iter_vcpu, *next_vcpu; + struct sdp_vcpu * svc; + +// printk( "sdp_dom_cntl is called\n" ); + + if ( op->cmd == XEN_DOMCTL_SCHEDOP_getinfo ) + { +// printk( "return priority:%d\n", 
sdom->pri ); + op->u.sdp.pri = sdom->pri; + } + else + { + ASSERT(op->cmd == XEN_DOMCTL_SCHEDOP_putinfo); + + if ( op->u.sdp.pri > 100 ) return -EINVAL; //the tools document [0,100]; larger values would also overflow the s32 bonus + + spin_lock_irqsave(&sdp_priv.lock, flags); + + //a priority of 0 is ignored and the current value kept: 0 is SDP_PRI_IDLE, + // which sdp_vcpu_init() uses to tell idle vcpus apart + if ( op->u.sdp.pri != 0 ) + sdom->pri = op->u.sdp.pri; + //change the pri value of vcpus that belong to this domain accordingly + //(the bonus is only recomputed from the new pri at the vcpu's next wake) + list_for_each_safe( iter_vcpu, next_vcpu, &sdom->active_vcpu ){ + svc = list_entry(iter_vcpu, struct sdp_vcpu, active_vcpu_elem); + BUG_ON( sdom != svc->sdom ); + svc->pri = sdom->pri; + //no per-vcpu printk here: this path is reachable from the toolstack on every change + } + + spin_unlock_irqrestore(&sdp_priv.lock, flags); + } + + return 0; +} + +static int +sdp_cpu_pick(struct vcpu *vc) +{ + cpumask_t cpus; + cpumask_t idlers; + int cpu; + +// printk( "sdp_cpu_pick is called. vc->id:%d,vc->processor:%d\n", +// vc->vcpu_id, vc->processor ); + /* + * Pick from online CPUs in VCPU's affinity mask, giving a + * preference to its current processor if it's in there. + */ + cpus_and(cpus, cpu_online_map, vc->cpu_affinity); + cpu = cpu_isset(vc->processor, cpus) + ? vc->processor + : __cycle_cpu(vc->processor, &cpus); + ASSERT( !cpus_empty(cpus) && cpu_isset(cpu, cpus) ); + + /* + * Try to find an idle processor within the above constraints. + * + * In multi-core and multi-threaded CPUs, not all idle execution + * vehicles are equal! + * + * We give preference to the idle execution vehicle with the most + * idling neighbours in its grouping. This distributes work across + * distinct cores first and guarantees we don't do something stupid + * like run two VCPUs on co-hyperthreads while there are idle cores + * or sockets. 
+ */ + idlers = sdp_priv.idlers; + cpu_set(cpu, idlers); + cpus_and(cpus, cpus, idlers); + cpu_clear(cpu, cpus); + + while ( !cpus_empty(cpus) ) + { + cpumask_t cpu_idlers; + cpumask_t nxt_idlers; + int nxt; + + nxt = __cycle_cpu(cpu, &cpus); + + if ( cpu_isset(cpu, cpu_core_map[nxt]) ) + { + ASSERT( cpu_isset(nxt, cpu_core_map[cpu]) ); + cpus_and(cpu_idlers, idlers, cpu_sibling_map[cpu]); + cpus_and(nxt_idlers, idlers, cpu_sibling_map[nxt]); + } + else + { + ASSERT( !cpu_isset(nxt, cpu_core_map[cpu]) ); + cpus_and(cpu_idlers, idlers, cpu_core_map[cpu]); + cpus_and(nxt_idlers, idlers, cpu_core_map[nxt]); + } + + if ( cpus_weight(cpu_idlers) < cpus_weight(nxt_idlers) ) + { + cpu = nxt; + cpu_clear(cpu, cpus); + } + else + { + cpus_andnot(cpus, cpus, nxt_idlers); + } + } + +// printk( "sdp_cpu_pick returns %d\n", cpu ); + return cpu; +} +/* Pick the next vcpu for this cpu: decay the outgoing vcpu's bonus, requeue it if still runnable, then run the head of the runq for one SDP_MSECS_PER_TSLICE slice. */ +static struct task_slice +sdp_schedule(s_time_t now) +{ + const int cpu = smp_processor_id(); + struct list_head * const runq = RUNQ(cpu); + struct sdp_vcpu * const scurr = SDP_VCPU(current); + struct sdp_vcpu *snext; + struct task_slice ret; + +// static int debug_time =0; + + //reorder the queue before scheduleing? + if ( sdp_priv.resort_before_sched_needed ){ + sdp_runq_resort( cpu ); + sdp_priv.resort_before_sched_needed = 0; + } + + //recalculate the bonus value of this vcpu; done in s_time_t so a long run cannot wrap the s32 bonus + if ( scurr->bonus > 0 ){ + s_time_t left = scurr->bonus - (now - scurr->vcpu->runstate.state_entry_time); + scurr->bonus = ( left > 0 ) ? (s32)left : 0; + } + + /* + * Select next runnable local VCPU (ie top of local runq) + */ + if ( vcpu_runnable(current) ) + __runq_insert(cpu, scurr); + else + BUG_ON( is_idle_vcpu(current) || list_empty(runq) ); + + snext = __runq_elem(runq->next); + + __runq_remove(snext); + + /* Keep the idlers mask read by sdp_cpu_pick() current, then return the task to run next. */ + if ( is_idle_vcpu(snext->vcpu) ) cpu_set(cpu, sdp_priv.idlers); 
+ else cpu_clear(cpu, sdp_priv.idlers); + ret.time = MILLISECS(SDP_MSECS_PER_TSLICE); + ret.task = snext->vcpu; + + return ret; +} + +static void sdp_dump_pcpu(int cpu) +{ + struct list_head *runq, *iter; + struct sdp_pcpu *spc; + struct sdp_vcpu *svc; + int loop; + char cpustr[100]; + + spc = SDP_PCPU(cpu); + runq = &spc->runq; + + cpumask_scnprintf(cpustr, sizeof(cpustr), cpu_sibling_map[cpu]); + printk("sibling=%s, ", cpustr); + cpumask_scnprintf(cpustr, sizeof(cpustr), cpu_core_map[cpu]); + printk("core=%s\n", cpustr); + + /* current VCPU */ + svc = SDP_VCPU(per_cpu(schedule_data, cpu).curr); + if ( svc ) + { + printk("\trun: "); + sdp_dump_vcpu(svc); + }else{ + printk( "no currently running vcpu!\n" ); + } + + loop = 0; + list_for_each( iter, runq ) + { + svc = __runq_elem(iter); + if ( svc ) + { + printk("\t%3d: ", ++loop); + sdp_dump_vcpu(svc); + } + } + +} + +static void sdp_dump(void) +{ +// printk( "sdp_dump is called\n" ); +} + +static void sdp_init(void) +{ + spin_lock_init(&sdp_priv.lock); + INIT_LIST_HEAD(&sdp_priv.active_sdom); + sdp_priv.ncpus = 0; + sdp_priv.master = UINT_MAX; + cpus_clear(sdp_priv.idlers); + sdp_priv.resort_before_sched_needed = 0; +} + +/* +static __init int sdp_start_tickers(void) +{ + struct sdp_pcpu *spc; + unsigned int cpu; + + printk( "sdp_start_tickers is called\n" ); + + if ( sdp_priv.ncpus == 0 ) + return 0; + + for_each_online_cpu ( cpu ) + { + spc = SDP_PCPU(cpu); + set_timer(&spc->ticker, NOW() + MILLISECS(SDP_MSECS_PER_TICK)); + } + + printk( "sdp_start_tickers is returned\n" ); + return 0; +} +__initcall(sdp_start_tickers); +*/ + +struct scheduler sched_sdp_def = { + .name = "Scheduler for Client Virtualization", + .opt_name = "sdp", + .sched_id = XEN_SCHEDULER_SDP, + + .init_domain = sdp_dom_init, + .destroy_domain = sdp_dom_destroy, + + .init_vcpu = sdp_vcpu_init, + .destroy_vcpu = sdp_vcpu_destroy, + + .sleep = sdp_vcpu_sleep, + .wake = sdp_vcpu_wake, + + .adjust = sdp_dom_cntl, + + .pick_cpu = sdp_cpu_pick, + .do_schedule = sdp_schedule, + 
.dump_cpu_state = sdp_dump_pcpu, + .dump_settings = sdp_dump, + .init = sdp_init, +}; + diff -r e2f36d066b7b xen/common/schedule.c --- a/xen/common/schedule.c Mon Dec 22 13:48:40 2008 +0000 +++ b/xen/common/schedule.c Thu Jan 08 01:32:02 2009 -0500 @@ -51,9 +51,11 @@ extern struct scheduler sched_sedf_def; extern struct scheduler sched_credit_def; +extern struct scheduler sched_sdp_def; static struct scheduler *schedulers[] = { &sched_sedf_def, &sched_credit_def, + &sched_sdp_def, NULL }; diff -r e2f36d066b7b xen/include/public/domctl.h --- a/xen/include/public/domctl.h Mon Dec 22 13:48:40 2008 +0000 +++ b/xen/include/public/domctl.h Thu Jan 08 01:32:02 2009 -0500 @@ -294,6 +294,7 @@ /* Scheduler types. */ #define XEN_SCHEDULER_SEDF 4 #define XEN_SCHEDULER_CREDIT 5 +#define XEN_SCHEDULER_SDP 6 /* Set or get info? */ #define XEN_DOMCTL_SCHEDOP_putinfo 0 #define XEN_DOMCTL_SCHEDOP_getinfo 1 @@ -312,6 +313,9 @@ uint16_t weight; uint16_t cap; } credit; + struct xen_domctl_sched_sdp { + uint16_t pri; + } sdp; } u; }; typedef struct xen_domctl_scheduler_op xen_domctl_scheduler_op_t;