From mboxrd@z Thu Jan 1 00:00:00 1970 From: Joe Jin Subject: Re: [PATCH] xend: do not polling vcpus info if guest state is not RUNNING or PAUSED Date: Tue, 19 Nov 2013 18:41:37 +0800 Message-ID: <528B4061.1000305@oracle.com> References: <528B017D.5020202@oracle.com> <528B1B4F.2010102@citrix.com> Mime-Version: 1.0 Content-Type: text/plain; charset="iso-8859-1" Content-Transfer-Encoding: quoted-printable Return-path: In-Reply-To: <528B1B4F.2010102@citrix.com> List-Unsubscribe: , List-Post: List-Help: List-Subscribe: , Sender: xen-devel-bounces@lists.xen.org Errors-To: xen-devel-bounces@lists.xen.org To: =?ISO-8859-1?Q?Roger_Pau_Monn=E9?= , Konrad Rzeszutek Wilk , ian.jackson@eu.citrix.com, Ian Campbell , Keir Fraser Cc: xen-devel List-Id: xen-devel@lists.xenproject.org On 11/19/13 16:03, Roger Pau Monn=E9 wrote: > On 19/11/13 07:13, Joe Jin wrote: >> When created new guest on NUMA server, xend tried to get the best node by >> calculated all vcpus info, the race is if other geust is rebooting, the >> guest in the list when entered find_relaxed_node(), but when call >> getVCPUInfo() the guest be terminated, then getVCPUInfo() will fail with >> below error: >> >> [2013-09-04 20:01:26 6254] ERROR (XendDomainInfo:496) VM start failed >> Traceback (most recent call last): >> File "/usr/lib64/python2.4/site-packages/xen/xend/XendDomainInfo.py", = line 482, in start >> XendTask.log_progress(31, 60, self._initDomain) >> File "/usr/lib64/python2.4/site-packages/xen/xend/XendTask.py", line 2= 09, in log_progress >> retval =3D func(*args, **kwds) >> File "/usr/lib64/python2.4/site-packages/xen/xend/XendDomainInfo.py", = line 2918, in _initDomain >> node =3D self._setCPUAffinity() >> File "/usr/lib64/python2.4/site-packages/xen/xend/XendDomainInfo.py", = line 2835, in _setCPUAffinity >> best_node =3D find_relaxed_node(candidate_node_list)[0] >> File "/usr/lib64/python2.4/site-packages/xen/xend/XendDomainInfo.py", = line 2803, in find_relaxed_node >> cpuinfo =3D dom.getVCPUInfo() 
>> File "/usr/lib64/python2.4/site-packages/xen/xend/XendDomainInfo.py", = line 1600, in getVCPUInfo >> raise XendError(str(exn)) >> XendError: (3, 'No such process') >> >> This patch will let find_relaxed_node() only polling the RUNNING or PAUS= ED >> guest vpus info to avoid the race. >> >> Signed-off-by: Joe Jin >> --- >> tools/python/xen/xend/XendDomainInfo.py | 2 ++ >> 1 files changed, 2 insertions(+), 0 deletions(-) >> >> diff --git a/tools/python/xen/xend/XendDomainInfo.py b/tools/python/xen/= xend/XendDomainInfo.py >> index e9d3e7e..66e4b9f 100644 >> --- a/tools/python/xen/xend/XendDomainInfo.py >> +++ b/tools/python/xen/xend/XendDomainInfo.py >> @@ -2734,6 +2734,8 @@ class XendDomainInfo: >> from xen.xend import XendDomain >> doms =3D XendDomain.instance().list('all') >> for dom in filter (lambda d: d.domid !=3D self.domid, d= oms): >> + if dom._stateGet() not in (DOM_STATE_RUNNING,DOM_ST= ATE_PAUSED): >> + continue > = > Isn't it possible that the domain has rebooted and is no longer there > between this two calls? > = > IMHO it's very unlikely, but there's still a window where getVCPUInfo > could fail. > = Yes, you're right, this patch just reduces the window. = I created a new patch for this, please comment! 
[PATCH] xend: getVCPUInfo should handle died domain When creating a new guest on a NUMA server, xend tries to find the best node by collecting the vcpu info of all guests. There is a race if another guest is rebooting: the guest is still in the list when find_relaxed_node() is entered, but by the time getVCPUInfo() is called the guest has already been terminated, so getVCPUInfo() fails with the error below: [2013-09-04 20:01:26 6254] ERROR (XendDomainInfo:496) VM start failed Traceback (most recent call last): File "/usr/lib64/python2.4/site-packages/xen/xend/XendDomainInfo.py", lin= e 482, in start XendTask.log_progress(31, 60, self._initDomain) File "/usr/lib64/python2.4/site-packages/xen/xend/XendTask.py", line 209,= in log_progress retval =3D func(*args, **kwds) File "/usr/lib64/python2.4/site-packages/xen/xend/XendDomainInfo.py", lin= e 2918, in _initDomain node =3D self._setCPUAffinity() File "/usr/lib64/python2.4/site-packages/xen/xend/XendDomainInfo.py", lin= e 2835, in _setCPUAffinity best_node =3D find_relaxed_node(candidate_node_list)[0] File "/usr/lib64/python2.4/site-packages/xen/xend/XendDomainInfo.py", lin= e 2803, in find_relaxed_node cpuinfo =3D dom.getVCPUInfo() File "/usr/lib64/python2.4/site-packages/xen/xend/XendDomainInfo.py", lin= e 1600, in getVCPUInfo raise XendError(str(exn)) XendError: (3, 'No such process') This patch handles that situation: when the RuntimeError carries errno ESRCH (the domain has already died), sxpr is returned instead of raising XendError. Signed-off-by: Joe Jin --- tools/python/xen/xend/XendDomainInfo.py | 4 ++++ 1 files changed, 4 insertions(+), 0 deletions(-) diff --git a/tools/python/xen/xend/XendDomainInfo.py b/tools/python/xen/xen= d/XendDomainInfo.py index e9d3e7e..c6414ed 100644 --- a/tools/python/xen/xend/XendDomainInfo.py +++ b/tools/python/xen/xend/XendDomainInfo.py @@ -34,6 +34,7 @@ import os import stat import shutil import traceback +import errno from types import StringTypes = import xen.lowlevel.xc @@ -1541,6 +1542,9 @@ class XendDomainInfo: return sxpr = except RuntimeError, exn: + # Domain already died. 
+ if exn.args[0] =3D=3D errno.ESRCH: + return sxpr raise XendError(str(exn)) = = -- = 1.7.1