All of lore.kernel.org
 help / color / mirror / Atom feed
* [PATCH] Fix xend xenstore handling
@ 2007-12-21 14:57 John Levon
  2007-12-21 15:08 ` Daniel P. Berrange
  0 siblings, 1 reply; 7+ messages in thread
From: John Levon @ 2007-12-21 14:57 UTC (permalink / raw)
  To: xen-devel


This is the cause of the "reboot loop" xend failures I reported earlier.
Note that I've only tested this patch against 3.1, however the code,
and the fix, is the same in unstable. I realise it's late in the cycle
but this bug is bad enough to need fixing IMHO.

cheers,
john

PS you might get duplicate mails later, Sun's mail server was playing up

# HG changeset patch
# User john.levon@sun.com
# Date 1198248324 28800
# Node ID b29e155a85ef663170b0651798c84db79bafcdd8
# Parent  d7974e8d8f5131356e7c8fc87f880d737044e91b
Fix xend xenstore handling.

xend can get into a situation where two processes are attempting to
interact with the xenstore socket, with disastrous results. Fix the two
bad users of xstransact, add a big warning, and fix the destructor so
future mistakes will be detected earlier.

Signed-off-by: John Levon <john.levon@sun.com>

diff --git a/tools/python/xen/xend/XendDomainInfo.py b/tools/python/xen/xend/XendDomainInfo.py
--- a/tools/python/xen/xend/XendDomainInfo.py
+++ b/tools/python/xen/xend/XendDomainInfo.py
@@ -1533,10 +1533,9 @@ class XendDomainInfo:
                 except:
                     # Log and swallow any exceptions in removal --
                     # there's nothing more we can do.
-                        log.exception("Device release failed: %s; %s; %s",
-                                      self.info['name_label'], devclass, dev)
-
-            
+                    log.exception("Device release failed: %s; %s; %s",
+                                  self.info['name_label'], devclass, dev)
+        t.abort()
 
     def getDeviceController(self, name):
         """Get the device controller for this domain, and if it
@@ -1848,7 +1847,6 @@ class XendDomainInfo:
         # build list of phantom devices to be removed after normal devices
         plist = []
         if self.domid is not None:
-            from xen.xend.xenstore.xstransact import xstransact
             t = xstransact("%s/device/vbd" % GetDomainPath(self.domid))
             for dev in t.list():
                 backend_phantom_vbd = xstransact.Read("%s/device/vbd/%s/phantom_vbd" \
@@ -1858,6 +1856,7 @@ class XendDomainInfo:
                                       % backend_phantom_vbd)
                     plist.append(backend_phantom_vbd)
                     plist.append(frontend_phantom_vbd)
+            t.abort()
         return plist
 
     def _cleanup_phantom_devs(self, plist):
diff --git a/tools/python/xen/xend/server/pciif.py b/tools/python/xen/xend/server/pciif.py
--- a/tools/python/xen/xend/server/pciif.py
+++ b/tools/python/xen/xend/server/pciif.py
@@ -22,8 +22,6 @@ from xen.xend import sxp
 from xen.xend import sxp
 from xen.xend.XendError import VmError
 from xen.xend.XendLogging import log
-
-from xen.xend.xenstore.xstransact import xstransact
 
 from xen.xend.server.DevController import DevController
 
diff --git a/tools/python/xen/xend/xenstore/xstransact.py b/tools/python/xen/xend/xenstore/xstransact.py
--- a/tools/python/xen/xend/xenstore/xstransact.py
+++ b/tools/python/xen/xend/xenstore/xstransact.py
@@ -7,8 +7,16 @@
 
 from xen.xend.xenstore.xsutil import xshandle
 
+class xstransact:
+    """WARNING: Be very careful if you're instantiating an xstransact object
+       yourself (i.e. not using the capitalized static helpers like .Read().
+       It is essential that you clean up the object in place via
+       t.commit/abort(): GC can happen at any time, including contexts where
+       it's not safe to to use the shared xenstore socket fd. In particular,
+       if xend forks, and GC occurs, we can have two processes trying to
+       use the same xenstore fd, and all hell breaks loose.
+       """
 
-class xstransact:
 
     def __init__(self, path = ""):
         
@@ -22,8 +30,9 @@ class xstransact:
         self.in_transaction = True
 
     def __del__(self):
+        # see above.
         if self.in_transaction:
-            xshandle().transaction_end(self.transaction, True)
+            raise RuntimeError("ERROR: GC of live transaction")
 
     def commit(self):
         if not self.in_transaction:

^ permalink raw reply	[flat|nested] 7+ messages in thread
* [PATCH] Fix xend xenstore handling
@ 2007-12-21 14:45 john.levon
  0 siblings, 0 replies; 7+ messages in thread
From: john.levon @ 2007-12-21 14:45 UTC (permalink / raw)
  To: xen-devel

# HG changeset patch
# User john.levon@sun.com
# Date 1198248324 28800
# Node ID b29e155a85ef663170b0651798c84db79bafcdd8
# Parent  d7974e8d8f5131356e7c8fc87f880d737044e91b
Fix xend xenstore handling.

xend can get into a situation where two processes are attempting to
interact with the xenstore socket, with disastrous results. Fix the two
bad users of xstransact, add a big warning, and fix the destructor so
future mistakes will be detected earlier.

Signed-off-by: John Levon <john.levon@sun.com>

diff --git a/tools/python/xen/xend/XendDomainInfo.py b/tools/python/xen/xend/XendDomainInfo.py
--- a/tools/python/xen/xend/XendDomainInfo.py
+++ b/tools/python/xen/xend/XendDomainInfo.py
@@ -1533,10 +1533,9 @@ class XendDomainInfo:
                 except:
                     # Log and swallow any exceptions in removal --
                     # there's nothing more we can do.
-                        log.exception("Device release failed: %s; %s; %s",
-                                      self.info['name_label'], devclass, dev)
-
-            
+                    log.exception("Device release failed: %s; %s; %s",
+                                  self.info['name_label'], devclass, dev)
+        t.abort()
 
     def getDeviceController(self, name):
         """Get the device controller for this domain, and if it
@@ -1848,7 +1847,6 @@ class XendDomainInfo:
         # build list of phantom devices to be removed after normal devices
         plist = []
         if self.domid is not None:
-            from xen.xend.xenstore.xstransact import xstransact
             t = xstransact("%s/device/vbd" % GetDomainPath(self.domid))
             for dev in t.list():
                 backend_phantom_vbd = xstransact.Read("%s/device/vbd/%s/phantom_vbd" \
@@ -1858,6 +1856,7 @@ class XendDomainInfo:
                                       % backend_phantom_vbd)
                     plist.append(backend_phantom_vbd)
                     plist.append(frontend_phantom_vbd)
+            t.abort()
         return plist
 
     def _cleanup_phantom_devs(self, plist):
diff --git a/tools/python/xen/xend/server/pciif.py b/tools/python/xen/xend/server/pciif.py
--- a/tools/python/xen/xend/server/pciif.py
+++ b/tools/python/xen/xend/server/pciif.py
@@ -22,8 +22,6 @@ from xen.xend import sxp
 from xen.xend import sxp
 from xen.xend.XendError import VmError
 from xen.xend.XendLogging import log
-
-from xen.xend.xenstore.xstransact import xstransact
 
 from xen.xend.server.DevController import DevController
 
diff --git a/tools/python/xen/xend/xenstore/xstransact.py b/tools/python/xen/xend/xenstore/xstransact.py
--- a/tools/python/xen/xend/xenstore/xstransact.py
+++ b/tools/python/xen/xend/xenstore/xstransact.py
@@ -7,8 +7,16 @@
 
 from xen.xend.xenstore.xsutil import xshandle
 
+class xstransact:
+    """WARNING: Be very careful if you're instantiating an xstransact object
+       yourself (i.e. not using the capitalized static helpers like .Read().
+       It is essential that you clean up the object in place via
+       t.commit/abort(): GC can happen at any time, including contexts where
+       it's not safe to to use the shared xenstore socket fd. In particular,
+       if xend forks, and GC occurs, we can have two processes trying to
+       use the same xenstore fd, and all hell breaks loose.
+       """
 
-class xstransact:
 
     def __init__(self, path = ""):
         
@@ -22,8 +30,9 @@ class xstransact:
         self.in_transaction = True
 
     def __del__(self):
+        # see above.
         if self.in_transaction:
-            xshandle().transaction_end(self.transaction, True)
+            raise RuntimeError("ERROR: GC of live transaction")
 
     def commit(self):
         if not self.in_transaction:

^ permalink raw reply	[flat|nested] 7+ messages in thread
* [PATCH] Fix xend xenstore handling
@ 2007-12-21  3:42 john.levon
  2007-12-22  0:53 ` John Levon
  0 siblings, 1 reply; 7+ messages in thread
From: john.levon @ 2007-12-21  3:42 UTC (permalink / raw)
  To: xen-devel

# HG changeset patch
# User john.levon@sun.com
# Date 1198195706 28800
# Node ID e57a1b9ef7ebb5fbd3d99af512d202eacd51662c
# Parent  a87624a82a68cbee7487aea465431ac1415637c5
Fix xend xenstore handling.

xend can get into a situation where two processes are attempting to
interact with the xenstore socket, with disastrous results. Fix the two
bad users of xstransact, add a big warning, and fix the destructor so
future mistakes will be detected earlier.

Signed-off-by: John Levon <john.levon@sun.com>

diff --git a/tools/python/xen/xend/XendDomainInfo.py b/tools/python/xen/xend/XendDomainInfo.py
--- a/tools/python/xen/xend/XendDomainInfo.py
+++ b/tools/python/xen/xend/XendDomainInfo.py
@@ -1537,10 +1537,9 @@ class XendDomainInfo:
                 except:
                     # Log and swallow any exceptions in removal --
                     # there's nothing more we can do.
-                        log.exception("Device release failed: %s; %s; %s",
-                                      self.info['name_label'], devclass, dev)
-
-            
+                    log.exception("Device release failed: %s; %s; %s",
+                                  self.info['name_label'], devclass, dev)
+        t.abort()
 
     def getDeviceController(self, name):
         """Get the device controller for this domain, and if it
@@ -1852,7 +1851,6 @@ class XendDomainInfo:
         # build list of phantom devices to be removed after normal devices
         plist = []
         if self.domid is not None:
-            from xen.xend.xenstore.xstransact import xstransact
             t = xstransact("%s/device/vbd" % GetDomainPath(self.domid))
             for dev in t.list():
                 backend_phantom_vbd = xstransact.Read("%s/device/vbd/%s/phantom_vbd" \
@@ -1862,6 +1860,7 @@ class XendDomainInfo:
                                       % backend_phantom_vbd)
                     plist.append(backend_phantom_vbd)
                     plist.append(frontend_phantom_vbd)
+            t.abort()
         return plist
 
     def _cleanup_phantom_devs(self, plist):
diff --git a/tools/python/xen/xend/server/pciif.py b/tools/python/xen/xend/server/pciif.py
--- a/tools/python/xen/xend/server/pciif.py
+++ b/tools/python/xen/xend/server/pciif.py
@@ -22,8 +22,6 @@ from xen.xend import sxp
 from xen.xend import sxp
 from xen.xend.XendError import VmError
 from xen.xend.XendLogging import log
-
-from xen.xend.xenstore.xstransact import xstransact
 
 from xen.xend.server.DevController import DevController
 
diff --git a/tools/python/xen/xend/xenstore/xstransact.py b/tools/python/xen/xend/xenstore/xstransact.py
--- a/tools/python/xen/xend/xenstore/xstransact.py
+++ b/tools/python/xen/xend/xenstore/xstransact.py
@@ -7,8 +7,16 @@
 
 from xen.xend.xenstore.xsutil import xshandle
 
+class xstransact:
+    """WARNING: Be very careful if you're instantiating an xstransact object
+       yourself (i.e. not using the capitalized static helpers like .Read().
+       It is essential that you clean up the object in place via
+       t.commit/abort(): GC can happen at any time, including contexts where
+       it's not safe to to use the shared xenstore socket fd. In particular,
+       if xend forks, and GC occurs, we can have two processes trying to
+       use the same xenstore fd, and all hell breaks loose.
+       """
 
-class xstransact:
 
     def __init__(self, path = ""):
         
@@ -22,8 +30,9 @@ class xstransact:
         self.in_transaction = True
 
     def __del__(self):
+        # see above.
         if self.in_transaction:
-            xshandle().transaction_end(self.transaction, True)
+            raise RuntimeError("ERROR: GC of live transaction")
 
     def commit(self):
         if not self.in_transaction:

^ permalink raw reply	[flat|nested] 7+ messages in thread

end of thread, other threads:[~2007-12-22  0:53 UTC | newest]

Thread overview: 7+ messages (download: mbox.gz follow: Atom feed
-- links below jump to the message on this page --
2007-12-21 14:57 [PATCH] Fix xend xenstore handling John Levon
2007-12-21 15:08 ` Daniel P. Berrange
2007-12-21 15:27   ` John Levon
2007-12-21 15:31     ` Daniel P. Berrange
  -- strict thread matches above, loose matches on Subject: below --
2007-12-21 14:45 john.levon
2007-12-21  3:42 john.levon
2007-12-22  0:53 ` John Levon

This is an external index of several public inboxes,
see mirroring instructions on how to clone and mirror
all data and code used by this external index.