From: "Andres Lagar-Cavilla" <andres@lagarcavilla.org>
To: "Hao, Xudong" <xudong.hao@intel.com>
Cc: Keir Fraser <keir@xen.org>,
"xen-devel@lists.xensource.com" <xen-devel@lists.xensource.com>,
Tim Deegan <tim@xen.org>, "JBeulich@suse.com" <jbeulich@suse.com>,
"Zhang, Xiantao" <xiantao.zhang@intel.com>
Subject: Re: Deadlocks by p2m_lock and event_lock
Date: Wed, 14 Mar 2012 20:37:28 -0700
Message-ID: <c2174f5635a44fb46d2e1ac949f8cb83.squirrel@webmail.lagarcavilla.org>
In-Reply-To: <403610A45A2B5242BD291EDAE8B37D300FCEA0AA@SHSMSX102.ccr.corp.intel.com>
> Works as tested.
>
> Thanks,
Thanks to you!
Andres
> -Xudong
>
>
>> -----Original Message-----
>> From: Andres Lagar-Cavilla [mailto:andres@lagarcavilla.org]
>> Sent: Wednesday, March 14, 2012 11:11 PM
>> To: Hao, Xudong
>> Cc: Tim Deegan; Keir Fraser; xen-devel@lists.xensource.com; Zhang,
>> Xiantao; JBeulich@suse.com
>> Subject: RE: [Xen-devel] Deadlocks by p2m_lock and event_lock
>>
>> Can you give this a try? (Tim, Keir, Jan, if Xudong reports success,
>> can you please apply?)
>>
>> Thanks,
>> Andres
>>
>> # HG changeset patch
>> # User Andres Lagar-Cavilla <andres@lagarcavilla.org>
>> # Date 1331737660 14400
>> # Node ID fe10f0433f6279091c193127d95d4d39b44a72ed
>> # Parent 5d20d2f6ffed0a49f030f04a8870f1926babbcbf
>> x86/mm: Fix deadlock between p2m and event channel locks.
>>
>> The hvm io emulation code holds the p2m lock for the duration of the
>> emulation, which may include sending an event to qemu. On a separate
>> path, map_domain_pirq grabs the event channel and p2m locks in
>> opposite order.
>>
>> Fix this by ensuring liveness of the ram_gfn used by io emulation,
>> with a page ref.
>>
>> Reported-by: "Hao, Xudong" <xudong.hao@intel.com>
>> Signed-off-by: "Hao, Xudong" <xudong.hao@intel.com>
>> Signed-off-by: Andres Lagar-Cavilla <andres@lagarcavilla.org>
>>
>> diff -r 5d20d2f6ffed -r fe10f0433f62 xen/arch/x86/hvm/emulate.c
>> --- a/xen/arch/x86/hvm/emulate.c
>> +++ b/xen/arch/x86/hvm/emulate.c
>> @@ -77,6 +77,17 @@ static int hvmemul_do_io(
>>          return X86EMUL_RETRY;
>>      }
>>
>> +    /* Maintain a ref on the mfn to ensure liveness. Put the gfn
>> +     * to avoid potential deadlock wrt event channel lock, later. */
>> +    if ( mfn_valid(mfn_x(ram_mfn)) )
>> +        if ( !get_page(mfn_to_page(mfn_x(ram_mfn)),
>> +             curr->domain) )
>> +        {
>> +            put_gfn(curr->domain, ram_gfn);
>> +            return X86EMUL_RETRY;
>> +        }
>> +    put_gfn(curr->domain, ram_gfn);
>> +
>>      /*
>>       * Weird-sized accesses have undefined behaviour: we discard writes
>>       * and read all-ones.
>> @@ -87,7 +98,8 @@ static int hvmemul_do_io(
>>          ASSERT(p_data != NULL); /* cannot happen with a REP prefix */
>>          if ( dir == IOREQ_READ )
>>              memset(p_data, ~0, size);
>> -        put_gfn(curr->domain, ram_gfn);
>> +        if ( mfn_valid(mfn_x(ram_mfn)) )
>> +            put_page(mfn_to_page(mfn_x(ram_mfn)));
>>          return X86EMUL_UNHANDLEABLE;
>>      }
>>
>> @@ -108,7 +120,8 @@ static int hvmemul_do_io(
>>              unsigned int bytes = vio->mmio_large_write_bytes;
>>              if ( (addr >= pa) && ((addr + size) <= (pa + bytes)) )
>>              {
>> -                put_gfn(curr->domain, ram_gfn);
>> +                if ( mfn_valid(mfn_x(ram_mfn)) )
>> +                    put_page(mfn_to_page(mfn_x(ram_mfn)));
>>                  return X86EMUL_OKAY;
>>              }
>>          }
>> @@ -120,7 +133,8 @@ static int hvmemul_do_io(
>>              {
>>                  memcpy(p_data, &vio->mmio_large_read[addr - pa],
>>                         size);
>> -                put_gfn(curr->domain, ram_gfn);
>> +                if ( mfn_valid(mfn_x(ram_mfn)) )
>> +                    put_page(mfn_to_page(mfn_x(ram_mfn)));
>>                  return X86EMUL_OKAY;
>>              }
>>          }
>> @@ -134,7 +148,8 @@ static int hvmemul_do_io(
>>          vio->io_state = HVMIO_none;
>>          if ( p_data == NULL )
>>          {
>> -            put_gfn(curr->domain, ram_gfn);
>> +            if ( mfn_valid(mfn_x(ram_mfn)) )
>> +                put_page(mfn_to_page(mfn_x(ram_mfn)));
>>              return X86EMUL_UNHANDLEABLE;
>>          }
>>          goto finish_access;
>> @@ -144,11 +159,13 @@ static int hvmemul_do_io(
>>              (addr == (vio->mmio_large_write_pa +
>>                        vio->mmio_large_write_bytes)) )
>>          {
>> -            put_gfn(curr->domain, ram_gfn);
>> +            if ( mfn_valid(mfn_x(ram_mfn)) )
>> +                put_page(mfn_to_page(mfn_x(ram_mfn)));
>>              return X86EMUL_RETRY;
>>          }
>>      default:
>> -        put_gfn(curr->domain, ram_gfn);
>> +        if ( mfn_valid(mfn_x(ram_mfn)) )
>> +            put_page(mfn_to_page(mfn_x(ram_mfn)));
>>          return X86EMUL_UNHANDLEABLE;
>>      }
>>
>> @@ -156,7 +173,8 @@ static int hvmemul_do_io(
>>      {
>>          gdprintk(XENLOG_WARNING, "WARNING: io already pending (%d)?\n",
>>                   p->state);
>> -        put_gfn(curr->domain, ram_gfn);
>> +        if ( mfn_valid(mfn_x(ram_mfn)) )
>> +            put_page(mfn_to_page(mfn_x(ram_mfn)));
>>          return X86EMUL_UNHANDLEABLE;
>>      }
>>
>> @@ -208,7 +226,8 @@ static int hvmemul_do_io(
>>
>>      if ( rc != X86EMUL_OKAY )
>>      {
>> -        put_gfn(curr->domain, ram_gfn);
>> +        if ( mfn_valid(mfn_x(ram_mfn)) )
>> +            put_page(mfn_to_page(mfn_x(ram_mfn)));
>>          return rc;
>>      }
>>
>> @@ -244,7 +263,8 @@ static int hvmemul_do_io(
>>          }
>>      }
>>
>> -    put_gfn(curr->domain, ram_gfn);
>> +    if ( mfn_valid(mfn_x(ram_mfn)) )
>> +        put_page(mfn_to_page(mfn_x(ram_mfn)));
>>      return X86EMUL_OKAY;
>>  }
>>
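>> The deadlock this changeset breaks is a classic lock-order inversion;
>> roughly (a simplified sketch of the two call chains, not exact code):
>>
>>     /* Path A: io emulation (before this patch) */
>>     hvmemul_do_io()
>>       -> get_gfn_unshare()                /* takes the p2m lock */
>>          ...
>>       -> notify_via_xen_event_channel()   /* takes d->event_lock */
>>
>>     /* Path B: MSI-X setup */
>>     map_domain_pirq()                     /* holds d->event_lock */
>>       -> msix_capability_init()
>>         -> p2m_change_entry_type_global() /* takes the p2m lock */
>>
>> With A and B in flight at once, each side can hold one lock while
>> waiting for the other. Taking a page ref and dropping the p2m lock
>> before any event is sent breaks the cycle.
>>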
>>
>>
>>
>> > I prefer the 2nd; I made a patch, and testing shows it works.
>> >
>> > diff -r 5d20d2f6ffed xen/arch/x86/hvm/emulate.c
>> > --- a/xen/arch/x86/hvm/emulate.c Fri Mar 09 16:54:24 2012 +0000
>> > +++ b/xen/arch/x86/hvm/emulate.c Wed Mar 14 15:11:52 2012 -0400
>> > @@ -60,20 +60,23 @@
>> >      ioreq_t *p = get_ioreq(curr);
>> >      unsigned long ram_gfn = paddr_to_pfn(ram_gpa);
>> >      p2m_type_t p2mt;
>> > -    mfn_t ram_mfn;
>> > +    unsigned long ram_mfn;
>> >      int rc;
>> >
>> >      /* Check for paged out page */
>> > -    ram_mfn = get_gfn_unshare(curr->domain, ram_gfn, &p2mt);
>> > +    ram_mfn = mfn_x(get_gfn_unshare(curr->domain, ram_gfn, &p2mt));
>> > +    if ( mfn_valid(ram_mfn) )
>> > +        get_page(mfn_to_page(ram_mfn), curr->domain);
>> > +    put_gfn(curr->domain, ram_gfn);
>> >      if ( p2m_is_paging(p2mt) )
>> >      {
>> > -        put_gfn(curr->domain, ram_gfn);
>> > +        put_page(mfn_to_page(ram_mfn));
>> >          p2m_mem_paging_populate(curr->domain, ram_gfn);
>> >          return X86EMUL_RETRY;
>> >      }
>> >      if ( p2m_is_shared(p2mt) )
>> >      {
>> > -        put_gfn(curr->domain, ram_gfn);
>> > +        put_page(mfn_to_page(ram_mfn));
>> >          return X86EMUL_RETRY;
>> >      }
>> >
>> > @@ -87,7 +90,7 @@
>> >          ASSERT(p_data != NULL); /* cannot happen with a REP prefix */
>> >          if ( dir == IOREQ_READ )
>> >              memset(p_data, ~0, size);
>> > -        put_gfn(curr->domain, ram_gfn);
>> > +        put_page(mfn_to_page(ram_mfn));
>> >          return X86EMUL_UNHANDLEABLE;
>> >      }
>> >
>> > @@ -108,7 +111,7 @@
>> >              unsigned int bytes = vio->mmio_large_write_bytes;
>> >              if ( (addr >= pa) && ((addr + size) <= (pa + bytes)) )
>> >              {
>> > -                put_gfn(curr->domain, ram_gfn);
>> > +                put_page(mfn_to_page(ram_mfn));
>> >                  return X86EMUL_OKAY;
>> >              }
>> >          }
>> > @@ -120,7 +123,7 @@
>> >              {
>> >                  memcpy(p_data, &vio->mmio_large_read[addr - pa],
>> >                         size);
>> > -                put_gfn(curr->domain, ram_gfn);
>> > +                put_page(mfn_to_page(ram_mfn));
>> >                  return X86EMUL_OKAY;
>> >              }
>> >          }
>> > @@ -134,7 +137,7 @@
>> >          vio->io_state = HVMIO_none;
>> >          if ( p_data == NULL )
>> >          {
>> > -            put_gfn(curr->domain, ram_gfn);
>> > +            put_page(mfn_to_page(ram_mfn));
>> >              return X86EMUL_UNHANDLEABLE;
>> >          }
>> >          goto finish_access;
>> > @@ -144,11 +147,11 @@
>> >              (addr == (vio->mmio_large_write_pa +
>> >                        vio->mmio_large_write_bytes)) )
>> >          {
>> > -            put_gfn(curr->domain, ram_gfn);
>> > +            put_page(mfn_to_page(ram_mfn));
>> >              return X86EMUL_RETRY;
>> >          }
>> >      default:
>> > -        put_gfn(curr->domain, ram_gfn);
>> > +        put_page(mfn_to_page(ram_mfn));
>> >          return X86EMUL_UNHANDLEABLE;
>> >      }
>> >
>> > @@ -156,7 +159,7 @@
>> >      {
>> >          gdprintk(XENLOG_WARNING, "WARNING: io already pending (%d)?\n",
>> >                   p->state);
>> > -        put_gfn(curr->domain, ram_gfn);
>> > +        put_page(mfn_to_page(ram_mfn));
>> >          return X86EMUL_UNHANDLEABLE;
>> >      }
>> >
>> > @@ -208,7 +211,7 @@
>> >
>> >      if ( rc != X86EMUL_OKAY )
>> >      {
>> > -        put_gfn(curr->domain, ram_gfn);
>> > +        put_page(mfn_to_page(ram_mfn));
>> >          return rc;
>> >      }
>> >
>> > @@ -244,7 +247,7 @@
>> >          }
>> >      }
>> >
>> > -    put_gfn(curr->domain, ram_gfn);
>> > +    put_page(mfn_to_page(ram_mfn));
>> >      return X86EMUL_OKAY;
>> >  }
>> >
>> >
>> > Thanks,
>> > -Xudong
>> >
>> >
>> >> -----Original Message-----
>> >> From: Andres Lagar-Cavilla [mailto:andres@lagarcavilla.org]
>> >> Sent: Wednesday, March 14, 2012 2:46 AM
>> >> To: Hao, Xudong
>> >> Cc: Tim Deegan; Keir Fraser; xen-devel@lists.xensource.com; Zhang,
>> >> Xiantao; JBeulich@suse.com
>> >> Subject: RE: [Xen-devel] Deadlocks by p2m_lock and event_lock
>> >>
>> >> > Hi, Tim and Andres
>> >> > The patch fixes part of this issue. In handle_mmio, the function
>> >> > hvmemul_do_io() is called and the p2m lock is taken again via
>> >> > get_gfn_unshare(), which still triggers a deadlock.
>> >>
>> >> Typically hvmemul_do_io gets the zero gfn, because in many cases
>> >> that's the 'ram_gpa' it is passed. However, in the case of mmio, and
>> >> particularly stdvga, ram_gpa is the guest address of the data to be
>> >> copied to the framebuffer. So it is in principle ok to get_gfn in
>> >> hvmemul_do_io.
>> >>
>> >> There are two solutions:
>> >> 1. msix_capability_init does not call p2m_change_entry_type_global.
>> >> See my previous email. If we want to resync the
>> >> EPT/NPT/traditional/VTD/IOMMU/superduper TLBs, we can just do that
>> >> explicitly. I hope.
>> >>
>> >> 2. hvmemul_do_io gets a ref on the mfn underlying ram_gpa, and holds
>> >> that for the critical section, instead of the p2m lock. One way to
>> >> achieve this is:
>> >>
>> >>     /* Check for paged out page */
>> >>     ram_mfn = get_gfn_unshare(curr->domain, ram_gfn, &p2mt);
>> >>     if ( this or that )
>> >>         { ... handle ... }
>> >>     if ( mfn_valid(ram_mfn) )
>> >>         get_page(mfn_to_page(ram_mfn), curr->domain);
>> >>     put_gfn(curr->domain, ram_gfn);
>> >>
>> >>     /* replace all put_gfn in all exit paths by put_page */
>> >>
>> >> This will ensure the target page is live and sane while not holding
>> >> the p2m lock.
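>> >>
>> >> Slightly fleshed out, the shape would be roughly this (still a
>> >> sketch -- a real patch must also check the return value of get_page
>> >> and must put_page on every exit path):
>> >>
>> >>     unsigned long ram_mfn;
>> >>
>> >>     /* Under the p2m lock: translate, then pin the backing page. */
>> >>     ram_mfn = mfn_x(get_gfn_unshare(curr->domain, ram_gfn, &p2mt));
>> >>     /* ... handle the paged-out / shared cases here ... */
>> >>     if ( mfn_valid(ram_mfn) )
>> >>         get_page(mfn_to_page(ram_mfn), curr->domain);
>> >>     put_gfn(curr->domain, ram_gfn);   /* p2m lock dropped here */
>> >>
>> >>     /* ... emulate, possibly sending an event to qemu ... */
>> >>
>> >>     if ( mfn_valid(ram_mfn) )
>> >>         put_page(mfn_to_page(ram_mfn));  /* unpin on every exit */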
>> >> Xudong, did that make sense? Do you think you could try that and
>> >> report back?
>> >>
>> >> Thanks!
>> >> Andres
>> >>
>> >> >
>> >> > (XEN) Xen call trace:
>> >> > (XEN)    [<ffff82c4801261a3>] _spin_lock+0x1b/0xa8
>> >> > (XEN)    [<ffff82c4801070d3>] notify_via_xen_event_channel+0x21/0x106
>> >> > (XEN)    [<ffff82c4801b6883>] hvm_buffered_io_send+0x1f1/0x21b
>> >> > (XEN)    [<ffff82c4801bbd3a>] stdvga_intercept_mmio+0x491/0x4c7
>> >> > (XEN)    [<ffff82c4801b5d58>] hvm_io_intercept+0x218/0x244
>> >> > (XEN)    [<ffff82c4801aa931>] hvmemul_do_io+0x55a/0x716
>> >> > (XEN)    [<ffff82c4801aab1a>] hvmemul_do_mmio+0x2d/0x2f
>> >> > (XEN)    [<ffff82c4801ab239>] hvmemul_write+0x181/0x1a2
>> >> > (XEN)    [<ffff82c4801963f0>] x86_emulate+0xcad3/0xfbdf
>> >> > (XEN)    [<ffff82c4801a9d2e>] hvm_emulate_one+0x120/0x1af
>> >> > (XEN)    [<ffff82c4801b63cb>] handle_mmio+0x4e/0x1d1
>> >> > (XEN)    [<ffff82c4801afd72>] hvm_hap_nested_page_fault+0x210/0x37f
>> >> > (XEN)    [<ffff82c4801d2419>] vmx_vmexit_handler+0x1523/0x17d0
>> >> >
>> >> > Thanks,
>> >> > -Xudong
>> >> >
>> >> >> -----Original Message-----
>> >> >> From: Tim Deegan [mailto:tim@xen.org]
>> >> >> Sent: Saturday, March 10, 2012 12:56 AM
>> >> >> To: Andres Lagar-Cavilla
>> >> >> Cc: Hao, Xudong; Keir Fraser; xen-devel@lists.xensource.com;
>> >> >> Zhang, Xiantao; JBeulich@suse.com
>> >> >> Subject: Re: [Xen-devel] Deadlocks by p2m_lock and event_lock
>> >> >>
>> >> >> At 08:29 -0800 on 09 Mar (1331281767), Andres Lagar-Cavilla wrote:
>> >> >> > >> I don't know about the event lock, but it seems unwise to
>> >> >> > >> call into handle_mmio with a gfn lock held. How about fixing
>> >> >> > >> the other path?
>> >> >> > >>
>> >> >> > >> diff -r 04673ecb9d78 xen/arch/x86/hvm/hvm.c
>> >> >> > >> --- a/xen/arch/x86/hvm/hvm.c Thu Mar 08 16:40:05 2012 +0000
>> >> >> > >> +++ b/xen/arch/x86/hvm/hvm.c Fri Mar 09 11:15:25 2012 +0000
>> >> >> > >> @@ -1324,10 +1324,11 @@ int hvm_hap_nested_page_fault(unsigned l
>> >> >> > >>      if ( (p2mt == p2m_mmio_dm) ||
>> >> >> > >>           (access_w && (p2mt == p2m_ram_ro)) )
>> >> >> > >>      {
>> >> >> > >> +        put_gfn(p2m->domain, gfn);
>> >> >> > >>          if ( !handle_mmio() )
>> >> >> > >>              hvm_inject_exception(TRAP_gp_fault, 0, 0);
>> >> >> > >>          rc = 1;
>> >> >> > >> -        goto out_put_gfn;
>> >> >> > >> +        goto out;
>> >> >> > >>      }
>> >> >> > >>
>> >> >> > >>  #ifdef __x86_64__
>> >> >> > >> @@ -1379,6 +1380,7 @@ int hvm_hap_nested_page_fault(unsigned l
>> >> >> > >>
>> >> >> > >>  out_put_gfn:
>> >> >> > >>      put_gfn(p2m->domain, gfn);
>> >> >> > >> +out:
>> >> >> > >>      if ( paged )
>> >> >> > >>          p2m_mem_paging_populate(v->domain, gfn);
>> >> >> > >>      if ( req_ptr )
>> >> >> > >
>> >> >> > > Yes, that's fine to release the p2m lock earlier than
>> >> >> > > handle_mmio.
>> >> >> >
>> >> >> > Ack
>> >> >>
>> >> >> OK, applied.
>> >> >>
>> >> >> Tim.
>> >> >
>> >>
>> >
>> >
>>
>
>
Thread overview: 18+ messages
2012-03-09 10:58 Deadlocks by p2m_lock and event_lock Hao, Xudong
2012-03-09 11:20 ` Tim Deegan
2012-03-09 11:44 ` Hao, Xudong
2012-03-09 16:29 ` Andres Lagar-Cavilla
2012-03-09 16:55 ` Tim Deegan
2012-03-13 7:51 ` Hao, Xudong
2012-03-13 15:27 ` Andres Lagar-Cavilla
2012-03-13 18:26 ` Andres Lagar-Cavilla
2012-03-14 9:20 ` Jan Beulich
2012-03-14 14:18 ` Andres Lagar-Cavilla
2012-03-13 18:45 ` Andres Lagar-Cavilla
2012-03-14 7:12 ` Hao, Xudong
2012-03-14 8:28 ` Zhang, Yang Z
2012-03-14 14:20 ` Andres Lagar-Cavilla
2012-03-14 15:10 ` Andres Lagar-Cavilla
2012-03-15 2:19 ` Hao, Xudong
2012-03-15 3:37 ` Andres Lagar-Cavilla [this message]
2012-03-15 10:44 ` Tim Deegan