qemu-devel.nongnu.org archive mirror
 help / color / mirror / Atom feed
* qemu ppc64 crash when adding CPU
@ 2023-12-04 18:36 Michal Suchánek
  2023-12-04 18:57 ` Stefan Hajnoczi
  0 siblings, 1 reply; 3+ messages in thread
From: Michal Suchánek @ 2023-12-04 18:36 UTC (permalink / raw)
  To: qemu-devel

Hello,

When running a VM with libvirt I get:

/usr/bin/qemu-system-ppc64 --version
QEMU emulator version 8.1.3 (Virtualization / 15.5)
Copyright (c) 2003-2023 Fabrice Bellard and the QEMU Project developers

/usr/bin/qemu-system-ppc64 -name
guest=sles12sp5-ppc64le,debug-threads=on -S -object
{"qom-type":"secret","id":"masterKey0","format":"raw","file":"/var/lib/libvirt/qemu/domain-11-sles12sp5-ppc64le/master-key.aes"}
-machine
pseries-7.1,usb=off,dump-guest-core=off,memory-backend=ppc_spapr.ram
-accel tcg -cpu POWER9 -m 4096 -object
{"qom-type":"memory-backend-ram","id":"ppc_spapr.ram","size":4294967296}
-overcommit mem-lock=off -smp 16,sockets=1,dies=1,cores=2,threads=8
-uuid a6ad6a7d-125b-4525-b452-241ce2000eda -display none -no-user-config
-nodefaults -chardev socket,id=charmonitor,fd=29,server=on,wait=off -mon
chardev=charmonitor,id=monitor,mode=control -rtc base=utc -no-shutdown
-boot strict=on -device
{"driver":"qemu-xhci","p2":15,"p3":15,"id":"usb","bus":"pci.0","addr":"0x3"}
-device
{"driver":"virtio-scsi-pci","id":"scsi0","bus":"pci.0","addr":"0x4"}
-device
{"driver":"virtio-serial-pci","id":"virtio-serial0","bus":"pci.0","addr":"0x2"}
-blockdev
{"driver":"file","filename":"/home/hramrach/Downloads/SLE-12-SP5-Server-MINI-ISO-ppc64le-GM-DVD.iso","node-name":"libvirt-2-storage","auto-read-only":true,"discard":"unmap"}
-blockdev
{"node-name":"libvirt-2-format","read-only":true,"driver":"raw","file":"libvirt-2-storage"}
-device
{"driver":"scsi-cd","bus":"scsi0.0","channel":0,"scsi-id":0,"lun":0,"device_id":"drive-scsi0-0-0-0","drive":"libvirt-2-format","id":"scsi0-0-0-0","bootindex":2}
-blockdev
{"driver":"file","filename":"/var/lib/libvirt/images/sles12sp5-ppc64le.qcow2","node-name":"libvirt-1-storage","auto-read-only":true,"discard":"unmap"}
-blockdev
{"node-name":"libvirt-1-format","read-only":false,"discard":"unmap","driver":"qcow2","file":"libvirt-1-storage","backing":null}
-device
{"driver":"scsi-hd","bus":"scsi0.0","channel":0,"scsi-id":0,"lun":1,"device_id":"drive-scsi0-0-0-1","drive":"libvirt-1-format","id":"scsi0-0-0-1","bootindex":1}
-netdev {"type":"tap","fd":"30","id":"hostnet0"} -device
{"driver":"e1000","netdev":"hostnet0","id":"net0","mac":"52:54:00:3b:d5:a5","bus":"pci.0","addr":"0x1"}
-chardev pty,id=charserial0 -device
{"driver":"spapr-vty","chardev":"charserial0","id":"serial0","reg":805306368}
-audiodev {"id":"audio1","driver":"none"} -device
{"driver":"virtio-balloon-pci","id":"balloon0","bus":"pci.0","addr":"0x5"}
-object
{"qom-type":"rng-random","id":"objrng0","filename":"/dev/urandom"}
-device
{"driver":"virtio-rng-pci","rng":"objrng0","id":"rng0","bus":"pci.0","addr":"0x6"}
-sandbox
on,obsolete=deny,elevateprivileges=deny,spawn=deny,resourcecontrol=deny
-msg timestamp=on

virsh qemu-monitor-command sles12sp5-ppc64le query-hotpluggable-cpus | jq . | cat
{
  "return": [
    {
      "props": {
        "core-id": 8,
        "node-id": 0
      },
      "vcpus-count": 8,
      "qom-path": "/machine/unattached/device[2]",
      "type": "power9_v2.2-spapr-cpu-core"
    },
    {
      "props": {
        "core-id": 0,
        "node-id": 0
      },
      "vcpus-count": 8,
      "qom-path": "/machine/unattached/device[1]",
      "type": "power9_v2.2-spapr-cpu-core"
    }
  ],
  "id": "libvirt-155"
}

virsh qemu-monitor-command sles12sp5-ppc64le device_del '"id":"/machine/unattached/device[2]"' | jq . 
{
  "return": {},
  "id": "libvirt-218"
}

virsh qemu-monitor-command sles12sp5-ppc64le query-hotpluggable-cpus | jq . | cat
{
  "return": [
    {
      "props": {
        "core-id": 8,
        "node-id": 0
      },
      "vcpus-count": 8,
      "type": "power9_v2.2-spapr-cpu-core"
    },
    {
      "props": {
        "core-id": 0,
        "node-id": 0
      },
      "vcpus-count": 8,
      "qom-path": "/machine/unattached/device[1]",
      "type": "power9_v2.2-spapr-cpu-core"
    }
  ],
  "id": "libvirt-235"
}

virsh qemu-monitor-command sles12sp5-ppc64le device_add '"id":"cpu-666"' '"driver":"power9_v2.2-spapr-cpu-core"' '"core-id":8' '"node-id":0'  | jq .

__GI_raise (sig=6) at ../sysdeps/unix/sysv/linux/raise.c:51
51	}
(gdb) up
#1  0x00007f7839c553e5 in __GI_abort () at abort.c:79
79	      raise (SIGABRT);
(gdb) up
#2  0x00007f783c54a125 in g_assertion_message (domain=domain@entry=0x0, file=file@entry=0x556b3baf9242 "../tcg/tcg.c", line=line@entry=784, func=func@entry=0x556b3bb55720 <__func__.55816> "tcg_register_thread", 
    message=message@entry=0x7f76a46e8f40 "assertion failed: (n < tcg_max_ctxs)") at ../glib/gtestutils.c:3223
3223	    g_abort ();

This ends the usable part of the stack trace; going further up the call
stack, gdb locks up.

Looking at tcg.c, line 784 is in this function:

ster_thread(void)
{
    TCGContext *s = g_malloc(sizeof(*s));
    unsigned int i, n;

    *s = tcg_init_ctx;

    /* Relink mem_base.  */
    for (i = 0, n = tcg_init_ctx.nb_globals; i < n; ++i) {
        if (tcg_init_ctx.temps[i].mem_base) {
            ptrdiff_t b = tcg_init_ctx.temps[i].mem_base - tcg_init_ctx.temps;
            tcg_debug_assert(b >= 0 && b < n);
            s->temps[i].mem_base = &s->temps[b];
        }
    }

    /* Claim an entry in tcg_ctxs */
    n = qatomic_fetch_inc(&tcg_cur_ctxs);
>>> g_assert(n < tcg_max_ctxs); <<<
    qatomic_set(&tcg_ctxs[n], s);

    if (n > 0) {
        alloc_tcg_plugin_context(s);
        tcg_region_initial_alloc(s);
    }

    tcg_ctx = s;
}

Any idea why qemu would be crashing here?

Thanks

Michal


^ permalink raw reply	[flat|nested] 3+ messages in thread

* Re: qemu ppc64 crash when adding CPU
  2023-12-04 18:36 qemu ppc64 crash when adding CPU Michal Suchánek
@ 2023-12-04 18:57 ` Stefan Hajnoczi
  2023-12-04 19:42   ` Philippe Mathieu-Daudé
  0 siblings, 1 reply; 3+ messages in thread
From: Stefan Hajnoczi @ 2023-12-04 18:57 UTC (permalink / raw)
  To: Michal Suchánek; +Cc: qemu-devel, Richard Henderson

On Mon, 4 Dec 2023 at 13:37, Michal Suchánek <msuchanek@suse.de> wrote:
>
> Hello,
>
> When running a VM with libvirt I get:
>
> /usr/bin/qemu-system-ppc64 --version
> QEMU emulator version 8.1.3 (Virtualization / 15.5)
> Copyright (c) 2003-2023 Fabrice Bellard and the QEMU Project developers
>
> /usr/bin/qemu-system-ppc64 -name
> guest=sles12sp5-ppc64le,debug-threads=on -S -object
> {"qom-type":"secret","id":"masterKey0","format":"raw","file":"/var/lib/libvirt/qemu/domain-11-sles12sp5-ppc64le/master-key.aes"}
> -machine
> pseries-7.1,usb=off,dump-guest-core=off,memory-backend=ppc_spapr.ram
> -accel tcg -cpu POWER9 -m 4096 -object
> {"qom-type":"memory-backend-ram","id":"ppc_spapr.ram","size":4294967296}
> -overcommit mem-lock=off -smp 16,sockets=1,dies=1,cores=2,threads=8
> -uuid a6ad6a7d-125b-4525-b452-241ce2000eda -display none -no-user-config
> -nodefaults -chardev socket,id=charmonitor,fd=29,server=on,wait=off -mon
> chardev=charmonitor,id=monitor,mode=control -rtc base=utc -no-shutdown
> -boot strict=on -device
> {"driver":"qemu-xhci","p2":15,"p3":15,"id":"usb","bus":"pci.0","addr":"0x3"}
> -device
> {"driver":"virtio-scsi-pci","id":"scsi0","bus":"pci.0","addr":"0x4"}
> -device
> {"driver":"virtio-serial-pci","id":"virtio-serial0","bus":"pci.0","addr":"0x2"}
> -blockdev
> {"driver":"file","filename":"/home/hramrach/Downloads/SLE-12-SP5-Server-MINI-ISO-ppc64le-GM-DVD.iso","node-name":"libvirt-2-storage","auto-read-only":true,"discard":"unmap"}
> -blockdev
> {"node-name":"libvirt-2-format","read-only":true,"driver":"raw","file":"libvirt-2-storage"}
> -device
> {"driver":"scsi-cd","bus":"scsi0.0","channel":0,"scsi-id":0,"lun":0,"device_id":"drive-scsi0-0-0-0","drive":"libvirt-2-format","id":"scsi0-0-0-0","bootindex":2}
> -blockdev
> {"driver":"file","filename":"/var/lib/libvirt/images/sles12sp5-ppc64le.qcow2","node-name":"libvirt-1-storage","auto-read-only":true,"discard":"unmap"}
> -blockdev
> {"node-name":"libvirt-1-format","read-only":false,"discard":"unmap","driver":"qcow2","file":"libvirt-1-storage","backing":null}
> -device
> {"driver":"scsi-hd","bus":"scsi0.0","channel":0,"scsi-id":0,"lun":1,"device_id":"drive-scsi0-0-0-1","drive":"libvirt-1-format","id":"scsi0-0-0-1","bootindex":1}
> -netdev {"type":"tap","fd":"30","id":"hostnet0"} -device
> {"driver":"e1000","netdev":"hostnet0","id":"net0","mac":"52:54:00:3b:d5:a5","bus":"pci.0","addr":"0x1"}
> -chardev pty,id=charserial0 -device
> {"driver":"spapr-vty","chardev":"charserial0","id":"serial0","reg":805306368}
> -audiodev {"id":"audio1","driver":"none"} -device
> {"driver":"virtio-balloon-pci","id":"balloon0","bus":"pci.0","addr":"0x5"}
> -object
> {"qom-type":"rng-random","id":"objrng0","filename":"/dev/urandom"}
> -device
> {"driver":"virtio-rng-pci","rng":"objrng0","id":"rng0","bus":"pci.0","addr":"0x6"}
> -sandbox
> on,obsolete=deny,elevateprivileges=deny,spawn=deny,resourcecontrol=deny
> -msg timestamp=on
>
> virsh qemu-monitor-command sles12sp5-ppc64le query-hotpluggable-cpus | jq . | cat
> {
>   "return": [
>     {
>       "props": {
>         "core-id": 8,
>         "node-id": 0
>       },
>       "vcpus-count": 8,
>       "qom-path": "/machine/unattached/device[2]",
>       "type": "power9_v2.2-spapr-cpu-core"
>     },
>     {
>       "props": {
>         "core-id": 0,
>         "node-id": 0
>       },
>       "vcpus-count": 8,
>       "qom-path": "/machine/unattached/device[1]",
>       "type": "power9_v2.2-spapr-cpu-core"
>     }
>   ],
>   "id": "libvirt-155"
> }
>
> virsh qemu-monitor-command sles12sp5-ppc64le device_del '"id":"/machine/unattached/device[2]"' | jq .
> {
>   "return": {},
>   "id": "libvirt-218"
> }
>
> virsh qemu-monitor-command sles12sp5-ppc64le query-hotpluggable-cpus | jq . | cat
> {
>   "return": [
>     {
>       "props": {
>         "core-id": 8,
>         "node-id": 0
>       },
>       "vcpus-count": 8,
>       "type": "power9_v2.2-spapr-cpu-core"
>     },
>     {
>       "props": {
>         "core-id": 0,
>         "node-id": 0
>       },
>       "vcpus-count": 8,
>       "qom-path": "/machine/unattached/device[1]",
>       "type": "power9_v2.2-spapr-cpu-core"
>     }
>   ],
>   "id": "libvirt-235"
> }
>
> virsh qemu-monitor-command sles12sp5-ppc64le device_add '"id":"cpu-666"' '"driver":"power9_v2.2-spapr-cpu-core"' '"core-id":8' '"node-id":0'  | jq .
>
> __GI_raise (sig=6) at ../sysdeps/unix/sysv/linux/raise.c:51
> 51      }
> (gdb) up
> #1  0x00007f7839c553e5 in __GI_abort () at abort.c:79
> 79            raise (SIGABRT);
> (gdb) up
> #2  0x00007f783c54a125 in g_assertion_message (domain=domain@entry=0x0, file=file@entry=0x556b3baf9242 "../tcg/tcg.c", line=line@entry=784, func=func@entry=0x556b3bb55720 <__func__.55816> "tcg_register_thread",
>     message=message@entry=0x7f76a46e8f40 "assertion failed: (n < tcg_max_ctxs)") at ../glib/gtestutils.c:3223
> 3223        g_abort ();
>
> This ends the usable part of stacktrace, going upp the call stack gdb
> locks up.
>
> Looking at tcg.c line 784 is here:
>
> ster_thread(void)
> {
>     TCGContext *s = g_malloc(sizeof(*s));
>     unsigned int i, n;
>
>     *s = tcg_init_ctx;
>
>     /* Relink mem_base.  */
>     for (i = 0, n = tcg_init_ctx.nb_globals; i < n; ++i) {
>         if (tcg_init_ctx.temps[i].mem_base) {
>             ptrdiff_t b = tcg_init_ctx.temps[i].mem_base - tcg_init_ctx.temps;
>             tcg_debug_assert(b >= 0 && b < n);
>             s->temps[i].mem_base = &s->temps[b];
>         }
>     }
>
>     /* Claim an entry in tcg_ctxs */
>     n = qatomic_fetch_inc(&tcg_cur_ctxs);
> >>> g_assert(n < tcg_max_ctxs); <<<
>     qatomic_set(&tcg_ctxs[n], s);
>
>     if (n > 0) {
>         alloc_tcg_plugin_context(s);
>         tcg_region_initial_alloc(s);
>     }
>
>     tcg_ctx = s;
> }
>
> Any idea why qemu would be crashing here?

Hi Michal,

$ git grep tcg_cur_ctxs
tcg/region.c:409:    unsigned int n_ctxs = qatomic_read(&tcg_cur_ctxs);
tcg/region.c:889:    unsigned int n_ctxs = qatomic_read(&tcg_cur_ctxs);
tcg/tcg-internal.h:34:extern unsigned int tcg_cur_ctxs;
tcg/tcg.c:241:unsigned int tcg_cur_ctxs;
tcg/tcg.c:806:    n = qatomic_fetch_inc(&tcg_cur_ctxs);
tcg/tcg.c:1369:    tcg_cur_ctxs = 1;

I don't see a qatomic_dec(&tcg_cur_ctxs) anywhere, so it seems hot
unplugging a vcpu doesn't release the tcg_cur_ctxs refcount. Do we
need a tcg_unregister_thread() function?

CCing Richard Henderson.

Stefan


^ permalink raw reply	[flat|nested] 3+ messages in thread

* Re: qemu ppc64 crash when adding CPU
  2023-12-04 18:57 ` Stefan Hajnoczi
@ 2023-12-04 19:42   ` Philippe Mathieu-Daudé
  0 siblings, 0 replies; 3+ messages in thread
From: Philippe Mathieu-Daudé @ 2023-12-04 19:42 UTC (permalink / raw)
  To: Stefan Hajnoczi, Michal Suchánek; +Cc: qemu-devel, Richard Henderson

Hi,

On 4/12/23 19:57, Stefan Hajnoczi wrote:
> On Mon, 4 Dec 2023 at 13:37, Michal Suchánek <msuchanek@suse.de> wrote:

>> Looking at tcg.c line 784 is here:
>>
>> ster_thread(void)
>> {
>>      TCGContext *s = g_malloc(sizeof(*s));
>>      unsigned int i, n;
>>
>>      *s = tcg_init_ctx;
>>
>>      /* Relink mem_base.  */
>>      for (i = 0, n = tcg_init_ctx.nb_globals; i < n; ++i) {
>>          if (tcg_init_ctx.temps[i].mem_base) {
>>              ptrdiff_t b = tcg_init_ctx.temps[i].mem_base - tcg_init_ctx.temps;
>>              tcg_debug_assert(b >= 0 && b < n);
>>              s->temps[i].mem_base = &s->temps[b];
>>          }
>>      }
>>
>>      /* Claim an entry in tcg_ctxs */
>>      n = qatomic_fetch_inc(&tcg_cur_ctxs);
>>>>> g_assert(n < tcg_max_ctxs); <<<
>>      qatomic_set(&tcg_ctxs[n], s);
>>
>>      if (n > 0) {
>>          alloc_tcg_plugin_context(s);
>>          tcg_region_initial_alloc(s);
>>      }
>>
>>      tcg_ctx = s;
>> }
>>
>> Any idea why qemu would be crashing here?
> 
> Hi Michal,
> 
> $ git grep tcg_cur_ctxs
> tcg/region.c:409:    unsigned int n_ctxs = qatomic_read(&tcg_cur_ctxs);
> tcg/region.c:889:    unsigned int n_ctxs = qatomic_read(&tcg_cur_ctxs);
> tcg/tcg-internal.h:34:extern unsigned int tcg_cur_ctxs;
> tcg/tcg.c:241:unsigned int tcg_cur_ctxs;
> tcg/tcg.c:806:    n = qatomic_fetch_inc(&tcg_cur_ctxs);
> tcg/tcg.c:1369:    tcg_cur_ctxs = 1;
> 
> I don't see a qatomic_dec(&tcg_cur_ctxs) anywhere, so it seems hot
> unplugging a vcpu doesn't release the tcg_cur_ctxs refcount. Do we
> need a tcg_unregister_thread() function?

Suggested fix posted as RFC patch:
https://lore.kernel.org/qemu-devel/20231204194039.56169-1-philmd@linaro.org/


^ permalink raw reply	[flat|nested] 3+ messages in thread

end of thread, other threads:[~2023-12-04 19:43 UTC | newest]

Thread overview: 3+ messages (download: mbox.gz, follow: Atom feed)
-- links below jump to the message on this page --
2023-12-04 18:36 qemu ppc64 crash when adding CPU Michal Suchánek
2023-12-04 18:57 ` Stefan Hajnoczi
2023-12-04 19:42   ` Philippe Mathieu-Daudé

This is a public inbox, see mirroring instructions
for how to clone and mirror all data and code used for this inbox;
as well as URLs for NNTP newsgroup(s).