qemu-devel.nongnu.org archive mirror
 help / color / mirror / Atom feed
From: David Gibson <david@gibson.dropbear.id.au>
To: Bharata B Rao <bharata@linux.vnet.ibm.com>
Cc: Zhu Guihua <zhugh.fnst@cn.fujitsu.com>,
	mdroth@linux.vnet.ibm.com, aik@ozlabs.ru, agraf@suse.de,
	qemu-devel@nongnu.org, Chen Fan <chen.fan.fnst@cn.fujitsu.com>,
	pbonzini@redhat.com, qemu-ppc@nongnu.org,
	tyreld@linux.vnet.ibm.com, nfont@linux.vnet.ibm.com,
	Gu Zheng <guz.fnst@cn.fujitsu.com>,
	imammedo@redhat.com, afaerber@suse.de
Subject: Re: [Qemu-devel] [PATCH v5 04/10] cpu: Reclaim vCPU objects
Date: Tue, 1 Dec 2015 11:55:58 +1100	[thread overview]
Message-ID: <20151201005558.GC31343@voom.redhat.com> (raw)
In-Reply-To: <1448024079-20808-5-git-send-email-bharata@linux.vnet.ibm.com>

[-- Attachment #1: Type: text/plain, Size: 8915 bytes --]

On Fri, Nov 20, 2015 at 06:24:33PM +0530, Bharata B Rao wrote:
> From: Gu Zheng <guz.fnst@cn.fujitsu.com>
> 
> In order to deal well with the kvm vcpus (which can not be removed without any
> protection), we do not close KVM vcpu fd, just record and mark it as stopped
> into a list, so that we can reuse it for the appending cpu hot-add request if
> possible. It is also the approach that kvm guys suggested:
> https://www.mail-archive.com/kvm@vger.kernel.org/msg102839.html
> 
> Signed-off-by: Chen Fan <chen.fan.fnst@cn.fujitsu.com>
> Signed-off-by: Gu Zheng <guz.fnst@cn.fujitsu.com>
> Signed-off-by: Zhu Guihua <zhugh.fnst@cn.fujitsu.com>
> Signed-off-by: Bharata B Rao <bharata@linux.vnet.ibm.com>
>                [- Explicit CPU_REMOVE() from qemu_kvm/tcg_destroy_vcpu()
>                   isn't needed as it is done from cpu_exec_exit()]
> ---
>  cpus.c               | 41 +++++++++++++++++++++++++++++++++++++
>  include/qom/cpu.h    | 10 +++++++++
>  include/sysemu/kvm.h |  1 +
>  kvm-all.c            | 57 +++++++++++++++++++++++++++++++++++++++++++++++++++-
>  kvm-stub.c           |  5 +++++
>  5 files changed, 113 insertions(+), 1 deletion(-)
> 
> diff --git a/cpus.c b/cpus.c
> index 877bd70..af2b274 100644
> --- a/cpus.c
> +++ b/cpus.c
> @@ -953,6 +953,21 @@ void async_run_on_cpu(CPUState *cpu, void (*func)(void *data), void *data)
>      qemu_cpu_kick(cpu);
>  }
>  
> +static void qemu_kvm_destroy_vcpu(CPUState *cpu)
> +{
> +    if (kvm_destroy_vcpu(cpu) < 0) {
> +        error_report("kvm_destroy_vcpu failed.\n");
> +        exit(EXIT_FAILURE);
> +    }
> +
> +    object_unparent(OBJECT(cpu));
> +}
> +
> +static void qemu_tcg_destroy_vcpu(CPUState *cpu)
> +{
> +    object_unparent(OBJECT(cpu));
> +}
> +
>  static void flush_queued_work(CPUState *cpu)
>  {
>      struct qemu_work_item *wi;
> @@ -1053,6 +1068,11 @@ static void *qemu_kvm_cpu_thread_fn(void *arg)
>              }
>          }
>          qemu_kvm_wait_io_event(cpu);
> +        if (cpu->exit && !cpu_can_run(cpu)) {
> +            qemu_kvm_destroy_vcpu(cpu);
> +            qemu_mutex_unlock(&qemu_global_mutex);

This looks like a change to locking semantics, and I can't see the
connection to the described purpose of the patch.

> +            return NULL;
> +        }
>      }
>  
>      return NULL;
> @@ -1108,6 +1128,7 @@ static void tcg_exec_all(void);
>  static void *qemu_tcg_cpu_thread_fn(void *arg)
>  {
>      CPUState *cpu = arg;
> +    CPUState *remove_cpu = NULL;
>  
>      rcu_register_thread();
>  
> @@ -1145,6 +1166,16 @@ static void *qemu_tcg_cpu_thread_fn(void *arg)
>              }
>          }
>          qemu_tcg_wait_io_event(QTAILQ_FIRST(&cpus));
> +        CPU_FOREACH(cpu) {
> +            if (cpu->exit && !cpu_can_run(cpu)) {
> +                remove_cpu = cpu;
> +                break;
> +            }
> +        }
> +        if (remove_cpu) {
> +            qemu_tcg_destroy_vcpu(remove_cpu);
> +            remove_cpu = NULL;
> +        }

Any particular reason to only cleanup one cpu per iteration?

Also, any particular reason this isn't folded into tcg_exec_all with
the other cpu->exit logic?

>      }
>  
>      return NULL;
> @@ -1301,6 +1332,13 @@ void resume_all_vcpus(void)
>      }
>  }
>  
> +void cpu_remove(CPUState *cpu)
> +{
> +    cpu->stop = true;
> +    cpu->exit = true;
> +    qemu_cpu_kick(cpu);
> +}
> +
>  /* For temporary buffers for forming a name */
>  #define VCPU_THREAD_NAME_SIZE 16
>  
> @@ -1506,6 +1544,9 @@ static void tcg_exec_all(void)
>                  break;
>              }
>          } else if (cpu->stop || cpu->stopped) {
> +            if (cpu->exit) {
> +                next_cpu = CPU_NEXT(cpu);
> +            }
>              break;
>          }
>      }
> diff --git a/include/qom/cpu.h b/include/qom/cpu.h
> index 51a1323..67e05b0 100644
> --- a/include/qom/cpu.h
> +++ b/include/qom/cpu.h
> @@ -223,6 +223,7 @@ struct kvm_run;
>   * @halted: Nonzero if the CPU is in suspended state.
>   * @stop: Indicates a pending stop request.
>   * @stopped: Indicates the CPU has been artificially stopped.
> + * @exit: Indicates the CPU has exited due to an unplug operation.
>   * @crash_occurred: Indicates the OS reported a crash (panic) for this CPU
>   * @tcg_exit_req: Set to force TCG to stop executing linked TBs for this
>   *           CPU and return to its top level loop.
> @@ -274,6 +275,7 @@ struct CPUState {
>      bool created;
>      bool stop;
>      bool stopped;
> +    bool exit;
>      bool crash_occurred;
>      bool exit_request;
>      uint32_t interrupt_request;
> @@ -696,6 +698,14 @@ void cpu_exit(CPUState *cpu);
>  void cpu_resume(CPUState *cpu);
>  
>  /**
> + * cpu_remove:
> + * @cpu: The CPU to remove.
> + *
> + * Requests the CPU to be removed.
> + */
> +void cpu_remove(CPUState *cpu);
> +
> +/**
>   * qemu_init_vcpu:
>   * @cpu: The vCPU to initialize.
>   *
> diff --git a/include/sysemu/kvm.h b/include/sysemu/kvm.h
> index b31f325..dd1b783 100644
> --- a/include/sysemu/kvm.h
> +++ b/include/sysemu/kvm.h
> @@ -206,6 +206,7 @@ int kvm_has_intx_set_mask(void);
>  
>  int kvm_init_vcpu(CPUState *cpu);
>  int kvm_cpu_exec(CPUState *cpu);
> +int kvm_destroy_vcpu(CPUState *cpu);
>  
>  #ifdef NEED_CPU_H
>  
> diff --git a/kvm-all.c b/kvm-all.c
> index c648b81..3befc59 100644
> --- a/kvm-all.c
> +++ b/kvm-all.c
> @@ -60,6 +60,12 @@
>  
>  #define KVM_MSI_HASHTAB_SIZE    256
>  
> +struct KVMParkedVcpu {
> +    unsigned long vcpu_id;
> +    int kvm_fd;
> +    QLIST_ENTRY(KVMParkedVcpu) node;
> +};
> +
>  struct KVMState
>  {
>      AccelState parent_obj;
> @@ -93,6 +99,7 @@ struct KVMState
>      QTAILQ_HEAD(msi_hashtab, KVMMSIRoute) msi_hashtab[KVM_MSI_HASHTAB_SIZE];
>  #endif
>      KVMMemoryListener memory_listener;
> +    QLIST_HEAD(, KVMParkedVcpu) kvm_parked_vcpus;
>  };
>  
>  KVMState *kvm_state;
> @@ -235,6 +242,53 @@ static int kvm_set_user_memory_region(KVMMemoryListener *kml, KVMSlot *slot)
>      return kvm_vm_ioctl(s, KVM_SET_USER_MEMORY_REGION, &mem);
>  }
>  
> +int kvm_destroy_vcpu(CPUState *cpu)
> +{
> +    KVMState *s = kvm_state;
> +    long mmap_size;
> +    struct KVMParkedVcpu *vcpu = NULL;
> +    int ret = 0;
> +
> +    DPRINTF("kvm_destroy_vcpu\n");
> +
> +    mmap_size = kvm_ioctl(s, KVM_GET_VCPU_MMAP_SIZE, 0);
> +    if (mmap_size < 0) {
> +        ret = mmap_size;
> +        DPRINTF("KVM_GET_VCPU_MMAP_SIZE failed\n");
> +        goto err;
> +    }
> +
> +    ret = munmap(cpu->kvm_run, mmap_size);
> +    if (ret < 0) {
> +        goto err;
> +    }
> +
> +    vcpu = g_malloc0(sizeof(*vcpu));
> +    vcpu->vcpu_id = kvm_arch_vcpu_id(cpu);
> +    vcpu->kvm_fd = cpu->kvm_fd;
> +    QLIST_INSERT_HEAD(&kvm_state->kvm_parked_vcpus, vcpu, node);
> +err:
> +    return ret;
> +}
> +
> +static int kvm_get_vcpu(KVMState *s, unsigned long vcpu_id)
> +{
> +    struct KVMParkedVcpu *cpu;
> +
> +    QLIST_FOREACH(cpu, &s->kvm_parked_vcpus, node) {
> +        if (cpu->vcpu_id == vcpu_id) {
> +            int kvm_fd;
> +
> +            QLIST_REMOVE(cpu, node);
> +            kvm_fd = cpu->kvm_fd;
> +            g_free(cpu);
> +            return kvm_fd;
> +        }
> +    }

Hmm.. use of a simple list here does mean that unplugging, then
replugging all (except 1) vcpus would be an O(n^2) operation.  That's
probably still alright, I guess.

> +
> +    return kvm_vm_ioctl(s, KVM_CREATE_VCPU, (void *)vcpu_id);
> +}
> +
>  int kvm_init_vcpu(CPUState *cpu)
>  {
>      KVMState *s = kvm_state;
> @@ -243,7 +297,7 @@ int kvm_init_vcpu(CPUState *cpu)
>  
>      DPRINTF("kvm_init_vcpu\n");
>  
> -    ret = kvm_vm_ioctl(s, KVM_CREATE_VCPU, (void *)kvm_arch_vcpu_id(cpu));
> +    ret = kvm_get_vcpu(s, kvm_arch_vcpu_id(cpu));
>      if (ret < 0) {
>          DPRINTF("kvm_create_vcpu failed\n");
>          goto err;
> @@ -1468,6 +1522,7 @@ static int kvm_init(MachineState *ms)
>  #ifdef KVM_CAP_SET_GUEST_DEBUG
>      QTAILQ_INIT(&s->kvm_sw_breakpoints);
>  #endif
> +    QLIST_INIT(&s->kvm_parked_vcpus);
>      s->vmfd = -1;
>      s->fd = qemu_open("/dev/kvm", O_RDWR);
>      if (s->fd == -1) {
> diff --git a/kvm-stub.c b/kvm-stub.c
> index dc97a5e..0b39456 100644
> --- a/kvm-stub.c
> +++ b/kvm-stub.c
> @@ -32,6 +32,11 @@ bool kvm_allowed;
>  bool kvm_readonly_mem_allowed;
>  bool kvm_ioeventfd_any_length_allowed;
>  
> +int kvm_destroy_vcpu(CPUState *cpu)
> +{
> +    return -ENOSYS;
> +}
> +
>  int kvm_init_vcpu(CPUState *cpu)
>  {
>      return -ENOSYS;

-- 
David Gibson			| I'll have my music baroque, and my code
david AT gibson.dropbear.id.au	| minimalist, thank you.  NOT _the_ _other_
				| _way_ _around_!
http://www.ozlabs.org/~dgibson

[-- Attachment #2: signature.asc --]
[-- Type: application/pgp-signature, Size: 819 bytes --]

  parent reply	other threads:[~2015-12-01  1:42 UTC|newest]

Thread overview: 36+ messages / expand[flat|nested]  mbox.gz  Atom feed  top
2015-11-20 12:54 [Qemu-devel] [PATCH v5 00/10] sPAPR CPU hotplug Bharata B Rao
2015-11-20 12:54 ` [Qemu-devel] [PATCH v5 01/10] vl: Don't allow CPU toplogies with partially filled cores Bharata B Rao
2015-12-01  0:37   ` David Gibson
2015-12-02  3:04     ` Bharata B Rao
2015-12-02 13:52   ` Igor Mammedov
2015-12-02 14:14     ` Eduardo Habkost
2015-12-02 14:38       ` Bharata B Rao
2015-12-02 15:19         ` Eduardo Habkost
2015-11-20 12:54 ` [Qemu-devel] [PATCH v5 02/10] exec: Remove cpu from cpus list during cpu_exec_exit() Bharata B Rao
2015-12-01  0:44   ` David Gibson
2015-12-02  3:47     ` Bharata B Rao
2015-11-20 12:54 ` [Qemu-devel] [PATCH v5 03/10] exec: Do vmstate unregistration from cpu_exec_exit() Bharata B Rao
2015-12-01  0:46   ` David Gibson
2015-11-20 12:54 ` [Qemu-devel] [PATCH v5 04/10] cpu: Reclaim vCPU objects Bharata B Rao
2015-11-30  7:30   ` Alexey Kardashevskiy
2015-12-02  3:50     ` Bharata B Rao
2015-12-01  0:55   ` David Gibson [this message]
2015-12-02  5:28     ` Bharata B Rao
2015-11-20 12:54 ` [Qemu-devel] [PATCH v5 05/10] cpu: Add a sync version of cpu_remove() Bharata B Rao
2015-12-01  0:57   ` David Gibson
2015-11-20 12:54 ` [Qemu-devel] [PATCH v5 06/10] xics_kvm: Add cpu_destroy method to XICS Bharata B Rao
2015-12-01  1:01   ` David Gibson
2015-12-02  5:45     ` Bharata B Rao
2015-11-20 12:54 ` [Qemu-devel] [PATCH v5 07/10] spapr: Enable CPU hotplug for pseries-2.5 and add CPU DRC DT entries Bharata B Rao
2015-12-01  1:06   ` David Gibson
2015-12-02  5:47     ` Bharata B Rao
2015-11-20 12:54 ` [Qemu-devel] [PATCH v5 08/10] spapr: CPU hotplug support Bharata B Rao
2015-12-01  1:30   ` David Gibson
2015-12-01  4:48     ` Bharata B Rao
2015-11-20 12:54 ` [Qemu-devel] [PATCH v5 09/10] spapr: CPU hot unplug support Bharata B Rao
2015-12-01  1:34   ` David Gibson
2015-12-02  5:49     ` Bharata B Rao
2015-11-20 12:54 ` [Qemu-devel] [PATCH v5 10/10] target-ppc: Enable CPU hotplug for POWER8 CPU family Bharata B Rao
2015-11-23 11:54 ` [Qemu-devel] [PATCH v5 00/10] sPAPR CPU hotplug Peter Krempa
2015-11-23 13:04   ` Christian Borntraeger
2015-12-01  1:43     ` David Gibson

Reply instructions:

You may reply publicly to this message via plain-text email
using any one of the following methods:

* Save the following mbox file, import it into your mail client,
  and reply-to-all from there: mbox

  Avoid top-posting and favor interleaved quoting:
  https://en.wikipedia.org/wiki/Posting_style#Interleaved_style

* Reply using the --to, --cc, and --in-reply-to
  switches of git-send-email(1):

  git send-email \
    --in-reply-to=20151201005558.GC31343@voom.redhat.com \
    --to=david@gibson.dropbear.id.au \
    --cc=afaerber@suse.de \
    --cc=agraf@suse.de \
    --cc=aik@ozlabs.ru \
    --cc=bharata@linux.vnet.ibm.com \
    --cc=chen.fan.fnst@cn.fujitsu.com \
    --cc=guz.fnst@cn.fujitsu.com \
    --cc=imammedo@redhat.com \
    --cc=mdroth@linux.vnet.ibm.com \
    --cc=nfont@linux.vnet.ibm.com \
    --cc=pbonzini@redhat.com \
    --cc=qemu-devel@nongnu.org \
    --cc=qemu-ppc@nongnu.org \
    --cc=tyreld@linux.vnet.ibm.com \
    --cc=zhugh.fnst@cn.fujitsu.com \
    /path/to/YOUR_REPLY

  https://kernel.org/pub/software/scm/git/docs/git-send-email.html

* If your mail client supports setting the In-Reply-To header
  via mailto: links, try the mailto: link
Be sure your reply has a Subject: header at the top and a blank line before the message body.
This is a public inbox, see mirroring instructions
for how to clone and mirror all data and code used for this inbox;
as well as URLs for NNTP newsgroup(s).