qemu-devel.nongnu.org archive mirror
 help / color / mirror / Atom feed
* [Qemu-devel] [0/3] pSeries machine improvements
@ 2011-04-05  5:12 David Gibson
  2011-04-05  5:12 ` [Qemu-devel] [PATCH 1/3] pseries: Abolish envs array David Gibson
                   ` (2 more replies)
  0 siblings, 3 replies; 7+ messages in thread
From: David Gibson @ 2011-04-05  5:12 UTC (permalink / raw)
  To: agraf, qemu-devel

This patch series makes several small cleanups and improvements to the
recently added pSeries machine.  This include an optimization of the
popcntd implementation, as suggested in replies to the original
series, and some cleanups of the cpu initialization handling which
will assist later kvm enablement.

^ permalink raw reply	[flat|nested] 7+ messages in thread

* [Qemu-devel] [PATCH 1/3] pseries: Abolish envs array
  2011-04-05  5:12 [Qemu-devel] [0/3] pSeries machine improvements David Gibson
@ 2011-04-05  5:12 ` David Gibson
  2011-04-05  8:05   ` [Qemu-devel] " Alexander Graf
  2011-04-05  5:12 ` [Qemu-devel] [PATCH 2/3] Delay creation of pseries device tree until reset David Gibson
  2011-04-05  5:12 ` [Qemu-devel] [PATCH 3/3] Use existing helper function to implement popcntd instruction David Gibson
  2 siblings, 1 reply; 7+ messages in thread
From: David Gibson @ 2011-04-05  5:12 UTC (permalink / raw)
  To: agraf, qemu-devel

Currently the pseries machine init code builds up an array, envs, of
CPUState pointers for all the cpus in the system.  This is kind of
pointless, given the generic code already has a perfectly good linked list
of the cpus.

In addition, there are a number of places which assume that the cpu's
cpu_index field is equal to its index in this array.  This is true in
practice, because cpu_index values are just assigned sequentially, but
it's conceptually incorrect and may not always be true.

Therefore, this patch abolishes the envs array, and explicitly uses the
generic cpu linked list and cpu_index values throughout.

Signed-off-by: David Gibson <david@gibson.dropbear.id.au>
---
 hw/spapr.c |   49 ++++++++++++++++++++++++-------------------------
 hw/xics.c  |   32 +++++++++++++++++++++-----------
 hw/xics.h  |    3 +--
 3 files changed, 46 insertions(+), 38 deletions(-)

diff --git a/hw/spapr.c b/hw/spapr.c
index 1152a25..f80873c 100644
--- a/hw/spapr.c
+++ b/hw/spapr.c
@@ -57,7 +57,7 @@
 sPAPREnvironment *spapr;
 
 static void *spapr_create_fdt(int *fdt_size, ram_addr_t ramsize,
-                              const char *cpu_model, CPUState *envs[],
+                              const char *cpu_model,
                               sPAPREnvironment *spapr,
                               target_phys_addr_t initrd_base,
                               target_phys_addr_t initrd_size,
@@ -68,6 +68,7 @@ static void *spapr_create_fdt(int *fdt_size, ram_addr_t ramsize,
                               long hash_shift)
 {
     void *fdt;
+    CPUState *env;
     uint64_t mem_reg_property[] = { 0, cpu_to_be64(ramsize) };
     uint32_t start_prop = cpu_to_be32(initrd_base);
     uint32_t end_prop = cpu_to_be32(initrd_base + initrd_size);
@@ -135,14 +136,14 @@ static void *spapr_create_fdt(int *fdt_size, ram_addr_t ramsize,
         modelname[i] = toupper(modelname[i]);
     }
 
-    for (i = 0; i < smp_cpus; i++) {
-        CPUState *env = envs[i];
-        uint32_t gserver_prop[] = {cpu_to_be32(i), 0}; /* HACK! */
+    for (env = first_cpu; env != NULL; env = env->next_cpu) {
+        int index = env->cpu_index;
+        uint32_t gserver_prop[] = {cpu_to_be32(index), 0}; /* HACK! */
         char *nodename;
         uint32_t segs[] = {cpu_to_be32(28), cpu_to_be32(40),
                            0xffffffff, 0xffffffff};
 
-        if (asprintf(&nodename, "%s@%x", modelname, i) < 0) {
+        if (asprintf(&nodename, "%s@%x", modelname, index) < 0) {
             fprintf(stderr, "Allocation failure\n");
             exit(1);
         }
@@ -151,7 +152,7 @@ static void *spapr_create_fdt(int *fdt_size, ram_addr_t ramsize,
 
         free(nodename);
 
-        _FDT((fdt_property_cell(fdt, "reg", i)));
+        _FDT((fdt_property_cell(fdt, "reg", index)));
         _FDT((fdt_property_string(fdt, "device_type", "cpu")));
 
         _FDT((fdt_property_cell(fdt, "cpu-version", env->spr[SPR_PVR])));
@@ -168,11 +169,11 @@ static void *spapr_create_fdt(int *fdt_size, ram_addr_t ramsize,
                            pft_size_prop, sizeof(pft_size_prop))));
         _FDT((fdt_property_string(fdt, "status", "okay")));
         _FDT((fdt_property(fdt, "64-bit", NULL, 0)));
-        _FDT((fdt_property_cell(fdt, "ibm,ppc-interrupt-server#s", i)));
+        _FDT((fdt_property_cell(fdt, "ibm,ppc-interrupt-server#s", index)));
         _FDT((fdt_property(fdt, "ibm,ppc-interrupt-gserver#s",
                            gserver_prop, sizeof(gserver_prop))));
 
-        if (envs[i]->mmu_model & POWERPC_MMU_1TSEG) {
+        if (env->mmu_model & POWERPC_MMU_1TSEG) {
             _FDT((fdt_property(fdt, "ibm,processor-segment-sizes",
                                segs, sizeof(segs))));
         }
@@ -261,8 +262,8 @@ static void ppc_spapr_init(ram_addr_t ram_size,
                            const char *initrd_filename,
                            const char *cpu_model)
 {
-    CPUState *envs[MAX_CPUS];
     void *fdt, *htab;
+    CPUState *env;
     int i;
     ram_addr_t ram_offset;
     target_phys_addr_t fdt_addr, rtas_addr;
@@ -288,7 +289,7 @@ static void ppc_spapr_init(ram_addr_t ram_size,
         cpu_model = "POWER7";
     }
     for (i = 0; i < smp_cpus; i++) {
-        CPUState *env = cpu_init(cpu_model);
+        env = cpu_init(cpu_model);
 
         if (!env) {
             fprintf(stderr, "Unable to find PowerPC CPU definition\n");
@@ -300,9 +301,7 @@ static void ppc_spapr_init(ram_addr_t ram_size,
 
         env->hreset_vector = 0x60;
         env->hreset_excp_prefix = 0;
-        env->gpr[3] = i;
-
-        envs[i] = env;
+        env->gpr[3] = env->cpu_index;
     }
 
     /* allocate RAM */
@@ -315,10 +314,10 @@ static void ppc_spapr_init(ram_addr_t ram_size,
     htab_size = 1ULL << (pteg_shift + 7);
     htab = qemu_mallocz(htab_size);
 
-    for (i = 0; i < smp_cpus; i++) {
-        envs[i]->external_htab = htab;
-        envs[i]->htab_base = -1;
-        envs[i]->htab_mask = htab_size - 1;
+    for (env = first_cpu; env != NULL; env = env->next_cpu) {
+        env->external_htab = htab;
+        env->htab_base = -1;
+        env->htab_mask = htab_size - 1;
     }
 
     filename = qemu_find_file(QEMU_FILE_TYPE_BIOS, "spapr-rtas.bin");
@@ -330,7 +329,7 @@ static void ppc_spapr_init(ram_addr_t ram_size,
     qemu_free(filename);
 
     /* Set up Interrupt Controller */
-    spapr->icp = xics_system_init(smp_cpus, envs, XICS_IRQS);
+    spapr->icp = xics_system_init(XICS_IRQS);
 
     /* Set up VIO bus */
     spapr->vio_bus = spapr_vio_bus_init();
@@ -416,13 +415,13 @@ static void ppc_spapr_init(ram_addr_t ram_size,
 
         /* SLOF will startup the secondary CPUs using RTAS,
            rather than expecting a kexec() style entry */
-        for (i = 0; i < smp_cpus; i++) {
-            envs[i]->halted = 1;
+        for (env = first_cpu; env != NULL; env = env->next_cpu) {
+            env->halted = 1;
         }
     }
 
     /* Prepare the device tree */
-    fdt = spapr_create_fdt(&fdt_size, ram_size, cpu_model, envs, spapr,
+    fdt = spapr_create_fdt(&fdt_size, ram_size, cpu_model, spapr,
                            initrd_base, initrd_size,
                            boot_device, kernel_cmdline,
                            rtas_addr, rtas_size, pteg_shift + 7);
@@ -432,10 +431,10 @@ static void ppc_spapr_init(ram_addr_t ram_size,
 
     qemu_free(fdt);
 
-    envs[0]->gpr[3] = fdt_addr;
-    envs[0]->gpr[5] = 0;
-    envs[0]->hreset_vector = kernel_base;
-    envs[0]->halted = 0;
+    first_cpu->gpr[3] = fdt_addr;
+    first_cpu->gpr[5] = 0;
+    first_cpu->hreset_vector = kernel_base;
+    first_cpu->halted = 0;
 }
 
 static QEMUMachine spapr_machine = {
diff --git a/hw/xics.c b/hw/xics.c
index 66047a6..13a1d25 100644
--- a/hw/xics.c
+++ b/hw/xics.c
@@ -425,27 +425,39 @@ static void rtas_int_on(sPAPREnvironment *spapr, uint32_t token,
     rtas_st(rets, 0, 0); /* Success */
 }
 
-struct icp_state *xics_system_init(int nr_servers, CPUState *servers[],
-                                   int nr_irqs)
+struct icp_state *xics_system_init(int nr_irqs)
 {
+    CPUState *env;
+    int max_server_num;
     int i;
     struct icp_state *icp;
     struct ics_state *ics;
 
+    max_server_num = -1;
+    for (env = first_cpu; env != NULL; env = env->next_cpu) {
+        if (env->cpu_index > max_server_num) {
+            max_server_num = env->cpu_index;
+        }
+    }
+
     icp = qemu_mallocz(sizeof(*icp));
-    icp->nr_servers = nr_servers;
-    icp->ss = qemu_mallocz(nr_servers * sizeof(struct icp_server_state));
+    icp->nr_servers = max_server_num + 1;
+    icp->ss = qemu_mallocz(icp->nr_servers*sizeof(struct icp_server_state));
+
+    for (i = 0; i < icp->nr_servers; i++) {
+        icp->ss[i].mfrr = 0xff;
+    }
 
-    for (i = 0; i < nr_servers; i++) {
-        servers[i]->cpu_index = i;
+    for (env = first_cpu; env != NULL; env = env->next_cpu) {
+        struct icp_server_state *ss = &icp->ss[env->cpu_index];
 
-        switch (PPC_INPUT(servers[i])) {
+        switch (PPC_INPUT(env)) {
         case PPC_FLAGS_INPUT_POWER7:
-            icp->ss[i].output = servers[i]->irq_inputs[POWER7_INPUT_INT];
+            ss->output = env->irq_inputs[POWER7_INPUT_INT];
             break;
 
         case PPC_FLAGS_INPUT_970:
-            icp->ss[i].output = servers[i]->irq_inputs[PPC970_INPUT_INT];
+            ss->output = env->irq_inputs[PPC970_INPUT_INT];
             break;
 
         default:
@@ -453,8 +465,6 @@ struct icp_state *xics_system_init(int nr_servers, CPUState *servers[],
                      "model\n");
             exit(1);
         }
-
-        icp->ss[i].mfrr = 0xff;
     }
 
     ics = qemu_mallocz(sizeof(*ics));
diff --git a/hw/xics.h b/hw/xics.h
index 096eeb3..83c1182 100644
--- a/hw/xics.h
+++ b/hw/xics.h
@@ -33,7 +33,6 @@ struct icp_state;
 
 qemu_irq xics_find_qirq(struct icp_state *icp, int irq);
 
-struct icp_state *xics_system_init(int nr_servers, CPUState *servers[],
-                                   int nr_irqs);
+struct icp_state *xics_system_init(int nr_irqs);
 
 #endif /* __XICS_H__ */
-- 
1.7.1

^ permalink raw reply related	[flat|nested] 7+ messages in thread

* [Qemu-devel] [PATCH 2/3] Delay creation of pseries device tree until reset
  2011-04-05  5:12 [Qemu-devel] [0/3] pSeries machine improvements David Gibson
  2011-04-05  5:12 ` [Qemu-devel] [PATCH 1/3] pseries: Abolish envs array David Gibson
@ 2011-04-05  5:12 ` David Gibson
  2011-04-05  5:12 ` [Qemu-devel] [PATCH 3/3] Use existing helper function to implement popcntd instruction David Gibson
  2 siblings, 0 replies; 7+ messages in thread
From: David Gibson @ 2011-04-05  5:12 UTC (permalink / raw)
  To: agraf, qemu-devel

At present, the 'pseries' machine creates a flattened device tree in the
machine->init function to pass to either the guest kernel or to firmware.

However, the machine->init function runs before processing of -device
command line options, which means that the device tree so created will
be (incorrectly) missing devices specified that way.

Supplying a correct device tree is, in any case, part of the required
platform entry conditions.  Therefore, this patch moves the creation and
loading of the device tree from machine->init to a reset callback.  The
setup of entry point address and initial register state moves with it,
which leads to a slight cleanup.

This is not, alas, quite enough to make a fully working reset for pseries.
For that we would need to reload the firmware images, which on this
machine are loaded into RAM.  It's a step in the right direction, though.

Signed-off-by: David Gibson <dwg@au1.ibm.com>
---
 hw/spapr.c |  116 +++++++++++++++++++++++++++++++++++-------------------------
 hw/spapr.h |    7 ++++
 2 files changed, 75 insertions(+), 48 deletions(-)

diff --git a/hw/spapr.c b/hw/spapr.c
index f80873c..1782cc0 100644
--- a/hw/spapr.c
+++ b/hw/spapr.c
@@ -56,20 +56,16 @@
 
 sPAPREnvironment *spapr;
 
-static void *spapr_create_fdt(int *fdt_size, ram_addr_t ramsize,
-                              const char *cpu_model,
-                              sPAPREnvironment *spapr,
-                              target_phys_addr_t initrd_base,
-                              target_phys_addr_t initrd_size,
-                              const char *boot_device,
-                              const char *kernel_cmdline,
-                              target_phys_addr_t rtas_addr,
-                              target_phys_addr_t rtas_size,
-                              long hash_shift)
+static void *spapr_create_fdt_skel(const char *cpu_model,
+                                   target_phys_addr_t initrd_base,
+                                   target_phys_addr_t initrd_size,
+                                   const char *boot_device,
+                                   const char *kernel_cmdline,
+                                   long hash_shift)
 {
     void *fdt;
     CPUState *env;
-    uint64_t mem_reg_property[] = { 0, cpu_to_be64(ramsize) };
+    uint64_t mem_reg_property[] = { 0, cpu_to_be64(ram_size) };
     uint32_t start_prop = cpu_to_be32(initrd_base);
     uint32_t end_prop = cpu_to_be32(initrd_base + initrd_size);
     uint32_t pft_size_prop[] = {0, cpu_to_be32(hash_shift)};
@@ -78,7 +74,6 @@ static void *spapr_create_fdt(int *fdt_size, ram_addr_t ramsize,
     uint32_t interrupt_server_ranges_prop[] = {0, cpu_to_be32(smp_cpus)};
     int i;
     char *modelname;
-    int ret;
 
 #define _FDT(exp) \
     do { \
@@ -222,8 +217,21 @@ static void *spapr_create_fdt(int *fdt_size, ram_addr_t ramsize,
     _FDT((fdt_end_node(fdt))); /* close root node */
     _FDT((fdt_finish(fdt)));
 
-    /* re-expand to allow for further tweaks */
-    _FDT((fdt_open_into(fdt, fdt, FDT_MAX_SIZE)));
+    return fdt;
+}
+
+static void spapr_finalize_fdt(sPAPREnvironment *spapr,
+                               target_phys_addr_t fdt_addr,
+                               target_phys_addr_t rtas_addr,
+                               target_phys_addr_t rtas_size)
+{
+    int ret;
+    void *fdt;
+
+    fdt = qemu_malloc(FDT_MAX_SIZE);
+
+    /* open out the base tree into a temp buffer for the final tweaks */
+    _FDT((fdt_open_into(spapr->fdt_skel, fdt, FDT_MAX_SIZE)));
 
     ret = spapr_populate_vdevice(spapr->vio_bus, fdt);
     if (ret < 0) {
@@ -239,9 +247,9 @@ static void *spapr_create_fdt(int *fdt_size, ram_addr_t ramsize,
 
     _FDT((fdt_pack(fdt)));
 
-    *fdt_size = fdt_totalsize(fdt);
+    cpu_physical_memory_write(fdt_addr, fdt, fdt_totalsize(fdt));
 
-    return fdt;
+    qemu_free(fdt);
 }
 
 static uint64_t translate_kernel_address(void *opaque, uint64_t addr)
@@ -254,6 +262,27 @@ static void emulate_spapr_hypercall(CPUState *env)
     env->gpr[3] = spapr_hypercall(env, env->gpr[3], &env->gpr[4]);
 }
 
+static void spapr_reset(void *opaque)
+{
+    sPAPREnvironment *spapr = (sPAPREnvironment *)opaque;
+
+    fprintf(stderr, "sPAPR reset\n");
+
+    /* flush out the hash table */
+    memset(spapr->htab, 0, spapr->htab_size);
+
+    /* Load the fdt */
+    spapr_finalize_fdt(spapr, spapr->fdt_addr, spapr->rtas_addr,
+                       spapr->rtas_size);
+
+    /* Set up the entry state */
+    first_cpu->gpr[3] = spapr->fdt_addr;
+    first_cpu->gpr[5] = 0;
+    first_cpu->halted = 0;
+    first_cpu->nip = spapr->entry_point;
+
+}
+
 /* pSeries LPAR / sPAPR hardware init */
 static void ppc_spapr_init(ram_addr_t ram_size,
                            const char *boot_device,
@@ -262,15 +291,12 @@ static void ppc_spapr_init(ram_addr_t ram_size,
                            const char *initrd_filename,
                            const char *cpu_model)
 {
-    void *fdt, *htab;
     CPUState *env;
     int i;
     ram_addr_t ram_offset;
-    target_phys_addr_t fdt_addr, rtas_addr;
-    uint32_t kernel_base, initrd_base;
-    long kernel_size, initrd_size, htab_size, rtas_size, fw_size;
+    uint32_t initrd_base;
+    long kernel_size, initrd_size, fw_size;
     long pteg_shift = 17;
-    int fdt_size;
     char *filename;
     int irq = 16;
 
@@ -280,9 +306,8 @@ static void ppc_spapr_init(ram_addr_t ram_size,
     /* We place the device tree just below either the top of RAM, or
      * 2GB, so that it can be processed with 32-bit code if
      * necessary */
-    fdt_addr = MIN(ram_size, 0x80000000) - FDT_MAX_SIZE;
-    /* RTAS goes just below that */
-    rtas_addr = fdt_addr - RTAS_MAX_SIZE;
+    spapr->fdt_addr = MIN(ram_size, 0x80000000) - FDT_MAX_SIZE;
+    spapr->rtas_addr = spapr->fdt_addr - RTAS_MAX_SIZE;
 
     /* init CPUs */
     if (cpu_model == NULL) {
@@ -311,18 +336,19 @@ static void ppc_spapr_init(ram_addr_t ram_size,
     /* allocate hash page table.  For now we always make this 16mb,
      * later we should probably make it scale to the size of guest
      * RAM */
-    htab_size = 1ULL << (pteg_shift + 7);
-    htab = qemu_mallocz(htab_size);
+    spapr->htab_size = 1ULL << (pteg_shift + 7);
+    spapr->htab = qemu_malloc(spapr->htab_size);
 
     for (env = first_cpu; env != NULL; env = env->next_cpu) {
-        env->external_htab = htab;
+        env->external_htab = spapr->htab;
         env->htab_base = -1;
-        env->htab_mask = htab_size - 1;
+        env->htab_mask = spapr->htab_size - 1;
     }
 
     filename = qemu_find_file(QEMU_FILE_TYPE_BIOS, "spapr-rtas.bin");
-    rtas_size = load_image_targphys(filename, rtas_addr, ram_size - rtas_addr);
-    if (rtas_size < 0) {
+    spapr->rtas_size = load_image_targphys(filename, spapr->rtas_addr,
+                                           ram_size - spapr->rtas_addr);
+    if (spapr->rtas_size < 0) {
         hw_error("qemu: could not load LPAR rtas '%s'\n", filename);
         exit(1);
     }
@@ -368,13 +394,12 @@ static void ppc_spapr_init(ram_addr_t ram_size,
     if (kernel_filename) {
         uint64_t lowaddr = 0;
 
-        kernel_base = KERNEL_LOAD_ADDR;
-
         kernel_size = load_elf(kernel_filename, translate_kernel_address, NULL,
                                NULL, &lowaddr, NULL, 1, ELF_MACHINE, 0);
         if (kernel_size < 0) {
-            kernel_size = load_image_targphys(kernel_filename, kernel_base,
-                                              ram_size - kernel_base);
+            kernel_size = load_image_targphys(kernel_filename,
+                                              KERNEL_LOAD_ADDR,
+                                              ram_size - KERNEL_LOAD_ADDR);
         }
         if (kernel_size < 0) {
             fprintf(stderr, "qemu: could not load kernel '%s'\n",
@@ -396,6 +421,8 @@ static void ppc_spapr_init(ram_addr_t ram_size,
             initrd_base = 0;
             initrd_size = 0;
         }
+
+        spapr->entry_point = KERNEL_LOAD_ADDR;
     } else {
         if (ram_size < (MIN_RAM_SLOF << 20)) {
             fprintf(stderr, "qemu: pSeries SLOF firmware requires >= "
@@ -409,7 +436,7 @@ static void ppc_spapr_init(ram_addr_t ram_size,
             exit(1);
         }
         qemu_free(filename);
-        kernel_base = 0x100;
+        spapr->entry_point = 0x100;
         initrd_base = 0;
         initrd_size = 0;
 
@@ -421,20 +448,13 @@ static void ppc_spapr_init(ram_addr_t ram_size,
     }
 
     /* Prepare the device tree */
-    fdt = spapr_create_fdt(&fdt_size, ram_size, cpu_model, spapr,
-                           initrd_base, initrd_size,
-                           boot_device, kernel_cmdline,
-                           rtas_addr, rtas_size, pteg_shift + 7);
-    assert(fdt != NULL);
+    spapr->fdt_skel = spapr_create_fdt_skel(cpu_model,
+                                            initrd_base, initrd_size,
+                                            boot_device, kernel_cmdline,
+                                            pteg_shift + 7);
+    assert(spapr->fdt_skel != NULL);
 
-    cpu_physical_memory_write(fdt_addr, fdt, fdt_size);
-
-    qemu_free(fdt);
-
-    first_cpu->gpr[3] = fdt_addr;
-    first_cpu->gpr[5] = 0;
-    first_cpu->hreset_vector = kernel_base;
-    first_cpu->halted = 0;
+    qemu_register_reset(spapr_reset, spapr);
 }
 
 static QEMUMachine spapr_machine = {
diff --git a/hw/spapr.h b/hw/spapr.h
index fae8e13..b52133a 100644
--- a/hw/spapr.h
+++ b/hw/spapr.h
@@ -7,6 +7,13 @@ struct icp_state;
 typedef struct sPAPREnvironment {
     struct VIOsPAPRBus *vio_bus;
     struct icp_state *icp;
+
+    void *htab;
+    long htab_size;
+    target_phys_addr_t fdt_addr, rtas_addr;
+    long rtas_size;
+    void *fdt_skel;
+    target_ulong entry_point;
 } sPAPREnvironment;
 
 #define H_SUCCESS         0
-- 
1.7.1

^ permalink raw reply related	[flat|nested] 7+ messages in thread

* [Qemu-devel] [PATCH 3/3] Use existing helper function to implement popcntd instruction
  2011-04-05  5:12 [Qemu-devel] [0/3] pSeries machine improvements David Gibson
  2011-04-05  5:12 ` [Qemu-devel] [PATCH 1/3] pseries: Abolish envs array David Gibson
  2011-04-05  5:12 ` [Qemu-devel] [PATCH 2/3] Delay creation of pseries device tree until reset David Gibson
@ 2011-04-05  5:12 ` David Gibson
  2 siblings, 0 replies; 7+ messages in thread
From: David Gibson @ 2011-04-05  5:12 UTC (permalink / raw)
  To: agraf, qemu-devel

The recent patches adding partial support for POWER7 cpu emulation included
implementing the popcntd instruction.  The support for this was open coded,
but host-utils.h already included a function implementing an equivalent
population count function, which uses a gcc builtin (which can use special
host instructions) if available.

This patch makes the popcntd implementation use the existing, potentially
faster, implementation.

Signed-off-by: David Gibson <david@gibson.dropbear.id.au>
---
 target-ppc/op_helper.c |   14 +-------------
 1 files changed, 1 insertions(+), 13 deletions(-)

diff --git a/target-ppc/op_helper.c b/target-ppc/op_helper.c
index b1b883d..5882bec 100644
--- a/target-ppc/op_helper.c
+++ b/target-ppc/op_helper.c
@@ -528,19 +528,7 @@ target_ulong helper_popcntw (target_ulong val)
 
 target_ulong helper_popcntd (target_ulong val)
 {
-    val = (val & 0x5555555555555555ULL) + ((val >>  1) &
-                                           0x5555555555555555ULL);
-    val = (val & 0x3333333333333333ULL) + ((val >>  2) &
-                                           0x3333333333333333ULL);
-    val = (val & 0x0f0f0f0f0f0f0f0fULL) + ((val >>  4) &
-                                           0x0f0f0f0f0f0f0f0fULL);
-    val = (val & 0x00ff00ff00ff00ffULL) + ((val >>  8) &
-                                           0x00ff00ff00ff00ffULL);
-    val = (val & 0x0000ffff0000ffffULL) + ((val >> 16) &
-                                           0x0000ffff0000ffffULL);
-    val = (val & 0x00000000ffffffffULL) + ((val >> 32) &
-                                           0x00000000ffffffffULL);
-    return val;
+    return ctpop64(val);
 }
 #else
 target_ulong helper_popcntb (target_ulong val)
-- 
1.7.1

^ permalink raw reply related	[flat|nested] 7+ messages in thread

* [Qemu-devel] Re: [PATCH 1/3] pseries: Abolish envs array
  2011-04-05  5:12 ` [Qemu-devel] [PATCH 1/3] pseries: Abolish envs array David Gibson
@ 2011-04-05  8:05   ` Alexander Graf
  2011-04-05 13:16     ` David Gibson
  0 siblings, 1 reply; 7+ messages in thread
From: Alexander Graf @ 2011-04-05  8:05 UTC (permalink / raw)
  To: David Gibson; +Cc: qemu-devel


On 05.04.2011, at 07:12, David Gibson wrote:

> Currently the pseries machine init code builds up an array, envs, of
> CPUState pointers for all the cpus in the system.  This is kind of
> pointless, given the generic code already has a perfectly good linked list
> of the cpus.
> 
> In addition, there are a number of places which assume that the cpu's
> cpu_index field is equal to its index in this array.  This is true in
> practice, because cpu_index values are just assigned sequentially, but
> it's conceptually incorrect and may not always be true.
> 
> Therefore, this patch abolishes the envs array, and explicitly uses the
> generic cpu linked list and cpu_index values throughout.
> 
> Signed-off-by: David Gibson <david@gibson.dropbear.id.au>
> ---
> hw/spapr.c |   49 ++++++++++++++++++++++++-------------------------
> hw/xics.c  |   32 +++++++++++++++++++++-----------
> hw/xics.h  |    3 +--
> 3 files changed, 46 insertions(+), 38 deletions(-)
> 
> diff --git a/hw/spapr.c b/hw/spapr.c
> index 1152a25..f80873c 100644
> --- a/hw/spapr.c
> +++ b/hw/spapr.c
> @@ -57,7 +57,7 @@
> sPAPREnvironment *spapr;
> 
> static void *spapr_create_fdt(int *fdt_size, ram_addr_t ramsize,
> -                              const char *cpu_model, CPUState *envs[],
> +                              const char *cpu_model,
>                               sPAPREnvironment *spapr,
>                               target_phys_addr_t initrd_base,
>                               target_phys_addr_t initrd_size,
> @@ -68,6 +68,7 @@ static void *spapr_create_fdt(int *fdt_size, ram_addr_t ramsize,
>                               long hash_shift)
> {
>     void *fdt;
> +    CPUState *env;
>     uint64_t mem_reg_property[] = { 0, cpu_to_be64(ramsize) };
>     uint32_t start_prop = cpu_to_be32(initrd_base);
>     uint32_t end_prop = cpu_to_be32(initrd_base + initrd_size);
> @@ -135,14 +136,14 @@ static void *spapr_create_fdt(int *fdt_size, ram_addr_t ramsize,
>         modelname[i] = toupper(modelname[i]);
>     }
> 
> -    for (i = 0; i < smp_cpus; i++) {
> -        CPUState *env = envs[i];
> -        uint32_t gserver_prop[] = {cpu_to_be32(i), 0}; /* HACK! */
> +    for (env = first_cpu; env != NULL; env = env->next_cpu) {
> +        int index = env->cpu_index;
> +        uint32_t gserver_prop[] = {cpu_to_be32(index), 0}; /* HACK! */
>         char *nodename;
>         uint32_t segs[] = {cpu_to_be32(28), cpu_to_be32(40),
>                            0xffffffff, 0xffffffff};
> 
> -        if (asprintf(&nodename, "%s@%x", modelname, i) < 0) {
> +        if (asprintf(&nodename, "%s@%x", modelname, index) < 0) {
>             fprintf(stderr, "Allocation failure\n");
>             exit(1);
>         }
> @@ -151,7 +152,7 @@ static void *spapr_create_fdt(int *fdt_size, ram_addr_t ramsize,
> 
>         free(nodename);
> 
> -        _FDT((fdt_property_cell(fdt, "reg", i)));
> +        _FDT((fdt_property_cell(fdt, "reg", index)));
>         _FDT((fdt_property_string(fdt, "device_type", "cpu")));
> 
>         _FDT((fdt_property_cell(fdt, "cpu-version", env->spr[SPR_PVR])));
> @@ -168,11 +169,11 @@ static void *spapr_create_fdt(int *fdt_size, ram_addr_t ramsize,
>                            pft_size_prop, sizeof(pft_size_prop))));
>         _FDT((fdt_property_string(fdt, "status", "okay")));
>         _FDT((fdt_property(fdt, "64-bit", NULL, 0)));
> -        _FDT((fdt_property_cell(fdt, "ibm,ppc-interrupt-server#s", i)));
> +        _FDT((fdt_property_cell(fdt, "ibm,ppc-interrupt-server#s", index)));
>         _FDT((fdt_property(fdt, "ibm,ppc-interrupt-gserver#s",
>                            gserver_prop, sizeof(gserver_prop))));
> 
> -        if (envs[i]->mmu_model & POWERPC_MMU_1TSEG) {
> +        if (env->mmu_model & POWERPC_MMU_1TSEG) {
>             _FDT((fdt_property(fdt, "ibm,processor-segment-sizes",
>                                segs, sizeof(segs))));
>         }
> @@ -261,8 +262,8 @@ static void ppc_spapr_init(ram_addr_t ram_size,
>                            const char *initrd_filename,
>                            const char *cpu_model)
> {
> -    CPUState *envs[MAX_CPUS];
>     void *fdt, *htab;
> +    CPUState *env;
>     int i;
>     ram_addr_t ram_offset;
>     target_phys_addr_t fdt_addr, rtas_addr;
> @@ -288,7 +289,7 @@ static void ppc_spapr_init(ram_addr_t ram_size,
>         cpu_model = "POWER7";
>     }
>     for (i = 0; i < smp_cpus; i++) {
> -        CPUState *env = cpu_init(cpu_model);
> +        env = cpu_init(cpu_model);
> 
>         if (!env) {
>             fprintf(stderr, "Unable to find PowerPC CPU definition\n");
> @@ -300,9 +301,7 @@ static void ppc_spapr_init(ram_addr_t ram_size,
> 
>         env->hreset_vector = 0x60;
>         env->hreset_excp_prefix = 0;
> -        env->gpr[3] = i;
> -
> -        envs[i] = env;
> +        env->gpr[3] = env->cpu_index;
>     }
> 
>     /* allocate RAM */
> @@ -315,10 +314,10 @@ static void ppc_spapr_init(ram_addr_t ram_size,
>     htab_size = 1ULL << (pteg_shift + 7);
>     htab = qemu_mallocz(htab_size);
> 
> -    for (i = 0; i < smp_cpus; i++) {
> -        envs[i]->external_htab = htab;
> -        envs[i]->htab_base = -1;
> -        envs[i]->htab_mask = htab_size - 1;
> +    for (env = first_cpu; env != NULL; env = env->next_cpu) {
> +        env->external_htab = htab;
> +        env->htab_base = -1;
> +        env->htab_mask = htab_size - 1;
>     }
> 
>     filename = qemu_find_file(QEMU_FILE_TYPE_BIOS, "spapr-rtas.bin");
> @@ -330,7 +329,7 @@ static void ppc_spapr_init(ram_addr_t ram_size,
>     qemu_free(filename);
> 
>     /* Set up Interrupt Controller */
> -    spapr->icp = xics_system_init(smp_cpus, envs, XICS_IRQS);
> +    spapr->icp = xics_system_init(XICS_IRQS);
> 
>     /* Set up VIO bus */
>     spapr->vio_bus = spapr_vio_bus_init();
> @@ -416,13 +415,13 @@ static void ppc_spapr_init(ram_addr_t ram_size,
> 
>         /* SLOF will startup the secondary CPUs using RTAS,
>            rather than expecting a kexec() style entry */
> -        for (i = 0; i < smp_cpus; i++) {
> -            envs[i]->halted = 1;
> +        for (env = first_cpu; env != NULL; env = env->next_cpu) {
> +            env->halted = 1;
>         }
>     }
> 
>     /* Prepare the device tree */
> -    fdt = spapr_create_fdt(&fdt_size, ram_size, cpu_model, envs, spapr,
> +    fdt = spapr_create_fdt(&fdt_size, ram_size, cpu_model, spapr,
>                            initrd_base, initrd_size,
>                            boot_device, kernel_cmdline,
>                            rtas_addr, rtas_size, pteg_shift + 7);
> @@ -432,10 +431,10 @@ static void ppc_spapr_init(ram_addr_t ram_size,
> 
>     qemu_free(fdt);
> 
> -    envs[0]->gpr[3] = fdt_addr;
> -    envs[0]->gpr[5] = 0;
> -    envs[0]->hreset_vector = kernel_base;
> -    envs[0]->halted = 0;
> +    first_cpu->gpr[3] = fdt_addr;
> +    first_cpu->gpr[5] = 0;
> +    first_cpu->hreset_vector = kernel_base;
> +    first_cpu->halted = 0;
> }
> 
> static QEMUMachine spapr_machine = {
> diff --git a/hw/xics.c b/hw/xics.c
> index 66047a6..13a1d25 100644
> --- a/hw/xics.c
> +++ b/hw/xics.c
> @@ -425,27 +425,39 @@ static void rtas_int_on(sPAPREnvironment *spapr, uint32_t token,
>     rtas_st(rets, 0, 0); /* Success */
> }
> 
> -struct icp_state *xics_system_init(int nr_servers, CPUState *servers[],
> -                                   int nr_irqs)
> +struct icp_state *xics_system_init(int nr_irqs)
> {
> +    CPUState *env;
> +    int max_server_num;
>     int i;
>     struct icp_state *icp;
>     struct ics_state *ics;
> 
> +    max_server_num = -1;
> +    for (env = first_cpu; env != NULL; env = env->next_cpu) {
> +        if (env->cpu_index > max_server_num) {
> +            max_server_num = env->cpu_index;
> +        }
> +    }
> +
>     icp = qemu_mallocz(sizeof(*icp));
> -    icp->nr_servers = nr_servers;
> -    icp->ss = qemu_mallocz(nr_servers * sizeof(struct icp_server_state));
> +    icp->nr_servers = max_server_num + 1;
> +    icp->ss = qemu_mallocz(icp->nr_servers*sizeof(struct icp_server_state));
> +
> +    for (i = 0; i < icp->nr_servers; i++) {
> +        icp->ss[i].mfrr = 0xff;

Is ss always big enough to hold all CPUs + 1?


Alex

^ permalink raw reply	[flat|nested] 7+ messages in thread

* [Qemu-devel] Re: [PATCH 1/3] pseries: Abolish envs array
  2011-04-05  8:05   ` [Qemu-devel] " Alexander Graf
@ 2011-04-05 13:16     ` David Gibson
  2011-04-05 13:55       ` Alexander Graf
  0 siblings, 1 reply; 7+ messages in thread
From: David Gibson @ 2011-04-05 13:16 UTC (permalink / raw)
  To: Alexander Graf; +Cc: qemu-devel

On Tue, Apr 05, 2011 at 10:05:12AM +0200, Alexander Graf wrote:
> On 05.04.2011, at 07:12, David Gibson wrote:
[snip]
> > +struct icp_state *xics_system_init(int nr_irqs)
> > {
> > +    CPUState *env;
> > +    int max_server_num;
> >     int i;
> >     struct icp_state *icp;
> >     struct ics_state *ics;
> > 
> > +    max_server_num = -1;
> > +    for (env = first_cpu; env != NULL; env = env->next_cpu) {
> > +        if (env->cpu_index > max_server_num) {
> > +            max_server_num = env->cpu_index;
> > +        }
> > +    }
> > +
> >     icp = qemu_mallocz(sizeof(*icp));
> > -    icp->nr_servers = nr_servers;
> > -    icp->ss = qemu_mallocz(nr_servers * sizeof(struct icp_server_state));
> > +    icp->nr_servers = max_server_num + 1;
> > +    icp->ss = qemu_mallocz(icp->nr_servers*sizeof(struct icp_server_state));
> > +
> > +    for (i = 0; i < icp->nr_servers; i++) {
> > +        icp->ss[i].mfrr = 0xff;
> 
> Is ss always big enough to hold all CPUs + 1?

Well, it should be.  We allocated it just above based on
icp->nr_servers, and nr_servers is the largest cpu_index + 1, computer
just above that.

-- 
David Gibson			| I'll have my music baroque, and my code
david AT gibson.dropbear.id.au	| minimalist, thank you.  NOT _the_ _other_
				| _way_ _around_!
http://www.ozlabs.org/~dgibson

^ permalink raw reply	[flat|nested] 7+ messages in thread

* [Qemu-devel] Re: [PATCH 1/3] pseries: Abolish envs array
  2011-04-05 13:16     ` David Gibson
@ 2011-04-05 13:55       ` Alexander Graf
  0 siblings, 0 replies; 7+ messages in thread
From: Alexander Graf @ 2011-04-05 13:55 UTC (permalink / raw)
  To: qemu-devel; +Cc: David Gibson

On 04/05/2011 03:16 PM, David Gibson wrote:
> On Tue, Apr 05, 2011 at 10:05:12AM +0200, Alexander Graf wrote:
>> On 05.04.2011, at 07:12, David Gibson wrote:
> [snip]
>>> +struct icp_state *xics_system_init(int nr_irqs)
>>> {
>>> +    CPUState *env;
>>> +    int max_server_num;
>>>      int i;
>>>      struct icp_state *icp;
>>>      struct ics_state *ics;
>>>
>>> +    max_server_num = -1;
>>> +    for (env = first_cpu; env != NULL; env = env->next_cpu) {
>>> +        if (env->cpu_index>  max_server_num) {
>>> +            max_server_num = env->cpu_index;
>>> +        }
>>> +    }
>>> +
>>>      icp = qemu_mallocz(sizeof(*icp));
>>> -    icp->nr_servers = nr_servers;
>>> -    icp->ss = qemu_mallocz(nr_servers * sizeof(struct icp_server_state));
>>> +    icp->nr_servers = max_server_num + 1;
>>> +    icp->ss = qemu_mallocz(icp->nr_servers*sizeof(struct icp_server_state));
>>> +
>>> +    for (i = 0; i<  icp->nr_servers; i++) {
>>> +        icp->ss[i].mfrr = 0xff;
>> Is ss always big enough to hold all CPUs + 1?
> Well, it should be.  We allocated it just above based on
> icp->nr_servers, and nr_servers is the largest cpu_index + 1, computer
> just above that

Ah, there it is :). I missed that part. Will apply the patches to my tree.


Alex

^ permalink raw reply	[flat|nested] 7+ messages in thread

end of thread, other threads:[~2011-04-05 13:55 UTC | newest]

Thread overview: 7+ messages (download: mbox.gz follow: Atom feed
-- links below jump to the message on this page --
2011-04-05  5:12 [Qemu-devel] [0/3] pSeries machine improvements David Gibson
2011-04-05  5:12 ` [Qemu-devel] [PATCH 1/3] pseries: Abolish envs array David Gibson
2011-04-05  8:05   ` [Qemu-devel] " Alexander Graf
2011-04-05 13:16     ` David Gibson
2011-04-05 13:55       ` Alexander Graf
2011-04-05  5:12 ` [Qemu-devel] [PATCH 2/3] Delay creation of pseries device tree until reset David Gibson
2011-04-05  5:12 ` [Qemu-devel] [PATCH 3/3] Use existing helper function to implement popcntd instruction David Gibson

This is a public inbox, see mirroring instructions
for how to clone and mirror all data and code used for this inbox;
as well as URLs for NNTP newsgroup(s).