- * [Qemu-devel] [PATCH 01/58] spapr: proper qdevification
  2011-09-14  8:42 [Qemu-devel] [PULL 00/58] ppc patch queue 2011-09-14 Alexander Graf
@ 2011-09-14  8:42 ` Alexander Graf
  2011-09-15  3:14   ` David Gibson
  2011-09-14  8:42 ` [Qemu-devel] [PATCH 02/58] spapr: prepare for qdevification of irq Alexander Graf
                   ` (56 subsequent siblings)
  57 siblings, 1 reply; 128+ messages in thread
From: Alexander Graf @ 2011-09-14  8:42 UTC (permalink / raw)
  To: qemu-devel Developers
  Cc: Blue Swirl, Paolo Bonzini, qemu-ppc, Aurelien Jarno, David Gibson
From: Paolo Bonzini <pbonzini@redhat.com>
Right now the spapr devices cannot be instantiated with -device,
because the IRQs need to be passed to the spapr_*_create functions.
Do this instead in the bus's init wrapper.
This is particularly important with the conversion from scsi-disk
to scsi-{cd,hd} that Markus made.  After his patches, if you
specify a scsi-cd device attached to an if=none drive, the default
VSCSI controller will not be created and, without qdevification,
you will not be able to add yours.
NOTE from agraf: added small compile fix
Signed-off-by: Paolo Bonzini <pbonzini@redhat.com>
Cc: Alexander Graf <agraf@suse.de>
Cc: David Gibson <david@gibson.dropbear.id.au>
Signed-off-by: Alexander Graf <agraf@suse.de>
---
 hw/spapr.c       |   15 +++++----------
 hw/spapr.h       |    8 ++++++++
 hw/spapr_llan.c  |    7 +------
 hw/spapr_vio.c   |    5 +++++
 hw/spapr_vio.h   |   13 ++++---------
 hw/spapr_vscsi.c |    8 +-------
 hw/spapr_vty.c   |    8 +-------
 7 files changed, 25 insertions(+), 39 deletions(-)
diff --git a/hw/spapr.c b/hw/spapr.c
index 1265cee..8cf93fe 100644
--- a/hw/spapr.c
+++ b/hw/spapr.c
@@ -298,7 +298,6 @@ static void ppc_spapr_init(ram_addr_t ram_size,
     long kernel_size, initrd_size, fw_size;
     long pteg_shift = 17;
     char *filename;
-    int irq = 16;
 
     spapr = g_malloc(sizeof(*spapr));
     cpu_ppc_hypercall = emulate_spapr_hypercall;
@@ -360,15 +359,14 @@ static void ppc_spapr_init(ram_addr_t ram_size,
     /* Set up VIO bus */
     spapr->vio_bus = spapr_vio_bus_init();
 
-    for (i = 0; i < MAX_SERIAL_PORTS; i++, irq++) {
+    for (i = 0; i < MAX_SERIAL_PORTS; i++) {
         if (serial_hds[i]) {
             spapr_vty_create(spapr->vio_bus, SPAPR_VTY_BASE_ADDRESS + i,
-                             serial_hds[i], xics_find_qirq(spapr->icp, irq),
-                             irq);
+                             serial_hds[i]);
         }
     }
 
-    for (i = 0; i < nb_nics; i++, irq++) {
+    for (i = 0; i < nb_nics; i++) {
         NICInfo *nd = &nd_table[i];
 
         if (!nd->model) {
@@ -376,8 +374,7 @@ static void ppc_spapr_init(ram_addr_t ram_size,
         }
 
         if (strcmp(nd->model, "ibmveth") == 0) {
-            spapr_vlan_create(spapr->vio_bus, 0x1000 + i, nd,
-                              xics_find_qirq(spapr->icp, irq), irq);
+            spapr_vlan_create(spapr->vio_bus, 0x1000 + i, nd);
         } else {
             fprintf(stderr, "pSeries (sPAPR) platform does not support "
                     "NIC model '%s' (only ibmveth is supported)\n",
@@ -387,9 +384,7 @@ static void ppc_spapr_init(ram_addr_t ram_size,
     }
 
     for (i = 0; i <= drive_get_max_bus(IF_SCSI); i++) {
-        spapr_vscsi_create(spapr->vio_bus, 0x2000 + i,
-                           xics_find_qirq(spapr->icp, irq), irq);
-        irq++;
+        spapr_vscsi_create(spapr->vio_bus, 0x2000 + i);
     }
 
     if (kernel_filename) {
diff --git a/hw/spapr.h b/hw/spapr.h
index 263691b..009c459 100644
--- a/hw/spapr.h
+++ b/hw/spapr.h
@@ -1,6 +1,8 @@
 #if !defined(__HW_SPAPR_H__)
 #define __HW_SPAPR_H__
 
+#include "hw/xics.h"
+
 struct VIOsPAPRBus;
 struct icp_state;
 
@@ -278,6 +280,12 @@ void spapr_register_hypercall(target_ulong opcode, spapr_hcall_fn fn);
 target_ulong spapr_hypercall(CPUState *env, target_ulong opcode,
                              target_ulong *args);
 
+static inline qemu_irq spapr_find_qirq(sPAPREnvironment *spapr,
+                                        int irq_num)
+{
+    return xics_find_qirq(spapr->icp, irq_num);
+}
+
 static inline uint32_t rtas_ld(target_ulong phys, int n)
 {
     return ldl_be_phys(phys + 4*n);
diff --git a/hw/spapr_llan.c b/hw/spapr_llan.c
index c18efc7..2597748 100644
--- a/hw/spapr_llan.c
+++ b/hw/spapr_llan.c
@@ -195,11 +195,9 @@ static int spapr_vlan_init(VIOsPAPRDevice *sdev)
     return 0;
 }
 
-void spapr_vlan_create(VIOsPAPRBus *bus, uint32_t reg, NICInfo *nd,
-                       qemu_irq qirq, uint32_t vio_irq_num)
+void spapr_vlan_create(VIOsPAPRBus *bus, uint32_t reg, NICInfo *nd)
 {
     DeviceState *dev;
-    VIOsPAPRDevice *sdev;
 
     dev = qdev_create(&bus->bus, "spapr-vlan");
     qdev_prop_set_uint32(dev, "reg", reg);
@@ -207,9 +205,6 @@ void spapr_vlan_create(VIOsPAPRBus *bus, uint32_t reg, NICInfo *nd,
     qdev_set_nic_properties(dev, nd);
 
     qdev_init_nofail(dev);
-    sdev = (VIOsPAPRDevice *)dev;
-    sdev->qirq = qirq;
-    sdev->vio_irq_num = vio_irq_num;
 }
 
 static int spapr_vlan_devnode(VIOsPAPRDevice *dev, void *fdt, int node_off)
diff --git a/hw/spapr_vio.c b/hw/spapr_vio.c
index ce6558b..ba2e1c1 100644
--- a/hw/spapr_vio.c
+++ b/hw/spapr_vio.c
@@ -32,6 +32,7 @@
 
 #include "hw/spapr.h"
 #include "hw/spapr_vio.h"
+#include "hw/xics.h"
 
 #ifdef CONFIG_FDT
 #include <libfdt.h>
@@ -595,6 +596,7 @@ static int spapr_vio_busdev_init(DeviceState *qdev, DeviceInfo *qinfo)
 {
     VIOsPAPRDeviceInfo *info = (VIOsPAPRDeviceInfo *)qinfo;
     VIOsPAPRDevice *dev = (VIOsPAPRDevice *)qdev;
+    VIOsPAPRBus *bus = DO_UPCAST(VIOsPAPRBus, bus, dev->qdev.parent_bus);
     char *id;
 
     if (asprintf(&id, "%s@%x", info->dt_name, dev->reg) < 0) {
@@ -602,6 +604,8 @@ static int spapr_vio_busdev_init(DeviceState *qdev, DeviceInfo *qinfo)
     }
 
     dev->qdev.id = id;
+    dev->vio_irq_num = bus->irq++;
+    dev->qirq = spapr_find_qirq(spapr, dev->vio_irq_num);
 
     rtce_init(dev);
 
@@ -656,6 +660,7 @@ VIOsPAPRBus *spapr_vio_bus_init(void)
 
     qbus = qbus_create(&spapr_vio_bus_info, dev, "spapr-vio");
     bus = DO_UPCAST(VIOsPAPRBus, bus, qbus);
+    bus->irq = 16;
 
     /* hcall-vio */
     spapr_register_hypercall(H_VIO_SIGNAL, h_vio_signal);
diff --git a/hw/spapr_vio.h b/hw/spapr_vio.h
index 603a8c4..faa5d94 100644
--- a/hw/spapr_vio.h
+++ b/hw/spapr_vio.h
@@ -62,6 +62,7 @@ typedef struct VIOsPAPRDevice {
 
 typedef struct VIOsPAPRBus {
     BusState bus;
+    int irq;
 } VIOsPAPRBus;
 
 typedef struct {
@@ -98,15 +99,9 @@ uint64_t ldq_tce(VIOsPAPRDevice *dev, uint64_t taddr);
 int spapr_vio_send_crq(VIOsPAPRDevice *dev, uint8_t *crq);
 
 void vty_putchars(VIOsPAPRDevice *sdev, uint8_t *buf, int len);
-void spapr_vty_create(VIOsPAPRBus *bus,
-                      uint32_t reg, CharDriverState *chardev,
-                      qemu_irq qirq, uint32_t vio_irq_num);
-
-void spapr_vlan_create(VIOsPAPRBus *bus, uint32_t reg, NICInfo *nd,
-                       qemu_irq qirq, uint32_t vio_irq_num);
-
-void spapr_vscsi_create(VIOsPAPRBus *bus, uint32_t reg,
-                        qemu_irq qirq, uint32_t vio_irq_num);
+void spapr_vty_create(VIOsPAPRBus *bus, uint32_t reg, CharDriverState *chardev);
+void spapr_vlan_create(VIOsPAPRBus *bus, uint32_t reg, NICInfo *nd);
+void spapr_vscsi_create(VIOsPAPRBus *bus, uint32_t reg);
 
 int spapr_tce_set_bypass(uint32_t unit, uint32_t enable);
 void spapr_vio_quiesce(void);
diff --git a/hw/spapr_vscsi.c b/hw/spapr_vscsi.c
index fc9ac6a..d2d0415 100644
--- a/hw/spapr_vscsi.c
+++ b/hw/spapr_vscsi.c
@@ -893,20 +893,14 @@ static int spapr_vscsi_init(VIOsPAPRDevice *dev)
     return 0;
 }
 
-void spapr_vscsi_create(VIOsPAPRBus *bus, uint32_t reg,
-                        qemu_irq qirq, uint32_t vio_irq_num)
+void spapr_vscsi_create(VIOsPAPRBus *bus, uint32_t reg)
 {
     DeviceState *dev;
-    VIOsPAPRDevice *sdev;
 
     dev = qdev_create(&bus->bus, "spapr-vscsi");
     qdev_prop_set_uint32(dev, "reg", reg);
 
     qdev_init_nofail(dev);
-
-    sdev = (VIOsPAPRDevice *)dev;
-    sdev->qirq = qirq;
-    sdev->vio_irq_num = vio_irq_num;
 }
 
 static int spapr_vscsi_devnode(VIOsPAPRDevice *dev, void *fdt, int node_off)
diff --git a/hw/spapr_vty.c b/hw/spapr_vty.c
index f5046d9..607b81b 100644
--- a/hw/spapr_vty.c
+++ b/hw/spapr_vty.c
@@ -115,20 +115,14 @@ static target_ulong h_get_term_char(CPUState *env, sPAPREnvironment *spapr,
     return H_SUCCESS;
 }
 
-void spapr_vty_create(VIOsPAPRBus *bus,
-                      uint32_t reg, CharDriverState *chardev,
-                      qemu_irq qirq, uint32_t vio_irq_num)
+void spapr_vty_create(VIOsPAPRBus *bus, uint32_t reg, CharDriverState *chardev)
 {
     DeviceState *dev;
-    VIOsPAPRDevice *sdev;
 
     dev = qdev_create(&bus->bus, "spapr-vty");
     qdev_prop_set_uint32(dev, "reg", reg);
     qdev_prop_set_chr(dev, "chardev", chardev);
     qdev_init_nofail(dev);
-    sdev = (VIOsPAPRDevice *)dev;
-    sdev->qirq = qirq;
-    sdev->vio_irq_num = vio_irq_num;
 }
 
 static void vty_hcalls(VIOsPAPRBus *bus)
-- 
1.6.0.2
^ permalink raw reply related	[flat|nested] 128+ messages in thread
- * Re: [Qemu-devel] [PATCH 01/58] spapr: proper qdevification
  2011-09-14  8:42 ` [Qemu-devel] [PATCH 01/58] spapr: proper qdevification Alexander Graf
@ 2011-09-15  3:14   ` David Gibson
  2011-09-15  7:01     ` Paolo Bonzini
  0 siblings, 1 reply; 128+ messages in thread
From: David Gibson @ 2011-09-15  3:14 UTC (permalink / raw)
  To: Alexander Graf
  Cc: Blue Swirl, Paolo Bonzini, qemu-ppc, qemu-devel Developers,
	Aurelien Jarno
On Wed, Sep 14, 2011 at 10:42:25AM +0200, Alexander Graf wrote:
> From: Paolo Bonzini <pbonzini@redhat.com>
> 
> Right now the spapr devices cannot be instantiated with -device,
> because the IRQs need to be passed to the spapr_*_create functions.
> Do this instead in the bus's init wrapper.
> 
> This is particularly important with the conversion from scsi-disk
> to scsi-{cd,hd} that Markus made.  After his patches, if you
> specify a scsi-cd device attached to an if=none drive, the default
> VSCSI controller will not be created and, without qdevification,
> you will not be able to add yours.
> 
> NOTE from agraf: added small compile fix
Thanks for fixing this, Paolo.  Since writing that code, I've realised
it doesn't really fit the model correctly, but haven't gotten around
to fixing it yet.
I will make a later patch to move the irq allocation from the vio bus
to the xics itself, which will matter once we add PCI and/or other
busses.
A question about qdev stuff.  Under PAPR, there is generally only
supposed to be one SCSI target (disk / cd / whatever) per virtual scsi
bus.  But the generic qdev code will, by default, keep assigning
devices to the existing bus until it's full.  Any thoughts on how to
sanely change that behaviour on a per-machine basis?
We'll have a similar problem later on with PCI - PAPR machines usually
only have one device per host bridge, for better isolation.
-- 
David Gibson			| I'll have my music baroque, and my code
david AT gibson.dropbear.id.au	| minimalist, thank you.  NOT _the_ _other_
				| _way_ _around_!
http://www.ozlabs.org/~dgibson
^ permalink raw reply	[flat|nested] 128+ messages in thread
- * Re: [Qemu-devel] [PATCH 01/58] spapr: proper qdevification
  2011-09-15  3:14   ` David Gibson
@ 2011-09-15  7:01     ` Paolo Bonzini
  2011-09-16  3:06       ` [Qemu-devel] [Qemu-ppc] " David Gibson
  0 siblings, 1 reply; 128+ messages in thread
From: Paolo Bonzini @ 2011-09-15  7:01 UTC (permalink / raw)
  To: Alexander Graf, qemu-devel Developers, qemu-ppc, David Gibson
  Cc: Blue Swirl, Aurelien Jarno
On 09/15/2011 05:14 AM, David Gibson wrote:
> Under PAPR, there is generally only
> supposed to be one SCSI target (disk / cd / whatever) per virtual scsi
> bus.  But the generic qdev code will, by default, keep assigning
> devices to the existing bus until it's full.  Any thoughts on how to
> sanely change that behaviour on a per-machine basis?
You could change the if_max_devs array in blockdev.c to something 
provided by the machines.
However, I'm not sure about this, for two reasons:
1) do you mean, in Linux terms, one target per SCSI _host_ or one target 
per SCSI _channel_?  i.e. if you look at /sys/bus/scsi/devices, right 
now it looks like
    0:0:0:0    0:0:1:0     (two targets on the same host and channel)
Should it be?
    0:0:0:0    0:1:0:0     (one target per channel)
or
    0:0:0:0    1:0:0:0     (one target per host)
If it is the former, then you are simply hitting a limitation of the 
SCSI layer in QEMU and I do have patches to make assignment more 
flexible.  Based on the Linux VSCSI driver, and based on what SLOF does, 
I'd guess that's what you mean.
2) does this matter at all?  First, when doing "real world" 
virtualization you won't use the legacy options (neither -hda/-cdrom nor 
"-drive ...,if=scsi"), you would use -device to manually assign the 
devices to their buses.  Second, why should you care in the case of 
SCSI?  It seems like a very hard limitation to me, and unlike the PCI 
case it doesn't buy you anything in terms of isolation.
Paolo
^ permalink raw reply	[flat|nested] 128+ messages in thread
- * Re: [Qemu-devel] [Qemu-ppc] [PATCH 01/58] spapr: proper qdevification
  2011-09-15  7:01     ` Paolo Bonzini
@ 2011-09-16  3:06       ` David Gibson
  2011-09-16 10:41         ` Paolo Bonzini
  0 siblings, 1 reply; 128+ messages in thread
From: David Gibson @ 2011-09-16  3:06 UTC (permalink / raw)
  To: Paolo Bonzini; +Cc: qemu-ppc, Alexander Graf, qemu-devel Developers
On Thu, Sep 15, 2011 at 09:01:38AM +0200, Paolo Bonzini wrote:
> On 09/15/2011 05:14 AM, David Gibson wrote:
> >Under PAPR, there is generally only
> >supposed to be one SCSI target (disk / cd / whatever) per virtual scsi
> >bus.  But the generic qdev code will, by default, keep assigning
> >devices to the existing bus until it's full.  Any thoughts on how to
> >sanely change that behaviour on a per-machine basis?
> 
> You could change the if_max_devs array in blockdev.c to something
> provided by the machines.
> 
> However, I'm not sure about this, for two reasons:
> 
> 1) do you mean, in Linux terms, one target per SCSI _host_ or one
> target per SCSI _channel_?  i.e. if you looks at
> /sys/bus/scsi/devices, right now it looks like
> 
>    0:0:0:0    0:0:1:0     (two targets on the same host and channel)
> 
> Should it be?
> 
>    0:0:0:0    0:1:0:0     (one target per channel)
> 
> or
> 
>    0:0:0:0    1:0:0:0     (one target per host)
> 
> If it is the former, then you are simply hitting a limitation of the
> SCSI layer in QEMU and I do have patches to make assignment more
> flexible.  Based on the Linux VSCSI driver, and based on what SLOF
> does, I'd guess that's what you mean.
Well, now I'm confused.  I had a look at a pHyp machine, and Linux
seemed to see it as multiple targets on a single channel, but I'm sure
the PAPR spec says you shouldn't have that.  So I'm going to have to
look closer now.
> 2) does this matter at all?  First, when doing "real world"
> virtualization you won't use the legacy options (neither -hda/-cdrom
> nor "-drive ...,if=scsi"), you would use -device to manually assign
> the devices to their buses.
Well, perhaps, but I really prefer to have sane defaults, rather than
having to build the machine myself on the command line.
>  Second, why should you care in the case
> of SCSI?  It seems like a very hard limitation to me, and unlike the
> PCI case it doesn't buy you anything in terms of isolation.
Ah, there is a good reason on this side.  I forget the exact details,
but due to the protocol it uses there's some blocksize limit that is
only advertised per vscsi adaptor, whereas it should really be a
per-target quantity (and is different in practice for cdroms and
disks).  Of course that's arguably a bug in the vscsi protocol, but we
can't fix that.
-- 
David Gibson			| I'll have my music baroque, and my code
david AT gibson.dropbear.id.au	| minimalist, thank you.  NOT _the_ _other_
				| _way_ _around_!
http://www.ozlabs.org/~dgibson
^ permalink raw reply	[flat|nested] 128+ messages in thread 
- * Re: [Qemu-devel] [Qemu-ppc] [PATCH 01/58] spapr: proper qdevification
  2011-09-16  3:06       ` [Qemu-devel] [Qemu-ppc] " David Gibson
@ 2011-09-16 10:41         ` Paolo Bonzini
  2011-09-16 13:27           ` Thomas Huth
  2011-09-16 14:08           ` David Gibson
  0 siblings, 2 replies; 128+ messages in thread
From: Paolo Bonzini @ 2011-09-16 10:41 UTC (permalink / raw)
  To: Alexander Graf, qemu-devel Developers, qemu-ppc, David Gibson
On 09/16/2011 05:06 AM, David Gibson wrote:
>> >
>> >  1) do you mean, in Linux terms, one target per SCSI _host_ or one
>> >  target per SCSI _channel_?  i.e. if you looks at
>> >  /sys/bus/scsi/devices, right now it looks like
>> >
>> >      0:0:0:0    0:0:1:0     (two targets on the same host and channel)
>> >
>> >  Should it be?
>> >
>> >      0:0:0:0    0:1:0:0     (one target per channel)
>> >
>> >  or
>> >
>> >      0:0:0:0    1:0:0:0     (one target per host)
>> >
>> >  If it is the former, then you are simply hitting a limitation of the
>> >  SCSI layer in QEMU and I do have patches to make assignment more
>> >  flexible.  Based on the Linux VSCSI driver, and based on what SLOF
>> >  does, I'd guess that's what you mean.
>
> Well, now I'm confused.  I had a look at a pHyp machine, and Linux
> seemed to see it as multiple targets on a single channel, but I'm sure
> the PAPR spec says you shouldn't have that.  So I'm going to have to
> look closer now.
If this is the case, there might be a bug in SLOF's probing of SCSI devices.
SLOF probes target 0/LUN 0 on eight channels, i.e. from 0:0:0 to 7:0:0. 
  Linux however shows them the same as pHyp, i.e. from 0:0:0 to 0:7:0.
The reason this works is because LUN parsing in QEMU is completely 
broken (by Ben's admission in spapr_vscsi.c :)) and so SLOF's x:0:0 and 
Linux's 0:x:0 end up referring to the same device.
Now, when implementing SCSI addressing I had two choices:
(1) leave them where Linux sees them.  This seems correct according to 
your experiments with pHyp, but then SLOF could only see 0:0:0;
(2) move the devices so that both SLOF and Linux see them as x:0:0 (one 
target per channel).  This would be inconsistent with pHyp, but it 
doesn't break either SLOF or Linux.
So, I would like to agree on a plan for merging the SCSI addressing 
series.  Right now I am doing (2), because it lets me use the current 
version of SLOF.  Is it okay for you to merge the feature with these 
semantics?
If you want to change to (1), that can be done easily.  However, it 
requires fixing SLOF, so it would have to go preferably through Alex's 
PPC tree.
(Again, that would be just the defaults---the addressing can always be 
overridden by using -device explicitly).
Paolo
^ permalink raw reply	[flat|nested] 128+ messages in thread 
- * Re: [Qemu-devel] [Qemu-ppc] [PATCH 01/58] spapr: proper qdevification
  2011-09-16 10:41         ` Paolo Bonzini
@ 2011-09-16 13:27           ` Thomas Huth
  2011-09-16 13:28             ` Paolo Bonzini
  2011-09-16 15:51             ` Benjamin Herrenschmidt
  2011-09-16 14:08           ` David Gibson
  1 sibling, 2 replies; 128+ messages in thread
From: Thomas Huth @ 2011-09-16 13:27 UTC (permalink / raw)
  To: Paolo Bonzini
  Cc: David Gibson, qemu-ppc, Alexander Graf, qemu-devel Developers
 Hi all!
Am Fri, 16 Sep 2011 12:41:40 +0200
schrieb Paolo Bonzini <pbonzini@redhat.com>:
> On 09/16/2011 05:06 AM, David Gibson wrote:
> >> >
> >> >  1) do you mean, in Linux terms, one target per SCSI _host_ or one
> >> >  target per SCSI _channel_?  i.e. if you looks at
> >> >  /sys/bus/scsi/devices, right now it looks like
> >> >
> >> >      0:0:0:0    0:0:1:0     (two targets on the same host and channel)
> >> >
> >> >  Should it be?
> >> >
> >> >      0:0:0:0    0:1:0:0     (one target per channel)
> >> >
> >> >  or
> >> >
> >> >      0:0:0:0    1:0:0:0     (one target per host)
> >> >
> >> >  If it is the former, then you are simply hitting a limitation of the
> >> >  SCSI layer in QEMU and I do have patches to make assignment more
> >> >  flexible.  Based on the Linux VSCSI driver, and based on what SLOF
> >> >  does, I'd guess that's what you mean.
> >
> > Well, now I'm confused.  I had a look at a pHyp machine, and Linux
> > seemed to see it as multiple targets on a single channel, but I'm sure
> > the PAPR spec says you shouldn't have that.  So I'm going to have to
> > look closer now.
> 
> If this is the case, there might be a bug in SLOF's probing of SCSI devices.
> 
> SLOF probes target 0/LUN 0 on eight channels, i.e. from 0:0:0 to 7:0:0. 
>   Linux however shows them the same as pHyp, i.e. from 0:0:0 to 0:7:0.
> 
> The reason this works is because LUN parsing in QEMU is completely 
> broken (by Ben's admission in spapr_vscsi.c :)) and so SLOF's x:0:0 and 
> Linux's 0:x:0 end up referring to the same device.
I've done some reading about this problem today, and I think I've got
an idea what might be wrong here - seems like a bug in SLOF to me.
First, according to the SLOF source code, it seems to me that its
intention is to scan target IDs, not channels (but as I haven't
written that part, I am not 100% sure here).
Then I compared how Linux and SLOF fill the 64-bit LUN field in the
SRP_CMD request structure, and they both fill in the target ID at the
same location - but Linux additionally sets one more bit in the
first byte (see the "lun_from_dev" function in ibmvscsi.c of the
kernel).
Looking at the "SCSI Architecture Model" specification, this additional
bit is used to select the "Logical unit addressing method" instead of
the "Peripheral device addressing method" that SLOF currently uses - and
the "Logical unit addressing method" sounds more reasonable to me when
looking at the places where SLOF tries to fill in the target ID.
So I suggest that I change SLOF to also use the "Logical unit
addressing method" like Linux does, so that SLOF scans the
target IDs instead of the channels/bus IDs.
What do you think, does that sound ok?
 Regards,
  Thomas
^ permalink raw reply	[flat|nested] 128+ messages in thread 
- * Re: [Qemu-devel] [Qemu-ppc] [PATCH 01/58] spapr: proper qdevification
  2011-09-16 13:27           ` Thomas Huth
@ 2011-09-16 13:28             ` Paolo Bonzini
  2011-09-16 15:51             ` Benjamin Herrenschmidt
  1 sibling, 0 replies; 128+ messages in thread
From: Paolo Bonzini @ 2011-09-16 13:28 UTC (permalink / raw)
  To: Thomas Huth; +Cc: David Gibson, qemu-ppc, Alexander Graf, qemu-devel Developers
On 09/16/2011 03:27 PM, Thomas Huth wrote:
> So I suggest that I change SLOF to also use the "Logical unit
> addressing method" like Linux does, which should result in the fact that
> SLOF tries to scan the target IDs instead of the channels/bus IDs.
>
> What do you think, does that sound ok?
Yes, that's perfect.
Paolo
^ permalink raw reply	[flat|nested] 128+ messages in thread 
- * Re: [Qemu-devel] [Qemu-ppc] [PATCH 01/58] spapr: proper qdevification
  2011-09-16 13:27           ` Thomas Huth
  2011-09-16 13:28             ` Paolo Bonzini
@ 2011-09-16 15:51             ` Benjamin Herrenschmidt
  2011-09-19  6:55               ` Thomas Huth
  1 sibling, 1 reply; 128+ messages in thread
From: Benjamin Herrenschmidt @ 2011-09-16 15:51 UTC (permalink / raw)
  To: Thomas Huth
  Cc: Paolo Bonzini, David Gibson, qemu-ppc, Alexander Graf,
	qemu-devel Developers
> I've done some readings about this problem today, and I think I've got
> an idea what might be wrong here - seems like a bug in SLOF to me.
> 
> First, according to the SLOF source code, it seems to me that its
> intention is to to scan target IDs, not channels (but as I haven't
> written that part, I am not 100% sure here).
> 
> Then I compared how Linux and SLOF fill the 64-bit LUN field in the
> SRP_CMD request structure, and they both fill in the target ID at the
> same location - but Linux is additionally setting an additional bit in
> first byte (see the "lun_from_dev" function in ibmvscsi.c of the
> kernel).
> 
> Looking at the "SCSI Architecture Model" specification, this additional
> bit is used to select the "Logical unit addressing method" instead of
> the "Peripheral device addressing method" that SLOF currently uses - and
> the "Logical unit addressing method" sounds more reasonable to me when
> looking at the places where SLOF tries to fill in the target ID.
> 
> So I suggest that I change SLOF to also use the "Logical unit
> addressing method" like Linux does, which should result in the fact that
> SLOF tries to scan the target IDs instead of the channels/bus IDs.
 .../...
Note that in addition to that, the PAPR spec specifies only one
"device" (whatever that means) per vscsi instance.
In fact, if we are ever to support proper sg, we need that, because the
way our vscsi client driver works in linux, we can only pass one max
request size down to the client from qemu, and without that information,
sg cannot be done reliably.
Cheers,
Ben.
^ permalink raw reply	[flat|nested] 128+ messages in thread 
- * Re: [Qemu-devel] [Qemu-ppc] [PATCH 01/58] spapr: proper qdevification
  2011-09-16 15:51             ` Benjamin Herrenschmidt
@ 2011-09-19  6:55               ` Thomas Huth
  2011-09-19  6:59                 ` Paolo Bonzini
  0 siblings, 1 reply; 128+ messages in thread
From: Thomas Huth @ 2011-09-19  6:55 UTC (permalink / raw)
  To: Benjamin Herrenschmidt
  Cc: Paolo Bonzini, David Gibson, qemu-ppc, Alexander Graf,
	qemu-devel Developers
Am Fri, 16 Sep 2011 12:51:31 -0300
schrieb Benjamin Herrenschmidt <benh@kernel.crashing.org>:
> 
> > I've done some readings about this problem today, and I think I've got
> > an idea what might be wrong here - seems like a bug in SLOF to me.
> > 
> > First, according to the SLOF source code, it seems to me that its
> > intention is to to scan target IDs, not channels (but as I haven't
> > written that part, I am not 100% sure here).
> > 
> > Then I compared how Linux and SLOF fill the 64-bit LUN field in the
> > SRP_CMD request structure, and they both fill in the target ID at the
> > same location - but Linux is additionally setting an additional bit in
> > first byte (see the "lun_from_dev" function in ibmvscsi.c of the
> > kernel).
> > 
> > Looking at the "SCSI Architecture Model" specification, this additional
> > bit is used to select the "Logical unit addressing method" instead of
> > the "Peripheral device addressing method" that SLOF currently uses - and
> > the "Logical unit addressing method" sounds more reasonable to me when
> > looking at the places where SLOF tries to fill in the target ID.
> > 
> > So I suggest that I change SLOF to also use the "Logical unit
> > addressing method" like Linux does, which should result in the fact that
> > SLOF tries to scan the target IDs instead of the channels/bus IDs.
> 
>  .../...
> 
> Note that in addition to that, the PAPR spec specifies only one
> "device" (whatever that means) per vscsi instance.
Really? In that case, I wonder why Linux is using the "Logical unit
addressing format" with target IDs and bus numbers instead of the
"Flat space addressing method" for vscsi ... according to
drivers/scsi/ibmvscsi/ibmvscsi.c :
static inline u16 lun_from_dev(struct scsi_device *dev)
{
	return (0x2 << 14) | (dev->id << 8) | (dev->channel << 5) | dev->lun;
}
In case there's really only one device per vscsi instance, shouldn't
that code use addressing method 0x1 instead of 0x2 here?
 Thomas
^ permalink raw reply	[flat|nested] 128+ messages in thread
- * Re: [Qemu-devel] [Qemu-ppc] [PATCH 01/58] spapr: proper qdevification
  2011-09-19  6:55               ` Thomas Huth
@ 2011-09-19  6:59                 ` Paolo Bonzini
  0 siblings, 0 replies; 128+ messages in thread
From: Paolo Bonzini @ 2011-09-19  6:59 UTC (permalink / raw)
  To: Thomas Huth; +Cc: David Gibson, qemu-ppc, Alexander Graf, qemu-devel Developers
On 09/19/2011 08:55 AM, Thomas Huth wrote:
>> >  Note that in addition to that, the PAPR spec specifies only one
>> >  "device" (whatever that means) per vscsi instance.
> Really? In that case, I wonder why Linux is using the "Logical unit
> addressing format" with target IDs and bus numbers instead of the
> "Flat space addressing method" for vscsi ... according to
> drivers/scsi/ibmvscsi/ibmvscsi.c :
>
> static inline u16 lun_from_dev(struct scsi_device *dev)
> {
> 	return (0x2<<  14) | (dev->id<<  8) | (dev->channel<<  5) | dev->lun;
> }
>
> In case there's really only one device per vscsi instance, shouldn't
> that code use addressing method 0x1 instead of 0x2 here?
As long as dev->id == 0, dev->channel == 0, dev->lun < 31, the three 
addressing methods are all equivalent.
Some comments in ibmvscsi.c say that iOS needs non-zero channels, so 
there does seem to be someone else who doesn't follow the PAPR spec too 
well. :)
Paolo
^ permalink raw reply	[flat|nested] 128+ messages in thread
 
 
 
- * Re: [Qemu-devel] [Qemu-ppc] [PATCH 01/58] spapr: proper qdevification
  2011-09-16 10:41         ` Paolo Bonzini
  2011-09-16 13:27           ` Thomas Huth
@ 2011-09-16 14:08           ` David Gibson
  2011-09-19  6:50             ` Paolo Bonzini
  1 sibling, 1 reply; 128+ messages in thread
From: David Gibson @ 2011-09-16 14:08 UTC (permalink / raw)
  To: Paolo Bonzini; +Cc: qemu-ppc, Alexander Graf, qemu-devel Developers
On Fri, Sep 16, 2011 at 12:41:40PM +0200, Paolo Bonzini wrote:
> On 09/16/2011 05:06 AM, David Gibson wrote:
> >>>
> >>>  1) do you mean, in Linux terms, one target per SCSI _host_ or one
> >>>  target per SCSI _channel_?  i.e. if you looks at
> >>>  /sys/bus/scsi/devices, right now it looks like
> >>>
> >>>      0:0:0:0    0:0:1:0     (two targets on the same host and channel)
> >>>
> >>>  Should it be?
> >>>
> >>>      0:0:0:0    0:1:0:0     (one target per channel)
> >>>
> >>>  or
> >>>
> >>>      0:0:0:0    1:0:0:0     (one target per host)
> >>>
> >>>  If it is the former, then you are simply hitting a limitation of the
> >>>  SCSI layer in QEMU and I do have patches to make assignment more
> >>>  flexible.  Based on the Linux VSCSI driver, and based on what SLOF
> >>>  does, I'd guess that's what you mean.
> >
> >Well, now I'm confused.  I had a look at a pHyp machine, and Linux
> >seemed to see it as multiple targets on a single channel, but I'm sure
> >the PAPR spec says you shouldn't have that.  So I'm going to have to
> >look closer now.
> 
> If this is the case, there might be a bug in SLOF's probing of SCSI
> devices.
Um.. I'm confused.  This is a pHyp (aka PowerVM) machine, so there is
no SLOF.  What I'm seeing there seems to contradict the PAPR spec
which is supposed to describe it.  So I don't see how it has a bearing
on SLOF addressing.
> SLOF probes target 0/LUN 0 on eight channels, i.e. from 0:0:0 to
> 7:0:0.  Linux however shows them the same as pHyp, i.e. from 0:0:0
> to 0:7:0.
> 
> The reason this works is because LUN parsing in QEMU is completely
> broken (by Ben's admission in spapr_vscsi.c :)) and so SLOF's x:0:0
> and Linux's 0:x:0 end up referring to the same device.
> 
> Now, when implementing SCSI addressing I had two choices:
> 
> (1) leave them where Linux sees them.  This seems correct according
> to your experiments with pHyp, but then SLOF could only see 0:0:0;
> 
> (2) move the devices so that both SLOF and Linux see them as x:0:0
> (one target per channel).  This would be inconsistent with pHyp, but
> it doesn't break either SLOF or Linux.
> 
> So, I would like to agree on a plan for merging the SCSI addressing
> series.  Right now I am doing (2), because it lets me use the
> current version of SLOF.  Is it okay for you to merge the feature
> with these semantics?
(2) sounds like what PAPR describes to me, so that sounds fine to me.
But I still don't follow your reasoning leading up to that.
> If you want to change to (1), that can be done easily.  However, it
> requires fixing SLOF, so it would have to go preferably through
> Alex's PPC tree.
> 
> (Again, that would be just the defaults---the addressing can always
> be overridden by using -device explicitly).
> 
> Paolo
> 
-- 
David Gibson			| I'll have my music baroque, and my code
david AT gibson.dropbear.id.au	| minimalist, thank you.  NOT _the_ _other_
				| _way_ _around_!
http://www.ozlabs.org/~dgibson
^ permalink raw reply	[flat|nested] 128+ messages in thread 
- * Re: [Qemu-devel] [Qemu-ppc] [PATCH 01/58] spapr: proper qdevification
  2011-09-16 14:08           ` David Gibson
@ 2011-09-19  6:50             ` Paolo Bonzini
  0 siblings, 0 replies; 128+ messages in thread
From: Paolo Bonzini @ 2011-09-19  6:50 UTC (permalink / raw)
  To: Alexander Graf, qemu-devel Developers, qemu-ppc
On 09/16/2011 04:08 PM, David Gibson wrote:
> > > Well, now I'm confused.  I had a look at a pHyp machine, and Linux
> > > seemed to see it as multiple targets on a single channel, but I'm sure
> > > the PAPR spec says you shouldn't have that.  So I'm going to have to
> > > look closer now.
> >
> >  If this is the case, there might be a bug in SLOF's probing of SCSI
> >  devices.
>
> Um.. I'm confused.  This is a pHyp (aka PowerVM) machine, so there is
> no SLOF.  What I'm seeing there seems to contradict the PAPR spec
> which is supposed to describe it.  So I don't see how it has a bearing
> on SLOF addressing.
I meant "if we want to make QEMU present devices like pHyp, we cannot do 
that without fixing SLOF".
> >  (2) move the devices so that both SLOF and Linux see them as x:0:0
> >  (one target per channel).  This would be inconsistent with pHyp, but
> >  it doesn't break either SLOF or Linux.
> >
>
> (2) sounds like what PAPR describes to me, so that sounds fine to me.
No, PAPR describes one target per *host*, not channel.
Paolo
^ permalink raw reply	[flat|nested] 128+ messages in thread 
 
 
 
 
 
 
- * [Qemu-devel] [PATCH 02/58] spapr: prepare for qdevification of irq
  2011-09-14  8:42 [Qemu-devel] [PULL 00/58] ppc patch queue 2011-09-14 Alexander Graf
  2011-09-14  8:42 ` [Qemu-devel] [PATCH 01/58] spapr: proper qdevification Alexander Graf
@ 2011-09-14  8:42 ` Alexander Graf
  2011-09-14  8:42 ` [Qemu-devel] [PATCH 03/58] spapr: make irq customizable via qdev Alexander Graf
                   ` (55 subsequent siblings)
  57 siblings, 0 replies; 128+ messages in thread
From: Alexander Graf @ 2011-09-14  8:42 UTC (permalink / raw)
  To: qemu-devel Developers
  Cc: Blue Swirl, Paolo Bonzini, qemu-ppc, Aurelien Jarno, David Gibson
From: Paolo Bonzini <pbonzini@redhat.com>
Restructure common properties for sPAPR devices so that IRQ definitions
can be added in one place.
Signed-off-by: Paolo Bonzini <pbonzini@redhat.com>
Cc: Alexander Graf <agraf@suse.de>
Cc: David Gibson <david@gibson.dropbear.id.au>
Signed-off-by: Alexander Graf <agraf@suse.de>
---
 hw/spapr_llan.c  |    4 +---
 hw/spapr_vio.h   |    5 +++++
 hw/spapr_vscsi.c |    4 +---
 hw/spapr_vty.c   |    2 +-
 4 files changed, 8 insertions(+), 7 deletions(-)
diff --git a/hw/spapr_llan.c b/hw/spapr_llan.c
index 2597748..abe1297 100644
--- a/hw/spapr_llan.c
+++ b/hw/spapr_llan.c
@@ -495,9 +495,7 @@ static VIOsPAPRDeviceInfo spapr_vlan = {
     .qdev.name = "spapr-vlan",
     .qdev.size = sizeof(VIOsPAPRVLANDevice),
     .qdev.props = (Property[]) {
-        DEFINE_PROP_UINT32("reg", VIOsPAPRDevice, reg, 0x1000),
-        DEFINE_PROP_UINT32("dma-window", VIOsPAPRDevice, rtce_window_size,
-                           0x10000000),
+        DEFINE_SPAPR_PROPERTIES(VIOsPAPRVLANDevice, sdev, 0x1000, 0x10000000),
         DEFINE_NIC_PROPERTIES(VIOsPAPRVLANDevice, nicconf),
         DEFINE_PROP_END_OF_LIST(),
     },
diff --git a/hw/spapr_vio.h b/hw/spapr_vio.h
index faa5d94..7eb5367 100644
--- a/hw/spapr_vio.h
+++ b/hw/spapr_vio.h
@@ -60,6 +60,11 @@ typedef struct VIOsPAPRDevice {
     VIOsPAPR_CRQ crq;
 } VIOsPAPRDevice;
 
+#define DEFINE_SPAPR_PROPERTIES(type, field, default_reg, default_dma_window) \
+        DEFINE_PROP_UINT32("reg", type, field.reg, default_reg), \
+        DEFINE_PROP_UINT32("dma-window", type, field.rtce_window_size, \
+                           default_dma_window)
+
 typedef struct VIOsPAPRBus {
     BusState bus;
     int irq;
diff --git a/hw/spapr_vscsi.c b/hw/spapr_vscsi.c
index d2d0415..6fc82f6 100644
--- a/hw/spapr_vscsi.c
+++ b/hw/spapr_vscsi.c
@@ -930,9 +930,7 @@ static VIOsPAPRDeviceInfo spapr_vscsi = {
     .qdev.name = "spapr-vscsi",
     .qdev.size = sizeof(VSCSIState),
     .qdev.props = (Property[]) {
-        DEFINE_PROP_UINT32("reg", VIOsPAPRDevice, reg, 0x2000),
-        DEFINE_PROP_UINT32("dma-window", VIOsPAPRDevice,
-                           rtce_window_size, 0x10000000),
+        DEFINE_SPAPR_PROPERTIES(VSCSIState, vdev, 0x2000, 0x10000000),
         DEFINE_PROP_END_OF_LIST(),
     },
 };
diff --git a/hw/spapr_vty.c b/hw/spapr_vty.c
index 607b81b..a9d4b03 100644
--- a/hw/spapr_vty.c
+++ b/hw/spapr_vty.c
@@ -140,7 +140,7 @@ static VIOsPAPRDeviceInfo spapr_vty = {
     .qdev.name = "spapr-vty",
     .qdev.size = sizeof(VIOsPAPRVTYDevice),
     .qdev.props = (Property[]) {
-        DEFINE_PROP_UINT32("reg", VIOsPAPRDevice, reg, 0),
+        DEFINE_SPAPR_PROPERTIES(VIOsPAPRVTYDevice, sdev, 0, 0),
         DEFINE_PROP_CHR("chardev", VIOsPAPRVTYDevice, chardev),
         DEFINE_PROP_END_OF_LIST(),
     },
-- 
1.6.0.2
^ permalink raw reply related	[flat|nested] 128+ messages in thread
- * [Qemu-devel] [PATCH 03/58] spapr: make irq customizable via qdev
  2011-09-14  8:42 [Qemu-devel] [PULL 00/58] ppc patch queue 2011-09-14 Alexander Graf
  2011-09-14  8:42 ` [Qemu-devel] [PATCH 01/58] spapr: proper qdevification Alexander Graf
  2011-09-14  8:42 ` [Qemu-devel] [PATCH 02/58] spapr: prepare for qdevification of irq Alexander Graf
@ 2011-09-14  8:42 ` Alexander Graf
  2011-09-15  3:15   ` David Gibson
  2011-09-14  8:42 ` [Qemu-devel] [PATCH 04/58] PPC: Move openpic to target specific code compilation Alexander Graf
                   ` (54 subsequent siblings)
  57 siblings, 1 reply; 128+ messages in thread
From: Alexander Graf @ 2011-09-14  8:42 UTC (permalink / raw)
  To: qemu-devel Developers
  Cc: Blue Swirl, Paolo Bonzini, qemu-ppc, Aurelien Jarno, David Gibson
From: Paolo Bonzini <pbonzini@redhat.com>
This also lets the user see the irq in "info qtree".
Signed-off-by: Paolo Bonzini <pbonzini@redhat.com>
Cc: Alexander Graf <agraf@suse.de>
Cc: David Gibson <david@gibson.dropbear.id.au>
Signed-off-by: Alexander Graf <agraf@suse.de>
---
 hw/spapr_vio.c |    8 +++++++-
 1 files changed, 7 insertions(+), 1 deletions(-)
diff --git a/hw/spapr_vio.c b/hw/spapr_vio.c
index ba2e1c1..0546ccb 100644
--- a/hw/spapr_vio.c
+++ b/hw/spapr_vio.c
@@ -52,6 +52,10 @@
 static struct BusInfo spapr_vio_bus_info = {
     .name       = "spapr-vio",
     .size       = sizeof(VIOsPAPRBus),
+    .props = (Property[]) {
+        DEFINE_PROP_UINT32("irq", VIOsPAPRDevice, vio_irq_num, 0), \
+        DEFINE_PROP_END_OF_LIST(),
+    },
 };
 
 VIOsPAPRDevice *spapr_vio_find_by_reg(VIOsPAPRBus *bus, uint32_t reg)
@@ -604,7 +608,9 @@ static int spapr_vio_busdev_init(DeviceState *qdev, DeviceInfo *qinfo)
     }
 
     dev->qdev.id = id;
-    dev->vio_irq_num = bus->irq++;
+    if (!dev->vio_irq_num) {
+        dev->vio_irq_num = bus->irq++;
+    }
     dev->qirq = spapr_find_qirq(spapr, dev->vio_irq_num);
 
     rtce_init(dev);
-- 
1.6.0.2
^ permalink raw reply related	[flat|nested] 128+ messages in thread
- * Re: [Qemu-devel] [PATCH 03/58] spapr: make irq customizable via qdev
  2011-09-14  8:42 ` [Qemu-devel] [PATCH 03/58] spapr: make irq customizable via qdev Alexander Graf
@ 2011-09-15  3:15   ` David Gibson
  2011-09-15  6:51     ` Paolo Bonzini
  0 siblings, 1 reply; 128+ messages in thread
From: David Gibson @ 2011-09-15  3:15 UTC (permalink / raw)
  To: Alexander Graf
  Cc: Blue Swirl, Paolo Bonzini, qemu-ppc, qemu-devel Developers,
	Aurelien Jarno
On Wed, Sep 14, 2011 at 10:42:27AM +0200, Alexander Graf wrote:
> From: Paolo Bonzini <pbonzini@redhat.com>
> 
> This also lets the user see the irq in "info qtree".
Um.. I'm a bit confused by this one.  The previous patch comment
implies it's a preparation for this, but then you add the new irq
property to the BusInfo, rather than to the macro that goes into the
DeviceInfos.  Why does this one go in the bus, rather than the device?
> 
> Signed-off-by: Paolo Bonzini <pbonzini@redhat.com>
> Cc: Alexander Graf <agraf@suse.de>
> Cc: David Gibson <david@gibson.dropbear.id.au>
> Signed-off-by: Alexander Graf <agraf@suse.de>
> ---
>  hw/spapr_vio.c |    8 +++++++-
>  1 files changed, 7 insertions(+), 1 deletions(-)
> 
> diff --git a/hw/spapr_vio.c b/hw/spapr_vio.c
> index ba2e1c1..0546ccb 100644
> --- a/hw/spapr_vio.c
> +++ b/hw/spapr_vio.c
> @@ -52,6 +52,10 @@
>  static struct BusInfo spapr_vio_bus_info = {
>      .name       = "spapr-vio",
>      .size       = sizeof(VIOsPAPRBus),
> +    .props = (Property[]) {
> +        DEFINE_PROP_UINT32("irq", VIOsPAPRDevice, vio_irq_num, 0), \
> +        DEFINE_PROP_END_OF_LIST(),
> +    },
>  };
>  
>  VIOsPAPRDevice *spapr_vio_find_by_reg(VIOsPAPRBus *bus, uint32_t reg)
> @@ -604,7 +608,9 @@ static int spapr_vio_busdev_init(DeviceState *qdev, DeviceInfo *qinfo)
>      }
>  
>      dev->qdev.id = id;
> -    dev->vio_irq_num = bus->irq++;
> +    if (!dev->vio_irq_num) {
> +        dev->vio_irq_num = bus->irq++;
> +    }
>      dev->qirq = spapr_find_qirq(spapr, dev->vio_irq_num);
>  
>      rtce_init(dev);
-- 
David Gibson			| I'll have my music baroque, and my code
david AT gibson.dropbear.id.au	| minimalist, thank you.  NOT _the_ _other_
				| _way_ _around_!
http://www.ozlabs.org/~dgibson
^ permalink raw reply	[flat|nested] 128+ messages in thread
- * Re: [Qemu-devel] [PATCH 03/58] spapr: make irq customizable via qdev
  2011-09-15  3:15   ` David Gibson
@ 2011-09-15  6:51     ` Paolo Bonzini
  0 siblings, 0 replies; 128+ messages in thread
From: Paolo Bonzini @ 2011-09-15  6:51 UTC (permalink / raw)
  To: Alexander Graf, qemu-devel Developers, qemu-ppc, Blue Swirl,
	Aurelien Jarno
On 09/15/2011 05:15 AM, David Gibson wrote:
>> >
>> >  This also lets the user see the irq in "info qtree".
> Um.. I'm a bit confused by this one.  The previous patch comment
> implies it's a preparation for this, but then you add the new irq
> property to the BusInfo, rather than to the macro that goes into the
> DeviceInfos.  Why does this one go in the bus, rather than the device?
Yes, I suppose that the commit message for patch 2 is a bit stale.
I put this one in the bus, rather than the device, because the default 
is common to all devices.  Instead, each device type has its own default 
reg and default dma-window.
Paolo
^ permalink raw reply	[flat|nested] 128+ messages in thread 
 
 
- * [Qemu-devel] [PATCH 04/58] PPC: Move openpic to target specific code compilation
  2011-09-14  8:42 [Qemu-devel] [PULL 00/58] ppc patch queue 2011-09-14 Alexander Graf
                   ` (2 preceding siblings ...)
  2011-09-14  8:42 ` [Qemu-devel] [PATCH 03/58] spapr: make irq customizable via qdev Alexander Graf
@ 2011-09-14  8:42 ` Alexander Graf
  2011-09-14  8:42 ` [Qemu-devel] [PATCH 05/58] PPC: Add CPU local MMIO regions to MPIC Alexander Graf
                   ` (53 subsequent siblings)
  57 siblings, 0 replies; 128+ messages in thread
From: Alexander Graf @ 2011-09-14  8:42 UTC (permalink / raw)
  To: qemu-devel Developers; +Cc: Blue Swirl, qemu-ppc, Aurelien Jarno
The MPIC has some funny feature where it maps different registers to an MMIO
region depending on which CPU accesses them.
To be able to reflect that, we need to make OpenPIC be compiled in the target
code, so it can access cpu_single_env.
Signed-off-by: Alexander Graf <agraf@suse.de>
---
 Makefile.objs   |    1 -
 Makefile.target |    2 ++
 2 files changed, 2 insertions(+), 1 deletions(-)
diff --git a/Makefile.objs b/Makefile.objs
index 62020d7..60c63af 100644
--- a/Makefile.objs
+++ b/Makefile.objs
@@ -221,7 +221,6 @@ hw-obj-$(CONFIG_SMARTCARD_NSS) += ccid-card-emulated.o
 hw-obj-$(CONFIG_USB_REDIR) += usb-redir.o
 
 # PPC devices
-hw-obj-$(CONFIG_OPENPIC) += openpic.o
 hw-obj-$(CONFIG_PREP_PCI) += prep_pci.o
 # Mac shared devices
 hw-obj-$(CONFIG_MACIO) += macio.o
diff --git a/Makefile.target b/Makefile.target
index f708453..2ed9099 100644
--- a/Makefile.target
+++ b/Makefile.target
@@ -252,6 +252,8 @@ obj-ppc-y += ppce500_mpc8544ds.o mpc8544_guts.o
 obj-ppc-y += virtex_ml507.o
 obj-ppc-$(CONFIG_KVM) += kvm_ppc.o
 obj-ppc-$(CONFIG_FDT) += device_tree.o
+# PowerPC OpenPIC
+obj-ppc-y += openpic.o
 
 # Xilinx PPC peripherals
 obj-ppc-y += xilinx_intc.o
-- 
1.6.0.2
^ permalink raw reply related	[flat|nested] 128+ messages in thread
- * [Qemu-devel] [PATCH 05/58] PPC: Add CPU local MMIO regions to MPIC
  2011-09-14  8:42 [Qemu-devel] [PULL 00/58] ppc patch queue 2011-09-14 Alexander Graf
                   ` (3 preceding siblings ...)
  2011-09-14  8:42 ` [Qemu-devel] [PATCH 04/58] PPC: Move openpic to target specific code compilation Alexander Graf
@ 2011-09-14  8:42 ` Alexander Graf
  2011-09-14 10:07   ` Peter Maydell
  2011-09-14  8:42 ` [Qemu-devel] [PATCH 06/58] PPC: Extend MPIC MMIO range Alexander Graf
                   ` (52 subsequent siblings)
  57 siblings, 1 reply; 128+ messages in thread
From: Alexander Graf @ 2011-09-14  8:42 UTC (permalink / raw)
  To: qemu-devel Developers; +Cc: Blue Swirl, qemu-ppc, Aurelien Jarno
The MPIC exports a register set for each CPU connected to it. They can all
be accessed through specific registers or using a shadow page that is mapped
differently depending on which CPU accesses it.
This patch implements the shadow map, making it possible for guests to access
the CPU local registers using the same address on each CPU.
Signed-off-by: Alexander Graf <agraf@suse.de>
---
 hw/openpic.c |  110 ++++++++++++++++++++++++++++++++++++++--------------------
 1 files changed, 72 insertions(+), 38 deletions(-)
diff --git a/hw/openpic.c b/hw/openpic.c
index 26c96e2..cf89f23 100644
--- a/hw/openpic.c
+++ b/hw/openpic.c
@@ -2,6 +2,7 @@
  * OpenPIC emulation
  *
  * Copyright (c) 2004 Jocelyn Mayer
+ *               2011 Alexander Graf
  *
  * Permission is hereby granted, free of charge, to any person obtaining a copy
  * of this software and associated documentation files (the "Software"), to deal
@@ -161,6 +162,16 @@ static inline int test_bit (uint32_t *field, int bit)
     return (field[bit >> 5] & 1 << (bit & 0x1F)) != 0;
 }
 
+static int get_current_cpu(void)
+{
+  return cpu_single_env->cpu_index;
+}
+
+static uint32_t openpic_cpu_read_internal(void *opaque, target_phys_addr_t addr,
+                                          int idx);
+static void openpic_cpu_write_internal(void *opaque, target_phys_addr_t addr,
+                                       uint32_t val, int idx);
+
 enum {
     IRQ_EXTERNAL = 0x01,
     IRQ_INTERNAL = 0x02,
@@ -590,18 +601,27 @@ static void openpic_gbl_write (void *opaque, target_phys_addr_t addr, uint32_t v
     DPRINTF("%s: addr " TARGET_FMT_plx " <= %08x\n", __func__, addr, val);
     if (addr & 0xF)
         return;
-    addr &= 0xFF;
     switch (addr) {
-    case 0x00: /* FREP */
+    case 0x40:
+    case 0x50:
+    case 0x60:
+    case 0x70:
+    case 0x80:
+    case 0x90:
+    case 0xA0:
+    case 0xB0:
+        openpic_cpu_write_internal(opp, addr, val, get_current_cpu());
+        break;
+    case 0x1000: /* FREP */
         break;
-    case 0x20: /* GLBC */
+    case 0x1020: /* GLBC */
         if (val & 0x80000000 && opp->reset)
             opp->reset(opp);
         opp->glbc = val & ~0x80000000;
         break;
-    case 0x80: /* VENI */
+    case 0x1080: /* VENI */
         break;
-    case 0x90: /* PINT */
+    case 0x1090: /* PINT */
         for (idx = 0; idx < opp->nb_cpus; idx++) {
             if ((val & (1 << idx)) && !(opp->pint & (1 << idx))) {
                 DPRINTF("Raise OpenPIC RESET output for CPU %d\n", idx);
@@ -615,22 +635,20 @@ static void openpic_gbl_write (void *opaque, target_phys_addr_t addr, uint32_t v
         }
         opp->pint = val;
         break;
-#if MAX_IPI > 0
-    case 0xA0: /* IPI_IPVP */
-    case 0xB0:
-    case 0xC0:
-    case 0xD0:
+    case 0x10A0: /* IPI_IPVP */
+    case 0x10B0:
+    case 0x10C0:
+    case 0x10D0:
         {
             int idx;
-            idx = (addr - 0xA0) >> 4;
+            idx = (addr - 0x10A0) >> 4;
             write_IRQreg(opp, opp->irq_ipi0 + idx, IRQ_IPVP, val);
         }
         break;
-#endif
-    case 0xE0: /* SPVE */
+    case 0x10E0: /* SPVE */
         opp->spve = val & 0x000000FF;
         break;
-    case 0xF0: /* TIFR */
+    case 0x10F0: /* TIFR */
         opp->tifr = val;
         break;
     default:
@@ -647,36 +665,43 @@ static uint32_t openpic_gbl_read (void *opaque, target_phys_addr_t addr)
     retval = 0xFFFFFFFF;
     if (addr & 0xF)
         return retval;
-    addr &= 0xFF;
     switch (addr) {
-    case 0x00: /* FREP */
+    case 0x1000: /* FREP */
         retval = opp->frep;
         break;
-    case 0x20: /* GLBC */
+    case 0x1020: /* GLBC */
         retval = opp->glbc;
         break;
-    case 0x80: /* VENI */
+    case 0x1080: /* VENI */
         retval = opp->veni;
         break;
-    case 0x90: /* PINT */
+    case 0x1090: /* PINT */
         retval = 0x00000000;
         break;
-#if MAX_IPI > 0
-    case 0xA0: /* IPI_IPVP */
+    case 0x40:
+    case 0x50:
+    case 0x60:
+    case 0x70:
+    case 0x80:
+    case 0x90:
+    case 0xA0:
     case 0xB0:
-    case 0xC0:
-    case 0xD0:
+        retval = openpic_cpu_read_internal(opp, addr, get_current_cpu());
+        break;
+    case 0x10A0: /* IPI_IPVP */
+    case 0x10B0:
+    case 0x10C0:
+    case 0x10D0:
         {
             int idx;
-            idx = (addr - 0xA0) >> 4;
+            idx = (addr - 0x10A0) >> 4;
             retval = read_IRQreg(opp, opp->irq_ipi0 + idx, IRQ_IPVP);
         }
         break;
-#endif
-    case 0xE0: /* SPVE */
+    case 0x10E0: /* SPVE */
         retval = opp->spve;
         break;
-    case 0xF0: /* TIFR */
+    case 0x10F0: /* TIFR */
         retval = opp->tifr;
         break;
     default:
@@ -794,23 +819,23 @@ static uint32_t openpic_src_read (void *opaque, uint32_t addr)
     return retval;
 }
 
-static void openpic_cpu_write (void *opaque, target_phys_addr_t addr, uint32_t val)
+static void openpic_cpu_write_internal(void *opaque, target_phys_addr_t addr,
+                                       uint32_t val, int idx)
 {
     openpic_t *opp = opaque;
     IRQ_src_t *src;
     IRQ_dst_t *dst;
-    int idx, s_IRQ, n_IRQ;
+    int s_IRQ, n_IRQ;
 
-    DPRINTF("%s: addr " TARGET_FMT_plx " <= %08x\n", __func__, addr, val);
+    DPRINTF("%s: cpu %d addr " TARGET_FMT_plx " <= %08x\n", __func__, idx,
+            addr, val);
     if (addr & 0xF)
         return;
-    addr &= 0x1FFF0;
-    idx = addr / 0x1000;
     dst = &opp->dst[idx];
     addr &= 0xFF0;
     switch (addr) {
 #if MAX_IPI > 0
-    case 0x40: /* PIPD */
+    case 0x40: /* IPIDR */
     case 0x50:
     case 0x60:
     case 0x70:
@@ -852,20 +877,24 @@ static void openpic_cpu_write (void *opaque, target_phys_addr_t addr, uint32_t v
     }
 }
 
-static uint32_t openpic_cpu_read (void *opaque, target_phys_addr_t addr)
+static void openpic_cpu_write(void *opaque, target_phys_addr_t addr, uint32_t val)
+{
+    openpic_cpu_write_internal(opaque, addr, val, (addr & 0x1f000) >> 12);
+}
+
+static uint32_t openpic_cpu_read_internal(void *opaque, target_phys_addr_t addr,
+                                          int idx)
 {
     openpic_t *opp = opaque;
     IRQ_src_t *src;
     IRQ_dst_t *dst;
     uint32_t retval;
-    int idx, n_IRQ;
+    int n_IRQ;
 
-    DPRINTF("%s: addr " TARGET_FMT_plx "\n", __func__, addr);
+    DPRINTF("%s: cpu %d addr " TARGET_FMT_plx "\n", __func__, idx, addr);
     retval = 0xFFFFFFFF;
     if (addr & 0xF)
         return retval;
-    addr &= 0x1FFF0;
-    idx = addr / 0x1000;
     dst = &opp->dst[idx];
     addr &= 0xFF0;
     switch (addr) {
@@ -925,6 +954,11 @@ static uint32_t openpic_cpu_read (void *opaque, target_phys_addr_t addr)
     return retval;
 }
 
+static uint32_t openpic_cpu_read(void *opaque, target_phys_addr_t addr)
+{
+    return openpic_cpu_read_internal(opaque, addr, (addr & 0x1f000) >> 12);
+}
+
 static void openpic_buggy_write (void *opaque,
                                  target_phys_addr_t addr, uint32_t val)
 {
-- 
1.6.0.2
^ permalink raw reply related	[flat|nested] 128+ messages in thread
- * Re: [Qemu-devel] [PATCH 05/58] PPC: Add CPU local MMIO regions to MPIC
  2011-09-14  8:42 ` [Qemu-devel] [PATCH 05/58] PPC: Add CPU local MMIO regions to MPIC Alexander Graf
@ 2011-09-14 10:07   ` Peter Maydell
  2011-09-14 10:11     ` Alexander Graf
  2011-09-14 10:22     ` Jan Kiszka
  0 siblings, 2 replies; 128+ messages in thread
From: Peter Maydell @ 2011-09-14 10:07 UTC (permalink / raw)
  To: Alexander Graf
  Cc: Blue Swirl, qemu-ppc, qemu-devel Developers, Aurelien Jarno
On 14 September 2011 09:42, Alexander Graf <agraf@suse.de> wrote:
> The MPIC exports a register set for each CPU connected to it. They can all
> be accessed through specific registers or using a shadow page that is mapped
> differently depending on which CPU accesses it.
>
> This patch implements the shadow map, making it possible for guests to access
> the CPU local registers using the same address on each CPU.
> +static int get_current_cpu(void)
> +{
> +  return cpu_single_env->cpu_index;
> +}
This is the standard way of doing this (we use it on ARM as well), but
it's pretty clearly a hack. "which master sent this memory transaction"
is an attribute that ought to be passed down to the MMIO read/write
functions, really (along with other interesting things like "priv or
not?" and probably architecture specific attributes like ARM's
"secure/non-secure"); this matches how hardware does it where the
attributes are passed along as extra signals in the bus fabric.
(Sometimes hardware also does this by having buses from the different
cores be totally separate paths at the point where this kind of device
is connected, before merging together later; we don't really support
modelling that either :-))
Not a nak, just an observation while I'm thinking about it.
-- PMM
^ permalink raw reply	[flat|nested] 128+ messages in thread
- * Re: [Qemu-devel] [PATCH 05/58] PPC: Add CPU local MMIO regions to MPIC
  2011-09-14 10:07   ` Peter Maydell
@ 2011-09-14 10:11     ` Alexander Graf
  2011-09-14 10:22     ` Jan Kiszka
  1 sibling, 0 replies; 128+ messages in thread
From: Alexander Graf @ 2011-09-14 10:11 UTC (permalink / raw)
  To: Peter Maydell
  Cc: Blue Swirl, qemu-ppc@nongnu.org, qemu-devel Developers,
	Aurelien Jarno
Am 14.09.2011 um 12:07 schrieb Peter Maydell <peter.maydell@linaro.org>:
> On 14 September 2011 09:42, Alexander Graf <agraf@suse.de> wrote:
>> The MPIC exports a register set for each CPU connected to it. They can all
>> be accessed through specific registers or using a shadow page that is mapped
>> differently depending on which CPU accesses it.
>> 
>> This patch implements the shadow map, making it possible for guests to access
>> the CPU local registers using the same address on each CPU.
> 
>> +static int get_current_cpu(void)
>> +{
>> +  return cpu_single_env->cpu_index;
>> +}
> 
> This is the standard way of doing this (we use it on ARM as well), but
> it's pretty clearly a hack. "which master sent this memory transaction"
> is an attribute that ought to be passed down to the MMIO read/write
> functions, really (along with other interesting things like "priv or
> not?" and probably architecture specific attributes like ARM's
> "secure/non-secure"); this matches how hardware does it where the
> attributes are passed along as extra signals in the bus fabric.
> (Sometimes hardware also does this by having buses from the different
> cores be totally separate paths at the point where this kind of device
> is connected, before merging together later; we don't really support
> modelling that either :-))
> 
> Not a nak, just an observation while I'm thinking about it.
Yeah, I tend to agree in general. I'm not 100% sure in this case, as it's almost an in-cpu device. But it would be nice to pass this information on the mmio callbacks.
However, right now this is the only way to do it, as we don't have the pretty flexible one implemented yet ;).
Alex
> 
^ permalink raw reply	[flat|nested] 128+ messages in thread
- * Re: [Qemu-devel] [PATCH 05/58] PPC: Add CPU local MMIO regions to MPIC
  2011-09-14 10:07   ` Peter Maydell
  2011-09-14 10:11     ` Alexander Graf
@ 2011-09-14 10:22     ` Jan Kiszka
  2011-09-14 11:59       ` Avi Kivity
  1 sibling, 1 reply; 128+ messages in thread
From: Jan Kiszka @ 2011-09-14 10:22 UTC (permalink / raw)
  To: Peter Maydell
  Cc: Alexander Graf, qemu-devel Developers, Blue Swirl, qemu-ppc,
	Avi Kivity, Aurelien Jarno
On 2011-09-14 12:07, Peter Maydell wrote:
> On 14 September 2011 09:42, Alexander Graf <agraf@suse.de> wrote:
>> The MPIC exports a register set for each CPU connected to it. They can all
>> be accessed through specific registers or using a shadow page that is mapped
>> differently depending on which CPU accesses it.
>>
>> This patch implements the shadow map, making it possible for guests to access
>> the CPU local registers using the same address on each CPU.
> 
>> +static int get_current_cpu(void)
>> +{
>> +  return cpu_single_env->cpu_index;
>> +}
> 
> This is the standard way of doing this (we use it on ARM as well), but
> it's pretty clearly a hack. "which master sent this memory transaction"
> is an attribute that ought to be passed down to the MMIO read/write
> functions, really (along with other interesting things like "priv or
> not?" and probably architecture specific attributes like ARM's
> "secure/non-secure"); this matches how hardware does it where the
> attributes are passed along as extra signals in the bus fabric.
> (Sometimes hardware also does this by having buses from the different
> cores be totally separate paths at the point where this kind of device
> is connected, before merging together later; we don't really support
> modelling that either :-))
> 
> Not a nak, just an observation while I'm thinking about it.
Same problem has to be solved on x86. The way the local APIC is hooked
up right now is totally broken, just works by chance because normal
guests don't seriously stress the architecture.
If we start dispatching CPU memory accesses via per-CPU memory roots,
the problem can be solved without passing additional source information
to the callbacks.
Jan
-- 
Siemens AG, Corporate Technology, CT T DE IT 1
Corporate Competence Center Embedded Linux
^ permalink raw reply	[flat|nested] 128+ messages in thread
- * Re: [Qemu-devel] [PATCH 05/58] PPC: Add CPU local MMIO regions to MPIC
  2011-09-14 10:22     ` Jan Kiszka
@ 2011-09-14 11:59       ` Avi Kivity
  0 siblings, 0 replies; 128+ messages in thread
From: Avi Kivity @ 2011-09-14 11:59 UTC (permalink / raw)
  To: Jan Kiszka
  Cc: Peter Maydell, Alexander Graf, qemu-devel Developers, Blue Swirl,
	qemu-ppc, Aurelien Jarno
On 09/14/2011 01:22 PM, Jan Kiszka wrote:
> >
> >  This is the standard way of doing this (we use it on ARM as well), but
> >  it's pretty clearly a hack. "which master sent this memory transaction"
> >  is an attribute that ought to be passed down to the MMIO read/write
> >  functions, really (along with other interesting things like "priv or
> >  not?" and probably architecture specific attributes like ARM's
> >  "secure/non-secure"); this matches how hardware does it where the
> >  attributes are passed along as extra signals in the bus fabric.
> >  (Sometimes hardware also does this by having buses from the different
> >  cores be totally separate paths at the point where this kind of device
> >  is connected, before merging together later; we don't really support
> >  modelling that either :-))
> >
> >  Not a nak, just an observation while I'm thinking about it.
>
> Same problem has to be solved on x86. The way the local APIC is hooked
> up right now is totally broken, just works by chance because normal
> guests don't seriously stress the architecture.
That, plus SMRAM.
> If we start dispatching CPU memory accesses via per-CPU memory roots,
> the problem can be solved without passing additional source information
> to the callbacks.
For that we need a full conversion.
-- 
error compiling committee.c: too many arguments to function
^ permalink raw reply	[flat|nested] 128+ messages in thread 
 
 
 
- * [Qemu-devel] [PATCH 06/58] PPC: Extend MPIC MMIO range
  2011-09-14  8:42 [Qemu-devel] [PULL 00/58] ppc patch queue 2011-09-14 Alexander Graf
                   ` (4 preceding siblings ...)
  2011-09-14  8:42 ` [Qemu-devel] [PATCH 05/58] PPC: Add CPU local MMIO regions to MPIC Alexander Graf
@ 2011-09-14  8:42 ` Alexander Graf
  2011-09-14  8:42 ` [Qemu-devel] [PATCH 07/58] PPC: Fix IPI support in MPIC Alexander Graf
                   ` (51 subsequent siblings)
  57 siblings, 0 replies; 128+ messages in thread
From: Alexander Graf @ 2011-09-14  8:42 UTC (permalink / raw)
  To: qemu-devel Developers; +Cc: Blue Swirl, qemu-ppc, Aurelien Jarno
The MPIC exports a page for each CPU that it controls. To support more than
one CPU, we need to also reserve the MMIO space according to the number of
CPUs we want to support.
Signed-off-by: Alexander Graf <agraf@suse.de>
---
 hw/openpic.c |    2 +-
 1 files changed, 1 insertions(+), 1 deletions(-)
diff --git a/hw/openpic.c b/hw/openpic.c
index cf89f23..f7d5583 100644
--- a/hw/openpic.c
+++ b/hw/openpic.c
@@ -128,7 +128,7 @@ enum {
 #define MPIC_MSI_REG_START        0x11C00
 #define MPIC_MSI_REG_SIZE         0x100
 #define MPIC_CPU_REG_START        0x20000
-#define MPIC_CPU_REG_SIZE         0x100
+#define MPIC_CPU_REG_SIZE         0x100 + ((MAX_CPU - 1) * 0x1000)
 
 enum mpic_ide_bits {
     IDR_EP     = 0,
-- 
1.6.0.2
^ permalink raw reply related	[flat|nested] 128+ messages in thread
- * [Qemu-devel] [PATCH 07/58] PPC: Fix IPI support in MPIC
  2011-09-14  8:42 [Qemu-devel] [PULL 00/58] ppc patch queue 2011-09-14 Alexander Graf
                   ` (5 preceding siblings ...)
  2011-09-14  8:42 ` [Qemu-devel] [PATCH 06/58] PPC: Extend MPIC MMIO range Alexander Graf
@ 2011-09-14  8:42 ` Alexander Graf
  2011-09-14  8:42 ` [Qemu-devel] [PATCH 08/58] PPC: Set MPIC IDE for IPI to 0 Alexander Graf
                   ` (50 subsequent siblings)
  57 siblings, 0 replies; 128+ messages in thread
From: Alexander Graf @ 2011-09-14  8:42 UTC (permalink / raw)
  To: qemu-devel Developers; +Cc: Blue Swirl, qemu-ppc, Aurelien Jarno
The current IPI support in the MPIC code is incomplete and doesn't work. This
code adds proper support for IPIs in MPIC by using the IDE register to remember
which CPUs IPIs are still outstanding to. New triggers through the IPI trigger
register only add to the list of CPUs we want to IPI.
Signed-off-by: Alexander Graf <agraf@suse.de>
---
v1 -> v2:
  - Use MAX_IPI instead of hardcoded 4
Signed-off-by: Alexander Graf <agraf@suse.de>
---
 hw/openpic.c |   17 +++++++++++++++--
 1 files changed, 15 insertions(+), 2 deletions(-)
diff --git a/hw/openpic.c b/hw/openpic.c
index f7d5583..9710ac0 100644
--- a/hw/openpic.c
+++ b/hw/openpic.c
@@ -57,7 +57,7 @@
 #define MAX_MBX     4
 #define MAX_TMR     4
 #define VECTOR_BITS 8
-#define MAX_IPI     0
+#define MAX_IPI     4
 
 #define VID (0x00000000)
 
@@ -840,7 +840,9 @@ static void openpic_cpu_write_internal(void *opaque, target_phys_addr_t addr,
     case 0x60:
     case 0x70:
         idx = (addr - 0x40) >> 4;
-        write_IRQreg(opp, opp->irq_ipi0 + idx, IRQ_IDE, val);
+        /* we use IDE as mask which CPUs to deliver the IPI to still. */
+        write_IRQreg(opp, opp->irq_ipi0 + idx, IRQ_IDE,
+                     opp->src[opp->irq_ipi0 + idx].ide | val);
         openpic_set_irq(opp, opp->irq_ipi0 + idx, 1);
         openpic_set_irq(opp, opp->irq_ipi0 + idx, 0);
         break;
@@ -934,6 +936,17 @@ static uint32_t openpic_cpu_read_internal(void *opaque, target_phys_addr_t addr,
                 reset_bit(&src->ipvp, IPVP_ACTIVITY);
                 src->pending = 0;
             }
+
+            if ((n_IRQ >= opp->irq_ipi0) &&  (n_IRQ < (opp->irq_ipi0 + MAX_IPI))) {
+                src->ide &= ~(1 << idx);
+                if (src->ide && !test_bit(&src->ipvp, IPVP_SENSE)) {
+                    /* trigger on CPUs that didn't know about it yet */
+                    openpic_set_irq(opp, n_IRQ, 1);
+                    openpic_set_irq(opp, n_IRQ, 0);
+                    /* if all CPUs knew about it, set active bit again */
+                    set_bit(&src->ipvp, IPVP_ACTIVITY);
+                }
+            }
         }
         break;
     case 0xB0: /* PEOI */
-- 
1.6.0.2
^ permalink raw reply related	[flat|nested] 128+ messages in thread
- * [Qemu-devel] [PATCH 08/58] PPC: Set MPIC IDE for IPI to 0
  2011-09-14  8:42 [Qemu-devel] [PULL 00/58] ppc patch queue 2011-09-14 Alexander Graf
                   ` (6 preceding siblings ...)
  2011-09-14  8:42 ` [Qemu-devel] [PATCH 07/58] PPC: Fix IPI support in MPIC Alexander Graf
@ 2011-09-14  8:42 ` Alexander Graf
  2011-09-14  8:42 ` [Qemu-devel] [PATCH 09/58] PPC: MPIC: Remove read functionality for WO registers Alexander Graf
                   ` (49 subsequent siblings)
  57 siblings, 0 replies; 128+ messages in thread
From: Alexander Graf @ 2011-09-14  8:42 UTC (permalink / raw)
  To: qemu-devel Developers; +Cc: Blue Swirl, qemu-ppc, Aurelien Jarno
We use the IDE register with IPIs as a mask to keep track of which processors
have already acknowledged the respective interrupt. So we need to initialize
it to 0 to make sure that it doesn't accidentally fire an IPI on CPU0 when the
first IPI is triggered.
Reported-by: Elie Richa <richa@adacore.com>
Signed-off-by: Alexander Graf <agraf@suse.de>
---
v2 -> v3:
  - fix IDE IPI reset
---
 hw/openpic.c |    4 ++++
 1 files changed, 4 insertions(+), 0 deletions(-)
diff --git a/hw/openpic.c b/hw/openpic.c
index 9710ac0..31ad175 100644
--- a/hw/openpic.c
+++ b/hw/openpic.c
@@ -1299,6 +1299,10 @@ static void mpic_reset (void *opaque)
         mpp->src[i].ipvp = 0x80800000;
         mpp->src[i].ide  = 0x00000001;
     }
+    /* Set IDE for IPIs to 0 so we don't get spurious interrupts */
+    for (i = mpp->irq_ipi0; i < (mpp->irq_ipi0 + MAX_IPI); i++) {
+        mpp->src[i].ide = 0;
+    }
     /* Initialise IRQ destinations */
     for (i = 0; i < MAX_CPU; i++) {
         mpp->dst[i].pctp      = 0x0000000F;
-- 
1.6.0.2
^ permalink raw reply related	[flat|nested] 128+ messages in thread
- * [Qemu-devel] [PATCH 09/58] PPC: MPIC: Remove read functionality for WO registers
  2011-09-14  8:42 [Qemu-devel] [PULL 00/58] ppc patch queue 2011-09-14 Alexander Graf
                   ` (7 preceding siblings ...)
  2011-09-14  8:42 ` [Qemu-devel] [PATCH 08/58] PPC: Set MPIC IDE for IPI to 0 Alexander Graf
@ 2011-09-14  8:42 ` Alexander Graf
  2011-09-14  8:42 ` [Qemu-devel] [PATCH 10/58] PPC: MPIC: Fix CI bit definitions Alexander Graf
                   ` (48 subsequent siblings)
  57 siblings, 0 replies; 128+ messages in thread
From: Alexander Graf @ 2011-09-14  8:42 UTC (permalink / raw)
  To: qemu-devel Developers; +Cc: Blue Swirl, qemu-ppc, Aurelien Jarno
The IPI dispatch registers are write only according to every MPIC
spec I have found. So instead of pretending you could read back something
from them, better not handle them at all.
Reported-by: Elie Richa <richa@adacore.com>
Signed-off-by: Alexander Graf <agraf@suse.de>
---
 hw/openpic.c |    7 -------
 1 files changed, 0 insertions(+), 7 deletions(-)
diff --git a/hw/openpic.c b/hw/openpic.c
index 31ad175..dfec52e 100644
--- a/hw/openpic.c
+++ b/hw/openpic.c
@@ -952,13 +952,6 @@ static uint32_t openpic_cpu_read_internal(void *opaque, target_phys_addr_t addr,
     case 0xB0: /* PEOI */
         retval = 0;
         break;
-#if MAX_IPI > 0
-    case 0x40: /* IDE */
-    case 0x50:
-        idx = (addr - 0x40) >> 4;
-        retval = read_IRQreg(opp, opp->irq_ipi0 + idx, IRQ_IDE);
-        break;
-#endif
     default:
         break;
     }
-- 
1.6.0.2
^ permalink raw reply related	[flat|nested] 128+ messages in thread
- * [Qemu-devel] [PATCH 10/58] PPC: MPIC: Fix CI bit definitions
  2011-09-14  8:42 [Qemu-devel] [PULL 00/58] ppc patch queue 2011-09-14 Alexander Graf
                   ` (8 preceding siblings ...)
  2011-09-14  8:42 ` [Qemu-devel] [PATCH 09/58] PPC: MPIC: Remove read functionality for WO registers Alexander Graf
@ 2011-09-14  8:42 ` Alexander Graf
  2011-09-14  8:42 ` [Qemu-devel] [PATCH 11/58] PPC: Bump MPIC up to 32 supported CPUs Alexander Graf
                   ` (47 subsequent siblings)
  57 siblings, 0 replies; 128+ messages in thread
From: Alexander Graf @ 2011-09-14  8:42 UTC (permalink / raw)
  To: qemu-devel Developers; +Cc: Blue Swirl, qemu-ppc, Aurelien Jarno
The bit definitions for critical interrupt routing are in PowerPC order
(most significant bit is 0), while we end up shifting it with normal bit
order. Turn the numbers around so we actually end up fetching the
right ones.
Signed-off-by: Alexander Graf <agraf@suse.de>
---
 hw/openpic.c |   10 +++++-----
 1 files changed, 5 insertions(+), 5 deletions(-)
diff --git a/hw/openpic.c b/hw/openpic.c
index dfec52e..109c1bc 100644
--- a/hw/openpic.c
+++ b/hw/openpic.c
@@ -131,11 +131,11 @@ enum {
 #define MPIC_CPU_REG_SIZE         0x100 + ((MAX_CPU - 1) * 0x1000)
 
 enum mpic_ide_bits {
-    IDR_EP     = 0,
-    IDR_CI0     = 1,
-    IDR_CI1     = 2,
-    IDR_P1     = 30,
-    IDR_P0     = 31,
+    IDR_EP     = 31,
+    IDR_CI0     = 30,
+    IDR_CI1     = 29,
+    IDR_P1     = 1,
+    IDR_P0     = 0,
 };
 
 #else
-- 
1.6.0.2
^ permalink raw reply related	[flat|nested] 128+ messages in thread
- * [Qemu-devel] [PATCH 11/58] PPC: Bump MPIC up to 32 supported CPUs
  2011-09-14  8:42 [Qemu-devel] [PULL 00/58] ppc patch queue 2011-09-14 Alexander Graf
                   ` (9 preceding siblings ...)
  2011-09-14  8:42 ` [Qemu-devel] [PATCH 10/58] PPC: MPIC: Fix CI bit definitions Alexander Graf
@ 2011-09-14  8:42 ` Alexander Graf
  2011-09-14  8:42 ` [Qemu-devel] [PATCH 12/58] PPC: E500: create multiple envs Alexander Graf
                   ` (46 subsequent siblings)
  57 siblings, 0 replies; 128+ messages in thread
From: Alexander Graf @ 2011-09-14  8:42 UTC (permalink / raw)
  To: qemu-devel Developers; +Cc: Blue Swirl, qemu-ppc, Aurelien Jarno
The MPIC emulation is now capable of handling up to 32 CPUs. Reflect that in
the code exporting the numbers out and fix an integer overflow while at it.
Signed-off-by: Alexander Graf <agraf@suse.de>
---
v1 -> v2:
  - Max cpus is 15 due to cINT routing
  - Report nb_cpus not MAX_CPUS in MPIC capabilities
---
 hw/openpic.c |   10 +++-------
 1 files changed, 3 insertions(+), 7 deletions(-)
diff --git a/hw/openpic.c b/hw/openpic.c
index 109c1bc..03e442b 100644
--- a/hw/openpic.c
+++ b/hw/openpic.c
@@ -63,7 +63,7 @@
 
 #elif defined(USE_MPCxxx)
 
-#define MAX_CPU     2
+#define MAX_CPU    15
 #define MAX_IRQ   128
 #define MAX_DBL     0
 #define MAX_MBX     0
@@ -507,7 +507,7 @@ static inline void write_IRQreg (openpic_t *opp, int n_IRQ,
         break;
     case IRQ_IDE:
         tmp = val & 0xC0000000;
-        tmp |= val & ((1 << MAX_CPU) - 1);
+        tmp |= val & ((1ULL << MAX_CPU) - 1);
         opp->src[n_IRQ].ide = tmp;
         DPRINTF("Set IDE %d to 0x%08x\n", n_IRQ, opp->src[n_IRQ].ide);
         break;
@@ -1283,7 +1283,7 @@ static void mpic_reset (void *opaque)
 
     mpp->glbc = 0x80000000;
     /* Initialise controller registers */
-    mpp->frep = 0x004f0002;
+    mpp->frep = 0x004f0002 | ((mpp->nb_cpus - 1) << 8);
     mpp->veni = VENI;
     mpp->pint = 0x00000000;
     mpp->spve = 0x0000FFFF;
@@ -1684,10 +1684,6 @@ qemu_irq *mpic_init (target_phys_addr_t base, int nb_cpus,
         {mpic_cpu_read, mpic_cpu_write, MPIC_CPU_REG_START, MPIC_CPU_REG_SIZE},
     };
 
-    /* XXX: for now, only one CPU is supported */
-    if (nb_cpus != 1)
-        return NULL;
-
     mpp = g_malloc0(sizeof(openpic_t));
 
     for (i = 0; i < sizeof(list)/sizeof(list[0]); i++) {
-- 
1.6.0.2
^ permalink raw reply related	[flat|nested] 128+ messages in thread
- * [Qemu-devel] [PATCH 12/58] PPC: E500: create multiple envs
  2011-09-14  8:42 [Qemu-devel] [PULL 00/58] ppc patch queue 2011-09-14 Alexander Graf
                   ` (10 preceding siblings ...)
  2011-09-14  8:42 ` [Qemu-devel] [PATCH 11/58] PPC: Bump MPIC up to 32 supported CPUs Alexander Graf
@ 2011-09-14  8:42 ` Alexander Graf
  2011-09-14  8:42 ` [Qemu-devel] [PATCH 13/58] PPC: E500: Generate IRQ lines for many CPUs Alexander Graf
                   ` (45 subsequent siblings)
  57 siblings, 0 replies; 128+ messages in thread
From: Alexander Graf @ 2011-09-14  8:42 UTC (permalink / raw)
  To: qemu-devel Developers; +Cc: Blue Swirl, qemu-ppc, Aurelien Jarno
When creating a VM, we should go through smp_cpus and create a virtual CPU for
every CPU the user requested. This patch adds support for that and moves some
code around to make that more convenient.
Signed-off-by: Alexander Graf <agraf@suse.de>
---
 hw/ppce500_mpc8544ds.c |   44 +++++++++++++++++++++++++++++---------------
 1 files changed, 29 insertions(+), 15 deletions(-)
diff --git a/hw/ppce500_mpc8544ds.c b/hw/ppce500_mpc8544ds.c
index 1274a3e..8d05587 100644
--- a/hw/ppce500_mpc8544ds.c
+++ b/hw/ppce500_mpc8544ds.c
@@ -226,7 +226,7 @@ static void mpc8544ds_init(ram_addr_t ram_size,
                          const char *cpu_model)
 {
     PCIBus *pci_bus;
-    CPUState *env;
+    CPUState *env = NULL;
     uint64_t elf_entry;
     uint64_t elf_lowaddr;
     target_phys_addr_t entry=0;
@@ -240,24 +240,40 @@ static void mpc8544ds_init(ram_addr_t ram_size,
     qemu_irq *irqs, *mpic;
     DeviceState *dev;
     struct boot_info *boot_info;
+    CPUState *firstenv = NULL;
 
-    /* Setup CPU */
+    /* Setup CPUs */
     if (cpu_model == NULL) {
         cpu_model = "e500v2_v30";
     }
 
-    env = cpu_ppc_init(cpu_model);
-    if (!env) {
-        fprintf(stderr, "Unable to initialize CPU!\n");
-        exit(1);
-    }
+    for (i = 0; i < smp_cpus; i++) {
+        qemu_irq *input;
+        env = cpu_ppc_init(cpu_model);
+        if (!env) {
+            fprintf(stderr, "Unable to initialize CPU!\n");
+            exit(1);
+        }
+
+        if (!firstenv) {
+            firstenv = env;
+        }
 
-    /* XXX register timer? */
-    ppc_emb_timers_init(env, 400000000, PPC_INTERRUPT_DECR);
-    ppc_dcr_init(env, NULL, NULL);
+        env->spr[SPR_BOOKE_PIR] = env->cpu_index = i;
 
-    /* Register reset handler */
-    qemu_register_reset(mpc8544ds_cpu_reset, env);
+        /* XXX register timer? */
+        ppc_emb_timers_init(env, 400000000, PPC_INTERRUPT_DECR);
+        ppc_dcr_init(env, NULL, NULL);
+        /* XXX Enable DEC interrupts - probably wrong in the backend */
+        env->spr[SPR_40x_TCR] = 1 << 26;
+
+        /* Register reset handler */
+        boot_info = g_malloc0(sizeof(struct boot_info));
+        qemu_register_reset(mpc8544ds_cpu_reset, env);
+        env->load_info = boot_info;
+    }
+
+    env = firstenv;
 
     /* Fixup Memory size on a alignment boundary */
     ram_size &= ~(RAM_SIZES_ALIGN - 1);
@@ -336,8 +352,6 @@ static void mpc8544ds_init(ram_addr_t ram_size,
         }
     }
 
-    boot_info = g_malloc0(sizeof(struct boot_info));
-
     /* If we're loading a kernel directly, we must load the device tree too. */
     if (kernel_filename) {
 #ifndef CONFIG_FDT
@@ -350,10 +364,10 @@ static void mpc8544ds_init(ram_addr_t ram_size,
             exit(1);
         }
 
+        boot_info = env->load_info;
         boot_info->entry = entry;
         boot_info->dt_base = dt_base;
     }
-    env->load_info = boot_info;
 
     if (kvm_enabled()) {
         kvmppc_init();
-- 
1.6.0.2
^ permalink raw reply related	[flat|nested] 128+ messages in thread
- * [Qemu-devel] [PATCH 13/58] PPC: E500: Generate IRQ lines for many CPUs
  2011-09-14  8:42 [Qemu-devel] [PULL 00/58] ppc patch queue 2011-09-14 Alexander Graf
                   ` (11 preceding siblings ...)
  2011-09-14  8:42 ` [Qemu-devel] [PATCH 12/58] PPC: E500: create multiple envs Alexander Graf
@ 2011-09-14  8:42 ` Alexander Graf
  2011-09-14  8:42 ` [Qemu-devel] [PATCH 14/58] device tree: add nop_node Alexander Graf
                   ` (44 subsequent siblings)
  57 siblings, 0 replies; 128+ messages in thread
From: Alexander Graf @ 2011-09-14  8:42 UTC (permalink / raw)
  To: qemu-devel Developers; +Cc: Blue Swirl, qemu-ppc, Aurelien Jarno
Now that we can generate multiple envs for all our virtual CPUs, we
also need to tell the MPIC that we have multiple CPUs connected and
connect them all to the respective virtual interrupt lines.
Signed-off-by: Alexander Graf <agraf@suse.de>
---
 hw/ppce500_mpc8544ds.c |   17 ++++++++++++-----
 1 files changed, 12 insertions(+), 5 deletions(-)
diff --git a/hw/ppce500_mpc8544ds.c b/hw/ppce500_mpc8544ds.c
index 8d05587..9cb01f3 100644
--- a/hw/ppce500_mpc8544ds.c
+++ b/hw/ppce500_mpc8544ds.c
@@ -237,7 +237,7 @@ static void mpc8544ds_init(ram_addr_t ram_size,
     target_long initrd_size=0;
     int i=0;
     unsigned int pci_irq_nrs[4] = {1, 2, 3, 4};
-    qemu_irq *irqs, *mpic;
+    qemu_irq **irqs, *mpic;
     DeviceState *dev;
     struct boot_info *boot_info;
     CPUState *firstenv = NULL;
@@ -247,6 +247,8 @@ static void mpc8544ds_init(ram_addr_t ram_size,
         cpu_model = "e500v2_v30";
     }
 
+    irqs = g_malloc0(smp_cpus * sizeof(qemu_irq *));
+    irqs[0] = g_malloc0(smp_cpus * sizeof(qemu_irq) * OPENPIC_OUTPUT_NB);
     for (i = 0; i < smp_cpus; i++) {
         qemu_irq *input;
         env = cpu_ppc_init(cpu_model);
@@ -259,6 +261,10 @@ static void mpc8544ds_init(ram_addr_t ram_size,
             firstenv = env;
         }
 
+        irqs[i] = irqs[0] + (i * OPENPIC_OUTPUT_NB);
+        input = (qemu_irq *)env->irq_inputs;
+        irqs[i][OPENPIC_OUTPUT_INT] = input[PPCE500_INPUT_INT];
+        irqs[i][OPENPIC_OUTPUT_CINT] = input[PPCE500_INPUT_CINT];
         env->spr[SPR_BOOKE_PIR] = env->cpu_index = i;
 
         /* XXX register timer? */
@@ -283,10 +289,11 @@ static void mpc8544ds_init(ram_addr_t ram_size,
                                  "mpc8544ds.ram", ram_size));
 
     /* MPIC */
-    irqs = g_malloc0(sizeof(qemu_irq) * OPENPIC_OUTPUT_NB);
-    irqs[OPENPIC_OUTPUT_INT] = ((qemu_irq *)env->irq_inputs)[PPCE500_INPUT_INT];
-    irqs[OPENPIC_OUTPUT_CINT] = ((qemu_irq *)env->irq_inputs)[PPCE500_INPUT_CINT];
-    mpic = mpic_init(MPC8544_MPIC_REGS_BASE, 1, &irqs, NULL);
+    mpic = mpic_init(MPC8544_MPIC_REGS_BASE, smp_cpus, irqs, NULL);
+
+    if (!mpic) {
+        cpu_abort(env, "MPIC failed to initialize\n");
+    }
 
     /* Serial */
     if (serial_hds[0]) {
-- 
1.6.0.2
^ permalink raw reply related	[flat|nested] 128+ messages in thread
- * [Qemu-devel] [PATCH 14/58] device tree: add nop_node
  2011-09-14  8:42 [Qemu-devel] [PULL 00/58] ppc patch queue 2011-09-14 Alexander Graf
                   ` (12 preceding siblings ...)
  2011-09-14  8:42 ` [Qemu-devel] [PATCH 13/58] PPC: E500: Generate IRQ lines for many CPUs Alexander Graf
@ 2011-09-14  8:42 ` Alexander Graf
  2011-09-17 16:48   ` Blue Swirl
  2011-09-14  8:42 ` [Qemu-devel] [PATCH 15/58] PPC: bamboo: Move host fdt copy to target Alexander Graf
                   ` (43 subsequent siblings)
  57 siblings, 1 reply; 128+ messages in thread
From: Alexander Graf @ 2011-09-14  8:42 UTC (permalink / raw)
  To: qemu-devel Developers; +Cc: Blue Swirl, qemu-ppc, Aurelien Jarno
We have a qemu internal abstraction layer on FDT. While I'm not fully convinced
we need it at all, it's missing the nop_node functionality that we now need
on e500. So let's add it and think about the general future of that API later.
Signed-off-by: Alexander Graf <agraf@suse.de>
---
 device_tree.c |   11 +++++++++++
 device_tree.h |    1 +
 2 files changed, 12 insertions(+), 0 deletions(-)
diff --git a/device_tree.c b/device_tree.c
index 3a224d1..23e89e3 100644
--- a/device_tree.c
+++ b/device_tree.c
@@ -107,3 +107,14 @@ int qemu_devtree_setprop_string(void *fdt, const char *node_path,
 
     return fdt_setprop_string(fdt, offset, property, string);
 }
+
+int qemu_devtree_nop_node(void *fdt, const char *node_path)
+{
+    int offset;
+
+    offset = fdt_path_offset(fdt, node_path);
+    if (offset < 0)
+        return offset;
+
+    return fdt_nop_node(fdt, offset);
+}
diff --git a/device_tree.h b/device_tree.h
index cecd98f..76fce5f 100644
--- a/device_tree.h
+++ b/device_tree.h
@@ -22,5 +22,6 @@ int qemu_devtree_setprop_cell(void *fdt, const char *node_path,
                               const char *property, uint32_t val);
 int qemu_devtree_setprop_string(void *fdt, const char *node_path,
                                 const char *property, const char *string);
+int qemu_devtree_nop_node(void *fdt, const char *node_path);
 
 #endif /* __DEVICE_TREE_H__ */
-- 
1.6.0.2
^ permalink raw reply related	[flat|nested] 128+ messages in thread
- * Re: [Qemu-devel] [PATCH 14/58] device tree: add nop_node
  2011-09-14  8:42 ` [Qemu-devel] [PATCH 14/58] device tree: add nop_node Alexander Graf
@ 2011-09-17 16:48   ` Blue Swirl
  2011-09-19 11:22     ` Alexander Graf
  0 siblings, 1 reply; 128+ messages in thread
From: Blue Swirl @ 2011-09-17 16:48 UTC (permalink / raw)
  To: Alexander Graf; +Cc: qemu-ppc, qemu-devel Developers, Aurelien Jarno
On Wed, Sep 14, 2011 at 8:42 AM, Alexander Graf <agraf@suse.de> wrote:
> We have a qemu internal abstraction layer on FDT. While I'm not fully convinced
> we need it at all, it's missing the nop_node functionality that we now need
> on e500. So let's add it and think about the general future of that API later.
>
> Signed-off-by: Alexander Graf <agraf@suse.de>
> ---
>  device_tree.c |   11 +++++++++++
>  device_tree.h |    1 +
>  2 files changed, 12 insertions(+), 0 deletions(-)
>
> diff --git a/device_tree.c b/device_tree.c
> index 3a224d1..23e89e3 100644
> --- a/device_tree.c
> +++ b/device_tree.c
> @@ -107,3 +107,14 @@ int qemu_devtree_setprop_string(void *fdt, const char *node_path,
>
>     return fdt_setprop_string(fdt, offset, property, string);
>  }
> +
> +int qemu_devtree_nop_node(void *fdt, const char *node_path)
> +{
> +    int offset;
> +
> +    offset = fdt_path_offset(fdt, node_path);
> +    if (offset < 0)
-EBRACES
> +        return offset;
> +
> +    return fdt_nop_node(fdt, offset);
> +}
> diff --git a/device_tree.h b/device_tree.h
> index cecd98f..76fce5f 100644
> --- a/device_tree.h
> +++ b/device_tree.h
> @@ -22,5 +22,6 @@ int qemu_devtree_setprop_cell(void *fdt, const char *node_path,
>                               const char *property, uint32_t val);
>  int qemu_devtree_setprop_string(void *fdt, const char *node_path,
>                                 const char *property, const char *string);
> +int qemu_devtree_nop_node(void *fdt, const char *node_path);
>
>  #endif /* __DEVICE_TREE_H__ */
> --
> 1.6.0.2
>
>
^ permalink raw reply	[flat|nested] 128+ messages in thread
- * Re: [Qemu-devel] [PATCH 14/58] device tree: add nop_node
  2011-09-17 16:48   ` Blue Swirl
@ 2011-09-19 11:22     ` Alexander Graf
  0 siblings, 0 replies; 128+ messages in thread
From: Alexander Graf @ 2011-09-19 11:22 UTC (permalink / raw)
  To: Blue Swirl; +Cc: qemu-ppc, qemu-devel Developers, Aurelien Jarno
On 17.09.2011, at 18:48, Blue Swirl wrote:
> On Wed, Sep 14, 2011 at 8:42 AM, Alexander Graf <agraf@suse.de> wrote:
>> We have a qemu internal abstraction layer on FDT. While I'm not fully convinced
>> we need it at all, it's missing the nop_node functionality that we now need
>> on e500. So let's add it and think about the general future of that API later.
>> 
>> Signed-off-by: Alexander Graf <agraf@suse.de>
>> ---
>>  device_tree.c |   11 +++++++++++
>>  device_tree.h |    1 +
>>  2 files changed, 12 insertions(+), 0 deletions(-)
>> 
>> diff --git a/device_tree.c b/device_tree.c
>> index 3a224d1..23e89e3 100644
>> --- a/device_tree.c
>> +++ b/device_tree.c
>> @@ -107,3 +107,14 @@ int qemu_devtree_setprop_string(void *fdt, const char *node_path,
>> 
>>     return fdt_setprop_string(fdt, offset, property, string);
>>  }
>> +
>> +int qemu_devtree_nop_node(void *fdt, const char *node_path)
>> +{
>> +    int offset;
>> +
>> +    offset = fdt_path_offset(fdt, node_path);
>> +    if (offset < 0)
> 
> -EBRACES
That code gets changed right after that patch anyways. The version that actually ends up being visible to the user has proper braces :)
Alex
^ permalink raw reply	[flat|nested] 128+ messages in thread
 
 
- * [Qemu-devel] [PATCH 15/58] PPC: bamboo: Move host fdt copy to target
  2011-09-14  8:42 [Qemu-devel] [PULL 00/58] ppc patch queue 2011-09-14 Alexander Graf
                   ` (13 preceding siblings ...)
  2011-09-14  8:42 ` [Qemu-devel] [PATCH 14/58] device tree: add nop_node Alexander Graf
@ 2011-09-14  8:42 ` Alexander Graf
  2011-09-14  8:42 ` [Qemu-devel] [PATCH 16/58] PPC: KVM: Add generic function to read host clockfreq Alexander Graf
                   ` (42 subsequent siblings)
  57 siblings, 0 replies; 128+ messages in thread
From: Alexander Graf @ 2011-09-14  8:42 UTC (permalink / raw)
  To: qemu-devel Developers; +Cc: Blue Swirl, qemu-ppc, Aurelien Jarno
We have some code in generic kvm_ppc.c that is only used by 440. Move it to
the 440-specific device code.
Signed-off-by: Alexander Graf <agraf@suse.de>
---
 hw/ppc440_bamboo.c   |   37 +++++++++++++++++++++++++++++++++++--
 target-ppc/kvm_ppc.c |   30 ------------------------------
 target-ppc/kvm_ppc.h |    1 -
 3 files changed, 35 insertions(+), 33 deletions(-)
diff --git a/hw/ppc440_bamboo.c b/hw/ppc440_bamboo.c
index 1addb68..65d4f0f 100644
--- a/hw/ppc440_bamboo.c
+++ b/hw/ppc440_bamboo.c
@@ -31,6 +31,38 @@
 #define FDT_ADDR     0x1800000
 #define RAMDISK_ADDR 0x1900000
 
+#ifdef CONFIG_FDT
+static int bamboo_copy_host_cell(void *fdt, const char *node, const char *prop)
+{
+    uint32_t cell;
+    int ret;
+
+    ret = kvmppc_read_host_property(node, prop, &cell, sizeof(cell));
+    if (ret < 0) {
+        fprintf(stderr, "couldn't read host %s/%s\n", node, prop);
+        goto out;
+    }
+
+    ret = qemu_devtree_setprop_cell(fdt, node, prop, cell);
+    if (ret < 0) {
+        fprintf(stderr, "couldn't set guest %s/%s\n", node, prop);
+        goto out;
+    }
+
+out:
+    return ret;
+}
+
+static void bamboo_fdt_update(void *fdt)
+{
+    /* Copy data from the host device tree into the guest. Since the guest can
+     * directly access the timebase without host involvement, we must expose
+     * the correct frequencies. */
+    bamboo_copy_host_cell(fdt, "/cpus/cpu@0", "clock-frequency");
+    bamboo_copy_host_cell(fdt, "/cpus/cpu@0", "timebase-frequency");
+}
+#endif
+
 static int bamboo_load_device_tree(target_phys_addr_t addr,
                                      uint32_t ramsize,
                                      target_phys_addr_t initrd_base,
@@ -76,8 +108,9 @@ static int bamboo_load_device_tree(target_phys_addr_t addr,
     if (ret < 0)
         fprintf(stderr, "couldn't set /chosen/bootargs\n");
 
-    if (kvm_enabled())
-        kvmppc_fdt_update(fdt);
+    if (kvm_enabled()) {
+        bamboo_fdt_update(fdt);
+    }
 
     ret = rom_add_blob_fixed(BINARY_DEVICE_TREE_FILE, fdt, fdt_size, addr);
     g_free(fdt);
diff --git a/target-ppc/kvm_ppc.c b/target-ppc/kvm_ppc.c
index 867dc1d..233115e 100644
--- a/target-ppc/kvm_ppc.c
+++ b/target-ppc/kvm_ppc.c
@@ -54,36 +54,6 @@ free:
     free(path);
     return ret;
 }
-
-static int kvmppc_copy_host_cell(void *fdt, const char *node, const char *prop)
-{
-    uint32_t cell;
-    int ret;
-
-    ret = kvmppc_read_host_property(node, prop, &cell, sizeof(cell));
-    if (ret < 0) {
-        fprintf(stderr, "couldn't read host %s/%s\n", node, prop);
-        goto out;
-    }
-
-    ret = qemu_devtree_setprop_cell(fdt, node, prop, cell);
-    if (ret < 0) {
-        fprintf(stderr, "couldn't set guest %s/%s\n", node, prop);
-        goto out;
-    }
-
-out:
-    return ret;
-}
-
-void kvmppc_fdt_update(void *fdt)
-{
-    /* Copy data from the host device tree into the guest. Since the guest can
-     * directly access the timebase without host involvement, we must expose
-     * the correct frequencies. */
-    kvmppc_copy_host_cell(fdt, "/cpus/cpu@0", "clock-frequency");
-    kvmppc_copy_host_cell(fdt, "/cpus/cpu@0", "timebase-frequency");
-}
 #endif
 
 static void kvmppc_timer_hack(void *opaque)
diff --git a/target-ppc/kvm_ppc.h b/target-ppc/kvm_ppc.h
index 45a1373..2f32249 100644
--- a/target-ppc/kvm_ppc.h
+++ b/target-ppc/kvm_ppc.h
@@ -10,7 +10,6 @@
 #define __KVM_PPC_H__
 
 void kvmppc_init(void);
-void kvmppc_fdt_update(void *fdt);
 #ifndef CONFIG_KVM
 static inline int kvmppc_read_host_property(const char *node_path, const char *prop,
                                             void *val, size_t len)
-- 
1.6.0.2
^ permalink raw reply related	[flat|nested] 128+ messages in thread
- * [Qemu-devel] [PATCH 16/58] PPC: KVM: Add generic function to read host clockfreq
  2011-09-14  8:42 [Qemu-devel] [PULL 00/58] ppc patch queue 2011-09-14 Alexander Graf
                   ` (14 preceding siblings ...)
  2011-09-14  8:42 ` [Qemu-devel] [PATCH 15/58] PPC: bamboo: Move host fdt copy to target Alexander Graf
@ 2011-09-14  8:42 ` Alexander Graf
  2011-09-15  3:16   ` [Qemu-devel] [Qemu-ppc] " David Gibson
  2011-09-14  8:42 ` [Qemu-devel] [PATCH 17/58] PPC: E500: Use generic kvm function for freq Alexander Graf
                   ` (41 subsequent siblings)
  57 siblings, 1 reply; 128+ messages in thread
From: Alexander Graf @ 2011-09-14  8:42 UTC (permalink / raw)
  To: qemu-devel Developers; +Cc: Blue Swirl, qemu-ppc, Aurelien Jarno
We need to find out the host's clock-frequency when running on KVM, so
let's export a respective function.
Signed-off-by: Alexander Graf <agraf@suse.de>
---
v1 -> v2:
  - enable 64bit values
---
 target-ppc/kvm.c     |   67 ++++++++++++++++++++++++++++++++++++++++++++++++++
 target-ppc/kvm_ppc.h |    1 +
 2 files changed, 68 insertions(+), 0 deletions(-)
diff --git a/target-ppc/kvm.c b/target-ppc/kvm.c
index 21f35af..77b98c4 100644
--- a/target-ppc/kvm.c
+++ b/target-ppc/kvm.c
@@ -14,6 +14,7 @@
  *
  */
 
+#include <dirent.h>
 #include <sys/types.h>
 #include <sys/ioctl.h>
 #include <sys/mman.h>
@@ -38,6 +39,8 @@
     do { } while (0)
 #endif
 
+#define PROC_DEVTREE_CPU      "/proc/device-tree/cpus/"
+
 const KVMCapabilityInfo kvm_arch_required_capabilities[] = {
     KVM_CAP_LAST_INFO
 };
@@ -509,6 +512,70 @@ uint32_t kvmppc_get_tbfreq(void)
     return retval;
 }
 
+/* Try to find a device tree node for a CPU with clock-frequency property */
+static int kvmppc_find_cpu_dt(char *buf, int buf_len)
+{
+    struct dirent *dirp;
+    DIR *dp;
+
+    if ((dp = opendir(PROC_DEVTREE_CPU)) == NULL) {
+        printf("Can't open directory " PROC_DEVTREE_CPU "\n");
+        return -1;
+    }
+
+    buf[0] = '\0';
+    while ((dirp = readdir(dp)) != NULL) {
+        FILE *f;
+        snprintf(buf, buf_len, "%s%s/clock-frequency", PROC_DEVTREE_CPU,
+                 dirp->d_name);
+        f = fopen(buf, "r");
+        if (f) {
+            snprintf(buf, buf_len, "%s%s", PROC_DEVTREE_CPU, dirp->d_name);
+            fclose(f);
+            break;
+        }
+        buf[0] = '\0';
+    }
+    closedir(dp);
+    if (buf[0] == '\0') {
+        printf("Unknown host!\n");
+        return -1;
+    }
+
+    return 0;
+}
+
+uint64_t kvmppc_get_clockfreq(void)
+{
+    char buf[512];
+    uint32_t tb[2];
+    FILE *f;
+    int len;
+
+    if (kvmppc_find_cpu_dt(buf, sizeof(buf))) {
+        return 0;
+    }
+
+    strncat(buf, "/clock-frequency", sizeof(buf) - strlen(buf));
+
+    f = fopen(buf, "rb");
+    if (!f) {
+        return -1;
+    }
+
+    len = fread(tb, sizeof(tb[0]), 2, f);
+    fclose(f);
+    switch (len) {
+    case 1:
+        /* freq is only a single cell */
+        return tb[0];
+    case 2:
+        return *(uint64_t*)tb;
+    }
+
+    return 0;
+}
+
 int kvmppc_get_hypercall(CPUState *env, uint8_t *buf, int buf_len)
 {
     uint32_t *hc = (uint32_t*)buf;
diff --git a/target-ppc/kvm_ppc.h b/target-ppc/kvm_ppc.h
index 2f32249..7c08c0f 100644
--- a/target-ppc/kvm_ppc.h
+++ b/target-ppc/kvm_ppc.h
@@ -23,6 +23,7 @@ int kvmppc_read_host_property(const char *node_path, const char *prop,
 #endif
 
 uint32_t kvmppc_get_tbfreq(void);
+uint64_t kvmppc_get_clockfreq(void);
 int kvmppc_get_hypercall(CPUState *env, uint8_t *buf, int buf_len);
 int kvmppc_set_interrupt(CPUState *env, int irq, int level);
 
-- 
1.6.0.2
^ permalink raw reply related	[flat|nested] 128+ messages in thread
- * [Qemu-devel] [PATCH 17/58] PPC: E500: Use generic kvm function for freq
  2011-09-14  8:42 [Qemu-devel] [PULL 00/58] ppc patch queue 2011-09-14 Alexander Graf
                   ` (15 preceding siblings ...)
  2011-09-14  8:42 ` [Qemu-devel] [PATCH 16/58] PPC: KVM: Add generic function to read host clockfreq Alexander Graf
@ 2011-09-14  8:42 ` Alexander Graf
  2011-09-14  8:42 ` [Qemu-devel] [PATCH 18/58] PPC: E500: Remove mpc8544_copy_soc_cell Alexander Graf
                   ` (40 subsequent siblings)
  57 siblings, 0 replies; 128+ messages in thread
From: Alexander Graf @ 2011-09-14  8:42 UTC (permalink / raw)
  To: qemu-devel Developers; +Cc: Blue Swirl, qemu-ppc, Aurelien Jarno
Now that we have generic KVM functions to read out the host tb and clock
frequencies, let's use them in the e500 code!
Signed-off-by: Alexander Graf <agraf@suse.de>
---
 hw/ppce500_mpc8544ds.c |   44 +++++++++-----------------------------------
 1 files changed, 9 insertions(+), 35 deletions(-)
diff --git a/hw/ppce500_mpc8544ds.c b/hw/ppce500_mpc8544ds.c
index 9cb01f3..8748531 100644
--- a/hw/ppce500_mpc8544ds.c
+++ b/hw/ppce500_mpc8544ds.c
@@ -14,8 +14,6 @@
  * (at your option) any later version.
  */
 
-#include <dirent.h>
-
 #include "config.h"
 #include "qemu-common.h"
 #include "net.h"
@@ -96,6 +94,9 @@ static int mpc8544_load_device_tree(CPUState *env,
     int fdt_size;
     void *fdt;
     uint8_t hypercall[16];
+    char cpu_name[128] = "/cpus/PowerPC,8544@0";
+    uint32_t clock_freq = 400000000;
+    uint32_t tb_freq = 400000000;
 
     filename = qemu_find_file(QEMU_FILE_TYPE_BIOS, BINARY_DEVICE_TREE_FILE);
     if (!filename) {
@@ -133,32 +134,9 @@ static int mpc8544_load_device_tree(CPUState *env,
         fprintf(stderr, "couldn't set /chosen/bootargs\n");
 
     if (kvm_enabled()) {
-        struct dirent *dirp;
-        DIR *dp;
-        char buf[128];
-
-        if ((dp = opendir("/proc/device-tree/cpus/")) == NULL) {
-            printf("Can't open directory /proc/device-tree/cpus/\n");
-            ret = -1;
-            goto out;
-        }
-
-        buf[0] = '\0';
-        while ((dirp = readdir(dp)) != NULL) {
-            if (strncmp(dirp->d_name, "PowerPC", 7) == 0) {
-                snprintf(buf, 128, "/cpus/%s", dirp->d_name);
-                break;
-            }
-        }
-        closedir(dp);
-        if (buf[0] == '\0') {
-            printf("Unknow host!\n");
-            ret = -1;
-            goto out;
-        }
-
-        mpc8544_copy_soc_cell(fdt, buf, "clock-frequency");
-        mpc8544_copy_soc_cell(fdt, buf, "timebase-frequency");
+        /* Read out host's frequencies */
+        clock_freq = kvmppc_get_clockfreq();
+        tb_freq = kvmppc_get_tbfreq();
 
         /* indicate KVM hypercall interface */
         qemu_devtree_setprop_string(fdt, "/hypervisor", "compatible",
@@ -166,15 +144,11 @@ static int mpc8544_load_device_tree(CPUState *env,
         kvmppc_get_hypercall(env, hypercall, sizeof(hypercall));
         qemu_devtree_setprop(fdt, "/hypervisor", "hcall-instructions",
                              hypercall, sizeof(hypercall));
-    } else {
-        const uint32_t freq = 400000000;
-
-        qemu_devtree_setprop_cell(fdt, "/cpus/PowerPC,8544@0",
-                                  "clock-frequency", freq);
-        qemu_devtree_setprop_cell(fdt, "/cpus/PowerPC,8544@0",
-                                  "timebase-frequency", freq);
     }
 
+    qemu_devtree_setprop_cell(fdt, cpu_name, "clock-frequency", clock_freq);
+    qemu_devtree_setprop_cell(fdt, cpu_name, "timebase-frequency", tb_freq);
+
     ret = rom_add_blob_fixed(BINARY_DEVICE_TREE_FILE, fdt, fdt_size, addr);
     g_free(fdt);
 
-- 
1.6.0.2
^ permalink raw reply related	[flat|nested] 128+ messages in thread
- * [Qemu-devel] [PATCH 18/58] PPC: E500: Remove mpc8544_copy_soc_cell
  2011-09-14  8:42 [Qemu-devel] [PULL 00/58] ppc patch queue 2011-09-14 Alexander Graf
                   ` (16 preceding siblings ...)
  2011-09-14  8:42 ` [Qemu-devel] [PATCH 17/58] PPC: E500: Use generic kvm function for freq Alexander Graf
@ 2011-09-14  8:42 ` Alexander Graf
  2011-09-14  8:42 ` [Qemu-devel] [PATCH 19/58] PPC: bamboo: Use kvm api for freq and clock frequencies Alexander Graf
                   ` (39 subsequent siblings)
  57 siblings, 0 replies; 128+ messages in thread
From: Alexander Graf @ 2011-09-14  8:42 UTC (permalink / raw)
  To: qemu-devel Developers; +Cc: Blue Swirl, qemu-ppc, Aurelien Jarno
We don't need mpc8544_copy_soc_cell anymore, since we're explicitly reading
host values and writing guest values respectively.
Signed-off-by: Alexander Graf <agraf@suse.de>
---
 hw/ppce500_mpc8544ds.c |   24 ------------------------
 1 files changed, 0 insertions(+), 24 deletions(-)
diff --git a/hw/ppce500_mpc8544ds.c b/hw/ppce500_mpc8544ds.c
index 8748531..2c7c677 100644
--- a/hw/ppce500_mpc8544ds.c
+++ b/hw/ppce500_mpc8544ds.c
@@ -56,30 +56,6 @@ struct boot_info
     uint32_t entry;
 };
 
-#ifdef CONFIG_FDT
-static int mpc8544_copy_soc_cell(void *fdt, const char *node, const char *prop)
-{
-    uint32_t cell;
-    int ret;
-
-    ret = kvmppc_read_host_property(node, prop, &cell, sizeof(cell));
-    if (ret < 0) {
-        fprintf(stderr, "couldn't read host %s/%s\n", node, prop);
-        goto out;
-    }
-
-    ret = qemu_devtree_setprop_cell(fdt, "/cpus/PowerPC,8544@0",
-                                prop, cell);
-    if (ret < 0) {
-        fprintf(stderr, "couldn't set guest /cpus/PowerPC,8544@0/%s\n", prop);
-        goto out;
-    }
-
-out:
-    return ret;
-}
-#endif
-
 static int mpc8544_load_device_tree(CPUState *env,
                                     target_phys_addr_t addr,
                                     uint32_t ramsize,
-- 
1.6.0.2
^ permalink raw reply related	[flat|nested] 128+ messages in thread
- * [Qemu-devel] [PATCH 19/58] PPC: bamboo: Use kvm api for freq and clock frequencies
  2011-09-14  8:42 [Qemu-devel] [PULL 00/58] ppc patch queue 2011-09-14 Alexander Graf
                   ` (17 preceding siblings ...)
  2011-09-14  8:42 ` [Qemu-devel] [PATCH 18/58] PPC: E500: Remove mpc8544_copy_soc_cell Alexander Graf
@ 2011-09-14  8:42 ` Alexander Graf
  2011-09-14  8:42 ` [Qemu-devel] [PATCH 20/58] PPC: KVM: Remove kvmppc_read_host_property Alexander Graf
                   ` (38 subsequent siblings)
  57 siblings, 0 replies; 128+ messages in thread
From: Alexander Graf @ 2011-09-14  8:42 UTC (permalink / raw)
  To: qemu-devel Developers; +Cc: Blue Swirl, qemu-ppc, Aurelien Jarno
Now that we have nice and shiny APIs to read out the host's clock and timebase
frequencies, let's use them in the bamboo code as well!
Signed-off-by: Alexander Graf <agraf@suse.de>
---
 hw/ppc440_bamboo.c |   45 ++++++++++++---------------------------------
 1 files changed, 12 insertions(+), 33 deletions(-)
diff --git a/hw/ppc440_bamboo.c b/hw/ppc440_bamboo.c
index 65d4f0f..1523764 100644
--- a/hw/ppc440_bamboo.c
+++ b/hw/ppc440_bamboo.c
@@ -31,38 +31,6 @@
 #define FDT_ADDR     0x1800000
 #define RAMDISK_ADDR 0x1900000
 
-#ifdef CONFIG_FDT
-static int bamboo_copy_host_cell(void *fdt, const char *node, const char *prop)
-{
-    uint32_t cell;
-    int ret;
-
-    ret = kvmppc_read_host_property(node, prop, &cell, sizeof(cell));
-    if (ret < 0) {
-        fprintf(stderr, "couldn't read host %s/%s\n", node, prop);
-        goto out;
-    }
-
-    ret = qemu_devtree_setprop_cell(fdt, node, prop, cell);
-    if (ret < 0) {
-        fprintf(stderr, "couldn't set guest %s/%s\n", node, prop);
-        goto out;
-    }
-
-out:
-    return ret;
-}
-
-static void bamboo_fdt_update(void *fdt)
-{
-    /* Copy data from the host device tree into the guest. Since the guest can
-     * directly access the timebase without host involvement, we must expose
-     * the correct frequencies. */
-    bamboo_copy_host_cell(fdt, "/cpus/cpu@0", "clock-frequency");
-    bamboo_copy_host_cell(fdt, "/cpus/cpu@0", "timebase-frequency");
-}
-#endif
-
 static int bamboo_load_device_tree(target_phys_addr_t addr,
                                      uint32_t ramsize,
                                      target_phys_addr_t initrd_base,
@@ -75,6 +43,8 @@ static int bamboo_load_device_tree(target_phys_addr_t addr,
     char *filename;
     int fdt_size;
     void *fdt;
+    uint32_t tb_freq = 400000000;
+    uint32_t clock_freq = 400000000;
 
     filename = qemu_find_file(QEMU_FILE_TYPE_BIOS, BINARY_DEVICE_TREE_FILE);
     if (!filename) {
@@ -108,10 +78,19 @@ static int bamboo_load_device_tree(target_phys_addr_t addr,
     if (ret < 0)
         fprintf(stderr, "couldn't set /chosen/bootargs\n");
 
+    /* Copy data from the host device tree into the guest. Since the guest can
+     * directly access the timebase without host involvement, we must expose
+     * the correct frequencies. */
     if (kvm_enabled()) {
-        bamboo_fdt_update(fdt);
+        tb_freq = kvmppc_get_tbfreq();
+        clock_freq = kvmppc_get_clockfreq();
     }
 
+    qemu_devtree_setprop_cell(fdt, "/cpus/cpu@0", "clock-frequency",
+                              clock_freq);
+    qemu_devtree_setprop_cell(fdt, "/cpus/cpu@0", "timebase-frequency",
+                              tb_freq);
+
     ret = rom_add_blob_fixed(BINARY_DEVICE_TREE_FILE, fdt, fdt_size, addr);
     g_free(fdt);
 
-- 
1.6.0.2
^ permalink raw reply related	[flat|nested] 128+ messages in thread
- * [Qemu-devel] [PATCH 20/58] PPC: KVM: Remove kvmppc_read_host_property
  2011-09-14  8:42 [Qemu-devel] [PULL 00/58] ppc patch queue 2011-09-14 Alexander Graf
                   ` (18 preceding siblings ...)
  2011-09-14  8:42 ` [Qemu-devel] [PATCH 19/58] PPC: bamboo: Use kvm api for freq and clock frequencies Alexander Graf
@ 2011-09-14  8:42 ` Alexander Graf
  2011-09-14  8:42 ` [Qemu-devel] [PATCH 21/58] PPC: KVM: Add stubs for kvm helper functions Alexander Graf
                   ` (37 subsequent siblings)
  57 siblings, 0 replies; 128+ messages in thread
From: Alexander Graf @ 2011-09-14  8:42 UTC (permalink / raw)
  To: qemu-devel Developers; +Cc: Blue Swirl, qemu-ppc, Aurelien Jarno
We just got rid of the last user of kvmppc_read_host_property, so we
can now safely remove it.
Signed-off-by: Alexander Graf <agraf@suse.de>
---
 target-ppc/kvm_ppc.c |   35 -----------------------------------
 target-ppc/kvm_ppc.h |   11 -----------
 2 files changed, 0 insertions(+), 46 deletions(-)
diff --git a/target-ppc/kvm_ppc.c b/target-ppc/kvm_ppc.c
index 233115e..0c9b530 100644
--- a/target-ppc/kvm_ppc.c
+++ b/target-ppc/kvm_ppc.c
@@ -21,41 +21,6 @@
 static QEMUTimer *kvmppc_timer;
 static unsigned int kvmppc_timer_rate;
 
-#ifdef CONFIG_FDT
-int kvmppc_read_host_property(const char *node_path, const char *prop,
-                                     void *val, size_t len)
-{
-    char *path;
-    FILE *f;
-    int ret = 0;
-    int pathlen;
-
-    pathlen = snprintf(NULL, 0, "%s/%s/%s", PROC_DEVTREE_PATH, node_path, prop)
-              + 1;
-    path = g_malloc(pathlen);
-
-    snprintf(path, pathlen, "%s/%s/%s", PROC_DEVTREE_PATH, node_path, prop);
-
-    f = fopen(path, "rb");
-    if (f == NULL) {
-        ret = errno;
-        goto free;
-    }
-
-    len = fread(val, len, 1, f);
-    if (len != 1) {
-        ret = ferror(f);
-        goto close;
-    }
-
-close:
-    fclose(f);
-free:
-    free(path);
-    return ret;
-}
-#endif
-
 static void kvmppc_timer_hack(void *opaque)
 {
     qemu_service_io();
diff --git a/target-ppc/kvm_ppc.h b/target-ppc/kvm_ppc.h
index 7c08c0f..0c659c8 100644
--- a/target-ppc/kvm_ppc.h
+++ b/target-ppc/kvm_ppc.h
@@ -10,17 +10,6 @@
 #define __KVM_PPC_H__
 
 void kvmppc_init(void);
-#ifndef CONFIG_KVM
-static inline int kvmppc_read_host_property(const char *node_path, const char *prop,
-                                            void *val, size_t len)
-{
-    assert(0);
-    return -ENOSYS;
-}
-#else
-int kvmppc_read_host_property(const char *node_path, const char *prop,
-                                     void *val, size_t len);
-#endif
 
 uint32_t kvmppc_get_tbfreq(void);
 uint64_t kvmppc_get_clockfreq(void);
-- 
1.6.0.2
^ permalink raw reply related	[flat|nested] 128+ messages in thread
- * [Qemu-devel] [PATCH 21/58] PPC: KVM: Add stubs for kvm helper functions
  2011-09-14  8:42 [Qemu-devel] [PULL 00/58] ppc patch queue 2011-09-14 Alexander Graf
                   ` (19 preceding siblings ...)
  2011-09-14  8:42 ` [Qemu-devel] [PATCH 20/58] PPC: KVM: Remove kvmppc_read_host_property Alexander Graf
@ 2011-09-14  8:42 ` Alexander Graf
  2011-09-14  8:42 ` [Qemu-devel] [PATCH 22/58] PPC: E500: Update freqs for all CPUs Alexander Graf
                   ` (36 subsequent siblings)
  57 siblings, 0 replies; 128+ messages in thread
From: Alexander Graf @ 2011-09-14  8:42 UTC (permalink / raw)
  To: qemu-devel Developers; +Cc: Blue Swirl, qemu-ppc, Aurelien Jarno
We have a bunch of helper functions that have no stubs for the case where
CONFIG_KVM is not enabled. That hasn't bitten us so far, because gcc can
optimize the calls out pretty well, but we should really provide them.
Signed-off-by: Alexander Graf <agraf@suse.de>
---
v1 -> v2:
   - use uint64_t for clockfreq
---
 target-ppc/kvm_ppc.h |   26 ++++++++++++++++++++++++++
 1 files changed, 26 insertions(+), 0 deletions(-)
diff --git a/target-ppc/kvm_ppc.h b/target-ppc/kvm_ppc.h
index 0c659c8..76f98d9 100644
--- a/target-ppc/kvm_ppc.h
+++ b/target-ppc/kvm_ppc.h
@@ -11,11 +11,37 @@
 
 void kvmppc_init(void);
 
+#ifdef CONFIG_KVM
+
 uint32_t kvmppc_get_tbfreq(void);
 uint64_t kvmppc_get_clockfreq(void);
 int kvmppc_get_hypercall(CPUState *env, uint8_t *buf, int buf_len);
 int kvmppc_set_interrupt(CPUState *env, int irq, int level);
 
+#else
+
+static inline uint32_t kvmppc_get_tbfreq(void)
+{
+    return 0;
+}
+
+static inline uint64_t kvmppc_get_clockfreq(void)
+{
+    return 0;
+}
+
+static inline int kvmppc_get_hypercall(CPUState *env, uint8_t *buf, int buf_len)
+{
+    return -1;
+}
+
+static inline int kvmppc_set_interrupt(CPUState *env, int irq, int level)
+{
+    return -1;
+}
+
+#endif
+
 #ifndef CONFIG_KVM
 #define kvmppc_eieio() do { } while (0)
 #else
-- 
1.6.0.2
^ permalink raw reply related	[flat|nested] 128+ messages in thread
- * [Qemu-devel] [PATCH 22/58] PPC: E500: Update freqs for all CPUs
  2011-09-14  8:42 [Qemu-devel] [PULL 00/58] ppc patch queue 2011-09-14 Alexander Graf
                   ` (20 preceding siblings ...)
  2011-09-14  8:42 ` [Qemu-devel] [PATCH 21/58] PPC: KVM: Add stubs for kvm helper functions Alexander Graf
@ 2011-09-14  8:42 ` Alexander Graf
  2011-09-14  8:42 ` [Qemu-devel] [PATCH 23/58] PPC: E500: Remove unneeded CPU nodes Alexander Graf
                   ` (35 subsequent siblings)
  57 siblings, 0 replies; 128+ messages in thread
From: Alexander Graf @ 2011-09-14  8:42 UTC (permalink / raw)
  To: qemu-devel Developers; +Cc: Blue Swirl, qemu-ppc, Aurelien Jarno
Now that we can so nicely find out the host's frequencies, we should also
make sure that we get them into all virtual CPUs' device tree nodes.
Signed-off-by: Alexander Graf <agraf@suse.de>
---
 hw/ppce500_mpc8544ds.c |   10 +++++++---
 1 files changed, 7 insertions(+), 3 deletions(-)
diff --git a/hw/ppce500_mpc8544ds.c b/hw/ppce500_mpc8544ds.c
index 2c7c677..0791e27 100644
--- a/hw/ppce500_mpc8544ds.c
+++ b/hw/ppce500_mpc8544ds.c
@@ -70,9 +70,9 @@ static int mpc8544_load_device_tree(CPUState *env,
     int fdt_size;
     void *fdt;
     uint8_t hypercall[16];
-    char cpu_name[128] = "/cpus/PowerPC,8544@0";
     uint32_t clock_freq = 400000000;
     uint32_t tb_freq = 400000000;
+    int i;
 
     filename = qemu_find_file(QEMU_FILE_TYPE_BIOS, BINARY_DEVICE_TREE_FILE);
     if (!filename) {
@@ -122,8 +122,12 @@ static int mpc8544_load_device_tree(CPUState *env,
                              hypercall, sizeof(hypercall));
     }
 
-    qemu_devtree_setprop_cell(fdt, cpu_name, "clock-frequency", clock_freq);
-    qemu_devtree_setprop_cell(fdt, cpu_name, "timebase-frequency", tb_freq);
+    for (i = 0; i < smp_cpus; i++) {
+        char cpu_name[128];
+        snprintf(cpu_name, sizeof(cpu_name), "/cpus/PowerPC,8544@%x", i);
+        qemu_devtree_setprop_cell(fdt, cpu_name, "clock-frequency", clock_freq);
+        qemu_devtree_setprop_cell(fdt, cpu_name, "timebase-frequency", tb_freq);
+    }
 
     ret = rom_add_blob_fixed(BINARY_DEVICE_TREE_FILE, fdt, fdt_size, addr);
     g_free(fdt);
-- 
1.6.0.2
^ permalink raw reply related	[flat|nested] 128+ messages in thread
- * [Qemu-devel] [PATCH 23/58] PPC: E500: Remove unneeded CPU nodes
  2011-09-14  8:42 [Qemu-devel] [PULL 00/58] ppc patch queue 2011-09-14 Alexander Graf
                   ` (21 preceding siblings ...)
  2011-09-14  8:42 ` [Qemu-devel] [PATCH 22/58] PPC: E500: Update freqs for all CPUs Alexander Graf
@ 2011-09-14  8:42 ` Alexander Graf
  2011-09-14  8:42 ` [Qemu-devel] [PATCH 24/58] PPC: E500: Add PV spinning code Alexander Graf
                   ` (34 subsequent siblings)
  57 siblings, 0 replies; 128+ messages in thread
From: Alexander Graf @ 2011-09-14  8:42 UTC (permalink / raw)
  To: qemu-devel Developers; +Cc: Blue Swirl, qemu-ppc, Aurelien Jarno
We should only keep CPU nodes in the device tree around that we really have
virtual CPUs for. So remove all superfluous entries that we just keep there
in case someone wants to create a lot of vCPUs.
Signed-off-by: Alexander Graf <agraf@suse.de>
---
 hw/ppce500_mpc8544ds.c |    6 ++++++
 1 files changed, 6 insertions(+), 0 deletions(-)
diff --git a/hw/ppce500_mpc8544ds.c b/hw/ppce500_mpc8544ds.c
index 0791e27..9379624 100644
--- a/hw/ppce500_mpc8544ds.c
+++ b/hw/ppce500_mpc8544ds.c
@@ -129,6 +129,12 @@ static int mpc8544_load_device_tree(CPUState *env,
         qemu_devtree_setprop_cell(fdt, cpu_name, "timebase-frequency", tb_freq);
     }
 
+    for (i = smp_cpus; i < 32; i++) {
+        char cpu_name[128];
+        snprintf(cpu_name, sizeof(cpu_name), "/cpus/PowerPC,8544@%x", i);
+        qemu_devtree_nop_node(fdt, cpu_name);
+    }
+
     ret = rom_add_blob_fixed(BINARY_DEVICE_TREE_FILE, fdt, fdt_size, addr);
     g_free(fdt);
 
-- 
1.6.0.2
^ permalink raw reply related	[flat|nested] 128+ messages in thread
- * [Qemu-devel] [PATCH 24/58] PPC: E500: Add PV spinning code
  2011-09-14  8:42 [Qemu-devel] [PULL 00/58] ppc patch queue 2011-09-14 Alexander Graf
                   ` (22 preceding siblings ...)
  2011-09-14  8:42 ` [Qemu-devel] [PATCH 23/58] PPC: E500: Remove unneeded CPU nodes Alexander Graf
@ 2011-09-14  8:42 ` Alexander Graf
  2011-09-17 16:58   ` Blue Swirl
  2011-09-14  8:42 ` [Qemu-devel] [PATCH 25/58] PPC: E500: Update cpu-release-addr property in cpu nodes Alexander Graf
                   ` (33 subsequent siblings)
  57 siblings, 1 reply; 128+ messages in thread
From: Alexander Graf @ 2011-09-14  8:42 UTC (permalink / raw)
  To: qemu-devel Developers; +Cc: Blue Swirl, qemu-ppc, Aurelien Jarno
CPUs that are not the boot CPU need to run in spinning code to check if they
should run off to execute and if so where to jump to. This usually happens
by leaving secondary CPUs looping and checking if some variable in memory
changed.
In an environment like Qemu, however, we can be more clever. We can just export
the spin table the primary CPU modifies as an MMIO region, so that a write to
it wakes up the respective secondary CPU in an event-based fashion. That saves
us quite some cycles while the secondary CPUs are not up yet.
So this patch adds a PV device that simply exports the spinning table into the
guest and thus allows the primary CPU to wake up secondary ones.
Signed-off-by: Alexander Graf <agraf@suse.de>
---
v1 -> v2:
  - change into MMIO scheme
  - map the secondary NIP instead of 0 1:1
  - only map 64MB for TLB, same as u-boot
  - prepare code for 64-bit spinnings
v2 -> v3:
  - remove r6
  - set MAS2_M
  - map EA 0
  - use second TLB1 entry
v3 -> v4:
  - change to memoryops
v4 -> v5:
  - fix endianness bugs
---
 Makefile.target        |    2 +-
 hw/ppce500_mpc8544ds.c |   33 ++++++++-
 hw/ppce500_spin.c      |  186 ++++++++++++++++++++++++++++++++++++++++++++++++
 3 files changed, 216 insertions(+), 5 deletions(-)
 create mode 100644 hw/ppce500_spin.c
diff --git a/Makefile.target b/Makefile.target
index 2ed9099..3f689ce 100644
--- a/Makefile.target
+++ b/Makefile.target
@@ -247,7 +247,7 @@ endif
 obj-ppc-y += ppc4xx_devs.o ppc4xx_pci.o ppc405_uc.o ppc405_boards.o
 obj-ppc-y += ppc440.o ppc440_bamboo.o
 # PowerPC E500 boards
-obj-ppc-y += ppce500_mpc8544ds.o mpc8544_guts.o
+obj-ppc-y += ppce500_mpc8544ds.o mpc8544_guts.o ppce500_spin.o
 # PowerPC 440 Xilinx ML507 reference board.
 obj-ppc-y += virtex_ml507.o
 obj-ppc-$(CONFIG_KVM) += kvm_ppc.o
diff --git a/hw/ppce500_mpc8544ds.c b/hw/ppce500_mpc8544ds.c
index 9379624..3b8b449 100644
--- a/hw/ppce500_mpc8544ds.c
+++ b/hw/ppce500_mpc8544ds.c
@@ -49,6 +49,7 @@
 #define MPC8544_PCI_IO             0xE1000000
 #define MPC8544_PCI_IOLEN          0x10000
 #define MPC8544_UTIL_BASE          (MPC8544_CCSRBAR_BASE + 0xe0000)
+#define MPC8544_SPIN_BASE          0xEF000000
 
 struct boot_info
 {
@@ -164,6 +165,18 @@ static void mmubooke_create_initial_mapping(CPUState *env,
     tlb->mas7_3 |= MAS3_UR | MAS3_UW | MAS3_UX | MAS3_SR | MAS3_SW | MAS3_SX;
 }
 
+static void mpc8544ds_cpu_reset_sec(void *opaque)
+{
+    CPUState *env = opaque;
+
+    cpu_reset(env);
+
+    /* Secondary CPU starts in halted state for now. Needs to change when
+       implementing non-kernel boot. */
+    env->halted = 1;
+    env->exception_index = EXCP_HLT;
+}
+
 static void mpc8544ds_cpu_reset(void *opaque)
 {
     CPUState *env = opaque;
@@ -172,6 +185,7 @@ static void mpc8544ds_cpu_reset(void *opaque)
     cpu_reset(env);
 
     /* Set initial guest state. */
+    env->halted = 0;
     env->gpr[1] = (16<<20) - 8;
     env->gpr[3] = bi->dt_base;
     env->nip = bi->entry;
@@ -199,7 +213,6 @@ static void mpc8544ds_init(ram_addr_t ram_size,
     unsigned int pci_irq_nrs[4] = {1, 2, 3, 4};
     qemu_irq **irqs, *mpic;
     DeviceState *dev;
-    struct boot_info *boot_info;
     CPUState *firstenv = NULL;
 
     /* Setup CPUs */
@@ -234,9 +247,16 @@ static void mpc8544ds_init(ram_addr_t ram_size,
         env->spr[SPR_40x_TCR] = 1 << 26;
 
         /* Register reset handler */
-        boot_info = g_malloc0(sizeof(struct boot_info));
-        qemu_register_reset(mpc8544ds_cpu_reset, env);
-        env->load_info = boot_info;
+        if (!i) {
+            /* Primary CPU */
+            struct boot_info *boot_info;
+            boot_info = g_malloc0(sizeof(struct boot_info));
+            qemu_register_reset(mpc8544ds_cpu_reset, env);
+            env->load_info = boot_info;
+        } else {
+            /* Secondary CPUs */
+            qemu_register_reset(mpc8544ds_cpu_reset_sec, env);
+        }
     }
 
     env = firstenv;
@@ -289,6 +309,9 @@ static void mpc8544ds_init(ram_addr_t ram_size,
         }
     }
 
+    /* Register spinning region */
+    sysbus_create_simple("e500-spin", MPC8544_SPIN_BASE, NULL);
+
     /* Load kernel. */
     if (kernel_filename) {
         kernel_size = load_uimage(kernel_filename, &entry, &loadaddr, NULL);
@@ -321,6 +344,8 @@ static void mpc8544ds_init(ram_addr_t ram_size,
 
     /* If we're loading a kernel directly, we must load the device tree too. */
     if (kernel_filename) {
+        struct boot_info *boot_info;
+
 #ifndef CONFIG_FDT
         cpu_abort(env, "Compiled without FDT support - can't load kernel\n");
 #endif
diff --git a/hw/ppce500_spin.c b/hw/ppce500_spin.c
new file mode 100644
index 0000000..38451ac
--- /dev/null
+++ b/hw/ppce500_spin.c
@@ -0,0 +1,186 @@
+#include "hw.h"
+#include "sysemu.h"
+#include "sysbus.h"
+#include "kvm.h"
+
+#define MAX_CPUS 32
+
+typedef struct spin_info {
+    uint64_t addr;
+    uint64_t r3;
+    uint32_t resv;
+    uint32_t pir;
+    uint64_t reserved;
+} __attribute__ ((packed)) SpinInfo;
+
+typedef struct spin_state {
+    SysBusDevice busdev;
+    MemoryRegion iomem;
+    SpinInfo spin[MAX_CPUS];
+} SpinState;
+
+typedef struct spin_kick {
+    CPUState *env;
+    SpinInfo *spin;
+} SpinKick;
+
+static void spin_reset(void *opaque)
+{
+    SpinState *s = opaque;
+    int i;
+
+    for (i = 0; i < MAX_CPUS; i++) {
+        SpinInfo *info = &s->spin[i];
+
+        info->pir = i;
+        info->r3 = i;
+        info->addr = 1;
+    }
+}
+
+/* Create the initial BookE TLB entry for a secondary CPU, spanning 64MB.  */
+static inline target_phys_addr_t booke206_page_size_to_tlb(uint64_t size)
+{
+    return (ffs(size >> 10) - 1) >> 1;
+}
+
+static void mmubooke_create_initial_mapping(CPUState *env,
+                                     target_ulong va,
+                                     target_phys_addr_t pa,
+                                     target_phys_addr_t len)
+{
+    ppcmas_tlb_t *tlb = booke206_get_tlbm(env, 1, 0, 1);
+    target_phys_addr_t size;
+
+    size = (booke206_page_size_to_tlb(len) << MAS1_TSIZE_SHIFT);
+    tlb->mas1 = MAS1_VALID | size;
+    tlb->mas2 = (va & TARGET_PAGE_MASK) | MAS2_M;
+    tlb->mas7_3 = pa & TARGET_PAGE_MASK;
+    tlb->mas7_3 |= MAS3_UR | MAS3_UW | MAS3_UX | MAS3_SR | MAS3_SW | MAS3_SX;
+}
+
+static void spin_kick(void *data)
+{
+    SpinKick *kick = data;
+    CPUState *env = kick->env;
+    SpinInfo *curspin = kick->spin;
+    target_phys_addr_t map_size = 64 * 1024 * 1024;
+    target_phys_addr_t map_start;
+
+    cpu_synchronize_state(env);
+    stl_p(&curspin->pir, env->spr[SPR_PIR]);
+    env->nip = ldq_p(&curspin->addr) & (map_size - 1);
+    env->gpr[3] = ldq_p(&curspin->r3);
+    env->gpr[4] = 0;
+    env->gpr[5] = 0;
+    env->gpr[6] = 0;
+    env->gpr[7] = map_size;
+    env->gpr[8] = 0;
+    env->gpr[9] = 0;
+
+    map_start = ldq_p(&curspin->addr) & ~(map_size - 1);
+    mmubooke_create_initial_mapping(env, 0, map_start, map_size);
+
+    env->halted = 0;
+    env->exception_index = -1;
+    qemu_cpu_kick(env);
+}
+
+static void spin_write(void *opaque, target_phys_addr_t addr, uint64_t value,
+                       unsigned len)
+{
+    SpinState *s = opaque;
+    int env_idx = addr / sizeof(SpinInfo);
+    CPUState *env;
+    SpinInfo *curspin = &s->spin[env_idx];
+    uint8_t *curspin_p = (uint8_t*)curspin;
+
+    for (env = first_cpu; env != NULL; env = env->next_cpu) {
+        if (env->cpu_index == env_idx) {
+            break;
+        }
+    }
+
+    if (!env) {
+        /* Unknown CPU */
+        return;
+    }
+
+    if (!env->cpu_index) {
+        /* primary CPU doesn't spin */
+        return;
+    }
+
+    curspin_p = &curspin_p[addr % sizeof(SpinInfo)];
+    switch (len) {
+    case 1:
+        stb_p(curspin_p, value);
+        break;
+    case 2:
+        stw_p(curspin_p, value);
+        break;
+    case 4:
+        stl_p(curspin_p, value);
+        break;
+    }
+
+    if (!(ldq_p(&curspin->addr) & 1)) {
+        /* run CPU */
+        SpinKick kick = {
+            .env = env,
+            .spin = curspin,
+        };
+
+        run_on_cpu(env, spin_kick, &kick);
+    }
+}
+
+static uint64_t spin_read(void *opaque, target_phys_addr_t addr, unsigned len)
+{
+    SpinState *s = opaque;
+    uint8_t *spin_p = &((uint8_t*)s->spin)[addr];
+
+    switch (len) {
+    case 1:
+        return ldub_p(spin_p);
+    case 2:
+        return lduw_p(spin_p);
+    case 4:
+        return ldl_p(spin_p);
+    default:
+        assert(0);
+    }
+}
+
+const MemoryRegionOps spin_rw_ops = {
+    .read = spin_read,
+    .write = spin_write,
+    .endianness = DEVICE_BIG_ENDIAN,
+};
+
+static int ppce500_spin_initfn(SysBusDevice *dev)
+{
+    SpinState *s;
+
+    s = FROM_SYSBUS(SpinState, sysbus_from_qdev(dev));
+
+    memory_region_init_io(&s->iomem, &spin_rw_ops, s, "e500 spin pv device",
+                          sizeof(SpinInfo) * MAX_CPUS);
+    sysbus_init_mmio_region(dev, &s->iomem);
+
+    qemu_register_reset(spin_reset, s);
+
+    return 0;
+}
+
+static SysBusDeviceInfo ppce500_spin_info = {
+    .init         = ppce500_spin_initfn,
+    .qdev.name    = "e500-spin",
+    .qdev.size    = sizeof(SpinState),
+};
+
+static void ppce500_spin_register(void)
+{
+    sysbus_register_withprop(&ppce500_spin_info);
+}
+device_init(ppce500_spin_register);
-- 
1.6.0.2
^ permalink raw reply related	[flat|nested] 128+ messages in thread
- * Re: [Qemu-devel] [PATCH 24/58] PPC: E500: Add PV spinning code
  2011-09-14  8:42 ` [Qemu-devel] [PATCH 24/58] PPC: E500: Add PV spinning code Alexander Graf
@ 2011-09-17 16:58   ` Blue Swirl
  2011-09-17 17:15     ` Alexander Graf
  0 siblings, 1 reply; 128+ messages in thread
From: Blue Swirl @ 2011-09-17 16:58 UTC (permalink / raw)
  To: Alexander Graf; +Cc: qemu-ppc, qemu-devel Developers, Aurelien Jarno
On Wed, Sep 14, 2011 at 8:42 AM, Alexander Graf <agraf@suse.de> wrote:
> CPUs that are not the boot CPU need to run in spinning code to check if they
> should run off to execute and if so where to jump to. This usually happens
> by leaving secondary CPUs looping and checking if some variable in memory
> changed.
>
> In an environment like Qemu, however, we can be more clever. We can just export
> the spin table the primary CPU modifies as an MMIO region, so that a write to
> it wakes up the respective secondary CPU in an event-based fashion. That saves
> us quite some cycles while the secondary CPUs are not up yet.
>
> So this patch adds a PV device that simply exports the spinning table into the
> guest and thus allows the primary CPU to wake up secondary ones.
On Sparc32, there is no need for a PV device. The CPU is woken up from
halted state with an IPI. Maybe you could use this approach?
> Signed-off-by: Alexander Graf <agraf@suse.de>
>
> ---
>
> v1 -> v2:
>
>  - change into MMIO scheme
>  - map the secondary NIP instead of 0 1:1
>  - only map 64MB for TLB, same as u-boot
>  - prepare code for 64-bit spinnings
>
> v2 -> v3:
>
>  - remove r6
>  - set MAS2_M
>  - map EA 0
>  - use second TLB1 entry
>
> v3 -> v4:
>
>  - change to memoryops
>
> v4 -> v5:
>
>  - fix endianness bugs
> ---
>  Makefile.target        |    2 +-
>  hw/ppce500_mpc8544ds.c |   33 ++++++++-
>  hw/ppce500_spin.c      |  186 ++++++++++++++++++++++++++++++++++++++++++++++++
>  3 files changed, 216 insertions(+), 5 deletions(-)
>  create mode 100644 hw/ppce500_spin.c
>
> diff --git a/Makefile.target b/Makefile.target
> index 2ed9099..3f689ce 100644
> --- a/Makefile.target
> +++ b/Makefile.target
> @@ -247,7 +247,7 @@ endif
>  obj-ppc-y += ppc4xx_devs.o ppc4xx_pci.o ppc405_uc.o ppc405_boards.o
>  obj-ppc-y += ppc440.o ppc440_bamboo.o
>  # PowerPC E500 boards
> -obj-ppc-y += ppce500_mpc8544ds.o mpc8544_guts.o
> +obj-ppc-y += ppce500_mpc8544ds.o mpc8544_guts.o ppce500_spin.o
>  # PowerPC 440 Xilinx ML507 reference board.
>  obj-ppc-y += virtex_ml507.o
>  obj-ppc-$(CONFIG_KVM) += kvm_ppc.o
> diff --git a/hw/ppce500_mpc8544ds.c b/hw/ppce500_mpc8544ds.c
> index 9379624..3b8b449 100644
> --- a/hw/ppce500_mpc8544ds.c
> +++ b/hw/ppce500_mpc8544ds.c
> @@ -49,6 +49,7 @@
>  #define MPC8544_PCI_IO             0xE1000000
>  #define MPC8544_PCI_IOLEN          0x10000
>  #define MPC8544_UTIL_BASE          (MPC8544_CCSRBAR_BASE + 0xe0000)
> +#define MPC8544_SPIN_BASE          0xEF000000
>
>  struct boot_info
>  {
> @@ -164,6 +165,18 @@ static void mmubooke_create_initial_mapping(CPUState *env,
>     tlb->mas7_3 |= MAS3_UR | MAS3_UW | MAS3_UX | MAS3_SR | MAS3_SW | MAS3_SX;
>  }
>
> +static void mpc8544ds_cpu_reset_sec(void *opaque)
> +{
> +    CPUState *env = opaque;
> +
> +    cpu_reset(env);
> +
> +    /* Secondary CPU starts in halted state for now. Needs to change when
> +       implementing non-kernel boot. */
> +    env->halted = 1;
> +    env->exception_index = EXCP_HLT;
> +}
> +
>  static void mpc8544ds_cpu_reset(void *opaque)
>  {
>     CPUState *env = opaque;
> @@ -172,6 +185,7 @@ static void mpc8544ds_cpu_reset(void *opaque)
>     cpu_reset(env);
>
>     /* Set initial guest state. */
> +    env->halted = 0;
>     env->gpr[1] = (16<<20) - 8;
>     env->gpr[3] = bi->dt_base;
>     env->nip = bi->entry;
> @@ -199,7 +213,6 @@ static void mpc8544ds_init(ram_addr_t ram_size,
>     unsigned int pci_irq_nrs[4] = {1, 2, 3, 4};
>     qemu_irq **irqs, *mpic;
>     DeviceState *dev;
> -    struct boot_info *boot_info;
>     CPUState *firstenv = NULL;
>
>     /* Setup CPUs */
> @@ -234,9 +247,16 @@ static void mpc8544ds_init(ram_addr_t ram_size,
>         env->spr[SPR_40x_TCR] = 1 << 26;
>
>         /* Register reset handler */
> -        boot_info = g_malloc0(sizeof(struct boot_info));
> -        qemu_register_reset(mpc8544ds_cpu_reset, env);
> -        env->load_info = boot_info;
> +        if (!i) {
> +            /* Primary CPU */
> +            struct boot_info *boot_info;
> +            boot_info = g_malloc0(sizeof(struct boot_info));
> +            qemu_register_reset(mpc8544ds_cpu_reset, env);
> +            env->load_info = boot_info;
> +        } else {
> +            /* Secondary CPUs */
> +            qemu_register_reset(mpc8544ds_cpu_reset_sec, env);
> +        }
>     }
>
>     env = firstenv;
> @@ -289,6 +309,9 @@ static void mpc8544ds_init(ram_addr_t ram_size,
>         }
>     }
>
> +    /* Register spinning region */
> +    sysbus_create_simple("e500-spin", MPC8544_SPIN_BASE, NULL);
> +
>     /* Load kernel. */
>     if (kernel_filename) {
>         kernel_size = load_uimage(kernel_filename, &entry, &loadaddr, NULL);
> @@ -321,6 +344,8 @@ static void mpc8544ds_init(ram_addr_t ram_size,
>
>     /* If we're loading a kernel directly, we must load the device tree too. */
>     if (kernel_filename) {
> +        struct boot_info *boot_info;
> +
>  #ifndef CONFIG_FDT
>         cpu_abort(env, "Compiled without FDT support - can't load kernel\n");
>  #endif
> diff --git a/hw/ppce500_spin.c b/hw/ppce500_spin.c
> new file mode 100644
> index 0000000..38451ac
> --- /dev/null
> +++ b/hw/ppce500_spin.c
> @@ -0,0 +1,186 @@
> +#include "hw.h"
> +#include "sysemu.h"
> +#include "sysbus.h"
> +#include "kvm.h"
> +
> +#define MAX_CPUS 32
> +
> +typedef struct spin_info {
> +    uint64_t addr;
> +    uint64_t r3;
> +    uint32_t resv;
> +    uint32_t pir;
> +    uint64_t reserved;
> +} __attribute__ ((packed)) SpinInfo;
This attribute isn't needed, the fields are aligned and also the
structure is internal to QEMU so misalignment wouldn't matter.
In the future, please use QEMU_PACKED.
> +
> +typedef struct spin_state {
> +    SysBusDevice busdev;
> +    MemoryRegion iomem;
> +    SpinInfo spin[MAX_CPUS];
> +} SpinState;
> +
> +typedef struct spin_kick {
> +    CPUState *env;
> +    SpinInfo *spin;
> +} SpinKick;
> +
> +static void spin_reset(void *opaque)
> +{
> +    SpinState *s = opaque;
> +    int i;
> +
> +    for (i = 0; i < MAX_CPUS; i++) {
> +        SpinInfo *info = &s->spin[i];
> +
> +        info->pir = i;
> +        info->r3 = i;
> +        info->addr = 1;
> +    }
> +}
> +
> +/* Create -kernel TLB entries for BookE, linearly spanning 256MB.  */
> +static inline target_phys_addr_t booke206_page_size_to_tlb(uint64_t size)
> +{
> +    return (ffs(size >> 10) - 1) >> 1;
> +}
> +
> +static void mmubooke_create_initial_mapping(CPUState *env,
> +                                     target_ulong va,
> +                                     target_phys_addr_t pa,
> +                                     target_phys_addr_t len)
> +{
> +    ppcmas_tlb_t *tlb = booke206_get_tlbm(env, 1, 0, 1);
> +    target_phys_addr_t size;
> +
> +    size = (booke206_page_size_to_tlb(len) << MAS1_TSIZE_SHIFT);
> +    tlb->mas1 = MAS1_VALID | size;
> +    tlb->mas2 = (va & TARGET_PAGE_MASK) | MAS2_M;
> +    tlb->mas7_3 = pa & TARGET_PAGE_MASK;
> +    tlb->mas7_3 |= MAS3_UR | MAS3_UW | MAS3_UX | MAS3_SR | MAS3_SW | MAS3_SX;
> +}
> +
> +static void spin_kick(void *data)
> +{
> +    SpinKick *kick = data;
> +    CPUState *env = kick->env;
> +    SpinInfo *curspin = kick->spin;
> +    target_phys_addr_t map_size = 64 * 1024 * 1024;
> +    target_phys_addr_t map_start;
> +
> +    cpu_synchronize_state(env);
> +    stl_p(&curspin->pir, env->spr[SPR_PIR]);
> +    env->nip = ldq_p(&curspin->addr) & (map_size - 1);
ldq_be_p() for non-PV emulation.
> +    env->gpr[3] = ldq_p(&curspin->r3);
> +    env->gpr[4] = 0;
> +    env->gpr[5] = 0;
> +    env->gpr[6] = 0;
> +    env->gpr[7] = map_size;
> +    env->gpr[8] = 0;
> +    env->gpr[9] = 0;
> +
> +    map_start = ldq_p(&curspin->addr) & ~(map_size - 1);
> +    mmubooke_create_initial_mapping(env, 0, map_start, map_size);
> +
> +    env->halted = 0;
> +    env->exception_index = -1;
> +    qemu_cpu_kick(env);
> +}
> +
> +static void spin_write(void *opaque, target_phys_addr_t addr, uint64_t value,
> +                       unsigned len)
> +{
> +    SpinState *s = opaque;
> +    int env_idx = addr / sizeof(SpinInfo);
> +    CPUState *env;
> +    SpinInfo *curspin = &s->spin[env_idx];
> +    uint8_t *curspin_p = (uint8_t*)curspin;
> +
> +    for (env = first_cpu; env != NULL; env = env->next_cpu) {
> +        if (env->cpu_index == env_idx) {
> +            break;
> +        }
> +    }
> +
> +    if (!env) {
> +        /* Unknown CPU */
> +        return;
> +    }
> +
> +    if (!env->cpu_index) {
> +        /* primary CPU doesn't spin */
> +        return;
> +    }
> +
> +    curspin_p = &curspin_p[addr % sizeof(SpinInfo)];
> +    switch (len) {
> +    case 1:
> +        stb_p(curspin_p, value);
> +        break;
> +    case 2:
> +        stw_p(curspin_p, value);
> +        break;
> +    case 4:
> +        stl_p(curspin_p, value);
> +        break;
> +    }
> +
> +    if (!(ldq_p(&curspin->addr) & 1)) {
> +        /* run CPU */
> +        SpinKick kick = {
> +            .env = env,
> +            .spin = curspin,
> +        };
> +
> +        run_on_cpu(env, spin_kick, &kick);
> +    }
> +}
> +
> +static uint64_t spin_read(void *opaque, target_phys_addr_t addr, unsigned len)
> +{
> +    SpinState *s = opaque;
> +    uint8_t *spin_p = &((uint8_t*)s->spin)[addr];
> +
> +    switch (len) {
> +    case 1:
> +        return ldub_p(spin_p);
> +    case 2:
> +        return lduw_p(spin_p);
> +    case 4:
> +        return ldl_p(spin_p);
> +    default:
> +        assert(0);
abort()
> +    }
> +}
> +
> +const MemoryRegionOps spin_rw_ops = {
> +    .read = spin_read,
> +    .write = spin_write,
> +    .endianness = DEVICE_BIG_ENDIAN,
> +};
> +
> +static int ppce500_spin_initfn(SysBusDevice *dev)
> +{
> +    SpinState *s;
> +
> +    s = FROM_SYSBUS(SpinState, sysbus_from_qdev(dev));
> +
> +    memory_region_init_io(&s->iomem, &spin_rw_ops, s, "e500 spin pv device",
> +                          sizeof(SpinInfo) * MAX_CPUS);
> +    sysbus_init_mmio_region(dev, &s->iomem);
> +
> +    qemu_register_reset(spin_reset, s);
> +
> +    return 0;
> +}
> +
> +static SysBusDeviceInfo ppce500_spin_info = {
> +    .init         = ppce500_spin_initfn,
> +    .qdev.name    = "e500-spin",
> +    .qdev.size    = sizeof(SpinState),
> +};
> +
> +static void ppce500_spin_register(void)
> +{
> +    sysbus_register_withprop(&ppce500_spin_info);
> +}
> +device_init(ppce500_spin_register);
> --
> 1.6.0.2
>
>
^ permalink raw reply	[flat|nested] 128+ messages in thread
- * Re: [Qemu-devel] [PATCH 24/58] PPC: E500: Add PV spinning code
  2011-09-17 16:58   ` Blue Swirl
@ 2011-09-17 17:15     ` Alexander Graf
  2011-09-17 17:40       ` Blue Swirl
  0 siblings, 1 reply; 128+ messages in thread
From: Alexander Graf @ 2011-09-17 17:15 UTC (permalink / raw)
  To: Blue Swirl; +Cc: qemu-ppc@nongnu.org, qemu-devel Developers, Aurelien Jarno
Am 17.09.2011 um 18:58 schrieb Blue Swirl <blauwirbel@gmail.com>:
> On Wed, Sep 14, 2011 at 8:42 AM, Alexander Graf <agraf@suse.de> wrote:
>> CPUs that are not the boot CPU need to run in spinning code to check if they
>> should run off to execute and if so where to jump to. This usually happens
>> by leaving secondary CPUs looping and checking if some variable in memory
>> changed.
>> 
>> In an environment like Qemu however we can be more clever. We can just export
>> the spin table the primary CPU modifies as MMIO region that would event based
>> wake up the respective secondary CPUs. That saves us quite some cycles while
>> the secondary CPUs are not up yet.
>> 
>> So this patch adds a PV device that simply exports the spinning table into the
>> guest and thus allows the primary CPU to wake up secondary ones.
> 
> On Sparc32, there is no need for a PV device. The CPU is woken up from
> halted state with an IPI. Maybe you could use this approach?
The way it's done here is defined by u-boot and now also nailed down in the ePAPR architecture spec. While alternatives might be more appealing, this is how guests work today :).
Alex
> 
^ permalink raw reply	[flat|nested] 128+ messages in thread 
- * Re: [Qemu-devel] [PATCH 24/58] PPC: E500: Add PV spinning code
  2011-09-17 17:15     ` Alexander Graf
@ 2011-09-17 17:40       ` Blue Swirl
  2011-09-19 11:35         ` Alexander Graf
  0 siblings, 1 reply; 128+ messages in thread
From: Blue Swirl @ 2011-09-17 17:40 UTC (permalink / raw)
  To: Alexander Graf; +Cc: qemu-ppc@nongnu.org, qemu-devel Developers, Aurelien Jarno
On Sat, Sep 17, 2011 at 5:15 PM, Alexander Graf <agraf@suse.de> wrote:
>
> Am 17.09.2011 um 18:58 schrieb Blue Swirl <blauwirbel@gmail.com>:
>
>> On Wed, Sep 14, 2011 at 8:42 AM, Alexander Graf <agraf@suse.de> wrote:
>>> CPUs that are not the boot CPU need to run in spinning code to check if they
>>> should run off to execute and if so where to jump to. This usually happens
>>> by leaving secondary CPUs looping and checking if some variable in memory
>>> changed.
>>>
>>> In an environment like Qemu however we can be more clever. We can just export
>>> the spin table the primary CPU modifies as MMIO region that would event based
>>> wake up the respective secondary CPUs. That saves us quite some cycles while
>>> the secondary CPUs are not up yet.
>>>
>>> So this patch adds a PV device that simply exports the spinning table into the
>>> guest and thus allows the primary CPU to wake up secondary ones.
>>
>> On Sparc32, there is no need for a PV device. The CPU is woken up from
>> halted state with an IPI. Maybe you could use this approach?
>
> The way it's done here is defined by u-boot and now also nailed down in the ePAPR architecture spec. While alternatives might be more appealing, this is how guests work today :).
OK. I hoped that there were no implementations yet. The header (btw
missing) should point to the spec.
^ permalink raw reply	[flat|nested] 128+ messages in thread 
- * Re: [Qemu-devel] [PATCH 24/58] PPC: E500: Add PV spinning code
  2011-09-17 17:40       ` Blue Swirl
@ 2011-09-19 11:35         ` Alexander Graf
  2011-09-19 16:12           ` Scott Wood
  0 siblings, 1 reply; 128+ messages in thread
From: Alexander Graf @ 2011-09-19 11:35 UTC (permalink / raw)
  To: Blue Swirl
  Cc: Scott Wood, Yoder Stuart-B08248, qemu-ppc, qemu-devel Developers,
	Aurelien Jarno
On 17.09.2011, at 19:40, Blue Swirl wrote:
> On Sat, Sep 17, 2011 at 5:15 PM, Alexander Graf <agraf@suse.de> wrote:
>> 
>> Am 17.09.2011 um 18:58 schrieb Blue Swirl <blauwirbel@gmail.com>:
>> 
>>> On Wed, Sep 14, 2011 at 8:42 AM, Alexander Graf <agraf@suse.de> wrote:
>>>> CPUs that are not the boot CPU need to run in spinning code to check if they
>>>> should run off to execute and if so where to jump to. This usually happens
>>>> by leaving secondary CPUs looping and checking if some variable in memory
>>>> changed.
>>>> 
>>>> In an environment like Qemu however we can be more clever. We can just export
>>>> the spin table the primary CPU modifies as MMIO region that would event based
>>>> wake up the respective secondary CPUs. That saves us quite some cycles while
>>>> the secondary CPUs are not up yet.
>>>> 
>>>> So this patch adds a PV device that simply exports the spinning table into the
>>>> guest and thus allows the primary CPU to wake up secondary ones.
>>> 
>>> On Sparc32, there is no need for a PV device. The CPU is woken up from
>>> halted state with an IPI. Maybe you could use this approach?
>> 
>> The way it's done here is defined by u-boot and now also nailed down in the ePAPR architecture spec. While alternatives might be more appealing, this is how guests work today :).
> 
> OK. I hoped that there were no implementations yet. The header (btw
> missing) should point to the spec.
IIUC the spec that includes these bits is not finalized yet. It is, however, in use on all u-boot versions for e500 that I'm aware of, and it is the method Linux uses to bring up secondary CPUs.
Stuart / Scott, do you have any pointers to documentation where the spinning is explained?
Alex
^ permalink raw reply	[flat|nested] 128+ messages in thread 
- * Re: [Qemu-devel] [PATCH 24/58] PPC: E500: Add PV spinning code
  2011-09-19 11:35         ` Alexander Graf
@ 2011-09-19 16:12           ` Scott Wood
  2011-09-24  7:41             ` Blue Swirl
  0 siblings, 1 reply; 128+ messages in thread
From: Scott Wood @ 2011-09-19 16:12 UTC (permalink / raw)
  To: Alexander Graf
  Cc: Blue Swirl, Yoder Stuart-B08248, qemu-ppc, qemu-devel Developers,
	Aurelien Jarno
On 09/19/2011 06:35 AM, Alexander Graf wrote:
> 
> On 17.09.2011, at 19:40, Blue Swirl wrote:
> 
>> On Sat, Sep 17, 2011 at 5:15 PM, Alexander Graf <agraf@suse.de> wrote:
>>>
>>> Am 17.09.2011 um 18:58 schrieb Blue Swirl <blauwirbel@gmail.com>:
>>>
>>>> On Sparc32, there is no need for a PV device. The CPU is woken up from
>>>> halted state with an IPI. Maybe you could use this approach?
>>>
>>> The way it's done here is defined by u-boot and now also nailed down in the ePAPR architecture spec. While alternatives might be more appealing, this is how guests work today :).
>>
>> OK. I hoped that there were no implementations yet. The header (btw
>> missing) should point to the spec.
The goal with the spin table stuff, suboptimal as it is, was something
that would work on any powerpc implementation.  Other
implementation-specific release mechanisms are allowed, and are
indicated by a property in the cpu node, but only if the loader knows
that the OS supports it.
> IIUC the spec that includes these bits is not finalized yet. It is however in use on all u-boot versions for e500 that I'm aware of and the method Linux uses to bring up secondary CPUs.
It's in ePAPR 1.0, which has been out for a while now.  ePAPR 1.1 was
just released which clarifies some things such as WIMG.
> Stuart / Scott, do you have any pointers to documentation where the spinning is explained?
https://www.power.org/resources/downloads/Power_ePAPR_APPROVED_v1.1.pdf
-Scott
^ permalink raw reply	[flat|nested] 128+ messages in thread 
- * Re: [Qemu-devel] [PATCH 24/58] PPC: E500: Add PV spinning code
  2011-09-19 16:12           ` Scott Wood
@ 2011-09-24  7:41             ` Blue Swirl
  2011-09-24  8:03               ` Alexander Graf
  0 siblings, 1 reply; 128+ messages in thread
From: Blue Swirl @ 2011-09-24  7:41 UTC (permalink / raw)
  To: Scott Wood
  Cc: Yoder Stuart-B08248, qemu-ppc, Alexander Graf, Aurelien Jarno,
	qemu-devel Developers
On Mon, Sep 19, 2011 at 4:12 PM, Scott Wood <scottwood@freescale.com> wrote:
> On 09/19/2011 06:35 AM, Alexander Graf wrote:
>>
>> On 17.09.2011, at 19:40, Blue Swirl wrote:
>>
>>> On Sat, Sep 17, 2011 at 5:15 PM, Alexander Graf <agraf@suse.de> wrote:
>>>>
>>>> Am 17.09.2011 um 18:58 schrieb Blue Swirl <blauwirbel@gmail.com>:
>>>>
>>>>> On Sparc32, there is no need for a PV device. The CPU is woken up from
>>>>> halted state with an IPI. Maybe you could use this approach?
>>>>
>>>> The way it's done here is defined by u-boot and now also nailed down in the ePAPR architecture spec. While alternatives might be more appealing, this is how guests work today :).
>>>
>>> OK. I hoped that there were no implementations yet. The header (btw
>>> missing) should point to the spec.
>
> The goal with the spin table stuff, suboptimal as it is, was something
> that would work on any powerpc implementation.  Other
> implementation-specific release mechanisms are allowed, and are
> indicated by a property in the cpu node, but only if the loader knows
> that the OS supports it.
>
>> IIUC the spec that includes these bits is not finalized yet. It is however in use on all u-boot versions for e500 that I'm aware of and the method Linux uses to bring up secondary CPUs.
>
> It's in ePAPR 1.0, which has been out for a while now.  ePAPR 1.1 was
> just released which clarifies some things such as WIMG.
>
>> Stuart / Scott, do you have any pointers to documentation where the spinning is explained?
>
> https://www.power.org/resources/downloads/Power_ePAPR_APPROVED_v1.1.pdf
Chapter 5.5.2 describes the table. This is actually an interface
between OS and Open Firmware; obviously there can't be a real hardware
device that magically loads r3 etc.
The device method would break abstraction layers, it's much like
vmport stuff in x86. Using a hypercall would be a small improvement.
Instead it should be possible to implement a small boot ROM which puts
the secondary CPUs into managed halt state without spinning, then the
boot CPU could send an IPI to a halted CPU to wake them up based on
the spin table, just like real HW would do. On Sparc32 OpenBIOS this
is something like a few lines of ASM on both sides.
^ permalink raw reply	[flat|nested] 128+ messages in thread 
- * Re: [Qemu-devel] [PATCH 24/58] PPC: E500: Add PV spinning code
  2011-09-24  7:41             ` Blue Swirl
@ 2011-09-24  8:03               ` Alexander Graf
  2011-09-24  8:44                 ` Blue Swirl
  0 siblings, 1 reply; 128+ messages in thread
From: Alexander Graf @ 2011-09-24  8:03 UTC (permalink / raw)
  To: Blue Swirl
  Cc: Scott Wood, Yoder Stuart-B08248, qemu-ppc, qemu-devel Developers,
	Aurelien Jarno
On 24.09.2011, at 09:41, Blue Swirl wrote:
> On Mon, Sep 19, 2011 at 4:12 PM, Scott Wood <scottwood@freescale.com> wrote:
>> On 09/19/2011 06:35 AM, Alexander Graf wrote:
>>> 
>>> On 17.09.2011, at 19:40, Blue Swirl wrote:
>>> 
>>>> On Sat, Sep 17, 2011 at 5:15 PM, Alexander Graf <agraf@suse.de> wrote:
>>>>> 
>>>>> Am 17.09.2011 um 18:58 schrieb Blue Swirl <blauwirbel@gmail.com>:
>>>>> 
>>>>>> On Sparc32, there is no need for a PV device. The CPU is woken up from
>>>>>> halted state with an IPI. Maybe you could use this approach?
>>>>> 
>>>>> The way it's done here is defined by u-boot and now also nailed down in the ePAPR architecture spec. While alternatives might be more appealing, this is how guests work today :).
>>>> 
>>>> OK. I hoped that there were no implementations yet. The header (btw
>>>> missing) should point to the spec.
>> 
>> The goal with the spin table stuff, suboptimal as it is, was something
>> that would work on any powerpc implementation.  Other
>> implementation-specific release mechanisms are allowed, and are
>> indicated by a property in the cpu node, but only if the loader knows
>> that the OS supports it.
>> 
>>> IIUC the spec that includes these bits is not finalized yet. It is however in use on all u-boot versions for e500 that I'm aware of and the method Linux uses to bring up secondary CPUs.
>> 
>> It's in ePAPR 1.0, which has been out for a while now.  ePAPR 1.1 was
>> just released which clarifies some things such as WIMG.
>> 
>>> Stuart / Scott, do you have any pointers to documentation where the spinning is explained?
>> 
>> https://www.power.org/resources/downloads/Power_ePAPR_APPROVED_v1.1.pdf
> 
> Chapter 5.5.2 describes the table. This is actually an interface
> between OS and Open Firmware, obviously there can't be a real hardware
> device that magically loads r3 etc.
> 
> The device method would break abstraction layers, it's much like
> vmport stuff in x86. Using a hypercall would be a small improvement.
> Instead it should be possible to implement a small boot ROM which puts
> the secondary CPUs into managed halt state without spinning, then the
> boot CPU could send an IPI to a halted CPU to wake them up based on
> the spin table, just like real HW would do. On Sparc32 OpenBIOS this
> is something like a few lines of ASM on both sides.
That sounds pretty close to what I had implemented in v1. Back then the only comment was to do it using this method from Scott. Maybe one day we will get u-boot support. Then u-boot will spin on the CPU itself and when that time comes, we can check if we can implement a prettier version.
Btw, we can't do the IPI method without exposing something to the guest that u-boot would usually not expose. There simply is no event. All that happens is a write to memory to tell the other CPU that it should wake up. So while I agree that sending an IPI to the other CPU is the "clean" way to go, we can either be compatible or "clean". And given the choice, I'd rather be compatible.
So we have the choice between having code inside the guest that spins, maybe even only checks every x ms, by programming a timer, or we can try to make an event out of the memory write. V1 was the former, v2 (this one) is the latter. This version performs a lot better and is easier to understand.
Alex
^ permalink raw reply	[flat|nested] 128+ messages in thread 
- * Re: [Qemu-devel] [PATCH 24/58] PPC: E500: Add PV spinning code
  2011-09-24  8:03               ` Alexander Graf
@ 2011-09-24  8:44                 ` Blue Swirl
  2011-09-24 10:00                   ` Alexander Graf
  0 siblings, 1 reply; 128+ messages in thread
From: Blue Swirl @ 2011-09-24  8:44 UTC (permalink / raw)
  To: Alexander Graf
  Cc: Scott Wood, Yoder Stuart-B08248, qemu-ppc, qemu-devel Developers,
	Aurelien Jarno
On Sat, Sep 24, 2011 at 8:03 AM, Alexander Graf <agraf@suse.de> wrote:
>
> On 24.09.2011, at 09:41, Blue Swirl wrote:
>
>> On Mon, Sep 19, 2011 at 4:12 PM, Scott Wood <scottwood@freescale.com> wrote:
>>> On 09/19/2011 06:35 AM, Alexander Graf wrote:
>>>>
>>>> On 17.09.2011, at 19:40, Blue Swirl wrote:
>>>>
>>>>> On Sat, Sep 17, 2011 at 5:15 PM, Alexander Graf <agraf@suse.de> wrote:
>>>>>>
>>>>>> Am 17.09.2011 um 18:58 schrieb Blue Swirl <blauwirbel@gmail.com>:
>>>>>>
>>>>>>> On Sparc32, there is no need for a PV device. The CPU is woken up from
>>>>>>> halted state with an IPI. Maybe you could use this approach?
>>>>>>
>>>>>> The way it's done here is defined by u-boot and now also nailed down in the ePAPR architecture spec. While alternatives might be more appealing, this is how guests work today :).
>>>>>
>>>>> OK. I hoped that there were no implementations yet. The header (btw
>>>>> missing) should point to the spec.
>>>
>>> The goal with the spin table stuff, suboptimal as it is, was something
>>> that would work on any powerpc implementation.  Other
>>> implementation-specific release mechanisms are allowed, and are
>>> indicated by a property in the cpu node, but only if the loader knows
>>> that the OS supports it.
>>>
>>>> IIUC the spec that includes these bits is not finalized yet. It is however in use on all u-boot versions for e500 that I'm aware of and the method Linux uses to bring up secondary CPUs.
>>>
>>> It's in ePAPR 1.0, which has been out for a while now.  ePAPR 1.1 was
>>> just released which clarifies some things such as WIMG.
>>>
>>>> Stuart / Scott, do you have any pointers to documentation where the spinning is explained?
>>>
>>> https://www.power.org/resources/downloads/Power_ePAPR_APPROVED_v1.1.pdf
>>
>> Chapter 5.5.2 describes the table. This is actually an interface
>> between OS and Open Firmware, obviously there can't be a real hardware
>> device that magically loads r3 etc.
>>
>> The device method would break abstraction layers, it's much like
>> vmport stuff in x86. Using a hypercall would be a small improvement.
>> Instead it should be possible to implement a small boot ROM which puts
>> the secondary CPUs into managed halt state without spinning, then the
>> boot CPU could send an IPI to a halted CPU to wake them up based on
>> the spin table, just like real HW would do. On Sparc32 OpenBIOS this
>> is something like a few lines of ASM on both sides.
>
> That sounds pretty close to what I had implemented in v1. Back then the only comment was to do it using this method from Scott. Maybe one day we will get u-boot support. Then u-boot will spin on the CPU itself and when that time comes, we can check if we can implement a prettier version.
>
> Btw, we can't do the IPI method without exposing something to the guest that u-boot would usually not expose. There simply is no event. All that happens is a write to memory to tell the other CPU that it should wake up. So while sending an IPI to the other CPU is the "clean" way to go, I agree, we can either be compatible or "clean". And if I get the choice I'm rather compatible.
There are also warts in Sparc32 design, for example there is no
instruction to halt the CPU, instead a device (only available on some
models) can do it.
> So we have the choice between having code inside the guest that spins, maybe even only checks every x ms, by programming a timer, or we can try to make an event out of the memory write. V1 was the former, v2 (this one) is the latter. This version performs a lot better and is easier to understand.
The abstraction layers should not be broken lightly, I suppose some
performance or laziness^Wlocal optimization reasons were behind vmport
design too. The ideal way to solve this could be to detect a spinning
CPU and optimize that for all architectures, though that could be
tricky (if a CPU remains in the same TB for extended periods, inspect
the TB: if it performs a loop with a single load instruction, replace
the load by a special wait operation for any memory stores to that
page).
^ permalink raw reply	[flat|nested] 128+ messages in thread 
- * Re: [Qemu-devel] [PATCH 24/58] PPC: E500: Add PV spinning code
  2011-09-24  8:44                 ` Blue Swirl
@ 2011-09-24 10:00                   ` Alexander Graf
  2011-09-24 10:18                     ` Blue Swirl
  2011-09-26 23:19                     ` Scott Wood
  0 siblings, 2 replies; 128+ messages in thread
From: Alexander Graf @ 2011-09-24 10:00 UTC (permalink / raw)
  To: Blue Swirl
  Cc: Scott Wood, Yoder Stuart-B08248, qemu-ppc, qemu-devel Developers,
	Aurelien Jarno
On 24.09.2011, at 10:44, Blue Swirl wrote:
> On Sat, Sep 24, 2011 at 8:03 AM, Alexander Graf <agraf@suse.de> wrote:
>> 
>> On 24.09.2011, at 09:41, Blue Swirl wrote:
>> 
>>> On Mon, Sep 19, 2011 at 4:12 PM, Scott Wood <scottwood@freescale.com> wrote:
>>>> On 09/19/2011 06:35 AM, Alexander Graf wrote:
>>>>> 
>>>>> On 17.09.2011, at 19:40, Blue Swirl wrote:
>>>>> 
>>>>>> On Sat, Sep 17, 2011 at 5:15 PM, Alexander Graf <agraf@suse.de> wrote:
>>>>>>> 
>>>>>>> Am 17.09.2011 um 18:58 schrieb Blue Swirl <blauwirbel@gmail.com>:
>>>>>>> 
>>>>>>>> On Sparc32, there is no need for a PV device. The CPU is woken up from
>>>>>>>> halted state with an IPI. Maybe you could use this approach?
>>>>>>> 
>>>>>>> The way it's done here is defined by u-boot and now also nailed down in the ePAPR architecture spec. While alternatives might be more appealing, this is how guests work today :).
>>>>>> 
>>>>>> OK. I hoped that there were no implementations yet. The header (btw
>>>>>> missing) should point to the spec.
>>>> 
>>>> The goal with the spin table stuff, suboptimal as it is, was something
>>>> that would work on any powerpc implementation.  Other
>>>> implementation-specific release mechanisms are allowed, and are
>>>> indicated by a property in the cpu node, but only if the loader knows
>>>> that the OS supports it.
>>>> 
>>>>> IIUC the spec that includes these bits is not finalized yet. It is however in use on all u-boot versions for e500 that I'm aware of and the method Linux uses to bring up secondary CPUs.
>>>> 
>>>> It's in ePAPR 1.0, which has been out for a while now.  ePAPR 1.1 was
>>>> just released which clarifies some things such as WIMG.
>>>> 
>>>>> Stuart / Scott, do you have any pointers to documentation where the spinning is explained?
>>>> 
>>>> https://www.power.org/resources/downloads/Power_ePAPR_APPROVED_v1.1.pdf
>>> 
>>> Chapter 5.5.2 describes the table. This is actually an interface
>>> between OS and Open Firmware, obviously there can't be a real hardware
>>> device that magically loads r3 etc.
>>> 
>>> The device method would break abstraction layers, it's much like
>>> vmport stuff in x86. Using a hypercall would be a small improvement.
>>> Instead it should be possible to implement a small boot ROM which puts
>>> the secondary CPUs into managed halt state without spinning, then the
>>> boot CPU could send an IPI to a halted CPU to wake them up based on
>>> the spin table, just like real HW would do. On Sparc32 OpenBIOS this
>>> is something like a few lines of ASM on both sides.
>> 
>> That sounds pretty close to what I had implemented in v1. Back then the only comment was to do it using this method from Scott. Maybe one day we will get u-boot support. Then u-boot will spin on the CPU itself and when that time comes, we can check if we can implement a prettier version.
>> 
>> Btw, we can't do the IPI method without exposing something to the guest that u-boot would usually not expose. There simply is no event. All that happens is a write to memory to tell the other CPU that it should wake up. So while sending an IPI to the other CPU is the "clean" way to go, I agree, we can either be compatible or "clean". And if I get the choice I'm rather compatible.
> 
> There are also warts in Sparc32 design, for example there is no
> instruction to halt the CPU, instead a device (only available on some
> models) can do it.
Ugh, nice :)
> 
>> So we have the choice between having code inside the guest that spins, maybe even only checks every x ms, by programming a timer, or we can try to make an event out of the memory write. V1 was the former, v2 (this one) is the latter. This version performs a lot better and is easier to understand.
> 
> The abstraction layers should not be broken lightly, I suppose some
> performance or laziness^Wlocal optimization reasons were behind vmport
> design too. The ideal way to solve this could be to detect a spinning
> CPU and optimize that for all architectures, that could be tricky
> though (if a CPU remains in the same TB for extended periods, inspect
> the TB: if it performs a loop with a single load instruction, replace
> the load by a special wait operation for any memory stores to that
> page).
I agree.
However, for now I'd like to have _something_ that we can easily replace later on. We don't do savevm or migration yet, so the danger of changing the device model from one version to the next is minimal. To the guest kernel, this is seamless, as the interface stays exactly the same.
In fact, the whole way we load the kernel today is pretty much wrong. We should rather do it similar to OpenBIOS, where firmware always loads and then pulls the kernel from QEMU using a PV interface. At that point, we would have to implement such an optimization as you suggest. Or implement a hypercall :). But at least we'd always be running the same guest software stack.
So what I'm suggesting is that for now, we make progress and then scratch the device we're introducing here later on, when we move towards different models on how to initialize the machine. As it stands, however, I'd much rather have working code here and concentrate on the 50 other places that are broken than optimize a case that already works well enough just because it could be done prettier. Let's rather iterate over this interface again when we hit another road block. At that point in time, we'll have more experience with the shortcomings too.
Alex
^ permalink raw reply	[flat|nested] 128+ messages in thread 
- * Re: [Qemu-devel] [PATCH 24/58] PPC: E500: Add PV spinning code
  2011-09-24 10:00                   ` Alexander Graf
@ 2011-09-24 10:18                     ` Blue Swirl
  2011-09-26 23:19                     ` Scott Wood
  1 sibling, 0 replies; 128+ messages in thread
From: Blue Swirl @ 2011-09-24 10:18 UTC (permalink / raw)
  To: Alexander Graf
  Cc: Scott Wood, Yoder Stuart-B08248, qemu-ppc, qemu-devel Developers,
	Aurelien Jarno
On Sat, Sep 24, 2011 at 10:00 AM, Alexander Graf <agraf@suse.de> wrote:
>
> On 24.09.2011, at 10:44, Blue Swirl wrote:
>
>> On Sat, Sep 24, 2011 at 8:03 AM, Alexander Graf <agraf@suse.de> wrote:
>>>
>>> On 24.09.2011, at 09:41, Blue Swirl wrote:
>>>
>>>> On Mon, Sep 19, 2011 at 4:12 PM, Scott Wood <scottwood@freescale.com> wrote:
>>>>> On 09/19/2011 06:35 AM, Alexander Graf wrote:
>>>>>>
>>>>>> On 17.09.2011, at 19:40, Blue Swirl wrote:
>>>>>>
>>>>>>> On Sat, Sep 17, 2011 at 5:15 PM, Alexander Graf <agraf@suse.de> wrote:
>>>>>>>>
>>>>>>>> Am 17.09.2011 um 18:58 schrieb Blue Swirl <blauwirbel@gmail.com>:
>>>>>>>>
>>>>>>>>> On Sparc32, there is no need for a PV device. The CPU is woken up from
>>>>>>>>> halted state with an IPI. Maybe you could use this approach?
>>>>>>>>
>>>>>>>> The way it's done here is defined by u-boot and now also nailed down in the ePAPR architecture spec. While alternatives might be more appealing, this is how guests work today :).
>>>>>>>
>>>>>>> OK. I hoped that there were no implementations yet. The header (btw
>>>>>>> missing) should point to the spec.
>>>>>
>>>>> The goal with the spin table stuff, suboptimal as it is, was something
>>>>> that would work on any powerpc implementation.  Other
>>>>> implementation-specific release mechanisms are allowed, and are
>>>>> indicated by a property in the cpu node, but only if the loader knows
>>>>> that the OS supports it.
>>>>>
>>>>>> IIUC the spec that includes these bits is not finalized yet. It is however in use on all u-boot versions for e500 that I'm aware of and the method Linux uses to bring up secondary CPUs.
>>>>>
>>>>> It's in ePAPR 1.0, which has been out for a while now.  ePAPR 1.1 was
>>>>> just released which clarifies some things such as WIMG.
>>>>>
>>>>>> Stuart / Scott, do you have any pointers to documentation where the spinning is explained?
>>>>>
>>>>> https://www.power.org/resources/downloads/Power_ePAPR_APPROVED_v1.1.pdf
>>>>
>>>> Chapter 5.5.2 describes the table. This is actually an interface
>>>> between OS and Open Firmware, obviously there can't be a real hardware
>>>> device that magically loads r3 etc.
>>>>
>>>> The device method would break abstraction layers, it's much like
>>>> vmport stuff in x86. Using a hypercall would be a small improvement.
>>>> Instead it should be possible to implement a small boot ROM which puts
>>>> the secondary CPUs into managed halt state without spinning, then the
>>>> boot CPU could send an IPI to a halted CPU to wake them up based on
>>>> the spin table, just like real HW would do. On Sparc32 OpenBIOS this
>>>> is something like a few lines of ASM on both sides.
>>>
>>> That sounds pretty close to what I had implemented in v1. Back then the only comment was to do it using this method from Scott. Maybe one day we will get u-boot support. Then u-boot will spin on the CPU itself and when that time comes, we can check if we can implement a prettier version.
>>>
>>> Btw, we can't do the IPI method without exposing something to the guest that u-boot would usually not expose. There simply is no event. All that happens is a write to memory to tell the other CPU that it should wake up. So while sending an IPI to the other CPU is the "clean" way to go, I agree, we can either be compatible or "clean". And if I get the choice I'm rather compatible.
>>
>> There are also warts in Sparc32 design, for example there is no
>> instruction to halt the CPU, instead a device (only available on some
>> models) can do it.
>
> Ugh, nice :)
>
>>
>>> So we have the choice between having code inside the guest that spins, maybe even only checks every x ms, by programming a timer, or we can try to make an event out of the memory write. V1 was the former, v2 (this one) is the latter. This version performs a lot better and is easier to understand.
>>
>> The abstraction layers should not be broken lightly, I suppose some
>> performance or laziness^Wlocal optimization reasons were behind vmport
>> design too. The ideal way to solve this could be to detect a spinning
>> CPU and optimize that for all architectures, that could be tricky
>> though (if a CPU remains in the same TB for extended periods, inspect
>> the TB: if it performs a loop with a single load instruction, replace
>> the load by a special wait operation for any memory stores to that
>> page).
>
> I agree.
>
> However, for now I'd like to have _something_ that we can easily replace later on. We don't do savevm or migration yet, so the danger of changing the device model from one version to the next is minimal. To the guest kernel, this is seamless, as the interface stays exactly the same.
>
> In fact, the whole kernel loading way we go today is pretty much wrong. We should rather do it similar to OpenBIOS where firmware always loads and then pulls the kernel from QEMU using a PV interface. At that point, we would have to implement such an optimization as you suggest. Or implement a hypercall :). But at least we'd always be running the same guest software stack.
Fully agree, also the hypercall stuff (especially OF tree handling)
could be pushed to OpenBIOS and make it the hypervisor one day.
> So what I'm suggesting is that for now, we're making progress and then scratch the device we're introducing here later on, when we move towards different models on how to initialize the machine. As it stands however, I much rather have working code here and concentrate on the 50 other places that are broken than optimize a case that already works well enough because it could be done prettier. Let's rather iterate over this interface again when we hit another road block. At that point in time, we'll have more experience with the shortcomings too.
OK, if we all agree that the interface is temporary. Maybe the device
file should include warnings about that.
^ permalink raw reply	[flat|nested] 128+ messages in thread 
- * Re: [Qemu-devel] [PATCH 24/58] PPC: E500: Add PV spinning code
  2011-09-24 10:00                   ` Alexander Graf
  2011-09-24 10:18                     ` Blue Swirl
@ 2011-09-26 23:19                     ` Scott Wood
  2011-09-27 15:50                       ` Blue Swirl
  1 sibling, 1 reply; 128+ messages in thread
From: Scott Wood @ 2011-09-26 23:19 UTC (permalink / raw)
  To: Alexander Graf
  Cc: Blue Swirl, Yoder Stuart-B08248, qemu-ppc, qemu-devel Developers,
	Aurelien Jarno
On 09/24/2011 05:00 AM, Alexander Graf wrote:
> On 24.09.2011, at 10:44, Blue Swirl wrote:
>> On Sat, Sep 24, 2011 at 8:03 AM, Alexander Graf <agraf@suse.de> wrote:
>>> On 24.09.2011, at 09:41, Blue Swirl wrote:
>>>> On Mon, Sep 19, 2011 at 4:12 PM, Scott Wood <scottwood@freescale.com> wrote:
>>>>> The goal with the spin table stuff, suboptimal as it is, was something
>>>>> that would work on any powerpc implementation.  Other
>>>>> implementation-specific release mechanisms are allowed, and are
>>>>> indicated by a property in the cpu node, but only if the loader knows
>>>>> that the OS supports it.
>>>>>
>>>>>> IIUC the spec that includes these bits is not finalized yet. It is however in use on all u-boot versions for e500 that I'm aware of and the method Linux uses to bring up secondary CPUs.
>>>>>
>>>>> It's in ePAPR 1.0, which has been out for a while now.  ePAPR 1.1 was
>>>>> just released which clarifies some things such as WIMG.
>>>>>
>>>>>> Stuart / Scott, do you have any pointers to documentation where the spinning is explained?
>>>>>
>>>>> https://www.power.org/resources/downloads/Power_ePAPR_APPROVED_v1.1.pdf
>>>>
>>>> Chapter 5.5.2 describes the table. This is actually an interface
>>>> between OS and Open Firmware, obviously there can't be a real hardware
>>>> device that magically loads r3 etc.
Not Open Firmware, but rather an ePAPR-compliant loader.
>>>> The device method would break abstraction layers, 
Which abstraction layers?
>>>> it's much like
>>>> vmport stuff in x86. Using a hypercall would be a small improvement.
>>>> Instead it should be possible to implement a small boot ROM which puts
>>>> the secondary CPUs into managed halt state without spinning, then the
>>>> boot CPU could send an IPI to a halted CPU to wake them up based on
>>>> the spin table, just like real HW would do.
The spin table, with no IPI or halt state, is what real HW does (or
rather, what software does on real HW) today.  It's ugly and inefficient
but it should work everywhere.  Anything else would be dependent on a
specific HW implementation.
>>>> On Sparc32 OpenBIOS this
>>>> is something like a few lines of ASM on both sides.
>>>
>>> That sounds pretty close to what I had implemented in v1. Back then the only comment was to do it using this method from Scott.
I had some comments on the actual v1 implementation as well. :-)
>>> So we have the choice between having code inside the guest that
>>> spins, maybe even only checks every x ms, by programming a timer,
>>> or we can try to make an event out of the memory write. V1 was
>>> the former, v2 (this one) is the latter. This version performs a
>>> lot better and is easier to understand.
>>
>> The abstraction layers should not be broken lightly, I suppose some
>> performance or laziness^Wlocal optimization reasons were behind vmport
>> design too. The ideal way to solve this could be to detect a spinning
>> CPU and optimize that for all architectures, that could be tricky
>> though (if a CPU remains in the same TB for extended periods, inspect
>> the TB: if it performs a loop with a single load instruction, replace
>> the load by a special wait operation for any memory stores to that
>> page).
How's that going to work with KVM?
> In fact, the whole kernel loading way we go today is pretty much
> wrong. We should rather do it similar to OpenBIOS where firmware
> always loads and then pulls the kernel from QEMU using a PV
> interface. At that point, we would have to implement such an
> optimization as you suggest. Or implement a hypercall :). 
I think the current approach is more usable for most purposes.  If you
start U-Boot instead of a kernel, how do you pass information on from the
user (kernel, rfs, etc)?  Require the user to create flash images[1]?
Maybe that's a useful mode of operation in some cases, but I don't think
we should be slavishly bound to it.  Think of the current approach as
something between whole-system and userspace emulation.
Where does the device tree come from?  How do you tell the guest about
what devices it has, especially in virtualization scenarios with non-PCI
passthrough devices, or custom qdev instantiations?
> But at least we'd always be running the same guest software stack.
No we wouldn't.  Any U-Boot that runs under QEMU would have to be
heavily modified, unless we want to implement a ton of random device
emulation, at least one extra memory translation layer (LAWs, localbus
windows, CCSRBAR, and such), hacks to allow locked cache lines to
operate despite a lack of backing store, etc.
-Scott
[1] Keep in mind that a major use case for e500 KVM is on host systems
that don't have a hard drive.  I want to *reduce* the amount of memory
we waste to store this stuff, not increase it.
^ permalink raw reply	[flat|nested] 128+ messages in thread 
- * Re: [Qemu-devel] [PATCH 24/58] PPC: E500: Add PV spinning code
  2011-09-26 23:19                     ` Scott Wood
@ 2011-09-27 15:50                       ` Blue Swirl
  2011-09-27 15:59                         ` Alexander Graf
  0 siblings, 1 reply; 128+ messages in thread
From: Blue Swirl @ 2011-09-27 15:50 UTC (permalink / raw)
  To: Scott Wood
  Cc: Yoder Stuart-B08248, qemu-ppc, Alexander Graf, Aurelien Jarno,
	qemu-devel Developers
On Mon, Sep 26, 2011 at 11:19 PM, Scott Wood <scottwood@freescale.com> wrote:
> On 09/24/2011 05:00 AM, Alexander Graf wrote:
>> On 24.09.2011, at 10:44, Blue Swirl wrote:
>>> On Sat, Sep 24, 2011 at 8:03 AM, Alexander Graf <agraf@suse.de> wrote:
>>>> On 24.09.2011, at 09:41, Blue Swirl wrote:
>>>>> On Mon, Sep 19, 2011 at 4:12 PM, Scott Wood <scottwood@freescale.com> wrote:
>>>>>> The goal with the spin table stuff, suboptimal as it is, was something
>>>>>> that would work on any powerpc implementation.  Other
>>>>>> implementation-specific release mechanisms are allowed, and are
>>>>>> indicated by a property in the cpu node, but only if the loader knows
>>>>>> that the OS supports it.
>>>>>>
>>>>>>> IIUC the spec that includes these bits is not finalized yet. It is however in use on all u-boot versions for e500 that I'm aware of and the method Linux uses to bring up secondary CPUs.
>>>>>>
>>>>>> It's in ePAPR 1.0, which has been out for a while now.  ePAPR 1.1 was
>>>>>> just released which clarifies some things such as WIMG.
>>>>>>
>>>>>>> Stuart / Scott, do you have any pointers to documentation where the spinning is explained?
>>>>>>
>>>>>> https://www.power.org/resources/downloads/Power_ePAPR_APPROVED_v1.1.pdf
>>>>>
>>>>> Chapter 5.5.2 describes the table. This is actually an interface
>>>>> between OS and Open Firmware, obviously there can't be a real hardware
>>>>> device that magically loads r3 etc.
>
> Not Open Firmware, but rather an ePAPR-compliant loader.
'boot program to client program interface definition'.
>>>>> The device method would break abstraction layers,
>
> Which abstraction layers?
QEMU system emulation emulates hardware, not software. Hardware
devices don't touch CPU registers.
>>>>> it's much like
>>>>> vmport stuff in x86. Using a hypercall would be a small improvement.
>>>>> Instead it should be possible to implement a small boot ROM which puts
>>>>> the secondary CPUs into managed halt state without spinning, then the
>>>>> boot CPU could send an IPI to a halted CPU to wake them up based on
>>>>> the spin table, just like real HW would do.
>
> The spin table, with no IPI or halt state, is what real HW does (or
> rather, what software does on real HW) today.  It's ugly and inefficient
> but it should work everywhere.  Anything else would be dependent on a
> specific HW implementation.
Yes. Hardware doesn't ever implement the spin table.
>>>>> On Sparc32 OpenBIOS this
>>>>> is something like a few lines of ASM on both sides.
>>>>
>>>> That sounds pretty close to what I had implemented in v1. Back then the only comment was to do it using this method from Scott.
>
> I had some comments on the actual v1 implementation as well. :-)
>
>>>> So we have the choice between having code inside the guest that
>>>> spins, maybe even only checks every x ms, by programming a timer,
>>>> or we can try to make an event out of the memory write. V1 was
>>>> the former, v2 (this one) is the latter. This version performs a
>>>> lot better and is easier to understand.
>>>
>>> The abstraction layers should not be broken lightly, I suppose some
>>> performance or laziness^Wlocal optimization reasons were behind vmport
>>> design too. The ideal way to solve this could be to detect a spinning
>>> CPU and optimize that for all architectures, that could be tricky
>>> though (if a CPU remains in the same TB for extended periods, inspect
>>> the TB: if it performs a loop with a single load instruction, replace
>>> the load by a special wait operation for any memory stores to that
>>> page).
>
> How's that going to work with KVM?
>
>> In fact, the whole kernel loading way we go today is pretty much
>> wrong. We should rather do it similar to OpenBIOS where firmware
>> always loads and then pulls the kernel from QEMU using a PV
>> interface. At that point, we would have to implement such an
>> optimization as you suggest. Or implement a hypercall :).
>
> I think the current approach is more usable for most purposes.  If you
> start U-Boot instead of a kernel, how do pass information on from the
> user (kernel, rfs, etc)?  Require the user to create flash images[1]?
No, for example OpenBIOS gets the kernel command line from fw_cfg device.
> Maybe that's a useful mode of operation in some cases, but I don't think
> we should be slavishly bound to it.  Think of the current approach as
> something between whole-system and userspace emulation.
This is similar to ARM, M68k and Xtensa semi-hosting mode, but not at
kernel level but lower. Perhaps this mode should be enabled with
-semihosting flag or a new flag. Then the bare metal version could be
run without the flag.
> Where does the device tree come from?  How do you tell the guest about
> what devices it has, especially in virtualization scenarios with non-PCI
> passthrough devices, or custom qdev instantiations?
>
>> But at least we'd always be running the same guest software stack.
>
> No we wouldn't.  Any U-Boot that runs under QEMU would have to be
> heavily modified, unless we want to implement a ton of random device
> emulation, at least one extra memory translation layer (LAWs, localbus
> windows, CCSRBAR, and such), hacks to allow locked cache lines to
> operate despite a lack of backing store, etc.
I'd say HW emulation business as usual. Now with the new memory API,
it should be possible to emulate the caches with line locking and TLBs
etc., this was not previously possible. IIRC implementing locked cache
lines would allow x86 to boot unmodified coreboot.
> -Scott
>
> [1] Keep in mind that a major use case for e500 KVM is on host systems
> that don't have a hard drive.  I want to *reduce* the amount of memory
> we waste to store this stuff, not increase it.
Interesting use case. Is there a display device?
^ permalink raw reply	[flat|nested] 128+ messages in thread 
- * Re: [Qemu-devel] [PATCH 24/58] PPC: E500: Add PV spinning code
  2011-09-27 15:50                       ` Blue Swirl
@ 2011-09-27 15:59                         ` Alexander Graf
  2011-09-27 16:53                           ` Blue Swirl
  0 siblings, 1 reply; 128+ messages in thread
From: Alexander Graf @ 2011-09-27 15:59 UTC (permalink / raw)
  To: Blue Swirl
  Cc: Scott Wood, Yoder Stuart-B08248, qemu-ppc, qemu-devel Developers,
	Aurelien Jarno
On 27.09.2011, at 17:50, Blue Swirl wrote:
> On Mon, Sep 26, 2011 at 11:19 PM, Scott Wood <scottwood@freescale.com> wrote:
>> On 09/24/2011 05:00 AM, Alexander Graf wrote:
>>> On 24.09.2011, at 10:44, Blue Swirl wrote:
>>>> On Sat, Sep 24, 2011 at 8:03 AM, Alexander Graf <agraf@suse.de> wrote:
>>>>> On 24.09.2011, at 09:41, Blue Swirl wrote:
>>>>>> On Mon, Sep 19, 2011 at 4:12 PM, Scott Wood <scottwood@freescale.com> wrote:
>>>>>>> The goal with the spin table stuff, suboptimal as it is, was something
>>>>>>> that would work on any powerpc implementation.  Other
>>>>>>> implementation-specific release mechanisms are allowed, and are
>>>>>>> indicated by a property in the cpu node, but only if the loader knows
>>>>>>> that the OS supports it.
>>>>>>> 
>>>>>>>> IIUC the spec that includes these bits is not finalized yet. It is however in use on all u-boot versions for e500 that I'm aware of and the method Linux uses to bring up secondary CPUs.
>>>>>>> 
>>>>>>> It's in ePAPR 1.0, which has been out for a while now.  ePAPR 1.1 was
>>>>>>> just released which clarifies some things such as WIMG.
>>>>>>> 
>>>>>>>> Stuart / Scott, do you have any pointers to documentation where the spinning is explained?
>>>>>>> 
>>>>>>> https://www.power.org/resources/downloads/Power_ePAPR_APPROVED_v1.1.pdf
>>>>>> 
>>>>>> Chapter 5.5.2 describes the table. This is actually an interface
>>>>>> between OS and Open Firmware, obviously there can't be a real hardware
>>>>>> device that magically loads r3 etc.
>> 
>> Not Open Firmware, but rather an ePAPR-compliant loader.
> 
> 'boot program to client program interface definition'.
> 
>>>>>> The device method would break abstraction layers,
>> 
>> Which abstraction layers?
> 
> QEMU system emulation emulates hardware, not software. Hardware
> devices don't touch CPU registers.
The great part about this emulated device is that it's basically guest software running in host context. To the guest, it's not a device in the ordinary sense, such as vmport, but rather the same as software running on another core, just that the other core isn't running any software.
Sure, if you consider this a device, it does break abstraction layers. Just consider it as host running guest code, then it makes sense :).
> 
>>>>>> it's much like
>>>>>> vmport stuff in x86. Using a hypercall would be a small improvement.
>>>>>> Instead it should be possible to implement a small boot ROM which puts
>>>>>> the secondary CPUs into managed halt state without spinning, then the
>>>>>> boot CPU could send an IPI to a halted CPU to wake them up based on
>>>>>> the spin table, just like real HW would do.
>> 
>> The spin table, with no IPI or halt state, is what real HW does (or
>> rather, what software does on real HW) today.  It's ugly and inefficient
>> but it should work everywhere.  Anything else would be dependent on a
>> specific HW implementation.
> 
> Yes. Hardware doesn't ever implement the spin table.
> 
>>>>>> On Sparc32 OpenBIOS this
>>>>>> is something like a few lines of ASM on both sides.
>>>>> 
>>>>> That sounds pretty close to what I had implemented in v1. Back then the only comment was to do it using this method from Scott.
>> 
>> I had some comments on the actual v1 implementation as well. :-)
>> 
>>>>> So we have the choice between having code inside the guest that
>>>>> spins, maybe even only checks every x ms, by programming a timer,
>>>>> or we can try to make an event out of the memory write. V1 was
>>>>> the former, v2 (this one) is the latter. This version performs a
>>>>> lot better and is easier to understand.
>>>> 
>>>> The abstraction layers should not be broken lightly, I suppose some
>>>> performance or laziness^Wlocal optimization reasons were behind vmport
>>>> design too. The ideal way to solve this could be to detect a spinning
>>>> CPU and optimize that for all architectures, that could be tricky
>>>> though (if a CPU remains in the same TB for extended periods, inspect
>>>> the TB: if it performs a loop with a single load instruction, replace
>>>> the load by a special wait operation for any memory stores to that
>>>> page).
>> 
>> How's that going to work with KVM?
>> 
>>> In fact, the whole kernel loading way we go today is pretty much
>>> wrong. We should rather do it similar to OpenBIOS where firmware
>>> always loads and then pulls the kernel from QEMU using a PV
>>> interface. At that point, we would have to implement such an
>>> optimization as you suggest. Or implement a hypercall :).
>> 
>> I think the current approach is more usable for most purposes.  If you
>> start U-Boot instead of a kernel, how do pass information on from the
>> user (kernel, rfs, etc)?  Require the user to create flash images[1]?
> 
> No, for example OpenBIOS gets the kernel command line from fw_cfg device.
> 
>> Maybe that's a useful mode of operation in some cases, but I don't think
>> we should be slavishly bound to it.  Think of the current approach as
>> something between whole-system and userspace emulation.
> 
> This is similar to ARM, M68k and Xtensa semi-hosting mode, but not at
> kernel level but lower. Perhaps this mode should be enabled with
> -semihosting flag or a new flag. Then the bare metal version could be
> run without the flag.
And then we'd have two implementations for running in system emulation mode and would need to maintain both. I don't think that scales very well.
> 
>> Where does the device tree come from?  How do you tell the guest about
>> what devices it has, especially in virtualization scenarios with non-PCI
>> passthrough devices, or custom qdev instantiations?
>> 
>>> But at least we'd always be running the same guest software stack.
>> 
>> No we wouldn't.  Any U-Boot that runs under QEMU would have to be
>> heavily modified, unless we want to implement a ton of random device
>> emulation, at least one extra memory translation layer (LAWs, localbus
>> windows, CCSRBAR, and such), hacks to allow locked cache lines to
>> operate despite a lack of backing store, etc.
> 
> I'd say HW emulation business as usual. Now with the new memory API,
> it should be possible to emulate the caches with line locking and TLBs
> etc., this was not previously possible. IIRC implementing locked cache
> lines would allow x86 to boot unmodified coreboot.
So how would you emulate cache lines with line locking on KVM?
However, we already have a number of hacks in SeaBIOS to run in QEMU, so I don't see an issue in adding a few here and there in u-boot. The memory pressure is a real issue though. I'm not sure how we'd manage that one. Maybe we could try and reuse the host u-boot binary? heh
> 
>> -Scott
>> 
>> [1] Keep in mind that a major use case for e500 KVM is on host systems
>> that don't have a hard drive.  I want to *reduce* the amount of memory
>> we waste to store this stuff, not increase it.
> 
> Interesting use case. Is there a display device?
There are some boards with display and/or PCI(e), yes.
Alex
^ permalink raw reply	[flat|nested] 128+ messages in thread 
- * Re: [Qemu-devel] [PATCH 24/58] PPC: E500: Add PV spinning code
  2011-09-27 15:59                         ` Alexander Graf
@ 2011-09-27 16:53                           ` Blue Swirl
  2011-09-27 17:01                             ` Richard Henderson
  2011-09-27 17:03                             ` Alexander Graf
  0 siblings, 2 replies; 128+ messages in thread
From: Blue Swirl @ 2011-09-27 16:53 UTC (permalink / raw)
  To: Alexander Graf
  Cc: Scott Wood, Yoder Stuart-B08248, qemu-ppc, qemu-devel Developers,
	Aurelien Jarno
On Tue, Sep 27, 2011 at 3:59 PM, Alexander Graf <agraf@suse.de> wrote:
>
> On 27.09.2011, at 17:50, Blue Swirl wrote:
>
>> On Mon, Sep 26, 2011 at 11:19 PM, Scott Wood <scottwood@freescale.com> wrote:
>>> On 09/24/2011 05:00 AM, Alexander Graf wrote:
>>>> On 24.09.2011, at 10:44, Blue Swirl wrote:
>>>>> On Sat, Sep 24, 2011 at 8:03 AM, Alexander Graf <agraf@suse.de> wrote:
>>>>>> On 24.09.2011, at 09:41, Blue Swirl wrote:
>>>>>>> On Mon, Sep 19, 2011 at 4:12 PM, Scott Wood <scottwood@freescale.com> wrote:
>>>>>>>> The goal with the spin table stuff, suboptimal as it is, was something
>>>>>>>> that would work on any powerpc implementation.  Other
>>>>>>>> implementation-specific release mechanisms are allowed, and are
>>>>>>>> indicated by a property in the cpu node, but only if the loader knows
>>>>>>>> that the OS supports it.
>>>>>>>>
>>>>>>>>> IIUC the spec that includes these bits is not finalized yet. It is however in use on all u-boot versions for e500 that I'm aware of and the method Linux uses to bring up secondary CPUs.
>>>>>>>>
>>>>>>>> It's in ePAPR 1.0, which has been out for a while now.  ePAPR 1.1 was
>>>>>>>> just released which clarifies some things such as WIMG.
>>>>>>>>
>>>>>>>>> Stuart / Scott, do you have any pointers to documentation where the spinning is explained?
>>>>>>>>
>>>>>>>> https://www.power.org/resources/downloads/Power_ePAPR_APPROVED_v1.1.pdf
>>>>>>>
>>>>>>> Chapter 5.5.2 describes the table. This is actually an interface
>>>>>>> between OS and Open Firmware, obviously there can't be a real hardware
>>>>>>> device that magically loads r3 etc.
>>>
>>> Not Open Firmware, but rather an ePAPR-compliant loader.
>>
>> 'boot program to client program interface definition'.
>>
>>>>>>> The device method would break abstraction layers,
>>>
>>> Which abstraction layers?
>>
>> QEMU system emulation emulates hardware, not software. Hardware
>> devices don't touch CPU registers.
>
> The great part about this emulated device is that it's basically guest software running in host context. To the guest, it's not a device in the ordinary sense, such as vmport, but rather the same as software running on another core, just that the other core isn't running any software.
>
> Sure, if you consider this a device, it does break abstraction layers. Just consider it as host running guest code, then it makes sense :).
>
>>
>>>>>>> it's much like
>>>>>>> vmport stuff in x86. Using a hypercall would be a small improvement.
>>>>>>> Instead it should be possible to implement a small boot ROM which puts
>>>>>>> the secondary CPUs into managed halt state without spinning, then the
>>>>>>> boot CPU could send an IPI to a halted CPU to wake them up based on
>>>>>>> the spin table, just like real HW would do.
>>>
>>> The spin table, with no IPI or halt state, is what real HW does (or
>>> rather, what software does on real HW) today.  It's ugly and inefficient
>>> but it should work everywhere.  Anything else would be dependent on a
>>> specific HW implementation.
>>
>> Yes. Hardware doesn't ever implement the spin table.
>>
>>>>>>> On Sparc32 OpenBIOS this
>>>>>>> is something like a few lines of ASM on both sides.
>>>>>>
>>>>>> That sounds pretty close to what I had implemented in v1. Back then the only comment was to do it using this method from Scott.
>>>
>>> I had some comments on the actual v1 implementation as well. :-)
>>>
>>>>>> So we have the choice between having code inside the guest that
>>>>>> spins, maybe even only checks every x ms, by programming a timer,
>>>>>> or we can try to make an event out of the memory write. V1 was
>>>>>> the former, v2 (this one) is the latter. This version performs a
>>>>>> lot better and is easier to understand.
>>>>>
>>>>> The abstraction layers should not be broken lightly, I suppose some
>>>>> performance or laziness^Wlocal optimization reasons were behind vmport
>>>>> design too. The ideal way to solve this could be to detect a spinning
>>>>> CPU and optimize that for all architectures, that could be tricky
>>>>> though (if a CPU remains in the same TB for extended periods, inspect
>>>>> the TB: if it performs a loop with a single load instruction, replace
>>>>> the load by a special wait operation for any memory stores to that
>>>>> page).
>>>
>>> How's that going to work with KVM?
>>>
>>>> In fact, the whole kernel loading way we go today is pretty much
>>>> wrong. We should rather do it similar to OpenBIOS where firmware
>>>> always loads and then pulls the kernel from QEMU using a PV
>>>> interface. At that point, we would have to implement such an
>>>> optimization as you suggest. Or implement a hypercall :).
>>>
>>> I think the current approach is more usable for most purposes.  If you
>>> start U-Boot instead of a kernel, how do pass information on from the
>>> user (kernel, rfs, etc)?  Require the user to create flash images[1]?
>>
>> No, for example OpenBIOS gets the kernel command line from fw_cfg device.
>>
>>> Maybe that's a useful mode of operation in some cases, but I don't think
>>> we should be slavishly bound to it.  Think of the current approach as
>>> something between whole-system and userspace emulation.
>>
>> This is similar to ARM, M68k and Xtensa semi-hosting mode, but not at
>> kernel level but lower. Perhaps this mode should be enabled with
>> -semihosting flag or a new flag. Then the bare metal version could be
>> run without the flag.
>
> and then we'd have 2 implementations for running in system emulation mode and need to maintain both. I don't think that scales very well.
No, but such hacks are not common.
>>
>>> Where does the device tree come from?  How do you tell the guest about
>>> what devices it has, especially in virtualization scenarios with non-PCI
>>> passthrough devices, or custom qdev instantiations?
>>>
>>>> But at least we'd always be running the same guest software stack.
>>>
>>> No we wouldn't.  Any U-Boot that runs under QEMU would have to be
>>> heavily modified, unless we want to implement a ton of random device
>>> emulation, at least one extra memory translation layer (LAWs, localbus
>>> windows, CCSRBAR, and such), hacks to allow locked cache lines to
>>> operate despite a lack of backing store, etc.
>>
>> I'd say HW emulation business as usual. Now with the new memory API,
>> it should be possible to emulate the caches with line locking and TLBs
>> etc., this was not previously possible. IIRC implementing locked cache
>> lines would allow x86 to boot unmodified coreboot.
>
> So how would you emulate cache lines with line locking on KVM?
The cache would be an MMIO device which registers to handle all memory
space. Configuring the cache controller changes how the device
operates. Put this device between CPU and memory and other devices.
Performance would probably be horrible, so CPU should disable the
device automatically after some time.
> However, we already have a number of hacks in SeaBIOS to run in QEMU, so I don't see an issue in adding a few here and there in u-boot. The memory pressure is a real issue though. I'm not sure how we'd manage that one. Maybe we could try and reuse the host u-boot binary? heh
I don't think SeaBIOS breaks layering except for fw_cfg. For extremely
memory-limited situations, perhaps QEMU (or Native KVM Tool for a lean
and mean version) could be run without glibc, inside kernel or even
interfacing directly with the hypervisor. I'd also continue making it
possible to disable building unused devices and features.
>>
>>> -Scott
>>>
>>> [1] Keep in mind that a major use case for e500 KVM is on host systems
>>> that don't have a hard drive.  I want to *reduce* the amount of memory
>>> we waste to store this stuff, not increase it.
>>
>> Interesting use case. Is there a display device?
>
> There are some boards with display and/or PCI(e), yes.
>
>
> Alex
>
>
^ permalink raw reply	[flat|nested] 128+ messages in thread 
- * Re: [Qemu-devel] [PATCH 24/58] PPC: E500: Add PV spinning code
  2011-09-27 16:53                           ` Blue Swirl
@ 2011-09-27 17:01                             ` Richard Henderson
  2011-09-27 17:17                               ` Blue Swirl
  2011-09-27 17:03                             ` Alexander Graf
  1 sibling, 1 reply; 128+ messages in thread
From: Richard Henderson @ 2011-09-27 17:01 UTC (permalink / raw)
  To: Blue Swirl
  Cc: Alexander Graf, qemu-devel Developers, Yoder Stuart-B08248,
	qemu-ppc, Scott Wood, Aurelien Jarno
On 09/27/2011 09:53 AM, Blue Swirl wrote:
>> > So how would you emulate cache lines with line locking on KVM?
> The cache would be a MMIO device which registers to handle all memory
> space. Configuring the cache controller changes how the device
> operates. Put this device between CPU and memory and other devices.
> Performance would probably be horrible, so CPU should disable the
> device automatically after some time.
> 
Seems like a better alternative would be to add an mmio device when
a line is actually locked.  And the device would cover *only* the
locked line.  I assume that following the boot process these lines
are unlocked, and the normal running state of the system would have
none of these mmio devices active.
r~
^ permalink raw reply	[flat|nested] 128+ messages in thread 
- * Re: [Qemu-devel] [PATCH 24/58] PPC: E500: Add PV spinning code
  2011-09-27 17:01                             ` Richard Henderson
@ 2011-09-27 17:17                               ` Blue Swirl
  2011-09-27 17:19                                 ` Richard Henderson
  0 siblings, 1 reply; 128+ messages in thread
From: Blue Swirl @ 2011-09-27 17:17 UTC (permalink / raw)
  To: Richard Henderson
  Cc: Alexander Graf, qemu-devel Developers, Yoder Stuart-B08248,
	qemu-ppc, Scott Wood, Aurelien Jarno
On Tue, Sep 27, 2011 at 5:01 PM, Richard Henderson <rth@twiddle.net> wrote:
> On 09/27/2011 09:53 AM, Blue Swirl wrote:
>>> > So how would you emulate cache lines with line locking on KVM?
>> The cache would be a MMIO device which registers to handle all memory
>> space. Configuring the cache controller changes how the device
>> operates. Put this device between CPU and memory and other devices.
>> Performance would probably be horrible, so CPU should disable the
>> device automatically after some time.
>>
>
> Seems like a better alternative would be to add an mmio device when
> a line is actually locked.  And the device would cover *only* the
> locked line.  I assume that following the boot process these lines
> are unlocked, and the normal running state of the system would have
> none of these mmio devices active.
The BIOS may also attempt to perform tests with the cache device,
probe for cache sizes or read back I/D TLB lines via diagnostic modes.
That wouldn't work in your approach.
^ permalink raw reply	[flat|nested] 128+ messages in thread 
- * Re: [Qemu-devel] [PATCH 24/58] PPC: E500: Add PV spinning code
  2011-09-27 17:17                               ` Blue Swirl
@ 2011-09-27 17:19                                 ` Richard Henderson
  2011-09-27 17:23                                   ` Blue Swirl
  0 siblings, 1 reply; 128+ messages in thread
From: Richard Henderson @ 2011-09-27 17:19 UTC (permalink / raw)
  To: Blue Swirl
  Cc: Alexander Graf, qemu-devel Developers, Yoder Stuart-B08248,
	qemu-ppc, Scott Wood, Aurelien Jarno
On 09/27/2011 10:17 AM, Blue Swirl wrote:
> On Tue, Sep 27, 2011 at 5:01 PM, Richard Henderson <rth@twiddle.net> wrote:
>> On 09/27/2011 09:53 AM, Blue Swirl wrote:
>>>>> So how would you emulate cache lines with line locking on KVM?
>>> The cache would be a MMIO device which registers to handle all memory
>>> space. Configuring the cache controller changes how the device
>>> operates. Put this device between CPU and memory and other devices.
>>> Performance would probably be horrible, so CPU should disable the
>>> device automatically after some time.
>>>
>>
>> Seems like a better alternative would be to add an mmio device when
>> a line is actually locked.  And the device would cover *only* the
>> locked line.  I assume that following the boot process these lines
>> are unlocked, and the normal running state of the system would have
>> none of these mmio devices active.
> 
> The BIOS may also attempt to perform tests with the cache device,
> probe for cache sizes or read back I/D TLB lines via diagnostic modes.
> That wouldn't work in your approach.
Err... why not?
r~
^ permalink raw reply	[flat|nested] 128+ messages in thread 
- * Re: [Qemu-devel] [PATCH 24/58] PPC: E500: Add PV spinning code
  2011-09-27 17:19                                 ` Richard Henderson
@ 2011-09-27 17:23                                   ` Blue Swirl
  0 siblings, 0 replies; 128+ messages in thread
From: Blue Swirl @ 2011-09-27 17:23 UTC (permalink / raw)
  To: Richard Henderson
  Cc: Alexander Graf, qemu-devel Developers, Yoder Stuart-B08248,
	qemu-ppc, Scott Wood, Aurelien Jarno
On Tue, Sep 27, 2011 at 5:19 PM, Richard Henderson <rth@twiddle.net> wrote:
> On 09/27/2011 10:17 AM, Blue Swirl wrote:
>> On Tue, Sep 27, 2011 at 5:01 PM, Richard Henderson <rth@twiddle.net> wrote:
>>> On 09/27/2011 09:53 AM, Blue Swirl wrote:
>>>>>> So how would you emulate cache lines with line locking on KVM?
>>>> The cache would be a MMIO device which registers to handle all memory
>>>> space. Configuring the cache controller changes how the device
>>>> operates. Put this device between CPU and memory and other devices.
>>>> Performance would probably be horrible, so CPU should disable the
>>>> device automatically after some time.
>>>>
>>>
>>> Seems like a better alternative would be to add an mmio device when
>>> a line is actually locked.  And the device would cover *only* the
>>> locked line.  I assume that following the boot process these lines
>>> are unlocked, and the normal running state of the system would have
>>> none of these mmio devices active.
>>
>> The BIOS may also attempt to perform tests with the cache device,
>> probe for cache sizes or read back I/D TLB lines via diagnostic modes.
>> That wouldn't work in your approach.
>
> Err... why not?
This is not related to the locked cache line mode. The BIOS could just
perform ordinary writes and reads from random memory addresses and
expect that the cache diagnostics registers change accordingly. The
cache device would have to cover all of memory to catch the accesses
and then update the registers.
^ permalink raw reply	[flat|nested] 128+ messages in thread 
 
 
 
- * Re: [Qemu-devel] [PATCH 24/58] PPC: E500: Add PV spinning code
  2011-09-27 16:53                           ` Blue Swirl
  2011-09-27 17:01                             ` Richard Henderson
@ 2011-09-27 17:03                             ` Alexander Graf
  2011-09-27 17:20                               ` Blue Swirl
  2011-09-27 17:58                               ` Scott Wood
  1 sibling, 2 replies; 128+ messages in thread
From: Alexander Graf @ 2011-09-27 17:03 UTC (permalink / raw)
  To: Blue Swirl
  Cc: Scott Wood, Yoder Stuart-B08248, qemu-ppc, qemu-devel Developers,
	Aurelien Jarno
On 27.09.2011, at 18:53, Blue Swirl wrote:
> On Tue, Sep 27, 2011 at 3:59 PM, Alexander Graf <agraf@suse.de> wrote:
>> 
>> On 27.09.2011, at 17:50, Blue Swirl wrote:
>> 
>>> On Mon, Sep 26, 2011 at 11:19 PM, Scott Wood <scottwood@freescale.com> wrote:
>>>> On 09/24/2011 05:00 AM, Alexander Graf wrote:
>>>>> On 24.09.2011, at 10:44, Blue Swirl wrote:
>>>>>> On Sat, Sep 24, 2011 at 8:03 AM, Alexander Graf <agraf@suse.de> wrote:
>>>>>>> On 24.09.2011, at 09:41, Blue Swirl wrote:
>>>>>>>> On Mon, Sep 19, 2011 at 4:12 PM, Scott Wood <scottwood@freescale.com> wrote:
>>>>>>>>> The goal with the spin table stuff, suboptimal as it is, was something
>>>>>>>>> that would work on any powerpc implementation.  Other
>>>>>>>>> implementation-specific release mechanisms are allowed, and are
>>>>>>>>> indicated by a property in the cpu node, but only if the loader knows
>>>>>>>>> that the OS supports it.
>>>>>>>>> 
>>>>>>>>>> IIUC the spec that includes these bits is not finalized yet. It is however in use on all u-boot versions for e500 that I'm aware of and the method Linux uses to bring up secondary CPUs.
>>>>>>>>> 
>>>>>>>>> It's in ePAPR 1.0, which has been out for a while now.  ePAPR 1.1 was
>>>>>>>>> just released which clarifies some things such as WIMG.
>>>>>>>>> 
>>>>>>>>>> Stuart / Scott, do you have any pointers to documentation where the spinning is explained?
>>>>>>>>> 
>>>>>>>>> https://www.power.org/resources/downloads/Power_ePAPR_APPROVED_v1.1.pdf
>>>>>>>> 
>>>>>>>> Chapter 5.5.2 describes the table. This is actually an interface
>>>>>>>> between OS and Open Firmware, obviously there can't be a real hardware
>>>>>>>> device that magically loads r3 etc.
>>>> 
>>>> Not Open Firmware, but rather an ePAPR-compliant loader.
>>> 
>>> 'boot program to client program interface definition'.
>>> 
>>>>>>>> The device method would break abstraction layers,
>>>> 
>>>> Which abstraction layers?
>>> 
>>> QEMU system emulation emulates hardware, not software. Hardware
>>> devices don't touch CPU registers.
>> 
>> The great part about this emulated device is that it's basically guest software running in host context. To the guest, it's not a device in the ordinary sense, such as vmport, but rather the same as software running on another core, just that the other core isn't running any software.
>> 
>> Sure, if you consider this a device, it does break abstraction layers. Just consider it as host running guest code, then it makes sense :).
>> 
>>> 
>>>>>>>> it's much like
>>>>>>>> vmport stuff in x86. Using a hypercall would be a small improvement.
>>>>>>>> Instead it should be possible to implement a small boot ROM which puts
>>>>>>>> the secondary CPUs into managed halt state without spinning, then the
>>>>>>>> boot CPU could send an IPI to a halted CPU to wake them up based on
>>>>>>>> the spin table, just like real HW would do.
>>>> 
>>>> The spin table, with no IPI or halt state, is what real HW does (or
>>>> rather, what software does on real HW) today.  It's ugly and inefficient
>>>> but it should work everywhere.  Anything else would be dependent on a
>>>> specific HW implementation.
>>> 
>>> Yes. Hardware doesn't ever implement the spin table.
>>> 
>>>>>>>> On Sparc32 OpenBIOS this
>>>>>>>> is something like a few lines of ASM on both sides.
>>>>>>> 
>>>>>>> That sounds pretty close to what I had implemented in v1. Back then the only comment was to do it using this method from Scott.
>>>> 
>>>> I had some comments on the actual v1 implementation as well. :-)
>>>> 
>>>>>>> So we have the choice between having code inside the guest that
>>>>>>> spins, maybe even only checks every x ms, by programming a timer,
>>>>>>> or we can try to make an event out of the memory write. V1 was
>>>>>>> the former, v2 (this one) is the latter. This version performs a
>>>>>>> lot better and is easier to understand.
>>>>>> 
>>>>>> The abstraction layers should not be broken lightly, I suppose some
>>>>>> performance or laziness^Wlocal optimization reasons were behind vmport
>>>>>> design too. The ideal way to solve this could be to detect a spinning
>>>>>> CPU and optimize that for all architectures, that could be tricky
>>>>>> though (if a CPU remains in the same TB for extended periods, inspect
>>>>>> the TB: if it performs a loop with a single load instruction, replace
>>>>>> the load by a special wait operation for any memory stores to that
>>>>>> page).
>>>> 
>>>> How's that going to work with KVM?
>>>> 
>>>>> In fact, the whole kernel loading way we go today is pretty much
>>>>> wrong. We should rather do it similar to OpenBIOS where firmware
>>>>> always loads and then pulls the kernel from QEMU using a PV
>>>>> interface. At that point, we would have to implement such an
>>>>> optimization as you suggest. Or implement a hypercall :).
>>>> 
>>>> I think the current approach is more usable for most purposes.  If you
>>>> start U-Boot instead of a kernel, how do you pass information on from the
>>>> user (kernel, rfs, etc)?  Require the user to create flash images[1]?
>>> 
>>> No, for example OpenBIOS gets the kernel command line from fw_cfg device.
>>> 
>>>> Maybe that's a useful mode of operation in some cases, but I don't think
>>>> we should be slavishly bound to it.  Think of the current approach as
>>>> something between whole-system and userspace emulation.
>>> 
>>> This is similar to ARM, M68k and Xtensa semi-hosting mode, but not at
>>> kernel level but lower. Perhaps this mode should be enabled with
>>> -semihosting flag or a new flag. Then the bare metal version could be
>>> run without the flag.
>> 
>> and then we'd have 2 implementations for running in system emulation mode and need to maintain both. I don't think that scales very well.
> 
> No, but such hacks are not common.
> 
>>> 
>>>> Where does the device tree come from?  How do you tell the guest about
>>>> what devices it has, especially in virtualization scenarios with non-PCI
>>>> passthrough devices, or custom qdev instantiations?
>>>> 
>>>>> But at least we'd always be running the same guest software stack.
>>>> 
>>>> No we wouldn't.  Any U-Boot that runs under QEMU would have to be
>>>> heavily modified, unless we want to implement a ton of random device
>>>> emulation, at least one extra memory translation layer (LAWs, localbus
>>>> windows, CCSRBAR, and such), hacks to allow locked cache lines to
>>>> operate despite a lack of backing store, etc.
>>> 
>>> I'd say HW emulation business as usual. Now with the new memory API,
>>> it should be possible to emulate the caches with line locking and TLBs
>>> etc., this was not previously possible. IIRC implementing locked cache
>>> lines would allow x86 to boot unmodified coreboot.
>> 
>> So how would you emulate cache lines with line locking on KVM?
> 
> The cache would be a MMIO device which registers to handle all memory
> space. Configuring the cache controller changes how the device
> operates. Put this device between CPU and memory and other devices.
> Performance would probably be horrible, so CPU should disable the
> device automatically after some time.
So how would you execute code on this region then? :)
> 
>> However, we already have a number of hacks in SeaBIOS to run in QEMU, so I don't see an issue in adding a few here and there in u-boot. The memory pressure is a real issue though. I'm not sure how we'd manage that one. Maybe we could try and reuse the host u-boot binary? heh
> 
> I don't think SeaBIOS breaks layering except for fw_cfg.
I'm not saying we're breaking layering there. I'm saying that changing u-boot is not so bad, since it's the same as we do with SeaBIOS. It was an argument in favor of your position.
> For extremely
> memory limited situation, perhaps QEMU (or Native KVM Tool for lean
> and mean version) could be run without glibc, inside kernel or even
> interfacing directly with the hypervisor. I'd also continue making it
> possible to disable building unused devices and features.
I'm pretty sure you're not the only one with that goal ;).
Alex
^ permalink raw reply	[flat|nested] 128+ messages in thread 
- * Re: [Qemu-devel] [PATCH 24/58] PPC: E500: Add PV spinning code
  2011-09-27 17:03                             ` Alexander Graf
@ 2011-09-27 17:20                               ` Blue Swirl
  2011-09-27 17:23                                 ` Alexander Graf
  2011-09-27 17:58                               ` Scott Wood
  1 sibling, 1 reply; 128+ messages in thread
From: Blue Swirl @ 2011-09-27 17:20 UTC (permalink / raw)
  To: Alexander Graf
  Cc: Scott Wood, Yoder Stuart-B08248, qemu-ppc, qemu-devel Developers,
	Aurelien Jarno
On Tue, Sep 27, 2011 at 5:03 PM, Alexander Graf <agraf@suse.de> wrote:
>
> On 27.09.2011, at 18:53, Blue Swirl wrote:
>
>> On Tue, Sep 27, 2011 at 3:59 PM, Alexander Graf <agraf@suse.de> wrote:
>>>
>>> On 27.09.2011, at 17:50, Blue Swirl wrote:
>>>
>>>> On Mon, Sep 26, 2011 at 11:19 PM, Scott Wood <scottwood@freescale.com> wrote:
>>>>> On 09/24/2011 05:00 AM, Alexander Graf wrote:
>>>>>> On 24.09.2011, at 10:44, Blue Swirl wrote:
>>>>>>> On Sat, Sep 24, 2011 at 8:03 AM, Alexander Graf <agraf@suse.de> wrote:
>>>>>>>> On 24.09.2011, at 09:41, Blue Swirl wrote:
>>>>>>>>> On Mon, Sep 19, 2011 at 4:12 PM, Scott Wood <scottwood@freescale.com> wrote:
>>>>>>>>>> The goal with the spin table stuff, suboptimal as it is, was something
>>>>>>>>>> that would work on any powerpc implementation.  Other
>>>>>>>>>> implementation-specific release mechanisms are allowed, and are
>>>>>>>>>> indicated by a property in the cpu node, but only if the loader knows
>>>>>>>>>> that the OS supports it.
>>>>>>>>>>
>>>>>>>>>>> IIUC the spec that includes these bits is not finalized yet. It is however in use on all u-boot versions for e500 that I'm aware of and the method Linux uses to bring up secondary CPUs.
>>>>>>>>>>
>>>>>>>>>> It's in ePAPR 1.0, which has been out for a while now.  ePAPR 1.1 was
>>>>>>>>>> just released which clarifies some things such as WIMG.
>>>>>>>>>>
>>>>>>>>>>> Stuart / Scott, do you have any pointers to documentation where the spinning is explained?
>>>>>>>>>>
>>>>>>>>>> https://www.power.org/resources/downloads/Power_ePAPR_APPROVED_v1.1.pdf
>>>>>>>>>
>>>>>>>>> Chapter 5.5.2 describes the table. This is actually an interface
>>>>>>>>> between OS and Open Firmware, obviously there can't be a real hardware
>>>>>>>>> device that magically loads r3 etc.
>>>>>
>>>>> Not Open Firmware, but rather an ePAPR-compliant loader.
>>>>
>>>> 'boot program to client program interface definition'.
>>>>
>>>>>>>>> The device method would break abstraction layers,
>>>>>
>>>>> Which abstraction layers?
>>>>
>>>> QEMU system emulation emulates hardware, not software. Hardware
>>>> devices don't touch CPU registers.
>>>
>>> The great part about this emulated device is that it's basically guest software running in host context. To the guest, it's not a device in the ordinary sense, such as vmport, but rather the same as software running on another core, just that the other core isn't running any software.
>>>
>>> Sure, if you consider this a device, it does break abstraction layers. Just consider it as host running guest code, then it makes sense :).
>>>
>>>>
>>>>>>>>> it's much like
>>>>>>>>> vmport stuff in x86. Using a hypercall would be a small improvement.
>>>>>>>>> Instead it should be possible to implement a small boot ROM which puts
>>>>>>>>> the secondary CPUs into managed halt state without spinning, then the
>>>>>>>>> boot CPU could send an IPI to a halted CPU to wake them up based on
>>>>>>>>> the spin table, just like real HW would do.
>>>>>
>>>>> The spin table, with no IPI or halt state, is what real HW does (or
>>>>> rather, what software does on real HW) today.  It's ugly and inefficient
>>>>> but it should work everywhere.  Anything else would be dependent on a
>>>>> specific HW implementation.
>>>>
>>>> Yes. Hardware doesn't ever implement the spin table.
>>>>
>>>>>>>>> On Sparc32 OpenBIOS this
>>>>>>>>> is something like a few lines of ASM on both sides.
>>>>>>>>
>>>>>>>> That sounds pretty close to what I had implemented in v1. Back then the only comment was to do it using this method from Scott.
>>>>>
>>>>> I had some comments on the actual v1 implementation as well. :-)
>>>>>
>>>>>>>> So we have the choice between having code inside the guest that
>>>>>>>> spins, maybe even only checks every x ms, by programming a timer,
>>>>>>>> or we can try to make an event out of the memory write. V1 was
>>>>>>>> the former, v2 (this one) is the latter. This version performs a
>>>>>>>> lot better and is easier to understand.
>>>>>>>
>>>>>>> The abstraction layers should not be broken lightly, I suppose some
>>>>>>> performance or laziness^Wlocal optimization reasons were behind vmport
>>>>>>> design too. The ideal way to solve this could be to detect a spinning
>>>>>>> CPU and optimize that for all architectures, that could be tricky
>>>>>>> though (if a CPU remains in the same TB for extended periods, inspect
>>>>>>> the TB: if it performs a loop with a single load instruction, replace
>>>>>>> the load by a special wait operation for any memory stores to that
>>>>>>> page).
>>>>>
>>>>> How's that going to work with KVM?
>>>>>
>>>>>> In fact, the whole kernel loading way we go today is pretty much
>>>>>> wrong. We should rather do it similar to OpenBIOS where firmware
>>>>>> always loads and then pulls the kernel from QEMU using a PV
>>>>>> interface. At that point, we would have to implement such an
>>>>>> optimization as you suggest. Or implement a hypercall :).
>>>>>
>>>>> I think the current approach is more usable for most purposes.  If you
>>>>> start U-Boot instead of a kernel, how do you pass information on from the
>>>>> user (kernel, rfs, etc)?  Require the user to create flash images[1]?
>>>>
>>>> No, for example OpenBIOS gets the kernel command line from fw_cfg device.
>>>>
>>>>> Maybe that's a useful mode of operation in some cases, but I don't think
>>>>> we should be slavishly bound to it.  Think of the current approach as
>>>>> something between whole-system and userspace emulation.
>>>>
>>>> This is similar to ARM, M68k and Xtensa semi-hosting mode, but not at
>>>> kernel level but lower. Perhaps this mode should be enabled with
>>>> -semihosting flag or a new flag. Then the bare metal version could be
>>>> run without the flag.
>>>
>>> and then we'd have 2 implementations for running in system emulation mode and need to maintain both. I don't think that scales very well.
>>
>> No, but such hacks are not common.
>>
>>>>
>>>>> Where does the device tree come from?  How do you tell the guest about
>>>>> what devices it has, especially in virtualization scenarios with non-PCI
>>>>> passthrough devices, or custom qdev instantiations?
>>>>>
>>>>>> But at least we'd always be running the same guest software stack.
>>>>>
>>>>> No we wouldn't.  Any U-Boot that runs under QEMU would have to be
>>>>> heavily modified, unless we want to implement a ton of random device
>>>>> emulation, at least one extra memory translation layer (LAWs, localbus
>>>>> windows, CCSRBAR, and such), hacks to allow locked cache lines to
>>>>> operate despite a lack of backing store, etc.
>>>>
>>>> I'd say HW emulation business as usual. Now with the new memory API,
>>>> it should be possible to emulate the caches with line locking and TLBs
>>>> etc., this was not previously possible. IIRC implementing locked cache
>>>> lines would allow x86 to boot unmodified coreboot.
>>>
>>> So how would you emulate cache lines with line locking on KVM?
>>
>> The cache would be a MMIO device which registers to handle all memory
>> space. Configuring the cache controller changes how the device
>> operates. Put this device between CPU and memory and other devices.
>> Performance would probably be horrible, so CPU should disable the
>> device automatically after some time.
>
> So how would you execute code on this region then? :)
Easy, fix QEMU to allow executing from MMIO. (Yeah, I forgot about that).
>>
>>> However, we already have a number of hacks in SeaBIOS to run in QEMU, so I don't see an issue in adding a few here and there in u-boot. The memory pressure is a real issue though. I'm not sure how we'd manage that one. Maybe we could try and reuse the host u-boot binary? heh
>>
>> I don't think SeaBIOS breaks layering except for fw_cfg.
>
> I'm not saying we're breaking layering there. I'm saying that changing u-boot is not so bad, since it's the same as we do with SeaBIOS. It was an argument in favor of your position.
Never mind then ;-)
>> For extremely
>> memory limited situation, perhaps QEMU (or Native KVM Tool for lean
>> and mean version) could be run without glibc, inside kernel or even
>> interfacing directly with the hypervisor. I'd also continue making it
>> possible to disable building unused devices and features.
>
> I'm pretty sure you're not the only one with that goal ;).
Great, let's do it.
^ permalink raw reply	[flat|nested] 128+ messages in thread 
- * Re: [Qemu-devel] [PATCH 24/58] PPC: E500: Add PV spinning code
  2011-09-27 17:20                               ` Blue Swirl
@ 2011-09-27 17:23                                 ` Alexander Graf
  2011-09-27 19:05                                   ` Blue Swirl
  0 siblings, 1 reply; 128+ messages in thread
From: Alexander Graf @ 2011-09-27 17:23 UTC (permalink / raw)
  To: Blue Swirl
  Cc: Scott Wood, Yoder Stuart-B08248, qemu-ppc, qemu-devel Developers,
	Aurelien Jarno
On 27.09.2011, at 19:20, Blue Swirl wrote:
> On Tue, Sep 27, 2011 at 5:03 PM, Alexander Graf <agraf@suse.de> wrote:
>> 
>> On 27.09.2011, at 18:53, Blue Swirl wrote:
>> 
>>> On Tue, Sep 27, 2011 at 3:59 PM, Alexander Graf <agraf@suse.de> wrote:
>>>> 
>>>> On 27.09.2011, at 17:50, Blue Swirl wrote:
>>>> 
>>>>> On Mon, Sep 26, 2011 at 11:19 PM, Scott Wood <scottwood@freescale.com> wrote:
>>>>>> On 09/24/2011 05:00 AM, Alexander Graf wrote:
>>>>>>> On 24.09.2011, at 10:44, Blue Swirl wrote:
>>>>>>>> On Sat, Sep 24, 2011 at 8:03 AM, Alexander Graf <agraf@suse.de> wrote:
>>>>>>>>> On 24.09.2011, at 09:41, Blue Swirl wrote:
>>>>>>>>>> On Mon, Sep 19, 2011 at 4:12 PM, Scott Wood <scottwood@freescale.com> wrote:
>>>>>>>>>>> The goal with the spin table stuff, suboptimal as it is, was something
>>>>>>>>>>> that would work on any powerpc implementation.  Other
>>>>>>>>>>> implementation-specific release mechanisms are allowed, and are
>>>>>>>>>>> indicated by a property in the cpu node, but only if the loader knows
>>>>>>>>>>> that the OS supports it.
>>>>>>>>>>> 
>>>>>>>>>>>> IIUC the spec that includes these bits is not finalized yet. It is however in use on all u-boot versions for e500 that I'm aware of and the method Linux uses to bring up secondary CPUs.
>>>>>>>>>>> 
>>>>>>>>>>> It's in ePAPR 1.0, which has been out for a while now.  ePAPR 1.1 was
>>>>>>>>>>> just released which clarifies some things such as WIMG.
>>>>>>>>>>> 
>>>>>>>>>>>> Stuart / Scott, do you have any pointers to documentation where the spinning is explained?
>>>>>>>>>>> 
>>>>>>>>>>> https://www.power.org/resources/downloads/Power_ePAPR_APPROVED_v1.1.pdf
>>>>>>>>>> 
>>>>>>>>>> Chapter 5.5.2 describes the table. This is actually an interface
>>>>>>>>>> between OS and Open Firmware, obviously there can't be a real hardware
>>>>>>>>>> device that magically loads r3 etc.
>>>>>> 
>>>>>> Not Open Firmware, but rather an ePAPR-compliant loader.
>>>>> 
>>>>> 'boot program to client program interface definition'.
>>>>> 
>>>>>>>>>> The device method would break abstraction layers,
>>>>>> 
>>>>>> Which abstraction layers?
>>>>> 
>>>>> QEMU system emulation emulates hardware, not software. Hardware
>>>>> devices don't touch CPU registers.
>>>> 
>>>> The great part about this emulated device is that it's basically guest software running in host context. To the guest, it's not a device in the ordinary sense, such as vmport, but rather the same as software running on another core, just that the other core isn't running any software.
>>>> 
>>>> Sure, if you consider this a device, it does break abstraction layers. Just consider it as host running guest code, then it makes sense :).
>>>> 
>>>>> 
>>>>>>>>>> it's much like
>>>>>>>>>> vmport stuff in x86. Using a hypercall would be a small improvement.
>>>>>>>>>> Instead it should be possible to implement a small boot ROM which puts
>>>>>>>>>> the secondary CPUs into managed halt state without spinning, then the
>>>>>>>>>> boot CPU could send an IPI to a halted CPU to wake them up based on
>>>>>>>>>> the spin table, just like real HW would do.
>>>>>> 
>>>>>> The spin table, with no IPI or halt state, is what real HW does (or
>>>>>> rather, what software does on real HW) today.  It's ugly and inefficient
>>>>>> but it should work everywhere.  Anything else would be dependent on a
>>>>>> specific HW implementation.
>>>>> 
>>>>> Yes. Hardware doesn't ever implement the spin table.
>>>>> 
>>>>>>>>>> On Sparc32 OpenBIOS this
>>>>>>>>>> is something like a few lines of ASM on both sides.
>>>>>>>>> 
>>>>>>>>> That sounds pretty close to what I had implemented in v1. Back then the only comment was to do it using this method from Scott.
>>>>>> 
>>>>>> I had some comments on the actual v1 implementation as well. :-)
>>>>>> 
>>>>>>>>> So we have the choice between having code inside the guest that
>>>>>>>>> spins, maybe even only checks every x ms, by programming a timer,
>>>>>>>>> or we can try to make an event out of the memory write. V1 was
>>>>>>>>> the former, v2 (this one) is the latter. This version performs a
>>>>>>>>> lot better and is easier to understand.
>>>>>>>> 
>>>>>>>> The abstraction layers should not be broken lightly, I suppose some
>>>>>>>> performance or laziness^Wlocal optimization reasons were behind vmport
>>>>>>>> design too. The ideal way to solve this could be to detect a spinning
>>>>>>>> CPU and optimize that for all architectures, that could be tricky
>>>>>>>> though (if a CPU remains in the same TB for extended periods, inspect
>>>>>>>> the TB: if it performs a loop with a single load instruction, replace
>>>>>>>> the load by a special wait operation for any memory stores to that
>>>>>>>> page).
>>>>>> 
>>>>>> How's that going to work with KVM?
>>>>>> 
>>>>>>> In fact, the whole kernel loading way we go today is pretty much
>>>>>>> wrong. We should rather do it similar to OpenBIOS where firmware
>>>>>>> always loads and then pulls the kernel from QEMU using a PV
>>>>>>> interface. At that point, we would have to implement such an
>>>>>>> optimization as you suggest. Or implement a hypercall :).
>>>>>> 
>>>>>> I think the current approach is more usable for most purposes.  If you
>>>>>> start U-Boot instead of a kernel, how do you pass information on from the
>>>>>> user (kernel, rfs, etc)?  Require the user to create flash images[1]?
>>>>> 
>>>>> No, for example OpenBIOS gets the kernel command line from fw_cfg device.
>>>>> 
>>>>>> Maybe that's a useful mode of operation in some cases, but I don't think
>>>>>> we should be slavishly bound to it.  Think of the current approach as
>>>>>> something between whole-system and userspace emulation.
>>>>> 
>>>>> This is similar to ARM, M68k and Xtensa semi-hosting mode, but not at
>>>>> kernel level but lower. Perhaps this mode should be enabled with
>>>>> -semihosting flag or a new flag. Then the bare metal version could be
>>>>> run without the flag.
>>>> 
>>>> and then we'd have 2 implementations for running in system emulation mode and need to maintain both. I don't think that scales very well.
>>> 
>>> No, but such hacks are not common.
>>> 
>>>>> 
>>>>>> Where does the device tree come from?  How do you tell the guest about
>>>>>> what devices it has, especially in virtualization scenarios with non-PCI
>>>>>> passthrough devices, or custom qdev instantiations?
>>>>>> 
>>>>>>> But at least we'd always be running the same guest software stack.
>>>>>> 
>>>>>> No we wouldn't.  Any U-Boot that runs under QEMU would have to be
>>>>>> heavily modified, unless we want to implement a ton of random device
>>>>>> emulation, at least one extra memory translation layer (LAWs, localbus
>>>>>> windows, CCSRBAR, and such), hacks to allow locked cache lines to
>>>>>> operate despite a lack of backing store, etc.
>>>>> 
>>>>> I'd say HW emulation business as usual. Now with the new memory API,
>>>>> it should be possible to emulate the caches with line locking and TLBs
>>>>> etc., this was not previously possible. IIRC implementing locked cache
>>>>> lines would allow x86 to boot unmodified coreboot.
>>>> 
>>>> So how would you emulate cache lines with line locking on KVM?
>>> 
>>> The cache would be a MMIO device which registers to handle all memory
>>> space. Configuring the cache controller changes how the device
>>> operates. Put this device between CPU and memory and other devices.
>>> Performance would probably be horrible, so CPU should disable the
>>> device automatically after some time.
>> 
>> So how would you execute code on this region then? :)
> 
> Easy, fix QEMU to allow executing from MMIO. (Yeah, I forgot about that).
It's not quite as easy to fix KVM to do the same though unfortunately. We'd have to either implement a full instruction emulator in the kernel (x86 style) or transfer all state from KVM into QEMU to execute it there (hell breaks loose). Both alternatives are not exactly appealing.
> 
>>> 
>>>> However, we already have a number of hacks in SeaBIOS to run in QEMU, so I don't see an issue in adding a few here and there in u-boot. The memory pressure is a real issue though. I'm not sure how we'd manage that one. Maybe we could try and reuse the host u-boot binary? heh
>>> 
>>> I don't think SeaBIOS breaks layering except for fw_cfg.
>> 
>> I'm not saying we're breaking layering there. I'm saying that changing u-boot is not so bad, since it's the same as we do with SeaBIOS. It was an argument in favor of your position.
> 
> Never mind then ;-)
> 
>>> For extremely
>>> memory limited situation, perhaps QEMU (or Native KVM Tool for lean
>>> and mean version) could be run without glibc, inside kernel or even
>>> interfacing directly with the hypervisor. I'd also continue making it
>>> possible to disable building unused devices and features.
>> 
>> I'm pretty sure you're not the only one with that goal ;).
> 
> Great, let's do it.
VGA comes first :)
Alex
^ permalink raw reply	[flat|nested] 128+ messages in thread 
- * Re: [Qemu-devel] [PATCH 24/58] PPC: E500: Add PV spinning code
  2011-09-27 17:23                                 ` Alexander Graf
@ 2011-09-27 19:05                                   ` Blue Swirl
  2011-09-28  7:40                                     ` Alexander Graf
  0 siblings, 1 reply; 128+ messages in thread
From: Blue Swirl @ 2011-09-27 19:05 UTC (permalink / raw)
  To: Alexander Graf
  Cc: Scott Wood, Yoder Stuart-B08248, qemu-ppc, qemu-devel Developers,
	Aurelien Jarno
[-- Attachment #1: Type: text/plain, Size: 9395 bytes --]
On Tue, Sep 27, 2011 at 5:23 PM, Alexander Graf <agraf@suse.de> wrote:
>
> On 27.09.2011, at 19:20, Blue Swirl wrote:
>
>> On Tue, Sep 27, 2011 at 5:03 PM, Alexander Graf <agraf@suse.de> wrote:
>>>
>>> On 27.09.2011, at 18:53, Blue Swirl wrote:
>>>
>>>> On Tue, Sep 27, 2011 at 3:59 PM, Alexander Graf <agraf@suse.de> wrote:
>>>>>
>>>>> On 27.09.2011, at 17:50, Blue Swirl wrote:
>>>>>
>>>>>> On Mon, Sep 26, 2011 at 11:19 PM, Scott Wood <scottwood@freescale.com> wrote:
>>>>>>> On 09/24/2011 05:00 AM, Alexander Graf wrote:
>>>>>>>> On 24.09.2011, at 10:44, Blue Swirl wrote:
>>>>>>>>> On Sat, Sep 24, 2011 at 8:03 AM, Alexander Graf <agraf@suse.de> wrote:
>>>>>>>>>> On 24.09.2011, at 09:41, Blue Swirl wrote:
>>>>>>>>>>> On Mon, Sep 19, 2011 at 4:12 PM, Scott Wood <scottwood@freescale.com> wrote:
>>>>>>>>>>>> The goal with the spin table stuff, suboptimal as it is, was something
>>>>>>>>>>>> that would work on any powerpc implementation.  Other
>>>>>>>>>>>> implementation-specific release mechanisms are allowed, and are
>>>>>>>>>>>> indicated by a property in the cpu node, but only if the loader knows
>>>>>>>>>>>> that the OS supports it.
>>>>>>>>>>>>
>>>>>>>>>>>>> IIUC the spec that includes these bits is not finalized yet. It is however in use on all u-boot versions for e500 that I'm aware of and the method Linux uses to bring up secondary CPUs.
>>>>>>>>>>>>
>>>>>>>>>>>> It's in ePAPR 1.0, which has been out for a while now.  ePAPR 1.1 was
>>>>>>>>>>>> just released which clarifies some things such as WIMG.
>>>>>>>>>>>>
>>>>>>>>>>>>> Stuart / Scott, do you have any pointers to documentation where the spinning is explained?
>>>>>>>>>>>>
>>>>>>>>>>>> https://www.power.org/resources/downloads/Power_ePAPR_APPROVED_v1.1.pdf
>>>>>>>>>>>
>>>>>>>>>>> Chapter 5.5.2 describes the table. This is actually an interface
>>>>>>>>>>> between OS and Open Firmware, obviously there can't be a real hardware
>>>>>>>>>>> device that magically loads r3 etc.
>>>>>>>
>>>>>>> Not Open Firmware, but rather an ePAPR-compliant loader.
>>>>>>
>>>>>> 'boot program to client program interface definition'.
>>>>>>
>>>>>>>>>>> The device method would break abstraction layers,
>>>>>>>
>>>>>>> Which abstraction layers?
>>>>>>
>>>>>> QEMU system emulation emulates hardware, not software. Hardware
>>>>>> devices don't touch CPU registers.
>>>>>
>>>>> The great part about this emulated device is that it's basically guest software running in host context. To the guest, it's not a device in the ordinary sense, such as vmport, but rather the same as software running on another core, just that the other core isn't running any software.
>>>>>
>>>>> Sure, if you consider this a device, it does break abstraction layers. Just consider it as host running guest code, then it makes sense :).
>>>>>
>>>>>>
>>>>>>>>>>> it's much like
>>>>>>>>>>> vmport stuff in x86. Using a hypercall would be a small improvement.
>>>>>>>>>>> Instead it should be possible to implement a small boot ROM which puts
>>>>>>>>>>> the secondary CPUs into managed halt state without spinning, then the
>>>>>>>>>>> boot CPU could send an IPI to a halted CPU to wake them up based on
>>>>>>>>>>> the spin table, just like real HW would do.
>>>>>>>
>>>>>>> The spin table, with no IPI or halt state, is what real HW does (or
>>>>>>> rather, what software does on real HW) today.  It's ugly and inefficient
>>>>>>> but it should work everywhere.  Anything else would be dependent on a
>>>>>>> specific HW implementation.
>>>>>>
>>>>>> Yes. Hardware doesn't ever implement the spin table.
>>>>>>
>>>>>>>>>>> On Sparc32 OpenBIOS this
>>>>>>>>>>> is something like a few lines of ASM on both sides.
>>>>>>>>>>
>>>>>>>>>> That sounds pretty close to what I had implemented in v1. Back then the only comment was to do it using this method from Scott.
>>>>>>>
>>>>>>> I had some comments on the actual v1 implementation as well. :-)
>>>>>>>
>>>>>>>>>> So we have the choice between having code inside the guest that
>>>>>>>>>> spins, maybe even only checks every x ms, by programming a timer,
>>>>>>>>>> or we can try to make an event out of the memory write. V1 was
>>>>>>>>>> the former, v2 (this one) is the latter. This version performs a
>>>>>>>>>> lot better and is easier to understand.
>>>>>>>>>
>>>>>>>>> The abstraction layers should not be broken lightly, I suppose some
>>>>>>>>> performance or laziness^Wlocal optimization reasons were behind vmport
>>>>>>>>> design too. The ideal way to solve this could be to detect a spinning
>>>>>>>>> CPU and optimize that for all architectures, that could be tricky
>>>>>>>>> though (if a CPU remains in the same TB for extended periods, inspect
>>>>>>>>> the TB: if it performs a loop with a single load instruction, replace
>>>>>>>>> the load by a special wait operation for any memory stores to that
>>>>>>>>> page).
>>>>>>>
>>>>>>> How's that going to work with KVM?
>>>>>>>
>>>>>>>> In fact, the whole kernel loading way we go today is pretty much
>>>>>>>> wrong. We should rather do it similar to OpenBIOS where firmware
>>>>>>>> always loads and then pulls the kernel from QEMU using a PV
>>>>>>>> interface. At that point, we would have to implement such an
>>>>>>>> optimization as you suggest. Or implement a hypercall :).
>>>>>>>
>>>>>>> I think the current approach is more usable for most purposes.  If you
>>>>>>> start U-Boot instead of a kernel, how do you pass information on from the
>>>>>>> user (kernel, rfs, etc)?  Require the user to create flash images[1]?
>>>>>>
>>>>>> No, for example OpenBIOS gets the kernel command line from fw_cfg device.
>>>>>>
>>>>>>> Maybe that's a useful mode of operation in some cases, but I don't think
>>>>>>> we should be slavishly bound to it.  Think of the current approach as
>>>>>>> something between whole-system and userspace emulation.
>>>>>>
>>>>>> This is similar to ARM, M68k and Xtensa semi-hosting mode, but not at
>>>>>> kernel level but lower. Perhaps this mode should be enabled with
>>>>>> -semihosting flag or a new flag. Then the bare metal version could be
>>>>>> run without the flag.
>>>>>
>>>>> and then we'd have 2 implementations for running in system emulation mode and need to maintain both. I don't think that scales very well.
>>>>
>>>> No, but such hacks are not common.
>>>>
>>>>>>
>>>>>>> Where does the device tree come from?  How do you tell the guest about
>>>>>>> what devices it has, especially in virtualization scenarios with non-PCI
>>>>>>> passthrough devices, or custom qdev instantiations?
>>>>>>>
>>>>>>>> But at least we'd always be running the same guest software stack.
>>>>>>>
>>>>>>> No we wouldn't.  Any U-Boot that runs under QEMU would have to be
>>>>>>> heavily modified, unless we want to implement a ton of random device
>>>>>>> emulation, at least one extra memory translation layer (LAWs, localbus
>>>>>>> windows, CCSRBAR, and such), hacks to allow locked cache lines to
>>>>>>> operate despite a lack of backing store, etc.
>>>>>>
>>>>>> I'd say HW emulation business as usual. Now with the new memory API,
>>>>>> it should be possible to emulate the caches with line locking and TLBs
>>>>>> etc., this was not previously possible. IIRC implementing locked cache
>>>>>> lines would allow x86 to boot unmodified coreboot.
>>>>>
>>>>> So how would you emulate cache lines with line locking on KVM?
>>>>
>>>> The cache would be a MMIO device which registers to handle all memory
>>>> space. Configuring the cache controller changes how the device
>>>> operates. Put this device between CPU and memory and other devices.
>>>> Performance would probably be horrible, so CPU should disable the
>>>> device automatically after some time.
>>>
>>> So how would you execute code on this region then? :)
>>
>> Easy, fix QEMU to allow executing from MMIO. (Yeah, I forgot about that).
>
> It's not quite as easy to fix KVM to do the same though unfortunately. We'd have to either implement a full instruction emulator in the kernel (x86 style) or transfer all state from KVM into QEMU to execute it there (hell breaks loose). Both alternatives are not exactly appealing.
>
>>
>>>>
>>>>> However, we already have a number of hacks in SeaBIOS to run in QEMU, so I don't see an issue in adding a few here and there in u-boot. The memory pressure is a real issue though. I'm not sure how we'd manage that one. Maybe we could try and reuse the host u-boot binary? heh
>>>>
>>>> I don't think SeaBIOS breaks layering except for fw_cfg.
>>>
>>> I'm not saying we're breaking layering there. I'm saying that changing u-boot is not so bad, since it's the same as we do with SeaBIOS. It was an argument in favor of your position.
>>
>> Never mind then ;-)
>>
>>>> For extremely
>>>> memory limited situation, perhaps QEMU (or Native KVM Tool for lean
>>>> and mean version) could be run without glibc, inside kernel or even
>>>> interfacing directly with the hypervisor. I'd also continue making it
>>>> possible to disable building unused devices and features.
>>>
>>> I'm pretty sure you're not the only one with that goal ;).
>>
>> Great, let's do it.
>
> VGA comes first :)
This patch fixes the easy parts, ISA devices remain since they are not
qdevified. But didn't someone already send patches to do that?
[-- Attachment #2: vga-optional.patch --]
[-- Type: text/x-diff, Size: 4589 bytes --]
diff --git a/hw/cirrus_vga.c b/hw/cirrus_vga.c
index c7e365b..a11444c 100644
--- a/hw/cirrus_vga.c
+++ b/hw/cirrus_vga.c
@@ -2955,11 +2955,6 @@ static int pci_cirrus_vga_initfn(PCIDevice *dev)
      return 0;
 }
 
-void pci_cirrus_vga_init(PCIBus *bus)
-{
-    pci_create_simple(bus, -1, "cirrus-vga");
-}
-
 static PCIDeviceInfo cirrus_vga_info = {
     .qdev.name    = "cirrus-vga",
     .qdev.desc    = "Cirrus CLGD 54xx VGA",
diff --git a/hw/pc.c b/hw/pc.c
index 203627d..97f93d4 100644
--- a/hw/pc.c
+++ b/hw/pc.c
@@ -1068,7 +1068,11 @@ void pc_vga_init(PCIBus *pci_bus)
 {
     if (cirrus_vga_enabled) {
         if (pci_bus) {
-            pci_cirrus_vga_init(pci_bus);
+            if (!pci_cirrus_vga_init(pci_bus)) {
+                fprintf(stderr, "Warning: cirrus_vga not available,"
+                        " using standard VGA instead\n");
+                pci_vga_init(pci_bus);
+            }
         } else {
             isa_cirrus_vga_init(get_system_memory());
         }
diff --git a/hw/pc.h b/hw/pc.h
index 7e6ddba..90a502d 100644
--- a/hw/pc.h
+++ b/hw/pc.h
@@ -8,6 +8,7 @@
 #include "fdc.h"
 #include "net.h"
 #include "memory.h"
+#include "pci.h"
 
 /* PC-style peripherals (also used by other machines).  */
 
@@ -217,13 +218,34 @@ static inline int isa_vga_init(void)
     return 1;
 }
 
-int pci_vga_init(PCIBus *bus);
+/* vga-pci.c */
+static inline bool pci_vga_init(PCIBus *bus)
+{
+    PCIDevice *dev;
+
+    dev = pci_try_create_simple(bus, -1, "VGA");
+    if (!dev) {
+        return false;
+    }
+    return true;
+}
+
 int isa_vga_mm_init(target_phys_addr_t vram_base,
                     target_phys_addr_t ctrl_base, int it_shift,
                     MemoryRegion *address_space);
 
 /* cirrus_vga.c */
-void pci_cirrus_vga_init(PCIBus *bus);
+static inline bool pci_cirrus_vga_init(PCIBus *bus)
+{
+    PCIDevice *dev;
+
+    dev = pci_try_create_simple(bus, -1, "cirrus-vga");
+    if (!dev) {
+        return false;
+    }
+    return true;
+}
+
 void isa_cirrus_vga_init(MemoryRegion *address_space);
 
 /* ne2000.c */
diff --git a/hw/pci.c b/hw/pci.c
index 749e8d8..46c01ac 100644
--- a/hw/pci.c
+++ b/hw/pci.c
@@ -1687,6 +1687,19 @@ PCIDevice *pci_create_simple_multifunction(PCIBus *bus, int devfn,
     return dev;
 }
 
+PCIDevice *pci_try_create_simple_multifunction(PCIBus *bus, int devfn,
+                                               bool multifunction,
+                                               const char *name)
+{
+    PCIDevice *dev = pci_try_create_multifunction(bus, devfn, multifunction,
+                                                  name);
+    if (!dev) {
+        return NULL;
+    }
+    qdev_init_nofail(&dev->qdev);
+    return dev;
+}
+
 PCIDevice *pci_create(PCIBus *bus, int devfn, const char *name)
 {
     return pci_create_multifunction(bus, devfn, false, name);
@@ -1702,6 +1715,11 @@ PCIDevice *pci_try_create(PCIBus *bus, int devfn, const char *name)
     return pci_try_create_multifunction(bus, devfn, false, name);
 }
 
+PCIDevice *pci_try_create_simple(PCIBus *bus, int devfn, const char *name)
+{
+    return pci_try_create_simple_multifunction(bus, devfn, false, name);
+}
+
 static int pci_find_space(PCIDevice *pdev, uint8_t size)
 {
     int config_size = pci_config_size(pdev);
diff --git a/hw/pci.h b/hw/pci.h
index 86a81c8..aa2e040 100644
--- a/hw/pci.h
+++ b/hw/pci.h
@@ -473,9 +473,13 @@ PCIDevice *pci_create_simple_multifunction(PCIBus *bus, int devfn,
 PCIDevice *pci_try_create_multifunction(PCIBus *bus, int devfn,
                                         bool multifunction,
                                         const char *name);
+PCIDevice *pci_try_create_simple_multifunction(PCIBus *bus, int devfn,
+                                               bool multifunction,
+                                               const char *name);
 PCIDevice *pci_create(PCIBus *bus, int devfn, const char *name);
 PCIDevice *pci_create_simple(PCIBus *bus, int devfn, const char *name);
 PCIDevice *pci_try_create(PCIBus *bus, int devfn, const char *name);
+PCIDevice *pci_try_create_simple(PCIBus *bus, int devfn, const char *name);
 
 static inline int pci_is_express(const PCIDevice *d)
 {
diff --git a/hw/vga-pci.c b/hw/vga-pci.c
index 3c8bcb0..f296b19 100644
--- a/hw/vga-pci.c
+++ b/hw/vga-pci.c
@@ -70,12 +70,6 @@ static int pci_vga_initfn(PCIDevice *dev)
      return 0;
 }
 
-int pci_vga_init(PCIBus *bus)
-{
-    pci_create_simple(bus, -1, "VGA");
-    return 0;
-}
-
 static PCIDeviceInfo vga_info = {
     .qdev.name    = "VGA",
     .qdev.size    = sizeof(PCIVGAState),
^ permalink raw reply related	[flat|nested] 128+ messages in thread
- * Re: [Qemu-devel] [PATCH 24/58] PPC: E500: Add PV spinning code
  2011-09-27 19:05                                   ` Blue Swirl
@ 2011-09-28  7:40                                     ` Alexander Graf
  0 siblings, 0 replies; 128+ messages in thread
From: Alexander Graf @ 2011-09-28  7:40 UTC (permalink / raw)
  To: Blue Swirl
  Cc: Scott Wood, Yoder Stuart-B08248, qemu-ppc@nongnu.org,
	qemu-devel Developers, Aurelien Jarno
Am 27.09.2011 um 21:05 schrieb Blue Swirl <blauwirbel@gmail.com>:
> On Tue, Sep 27, 2011 at 5:23 PM, Alexander Graf <agraf@suse.de> wrote:
>> 
>> On 27.09.2011, at 19:20, Blue Swirl wrote:
>> 
>>> On Tue, Sep 27, 2011 at 5:03 PM, Alexander Graf <agraf@suse.de> wrote:
>>>> 
>>>> On 27.09.2011, at 18:53, Blue Swirl wrote:
>>>> 
>>>>> On Tue, Sep 27, 2011 at 3:59 PM, Alexander Graf <agraf@suse.de> wrote:
>>>>>> 
>>>>>> On 27.09.2011, at 17:50, Blue Swirl wrote:
>>>>>> 
>>>>>>> On Mon, Sep 26, 2011 at 11:19 PM, Scott Wood <scottwood@freescale.com> wrote:
>>>>>>>> On 09/24/2011 05:00 AM, Alexander Graf wrote:
>>>>>>>>> On 24.09.2011, at 10:44, Blue Swirl wrote:
>>>>>>>>>> On Sat, Sep 24, 2011 at 8:03 AM, Alexander Graf <agraf@suse.de> wrote:
>>>>>>>>>>> On 24.09.2011, at 09:41, Blue Swirl wrote:
>>>>>>>>>>>> On Mon, Sep 19, 2011 at 4:12 PM, Scott Wood <scottwood@freescale.com> wrote:
>>>>>>>>>>>>> The goal with the spin table stuff, suboptimal as it is, was something
>>>>>>>>>>>>> that would work on any powerpc implementation.  Other
>>>>>>>>>>>>> implementation-specific release mechanisms are allowed, and are
>>>>>>>>>>>>> indicated by a property in the cpu node, but only if the loader knows
>>>>>>>>>>>>> that the OS supports it.
>>>>>>>>>>>>> 
>>>>>>>>>>>>>> IIUC the spec that includes these bits is not finalized yet. It is however in use on all u-boot versions for e500 that I'm aware of and the method Linux uses to bring up secondary CPUs.
>>>>>>>>>>>>> 
>>>>>>>>>>>>> It's in ePAPR 1.0, which has been out for a while now.  ePAPR 1.1 was
>>>>>>>>>>>>> just released which clarifies some things such as WIMG.
>>>>>>>>>>>>> 
>>>>>>>>>>>>>> Stuart / Scott, do you have any pointers to documentation where the spinning is explained?
>>>>>>>>>>>>> 
>>>>>>>>>>>>> https://www.power.org/resources/downloads/Power_ePAPR_APPROVED_v1.1.pdf
>>>>>>>>>>>> 
>>>>>>>>>>>> Chapter 5.5.2 describes the table. This is actually an interface
>>>>>>>>>>>> between OS and Open Firmware, obviously there can't be a real hardware
>>>>>>>>>>>> device that magically loads r3 etc.
>>>>>>>> 
>>>>>>>> Not Open Firmware, but rather an ePAPR-compliant loader.
>>>>>>> 
>>>>>>> 'boot program to client program interface definition'.
>>>>>>> 
>>>>>>>>>>>> The device method would break abstraction layers,
>>>>>>>> 
>>>>>>>> Which abstraction layers?
>>>>>>> 
>>>>>>> QEMU system emulation emulates hardware, not software. Hardware
>>>>>>> devices don't touch CPU registers.
>>>>>> 
>>>>>> The great part about this emulated device is that it's basically guest software running in host context. To the guest, it's not a device in the ordinary sense, such as vmport, but rather the same as software running on another core, just that the other core isn't running any software.
>>>>>> 
>>>>>> Sure, if you consider this a device, it does break abstraction layers. Just consider it as host running guest code, then it makes sense :).
>>>>>> 
>>>>>>> 
>>>>>>>>>>>> it's much like
>>>>>>>>>>>> vmport stuff in x86. Using a hypercall would be a small improvement.
>>>>>>>>>>>> Instead it should be possible to implement a small boot ROM which puts
>>>>>>>>>>>> the secondary CPUs into managed halt state without spinning, then the
>>>>>>>>>>>> boot CPU could send an IPI to a halted CPU to wake them up based on
>>>>>>>>>>>> the spin table, just like real HW would do.
>>>>>>>> 
>>>>>>>> The spin table, with no IPI or halt state, is what real HW does (or
>>>>>>>> rather, what software does on real HW) today.  It's ugly and inefficient
>>>>>>>> but it should work everywhere.  Anything else would be dependent on a
>>>>>>>> specific HW implementation.
>>>>>>> 
>>>>>>> Yes. Hardware doesn't ever implement the spin table.
>>>>>>> 
>>>>>>>>>>>> On Sparc32 OpenBIOS this
>>>>>>>>>>>> is something like a few lines of ASM on both sides.
>>>>>>>>>>> 
>>>>>>>>>>> That sounds pretty close to what I had implemented in v1. Back then the only comment was to do it using this method from Scott.
>>>>>>>> 
>>>>>>>> I had some comments on the actual v1 implementation as well. :-)
>>>>>>>> 
>>>>>>>>>>> So we have the choice between having code inside the guest that
>>>>>>>>>>> spins, maybe even only checks every x ms, by programming a timer,
>>>>>>>>>>> or we can try to make an event out of the memory write. V1 was
>>>>>>>>>>> the former, v2 (this one) is the latter. This version performs a
>>>>>>>>>>> lot better and is easier to understand.
>>>>>>>>>> 
>>>>>>>>>> The abstraction layers should not be broken lightly, I suppose some
>>>>>>>>>> performance or laziness^Wlocal optimization reasons were behind vmport
>>>>>>>>>> design too. The ideal way to solve this could be to detect a spinning
>>>>>>>>>> CPU and optimize that for all architectures, that could be tricky
>>>>>>>>>> though (if a CPU remains in the same TB for extended periods, inspect
>>>>>>>>>> the TB: if it performs a loop with a single load instruction, replace
>>>>>>>>>> the load by a special wait operation for any memory stores to that
>>>>>>>>>> page).
>>>>>>>> 
>>>>>>>> How's that going to work with KVM?
>>>>>>>> 
>>>>>>>>> In fact, the whole kernel loading way we go today is pretty much
>>>>>>>>> wrong. We should rather do it similar to OpenBIOS where firmware
>>>>>>>>> always loads and then pulls the kernel from QEMU using a PV
>>>>>>>>> interface. At that point, we would have to implement such an
>>>>>>>>> optimization as you suggest. Or implement a hypercall :).
>>>>>>>> 
>>>>>>>> I think the current approach is more usable for most purposes.  If you
>>>>>>>> start U-Boot instead of a kernel, how do you pass information on from the
>>>>>>>> user (kernel, rfs, etc)?  Require the user to create flash images[1]?
>>>>>>> 
>>>>>>> No, for example OpenBIOS gets the kernel command line from fw_cfg device.
>>>>>>> 
>>>>>>>> Maybe that's a useful mode of operation in some cases, but I don't think
>>>>>>>> we should be slavishly bound to it.  Think of the current approach as
>>>>>>>> something between whole-system and userspace emulation.
>>>>>>> 
>>>>>>> This is similar to ARM, M68k and Xtensa semi-hosting mode, but not at
>>>>>>> kernel level but lower. Perhaps this mode should be enabled with
>>>>>>> -semihosting flag or a new flag. Then the bare metal version could be
>>>>>>> run without the flag.
>>>>>> 
>>>>>> and then we'd have 2 implementations for running in system emulation mode and need to maintain both. I don't think that scales very well.
>>>>> 
>>>>> No, but such hacks are not common.
>>>>> 
>>>>>>> 
>>>>>>>> Where does the device tree come from?  How do you tell the guest about
>>>>>>>> what devices it has, especially in virtualization scenarios with non-PCI
>>>>>>>> passthrough devices, or custom qdev instantiations?
>>>>>>>> 
>>>>>>>>> But at least we'd always be running the same guest software stack.
>>>>>>>> 
>>>>>>>> No we wouldn't.  Any U-Boot that runs under QEMU would have to be
>>>>>>>> heavily modified, unless we want to implement a ton of random device
>>>>>>>> emulation, at least one extra memory translation layer (LAWs, localbus
>>>>>>>> windows, CCSRBAR, and such), hacks to allow locked cache lines to
>>>>>>>> operate despite a lack of backing store, etc.
>>>>>>> 
>>>>>>> I'd say HW emulation business as usual. Now with the new memory API,
>>>>>>> it should be possible to emulate the caches with line locking and TLBs
>>>>>>> etc., this was not previously possible. IIRC implementing locked cache
>>>>>>> lines would allow x86 to boot unmodified coreboot.
>>>>>> 
>>>>>> So how would you emulate cache lines with line locking on KVM?
>>>>> 
>>>>> The cache would be a MMIO device which registers to handle all memory
>>>>> space. Configuring the cache controller changes how the device
>>>>> operates. Put this device between CPU and memory and other devices.
>>>>> Performance would probably be horrible, so CPU should disable the
>>>>> device automatically after some time.
>>>> 
>>>> So how would you execute code on this region then? :)
>>> 
>>> Easy, fix QEMU to allow executing from MMIO. (Yeah, I forgot about that).
>> 
>> It's not quite as easy to fix KVM to do the same though unfortunately. We'd have to either implement a full instruction emulator in the kernel (x86 style) or transfer all state from KVM into QEMU to execute it there (hell breaks loose). Both alternatives are not exactly appealing.
>> 
>>> 
>>>>> 
>>>>>> However, we already have a number of hacks in SeaBIOS to run in QEMU, so I don't see an issue in adding a few here and there in u-boot. The memory pressure is a real issue though. I'm not sure how we'd manage that one. Maybe we could try and reuse the host u-boot binary? heh
>>>>> 
>>>>> I don't think SeaBIOS breaks layering except for fw_cfg.
>>>> 
>>>> I'm not saying we're breaking layering there. I'm saying that changing u-boot is not so bad, since it's the same as we do with SeaBIOS. It was an argument in favor of your position.
>>> 
>>> Never mind then ;-)
>>> 
>>>>> For extremely
>>>>> memory limited situation, perhaps QEMU (or Native KVM Tool for lean
>>>>> and mean version) could be run without glibc, inside kernel or even
>>>>> interfacing directly with the hypervisor. I'd also continue making it
>>>>> possible to disable building unused devices and features.
>>>> 
>>>> I'm pretty sure you're not the only one with that goal ;).
>>> 
>>> Great, let's do it.
>> 
>> VGA comes first :)
> 
> This patch fixes the easy parts, ISA devices remain since they are not
> qdevified. But didn't someone already send patches to do that?
> <vga-optional.patch>
Heh - I was thinking about the Mac VGA breakage :). Still looking at it. Your patch did look correct, but something seems to go wrong with vram mapping. Maybe.
Alex
^ permalink raw reply	[flat|nested] 128+ messages in thread 
 
 
 
- * Re: [Qemu-devel] [PATCH 24/58] PPC: E500: Add PV spinning code
  2011-09-27 17:03                             ` Alexander Graf
  2011-09-27 17:20                               ` Blue Swirl
@ 2011-09-27 17:58                               ` Scott Wood
  2011-09-27 18:47                                 ` Blue Swirl
  1 sibling, 1 reply; 128+ messages in thread
From: Scott Wood @ 2011-09-27 17:58 UTC (permalink / raw)
  To: Alexander Graf
  Cc: Blue Swirl, Yoder Stuart-B08248, qemu-ppc, qemu-devel Developers,
	Aurelien Jarno
On 09/27/2011 12:03 PM, Alexander Graf wrote:
> On 27.09.2011, at 18:53, Blue Swirl wrote:
>> On Tue, Sep 27, 2011 at 3:59 PM, Alexander Graf <agraf@suse.de> wrote:
>>> On 27.09.2011, at 17:50, Blue Swirl wrote:
>>>> On Mon, Sep 26, 2011 at 11:19 PM, Scott Wood <scottwood@freescale.com> wrote:
>>>>> I think the current approach is more usable for most purposes.  If you
>>>>> start U-Boot instead of a kernel, how do you pass information on from the
>>>>> user (kernel, rfs, etc)?  Require the user to create flash images[1]?
>>>>
>>>> No, for example OpenBIOS gets the kernel command line from fw_cfg device.
Is that really so different from making QEMU act as an ePAPR-compliant
loader?
A big difference here is that on x86 you have a large quantity of
runtime services provided by the BIOS, which I can certainly understand
not wanting to shove into QEMU (though I'd still think you'd want to
hack out the initialization parts of the BIOS that depend on special
cache behavior, RAM controllers, and such, if at all practical).  This
spin table stuff is the only runtime service provided by an ePAPR loader.
>>>>> Maybe that's a useful mode of operation in some cases, but I don't think
>>>>> we should be slavishly bound to it.  Think of the current approach as
>>>>> something between whole-system and userspace emulation.
>>>>
>>>> This is similar to ARM, M68k and Xtensa semi-hosting mode, but not at
>>>> kernel level but lower. Perhaps this mode should be enabled with
>>>> -semihosting flag or a new flag. Then the bare metal version could be
>>>> run without the flag.
>>>
>>> and then we'd have 2 implementations for running in system emulation mode and need to maintain both. I don't think that scales very well.
>>
>> No, but such hacks are not common.
How much would actually need to be duplicated?  Seems like other than
turning some features off (a smaller set of devices, and not needing
extra address translation/cache layers), you'd just have a different way
of loading/starting the guest.  Most of that code would be common to all
ePAPR targets (with a few HW-specific variants for the MMU setup portion).
>>> So how would you emulate cache lines with line locking on KVM?
>>
>> The cache would be a MMIO device which registers to handle all memory
>> space. Configuring the cache controller changes how the device
>> operates. Put this device between CPU and memory and other devices.
>> Performance would probably be horrible, so CPU should disable the
>> device automatically after some time.
> 
> So how would you execute code on this region then? :)
I think U-Boot only does this for data, not code.  But U-Boot will want
to execute out of flash, and it will want to be able to move/resize the
flash window using the localbus controller (can't treat it as a
statically located rom image).
Easier would be to run a U-Boot that assumes RAM is already configured,
which we support for booting from sources other than NOR flash (would be
some minor tweakage to decouple it from those boot scenarios).  There'd
still be a lot of random I/O that needs to be emulated, or avoided with
changes to U-Boot.  And there'd still be the question of where the
device tree comes from, how information gets passed on from qemu, etc.
Full system emulation at that level would be an interesting project and
have its uses, but it would be a lot of work and should not get in the
way of workloads that don't need/want it.  The requirements and
usability considerations for virtualization are not always the same as
for traditional emulation use cases.
-Scott
^ permalink raw reply	[flat|nested] 128+ messages in thread 
- * Re: [Qemu-devel] [PATCH 24/58] PPC: E500: Add PV spinning code
  2011-09-27 17:58                               ` Scott Wood
@ 2011-09-27 18:47                                 ` Blue Swirl
  0 siblings, 0 replies; 128+ messages in thread
From: Blue Swirl @ 2011-09-27 18:47 UTC (permalink / raw)
  To: Scott Wood
  Cc: Yoder Stuart-B08248, qemu-ppc, Alexander Graf, Aurelien Jarno,
	qemu-devel Developers
On Tue, Sep 27, 2011 at 5:58 PM, Scott Wood <scottwood@freescale.com> wrote:
> On 09/27/2011 12:03 PM, Alexander Graf wrote:
>> On 27.09.2011, at 18:53, Blue Swirl wrote:
>>> On Tue, Sep 27, 2011 at 3:59 PM, Alexander Graf <agraf@suse.de> wrote:
>>>> On 27.09.2011, at 17:50, Blue Swirl wrote:
>>>>> On Mon, Sep 26, 2011 at 11:19 PM, Scott Wood <scottwood@freescale.com> wrote:
>>>>>> I think the current approach is more usable for most purposes.  If you
>>>>>> start U-Boot instead of a kernel, how do you pass information on from the
>>>>>> user (kernel, rfs, etc)?  Require the user to create flash images[1]?
>>>>>
>>>>> No, for example OpenBIOS gets the kernel command line from fw_cfg device.
>
> Is that really so different from making QEMU act as an ePAPR-compliant
> loader?
The difference is that the fw_cfg interface is pretty simple and
private between QEMU and OpenBIOS, whereas the magical HW supporting the
ePAPR interface is public and exposed to the kernel. With the ROM approach,
the interface would exist between the ROM and kernel as intended.
> A big difference here is that on x86 you have a large quantity of
> runtime services provided by the BIOS, which I can certainly understand
> not wanting to shove into QEMU (though I'd still think you'd want to
> hack out the initialization parts of the BIOS that depend on special
> cache behavior, RAM controllers, and such, if at all practical).  This
> spin table stuff is the only runtime service provided by an ePAPR loader.
I think I've confused sPAPR (which has a lot of messy OF tree stuff) and ePAPR.
>>>>>> Maybe that's a useful mode of operation in some cases, but I don't think
>>>>>> we should be slavishly bound to it.  Think of the current approach as
>>>>>> something between whole-system and userspace emulation.
>>>>>
>>>>> This is similar to ARM, M68k and Xtensa semi-hosting mode, but not at
>>>>> kernel level but lower. Perhaps this mode should be enabled with
>>>>> -semihosting flag or a new flag. Then the bare metal version could be
>>>>> run without the flag.
>>>>
>>>> and then we'd have 2 implementations for running in system emulation mode and need to maintain both. I don't think that scales very well.
>>>
>>> No, but such hacks are not common.
>
> How much would actually need to be duplicated?  Seems like other than
> turning some features off (a smaller set of devices, and not needing
> extra address translation/cache layers), you'd just have a different way
> of loading/starting the guest.  Most of that code would be common to all
> ePAPR targets (with a few HW-specific variants for the MMU setup portion).
That looks OK.
>>>> So how would you emulate cache lines with line locking on KVM?
>>>
>>> The cache would be a MMIO device which registers to handle all memory
>>> space. Configuring the cache controller changes how the device
>>> operates. Put this device between CPU and memory and other devices.
>>> Performance would probably be horrible, so CPU should disable the
>>> device automatically after some time.
>>
>> So how would you execute code on this region then? :)
>
> I think U-Boot only does this for data, not code.  But U-Boot will want
> to execute out of flash, and it will want to be able to move/resize the
> flash window using the localbus controller (can't treat it as a
> statically located rom image).
This is a different issue: a limitation exists that QEMU can't execute
from an MMIO region. But flash is a bit different: on write it's MMIO but
on read it behaves like RAM, so it should work.
> Easier would be to run a U-Boot that assumes RAM is already configured,
> which we support for booting from sources other than NOR flash (would be
> some minor tweakage to decouple it from those boot scenarios).  There'd
> still be a lot of random I/O that needs to be emulated, or avoided with
> changes to U-Boot.  And there'd still be the question of where the
> device tree comes from, how information gets passed on from qemu, etc.
I'd use fw_cfg for passing the device tree to the ROM.
> Full system emulation at that level would be an interesting project and
> have its uses, but it would be a lot of work and should not get in the
> way of workloads that don't need/want it.  The requirements and
> usability considerations for virtualization are not always the same as
> for traditional emulation use cases.
Yes and it's also possible to emulate different interfaces of a
system. But from QEMU code maintenance point of view, semi-hosting
approaches should not get in the way of system emulation. If they are
optional and do not affect the rest of the system, they are OK.
On the other hand, maybe it's a problem with QEMU's architecture: the
design is very much tuned to support HW emulation and not various
software interfaces.
^ permalink raw reply	[flat|nested] 128+ messages in thread 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
- * [Qemu-devel] [PATCH 25/58] PPC: E500: Update cpu-release-addr property in cpu nodes
  2011-09-14  8:42 [Qemu-devel] [PULL 00/58] ppc patch queue 2011-09-14 Alexander Graf
                   ` (23 preceding siblings ...)
  2011-09-14  8:42 ` [Qemu-devel] [PATCH 24/58] PPC: E500: Add PV spinning code Alexander Graf
@ 2011-09-14  8:42 ` Alexander Graf
  2011-09-14  8:42 ` [Qemu-devel] [PATCH 26/58] device tree: add add_subnode command Alexander Graf
                   ` (32 subsequent siblings)
  57 siblings, 0 replies; 128+ messages in thread
From: Alexander Graf @ 2011-09-14  8:42 UTC (permalink / raw)
  To: qemu-devel Developers; +Cc: Blue Swirl, qemu-ppc, Aurelien Jarno
The guest OS wants to know where the guest spins, so let's tell it while
updating the CPU nodes with the frequencies anyway.
Signed-off-by: Alexander Graf <agraf@suse.de>
---
v1 -> v2:
  - use new spin table address
---
 hw/ppce500_mpc8544ds.c |    6 ++++++
 1 files changed, 6 insertions(+), 0 deletions(-)
diff --git a/hw/ppce500_mpc8544ds.c b/hw/ppce500_mpc8544ds.c
index 3b8b449..a3e1ce4 100644
--- a/hw/ppce500_mpc8544ds.c
+++ b/hw/ppce500_mpc8544ds.c
@@ -125,9 +125,15 @@ static int mpc8544_load_device_tree(CPUState *env,
 
     for (i = 0; i < smp_cpus; i++) {
         char cpu_name[128];
+        uint64_t cpu_release_addr[] = {
+            cpu_to_be64(MPC8544_SPIN_BASE + (i * 0x20))
+        };
+
         snprintf(cpu_name, sizeof(cpu_name), "/cpus/PowerPC,8544@%x", i);
         qemu_devtree_setprop_cell(fdt, cpu_name, "clock-frequency", clock_freq);
         qemu_devtree_setprop_cell(fdt, cpu_name, "timebase-frequency", tb_freq);
+        qemu_devtree_setprop(fdt, cpu_name, "cpu-release-addr",
+                             cpu_release_addr, sizeof(cpu_release_addr));
     }
 
     for (i = smp_cpus; i < 32; i++) {
-- 
1.6.0.2
^ permalink raw reply related	[flat|nested] 128+ messages in thread
- * [Qemu-devel] [PATCH 26/58] device tree: add add_subnode command
  2011-09-14  8:42 [Qemu-devel] [PULL 00/58] ppc patch queue 2011-09-14 Alexander Graf
                   ` (24 preceding siblings ...)
  2011-09-14  8:42 ` [Qemu-devel] [PATCH 25/58] PPC: E500: Update cpu-release-addr property in cpu nodes Alexander Graf
@ 2011-09-14  8:42 ` Alexander Graf
  2011-09-14  8:42 ` [Qemu-devel] [PATCH 27/58] device tree: dont fail operations Alexander Graf
                   ` (31 subsequent siblings)
  57 siblings, 0 replies; 128+ messages in thread
From: Alexander Graf @ 2011-09-14  8:42 UTC (permalink / raw)
  To: qemu-devel Developers; +Cc: Blue Swirl, qemu-ppc, Aurelien Jarno
We want to be able to create subnodes in our device tree, so export it through
the qemu device tree abstraction framework.
Signed-off-by: Alexander Graf <agraf@suse.de>
---
 device_tree.c |   24 ++++++++++++++++++++++++
 device_tree.h |    1 +
 2 files changed, 25 insertions(+), 0 deletions(-)
diff --git a/device_tree.c b/device_tree.c
index 23e89e3..f4a78c8 100644
--- a/device_tree.c
+++ b/device_tree.c
@@ -118,3 +118,27 @@ int qemu_devtree_nop_node(void *fdt, const char *node_path)
 
     return fdt_nop_node(fdt, offset);
 }
+
+int qemu_devtree_add_subnode(void *fdt, const char *name)
+{
+    int offset;
+    char *dupname = g_strdup(name);
+    char *basename = strrchr(dupname, '/');
+    int retval;
+
+    if (!basename) {
+        return -1;
+    }
+
+    basename[0] = '\0';
+    basename++;
+
+    offset = fdt_path_offset(fdt, dupname);
+    if (offset < 0) {
+        return offset;
+    }
+
+    retval = fdt_add_subnode(fdt, offset, basename);
+    g_free(dupname);
+    return retval;
+}
diff --git a/device_tree.h b/device_tree.h
index 76fce5f..4378685 100644
--- a/device_tree.h
+++ b/device_tree.h
@@ -23,5 +23,6 @@ int qemu_devtree_setprop_cell(void *fdt, const char *node_path,
 int qemu_devtree_setprop_string(void *fdt, const char *node_path,
                                 const char *property, const char *string);
 int qemu_devtree_nop_node(void *fdt, const char *node_path);
+int qemu_devtree_add_subnode(void *fdt, const char *name);
 
 #endif /* __DEVICE_TREE_H__ */
-- 
1.6.0.2
^ permalink raw reply related	[flat|nested] 128+ messages in thread
- * [Qemu-devel] [PATCH 27/58] device tree: dont fail operations
  2011-09-14  8:42 [Qemu-devel] [PULL 00/58] ppc patch queue 2011-09-14 Alexander Graf
                   ` (25 preceding siblings ...)
  2011-09-14  8:42 ` [Qemu-devel] [PATCH 26/58] device tree: add add_subnode command Alexander Graf
@ 2011-09-14  8:42 ` Alexander Graf
  2011-09-14  8:42 ` [Qemu-devel] [PATCH 28/58] device tree: give dt more size Alexander Graf
                   ` (30 subsequent siblings)
  57 siblings, 0 replies; 128+ messages in thread
From: Alexander Graf @ 2011-09-14  8:42 UTC (permalink / raw)
  To: qemu-devel Developers; +Cc: Blue Swirl, qemu-ppc, Aurelien Jarno
When we screw up and issue an FDT command that doesn't work, we really need to
know immediately and usually can't continue to create the machine. To make sure
we don't need to add error checking to every user of the device tree
modification code, we can just add the failure checks to the qemu abstraction
functions.
Signed-off-by: Alexander Graf <agraf@suse.de>
---
 device_tree.c |   76 ++++++++++++++++++++++++++++++++++++++------------------
 1 files changed, 51 insertions(+), 25 deletions(-)
diff --git a/device_tree.c b/device_tree.c
index f4a78c8..751538e 100644
--- a/device_tree.c
+++ b/device_tree.c
@@ -72,56 +72,81 @@ fail:
     return NULL;
 }
 
-int qemu_devtree_setprop(void *fdt, const char *node_path,
-                         const char *property, void *val_array, int size)
+static int findnode_nofail(void *fdt, const char *node_path)
 {
     int offset;
 
     offset = fdt_path_offset(fdt, node_path);
-    if (offset < 0)
-        return offset;
+    if (offset < 0) {
+        fprintf(stderr, "%s Couldn't find node %s: %s\n", __func__, node_path,
+                fdt_strerror(offset));
+        exit(1);
+    }
+
+    return offset;
+}
+
+int qemu_devtree_setprop(void *fdt, const char *node_path,
+                         const char *property, void *val_array, int size)
+{
+    int r;
+
+    r = fdt_setprop(fdt, findnode_nofail(fdt, node_path), property, val_array, size);
+    if (r < 0) {
+        fprintf(stderr, "%s: Couldn't set %s/%s: %s\n", __func__, node_path,
+                property, fdt_strerror(r));
+        exit(1);
+    }
 
-    return fdt_setprop(fdt, offset, property, val_array, size);
+    return r;
 }
 
 int qemu_devtree_setprop_cell(void *fdt, const char *node_path,
                               const char *property, uint32_t val)
 {
-    int offset;
+    int r;
 
-    offset = fdt_path_offset(fdt, node_path);
-    if (offset < 0)
-        return offset;
+    r = fdt_setprop_cell(fdt, findnode_nofail(fdt, node_path), property, val);
+    if (r < 0) {
+        fprintf(stderr, "%s: Couldn't set %s/%s = %#08x: %s\n", __func__,
+                node_path, property, val, fdt_strerror(r));
+        exit(1);
+    }
 
-    return fdt_setprop_cell(fdt, offset, property, val);
+    return r;
 }
 
 int qemu_devtree_setprop_string(void *fdt, const char *node_path,
                                 const char *property, const char *string)
 {
-    int offset;
+    int r;
 
-    offset = fdt_path_offset(fdt, node_path);
-    if (offset < 0)
-        return offset;
+    r = fdt_setprop_string(fdt, findnode_nofail(fdt, node_path), property, string);
+    if (r < 0) {
+        fprintf(stderr, "%s: Couldn't set %s/%s = %s: %s\n", __func__,
+                node_path, property, string, fdt_strerror(r));
+        exit(1);
+    }
 
-    return fdt_setprop_string(fdt, offset, property, string);
+    return r;
 }
 
 int qemu_devtree_nop_node(void *fdt, const char *node_path)
 {
-    int offset;
+    int r;
 
-    offset = fdt_path_offset(fdt, node_path);
-    if (offset < 0)
-        return offset;
+    r = fdt_nop_node(fdt, findnode_nofail(fdt, node_path));
+    if (r < 0) {
+        fprintf(stderr, "%s: Couldn't nop node %s: %s\n", __func__, node_path,
+                fdt_strerror(r));
+        exit(1);
+    }
 
-    return fdt_nop_node(fdt, offset);
+    return r;
 }
 
 int qemu_devtree_add_subnode(void *fdt, const char *name)
 {
-    int offset;
     char *dupname = g_strdup(name);
     char *basename = strrchr(dupname, '/');
     int retval;
@@ -133,12 +158,13 @@ int qemu_devtree_add_subnode(void *fdt, const char *name)
     basename[0] = '\0';
     basename++;
 
-    offset = fdt_path_offset(fdt, dupname);
-    if (offset < 0) {
-        return offset;
+    retval = fdt_add_subnode(fdt, findnode_nofail(fdt, dupname), basename);
+    if (retval < 0) {
+        fprintf(stderr, "FDT: Failed to create subnode %s: %s\n", name,
+                fdt_strerror(retval));
+        exit(1);
     }
 
-    retval = fdt_add_subnode(fdt, offset, basename);
     g_free(dupname);
     return retval;
 }
-- 
1.6.0.2
^ permalink raw reply related	[flat|nested] 128+ messages in thread
- * [Qemu-devel] [PATCH 28/58] device tree: give dt more size
  2011-09-14  8:42 [Qemu-devel] [PULL 00/58] ppc patch queue 2011-09-14 Alexander Graf
                   ` (26 preceding siblings ...)
  2011-09-14  8:42 ` [Qemu-devel] [PATCH 27/58] device tree: dont fail operations Alexander Graf
@ 2011-09-14  8:42 ` Alexander Graf
  2011-09-15  3:19   ` [Qemu-devel] [Qemu-ppc] " David Gibson
  2011-09-14  8:42 ` [Qemu-devel] [PATCH 29/58] MPC8544DS: Remove CPU nodes Alexander Graf
                   ` (29 subsequent siblings)
  57 siblings, 1 reply; 128+ messages in thread
From: Alexander Graf @ 2011-09-14  8:42 UTC (permalink / raw)
  To: qemu-devel Developers; +Cc: Blue Swirl, qemu-ppc, Aurelien Jarno
We currently load a device tree blob and then just take its size x2 to
account for modifications we do inside. While this is nice and great,
it fails when we have a small device tree as blob and lots of nodes added
in machine init code.
So for now, just make it 20k bigger than it was before. We maybe want to
be more clever about this later.
Signed-off-by: Alexander Graf <agraf@suse.de>
---
 device_tree.c |    1 +
 1 files changed, 1 insertions(+), 0 deletions(-)
diff --git a/device_tree.c b/device_tree.c
index 751538e..dc69232 100644
--- a/device_tree.c
+++ b/device_tree.c
@@ -41,6 +41,7 @@ void *load_device_tree(const char *filename_path, int *sizep)
     }
 
     /* Expand to 2x size to give enough room for manipulation.  */
+    dt_size += 10000;
     dt_size *= 2;
     /* First allocate space in qemu for device tree */
     fdt = g_malloc0(dt_size);
-- 
1.6.0.2
^ permalink raw reply related	[flat|nested] 128+ messages in thread
- * Re: [Qemu-devel] [Qemu-ppc] [PATCH 28/58] device tree: give dt more size
  2011-09-14  8:42 ` [Qemu-devel] [PATCH 28/58] device tree: give dt more size Alexander Graf
@ 2011-09-15  3:19   ` David Gibson
  2011-09-15  7:37     ` Alexander Graf
  0 siblings, 1 reply; 128+ messages in thread
From: David Gibson @ 2011-09-15  3:19 UTC (permalink / raw)
  To: Alexander Graf; +Cc: qemu-ppc, qemu-devel Developers
On Wed, Sep 14, 2011 at 10:42:52AM +0200, Alexander Graf wrote:
> We currently load a device tree blob and then just take its size x2 to
> account for modifications we do inside. While this is nice and great,
> it fails when we have a small device tree as blob and lots of nodes added
> in machine init code.
> 
> So for now, just make it 20k bigger than it was before. We maybe want to
> be more clever about this later.
In fact, one of the few things I can think of that might justify
qemu's "abstraction" of the libfdt interface, is that the wrappers
could be modified to detect -FDT_ERR_NOSPACE and realloc()
appropriately.
Otherwise the wrappers, which are limited and not notably simpler to
use than the raw libfdt functions seem pretty pointless to me.
Not that I'm biased as the author of libfdt or anything :).
-- 
David Gibson			| I'll have my music baroque, and my code
david AT gibson.dropbear.id.au	| minimalist, thank you.  NOT _the_ _other_
				| _way_ _around_!
http://www.ozlabs.org/~dgibson
^ permalink raw reply	[flat|nested] 128+ messages in thread 
- * Re: [Qemu-devel] [Qemu-ppc] [PATCH 28/58] device tree: give dt more size
  2011-09-15  3:19   ` [Qemu-devel] [Qemu-ppc] " David Gibson
@ 2011-09-15  7:37     ` Alexander Graf
  2011-09-15 11:03       ` David Gibson
  0 siblings, 1 reply; 128+ messages in thread
From: Alexander Graf @ 2011-09-15  7:37 UTC (permalink / raw)
  To: David Gibson; +Cc: qemu-ppc, qemu-devel Developers
On 15.09.2011, at 05:19, David Gibson wrote:
> On Wed, Sep 14, 2011 at 10:42:52AM +0200, Alexander Graf wrote:
>> We currently load a device tree blob and then just take its size x2 to
>> account for modifications we do inside. While this is nice and great,
>> it fails when we have a small device tree as blob and lots of nodes added
>> in machine init code.
>> 
>> So for now, just make it 20k bigger than it was before. We maybe want to
>> be more clever about this later.
> 
> In fact, one of the few things I can think of that might justify
> qemu's "abstraction" of the libfdt interface, is that the wrappers
> could be modified to detect -FDT_ERR_NOSPACE and realloc()
> appropriately.
Oh, yeah, that sounds like a very good idea!
> Otherwise the wrappers, which are limited and not notably simpler to
> use than the raw libfdt functions seem pretty pointless to me.
> 
> Not that I'm biased as the author of libfdt or anything :).
I agree that the wrappers are not all that overly useful atm. I was actually very close to just ripping them out completely instead of extending them for new functionality. I did have the feeling that wrapping libfdt would give us a few benefits, maybe even the chance of getting rid of #ifdefs in target code.
Could you please put this on your todo list? We should probably force every target code in QEMU to only use the wrappers and dynamically realloc() in them.
Alex
^ permalink raw reply	[flat|nested] 128+ messages in thread 
- * Re: [Qemu-devel] [Qemu-ppc] [PATCH 28/58] device tree: give dt more size
  2011-09-15  7:37     ` Alexander Graf
@ 2011-09-15 11:03       ` David Gibson
  2011-09-15 15:00         ` Alexander Graf
  0 siblings, 1 reply; 128+ messages in thread
From: David Gibson @ 2011-09-15 11:03 UTC (permalink / raw)
  To: Alexander Graf; +Cc: qemu-ppc, qemu-devel Developers
On Thu, Sep 15, 2011 at 09:37:48AM +0200, Alexander Graf wrote:
> 
> On 15.09.2011, at 05:19, David Gibson wrote:
> 
> > On Wed, Sep 14, 2011 at 10:42:52AM +0200, Alexander Graf wrote:
> >> We currently load a device tree blob and then just take its size x2 to
> >> account for modifications we do inside. While this is nice and great,
> >> it fails when we have a small device tree as blob and lots of nodes added
> >> in machine init code.
> >> 
> >> So for now, just make it 20k bigger than it was before. We maybe want to
> >> be more clever about this later.
> > 
> > In fact, one of the few things I can think of that might justify
> > qemu's "abstraction" of the libfdt interface, is that the wrappers
> > could be modified to detect -FDT_ERR_NOSPACE and realloc()
> > appropriately.
> 
> Oh, yeah, that sounds like a very good idea!
> 
> > Otherwise the wrappers, which are limited and not notably simpler to
> > use than the raw libfdt functions seem pretty pointless to me.
> > 
> > Not that I'm biased as the author of libfdt or anything :).
> 
> I agree that the wrappers are not all that overly useful atm. I was
> actually very close to just ripping them out completely instead of
> extending them for new functionality. I did have the feeling that
> wrapping libfdt would give us a few benefits, maybe even the chance
> of getting rid of #ifdefs in target code.
Hrm, maybe.  Can't really see it.  Of course, my preference would be
to get rid of those #ifdefs by embedding libfdt in qemu so it's always
there.
> Could you please put this on your todo list? We should probably
> force every target code in QEMU to only use the wrappers and
> dynamically realloc() in them.
Uh, sure, but it's a long list and it won't be near the top.
The wrappers would need to be a lot more extensive to do this.  I use
libfdt directly in the spapr code for a reason.
-- 
David Gibson			| I'll have my music baroque, and my code
david AT gibson.dropbear.id.au	| minimalist, thank you.  NOT _the_ _other_
				| _way_ _around_!
http://www.ozlabs.org/~dgibson
^ permalink raw reply	[flat|nested] 128+ messages in thread 
- * Re: [Qemu-devel] [Qemu-ppc] [PATCH 28/58] device tree: give dt more size
  2011-09-15 11:03       ` David Gibson
@ 2011-09-15 15:00         ` Alexander Graf
  2011-09-16  1:49           ` David Gibson
  0 siblings, 1 reply; 128+ messages in thread
From: Alexander Graf @ 2011-09-15 15:00 UTC (permalink / raw)
  To: David Gibson; +Cc: qemu-ppc@nongnu.org, qemu-devel Developers
Am 15.09.2011 um 13:03 schrieb David Gibson <david@gibson.dropbear.id.au>:
> On Thu, Sep 15, 2011 at 09:37:48AM +0200, Alexander Graf wrote:
>> 
>> On 15.09.2011, at 05:19, David Gibson wrote:
>> 
>>> On Wed, Sep 14, 2011 at 10:42:52AM +0200, Alexander Graf wrote:
>>>> We currently load a device tree blob and then just take its size x2 to
>>>> account for modifications we do inside. While this is nice and great,
>>>> it fails when we have a small device tree as blob and lots of nodes added
>>>> in machine init code.
>>>> 
>>>> So for now, just make it 20k bigger than it was before. We maybe want to
>>>> be more clever about this later.
>>> 
>>> In fact, one of the few things I can think of that might justify
>>> qemu's "abstraction" of the libfdt interface, is that the wrappers
>>> could be modified to detect -FDT_ERR_NOSPACE and realloc()
>>> appropriately.
>> 
>> Oh, yeah, that sounds like a very good idea!
>> 
>>> Otherwise the wrappers, which are limited and not notably simpler to
>>> use than the raw libfdt functions seem pretty pointless to me.
>>> 
>>> Not that I'm biased as the author of libfdt or anything :).
>> 
>> I agree that the wrappers are not all that overly useful atm. I was
>> actually very close to just ripping them out completely instead of
>> extending them for new functionality. I did have the feeling that
>> wrapping libfdt would give us a few benefits, maybe even the chance
>> of getting rid of #ifdefs in target code.
> 
> Hrm, maybe.  Can't really see it.  Of course, my preference would be
> to get rid of those #ifdefs by embedding libfdt in qemu so it's always
> there.
It's a library and should be treated that way. But yeah, I'm inclined to fail configure for ppc when libfdt can't be found too.
> 
>> Could you please put this on your todo list? We should probably
>> force every target code in QEMU to only use the wrappers and
>> dynamically realloc() in them.
> 
> Uh, sure, but it's a long list and it won't be near the top.
> 
> The wrappers would need to be a lot more extensive to do this.  I use
> libfdt directly in the spapr code for a reason.
Expensiveness is not too bad here, no? It should still be fast.
Also, I'm perfectly fine with this being low on the list.
Alex
> 
^ permalink raw reply	[flat|nested] 128+ messages in thread 
- * Re: [Qemu-devel] [Qemu-ppc] [PATCH 28/58] device tree: give dt more size
  2011-09-15 15:00         ` Alexander Graf
@ 2011-09-16  1:49           ` David Gibson
  0 siblings, 0 replies; 128+ messages in thread
From: David Gibson @ 2011-09-16  1:49 UTC (permalink / raw)
  To: Alexander Graf; +Cc: qemu-ppc@nongnu.org, qemu-devel Developers
On Thu, Sep 15, 2011 at 05:00:41PM +0200, Alexander Graf wrote:
> 
> Am 15.09.2011 um 13:03 schrieb David Gibson <david@gibson.dropbear.id.au>:
> 
> > On Thu, Sep 15, 2011 at 09:37:48AM +0200, Alexander Graf wrote:
> >> 
> >> On 15.09.2011, at 05:19, David Gibson wrote:
> >> 
> >>> On Wed, Sep 14, 2011 at 10:42:52AM +0200, Alexander Graf wrote:
> >>>> We currently load a device tree blob and then just take its size x2 to
> >>>> account for modifications we do inside. While this is nice and great,
> >>>> it fails when we have a small device tree as blob and lots of nodes added
> >>>> in machine init code.
> >>>> 
> >>>> So for now, just make it 20k bigger than it was before. We maybe want to
> >>>> be more clever about this later.
> >>> 
> >>> In fact, one of the few things I can think of that might justify
> >>> qemu's "abstraction" of the libfdt interface, is that the wrappers
> >>> could be modified to detect -FDT_ERR_NOSPACE and realloc()
> >>> appropriately.
> >> 
> >> Oh, yeah, that sounds like a very good idea!
> >> 
> >>> Otherwise the wrappers, which are limited and not notably simpler to
> >>> use than the raw libfdt functions seem pretty pointless to me.
> >>> 
> >>> Not that I'm biased as the author of libfdt or anything :).
> >> 
> >> I agree that the wrappers are not all that overly useful atm. I was
> >> actually very close to just ripping them out completely instead of
> >> extending them for new functionality. I did have the feeling that
> >> wrapping libfdt would give us a few benefits, maybe even the chance
> >> of getting rid of #ifdefs in target code.
> > 
> > Hrm, maybe.  Can't really see it.  Of course, my preference would be
> > to get rid of those #ifdefs by embedding libfdt in qemu so it's always
> > there.
> 
> It's a library and should be treated that way.
What the hell does that mean?  It's a library, and things are easier
when you can count on it being there.  Embedding means we can do that
without adding an external dependency.  And we already use git
submodules, so that provides an easy way of embedding.
> But yeah, I'm
> inclined to fail configure for ppc when libfdt can't be found too.
> 
> >> Could you please put this on your todo list? We should probably
> >> force every target code in QEMU to only use the wrappers and
> >> dynamically realloc() in them.
> > 
> > Uh, sure, but it's a long list and it won't be near the top.
> > 
> > The wrappers would need to be a lot more extensive to do this.  I use
> > libfdt directly in the spapr code for a reason.
> 
> Expensiveness is not too bad here, no? It should still be fast.
Uh, how did expensiveness get into this?  I'm talking about the sheer
number of potentially useful interfaces that would have to be wrapped
with boilerplate code.
> Also, I'm perfectly fine with this being low on the list.
Also note that handling -FDT_ERR_NOSPACE only applies to write
functions.  I still see little point in wrapping the read only
functions.
-- 
David Gibson			| I'll have my music baroque, and my code
david AT gibson.dropbear.id.au	| minimalist, thank you.  NOT _the_ _other_
				| _way_ _around_!
http://www.ozlabs.org/~dgibson
^ permalink raw reply	[flat|nested] 128+ messages in thread 
 
 
 
 
 
- * [Qemu-devel] [PATCH 29/58] MPC8544DS: Remove CPU nodes
  2011-09-14  8:42 [Qemu-devel] [PULL 00/58] ppc patch queue 2011-09-14 Alexander Graf
                   ` (27 preceding siblings ...)
  2011-09-14  8:42 ` [Qemu-devel] [PATCH 28/58] device tree: give dt more size Alexander Graf
@ 2011-09-14  8:42 ` Alexander Graf
  2011-09-14  8:42 ` [Qemu-devel] [PATCH 30/58] MPC8544DS: Generate CPU nodes on init Alexander Graf
                   ` (28 subsequent siblings)
  57 siblings, 0 replies; 128+ messages in thread
From: Alexander Graf @ 2011-09-14  8:42 UTC (permalink / raw)
  To: qemu-devel Developers; +Cc: Blue Swirl, qemu-ppc, Aurelien Jarno
We want to generate the CPU nodes in machine init code, so remove them from
the device tree definition that we precompile.
Signed-off-by: Alexander Graf <agraf@suse.de>
---
 pc-bios/mpc8544ds.dtb |  Bin 2277 -> 2028 bytes
 pc-bios/mpc8544ds.dts |   12 ------------
 2 files changed, 0 insertions(+), 12 deletions(-)
diff --git a/pc-bios/mpc8544ds.dtb b/pc-bios/mpc8544ds.dtb
index ae318b1fe83846cc2e133951a3666fcfcdf87f79..c6d302153c7407d5d0127be29b0c35f80e47f8fb 100644
GIT binary patch
delta 424
zcmaDV_=aEO0`I@K3=HgV7#J8V7#P?t0BH>%76f7eAO-?P8KC%#jT*{~lRq;qVGNu+
zgGpO80wTx2Se#mvnV92XVrpOj5@H5o79dUoaVFO=n@yHu7E~<+@qhp%%K^lVK&%DC
zOh63N(K9)OS(!0yas{(Dk?LOn)z6*G!y?7RuxYXeOPCPDVW4@8NM@d#Jb@*NiQ(ep
zFD&Xnqh(mF<QVQVF#G^2fdh6R3*?3eK>TTP3F~Xi9v};53e2?<KrxtWfnp$OF!&E7
zLApVn2ZjurwO}FhPlr`dQBX*1n*4+<n2~GZ2Ia}o?0Nb{iFxU%#SBTM#ky%lsfDGf
edC8Rw$*DOxx|w+?sTB;#Ir+)i2&u`Q9CHADZ%Xk1
delta 636
zcmaFE|5Q-p0`I@K3=AAk85kHW7#P?qfV2h>3j(nK5CZ{YE>PTIqlPkLJ!3$Ad1_IB
zvyO$SiHU;&Seh9~vH-DTazQCb0LJ$Paex5E4+OFmkod`He4yqApb%Vr6B@stfk6!<
z4_B}V%tP=uK>19QJs6iW9+>=rQJZnmWEm!T#^aN1n7mbC@*oFs0P!Ut)&gQCAci^e
z?&LL0%0TrOh*s~wtStKu$plcqfdJG*M&`*4%wa-|B0wQVBw?w^FPM{<7?mdbu&4v=
zD`Bx>Vl<e%fkldm(RuP2me-bdku<<uD*+UPxfUqK2ntdV_z%P&`#=!_^f#-u;0ETO
z4y&M|z{ml*!iFuFF?#X@wu$vAy2**j8L7HCnR%(Y#hF#944D`rFf}OBU`|P9Zfa6u
zajI@wQEFjnYF=_BLsDrm5-&L?KRFwTUzC`ao?6V1oSKuPo0*rA%2+WufPD@C{)cX6
diff --git a/pc-bios/mpc8544ds.dts b/pc-bios/mpc8544ds.dts
index a88b47c..7eb3160 100644
--- a/pc-bios/mpc8544ds.dts
+++ b/pc-bios/mpc8544ds.dts
@@ -25,18 +25,6 @@
 	cpus {
 		#address-cells = <1>;
 		#size-cells = <0>;
-
-		PowerPC,8544@0 {
-			device_type = "cpu";
-			reg = <0x0>;
-			d-cache-line-size = <32>;	// 32 bytes
-			i-cache-line-size = <32>;	// 32 bytes
-			d-cache-size = <0x8000>;		// L1, 32K
-			i-cache-size = <0x8000>;		// L1, 32K
-			timebase-frequency = <0>;
-			bus-frequency = <0>;
-			clock-frequency = <0>;
-		};
 	};
 
 	memory {
-- 
1.6.0.2
^ permalink raw reply related	[flat|nested] 128+ messages in thread
- * [Qemu-devel] [PATCH 30/58] MPC8544DS: Generate CPU nodes on init
  2011-09-14  8:42 [Qemu-devel] [PULL 00/58] ppc patch queue 2011-09-14 Alexander Graf
                   ` (28 preceding siblings ...)
  2011-09-14  8:42 ` [Qemu-devel] [PATCH 29/58] MPC8544DS: Remove CPU nodes Alexander Graf
@ 2011-09-14  8:42 ` Alexander Graf
  2011-09-14  8:42 ` [Qemu-devel] [PATCH 31/58] PPC: E500: Bump CPU count to 15 Alexander Graf
                   ` (27 subsequent siblings)
  57 siblings, 0 replies; 128+ messages in thread
From: Alexander Graf @ 2011-09-14  8:42 UTC (permalink / raw)
  To: qemu-devel Developers; +Cc: Blue Swirl, qemu-ppc, Aurelien Jarno
With this patch, we generate CPU nodes in the machine initialization, giving
us the freedom to generate as many nodes as we want and as the machine supports,
but only those.
This is a first step towards a much cleaner device tree generation
infrastructure, where we would not require precompiled dtb blobs anymore.
Signed-off-by: Alexander Graf <agraf@suse.de>
---
 hw/ppce500_mpc8544ds.c |   46 +++++++++++++++++++++++++++++++++-------------
 1 files changed, 33 insertions(+), 13 deletions(-)
diff --git a/hw/ppce500_mpc8544ds.c b/hw/ppce500_mpc8544ds.c
index a3e1ce4..dfa8034 100644
--- a/hw/ppce500_mpc8544ds.c
+++ b/hw/ppce500_mpc8544ds.c
@@ -123,23 +123,43 @@ static int mpc8544_load_device_tree(CPUState *env,
                              hypercall, sizeof(hypercall));
     }
 
-    for (i = 0; i < smp_cpus; i++) {
+    /* We need to generate the cpu nodes in reverse order, so Linux can pick
+       the first node as boot node and be happy */
+    for (i = smp_cpus - 1; i >= 0; i--) {
         char cpu_name[128];
-        uint64_t cpu_release_addr[] = {
-            cpu_to_be64(MPC8544_SPIN_BASE + (i * 0x20))
-        };
+        uint64_t cpu_release_addr = cpu_to_be64(MPC8544_SPIN_BASE + (i * 0x20));
+
+        for (env = first_cpu; env != NULL; env = env->next_cpu) {
+            if (env->cpu_index == i) {
+                break;
+            }
+        }
+
+        if (!env) {
+            continue;
+        }
 
-        snprintf(cpu_name, sizeof(cpu_name), "/cpus/PowerPC,8544@%x", i);
+        snprintf(cpu_name, sizeof(cpu_name), "/cpus/PowerPC,8544@%x", env->cpu_index);
+        qemu_devtree_add_subnode(fdt, cpu_name);
         qemu_devtree_setprop_cell(fdt, cpu_name, "clock-frequency", clock_freq);
         qemu_devtree_setprop_cell(fdt, cpu_name, "timebase-frequency", tb_freq);
-        qemu_devtree_setprop(fdt, cpu_name, "cpu-release-addr",
-                             cpu_release_addr, sizeof(cpu_release_addr));
-    }
-
-    for (i = smp_cpus; i < 32; i++) {
-        char cpu_name[128];
-        snprintf(cpu_name, sizeof(cpu_name), "/cpus/PowerPC,8544@%x", i);
-        qemu_devtree_nop_node(fdt, cpu_name);
+        qemu_devtree_setprop_string(fdt, cpu_name, "device_type", "cpu");
+        qemu_devtree_setprop_cell(fdt, cpu_name, "reg", env->cpu_index);
+        qemu_devtree_setprop_cell(fdt, cpu_name, "d-cache-line-size",
+                                  env->dcache_line_size);
+        qemu_devtree_setprop_cell(fdt, cpu_name, "i-cache-line-size",
+                                  env->icache_line_size);
+        qemu_devtree_setprop_cell(fdt, cpu_name, "d-cache-size", 0x8000);
+        qemu_devtree_setprop_cell(fdt, cpu_name, "i-cache-size", 0x8000);
+        qemu_devtree_setprop_cell(fdt, cpu_name, "bus-frequency", 0);
+        if (env->cpu_index) {
+            qemu_devtree_setprop_string(fdt, cpu_name, "status", "disabled");
+            qemu_devtree_setprop_string(fdt, cpu_name, "enable-method", "spin-table");
+            qemu_devtree_setprop(fdt, cpu_name, "cpu-release-addr",
+                                 &cpu_release_addr, sizeof(cpu_release_addr));
+        } else {
+            qemu_devtree_setprop_string(fdt, cpu_name, "status", "okay");
+        }
     }
 
     ret = rom_add_blob_fixed(BINARY_DEVICE_TREE_FILE, fdt, fdt_size, addr);
-- 
1.6.0.2
^ permalink raw reply related	[flat|nested] 128+ messages in thread
- * [Qemu-devel] [PATCH 31/58] PPC: E500: Bump CPU count to 15
  2011-09-14  8:42 [Qemu-devel] [PULL 00/58] ppc patch queue 2011-09-14 Alexander Graf
                   ` (29 preceding siblings ...)
  2011-09-14  8:42 ` [Qemu-devel] [PATCH 30/58] MPC8544DS: Generate CPU nodes on init Alexander Graf
@ 2011-09-14  8:42 ` Alexander Graf
  2011-09-14  8:42 ` [Qemu-devel] [PATCH 32/58] PPC: Add new target config for pseries Alexander Graf
                   ` (26 subsequent siblings)
  57 siblings, 0 replies; 128+ messages in thread
From: Alexander Graf @ 2011-09-14  8:42 UTC (permalink / raw)
  To: qemu-devel Developers; +Cc: Blue Swirl, qemu-ppc, Aurelien Jarno
Now that we have everything in place, make the machine description
aware of the fact that we can now handle 15 virtual CPUs!
Signed-off-by: Alexander Graf <agraf@suse.de>
---
v1 -> v2:
  - Max cpus is 15 because of MPIC
---
 hw/ppce500_mpc8544ds.c |    1 +
 1 files changed, 1 insertions(+), 0 deletions(-)
diff --git a/hw/ppce500_mpc8544ds.c b/hw/ppce500_mpc8544ds.c
index dfa8034..b86a008 100644
--- a/hw/ppce500_mpc8544ds.c
+++ b/hw/ppce500_mpc8544ds.c
@@ -396,6 +396,7 @@ static QEMUMachine mpc8544ds_machine = {
     .name = "mpc8544ds",
     .desc = "mpc8544ds",
     .init = mpc8544ds_init,
+    .max_cpus = 15,
 };
 
 static void mpc8544ds_machine_init(void)
-- 
1.6.0.2
^ permalink raw reply related	[flat|nested] 128+ messages in thread
- * [Qemu-devel] [PATCH 32/58] PPC: Add new target config for pseries
  2011-09-14  8:42 [Qemu-devel] [PULL 00/58] ppc patch queue 2011-09-14 Alexander Graf
                   ` (30 preceding siblings ...)
  2011-09-14  8:42 ` [Qemu-devel] [PATCH 31/58] PPC: E500: Bump CPU count to 15 Alexander Graf
@ 2011-09-14  8:42 ` Alexander Graf
  2011-09-15  3:20   ` [Qemu-devel] [Qemu-ppc] " David Gibson
  2011-09-14  8:42 ` [Qemu-devel] [PATCH 33/58] KVM: update kernel headers Alexander Graf
                   ` (25 subsequent siblings)
  57 siblings, 1 reply; 128+ messages in thread
From: Alexander Graf @ 2011-09-14  8:42 UTC (permalink / raw)
  To: qemu-devel Developers; +Cc: Blue Swirl, qemu-ppc, Aurelien Jarno
We only support -M pseries when certain prerequisites are met, such
as a PPC64 guest and libfdt. To only gather these requirements in
a single place, this patch introduces a new CONFIG_PSERIES variable
that gets set when all prerequisites are met.
Signed-off-by: Alexander Graf <agraf@suse.de>
---
 Makefile.target |    6 ++----
 configure       |    3 +++
 2 files changed, 5 insertions(+), 4 deletions(-)
diff --git a/Makefile.target b/Makefile.target
index 3f689ce..7160b35 100644
--- a/Makefile.target
+++ b/Makefile.target
@@ -239,10 +239,8 @@ obj-ppc-y += ppc_oldworld.o
 # NewWorld PowerMac
 obj-ppc-y += ppc_newworld.o
 # IBM pSeries (sPAPR)
-ifeq ($(CONFIG_FDT)$(TARGET_PPC64),yy)
-obj-ppc-y += spapr.o spapr_hcall.o spapr_rtas.o spapr_vio.o
-obj-ppc-y += xics.o spapr_vty.o spapr_llan.o spapr_vscsi.o
-endif
+obj-ppc-$(CONFIG_PSERIES) += spapr.o spapr_hcall.o spapr_rtas.o spapr_vio.o
+obj-ppc-$(CONFIG_PSERIES) += xics.o spapr_vty.o spapr_llan.o spapr_vscsi.o
 # PowerPC 4xx boards
 obj-ppc-y += ppc4xx_devs.o ppc4xx_pci.o ppc405_uc.o ppc405_boards.o
 obj-ppc-y += ppc440.o ppc440_bamboo.o
diff --git a/configure b/configure
index 0875f95..d59fbd5 100755
--- a/configure
+++ b/configure
@@ -3402,6 +3402,9 @@ case "$target_arch2" in
       fi
     fi
 esac
+if test "$target_arch2" = "ppc64" -a "$fdt" = "yes"; then
+  echo "CONFIG_PSERIES=y" >> $config_target_mak
+fi
 if test "$target_bigendian" = "yes" ; then
   echo "TARGET_WORDS_BIGENDIAN=y" >> $config_target_mak
 fi
-- 
1.6.0.2
^ permalink raw reply related	[flat|nested] 128+ messages in thread
- * Re: [Qemu-devel] [Qemu-ppc] [PATCH 32/58] PPC: Add new target config for pseries
  2011-09-14  8:42 ` [Qemu-devel] [PATCH 32/58] PPC: Add new target config for pseries Alexander Graf
@ 2011-09-15  3:20   ` David Gibson
  0 siblings, 0 replies; 128+ messages in thread
From: David Gibson @ 2011-09-15  3:20 UTC (permalink / raw)
  To: Alexander Graf; +Cc: qemu-ppc, qemu-devel Developers
On Wed, Sep 14, 2011 at 10:42:56AM +0200, Alexander Graf wrote:
> We only support -M pseries when certain prerequisites are met, such
> as a PPC64 guest and libfdt. To only gather these requirements in
> a single place, this patch introduces a new CONFIG_PSERIES variable
> that gets set when all prerequisites are met.
Oh, excellent.  Another thing taken off my todo list.  This gets even
more important when the IOMMU stuff adds another prereq.
-- 
David Gibson			| I'll have my music baroque, and my code
david AT gibson.dropbear.id.au	| minimalist, thank you.  NOT _the_ _other_
				| _way_ _around_!
http://www.ozlabs.org/~dgibson
^ permalink raw reply	[flat|nested] 128+ messages in thread 
 
- * [Qemu-devel] [PATCH 33/58] KVM: update kernel headers
  2011-09-14  8:42 [Qemu-devel] [PULL 00/58] ppc patch queue 2011-09-14 Alexander Graf
                   ` (31 preceding siblings ...)
  2011-09-14  8:42 ` [Qemu-devel] [PATCH 32/58] PPC: Add new target config for pseries Alexander Graf
@ 2011-09-14  8:42 ` Alexander Graf
  2011-09-17 16:59   ` Blue Swirl
  2011-09-14  8:42 ` [Qemu-devel] [PATCH 34/58] PPC: Enable to use PAPR with PR style KVM Alexander Graf
                   ` (24 subsequent siblings)
  57 siblings, 1 reply; 128+ messages in thread
From: Alexander Graf @ 2011-09-14  8:42 UTC (permalink / raw)
  To: qemu-devel Developers; +Cc: Blue Swirl, qemu-ppc, Aurelien Jarno
This patch updates the kvm kernel headers to the latest version.
Signed-off-by: Alexander Graf <agraf@suse.de>
---
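 linux-headers/asm-powerpc/kvm.h  |   23 +++++++++++++++++++++++
 linux-headers/asm-x86/kvm_para.h |   14 ++++++++++++++
 linux-headers/linux/kvm.h        |   25 +++++++++++++++++--------
 linux-headers/linux/kvm_para.h   |    1 +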
 4 files changed, 55 insertions(+), 8 deletions(-)
diff --git a/linux-headers/asm-powerpc/kvm.h b/linux-headers/asm-powerpc/kvm.h
index 777d307..579e219 100644
--- a/linux-headers/asm-powerpc/kvm.h
+++ b/linux-headers/asm-powerpc/kvm.h
@@ -22,6 +22,10 @@
 
 #include <linux/types.h>
 
+/* Select powerpc specific features in <linux/kvm.h> */
+#define __KVM_HAVE_SPAPR_TCE
+#define __KVM_HAVE_PPC_SMT
+
 struct kvm_regs {
 	__u64 pc;
 	__u64 cr;
@@ -145,6 +149,12 @@ struct kvm_regs {
 #define KVM_SREGS_E_UPDATE_DBSR		(1 << 3)
 
 /*
+ * Book3S special bits to indicate contents in the struct by maintaining
+ * backwards compatibility with older structs. If adding a new field,
+ * please make sure to add a flag for that new field */
+#define KVM_SREGS_S_HIOR		(1 << 0)
+
+/*
  * In KVM_SET_SREGS, reserved/pad fields must be left untouched from a
  * previous KVM_GET_REGS.
  *
@@ -169,6 +179,8 @@ struct kvm_sregs {
 				__u64 ibat[8];
 				__u64 dbat[8];
 			} ppc32;
+			__u64 flags; /* KVM_SREGS_S_ */
+			__u64 hior;
 		} s;
 		struct {
 			union {
@@ -272,4 +284,15 @@ struct kvm_guest_debug_arch {
 #define KVM_INTERRUPT_UNSET	-2U
 #define KVM_INTERRUPT_SET_LEVEL	-3U
 
+/* for KVM_CAP_SPAPR_TCE */
+struct kvm_create_spapr_tce {
+	__u64 liobn;
+	__u32 window_size;
+};
+
+/* for KVM_ALLOCATE_RMA */
+struct kvm_allocate_rma {
+	__u64 rma_size;
+};
+
 #endif /* __LINUX_KVM_POWERPC_H */
diff --git a/linux-headers/asm-x86/kvm_para.h b/linux-headers/asm-x86/kvm_para.h
index 834d71e..f2ac46a 100644
--- a/linux-headers/asm-x86/kvm_para.h
+++ b/linux-headers/asm-x86/kvm_para.h
@@ -21,6 +21,7 @@
  */
 #define KVM_FEATURE_CLOCKSOURCE2        3
 #define KVM_FEATURE_ASYNC_PF		4
+#define KVM_FEATURE_STEAL_TIME		5
 
 /* The last 8 bits are used to indicate how to interpret the flags field
  * in pvclock structure. If no bits are set, all flags are ignored.
@@ -30,10 +31,23 @@
 #define MSR_KVM_WALL_CLOCK  0x11
 #define MSR_KVM_SYSTEM_TIME 0x12
 
+#define KVM_MSR_ENABLED 1
 /* Custom MSRs falls in the range 0x4b564d00-0x4b564dff */
 #define MSR_KVM_WALL_CLOCK_NEW  0x4b564d00
 #define MSR_KVM_SYSTEM_TIME_NEW 0x4b564d01
 #define MSR_KVM_ASYNC_PF_EN 0x4b564d02
+#define MSR_KVM_STEAL_TIME  0x4b564d03
+
+struct kvm_steal_time {
+	__u64 steal;
+	__u32 version;
+	__u32 flags;
+	__u32 pad[12];
+};
+
+#define KVM_STEAL_ALIGNMENT_BITS 5
+#define KVM_STEAL_VALID_BITS ((-1ULL << (KVM_STEAL_ALIGNMENT_BITS + 1)))
+#define KVM_STEAL_RESERVED_MASK (((1 << KVM_STEAL_ALIGNMENT_BITS) - 1 ) << 1)
 
 #define KVM_MAX_MMU_OP_BATCH           32
 
diff --git a/linux-headers/linux/kvm.h b/linux-headers/linux/kvm.h
index fc63b73..2062375 100644
--- a/linux-headers/linux/kvm.h
+++ b/linux-headers/linux/kvm.h
@@ -161,6 +161,7 @@ struct kvm_pit_config {
 #define KVM_EXIT_NMI              16
 #define KVM_EXIT_INTERNAL_ERROR   17
 #define KVM_EXIT_OSI              18
+#define KVM_EXIT_PAPR_HCALL	  19
 
 /* For KVM_EXIT_INTERNAL_ERROR */
 #define KVM_INTERNAL_ERROR_EMULATION 1
@@ -264,6 +265,11 @@ struct kvm_run {
 		struct {
 			__u64 gprs[32];
 		} osi;
+		struct {
+			__u64 nr;
+			__u64 ret;
+			__u64 args[9];
+		} papr_hcall;
 		/* Fix the size of the union. */
 		char padding[256];
 	};
@@ -457,7 +463,7 @@ struct kvm_ppc_pvinfo {
 #define KVM_CAP_VAPIC 6
 #define KVM_CAP_EXT_CPUID 7
 #define KVM_CAP_CLOCKSOURCE 8
-#define KVM_CAP_NR_VCPUS 9       /* returns max vcpus per vm */
+#define KVM_CAP_NR_VCPUS 9       /* returns recommended max vcpus per vm */
 #define KVM_CAP_NR_MEMSLOTS 10   /* returns max memory slots per vm */
 #define KVM_CAP_PIT 11
 #define KVM_CAP_NOP_IO_DELAY 12
@@ -544,6 +550,12 @@ struct kvm_ppc_pvinfo {
 #define KVM_CAP_TSC_CONTROL 60
 #define KVM_CAP_GET_TSC_KHZ 61
 #define KVM_CAP_PPC_BOOKE_SREGS 62
+#define KVM_CAP_SPAPR_TCE 63
+#define KVM_CAP_PPC_SMT 64
+#define KVM_CAP_PPC_RMA	65
+#define KVM_CAP_MAX_VCPUS 66       /* returns max vcpus per vm */
+#define KVM_CAP_PPC_HIOR 67
+#define KVM_CAP_PPC_PAPR 68
 
 #ifdef KVM_CAP_IRQ_ROUTING
 
@@ -746,6 +758,9 @@ struct kvm_clock_data {
 /* Available with KVM_CAP_XCRS */
 #define KVM_GET_XCRS		  _IOR(KVMIO,  0xa6, struct kvm_xcrs)
 #define KVM_SET_XCRS		  _IOW(KVMIO,  0xa7, struct kvm_xcrs)
+#define KVM_CREATE_SPAPR_TCE	  _IOW(KVMIO,  0xa8, struct kvm_create_spapr_tce)
+/* Available with KVM_CAP_RMA */
+#define KVM_ALLOCATE_RMA	  _IOR(KVMIO,  0xa9, struct kvm_allocate_rma)
 
 #define KVM_DEV_ASSIGN_ENABLE_IOMMU	(1 << 0)
 
@@ -773,20 +788,14 @@ struct kvm_assigned_pci_dev {
 
 struct kvm_assigned_irq {
 	__u32 assigned_dev_id;
-	__u32 host_irq;
+	__u32 host_irq; /* ignored (legacy field) */
 	__u32 guest_irq;
 	__u32 flags;
 	union {
-		struct {
-			__u32 addr_lo;
-			__u32 addr_hi;
-			__u32 data;
-		} guest_msi;
 		__u32 reserved[12];
 	};
 };
 
-
 struct kvm_assigned_msix_nr {
 	__u32 assigned_dev_id;
 	__u16 entry_nr;
diff --git a/linux-headers/linux/kvm_para.h b/linux-headers/linux/kvm_para.h
index 7bdcf93..b315e27 100644
--- a/linux-headers/linux/kvm_para.h
+++ b/linux-headers/linux/kvm_para.h
@@ -26,3 +26,4 @@
 #include <asm/kvm_para.h>
 
 #endif /* __LINUX_KVM_PARA_H */
+
-- 
1.6.0.2
^ permalink raw reply related	[flat|nested] 128+ messages in thread
- * Re: [Qemu-devel] [PATCH 33/58] KVM: update kernel headers
  2011-09-14  8:42 ` [Qemu-devel] [PATCH 33/58] KVM: update kernel headers Alexander Graf
@ 2011-09-17 16:59   ` Blue Swirl
  2011-09-17 17:17     ` Alexander Graf
  2011-09-19 17:50     ` [Qemu-devel] [Qemu-ppc] " Scott Wood
  0 siblings, 2 replies; 128+ messages in thread
From: Blue Swirl @ 2011-09-17 16:59 UTC (permalink / raw)
  To: Alexander Graf; +Cc: qemu-ppc, qemu-devel Developers, Aurelien Jarno
On Wed, Sep 14, 2011 at 8:42 AM, Alexander Graf <agraf@suse.de> wrote:
> This patch updates the kvm kernel headers to the latest version.
>
> Signed-off-by: Alexander Graf <agraf@suse.de>
> ---
>  linux-headers/asm-powerpc/kvm.h  |   23 +++++++++++++++++++++++
>  linux-headers/asm-x86/kvm_para.h |   14 ++++++++++++++
>  linux-headers/linux/kvm.h        |   25 +++++++++++++++++--------
>  linux-headers/linux/kvm_para.h   |    1 +
>  4 files changed, 55 insertions(+), 8 deletions(-)
>
> diff --git a/linux-headers/asm-powerpc/kvm.h b/linux-headers/asm-powerpc/kvm.h
> index 777d307..579e219 100644
> --- a/linux-headers/asm-powerpc/kvm.h
> +++ b/linux-headers/asm-powerpc/kvm.h
> @@ -22,6 +22,10 @@
>
>  #include <linux/types.h>
>
> +/* Select powerpc specific features in <linux/kvm.h> */
> +#define __KVM_HAVE_SPAPR_TCE
> +#define __KVM_HAVE_PPC_SMT
> +
>  struct kvm_regs {
>        __u64 pc;
>        __u64 cr;
> @@ -145,6 +149,12 @@ struct kvm_regs {
>  #define KVM_SREGS_E_UPDATE_DBSR                (1 << 3)
>
>  /*
> + * Book3S special bits to indicate contents in the struct by maintaining
> + * backwards compatibility with older structs. If adding a new field,
> + * please make sure to add a flag for that new field */
> +#define KVM_SREGS_S_HIOR               (1 << 0)
> +
> +/*
>  * In KVM_SET_SREGS, reserved/pad fields must be left untouched from a
>  * previous KVM_GET_REGS.
>  *
> @@ -169,6 +179,8 @@ struct kvm_sregs {
>                                __u64 ibat[8];
>                                __u64 dbat[8];
>                        } ppc32;
> +                       __u64 flags; /* KVM_SREGS_S_ */
> +                       __u64 hior;
>                } s;
>                struct {
>                        union {
> @@ -272,4 +284,15 @@ struct kvm_guest_debug_arch {
>  #define KVM_INTERRUPT_UNSET    -2U
>  #define KVM_INTERRUPT_SET_LEVEL        -3U
>
> +/* for KVM_CAP_SPAPR_TCE */
> +struct kvm_create_spapr_tce {
> +       __u64 liobn;
> +       __u32 window_size;
> +};
> +
> +/* for KVM_ALLOCATE_RMA */
> +struct kvm_allocate_rma {
> +       __u64 rma_size;
> +};
> +
>  #endif /* __LINUX_KVM_POWERPC_H */
> diff --git a/linux-headers/asm-x86/kvm_para.h b/linux-headers/asm-x86/kvm_para.h
> index 834d71e..f2ac46a 100644
> --- a/linux-headers/asm-x86/kvm_para.h
> +++ b/linux-headers/asm-x86/kvm_para.h
> @@ -21,6 +21,7 @@
>  */
>  #define KVM_FEATURE_CLOCKSOURCE2        3
>  #define KVM_FEATURE_ASYNC_PF           4
> +#define KVM_FEATURE_STEAL_TIME         5
>
>  /* The last 8 bits are used to indicate how to interpret the flags field
>  * in pvclock structure. If no bits are set, all flags are ignored.
> @@ -30,10 +31,23 @@
>  #define MSR_KVM_WALL_CLOCK  0x11
>  #define MSR_KVM_SYSTEM_TIME 0x12
>
> +#define KVM_MSR_ENABLED 1
>  /* Custom MSRs falls in the range 0x4b564d00-0x4b564dff */
>  #define MSR_KVM_WALL_CLOCK_NEW  0x4b564d00
>  #define MSR_KVM_SYSTEM_TIME_NEW 0x4b564d01
>  #define MSR_KVM_ASYNC_PF_EN 0x4b564d02
> +#define MSR_KVM_STEAL_TIME  0x4b564d03
> +
> +struct kvm_steal_time {
> +       __u64 steal;
> +       __u32 version;
> +       __u32 flags;
> +       __u32 pad[12];
> +};
> +
> +#define KVM_STEAL_ALIGNMENT_BITS 5
> +#define KVM_STEAL_VALID_BITS ((-1ULL << (KVM_STEAL_ALIGNMENT_BITS + 1)))
> +#define KVM_STEAL_RESERVED_MASK (((1 << KVM_STEAL_ALIGNMENT_BITS) - 1 ) << 1)
>
>  #define KVM_MAX_MMU_OP_BATCH           32
>
> diff --git a/linux-headers/linux/kvm.h b/linux-headers/linux/kvm.h
> index fc63b73..2062375 100644
> --- a/linux-headers/linux/kvm.h
> +++ b/linux-headers/linux/kvm.h
> @@ -161,6 +161,7 @@ struct kvm_pit_config {
>  #define KVM_EXIT_NMI              16
>  #define KVM_EXIT_INTERNAL_ERROR   17
>  #define KVM_EXIT_OSI              18
> +#define KVM_EXIT_PAPR_HCALL      19
>
>  /* For KVM_EXIT_INTERNAL_ERROR */
>  #define KVM_INTERNAL_ERROR_EMULATION 1
> @@ -264,6 +265,11 @@ struct kvm_run {
>                struct {
>                        __u64 gprs[32];
>                } osi;
> +               struct {
> +                       __u64 nr;
> +                       __u64 ret;
> +                       __u64 args[9];
> +               } papr_hcall;
>                /* Fix the size of the union. */
>                char padding[256];
>        };
> @@ -457,7 +463,7 @@ struct kvm_ppc_pvinfo {
>  #define KVM_CAP_VAPIC 6
>  #define KVM_CAP_EXT_CPUID 7
>  #define KVM_CAP_CLOCKSOURCE 8
> -#define KVM_CAP_NR_VCPUS 9       /* returns max vcpus per vm */
> +#define KVM_CAP_NR_VCPUS 9       /* returns recommended max vcpus per vm */
>  #define KVM_CAP_NR_MEMSLOTS 10   /* returns max memory slots per vm */
>  #define KVM_CAP_PIT 11
>  #define KVM_CAP_NOP_IO_DELAY 12
> @@ -544,6 +550,12 @@ struct kvm_ppc_pvinfo {
>  #define KVM_CAP_TSC_CONTROL 60
>  #define KVM_CAP_GET_TSC_KHZ 61
>  #define KVM_CAP_PPC_BOOKE_SREGS 62
> +#define KVM_CAP_SPAPR_TCE 63
> +#define KVM_CAP_PPC_SMT 64
> +#define KVM_CAP_PPC_RMA        65
> +#define KVM_CAP_MAX_VCPUS 66       /* returns max vcpus per vm */
> +#define KVM_CAP_PPC_HIOR 67
> +#define KVM_CAP_PPC_PAPR 68
>
>  #ifdef KVM_CAP_IRQ_ROUTING
>
> @@ -746,6 +758,9 @@ struct kvm_clock_data {
>  /* Available with KVM_CAP_XCRS */
>  #define KVM_GET_XCRS             _IOR(KVMIO,  0xa6, struct kvm_xcrs)
>  #define KVM_SET_XCRS             _IOW(KVMIO,  0xa7, struct kvm_xcrs)
> +#define KVM_CREATE_SPAPR_TCE     _IOW(KVMIO,  0xa8, struct kvm_create_spapr_tce)
> +/* Available with KVM_CAP_RMA */
> +#define KVM_ALLOCATE_RMA         _IOR(KVMIO,  0xa9, struct kvm_allocate_rma)
>
>  #define KVM_DEV_ASSIGN_ENABLE_IOMMU    (1 << 0)
>
> @@ -773,20 +788,14 @@ struct kvm_assigned_pci_dev {
>
>  struct kvm_assigned_irq {
>        __u32 assigned_dev_id;
> -       __u32 host_irq;
> +       __u32 host_irq; /* ignored (legacy field) */
>        __u32 guest_irq;
>        __u32 flags;
>        union {
> -               struct {
> -                       __u32 addr_lo;
> -                       __u32 addr_hi;
> -                       __u32 data;
> -               } guest_msi;
>                __u32 reserved[12];
>        };
>  };
>
> -
>  struct kvm_assigned_msix_nr {
>        __u32 assigned_dev_id;
>        __u16 entry_nr;
> diff --git a/linux-headers/linux/kvm_para.h b/linux-headers/linux/kvm_para.h
> index 7bdcf93..b315e27 100644
> --- a/linux-headers/linux/kvm_para.h
> +++ b/linux-headers/linux/kvm_para.h
> @@ -26,3 +26,4 @@
>  #include <asm/kvm_para.h>
>
>  #endif /* __LINUX_KVM_PARA_H */
> +
Can we avoid this?
> --
> 1.6.0.2
>
>
^ permalink raw reply	[flat|nested] 128+ messages in thread
- * Re: [Qemu-devel] [PATCH 33/58] KVM: update kernel headers
  2011-09-17 16:59   ` Blue Swirl
@ 2011-09-17 17:17     ` Alexander Graf
  2011-09-19 17:50     ` [Qemu-devel] [Qemu-ppc] " Scott Wood
  1 sibling, 0 replies; 128+ messages in thread
From: Alexander Graf @ 2011-09-17 17:17 UTC (permalink / raw)
  To: Blue Swirl; +Cc: qemu-ppc@nongnu.org, qemu-devel Developers, Aurelien Jarno
Am 17.09.2011 um 18:59 schrieb Blue Swirl <blauwirbel@gmail.com>:
> On Wed, Sep 14, 2011 at 8:42 AM, Alexander Graf <agraf@suse.de> wrote:
>> This patch updates the kvm kernel headers to the latest version.
>> 
>> Signed-off-by: Alexander Graf <agraf@suse.de>
>> ---
>>  linux-headers/asm-powerpc/kvm.h  |   23 +++++++++++++++++++++++
>>  linux-headers/asm-x86/kvm_para.h |   14 ++++++++++++++
>>  linux-headers/linux/kvm.h        |   25 +++++++++++++++++--------
>>  linux-headers/linux/kvm_para.h   |    1 +
>>  4 files changed, 55 insertions(+), 8 deletions(-)
>> 
>> diff --git a/linux-headers/asm-powerpc/kvm.h b/linux-headers/asm-powerpc/kvm.h
>> index 777d307..579e219 100644
>> --- a/linux-headers/asm-powerpc/kvm.h
>> +++ b/linux-headers/asm-powerpc/kvm.h
>> @@ -22,6 +22,10 @@
>> 
>>  #include <linux/types.h>
>> 
>> +/* Select powerpc specific features in <linux/kvm.h> */
>> +#define __KVM_HAVE_SPAPR_TCE
>> +#define __KVM_HAVE_PPC_SMT
>> +
>>  struct kvm_regs {
>>        __u64 pc;
>>        __u64 cr;
>> @@ -145,6 +149,12 @@ struct kvm_regs {
>>  #define KVM_SREGS_E_UPDATE_DBSR                (1 << 3)
>> 
>>  /*
>> + * Book3S special bits to indicate contents in the struct by maintaining
>> + * backwards compatibility with older structs. If adding a new field,
>> + * please make sure to add a flag for that new field */
>> +#define KVM_SREGS_S_HIOR               (1 << 0)
>> +
>> +/*
>>  * In KVM_SET_SREGS, reserved/pad fields must be left untouched from a
>>  * previous KVM_GET_REGS.
>>  *
>> @@ -169,6 +179,8 @@ struct kvm_sregs {
>>                                __u64 ibat[8];
>>                                __u64 dbat[8];
>>                        } ppc32;
>> +                       __u64 flags; /* KVM_SREGS_S_ */
>> +                       __u64 hior;
>>                } s;
>>                struct {
>>                        union {
>> @@ -272,4 +284,15 @@ struct kvm_guest_debug_arch {
>>  #define KVM_INTERRUPT_UNSET    -2U
>>  #define KVM_INTERRUPT_SET_LEVEL        -3U
>> 
>> +/* for KVM_CAP_SPAPR_TCE */
>> +struct kvm_create_spapr_tce {
>> +       __u64 liobn;
>> +       __u32 window_size;
>> +};
>> +
>> +/* for KVM_ALLOCATE_RMA */
>> +struct kvm_allocate_rma {
>> +       __u64 rma_size;
>> +};
>> +
>>  #endif /* __LINUX_KVM_POWERPC_H */
>> diff --git a/linux-headers/asm-x86/kvm_para.h b/linux-headers/asm-x86/kvm_para.h
>> index 834d71e..f2ac46a 100644
>> --- a/linux-headers/asm-x86/kvm_para.h
>> +++ b/linux-headers/asm-x86/kvm_para.h
>> @@ -21,6 +21,7 @@
>>  */
>>  #define KVM_FEATURE_CLOCKSOURCE2 3
>>  #define KVM_FEATURE_ASYNC_PF           4
>> +#define KVM_FEATURE_STEAL_TIME         5
>> 
>>  /* The last 8 bits are used to indicate how to interpret the flags field
>>  * in pvclock structure. If no bits are set, all flags are ignored.
>> @@ -30,10 +31,23 @@
>>  #define MSR_KVM_WALL_CLOCK  0x11
>>  #define MSR_KVM_SYSTEM_TIME 0x12
>> 
>> +#define KVM_MSR_ENABLED 1
>>  /* Custom MSRs falls in the range 0x4b564d00-0x4b564dff */
>>  #define MSR_KVM_WALL_CLOCK_NEW  0x4b564d00
>>  #define MSR_KVM_SYSTEM_TIME_NEW 0x4b564d01
>>  #define MSR_KVM_ASYNC_PF_EN 0x4b564d02
>> +#define MSR_KVM_STEAL_TIME  0x4b564d03
>> +
>> +struct kvm_steal_time {
>> +       __u64 steal;
>> +       __u32 version;
>> +       __u32 flags;
>> +       __u32 pad[12];
>> +};
>> +
>> +#define KVM_STEAL_ALIGNMENT_BITS 5
>> +#define KVM_STEAL_VALID_BITS ((-1ULL << (KVM_STEAL_ALIGNMENT_BITS + 1)))
>> +#define KVM_STEAL_RESERVED_MASK (((1 << KVM_STEAL_ALIGNMENT_BITS) - 1 ) << 1)
>> 
>>  #define KVM_MAX_MMU_OP_BATCH           32
>> 
>> diff --git a/linux-headers/linux/kvm.h b/linux-headers/linux/kvm.h
>> index fc63b73..2062375 100644
>> --- a/linux-headers/linux/kvm.h
>> +++ b/linux-headers/linux/kvm.h
>> @@ -161,6 +161,7 @@ struct kvm_pit_config {
>>  #define KVM_EXIT_NMI              16
>>  #define KVM_EXIT_INTERNAL_ERROR   17
>>  #define KVM_EXIT_OSI              18
>> +#define KVM_EXIT_PAPR_HCALL      19
>> 
>>  /* For KVM_EXIT_INTERNAL_ERROR */
>>  #define KVM_INTERNAL_ERROR_EMULATION 1
>> @@ -264,6 +265,11 @@ struct kvm_run {
>>                struct {
>>                        __u64 gprs[32];
>>                } osi;
>> +               struct {
>> +                       __u64 nr;
>> +                       __u64 ret;
>> +                       __u64 args[9];
>> +               } papr_hcall;
>>                /* Fix the size of the union. */
>>                char padding[256];
>>        };
>> @@ -457,7 +463,7 @@ struct kvm_ppc_pvinfo {
>>  #define KVM_CAP_VAPIC 6
>>  #define KVM_CAP_EXT_CPUID 7
>>  #define KVM_CAP_CLOCKSOURCE 8
>> -#define KVM_CAP_NR_VCPUS 9       /* returns max vcpus per vm */
>> +#define KVM_CAP_NR_VCPUS 9       /* returns recommended max vcpus per vm */
>>  #define KVM_CAP_NR_MEMSLOTS 10   /* returns max memory slots per vm */
>>  #define KVM_CAP_PIT 11
>>  #define KVM_CAP_NOP_IO_DELAY 12
>> @@ -544,6 +550,12 @@ struct kvm_ppc_pvinfo {
>>  #define KVM_CAP_TSC_CONTROL 60
>>  #define KVM_CAP_GET_TSC_KHZ 61
>>  #define KVM_CAP_PPC_BOOKE_SREGS 62
>> +#define KVM_CAP_SPAPR_TCE 63
>> +#define KVM_CAP_PPC_SMT 64
>> +#define KVM_CAP_PPC_RMA        65
>> +#define KVM_CAP_MAX_VCPUS 66       /* returns max vcpus per vm */
>> +#define KVM_CAP_PPC_HIOR 67
>> +#define KVM_CAP_PPC_PAPR 68
>> 
>>  #ifdef KVM_CAP_IRQ_ROUTING
>> 
>> @@ -746,6 +758,9 @@ struct kvm_clock_data {
>>  /* Available with KVM_CAP_XCRS */
>>  #define KVM_GET_XCRS             _IOR(KVMIO,  0xa6, struct kvm_xcrs)
>>  #define KVM_SET_XCRS             _IOW(KVMIO,  0xa7, struct kvm_xcrs)
>> +#define KVM_CREATE_SPAPR_TCE     _IOW(KVMIO,  0xa8, struct kvm_create_spapr_tce)
>> +/* Available with KVM_CAP_RMA */
>> +#define KVM_ALLOCATE_RMA         _IOR(KVMIO,  0xa9, struct kvm_allocate_rma)
>> 
>>  #define KVM_DEV_ASSIGN_ENABLE_IOMMU    (1 << 0)
>> 
>> @@ -773,20 +788,14 @@ struct kvm_assigned_pci_dev {
>> 
>>  struct kvm_assigned_irq {
>>        __u32 assigned_dev_id;
>> -       __u32 host_irq;
>> +       __u32 host_irq; /* ignored (legacy field) */
>>        __u32 guest_irq;
>>        __u32 flags;
>>        union {
>> -               struct {
>> -                       __u32 addr_lo;
>> -                       __u32 addr_hi;
>> -                       __u32 data;
>> -               } guest_msi;
>>                __u32 reserved[12];
>>        };
>>  };
>> 
>> -
>>  struct kvm_assigned_msix_nr {
>>        __u32 assigned_dev_id;
>>        __u16 entry_nr;
>> diff --git a/linux-headers/linux/kvm_para.h b/linux-headers/linux/kvm_para.h
>> index 7bdcf93..b315e27 100644
>> --- a/linux-headers/linux/kvm_para.h
>> +++ b/linux-headers/linux/kvm_para.h
>> @@ -26,3 +26,4 @@
>>  #include <asm/kvm_para.h>
>> 
>>  #endif /* __LINUX_KVM_PARA_H */
>> +
> 
> Can we avoid this?
It's a conflict generated by how the real header files look like and what whitespace fixing makes of them.
I suppose the best way to fix this would be to also whitespace-fix the Linux headers :).
Will send a patch.
Alex
> 
^ permalink raw reply	[flat|nested] 128+ messages in thread
- * Re: [Qemu-devel] [Qemu-ppc] [PATCH 33/58] KVM: update kernel headers
  2011-09-17 16:59   ` Blue Swirl
  2011-09-17 17:17     ` Alexander Graf
@ 2011-09-19 17:50     ` Scott Wood
  2011-09-19 17:50       ` Alexander Graf
  1 sibling, 1 reply; 128+ messages in thread
From: Scott Wood @ 2011-09-19 17:50 UTC (permalink / raw)
  To: Blue Swirl; +Cc: qemu-ppc, Alexander Graf, qemu-devel Developers
On 09/17/2011 11:59 AM, Blue Swirl wrote:
> On Wed, Sep 14, 2011 at 8:42 AM, Alexander Graf <agraf@suse.de> wrote:
>> diff --git a/linux-headers/linux/kvm_para.h b/linux-headers/linux/kvm_para.h
>> index 7bdcf93..b315e27 100644
>> --- a/linux-headers/linux/kvm_para.h
>> +++ b/linux-headers/linux/kvm_para.h
>> @@ -26,3 +26,4 @@
>>  #include <asm/kvm_para.h>
>>
>>  #endif /* __LINUX_KVM_PARA_H */
>> +
> 
> Can we avoid this?
It could be fixed in the kernel, but I don't think we should be making
local changes to this in qemu.  It'll just get reintroduced the next
time somebody runs the update script.
-Scott
^ permalink raw reply	[flat|nested] 128+ messages in thread 
- * Re: [Qemu-devel] [Qemu-ppc] [PATCH 33/58] KVM: update kernel headers
  2011-09-19 17:50     ` [Qemu-devel] [Qemu-ppc] " Scott Wood
@ 2011-09-19 17:50       ` Alexander Graf
  0 siblings, 0 replies; 128+ messages in thread
From: Alexander Graf @ 2011-09-19 17:50 UTC (permalink / raw)
  To: Scott Wood; +Cc: Blue Swirl, qemu-ppc, qemu-devel Developers
On 19.09.2011, at 19:50, Scott Wood wrote:
> On 09/17/2011 11:59 AM, Blue Swirl wrote:
>> On Wed, Sep 14, 2011 at 8:42 AM, Alexander Graf <agraf@suse.de> wrote:
>>> diff --git a/linux-headers/linux/kvm_para.h b/linux-headers/linux/kvm_para.h
>>> index 7bdcf93..b315e27 100644
>>> --- a/linux-headers/linux/kvm_para.h
>>> +++ b/linux-headers/linux/kvm_para.h
>>> @@ -26,3 +26,4 @@
>>> #include <asm/kvm_para.h>
>>> 
>>> #endif /* __LINUX_KVM_PARA_H */
>>> +
>> 
>> Can we avoid this?
> 
> It could be fixed in the kernel, but I don't think we should be making
> local changes to this in qemu.  It'll just get reintroduced the next
> time somebody runs the update script.
Yeah, already got a patch for the kernel ready :)
Alex
^ permalink raw reply	[flat|nested] 128+ messages in thread 
 
 
 
- * [Qemu-devel] [PATCH 34/58] PPC: Enable to use PAPR with PR style KVM
  2011-09-14  8:42 [Qemu-devel] [PULL 00/58] ppc patch queue 2011-09-14 Alexander Graf
                   ` (32 preceding siblings ...)
  2011-09-14  8:42 ` [Qemu-devel] [PATCH 33/58] KVM: update kernel headers Alexander Graf
@ 2011-09-14  8:42 ` Alexander Graf
  2011-09-14  8:42 ` [Qemu-devel] [PATCH 35/58] PPC: SPAPR: Use KVM function for time info Alexander Graf
                   ` (23 subsequent siblings)
  57 siblings, 0 replies; 128+ messages in thread
From: Alexander Graf @ 2011-09-14  8:42 UTC (permalink / raw)
  To: qemu-devel Developers; +Cc: Blue Swirl, qemu-ppc, Aurelien Jarno
When running PR style KVM, we need to tell the kernel that we want
to run in PAPR mode now. This means that we need to pass some more
register information down and enable papr mode. We also need to align
the HTAB to htab_size boundary.
Using this patch, -M pseries works with kvm even on non-hv kvm
implementations, as long as the preceding kernel patches are in.
Signed-off-by: Alexander Graf <agraf@suse.de>
---
v1 -> v2:
  - match on CONFIG_PSERIES
v2 -> v3:
  - remove HIOR pieces from PAPR patch (ABI breakage)
---
 hw/spapr.c           |   14 +++++++++++++-
 target-ppc/kvm.c     |   40 ++++++++++++++++++++++++++++++++++++++++
 target-ppc/kvm_ppc.h |    5 +++++
 3 files changed, 58 insertions(+), 1 deletions(-)
diff --git a/hw/spapr.c b/hw/spapr.c
index 8cf93fe..c5c9a95 100644
--- a/hw/spapr.c
+++ b/hw/spapr.c
@@ -38,6 +38,9 @@
 #include "hw/spapr_vio.h"
 #include "hw/xics.h"
 
+#include "kvm.h"
+#include "kvm_ppc.h"
+
 #include <libfdt.h>
 
 #define KERNEL_LOAD_ADDR        0x00000000
@@ -336,12 +339,21 @@ static void ppc_spapr_init(ram_addr_t ram_size,
      * later we should probably make it scale to the size of guest
      * RAM */
     spapr->htab_size = 1ULL << (pteg_shift + 7);
-    spapr->htab = g_malloc(spapr->htab_size);
+    spapr->htab = qemu_memalign(spapr->htab_size, spapr->htab_size);
 
     for (env = first_cpu; env != NULL; env = env->next_cpu) {
         env->external_htab = spapr->htab;
         env->htab_base = -1;
         env->htab_mask = spapr->htab_size - 1;
+
+        /* Tell KVM that we're in PAPR mode */
+        env->spr[SPR_SDR1] = (unsigned long)spapr->htab |
+                             ((pteg_shift + 7) - 18);
+        env->spr[SPR_HIOR] = 0;
+
+        if (kvm_enabled()) {
+            kvmppc_set_papr(env);
+        }
     }
 
     filename = qemu_find_file(QEMU_FILE_TYPE_BIOS, "spapr-rtas.bin");
diff --git a/target-ppc/kvm.c b/target-ppc/kvm.c
index 77b98c4..f65b6e1 100644
--- a/target-ppc/kvm.c
+++ b/target-ppc/kvm.c
@@ -29,6 +29,10 @@
 #include "cpu.h"
 #include "device_tree.h"
 
+#include "hw/sysbus.h"
+#include "hw/spapr.h"
+#include "hw/spapr_vio.h"
+
 //#define DEBUG_KVM
 
 #ifdef DEBUG_KVM
@@ -455,6 +459,14 @@ int kvm_arch_handle_exit(CPUState *env, struct kvm_run *run)
         dprintf("handle halt\n");
         ret = kvmppc_handle_halt(env);
         break;
+#ifdef CONFIG_PSERIES
+    case KVM_EXIT_PAPR_HCALL:
+        dprintf("handle PAPR hypercall\n");
+        run->papr_hcall.ret = spapr_hypercall(env, run->papr_hcall.nr,
+                                              run->papr_hcall.args);
+        ret = 1;
+        break;
+#endif
     default:
         fprintf(stderr, "KVM: unknown exit reason %d\n", run->exit_reason);
         ret = -1;
@@ -606,6 +618,34 @@ int kvmppc_get_hypercall(CPUState *env, uint8_t *buf, int buf_len)
     return 0;
 }
 
+void kvmppc_set_papr(CPUState *env)
+{
+    struct kvm_enable_cap cap;
+    int ret;
+
+    memset(&cap, 0, sizeof(cap));
+    cap.cap = KVM_CAP_PPC_PAPR;
+    ret = kvm_vcpu_ioctl(env, KVM_ENABLE_CAP, &cap);
+
+    if (ret) {
+        goto fail;
+    }
+
+    /*
+     * XXX We set HIOR here. It really should be a qdev property of
+     *     the CPU node, but we don't have CPUs converted to qdev yet.
+     *
+     *     Once we have qdev CPUs, move HIOR to a qdev property and
+     *     remove this chunk.
+     */
+    /* XXX Set HIOR using new ioctl */
+
+    return;
+
+fail:
+    cpu_abort(env, "This KVM version does not support PAPR\n");
+}
+
 bool kvm_arch_stop_on_emulation_error(CPUState *env)
 {
     return true;
diff --git a/target-ppc/kvm_ppc.h b/target-ppc/kvm_ppc.h
index 76f98d9..c484e60 100644
--- a/target-ppc/kvm_ppc.h
+++ b/target-ppc/kvm_ppc.h
@@ -17,6 +17,7 @@ uint32_t kvmppc_get_tbfreq(void);
 uint64_t kvmppc_get_clockfreq(void);
 int kvmppc_get_hypercall(CPUState *env, uint8_t *buf, int buf_len);
 int kvmppc_set_interrupt(CPUState *env, int irq, int level);
+void kvmppc_set_papr(CPUState *env);
 
 #else
 
@@ -40,6 +41,10 @@ static inline int kvmppc_set_interrupt(CPUState *env, int irq, int level)
     return -1;
 }
 
+static inline void kvmppc_set_papr(CPUState *env)
+{
+}
+
 #endif
 
 #ifndef CONFIG_KVM
-- 
1.6.0.2
^ permalink raw reply related	[flat|nested] 128+ messages in thread
- * [Qemu-devel] [PATCH 35/58] PPC: SPAPR: Use KVM function for time info
  2011-09-14  8:42 [Qemu-devel] [PULL 00/58] ppc patch queue 2011-09-14 Alexander Graf
                   ` (33 preceding siblings ...)
  2011-09-14  8:42 ` [Qemu-devel] [PATCH 34/58] PPC: Enable to use PAPR with PR style KVM Alexander Graf
@ 2011-09-14  8:42 ` Alexander Graf
  2011-09-14  8:43 ` [Qemu-devel] [PATCH 36/58] pseries: Bugfixes for interrupt numbering in XICS code Alexander Graf
                   ` (22 subsequent siblings)
  57 siblings, 0 replies; 128+ messages in thread
From: Alexander Graf @ 2011-09-14  8:42 UTC (permalink / raw)
  To: qemu-devel Developers; +Cc: Blue Swirl, qemu-ppc, Aurelien Jarno
One of the things we can't fake on PPC is the timer speed. So
we need to extract the frequency information from the host and
put it back into the guest device tree.
Luckily, we already have functions for that from the non-pseries
targets, so all we need to do is to connect the dots and the guest
suddenly gets to know its real timer speeds.
Signed-off-by: Alexander Graf <agraf@suse.de>
---
 hw/spapr.c |    8 ++++----
 1 files changed, 4 insertions(+), 4 deletions(-)
diff --git a/hw/spapr.c b/hw/spapr.c
index c5c9a95..760e323 100644
--- a/hw/spapr.c
+++ b/hw/spapr.c
@@ -140,6 +140,8 @@ static void *spapr_create_fdt_skel(const char *cpu_model,
         char *nodename;
         uint32_t segs[] = {cpu_to_be32(28), cpu_to_be32(40),
                            0xffffffff, 0xffffffff};
+        uint32_t tbfreq = kvm_enabled() ? kvmppc_get_tbfreq() : TIMEBASE_FREQ;
+        uint32_t cpufreq = kvm_enabled() ? kvmppc_get_clockfreq() : 1000000000;
 
         if (asprintf(&nodename, "%s@%x", modelname, index) < 0) {
             fprintf(stderr, "Allocation failure\n");
@@ -158,10 +160,8 @@ static void *spapr_create_fdt_skel(const char *cpu_model,
                                 env->dcache_line_size)));
         _FDT((fdt_property_cell(fdt, "icache-block-size",
                                 env->icache_line_size)));
-        _FDT((fdt_property_cell(fdt, "timebase-frequency", TIMEBASE_FREQ)));
-        /* Hardcode CPU frequency for now.  It's kind of arbitrary on
-         * full emu, for kvm we should copy it from the host */
-        _FDT((fdt_property_cell(fdt, "clock-frequency", 1000000000)));
+        _FDT((fdt_property_cell(fdt, "timebase-frequency", tbfreq)));
+        _FDT((fdt_property_cell(fdt, "clock-frequency", cpufreq)));
         _FDT((fdt_property_cell(fdt, "ibm,slb-size", env->slb_nr)));
         _FDT((fdt_property(fdt, "ibm,pft-size",
                            pft_size_prop, sizeof(pft_size_prop))));
-- 
1.6.0.2
^ permalink raw reply related	[flat|nested] 128+ messages in thread
- * [Qemu-devel] [PATCH 36/58] pseries: Bugfixes for interrupt numbering in XICS code
  2011-09-14  8:42 [Qemu-devel] [PULL 00/58] ppc patch queue 2011-09-14 Alexander Graf
                   ` (34 preceding siblings ...)
  2011-09-14  8:42 ` [Qemu-devel] [PATCH 35/58] PPC: SPAPR: Use KVM function for time info Alexander Graf
@ 2011-09-14  8:43 ` Alexander Graf
  2011-09-14  8:43 ` [Qemu-devel] [PATCH 37/58] pseries: Add a phandle to the xicp interrupt controller device tree node Alexander Graf
                   ` (21 subsequent siblings)
  57 siblings, 0 replies; 128+ messages in thread
From: Alexander Graf @ 2011-09-14  8:43 UTC (permalink / raw)
  To: qemu-devel Developers; +Cc: Blue Swirl, qemu-ppc, Aurelien Jarno, David Gibson
From: David Gibson <david@gibson.dropbear.id.au>
The implementation of the XICS interrupt controller contains several
(difficult to trigger) bugs due to the fact that we were not 100%
consistent with which irq numbering we used.  In most places, global
numbers were used as handled by the presentation layer, however a few
functions took "local" numberings, that is the source number within
the interrupt source controller which is offset from the global
number.  In most cases the function and its caller agreed on this, but
in a few cases they didn't.
This patch cleans this up by always using global numbering.
Translation to the local number is now always and only done when we
look up the individual interrupt source state structure.  This should
remove the existing bugs and with luck reduce the chances of
re-introducing such bugs.
Signed-off-by: David Gibson <david@gibson.dropbear.id.au>
Signed-off-by: Alexander Graf <agraf@suse.de>
---
 hw/xics.c |   17 ++++++++---------
 1 files changed, 8 insertions(+), 9 deletions(-)
diff --git a/hw/xics.c b/hw/xics.c
index 9bf82aa..bd8d4cd 100644
--- a/hw/xics.c
+++ b/hw/xics.c
@@ -187,17 +187,17 @@ static int ics_valid_irq(struct ics_state *ics, uint32_t nr)
         && (nr < (ics->offset + ics->nr_irqs));
 }
 
-static void ics_set_irq_msi(void *opaque, int nr, int val)
+static void ics_set_irq_msi(void *opaque, int srcno, int val)
 {
     struct ics_state *ics = (struct ics_state *)opaque;
-    struct ics_irq_state *irq = ics->irqs + nr;
+    struct ics_irq_state *irq = ics->irqs + srcno;
 
     if (val) {
         if (irq->priority == 0xff) {
             irq->masked_pending = 1;
             /* masked pending */ ;
         } else  {
-            icp_irq(ics->icp, irq->server, nr + ics->offset, irq->priority);
+            icp_irq(ics->icp, irq->server, srcno + ics->offset, irq->priority);
         }
     }
 }
@@ -229,7 +229,7 @@ static void ics_resend_msi(struct ics_state *ics)
 static void ics_write_xive_msi(struct ics_state *ics, int nr, int server,
                                uint8_t priority)
 {
-    struct ics_irq_state *irq = ics->irqs + nr;
+    struct ics_irq_state *irq = ics->irqs + nr - ics->offset;
 
     irq->server = server;
     irq->priority = priority;
@@ -239,7 +239,7 @@ static void ics_write_xive_msi(struct ics_state *ics, int nr, int server,
     }
 
     irq->masked_pending = 0;
-    icp_irq(ics->icp, server, nr + ics->offset, priority);
+    icp_irq(ics->icp, server, nr, priority);
 }
 
 static void ics_reject(struct ics_state *ics, int nr)
@@ -334,7 +334,7 @@ static void rtas_set_xive(sPAPREnvironment *spapr, uint32_t token,
         return;
     }
 
-    ics_write_xive_msi(ics, nr - ics->offset, server, priority);
+    ics_write_xive_msi(ics, nr, server, priority);
 
     rtas_st(rets, 0, 0); /* Success */
 }
@@ -388,7 +388,7 @@ static void rtas_int_off(sPAPREnvironment *spapr, uint32_t token,
     struct ics_irq_state *irq = xics->irqs + (nr - xics->offset);
 
     irq->saved_priority = irq->priority;
-    ics_write_xive_msi(xics, nr - xics->offset, irq->server, 0xff);
+    ics_write_xive_msi(xics, nr, irq->server, 0xff);
 #endif
 
     rtas_st(rets, 0, 0); /* Success */
@@ -418,8 +418,7 @@ static void rtas_int_on(sPAPREnvironment *spapr, uint32_t token,
 #if 0
     struct ics_irq_state *irq = xics->irqs + (nr - xics->offset);
 
-    ics_write_xive_msi(xics, nr - xics->offset,
-                       irq->server, irq->saved_priority);
+    ics_write_xive_msi(xics, nr, irq->server, irq->saved_priority);
 #endif
 
     rtas_st(rets, 0, 0); /* Success */
-- 
1.6.0.2
^ permalink raw reply related	[flat|nested] 128+ messages in thread
- * [Qemu-devel] [PATCH 37/58] pseries: Add a phandle to the xicp interrupt controller device tree node
  2011-09-14  8:42 [Qemu-devel] [PULL 00/58] ppc patch queue 2011-09-14 Alexander Graf
                   ` (35 preceding siblings ...)
  2011-09-14  8:43 ` [Qemu-devel] [PATCH 36/58] pseries: Bugfixes for interrupt numbering in XICS code Alexander Graf
@ 2011-09-14  8:43 ` Alexander Graf
  2011-09-14  8:43 ` [Qemu-devel] [PATCH 38/58] pseries: interrupt controller should not have a 'reg' property Alexander Graf
                   ` (20 subsequent siblings)
  57 siblings, 0 replies; 128+ messages in thread
From: Alexander Graf @ 2011-09-14  8:43 UTC (permalink / raw)
  To: qemu-devel Developers; +Cc: Blue Swirl, qemu-ppc, Aurelien Jarno, David Gibson
From: David Gibson <david@gibson.dropbear.id.au>
Future devices we will be adding to the pseries machine (e.g. PCI) will
need nodes in the device tree which explicitly reference the top-level
interrupt controller via interrupt-parent or interrupt-map properties.
In order to do this, the interrupt controller node needs an assigned
phandle.  This patch adds the appropriate property, in preparation.
Signed-off-by: David Gibson <david@gibson.dropbear.id.au>
Signed-off-by: Alexander Graf <agraf@suse.de>
---
 hw/spapr.c |    5 +++++
 1 files changed, 5 insertions(+), 0 deletions(-)
diff --git a/hw/spapr.c b/hw/spapr.c
index 760e323..bb00ae6 100644
--- a/hw/spapr.c
+++ b/hw/spapr.c
@@ -57,6 +57,8 @@
 #define MAX_CPUS                256
 #define XICS_IRQS		1024
 
+#define PHANDLE_XICP            0x00001111
+
 sPAPREnvironment *spapr;
 
 static void *spapr_create_fdt_skel(const char *cpu_model,
@@ -202,6 +204,9 @@ static void *spapr_create_fdt_skel(const char *cpu_model,
     _FDT((fdt_property(fdt, "ibm,interrupt-server-ranges",
                        interrupt_server_ranges_prop,
                        sizeof(interrupt_server_ranges_prop))));
+    _FDT((fdt_property_cell(fdt, "#interrupt-cells", 2)));
+    _FDT((fdt_property_cell(fdt, "linux,phandle", PHANDLE_XICP)));
+    _FDT((fdt_property_cell(fdt, "phandle", PHANDLE_XICP)));
 
     _FDT((fdt_end_node(fdt)));
 
-- 
1.6.0.2
^ permalink raw reply related	[flat|nested] 128+ messages in thread
- * [Qemu-devel] [PATCH 38/58] pseries: interrupt controller should not have a 'reg' property
  2011-09-14  8:42 [Qemu-devel] [PULL 00/58] ppc patch queue 2011-09-14 Alexander Graf
                   ` (36 preceding siblings ...)
  2011-09-14  8:43 ` [Qemu-devel] [PATCH 37/58] pseries: Add a phandle to the xicp interrupt controller device tree node Alexander Graf
@ 2011-09-14  8:43 ` Alexander Graf
  2011-09-14  8:43 ` [Qemu-devel] [PATCH 39/58] pseries: More complete WIMG validation in H_ENTER code Alexander Graf
                   ` (19 subsequent siblings)
  57 siblings, 0 replies; 128+ messages in thread
From: Alexander Graf @ 2011-09-14  8:43 UTC (permalink / raw)
  To: qemu-devel Developers; +Cc: Blue Swirl, qemu-ppc, Aurelien Jarno, David Gibson
From: David Gibson <david@gibson.dropbear.id.au>
The interrupt controller presented in the device tree for the pseries
machine is manipulated by the guest only through hypervisor calls.  It
has no real or emulated registers for the guest to access.
However, it currently has a bogus 'reg' property advertising a register
window.  Moreover, this property has an invalid format, being a 32-bit
zero, when the #address-cells property on the root bus indicates that it
needs a 64-bit address.  Since the guest never attempts to manipulate
the node directly, it works, but it is ugly and can cause warnings when
manipulating the device tree in other tools (such as future firmware
versions).
This patch, therefore, corrects the problem by entirely removing the
interrupt-controller node's 'reg' property.
Signed-off-by: David Gibson <david@gibson.dropbear.id.au>
Signed-off-by: Alexander Graf <agraf@suse.de>
---
 hw/spapr.c |    3 +--
 1 files changed, 1 insertions(+), 2 deletions(-)
diff --git a/hw/spapr.c b/hw/spapr.c
index bb00ae6..9eefef9 100644
--- a/hw/spapr.c
+++ b/hw/spapr.c
@@ -194,12 +194,11 @@ static void *spapr_create_fdt_skel(const char *cpu_model,
     _FDT((fdt_end_node(fdt)));
 
     /* interrupt controller */
-    _FDT((fdt_begin_node(fdt, "interrupt-controller@0")));
+    _FDT((fdt_begin_node(fdt, "interrupt-controller")));
 
     _FDT((fdt_property_string(fdt, "device_type",
                               "PowerPC-External-Interrupt-Presentation")));
     _FDT((fdt_property_string(fdt, "compatible", "IBM,ppc-xicp")));
-    _FDT((fdt_property_cell(fdt, "reg", 0)));
     _FDT((fdt_property(fdt, "interrupt-controller", NULL, 0)));
     _FDT((fdt_property(fdt, "ibm,interrupt-server-ranges",
                        interrupt_server_ranges_prop,
-- 
1.6.0.2
^ permalink raw reply related	[flat|nested] 128+ messages in thread
- * [Qemu-devel] [PATCH 39/58] pseries: More complete WIMG validation in H_ENTER code
  2011-09-14  8:42 [Qemu-devel] [PULL 00/58] ppc patch queue 2011-09-14 Alexander Graf
                   ` (37 preceding siblings ...)
  2011-09-14  8:43 ` [Qemu-devel] [PATCH 38/58] pseries: interrupt controller should not have a 'reg' property Alexander Graf
@ 2011-09-14  8:43 ` Alexander Graf
  2011-09-14  8:43 ` [Qemu-devel] [PATCH 40/58] PPC: Fix sync instructions problem in SMP Alexander Graf
                   ` (18 subsequent siblings)
  57 siblings, 0 replies; 128+ messages in thread
From: Alexander Graf @ 2011-09-14  8:43 UTC (permalink / raw)
  To: qemu-devel Developers; +Cc: Blue Swirl, qemu-ppc, Aurelien Jarno, David Gibson
From: David Gibson <david@gibson.dropbear.id.au>
Currently our implementation of the H_ENTER hypercall, which inserts a
mapping in the hash page table assumes that only ordinary memory is ever
mapped, and only permits mapping attribute bits accordingly (WIMG==0010).
However, we intend to start adding emulated IO to the pseries platform
(and real IO with PCI passthrough on kvm) which means this simple test
will no longer suffice.
This patch extends the h_enter validation code to check if the given
address is a RAM address.  If it is it enforces WIMG==0010, otherwise
it assumes that it is an IO mapping and instead enforces WIMG=010x.
Signed-off-by: David Gibson <david@gibson.dropbear.id.au>
Signed-off-by: Alexander Graf <agraf@suse.de>
---
 hw/spapr.c       |    3 ++-
 hw/spapr.h       |    1 +
 hw/spapr_hcall.c |   22 ++++++++++++++++++----
 3 files changed, 21 insertions(+), 5 deletions(-)
diff --git a/hw/spapr.c b/hw/spapr.c
index 9eefef9..00aed62 100644
--- a/hw/spapr.c
+++ b/hw/spapr.c
@@ -336,7 +336,8 @@ static void ppc_spapr_init(ram_addr_t ram_size,
     }
 
     /* allocate RAM */
-    ram_offset = qemu_ram_alloc(NULL, "ppc_spapr.ram", ram_size);
+    spapr->ram_limit = ram_size;
+    ram_offset = qemu_ram_alloc(NULL, "ppc_spapr.ram", spapr->ram_limit);
     cpu_register_physical_memory(0, ram_size, ram_offset);
 
     /* allocate hash page table.  For now we always make this 16mb,
diff --git a/hw/spapr.h b/hw/spapr.h
index 009c459..3d21b7a 100644
--- a/hw/spapr.h
+++ b/hw/spapr.h
@@ -10,6 +10,7 @@ typedef struct sPAPREnvironment {
     struct VIOsPAPRBus *vio_bus;
     struct icp_state *icp;
 
+    target_phys_addr_t ram_limit;
     void *htab;
     long htab_size;
     target_phys_addr_t fdt_addr, rtas_addr;
diff --git a/hw/spapr_hcall.c b/hw/spapr_hcall.c
index f7ead04..70f853c 100644
--- a/hw/spapr_hcall.c
+++ b/hw/spapr_hcall.c
@@ -99,6 +99,8 @@ static target_ulong h_enter(CPUState *env, sPAPREnvironment *spapr,
     target_ulong pte_index = args[1];
     target_ulong pteh = args[2];
     target_ulong ptel = args[3];
+    target_ulong page_shift = 12;
+    target_ulong raddr;
     target_ulong i;
     uint8_t *hpte;
 
@@ -111,6 +113,7 @@ static target_ulong h_enter(CPUState *env, sPAPREnvironment *spapr,
 #endif
         if ((ptel & 0xff000) == 0) {
             /* 16M page */
+            page_shift = 24;
             /* lowest AVA bit must be 0 for 16M pages */
             if (pteh & 0x80) {
                 return H_PARAMETER;
@@ -120,12 +123,23 @@ static target_ulong h_enter(CPUState *env, sPAPREnvironment *spapr,
         }
     }
 
-    /* FIXME: bounds check the pa? */
+    raddr = (ptel & HPTE_R_RPN) & ~((1ULL << page_shift) - 1);
 
-    /* Check WIMG */
-    if ((ptel & HPTE_R_WIMG) != HPTE_R_M) {
-        return H_PARAMETER;
+    if (raddr < spapr->ram_limit) {
+        /* Regular RAM - should have WIMG=0010 */
+        if ((ptel & HPTE_R_WIMG) != HPTE_R_M) {
+            return H_PARAMETER;
+        }
+    } else {
+        /* Looks like an IO address */
+        /* FIXME: What WIMG combinations could be sensible for IO?
+         * For now we allow WIMG=010x, but are there others? */
+        /* FIXME: Should we check against registered IO addresses? */
+        if ((ptel & (HPTE_R_W | HPTE_R_I | HPTE_R_M)) != HPTE_R_I) {
+            return H_PARAMETER;
+        }
     }
+
     pteh &= ~0x60ULL;
 
     if ((pte_index * HASH_PTE_SIZE_64) & ~env->htab_mask) {
-- 
1.6.0.2
^ permalink raw reply related	[flat|nested] 128+ messages in thread
- * [Qemu-devel] [PATCH 40/58] PPC: Fix sync instructions problem in SMP
  2011-09-14  8:42 [Qemu-devel] [PULL 00/58] ppc patch queue 2011-09-14 Alexander Graf
                   ` (38 preceding siblings ...)
  2011-09-14  8:43 ` [Qemu-devel] [PATCH 39/58] pseries: More complete WIMG validation in H_ENTER code Alexander Graf
@ 2011-09-14  8:43 ` Alexander Graf
  2011-09-14  8:43 ` [Qemu-devel] [PATCH 41/58] pseries: Add real mode debugging hcalls Alexander Graf
                   ` (17 subsequent siblings)
  57 siblings, 0 replies; 128+ messages in thread
From: Alexander Graf @ 2011-09-14  8:43 UTC (permalink / raw)
  To: qemu-devel Developers; +Cc: Blue Swirl, Elie Richa, qemu-ppc, Aurelien Jarno
From: Elie Richa <richa@adacore.com>
In the current emulation of the load-and-reserve (lwarx) and
store-conditional (stwcx.) instructions, the internal reservation
mechanism is taken into account, however each CPU has its own
reservation information and this information is not synchronized between
CPUs to perform proper synchronization.
The following test case with 2 CPUs shows that the semantics of the
"lwarx" and "stwcx." instructions are not preserved by the emulation.
The test case does the following :
	- CPU0: reserve a memory location
	- CPU1: reserve the same memory location
	- CPU0: perform stwcx. on the location
The last store-conditional operation succeeds while it is supposed to
fail since the reservation was supposed to be lost at the second reserve
operation.
This (one line) patch fixes this problem in a very simple manner by
removing the reservation of a CPU every time it is scheduled (in
cpu_exec()). While this is a harsh workaround, it does not affect the
guest code much because reservations are usually held for a very short
time, that is an lwarx is almost always followed by an stwcx. a few
instructions below. Therefore, in most cases, the reservation will be
taken and consumed before a CPU switch occurs. However in the rare case
where a CPU switch does occur between the lwarx and its corresponding
stwcx.  this patch solves a potential erroneous behavior of the
synchronization instructions.
Signed-off-by: Elie Richa <richa@adacore.com>
Signed-off-by: Alexander Graf <agraf@suse.de>
---
 cpu-exec.c |    1 +
 1 files changed, 1 insertions(+), 0 deletions(-)
diff --git a/cpu-exec.c b/cpu-exec.c
index aef66f2..a9fa608 100644
--- a/cpu-exec.c
+++ b/cpu-exec.c
@@ -217,6 +217,7 @@ int cpu_exec(CPUState *env)
 #elif defined(TARGET_ARM)
 #elif defined(TARGET_UNICORE32)
 #elif defined(TARGET_PPC)
+    env->reserve_addr = -1;
 #elif defined(TARGET_LM32)
 #elif defined(TARGET_MICROBLAZE)
 #elif defined(TARGET_MIPS)
-- 
1.6.0.2
^ permalink raw reply related	[flat|nested] 128+ messages in thread
- * [Qemu-devel] [PATCH 41/58] pseries: Add real mode debugging hcalls
  2011-09-14  8:42 [Qemu-devel] [PULL 00/58] ppc patch queue 2011-09-14 Alexander Graf
                   ` (39 preceding siblings ...)
  2011-09-14  8:43 ` [Qemu-devel] [PATCH 40/58] PPC: Fix sync instructions problem in SMP Alexander Graf
@ 2011-09-14  8:43 ` Alexander Graf
  2011-09-14  8:43 ` [Qemu-devel] [PATCH 42/58] pseries: use macro for firmware filename Alexander Graf
                   ` (16 subsequent siblings)
  57 siblings, 0 replies; 128+ messages in thread
From: Alexander Graf @ 2011-09-14  8:43 UTC (permalink / raw)
  To: qemu-devel Developers
  Cc: David Gibson, Blue Swirl, qemu-ppc, Aurelien Jarno, David Gibson
From: David Gibson <david@gibson.dropbear.id.au>
PAPR systems support several hypercalls intended for use in real mode
debugging tools.  These implement reads and writes to arbitrary guest
physical addresses.  This is useful for real mode software because it
allows access to IO addresses and memory outside the RMA without going
through the somewhat involved process of setting up the hash page table
and enabling translation.
We want these so that when we add real IO devices, the SLOF firmware can
boot from them without having to enter virtual mode.
Signed-off-by: Benjamin Herrenschmidt <benh@kernel.crashing.org>
Signed-off-by: David Gibson <dwg@au1.ibm.com>
Signed-off-by: Alexander Graf <agraf@suse.de>
---
 hw/spapr_hcall.c |   73 ++++++++++++++++++++++++++++++++++++++++++++++++++++++
 1 files changed, 73 insertions(+), 0 deletions(-)
diff --git a/hw/spapr_hcall.c b/hw/spapr_hcall.c
index 70f853c..0c61c10 100644
--- a/hw/spapr_hcall.c
+++ b/hw/spapr_hcall.c
@@ -463,6 +463,67 @@ static target_ulong h_rtas(CPUState *env, sPAPREnvironment *spapr,
                            nret, rtas_r3 + 12 + 4*nargs);
 }
 
+static target_ulong h_logical_load(CPUState *env, sPAPREnvironment *spapr,
+                                   target_ulong opcode, target_ulong *args)
+{
+    target_ulong size = args[0];
+    target_ulong addr = args[1];
+
+    switch (size) {
+    case 1:
+        args[0] = ldub_phys(addr);
+        return H_SUCCESS;
+    case 2:
+        args[0] = lduw_phys(addr);
+        return H_SUCCESS;
+    case 4:
+        args[0] = ldl_phys(addr);
+        return H_SUCCESS;
+    case 8:
+        args[0] = ldq_phys(addr);
+        return H_SUCCESS;
+    }
+    return H_PARAMETER;
+}
+
+static target_ulong h_logical_store(CPUState *env, sPAPREnvironment *spapr,
+                                    target_ulong opcode, target_ulong *args)
+{
+    target_ulong size = args[0];
+    target_ulong addr = args[1];
+    target_ulong val  = args[2];
+
+    switch (size) {
+    case 1:
+        stb_phys(addr, val);
+        return H_SUCCESS;
+    case 2:
+        stw_phys(addr, val);
+        return H_SUCCESS;
+    case 4:
+        stl_phys(addr, val);
+        return H_SUCCESS;
+    case 8:
+        stq_phys(addr, val);
+        return H_SUCCESS;
+    }
+    return H_PARAMETER;
+}
+
+static target_ulong h_logical_icbi(CPUState *env, sPAPREnvironment *spapr,
+                                   target_ulong opcode, target_ulong *args)
+{
+    /* Nothing to do on emulation, KVM will trap this in the kernel */
+    return H_SUCCESS;
+}
+
+static target_ulong h_logical_dcbf(CPUState *env, sPAPREnvironment *spapr,
+                                   target_ulong opcode, target_ulong *args)
+{
+    /* Nothing to do on emulation, KVM will trap this in the kernel */
+    return H_SUCCESS;
+}
+
 static spapr_hcall_fn papr_hypercall_table[(MAX_HCALL_OPCODE / 4) + 1];
 static spapr_hcall_fn kvmppc_hypercall_table[KVMPPC_HCALL_MAX - KVMPPC_HCALL_BASE + 1];
 
@@ -527,6 +588,18 @@ static void hypercall_init(void)
     spapr_register_hypercall(H_REGISTER_VPA, h_register_vpa);
     spapr_register_hypercall(H_CEDE, h_cede);
 
+    /* "debugger" hcalls (also used by SLOF). Note: We do -not- differenciate
+     * here between the "CI" and the "CACHE" variants, they will use whatever
+     * mapping attributes qemu is using. When using KVM, the kernel will
+     * enforce the attributes more strongly
+     */
+    spapr_register_hypercall(H_LOGICAL_CI_LOAD, h_logical_load);
+    spapr_register_hypercall(H_LOGICAL_CI_STORE, h_logical_store);
+    spapr_register_hypercall(H_LOGICAL_CACHE_LOAD, h_logical_load);
+    spapr_register_hypercall(H_LOGICAL_CACHE_STORE, h_logical_store);
+    spapr_register_hypercall(H_LOGICAL_ICBI, h_logical_icbi);
+    spapr_register_hypercall(H_LOGICAL_DCBF, h_logical_dcbf);
+
     /* qemu/KVM-PPC specific hcalls */
     spapr_register_hypercall(KVMPPC_H_RTAS, h_rtas);
 }
-- 
1.6.0.2
^ permalink raw reply related	[flat|nested] 128+ messages in thread
- * [Qemu-devel] [PATCH 42/58] pseries: use macro for firmware filename
  2011-09-14  8:42 [Qemu-devel] [PULL 00/58] ppc patch queue 2011-09-14 Alexander Graf
                   ` (40 preceding siblings ...)
  2011-09-14  8:43 ` [Qemu-devel] [PATCH 41/58] pseries: Add real mode debugging hcalls Alexander Graf
@ 2011-09-14  8:43 ` Alexander Graf
  2011-09-14  8:43 ` [Qemu-devel] [PATCH 43/58] KVM: Update kernel headers Alexander Graf
                   ` (15 subsequent siblings)
  57 siblings, 0 replies; 128+ messages in thread
From: Alexander Graf @ 2011-09-14  8:43 UTC (permalink / raw)
  To: qemu-devel Developers
  Cc: Blue Swirl, Nishanth Aravamudan, qemu-ppc, Aurelien Jarno,
	David Gibson
From: Nishanth Aravamudan <nacc@us.ibm.com>
For some time we've had a nicely defined macro with the filename for our
firmware image.  However we didn't actually use it in the place we're
supposed to.  This patch fixes it.
Signed-off-by: Nishanth Aravamudan <nacc@us.ibm.com>
Signed-off-by: David Gibson <david@gibson.dropbear.id.au>
Signed-off-by: Alexander Graf <agraf@suse.de>
---
 hw/spapr.c |    2 +-
 1 files changed, 1 insertions(+), 1 deletions(-)
diff --git a/hw/spapr.c b/hw/spapr.c
index 00aed62..91953cf 100644
--- a/hw/spapr.c
+++ b/hw/spapr.c
@@ -442,7 +442,7 @@ static void ppc_spapr_init(ram_addr_t ram_size,
                     "%ldM guest RAM\n", MIN_RAM_SLOF);
             exit(1);
         }
-        filename = qemu_find_file(QEMU_FILE_TYPE_BIOS, "slof.bin");
+        filename = qemu_find_file(QEMU_FILE_TYPE_BIOS, FW_FILE_NAME);
         fw_size = load_image_targphys(filename, 0, FW_MAX_SIZE);
         if (fw_size < 0) {
             hw_error("qemu: could not load LPAR rtas '%s'\n", filename);
-- 
1.6.0.2
^ permalink raw reply related	[flat|nested] 128+ messages in thread
- * [Qemu-devel] [PATCH 43/58] KVM: Update kernel headers
  2011-09-14  8:42 [Qemu-devel] [PULL 00/58] ppc patch queue 2011-09-14 Alexander Graf
                   ` (41 preceding siblings ...)
  2011-09-14  8:43 ` [Qemu-devel] [PATCH 42/58] pseries: use macro for firmware filename Alexander Graf
@ 2011-09-14  8:43 ` Alexander Graf
  2011-09-14  8:43 ` [Qemu-devel] [PATCH 44/58] kvm: ppc: booke206: use MMU API Alexander Graf
                   ` (14 subsequent siblings)
  57 siblings, 0 replies; 128+ messages in thread
From: Alexander Graf @ 2011-09-14  8:43 UTC (permalink / raw)
  To: qemu-devel Developers; +Cc: Blue Swirl, qemu-ppc, Aurelien Jarno
Another round of KVM features, another round of kernel header updates :)
Signed-off-by: Alexander Graf <agraf@suse.de>
---
 linux-headers/asm-powerpc/kvm.h |   40 +++++++++++++++++++++++++++++++++++++++
 linux-headers/linux/kvm.h       |   18 +++++++++++++++++
 2 files changed, 58 insertions(+), 0 deletions(-)
diff --git a/linux-headers/asm-powerpc/kvm.h b/linux-headers/asm-powerpc/kvm.h
index 579e219..28eecf0 100644
--- a/linux-headers/asm-powerpc/kvm.h
+++ b/linux-headers/asm-powerpc/kvm.h
@@ -284,6 +284,11 @@ struct kvm_guest_debug_arch {
 #define KVM_INTERRUPT_UNSET	-2U
 #define KVM_INTERRUPT_SET_LEVEL	-3U
 
+#define KVM_CPU_440		1
+#define KVM_CPU_E500V2		2
+#define KVM_CPU_3S_32		3
+#define KVM_CPU_3S_64		4
+
 /* for KVM_CAP_SPAPR_TCE */
 struct kvm_create_spapr_tce {
 	__u64 liobn;
@@ -295,4 +300,39 @@ struct kvm_allocate_rma {
 	__u64 rma_size;
 };
 
+struct kvm_book3e_206_tlb_entry {
+	__u32 mas8;
+	__u32 mas1;
+	__u64 mas2;
+	__u64 mas7_3;
+};
+
+struct kvm_book3e_206_tlb_params {
+	/*
+	 * For mmu types KVM_MMU_FSL_BOOKE_NOHV and KVM_MMU_FSL_BOOKE_HV:
+	 *
+	 * - The number of ways of TLB0 must be a power of two between 2 and
+	 *   16.
+	 * - TLB1 must be fully associative.
+	 * - The size of TLB0 must be a multiple of the number of ways, and
+	 *   the number of sets must be a power of two.
+	 * - The size of TLB1 may not exceed 64 entries.
+	 * - TLB0 supports 4 KiB pages.
+	 * - The page sizes supported by TLB1 are as indicated by
+	 *   TLB1CFG (if MMUCFG[MAVN] = 0) or TLB1PS (if MMUCFG[MAVN] = 1)
+	 *   as returned by KVM_GET_SREGS.
+	 * - TLB2 and TLB3 are reserved, and their entries in tlb_sizes[]
+	 *   and tlb_ways[] must be zero.
+	 *
+	 * tlb_ways[n] = tlb_sizes[n] means the array is fully associative.
+	 *
+	 * KVM will adjust TLBnCFG based on the sizes configured here,
+	 * though arrays greater than 2048 entries will have TLBnCFG[NENTRY]
+	 * set to zero.
+	 */
+	__u32 tlb_sizes[4];
+	__u32 tlb_ways[4];
+	__u32 reserved[8];
+};
+
 #endif /* __LINUX_KVM_POWERPC_H */
diff --git a/linux-headers/linux/kvm.h b/linux-headers/linux/kvm.h
index 2062375..8bb6cde 100644
--- a/linux-headers/linux/kvm.h
+++ b/linux-headers/linux/kvm.h
@@ -556,6 +556,7 @@ struct kvm_ppc_pvinfo {
 #define KVM_CAP_MAX_VCPUS 66       /* returns max vcpus per vm */
 #define KVM_CAP_PPC_HIOR 67
 #define KVM_CAP_PPC_PAPR 68
+#define KVM_CAP_SW_TLB 69
 
 #ifdef KVM_CAP_IRQ_ROUTING
 
@@ -635,6 +636,21 @@ struct kvm_clock_data {
 	__u32 pad[9];
 };
 
+#define KVM_MMU_FSL_BOOKE_NOHV		0
+#define KVM_MMU_FSL_BOOKE_HV		1
+
+struct kvm_config_tlb {
+	__u64 params;
+	__u64 array;
+	__u32 mmu_type;
+	__u32 array_len;
+};
+
+struct kvm_dirty_tlb {
+	__u64 bitmap;
+	__u32 num_dirty;
+};
+
 /*
  * ioctls for VM fds
  */
@@ -761,6 +777,8 @@ struct kvm_clock_data {
 #define KVM_CREATE_SPAPR_TCE	  _IOW(KVMIO,  0xa8, struct kvm_create_spapr_tce)
 /* Available with KVM_CAP_RMA */
 #define KVM_ALLOCATE_RMA	  _IOR(KVMIO,  0xa9, struct kvm_allocate_rma)
+/* Available with KVM_CAP_SW_TLB */
+#define KVM_DIRTY_TLB		  _IOW(KVMIO,  0xaa, struct kvm_dirty_tlb)
 
 #define KVM_DEV_ASSIGN_ENABLE_IOMMU	(1 << 0)
 
-- 
1.6.0.2
^ permalink raw reply related	[flat|nested] 128+ messages in thread
- * [Qemu-devel] [PATCH 44/58] kvm: ppc: booke206: use MMU API
  2011-09-14  8:42 [Qemu-devel] [PULL 00/58] ppc patch queue 2011-09-14 Alexander Graf
                   ` (42 preceding siblings ...)
  2011-09-14  8:43 ` [Qemu-devel] [PATCH 43/58] KVM: Update kernel headers Alexander Graf
@ 2011-09-14  8:43 ` Alexander Graf
  2011-09-14  8:43 ` [Qemu-devel] [PATCH 45/58] ppc: booke206: add "info tlb" support Alexander Graf
                   ` (13 subsequent siblings)
  57 siblings, 0 replies; 128+ messages in thread
From: Alexander Graf @ 2011-09-14  8:43 UTC (permalink / raw)
  To: qemu-devel Developers; +Cc: Blue Swirl, Scott Wood, qemu-ppc, Aurelien Jarno
From: Scott Wood <scottwood@freescale.com>
Share the TLB array with KVM.  This allows us to set the initial TLB
both on initial boot and reset, is useful for debugging, and could
eventually be used to support migration.
Signed-off-by: Scott Wood <scottwood@freescale.com>
Signed-off-by: Alexander Graf <agraf@suse.de>
---
 hw/ppce500_mpc8544ds.c |    2 +
 target-ppc/cpu.h       |    2 +
 target-ppc/kvm.c       |   85 ++++++++++++++++++++++++++++++++++++++++++++++++
 3 files changed, 89 insertions(+), 0 deletions(-)
diff --git a/hw/ppce500_mpc8544ds.c b/hw/ppce500_mpc8544ds.c
index b86a008..61151d8 100644
--- a/hw/ppce500_mpc8544ds.c
+++ b/hw/ppce500_mpc8544ds.c
@@ -189,6 +189,8 @@ static void mmubooke_create_initial_mapping(CPUState *env,
     tlb->mas2 = va & TARGET_PAGE_MASK;
     tlb->mas7_3 = pa & TARGET_PAGE_MASK;
     tlb->mas7_3 |= MAS3_UR | MAS3_UW | MAS3_UX | MAS3_SR | MAS3_SW | MAS3_SX;
+
+    env->tlb_dirty = true;
 }
 
 static void mpc8544ds_cpu_reset_sec(void *opaque)
diff --git a/target-ppc/cpu.h b/target-ppc/cpu.h
index b8d42e0..3e7f797 100644
--- a/target-ppc/cpu.h
+++ b/target-ppc/cpu.h
@@ -934,6 +934,8 @@ struct CPUPPCState {
     ppc_tlb_t tlb;   /* TLB is optional. Allocate them only if needed        */
     /* 403 dedicated access protection registers */
     target_ulong pb[4];
+    bool tlb_dirty;   /* Set to non-zero when modifying TLB                  */
+    bool kvm_sw_tlb;  /* non-zero if KVM SW TLB API is active                */
 #endif
 
     /* Other registers */
diff --git a/target-ppc/kvm.c b/target-ppc/kvm.c
index f65b6e1..35a6f10 100644
--- a/target-ppc/kvm.c
+++ b/target-ppc/kvm.c
@@ -112,6 +112,52 @@ static int kvm_arch_sync_sregs(CPUState *cenv)
     return kvm_vcpu_ioctl(cenv, KVM_SET_SREGS, &sregs);
 }
 
+/* Set up a shared TLB array with KVM */
+static int kvm_booke206_tlb_init(CPUState *env)
+{
+    struct kvm_book3e_206_tlb_params params = {};
+    struct kvm_config_tlb cfg = {};
+    struct kvm_enable_cap encap = {};
+    unsigned int entries = 0;
+    int ret, i;
+
+    if (!kvm_enabled() ||
+        !kvm_check_extension(env->kvm_state, KVM_CAP_SW_TLB)) {
+        return 0;
+    }
+
+    assert(ARRAY_SIZE(params.tlb_sizes) == BOOKE206_MAX_TLBN);
+
+    for (i = 0; i < BOOKE206_MAX_TLBN; i++) {
+        params.tlb_sizes[i] = booke206_tlb_size(env, i);
+        params.tlb_ways[i] = booke206_tlb_ways(env, i);
+        entries += params.tlb_sizes[i];
+    }
+
+    assert(entries == env->nb_tlb);
+    assert(sizeof(struct kvm_book3e_206_tlb_entry) == sizeof(ppcmas_tlb_t));
+
+    env->tlb_dirty = true;
+
+    cfg.array = (uintptr_t)env->tlb.tlbm;
+    cfg.array_len = sizeof(ppcmas_tlb_t) * entries;
+    cfg.params = (uintptr_t)&params;
+    cfg.mmu_type = KVM_MMU_FSL_BOOKE_NOHV;
+
+    encap.cap = KVM_CAP_SW_TLB;
+    encap.args[0] = (uintptr_t)&cfg;
+
+    ret = kvm_vcpu_ioctl(env, KVM_ENABLE_CAP, &encap);
+    if (ret < 0) {
+        fprintf(stderr, "%s: couldn't enable KVM_CAP_SW_TLB: %s\n",
+                __func__, strerror(-ret));
+        return ret;
+    }
+
+    env->kvm_sw_tlb = true;
+    return 0;
+}
+
 int kvm_arch_init_vcpu(CPUState *cenv)
 {
     int ret;
@@ -123,6 +169,15 @@ int kvm_arch_init_vcpu(CPUState *cenv)
 
     idle_timer = qemu_new_timer_ns(vm_clock, kvm_kick_env, cenv);
 
+    /* Some targets support access to KVM's guest TLB. */
+    switch (cenv->mmu_model) {
+    case POWERPC_MMU_BOOKE206:
+        ret = kvm_booke206_tlb_init(cenv);
+        break;
+    default:
+        break;
+    }
+
     return ret;
 }
 
@@ -130,6 +185,31 @@ void kvm_arch_reset_vcpu(CPUState *env)
 {
 }
 
+static void kvm_sw_tlb_put(CPUState *env)
+{
+    struct kvm_dirty_tlb dirty_tlb;
+    unsigned char *bitmap;
+    int ret;
+
+    if (!env->kvm_sw_tlb) {
+        return;
+    }
+
+    bitmap = g_malloc((env->nb_tlb + 7) / 8);
+    memset(bitmap, 0xFF, (env->nb_tlb + 7) / 8);
+
+    dirty_tlb.bitmap = (uintptr_t)bitmap;
+    dirty_tlb.num_dirty = env->nb_tlb;
+
+    ret = kvm_vcpu_ioctl(env, KVM_DIRTY_TLB, &dirty_tlb);
+    if (ret) {
+        fprintf(stderr, "%s: KVM_DIRTY_TLB: %s\n",
+                __func__, strerror(-ret));
+    }
+
+    g_free(bitmap);
+}
+
 int kvm_arch_put_registers(CPUState *env, int level)
 {
     struct kvm_regs regs;
@@ -167,6 +247,11 @@ int kvm_arch_put_registers(CPUState *env, int level)
     if (ret < 0)
         return ret;
 
+    if (env->tlb_dirty) {
+        kvm_sw_tlb_put(env);
+        env->tlb_dirty = false;
+    }
+
     return ret;
 }
 
-- 
1.6.0.2
^ permalink raw reply related	[flat|nested] 128+ messages in thread
- * [Qemu-devel] [PATCH 45/58] ppc: booke206: add "info tlb" support
  2011-09-14  8:42 [Qemu-devel] [PULL 00/58] ppc patch queue 2011-09-14 Alexander Graf
                   ` (43 preceding siblings ...)
  2011-09-14  8:43 ` [Qemu-devel] [PATCH 44/58] kvm: ppc: booke206: use MMU API Alexander Graf
@ 2011-09-14  8:43 ` Alexander Graf
  2011-09-14  8:43 ` [Qemu-devel] [PATCH 46/58] ppc: booke206: use MAV=2.0 TSIZE definition, fix 4G pages Alexander Graf
                   ` (12 subsequent siblings)
  57 siblings, 0 replies; 128+ messages in thread
From: Alexander Graf @ 2011-09-14  8:43 UTC (permalink / raw)
  To: qemu-devel Developers; +Cc: Blue Swirl, Scott Wood, qemu-ppc, Aurelien Jarno
From: Scott Wood <scottwood@freescale.com>
Signed-off-by: Scott Wood <scottwood@freescale.com>
Signed-off-by: Alexander Graf <agraf@suse.de>
---
 hmp-commands.hx     |    2 +-
 monitor.c           |    5 ++-
 target-ppc/cpu.h    |    2 +
 target-ppc/helper.c |   88 +++++++++++++++++++++++++++++++++++++++++++++++++++
 4 files changed, 94 insertions(+), 3 deletions(-)
diff --git a/hmp-commands.hx b/hmp-commands.hx
index 9e1cca8..506014c 100644
--- a/hmp-commands.hx
+++ b/hmp-commands.hx
@@ -1306,7 +1306,7 @@ show i8259 (PIC) state
 @item info pci
 show emulated PCI device info
 @item info tlb
-show virtual to physical memory mappings (i386, SH4 and SPARC only)
+show virtual to physical memory mappings (i386, SH4, SPARC, and PPC only)
 @item info mem
 show the active virtual memory mappings (i386 only)
 @item info jit
diff --git a/monitor.c b/monitor.c
index 03ae997..46bfeec 100644
--- a/monitor.c
+++ b/monitor.c
@@ -2456,7 +2456,7 @@ static void tlb_info(Monitor *mon)
 
 #endif
 
-#if defined(TARGET_SPARC)
+#if defined(TARGET_SPARC) || defined(TARGET_PPC)
 static void tlb_info(Monitor *mon)
 {
     CPUState *env1 = mon_get_cpu();
@@ -2949,7 +2949,8 @@ static const mon_cmd_t info_cmds[] = {
         .user_print = do_pci_info_print,
         .mhandler.info_new = do_pci_info,
     },
-#if defined(TARGET_I386) || defined(TARGET_SH4) || defined(TARGET_SPARC)
+#if defined(TARGET_I386) || defined(TARGET_SH4) || defined(TARGET_SPARC) || \
+    defined(TARGET_PPC)
     {
         .name       = "tlb",
         .args_type  = "",
diff --git a/target-ppc/cpu.h b/target-ppc/cpu.h
index 3e7f797..5200e6e 100644
--- a/target-ppc/cpu.h
+++ b/target-ppc/cpu.h
@@ -2045,4 +2045,6 @@ static inline void cpu_pc_from_tb(CPUState *env, TranslationBlock *tb)
     env->nip = tb->pc;
 }
 
+void dump_mmu(FILE *f, fprintf_function cpu_fprintf, CPUState *env);
+
 #endif /* !defined (__CPU_PPC_H__) */
diff --git a/target-ppc/helper.c b/target-ppc/helper.c
index 5ec83f2..d1bc574 100644
--- a/target-ppc/helper.c
+++ b/target-ppc/helper.c
@@ -1465,6 +1465,94 @@ found_tlb:
     return ret;
 }
 
+static const char *book3e_tsize_to_str[32] = {
+    "1K", "2K", "4K", "8K", "16K", "32K", "64K", "128K", "256K", "512K",
+    "1M", "2M", "4M", "8M", "16M", "32M", "64M", "128M", "256M", "512M",
+    "1G", "2G", "4G", "8G", "16G", "32G", "64G", "128G", "256G", "512G",
+    "1T", "2T"
+};
+
+static void mmubooke206_dump_one_tlb(FILE *f, fprintf_function cpu_fprintf,
+                                     CPUState *env, int tlbn, int offset,
+                                     int tlbsize)
+{
+    ppcmas_tlb_t *entry;
+    int i;
+
+    cpu_fprintf(f, "\nTLB%d:\n", tlbn);
+    cpu_fprintf(f, "Effective          Physical           Size TID   TS SRWX URWX WIMGE U0123\n");
+
+    entry = &env->tlb.tlbm[offset];
+    for (i = 0; i < tlbsize; i++, entry++) {
+        target_phys_addr_t ea, pa, size;
+        int tsize;
+
+        if (!(entry->mas1 & MAS1_VALID)) {
+            continue;
+        }
+
+        tsize = (entry->mas1 & MAS1_TSIZE_MASK) >> MAS1_TSIZE_SHIFT;
+        size = 1024ULL << tsize;
+        ea = entry->mas2 & ~(size - 1);
+        pa = entry->mas7_3 & ~(size - 1);
+
+        cpu_fprintf(f, "0x%016" PRIx64 " 0x%016" PRIx64 " %4s %-5u %1u  S%c%c%c U%c%c%c %c%c%c%c%c U%c%c%c%c\n",
+                    (uint64_t)ea, (uint64_t)pa,
+                    book3e_tsize_to_str[tsize],
+                    (entry->mas1 & MAS1_TID_MASK) >> MAS1_TID_SHIFT,
+                    (entry->mas1 & MAS1_TS) >> MAS1_TS_SHIFT,
+                    entry->mas7_3 & MAS3_SR ? 'R' : '-',
+                    entry->mas7_3 & MAS3_SW ? 'W' : '-',
+                    entry->mas7_3 & MAS3_SX ? 'X' : '-',
+                    entry->mas7_3 & MAS3_UR ? 'R' : '-',
+                    entry->mas7_3 & MAS3_UW ? 'W' : '-',
+                    entry->mas7_3 & MAS3_UX ? 'X' : '-',
+                    entry->mas2 & MAS2_W ? 'W' : '-',
+                    entry->mas2 & MAS2_I ? 'I' : '-',
+                    entry->mas2 & MAS2_M ? 'M' : '-',
+                    entry->mas2 & MAS2_G ? 'G' : '-',
+                    entry->mas2 & MAS2_E ? 'E' : '-',
+                    entry->mas7_3 & MAS3_U0 ? '0' : '-',
+                    entry->mas7_3 & MAS3_U1 ? '1' : '-',
+                    entry->mas7_3 & MAS3_U2 ? '2' : '-',
+                    entry->mas7_3 & MAS3_U3 ? '3' : '-');
+    }
+}
+
+static void mmubooke206_dump_mmu(FILE *f, fprintf_function cpu_fprintf,
+                                 CPUState *env)
+{
+    int offset = 0;
+    int i;
+
+    if (kvm_enabled() && !env->kvm_sw_tlb) {
+        cpu_fprintf(f, "Cannot access KVM TLB\n");
+        return;
+    }
+
+    for (i = 0; i < BOOKE206_MAX_TLBN; i++) {
+        int size = booke206_tlb_size(env, i);
+
+        if (size == 0) {
+            continue;
+        }
+
+        mmubooke206_dump_one_tlb(f, cpu_fprintf, env, i, offset, size);
+        offset += size;
+    }
+}
+
+void dump_mmu(FILE *f, fprintf_function cpu_fprintf, CPUState *env)
+{
+    switch (env->mmu_model) {
+    case POWERPC_MMU_BOOKE206:
+        mmubooke206_dump_mmu(f, cpu_fprintf, env);
+        break;
+    default:
+        cpu_fprintf(f, "%s: unimplemented\n", __func__);
+    }
+}
+
 static inline int check_physical(CPUState *env, mmu_ctx_t *ctx,
                                  target_ulong eaddr, int rw)
 {
-- 
1.6.0.2
^ permalink raw reply related	[flat|nested] 128+ messages in thread
- * [Qemu-devel] [PATCH 46/58] ppc: booke206: use MAV=2.0 TSIZE definition, fix 4G pages
  2011-09-14  8:42 [Qemu-devel] [PULL 00/58] ppc patch queue 2011-09-14 Alexander Graf
                   ` (44 preceding siblings ...)
  2011-09-14  8:43 ` [Qemu-devel] [PATCH 45/58] ppc: booke206: add "info tlb" support Alexander Graf
@ 2011-09-14  8:43 ` Alexander Graf
  2011-09-14  8:43 ` [Qemu-devel] [PATCH 47/58] Implement POWER7's CFAR in TCG Alexander Graf
                   ` (11 subsequent siblings)
  57 siblings, 0 replies; 128+ messages in thread
From: Alexander Graf @ 2011-09-14  8:43 UTC (permalink / raw)
  To: qemu-devel Developers; +Cc: Blue Swirl, Scott Wood, qemu-ppc, Aurelien Jarno
From: Scott Wood <scottwood@freescale.com>
This definition is backward compatible with MAV=1.0 as long as
the guest does not set reserved bits in MAS1/MAS4.
Also, fix the shift in booke206_tlb_to_page_size -- it's the base
that should be able to hold a 4G page size, not the shift count.
Signed-off-by: Scott Wood <scottwood@freescale.com>
Signed-off-by: Alexander Graf <agraf@suse.de>
---
 hw/ppce500_mpc8544ds.c |    2 +-
 target-ppc/cpu.h       |    4 ++--
 target-ppc/helper.c    |    5 +++--
 3 files changed, 6 insertions(+), 5 deletions(-)
diff --git a/hw/ppce500_mpc8544ds.c b/hw/ppce500_mpc8544ds.c
index 61151d8..8095516 100644
--- a/hw/ppce500_mpc8544ds.c
+++ b/hw/ppce500_mpc8544ds.c
@@ -174,7 +174,7 @@ out:
 /* Create -kernel TLB entries for BookE, linearly spanning 256MB.  */
 static inline target_phys_addr_t booke206_page_size_to_tlb(uint64_t size)
 {
-    return (ffs(size >> 10) - 1) >> 1;
+    return ffs(size >> 10) - 1;
 }
 
 static void mmubooke_create_initial_mapping(CPUState *env,
diff --git a/target-ppc/cpu.h b/target-ppc/cpu.h
index 5200e6e..32706df 100644
--- a/target-ppc/cpu.h
+++ b/target-ppc/cpu.h
@@ -667,8 +667,8 @@ enum {
 #define MAS0_ATSEL_TLB     0
 #define MAS0_ATSEL_LRAT    MAS0_ATSEL
 
-#define MAS1_TSIZE_SHIFT   8
-#define MAS1_TSIZE_MASK    (0xf << MAS1_TSIZE_SHIFT)
+#define MAS1_TSIZE_SHIFT   7
+#define MAS1_TSIZE_MASK    (0x1f << MAS1_TSIZE_SHIFT)
 
 #define MAS1_TS_SHIFT      12
 #define MAS1_TS            (1 << MAS1_TS_SHIFT)
diff --git a/target-ppc/helper.c b/target-ppc/helper.c
index d1bc574..73796c8 100644
--- a/target-ppc/helper.c
+++ b/target-ppc/helper.c
@@ -1293,7 +1293,7 @@ target_phys_addr_t booke206_tlb_to_page_size(CPUState *env, ppcmas_tlb_t *tlb)
 {
     uint32_t tlbncfg;
     int tlbn = booke206_tlbm_to_tlbn(env, tlb);
-    target_phys_addr_t tlbm_size;
+    int tlbm_size;
 
     tlbncfg = env->spr[SPR_BOOKE_TLB0CFG + tlbn];
 
@@ -1301,9 +1301,10 @@ target_phys_addr_t booke206_tlb_to_page_size(CPUState *env, ppcmas_tlb_t *tlb)
         tlbm_size = (tlb->mas1 & MAS1_TSIZE_MASK) >> MAS1_TSIZE_SHIFT;
     } else {
         tlbm_size = (tlbncfg & TLBnCFG_MINSIZE) >> TLBnCFG_MINSIZE_SHIFT;
+        tlbm_size <<= 1;
     }
 
-    return (1 << (tlbm_size << 1)) << 10;
+    return 1024ULL << tlbm_size;
 }
 
 /* TLB check function for MAS based SoftTLBs */
-- 
1.6.0.2
^ permalink raw reply related	[flat|nested] 128+ messages in thread
- * [Qemu-devel] [PATCH 47/58] Implement POWER7's CFAR in TCG
  2011-09-14  8:42 [Qemu-devel] [PULL 00/58] ppc patch queue 2011-09-14 Alexander Graf
                   ` (45 preceding siblings ...)
  2011-09-14  8:43 ` [Qemu-devel] [PATCH 46/58] ppc: booke206: use MAV=2.0 TSIZE definition, fix 4G pages Alexander Graf
@ 2011-09-14  8:43 ` Alexander Graf
  2011-09-17 17:08   ` Blue Swirl
  2011-09-14  8:43 ` [Qemu-devel] [PATCH 48/58] pseries: Implement hcall-bulk hypervisor interface Alexander Graf
                   ` (10 subsequent siblings)
  57 siblings, 1 reply; 128+ messages in thread
From: Alexander Graf @ 2011-09-14  8:43 UTC (permalink / raw)
  To: qemu-devel Developers; +Cc: Blue Swirl, qemu-ppc, Aurelien Jarno, David Gibson
From: David Gibson <david@gibson.dropbear.id.au>
This patch implements support for the CFAR SPR on POWER7 (Come From
Address Register), which snapshots the PC value at the time of a branch or
an rfid.  The latest powerpc-next kernel also catches it and can show it in
xmon or in the signal frames.
This works well enough to let recent kernels boot (which otherwise oops
on the CFAR access).  It hasn't been tested enough to be confident that the
CFAR values are actually accurate, but one thing at a time.
Signed-off-by: Ben Herrenschmidt <benh@kernel.crashing.org>
Signed-off-by: David Gibson <david@gibson.dropbear.id.au>
Signed-off-by: Alexander Graf <agraf@suse.de>
---
 target-ppc/cpu.h            |    8 ++++++++
 target-ppc/translate.c      |   28 ++++++++++++++++++++++++++++
 target-ppc/translate_init.c |   23 ++++++++++++++++++++++-
 3 files changed, 58 insertions(+), 1 deletions(-)
diff --git a/target-ppc/cpu.h b/target-ppc/cpu.h
index 32706df..3f4af22 100644
--- a/target-ppc/cpu.h
+++ b/target-ppc/cpu.h
@@ -555,6 +555,8 @@ enum {
     /* Decrementer clock: RTC clock (POWER, 601) or bus clock                */
     POWERPC_FLAG_RTC_CLK  = 0x00010000,
     POWERPC_FLAG_BUS_CLK  = 0x00020000,
+    /* Has CFAR                                                              */
+    POWERPC_FLAG_CFAR     = 0x00040000,
 };
 
 /*****************************************************************************/
@@ -872,6 +874,10 @@ struct CPUPPCState {
     target_ulong ctr;
     /* condition register */
     uint32_t crf[8];
+#if defined(TARGET_PPC64)
+    /* CFAR */
+    target_ulong cfar;
+#endif
     /* XER */
     target_ulong xer;
     /* Reservation address */
@@ -1204,6 +1210,7 @@ static inline void cpu_clone_regs(CPUState *env, target_ulong newsp)
 #define SPR_601_UDECR         (0x006)
 #define SPR_LR                (0x008)
 #define SPR_CTR               (0x009)
+#define SPR_DSCR              (0x011)
 #define SPR_DSISR             (0x012)
 #define SPR_DAR               (0x013) /* DAE for PowerPC 601 */
 #define SPR_601_RTCU          (0x014)
@@ -1212,6 +1219,7 @@ static inline void cpu_clone_regs(CPUState *env, target_ulong newsp)
 #define SPR_SDR1              (0x019)
 #define SPR_SRR0              (0x01A)
 #define SPR_SRR1              (0x01B)
+#define SPR_CFAR              (0x01C)
 #define SPR_AMR               (0x01D)
 #define SPR_BOOKE_PID         (0x030)
 #define SPR_BOOKE_DECAR       (0x036)
diff --git a/target-ppc/translate.c b/target-ppc/translate.c
index 4277460..1e362fc 100644
--- a/target-ppc/translate.c
+++ b/target-ppc/translate.c
@@ -69,6 +69,9 @@ static TCGv cpu_nip;
 static TCGv cpu_msr;
 static TCGv cpu_ctr;
 static TCGv cpu_lr;
+#if defined(TARGET_PPC64)
+static TCGv cpu_cfar;
+#endif
 static TCGv cpu_xer;
 static TCGv cpu_reserve;
 static TCGv_i32 cpu_fpscr;
@@ -154,6 +157,11 @@ void ppc_translate_init(void)
     cpu_lr = tcg_global_mem_new(TCG_AREG0,
                                 offsetof(CPUState, lr), "lr");
 
+#if defined(TARGET_PPC64)
+    cpu_cfar = tcg_global_mem_new(TCG_AREG0,
+                                  offsetof(CPUState, cfar), "cfar");
+#endif
+
     cpu_xer = tcg_global_mem_new(TCG_AREG0,
                                  offsetof(CPUState, xer), "xer");
 
@@ -187,6 +195,7 @@ typedef struct DisasContext {
     int le_mode;
 #if defined(TARGET_PPC64)
     int sf_mode;
+    int has_cfar;
 #endif
     int fpu_enabled;
     int altivec_enabled;
@@ -3345,6 +3354,14 @@ static inline void gen_qemu_st32fiw(DisasContext *ctx, TCGv_i64 arg1, TCGv arg2)
 /* stfiwx */
 GEN_STXF(stfiw, st32fiw, 0x17, 0x1E, PPC_FLOAT_STFIWX);
 
+static inline void gen_update_cfar(DisasContext *ctx, target_ulong nip)
+{
+#if defined(TARGET_PPC64)
+    if (ctx->has_cfar)
+        tcg_gen_movi_tl(cpu_cfar, nip);
+#endif
+}
+
 /***                                Branch                                 ***/
 static inline void gen_goto_tb(DisasContext *ctx, int n, target_ulong dest)
 {
@@ -3407,6 +3424,7 @@ static void gen_b(DisasContext *ctx)
         target = li;
     if (LK(ctx->opcode))
         gen_setlr(ctx, ctx->nip);
+    gen_update_cfar(ctx, ctx->nip);
     gen_goto_tb(ctx, 0, target);
 }
 
@@ -3469,6 +3487,7 @@ static inline void gen_bcond(DisasContext *ctx, int type)
         }
         tcg_temp_free_i32(temp);
     }
+    gen_update_cfar(ctx, ctx->nip);
     if (type == BCOND_IM) {
         target_ulong li = (target_long)((int16_t)(BD(ctx->opcode)));
         if (likely(AA(ctx->opcode) == 0)) {
@@ -3580,6 +3599,7 @@ static void gen_rfi(DisasContext *ctx)
         gen_inval_exception(ctx, POWERPC_EXCP_PRIV_OPC);
         return;
     }
+    gen_update_cfar(ctx, ctx->nip);
     gen_helper_rfi();
     gen_sync_exception(ctx);
 #endif
@@ -3596,6 +3616,7 @@ static void gen_rfid(DisasContext *ctx)
         gen_inval_exception(ctx, POWERPC_EXCP_PRIV_OPC);
         return;
     }
+    gen_update_cfar(ctx, ctx->nip);
     gen_helper_rfid();
     gen_sync_exception(ctx);
 #endif
@@ -9263,6 +9284,12 @@ void cpu_dump_state (CPUState *env, FILE *f, fprintf_function cpu_fprintf,
          */
     }
 
+#if defined(TARGET_PPC64)
+    if (env->flags & POWERPC_FLAG_CFAR) {
+        cpu_fprintf(f, " CFAR " TARGET_FMT_lx"\n", env->cfar);
+    }
+#endif
+
     switch (env->mmu_model) {
     case POWERPC_MMU_32B:
     case POWERPC_MMU_601:
@@ -9371,6 +9398,7 @@ static inline void gen_intermediate_code_internal(CPUState *env,
     ctx.le_mode = env->hflags & (1 << MSR_LE) ? 1 : 0;
 #if defined(TARGET_PPC64)
     ctx.sf_mode = msr_sf;
+    ctx.has_cfar = !!(env->flags & POWERPC_FLAG_CFAR);
 #endif
     ctx.fpu_enabled = msr_fp;
     if ((env->flags & POWERPC_FLAG_SPE) && msr_spe)
diff --git a/target-ppc/translate_init.c b/target-ppc/translate_init.c
index 9ea193d..211f3bd 100644
--- a/target-ppc/translate_init.c
+++ b/target-ppc/translate_init.c
@@ -129,6 +129,19 @@ static void spr_write_lr (void *opaque, int sprn, int gprn)
     tcg_gen_mov_tl(cpu_lr, cpu_gpr[gprn]);
 }
 
+/* CFAR */
+#if defined(TARGET_PPC64) && !defined(CONFIG_USER_ONLY)
+static void spr_read_cfar (void *opaque, int gprn, int sprn)
+{
+    tcg_gen_mov_tl(cpu_gpr[gprn], cpu_cfar);
+}
+
+static void spr_write_cfar (void *opaque, int sprn, int gprn)
+{
+    tcg_gen_mov_tl(cpu_cfar, cpu_gpr[gprn]);
+}
+#endif /* defined(TARGET_PPC64) && !defined(CONFIG_USER_ONLY) */
+
 /* CTR */
 static void spr_read_ctr (void *opaque, int gprn, int sprn)
 {
@@ -6489,7 +6502,7 @@ static void init_proc_970MP (CPUPPCState *env)
 #define POWERPC_BFDM_POWER7   (bfd_mach_ppc64)
 #define POWERPC_FLAG_POWER7   (POWERPC_FLAG_VRE | POWERPC_FLAG_SE |            \
                               POWERPC_FLAG_BE | POWERPC_FLAG_PMM |            \
-                              POWERPC_FLAG_BUS_CLK)
+                              POWERPC_FLAG_BUS_CLK | POWERPC_FLAG_CFAR)
 #define check_pow_POWER7    check_pow_nocheck
 
 static void init_proc_POWER7 (CPUPPCState *env)
@@ -6508,6 +6521,14 @@ static void init_proc_POWER7 (CPUPPCState *env)
                  &spr_read_purr, SPR_NOACCESS,
                  &spr_read_purr, SPR_NOACCESS,
                  0x00000000);
+    spr_register(env, SPR_CFAR, "SPR_CFAR",
+                 SPR_NOACCESS, SPR_NOACCESS,
+                 &spr_read_cfar, &spr_write_cfar,
+                 0x00000000);
+    spr_register(env, SPR_DSCR, "SPR_DSCR",
+                 SPR_NOACCESS, SPR_NOACCESS,
+                 &spr_read_generic, &spr_write_generic,
+                 0x00000000);
 #endif /* !CONFIG_USER_ONLY */
     /* Memory management */
     /* XXX : not implemented */
-- 
1.6.0.2
^ permalink raw reply related	[flat|nested] 128+ messages in thread
- * Re: [Qemu-devel] [PATCH 47/58] Implement POWER7's CFAR in TCG
  2011-09-14  8:43 ` [Qemu-devel] [PATCH 47/58] Implement POWER7's CFAR in TCG Alexander Graf
@ 2011-09-17 17:08   ` Blue Swirl
  2011-09-19  6:00     ` [Qemu-devel] [Qemu-ppc] " David Gibson
  0 siblings, 1 reply; 128+ messages in thread
From: Blue Swirl @ 2011-09-17 17:08 UTC (permalink / raw)
  To: Alexander Graf
  Cc: qemu-ppc, qemu-devel Developers, Aurelien Jarno, David Gibson
On Wed, Sep 14, 2011 at 8:43 AM, Alexander Graf <agraf@suse.de> wrote:
> From: David Gibson <david@gibson.dropbear.id.au>
>
> This patch implements support for the CFAR SPR on POWER7 (Come From
> Address Register), which snapshots the PC value at the time of a branch or
> an rfid.  The latest powerpc-next kernel also catches it and can show it in
> xmon or in the signal frames.
>
> This works well enough to let recent kernels boot (which otherwise oops
> on the CFAR access).  It hasn't been tested enough to be confident that the
> CFAR values are actually accurate, but one thing at a time.
This looks accurate at least for the cases covered.
A higher performance implementation could be to only update the
register lazily when the SPR is read, in most other times CFAR would
be only stored to DisasContext.
> Signed-off-by: Ben Herrenschmidt <benh@kernel.crashing.org>
> Signed-off-by: David Gibson <david@gibson.dropbear.id.au>
> Signed-off-by: Alexander Graf <agraf@suse.de>
> ---
>  target-ppc/cpu.h            |    8 ++++++++
>  target-ppc/translate.c      |   28 ++++++++++++++++++++++++++++
>  target-ppc/translate_init.c |   23 ++++++++++++++++++++++-
>  3 files changed, 58 insertions(+), 1 deletions(-)
>
> diff --git a/target-ppc/cpu.h b/target-ppc/cpu.h
> index 32706df..3f4af22 100644
> --- a/target-ppc/cpu.h
> +++ b/target-ppc/cpu.h
> @@ -555,6 +555,8 @@ enum {
>     /* Decrementer clock: RTC clock (POWER, 601) or bus clock                */
>     POWERPC_FLAG_RTC_CLK  = 0x00010000,
>     POWERPC_FLAG_BUS_CLK  = 0x00020000,
> +    /* Has CFAR                                                              */
> +    POWERPC_FLAG_CFAR     = 0x00040000,
>  };
>
>  /*****************************************************************************/
> @@ -872,6 +874,10 @@ struct CPUPPCState {
>     target_ulong ctr;
>     /* condition register */
>     uint32_t crf[8];
> +#if defined(TARGET_PPC64)
> +    /* CFAR */
> +    target_ulong cfar;
> +#endif
>     /* XER */
>     target_ulong xer;
>     /* Reservation address */
> @@ -1204,6 +1210,7 @@ static inline void cpu_clone_regs(CPUState *env, target_ulong newsp)
>  #define SPR_601_UDECR         (0x006)
>  #define SPR_LR                (0x008)
>  #define SPR_CTR               (0x009)
> +#define SPR_DSCR              (0x011)
>  #define SPR_DSISR             (0x012)
>  #define SPR_DAR               (0x013) /* DAE for PowerPC 601 */
>  #define SPR_601_RTCU          (0x014)
> @@ -1212,6 +1219,7 @@ static inline void cpu_clone_regs(CPUState *env, target_ulong newsp)
>  #define SPR_SDR1              (0x019)
>  #define SPR_SRR0              (0x01A)
>  #define SPR_SRR1              (0x01B)
> +#define SPR_CFAR              (0x01C)
>  #define SPR_AMR               (0x01D)
>  #define SPR_BOOKE_PID         (0x030)
>  #define SPR_BOOKE_DECAR       (0x036)
> diff --git a/target-ppc/translate.c b/target-ppc/translate.c
> index 4277460..1e362fc 100644
> --- a/target-ppc/translate.c
> +++ b/target-ppc/translate.c
> @@ -69,6 +69,9 @@ static TCGv cpu_nip;
>  static TCGv cpu_msr;
>  static TCGv cpu_ctr;
>  static TCGv cpu_lr;
> +#if defined(TARGET_PPC64)
> +static TCGv cpu_cfar;
> +#endif
>  static TCGv cpu_xer;
>  static TCGv cpu_reserve;
>  static TCGv_i32 cpu_fpscr;
> @@ -154,6 +157,11 @@ void ppc_translate_init(void)
>     cpu_lr = tcg_global_mem_new(TCG_AREG0,
>                                 offsetof(CPUState, lr), "lr");
>
> +#if defined(TARGET_PPC64)
> +    cpu_cfar = tcg_global_mem_new(TCG_AREG0,
> +                                  offsetof(CPUState, cfar), "cfar");
> +#endif
> +
>     cpu_xer = tcg_global_mem_new(TCG_AREG0,
>                                  offsetof(CPUState, xer), "xer");
>
> @@ -187,6 +195,7 @@ typedef struct DisasContext {
>     int le_mode;
>  #if defined(TARGET_PPC64)
>     int sf_mode;
> +    int has_cfar;
>  #endif
>     int fpu_enabled;
>     int altivec_enabled;
> @@ -3345,6 +3354,14 @@ static inline void gen_qemu_st32fiw(DisasContext *ctx, TCGv_i64 arg1, TCGv arg2)
>  /* stfiwx */
>  GEN_STXF(stfiw, st32fiw, 0x17, 0x1E, PPC_FLOAT_STFIWX);
>
> +static inline void gen_update_cfar(DisasContext *ctx, target_ulong nip)
> +{
> +#if defined(TARGET_PPC64)
> +    if (ctx->has_cfar)
Braces missing, please use checkpatch.pl.
> +        tcg_gen_movi_tl(cpu_cfar, nip);
> +#endif
> +}
> +
>  /***                                Branch                                 ***/
>  static inline void gen_goto_tb(DisasContext *ctx, int n, target_ulong dest)
>  {
> @@ -3407,6 +3424,7 @@ static void gen_b(DisasContext *ctx)
>         target = li;
>     if (LK(ctx->opcode))
>         gen_setlr(ctx, ctx->nip);
> +    gen_update_cfar(ctx, ctx->nip);
>     gen_goto_tb(ctx, 0, target);
>  }
>
> @@ -3469,6 +3487,7 @@ static inline void gen_bcond(DisasContext *ctx, int type)
>         }
>         tcg_temp_free_i32(temp);
>     }
> +    gen_update_cfar(ctx, ctx->nip);
>     if (type == BCOND_IM) {
>         target_ulong li = (target_long)((int16_t)(BD(ctx->opcode)));
>         if (likely(AA(ctx->opcode) == 0)) {
> @@ -3580,6 +3599,7 @@ static void gen_rfi(DisasContext *ctx)
>         gen_inval_exception(ctx, POWERPC_EXCP_PRIV_OPC);
>         return;
>     }
> +    gen_update_cfar(ctx, ctx->nip);
>     gen_helper_rfi();
>     gen_sync_exception(ctx);
>  #endif
> @@ -3596,6 +3616,7 @@ static void gen_rfid(DisasContext *ctx)
>         gen_inval_exception(ctx, POWERPC_EXCP_PRIV_OPC);
>         return;
>     }
> +    gen_update_cfar(ctx, ctx->nip);
>     gen_helper_rfid();
>     gen_sync_exception(ctx);
>  #endif
> @@ -9263,6 +9284,12 @@ void cpu_dump_state (CPUState *env, FILE *f, fprintf_function cpu_fprintf,
>          */
>     }
>
> +#if defined(TARGET_PPC64)
> +    if (env->flags & POWERPC_FLAG_CFAR) {
> +        cpu_fprintf(f, " CFAR " TARGET_FMT_lx"\n", env->cfar);
> +    }
> +#endif
> +
>     switch (env->mmu_model) {
>     case POWERPC_MMU_32B:
>     case POWERPC_MMU_601:
> @@ -9371,6 +9398,7 @@ static inline void gen_intermediate_code_internal(CPUState *env,
>     ctx.le_mode = env->hflags & (1 << MSR_LE) ? 1 : 0;
>  #if defined(TARGET_PPC64)
>     ctx.sf_mode = msr_sf;
> +    ctx.has_cfar = !!(env->flags & POWERPC_FLAG_CFAR);
>  #endif
>     ctx.fpu_enabled = msr_fp;
>     if ((env->flags & POWERPC_FLAG_SPE) && msr_spe)
> diff --git a/target-ppc/translate_init.c b/target-ppc/translate_init.c
> index 9ea193d..211f3bd 100644
> --- a/target-ppc/translate_init.c
> +++ b/target-ppc/translate_init.c
> @@ -129,6 +129,19 @@ static void spr_write_lr (void *opaque, int sprn, int gprn)
>     tcg_gen_mov_tl(cpu_lr, cpu_gpr[gprn]);
>  }
>
> +/* CFAR */
> +#if defined(TARGET_PPC64) && !defined(CONFIG_USER_ONLY)
> +static void spr_read_cfar (void *opaque, int gprn, int sprn)
> +{
> +    tcg_gen_mov_tl(cpu_gpr[gprn], cpu_cfar);
> +}
> +
> +static void spr_write_cfar (void *opaque, int sprn, int gprn)
> +{
> +    tcg_gen_mov_tl(cpu_cfar, cpu_gpr[gprn]);
> +}
> +#endif /* defined(TARGET_PPC64) && !defined(CONFIG_USER_ONLY) */
> +
>  /* CTR */
>  static void spr_read_ctr (void *opaque, int gprn, int sprn)
>  {
> @@ -6489,7 +6502,7 @@ static void init_proc_970MP (CPUPPCState *env)
>  #define POWERPC_BFDM_POWER7   (bfd_mach_ppc64)
>  #define POWERPC_FLAG_POWER7   (POWERPC_FLAG_VRE | POWERPC_FLAG_SE |            \
>                               POWERPC_FLAG_BE | POWERPC_FLAG_PMM |            \
> -                              POWERPC_FLAG_BUS_CLK)
> +                              POWERPC_FLAG_BUS_CLK | POWERPC_FLAG_CFAR)
>  #define check_pow_POWER7    check_pow_nocheck
>
>  static void init_proc_POWER7 (CPUPPCState *env)
> @@ -6508,6 +6521,14 @@ static void init_proc_POWER7 (CPUPPCState *env)
>                  &spr_read_purr, SPR_NOACCESS,
>                  &spr_read_purr, SPR_NOACCESS,
>                  0x00000000);
> +    spr_register(env, SPR_CFAR, "SPR_CFAR",
> +                 SPR_NOACCESS, SPR_NOACCESS,
> +                 &spr_read_cfar, &spr_write_cfar,
> +                 0x00000000);
> +    spr_register(env, SPR_DSCR, "SPR_DSCR",
> +                 SPR_NOACCESS, SPR_NOACCESS,
> +                 &spr_read_generic, &spr_write_generic,
> +                 0x00000000);
>  #endif /* !CONFIG_USER_ONLY */
>     /* Memory management */
>     /* XXX : not implemented */
> --
> 1.6.0.2
>
>
^ permalink raw reply	[flat|nested] 128+ messages in thread
- * Re: [Qemu-devel] [Qemu-ppc] [PATCH 47/58] Implement POWER7's CFAR in TCG
  2011-09-17 17:08   ` Blue Swirl
@ 2011-09-19  6:00     ` David Gibson
  2011-09-19  6:47       ` Alexander Graf
  0 siblings, 1 reply; 128+ messages in thread
From: David Gibson @ 2011-09-19  6:00 UTC (permalink / raw)
  To: Blue Swirl; +Cc: qemu-ppc, Alexander Graf, qemu-devel Developers
On Sat, Sep 17, 2011 at 05:08:29PM +0000, Blue Swirl wrote:
> On Wed, Sep 14, 2011 at 8:43 AM, Alexander Graf <agraf@suse.de> wrote:
> > From: David Gibson <david@gibson.dropbear.id.au>
> >
> > This patch implements support for the CFAR SPR on POWER7 (Come From
> > Address Register), which snapshots the PC value at the time of a branch or
> > an rfid.  The latest powerpc-next kernel also catches it and can show it in
> > xmon or in the signal frames.
> >
> > This works well enough to let recent kernels boot (which otherwise oops
> > on the CFAR access).  It hasn't been tested enough to be confident that the
> > CFAR values are actually accurate, but one thing at a time.
> 
> This looks accurate at least for the cases covered.
> 
> A higher performance implementation could be to only update the
> register lazily when the SPR is read, in most other times CFAR would
> be only stored to DisasContext.
Uh, yeah.  I think I'll leave that to someone who actually understands
TCG, rather than just bluffing it.
-- 
David Gibson			| I'll have my music baroque, and my code
david AT gibson.dropbear.id.au	| minimalist, thank you.  NOT _the_ _other_
				| _way_ _around_!
http://www.ozlabs.org/~dgibson
^ permalink raw reply	[flat|nested] 128+ messages in thread 
- * Re: [Qemu-devel] [Qemu-ppc] [PATCH 47/58] Implement POWER7's CFAR in TCG
  2011-09-19  6:00     ` [Qemu-devel] [Qemu-ppc] " David Gibson
@ 2011-09-19  6:47       ` Alexander Graf
  0 siblings, 0 replies; 128+ messages in thread
From: Alexander Graf @ 2011-09-19  6:47 UTC (permalink / raw)
  To: David Gibson; +Cc: Blue Swirl, qemu-ppc@nongnu.org, qemu-devel Developers
Am 19.09.2011 um 08:00 schrieb David Gibson <david@gibson.dropbear.id.au>:
> On Sat, Sep 17, 2011 at 05:08:29PM +0000, Blue Swirl wrote:
>> On Wed, Sep 14, 2011 at 8:43 AM, Alexander Graf <agraf@suse.de> wrote:
>>> From: David Gibson <david@gibson.dropbear.id.au>
>>> 
>>> This patch implements support for the CFAR SPR on POWER7 (Come From
>>> Address Register), which snapshots the PC value at the time of a branch or
>>> an rfid.  The latest powerpc-next kernel also catches it and can show it in
>>> xmon or in the signal frames.
>>> 
>>> This works well enough to let recent kernels boot (which otherwise oops
>>> on the CFAR access).  It hasn't been tested enough to be confident that the
>>> CFAR values are actually accurate, but one thing at a time.
>> 
>> This looks accurate at least for the cases covered.
>> 
>> A higher performance implementation could be to only update the
>> register lazily when the SPR is read, in most other times CFAR would
>> be only stored to DisasContext.
> 
> Uh, yeah.  I think I'll leave that to someone who actually understands
> TCG, rather than just bluffing it.
CFAR is only written on branches, at which point we'd have to flush everything from DisasContext to env anyway :). So putting it there doesn't buy us anything.
Alex
^ permalink raw reply	[flat|nested] 128+ messages in thread 
 
 
 
- * [Qemu-devel] [PATCH 48/58] pseries: Implement hcall-bulk hypervisor interface
  2011-09-14  8:42 [Qemu-devel] [PULL 00/58] ppc patch queue 2011-09-14 Alexander Graf
                   ` (46 preceding siblings ...)
  2011-09-14  8:43 ` [Qemu-devel] [PATCH 47/58] Implement POWER7's CFAR in TCG Alexander Graf
@ 2011-09-14  8:43 ` Alexander Graf
  2011-09-14  8:43 ` [Qemu-devel] [PATCH 49/58] vscsi: send the CHECK_CONDITION status down together with autosense data Alexander Graf
                   ` (9 subsequent siblings)
  57 siblings, 0 replies; 128+ messages in thread
From: Alexander Graf @ 2011-09-14  8:43 UTC (permalink / raw)
  To: qemu-devel Developers; +Cc: Blue Swirl, qemu-ppc, Aurelien Jarno, David Gibson
From: David Gibson <david@gibson.dropbear.id.au>
This patch adds support for the H_BULK_REMOVE hypercall on the pseries
machine.  Strictly speaking this isn't necessary, since the kernel will
only attempt to use this if hcall-bulk is advertised in the device tree,
which previously it was not.
Adding this support may give a marginal performance increase, but more
importantly it reduces the differences between the emulated machine and
an existing PowerVM or kvm system, both of which already implement
hcall-bulk.
Signed-off-by: David Gibson <david@gibson.dropbear.id.au>
Signed-off-by: Alexander Graf <agraf@suse.de>
---
 hw/spapr.c       |    2 +-
 hw/spapr_hcall.c |  125 ++++++++++++++++++++++++++++++++++++++++++++++++-----
 2 files changed, 114 insertions(+), 13 deletions(-)
diff --git a/hw/spapr.c b/hw/spapr.c
index 91953cf..deb4ae5 100644
--- a/hw/spapr.c
+++ b/hw/spapr.c
@@ -75,7 +75,7 @@ static void *spapr_create_fdt_skel(const char *cpu_model,
     uint32_t end_prop = cpu_to_be32(initrd_base + initrd_size);
     uint32_t pft_size_prop[] = {0, cpu_to_be32(hash_shift)};
     char hypertas_prop[] = "hcall-pft\0hcall-term\0hcall-dabr\0hcall-interrupt"
-        "\0hcall-tce\0hcall-vio\0hcall-splpar";
+        "\0hcall-tce\0hcall-vio\0hcall-splpar\0hcall-bulk";
     uint32_t interrupt_server_ranges_prop[] = {0, cpu_to_be32(smp_cpus)};
     int i;
     char *modelname;
diff --git a/hw/spapr_hcall.c b/hw/spapr_hcall.c
index 0c61c10..84281be 100644
--- a/hw/spapr_hcall.c
+++ b/hw/spapr_hcall.c
@@ -174,20 +174,26 @@ static target_ulong h_enter(CPUState *env, sPAPREnvironment *spapr,
     return H_SUCCESS;
 }
 
-static target_ulong h_remove(CPUState *env, sPAPREnvironment *spapr,
-                             target_ulong opcode, target_ulong *args)
+enum {
+    REMOVE_SUCCESS = 0,
+    REMOVE_NOT_FOUND = 1,
+    REMOVE_PARM = 2,
+    REMOVE_HW = 3,
+};
+
+static target_ulong remove_hpte(CPUState *env, target_ulong ptex,
+                                target_ulong avpn,
+                                target_ulong flags,
+                                target_ulong *vp, target_ulong *rp)
 {
-    target_ulong flags = args[0];
-    target_ulong pte_index = args[1];
-    target_ulong avpn = args[2];
     uint8_t *hpte;
     target_ulong v, r, rb;
 
-    if ((pte_index * HASH_PTE_SIZE_64) & ~env->htab_mask) {
-        return H_PARAMETER;
+    if ((ptex * HASH_PTE_SIZE_64) & ~env->htab_mask) {
+        return REMOVE_PARM;
     }
 
-    hpte = env->external_htab + (pte_index * HASH_PTE_SIZE_64);
+    hpte = env->external_htab + (ptex * HASH_PTE_SIZE_64);
     while (!lock_hpte(hpte, HPTE_V_HVLOCK)) {
         /* We have no real concurrency in qemu soft-emulation, so we
          * will never actually have a contested lock */
@@ -202,14 +208,106 @@ static target_ulong h_remove(CPUState *env, sPAPREnvironment *spapr,
         ((flags & H_ANDCOND) && (v & avpn) != 0)) {
         stq_p(hpte, v & ~HPTE_V_HVLOCK);
         assert(!(ldq_p(hpte) & HPTE_V_HVLOCK));
-        return H_NOT_FOUND;
+        return REMOVE_NOT_FOUND;
     }
-    args[0] = v & ~HPTE_V_HVLOCK;
-    args[1] = r;
+    *vp = v & ~HPTE_V_HVLOCK;
+    *rp = r;
     stq_p(hpte, 0);
-    rb = compute_tlbie_rb(v, r, pte_index);
+    rb = compute_tlbie_rb(v, r, ptex);
     ppc_tlb_invalidate_one(env, rb);
     assert(!(ldq_p(hpte) & HPTE_V_HVLOCK));
+    return REMOVE_SUCCESS;
+}
+
+static target_ulong h_remove(CPUState *env, sPAPREnvironment *spapr,
+                             target_ulong opcode, target_ulong *args)
+{
+    target_ulong flags = args[0];
+    target_ulong pte_index = args[1];
+    target_ulong avpn = args[2];
+    int ret;
+
+    ret = remove_hpte(env, pte_index, avpn, flags,
+                      &args[0], &args[1]);
+
+    switch (ret) {
+    case REMOVE_SUCCESS:
+        return H_SUCCESS;
+
+    case REMOVE_NOT_FOUND:
+        return H_NOT_FOUND;
+
+    case REMOVE_PARM:
+        return H_PARAMETER;
+
+    case REMOVE_HW:
+        return H_HARDWARE;
+    }
+
+    assert(0);
+}
+
+#define H_BULK_REMOVE_TYPE             0xc000000000000000ULL
+#define   H_BULK_REMOVE_REQUEST        0x4000000000000000ULL
+#define   H_BULK_REMOVE_RESPONSE       0x8000000000000000ULL
+#define   H_BULK_REMOVE_END            0xc000000000000000ULL
+#define H_BULK_REMOVE_CODE             0x3000000000000000ULL
+#define   H_BULK_REMOVE_SUCCESS        0x0000000000000000ULL
+#define   H_BULK_REMOVE_NOT_FOUND      0x1000000000000000ULL
+#define   H_BULK_REMOVE_PARM           0x2000000000000000ULL
+#define   H_BULK_REMOVE_HW             0x3000000000000000ULL
+#define H_BULK_REMOVE_RC               0x0c00000000000000ULL
+#define H_BULK_REMOVE_FLAGS            0x0300000000000000ULL
+#define   H_BULK_REMOVE_ABSOLUTE       0x0000000000000000ULL
+#define   H_BULK_REMOVE_ANDCOND        0x0100000000000000ULL
+#define   H_BULK_REMOVE_AVPN           0x0200000000000000ULL
+#define H_BULK_REMOVE_PTEX             0x00ffffffffffffffULL
+
+#define H_BULK_REMOVE_MAX_BATCH        4
+
+static target_ulong h_bulk_remove(CPUState *env, sPAPREnvironment *spapr,
+                                  target_ulong opcode, target_ulong *args)
+{
+    int i;
+
+    for (i = 0; i < H_BULK_REMOVE_MAX_BATCH; i++) {
+        target_ulong *tsh = &args[i*2];
+        target_ulong tsl = args[i*2 + 1];
+        target_ulong v, r, ret;
+
+        if ((*tsh & H_BULK_REMOVE_TYPE) == H_BULK_REMOVE_END) {
+            break;
+        } else if ((*tsh & H_BULK_REMOVE_TYPE) != H_BULK_REMOVE_REQUEST) {
+            return H_PARAMETER;
+        }
+
+        *tsh &= H_BULK_REMOVE_PTEX | H_BULK_REMOVE_FLAGS;
+        *tsh |= H_BULK_REMOVE_RESPONSE;
+
+        if ((*tsh & H_BULK_REMOVE_ANDCOND) && (*tsh & H_BULK_REMOVE_AVPN)) {
+            *tsh |= H_BULK_REMOVE_PARM;
+            return H_PARAMETER;
+        }
+
+        ret = remove_hpte(env, *tsh & H_BULK_REMOVE_PTEX, tsl,
+                          (*tsh & H_BULK_REMOVE_FLAGS) >> 26,
+                          &v, &r);
+
+        *tsh |= ret << 60;
+
+        switch (ret) {
+        case REMOVE_SUCCESS:
+            *tsh |= (r & (HPTE_R_C | HPTE_R_R)) << 43;
+            break;
+
+        case REMOVE_PARM:
+            return H_PARAMETER;
+
+        case REMOVE_HW:
+            return H_HARDWARE;
+        }
+    }
+
     return H_SUCCESS;
 }
 
@@ -581,6 +679,9 @@ static void hypercall_init(void)
     spapr_register_hypercall(H_REMOVE, h_remove);
     spapr_register_hypercall(H_PROTECT, h_protect);
 
+    /* hcall-bulk */
+    spapr_register_hypercall(H_BULK_REMOVE, h_bulk_remove);
+
     /* hcall-dabr */
     spapr_register_hypercall(H_SET_DABR, h_set_dabr);
 
-- 
1.6.0.2
^ permalink raw reply related	[flat|nested] 128+ messages in thread
- * [Qemu-devel] [PATCH 49/58] vscsi: send the CHECK_CONDITION status down together with autosense data
  2011-09-14  8:42 [Qemu-devel] [PULL 00/58] ppc patch queue 2011-09-14 Alexander Graf
                   ` (47 preceding siblings ...)
  2011-09-14  8:43 ` [Qemu-devel] [PATCH 48/58] pseries: Implement hcall-bulk hypervisor interface Alexander Graf
@ 2011-09-14  8:43 ` Alexander Graf
  2011-09-14  8:43 ` [Qemu-devel] [PATCH 50/58] pseries: Update SLOF firmware image Alexander Graf
                   ` (8 subsequent siblings)
  57 siblings, 0 replies; 128+ messages in thread
From: Alexander Graf @ 2011-09-14  8:43 UTC (permalink / raw)
  To: qemu-devel Developers; +Cc: Blue Swirl, Paolo Bonzini, qemu-ppc, Aurelien Jarno
From: Paolo Bonzini <pbonzini@redhat.com>
I introduced this bug in commit 05751d3 (vscsi: always use get_sense,
2011-08-03) because at the time there was no way to expose a sense
condition to SLOF and Linux manages to work around the bug.  However,
the bug becomes evident now that SCSI devices also report unit
attention on reset.
SLOF also has problems dealing with unit attention conditions, so
it still will not boot even with this fix (just like OpenBIOS).
IBM folks are aware of their part of the bug. :-)
Reported-by: Thomas Huth <thuth@linux.vnet.ibm.com>
Signed-off-by: Paolo Bonzini <pbonzini@redhat.com>
Acked-by: David Gibson <david@gibson.dropbear.id.au>
Signed-off-by: Alexander Graf <agraf@suse.de>
---
 hw/spapr_vscsi.c |    1 -
 1 files changed, 0 insertions(+), 1 deletions(-)
diff --git a/hw/spapr_vscsi.c b/hw/spapr_vscsi.c
index 6fc82f6..e8426d7 100644
--- a/hw/spapr_vscsi.c
+++ b/hw/spapr_vscsi.c
@@ -483,7 +483,6 @@ static void vscsi_command_complete(SCSIRequest *sreq, uint32_t status)
     if (status == CHECK_CONDITION) {
         req->senselen = scsi_req_get_sense(req->sreq, req->sense,
                                            sizeof(req->sense));
-        status = 0;
         dprintf("VSCSI: Sense data, %d bytes:\n", len);
         dprintf("       %02x  %02x  %02x  %02x  %02x  %02x  %02x  %02x\n",
                 req->sense[0], req->sense[1], req->sense[2], req->sense[3],
-- 
1.6.0.2
^ permalink raw reply related	[flat|nested] 128+ messages in thread
- * [Qemu-devel] [PATCH 50/58] pseries: Update SLOF firmware image
  2011-09-14  8:42 [Qemu-devel] [PULL 00/58] ppc patch queue 2011-09-14 Alexander Graf
                   ` (48 preceding siblings ...)
  2011-09-14  8:43 ` [Qemu-devel] [PATCH 49/58] vscsi: send the CHECK_CONDITION status down together with autosense data Alexander Graf
@ 2011-09-14  8:43 ` Alexander Graf
  2011-09-14 11:01   ` Peter Maydell
  2011-09-14  8:43 ` [Qemu-devel] [PATCH 51/58] Gdbstub: handle read of fpscr Alexander Graf
                   ` (7 subsequent siblings)
  57 siblings, 1 reply; 128+ messages in thread
From: Alexander Graf @ 2011-09-14  8:43 UTC (permalink / raw)
  To: qemu-devel Developers; +Cc: Blue Swirl, qemu-ppc, Aurelien Jarno, David Gibson
From: David Gibson <david@gibson.dropbear.id.au>
The current SLOF firmware for the pseries machine has a bug in SCSI
condition handling that was exposed by recent updates to qemu's SCSI
emulation.  This patch updates the SLOF image to one with the bug fixed.
Signed-off-by: David Gibson <david@gibson.dropbear.id.au>
Signed-off-by: Alexander Graf <agraf@suse.de>
---
 pc-bios/README   |    2 +-
 pc-bios/slof.bin |  Bin 579072 -> 578888 bytes
 2 files changed, 1 insertions(+), 1 deletions(-)
diff --git a/pc-bios/README b/pc-bios/README
index f74b246..8912211 100644
--- a/pc-bios/README
+++ b/pc-bios/README
@@ -17,7 +17,7 @@
 - SLOF (Slimline Open Firmware) is a free IEEE 1275 Open Firmware
   implementation for certain IBM POWER hardware.  The sources are at
   https://github.com/dgibson/SLOF, and the image currently in qemu is
-  built from git tag qemu-slof-20110323.
+  built from git tag qemu-slof-20110830.
 
 - The PXE roms come from the iPXE project. Built with BANNER_TIME 0.
   Sources available at http://ipxe.org.  Vendor:Device ID -> ROM mapping:
diff --git a/pc-bios/slof.bin b/pc-bios/slof.bin
index 22c4c7f5c448e3002aefecf3438f5d080586d666..66163031c6eb5539b54b73214bf18b8cb6aa8743 100644
GIT binary patch
delta 2674
zcma)-e{2)i9l+l^+vI#pAfbVz1)3MqK~gx|NkeJ^ZtP|W2}zTNkV3ag&E@WF-#FhL
zcjv}l6WP#ojQ&#+OS}A$K{{>iHcf~vXkI6+cq@ZwKT6q#)c!NIV^hiKCZ<+ZO0;?J
z9Q%>+&vw%D_w&B*`@X;4yL0-74U0c$IMYVm=$ZDnI^A$n)wEXa{KrcNPFfeP)0;CX
zS8)1Id!Rov9sYQR3R9uw>7m+8x<oS$qAJmhlc-8G<FZxQ#q7*FLS=+#LT*9~p)U|B
zCsaXbJ)sSRDhX8)+DPaDLe+#e5!y_shEOe`2MIkyXbYjOgtifSm{1)dM5vz7c0vt=
z9wGEaLOTfUB-BW#iO{2jb`jc5h$ZA9<h5x5pHbyZumPX=D5&TG;Bzb)EdPn+FqTVL
z_G9@WmV;Qnk7YNO*YS8Wb2OOPIV@@jHKP_^b2Dn*)85?N-r7>Zpzb5+8BsSRRY3tH
zj7Ksn!L6?E`bT@qGh0LT)Oci}Gn9sK@51FLFRg=1nW^3*75#nryYGYIllh?cCj5Tp
z^1+?(MCRjzyWq$|ZQmsblbQGWFTudV>{GWPJf4{vJP7}txi*;JNP(?aK;^{MmTLbz
z81kn<I`hHPUxierZzzj@9fux!xM=eA?bZ8p0E}J%AaSMaihw2N^upmoTQ<V}1x?s_
z&aD;Fa0q^D-T?>UPpvx+_&9tn+vtS!{?R2WJ&Pj%+zl`~mCx^=2XL~yGl8eT9vIMM
zBRKCp4Zu@p0ovODpg#vdvI(Hn=|&Jb3+Rd8_MG?M1k_{{oc9B;KJgJi6K==r6HEIN
z*)u_Sp`yQIUK&fi{YhdO0+auFY&Q-$lzRDQ!k??VlX(qjefdv+bejX|TU7djt+#D$
z8-K2>ux{^r@kM~J-D&I00DN`ZgSWn&&K1oxe{RE_zWgVD?aO~w<*x@hD>w{&R?m#H
z;#7PP;z8lz$i@eqZ)|cO=<IAqjRyusvLmANO-OzIsZ|+qdaItp<pSW~RwV#c*3(#B
z>K~^~57k)bBF;wls=-T__gX_?=R?-tBF=44v*@UE+#N0WfIh$Vji~bg^q+b+>dd|y
zb^U~0j{`QLR!^&Ek8P8k`=;v{goPMetcF?F0}Q*ox|zjaDjX^7EVuYs*H<WZIh&hx
z{hhMJX=hEUBb^2-ujOy8{33scpQFLbhxz4|`uyT@eLi&i&*|FKzg80II#4}|ot4H?
z2dF(!2T%g+_{(H3RXr&I&+SJ6J%SBV=^zN6g_X(Cnw$G_0EAA0qv=<GGd1;IF!U|3
z==}}=6KBgV;)4$Qr}EYL<S{2oOyOIR{BpH_3cytFAFLyDt{tv=U}jEOC+A#VcL1P3
zQyNSMte?%fI$+?`-8mN?>+mSc-kqaa%8k>rpIvWfedp+ln;z*M7(N^t=<Gtzg!)Ii
z5FgidQ88IbV0@^wmi_D^z1wX)`xaeSea`@yYSG+!<t_SEk0J7=s-tEf5_QfH8C~QA
zmgh8%m&_zmR1@`fW2ZjUb+q>|V@a3jXx5RVkGrz3|B5~it+8LyHI2v@u+2Q%(U&CS
zSYem#Ct1H8@jCOEpsN~lxTk9%yZI{pW4OVGBxHyt%L#%m8pa#97HO)>juF91MHNJ(
zYNE1A9YS1NbzjH;*>0Fvk~Ks*jGoXLdl{Eh)L0+=nr`mrWz`T1+Zp|dHLJ`%<^_W{
zBvw)+99<DugXa{c(As?QDNKNKz9^s1_WXu^nX+K{x-C}YJ9MRW^>=je*%+5#$7Gd1
z#wzicogP}98PeB8dpdZi*pF^bl10JJ0oR5EyE~=kt~RGT)IZ#{+T22i9)E}KfZ4<~
zdVr!F-PYoDIu!J(n(5_r)iAsfo@WcM1ujpP#(1x$j*Gg+x3#jeq{I{6F-f^!$6D^+
z)1Fkxgy~e3uoQ{wIInTZjJ6|hRE>#VQ&p1QvA84)oGg0{owtK|`I0eWS(DRo6@mLg
z;ssIR#$>U$ZUpdttkiTdEF}UZ=c%>c3&nU6{<i1aTK{)V_aexg5{YvW(FpKZX=l$W
z;y7+Zai+#iHi~Z^{tGIt8%e_yW2~H##^TDjqzHHcpF<&GMYhdKGc#&m+O--Z%Gq7I
zXM!We;zC&FwEtEb#(j#28e6LoM4pjioXm!0l{2mEAL(tWLiRB&tdkb4?U3DnoI1PO
zBOI+@9*JrHpLMCGF;z7UQ|IveSEvrf@arYFBSUIeBC^QfBStS2|F8@93V-W6JDU)L
yKfp*93SWNoB)(auBpXer#na;RG+U=`(2cIPR!!rr@7<u;jR35DeHyN*t^War#<sNp
delta 2908
zcmb`Ie{3699l+nSeaYD@YZsg!YnSz9q4_~_{*fldP2x3fm$pgTq-oQwLQM1d?tH%X
zVw<~j>d;j7N=k=>v?Z2yVT9tMX@db(r&L%Vx?~~>)!0}-8|nm1={nR*-K0nZ6fGe0
z&gZvu9shu^_3rz5-|zQ*-+SNpeRtPhs!RX0?wOsIKY~eLo6TBRacnZ~yvN)3_(Hz;
z%Y2<0n0&V#c4aOu+?othQ<gB?GdXmBCS8OnD^V0-%0?7Ln4+)(5C85|34u}qGyyvS
z2Z1jTxQ9R)fqMyTBCwgjeFVx0e35{YKm~zH0#yX63EWR$3xTZ!9w1Od;6Vc02p|Hr
z1Rf$#N8n2YwiBo)&_JM(Kofy46KE#TLVzLQBH%XRR_5};_5V{lJJns+PWghM>;M4&
z!qS1|-?2Q3<wsZ^#PY9L4r2KiEcapgHf~ol$GejU53ver-GN#?UN7>tceJ;6wE4;$
zXy0S#Q9)6qSQPmYFG?Xb7VU0~E7B;d2`H41WWK4)0RR|}SY9{)s2}@+z0OQ^pti*H
zo$U;yA@t7vYR_B=ypb92eT*9J&wuhxX1Vt!d^xkw*8snr`LM4UK0aIBKL_EHnd=AV
zV9)IIq1zCSWhMsu;Bw~8!F)La4=jSs<G*Qj`m$iim!?2w`S1ydGW|n2{Inj~@nF&A
zTMs$A<^c#U0+3uRUF5M0E>d82_{i3BI6NEY8-8HV;uQm6<s*953J2kezG{VEf$3bM
z4bojfUmAc9Ea_>SBoLg)=ex21Vg#i5tN{Avad-ewD%}VI7XUqe4O~hF0rK4hmgFsf
z#y{vxo{4}<J^=0=w_9HzV-1;mrW;-^JB(KecW0&08Uz}5b)+S@m|so(`r{;?ekgMh
z#8c;PCj0Y$``K+4q*pBI%SNBree;8>hL17!^l5;w-#Ps|1MumZij^Ox=ZkJ}-~6W4
z{`|)``t$!N^VNcRz56Ki=&!3b`}xEm#La@6HJ2E)y;!jeugr#<6F2Mrfny*`mru-{
zSbToddfB0uD&Lu}dGG!D{d@OzpvFgpki<q&o2$*$&Uih!?SkzW(DF&Oz8to>?@QwV
zI0S1-0BUq3+T0}PY<6;}N{@-QM!V_o&4pchZP>O&|D|ZFfm!{!Xgg((8zBJnbnC|>
zw*9dCd^TdsWh2zjnaaB+Z!(vW<DaEYLRh0O>6A-<c$%W2YjUVs-!@H&78l4pHBEhF
z*;SjrwjRhYjgM_jjz3qI#M#@t&gQT04yM8SllkTKi}@AqG!52o<k!|;&9AQ2<^#9i
zOIN4<sbBgYRn_?fKuJ*l{u#T)c}4=RV?h8?>BFG=0(6cAt8RAA0}wb1zLtIp*isWq
z-GT3e*WJ$paQjDqes+ecaGd$7(>DQNs_D1-wi&9Px(KuktM|=NZhIF%{-!jT?9$K9
zP`gXJ09Xm759sOVD0F^hhQh4`x6<6o3|(rmR~y*@p!wb0+6DSbMfszFgU5OhmrxWT
zsxcDp@Z^@R&@J{#59%E_Ivg0-+k;FWja5WJfAtDo=_sJa$cd=Few9Y$#lFNNErv`#
z_bc?%hd-m=n8S1_j5h`l0}kg<1eRwwHqLUAHin`x4fXCr9{le)-aCx+4_~1p-v2sk
z?&ut?Q2)cYg#}uHxzR=XWr(CZmx_1lxwDU#{^HJSv_&6(jrMpt9iu9zN{kehG=`1x
zjLNZ5{Hh~gVDkuU_zi0OY*cRTHTs-Izw`#(p`Ut#_U88dj(!+IkW0Nu4_Yj?5`F3S
zbU+{a1MNyNvJ_1u88Mo0?2Sdkl9*5g9-Wf3NC$Gc+^WL4Iig3*C}J4H#>7vGqvqS#
z-*DG8XdphJ1P&(q!;&m?xZUoAs<_n%s|apB#<@j~WALp_xsUV&9y!$G+LI7fJWz<T
zAz5H#HYz4qQBeI{3)kG-5{@aF!bV$Sp%a|(V6YN<#}y$gCH+R|LN6vN><FVK!iLre
zzY(!8Gr!RvNywVSDk(0qp~I;-DJV?gTY^g$8wz_>$5c%iVZI?tA*{krAt`XYq!i|L
zn_9y$mcRuc<>Uk}@Qf&Envm2sf@=|@7FB;drb<Z?T1+LAWDN6Z$YKyT-lW^y1fJwm
zt2tpYQBnDhtZ{OTV`agQ4|8K)?5=7&hv(r1d?FhLBbQ7{<Tf;_fUk}amyK2Pl3F+k
zH5S&wa*VxmC&tCqjGdeklsIQrP)o75v(4<|5QXG%@r0ij!Yoerj#i?SV^NXeV~LQQ
z5*Tl*Z(|8uMBq+JQPHmmvcReWwiJ%zvy&O+iY<|ts{KcUd80pXats#^0gc3XLGD2K
zD7H&U2QEcaEN1f!{<+VJe)TQ7Y74?uh|FSdLanY=kIT!LwW0s{ExJ+P{5D<Rj1Us!
a@Xoe)oO77GqxA_jp1A9}PW|a8xqkylTQ0T$
-- 
1.6.0.2
^ permalink raw reply related	[flat|nested] 128+ messages in thread
- * Re: [Qemu-devel] [PATCH 50/58] pseries: Update SLOF firmware image
  2011-09-14  8:43 ` [Qemu-devel] [PATCH 50/58] pseries: Update SLOF firmware image Alexander Graf
@ 2011-09-14 11:01   ` Peter Maydell
  2011-09-14 12:24     ` Alexander Graf
  0 siblings, 1 reply; 128+ messages in thread
From: Peter Maydell @ 2011-09-14 11:01 UTC (permalink / raw)
  To: Alexander Graf
  Cc: Blue Swirl, qemu-ppc, qemu-devel Developers, Aurelien Jarno,
	David Gibson
On 14 September 2011 09:43, Alexander Graf <agraf@suse.de> wrote:
> From: David Gibson <david@gibson.dropbear.id.au>
>
> The current SLOF firmware for the pseries machine has a bug in SCSI
> condition handling that was exposed by recent updates to qemu's SCSI
> emulation.  This patch updates the SLOF image to one with the bug fixed.
>
> Signed-off-by: David Gibson <david@gibson.dropbear.id.au>
> Signed-off-by: Alexander Graf <agraf@suse.de>
> ---
>  pc-bios/README   |    2 +-
>  pc-bios/slof.bin |  Bin 579072 -> 578888 bytes
>  2 files changed, 1 insertions(+), 1 deletions(-)
I confess to not really understanding how we keep the git
submodules and the binary blobs in sync, but shouldn't there
be a reference in the commit message to the git commit hash
for the slof sources corresponding to this blob, and maybe
also an update to roms/SLOF here? (cf commit d67c3f2c for
example) ?
-- PMM
^ permalink raw reply	[flat|nested] 128+ messages in thread 
- * Re: [Qemu-devel] [PATCH 50/58] pseries: Update SLOF firmware image
  2011-09-14 11:01   ` Peter Maydell
@ 2011-09-14 12:24     ` Alexander Graf
  2011-09-14 12:28       ` Peter Maydell
  0 siblings, 1 reply; 128+ messages in thread
From: Alexander Graf @ 2011-09-14 12:24 UTC (permalink / raw)
  To: Peter Maydell
  Cc: Blue Swirl, qemu-ppc@nongnu.org, qemu-devel Developers,
	Aurelien Jarno, David Gibson
Am 14.09.2011 um 13:01 schrieb Peter Maydell <peter.maydell@linaro.org>:
> On 14 September 2011 09:43, Alexander Graf <agraf@suse.de> wrote:
>> From: David Gibson <david@gibson.dropbear.id.au>
>> 
>> The current SLOF firmware for the pseries machine has a bug in SCSI
>> condition handling that was exposed by recent updates to qemu's SCSI
>> emulation.  This patch updates the SLOF image to one with the bug fixed.
>> 
>> Signed-off-by: David Gibson <david@gibson.dropbear.id.au>
>> Signed-off-by: Alexander Graf <agraf@suse.de>
>> ---
>>  pc-bios/README   |    2 +-
>>  pc-bios/slof.bin |  Bin 579072 -> 578888 bytes
>>  2 files changed, 1 insertions(+), 1 deletions(-)
> 
> I confess to not really understanding how we keep the git
> submodules and the binary blobs in sync, but shouldn't there
> be a reference in the commit message to the git commit hash
> for the slof sources corresponding to this blob, and maybe
> also an update to roms/SLOF here? (cf commit d67c3f2c for
> example) ?
Oh? Since I have absolutely no idea on git submodules, it might be helpful to add some description on how to do a blob update into README?
Alex
> 
^ permalink raw reply	[flat|nested] 128+ messages in thread 
- * Re: [Qemu-devel] [PATCH 50/58] pseries: Update SLOF firmware image
  2011-09-14 12:24     ` Alexander Graf
@ 2011-09-14 12:28       ` Peter Maydell
  2011-09-14 12:59         ` Anthony Liguori
  0 siblings, 1 reply; 128+ messages in thread
From: Peter Maydell @ 2011-09-14 12:28 UTC (permalink / raw)
  To: Alexander Graf
  Cc: Anthony Liguori, qemu-devel Developers, Blue Swirl,
	qemu-ppc@nongnu.org, Aurelien Jarno, David Gibson
On 14 September 2011 13:24, Alexander Graf <agraf@suse.de> wrote:
> Am 14.09.2011 um 13:01 schrieb Peter Maydell <peter.maydell@linaro.org>:
>> I confess to not really understanding how we keep the git
>> submodules and the binary blobs in sync, but shouldn't there
>> be a reference in the commit message to the git commit hash
>> for the slof sources corresponding to this blob, and maybe
>> also an update to roms/SLOF here? (cf commit d67c3f2c for
>> example) ?
>
> Oh? Since I have absolutely no idea on git submodules, it might
> be helpful to add some description on how to do a blob update
> into README?
Sounds like a good idea -- I think Anthony is the expert here.
-- PMM
^ permalink raw reply	[flat|nested] 128+ messages in thread 
- * Re: [Qemu-devel] [PATCH 50/58] pseries: Update SLOF firmware image
  2011-09-14 12:28       ` Peter Maydell
@ 2011-09-14 12:59         ` Anthony Liguori
  2011-09-14 20:17           ` Blue Swirl
                             ` (2 more replies)
  0 siblings, 3 replies; 128+ messages in thread
From: Anthony Liguori @ 2011-09-14 12:59 UTC (permalink / raw)
  To: Peter Maydell
  Cc: Alexander Graf, qemu-devel Developers, Blue Swirl,
	qemu-ppc@nongnu.org, Aurelien Jarno, David Gibson
On 09/14/2011 07:28 AM, Peter Maydell wrote:
> On 14 September 2011 13:24, Alexander Graf<agraf@suse.de>  wrote:
>> Am 14.09.2011 um 13:01 schrieb Peter Maydell<peter.maydell@linaro.org>:
>>> I confess to not really understanding how we keep the git
>>> submodules and the binary blobs in sync, but shouldn't there
>>> be a reference in the commit message to the git commit hash
>>> for the slof sources corresponding to this blob, and maybe
>>> also an update to roms/SLOF here? (cf commit d67c3f2c for
>>> example) ?
>>
>> Oh? Since I have absolutely no idea on git submodules, it might
>> be helpful to add some description on how to do a blob update
>> into README?
>
> Sounds like a good idea -- I think Anthony is the expert here.
You should be able to just checkout the desired version of the submodule 
(you may need to refetch from git.qemu.org), then build the binary and 
copy the results to pc-bios/.  Then in the top level, do a single commit 
that includes the submodule commit change and the new binary blob.
For seabios, it would look something like:
$ cd roms/seabios
$ git checkout v0.7.0
$ make
$ cp out/bios.bin ../../pc-bios/bios.bin
$ cd ../..
$ git commit -a -m 'Update seabios to newest version'
I use the following script to generate the seabios commit message which 
gives a short summary of the changes:
#!/bin/sh
git --git-dir=roms/seabios/.git log --pretty=format:' - %h %s' \
  $(git diff roms/seabios | grep '^-Subproject commit' | cut -f3 -d' ')..$(git diff roms/seabios | grep '^+Subproject commit' | cut -f3 -d' ')
Regards,
Anthony Liguori
>
> -- PMM
>
^ permalink raw reply	[flat|nested] 128+ messages in thread
- * Re: [Qemu-devel] [PATCH 50/58] pseries: Update SLOF firmware image
  2011-09-14 12:59         ` Anthony Liguori
@ 2011-09-14 20:17           ` Blue Swirl
  2011-09-19  8:32           ` Alexander Graf
  2011-09-20  3:40           ` [Qemu-devel] [Qemu-ppc] " David Gibson
  2 siblings, 0 replies; 128+ messages in thread
From: Blue Swirl @ 2011-09-14 20:17 UTC (permalink / raw)
  To: Anthony Liguori
  Cc: Peter Maydell, Alexander Graf, qemu-devel Developers,
	qemu-ppc@nongnu.org, Aurelien Jarno, David Gibson
On Wed, Sep 14, 2011 at 12:59 PM, Anthony Liguori <aliguori@us.ibm.com> wrote:
> On 09/14/2011 07:28 AM, Peter Maydell wrote:
>>
>> On 14 September 2011 13:24, Alexander Graf<agraf@suse.de>  wrote:
>>>
>>> Am 14.09.2011 um 13:01 schrieb Peter Maydell<peter.maydell@linaro.org>:
>>>>
>>>> I confess to not really understanding how we keep the git
>>>> submodules and the binary blobs in sync, but shouldn't there
>>>> be a reference in the commit message to the git commit hash
>>>> for the slof sources corresponding to this blob, and maybe
>>>> also an update to roms/SLOF here? (cf commit d67c3f2c for
>>>> example) ?
>>>
>>> Oh? Since I have absolutely no idea on git submodules, it might
>>> be helpful to add some description on how to do a blob update
>>> into README?
>>
>> Sounds like a good idea -- I think Anthony is the expert here.
>
> You should be able to just checkout the desired version of the submodule
> (you may need to refetch from git.qemu.org), then build the binary and copy
> the results to pc-bios/.  Then in the top level, do a single commit that
> includes the submodule commit change and the new binary blob.
>
> For seabios, it would look something like:
>
> $ cd roms/seabios
> $ git checkout v0.7.0
> $ make
> $ cp out/bios.bin ../../pc-bios/bios.bin
> $ cd ../..
> $ git commit -a -m 'Update seabios to newest version'
Oh, a full integration of build systems is not needed (QEMU make would
run OpenBIOS make)? That makes things much easier.
> I use the following script to generate the seabios commit message which
> gives a short summary of the changes:
>
> #!/bin/sh
>
> git --git-dir=roms/seabios/.git log --pretty=format:' - %h %s' $(git diff
> roms/seabios | grep '^-Subproject commit' | cut -f3 -d' ')..$(git diff
> roms/seabios | grep '^+Subproject commit' | cut -f3 -d' ')
>
>
> Regards,
>
> Anthony Liguori
>
>>
>> -- PMM
>>
>
>
^ permalink raw reply	[flat|nested] 128+ messages in thread 
- * Re: [Qemu-devel] [PATCH 50/58] pseries: Update SLOF firmware image
  2011-09-14 12:59         ` Anthony Liguori
  2011-09-14 20:17           ` Blue Swirl
@ 2011-09-19  8:32           ` Alexander Graf
  2011-09-20  3:40           ` [Qemu-devel] [Qemu-ppc] " David Gibson
  2 siblings, 0 replies; 128+ messages in thread
From: Alexander Graf @ 2011-09-19  8:32 UTC (permalink / raw)
  To: Anthony Liguori
  Cc: Peter Maydell, qemu-devel Developers, Blue Swirl,
	qemu-ppc@nongnu.org, Aurelien Jarno, David Gibson
On 14.09.2011, at 14:59, Anthony Liguori wrote:
> On 09/14/2011 07:28 AM, Peter Maydell wrote:
>> On 14 September 2011 13:24, Alexander Graf<agraf@suse.de>  wrote:
>>> Am 14.09.2011 um 13:01 schrieb Peter Maydell<peter.maydell@linaro.org>:
>>>> I confess to not really understanding how we keep the git
>>>> submodules and the binary blobs in sync, but shouldn't there
>>>> be a reference in the commit message to the git commit hash
>>>> for the slof sources corresponding to this blob, and maybe
>>>> also an update to roms/SLOF here? (cf commit d67c3f2c for
>>>> example) ?
>>> 
>>> Oh? Since I have absolutely no idea on git submodules, it might
>>> be helpful to add some description on how to do a blob update
>>> into README?
>> 
>> Sounds like a good idea -- I think Anthony is the expert here.
> 
> You should be able to just checkout the desired version of the submodule (you may need to refetch from git.qemu.org), then build the binary and copy the results to pc-bios/.  Then in the top level, do a single commit that includes the submodule commit change and the new binary blob.
> 
> For seabios, it would look something like:
> 
agraf@lychee:/home/agraf/release/qemu> git submodule init
Submodule 'roms/SLOF' (git://git.qemu.org/SLOF.git) registered for path 'roms/SLOF'
Submodule 'roms/ipxe' (git://git.qemu.org/ipxe.git) registered for path 'roms/ipxe'
Submodule 'roms/seabios' (git://git.qemu.org/seabios.git/) registered for path 'roms/seabios'
Submodule 'roms/vgabios' (git://git.qemu.org/vgabios.git/) registered for path 'roms/vgabios'
agraf@lychee:/home/agraf/release/qemu> cd roms/SLOF/
agraf@lychee:/home/agraf/release/qemu/roms/SLOF> l
total 8
drwxr-xr-x 2 agraf suse 4096 2011-04-01 18:34 ./
drwxr-xr-x 7 agraf suse 4096 2011-05-01 00:42 ../
agraf@lychee:/home/agraf/release/qemu/roms/SLOF> git checkout master
Switched to branch "master"
... which switches to "master" on my qemu git tree.
Alex
^ permalink raw reply	[flat|nested] 128+ messages in thread 
- * Re: [Qemu-devel] [Qemu-ppc] [PATCH 50/58] pseries: Update SLOF firmware image
  2011-09-14 12:59         ` Anthony Liguori
  2011-09-14 20:17           ` Blue Swirl
  2011-09-19  8:32           ` Alexander Graf
@ 2011-09-20  3:40           ` David Gibson
  2011-09-24 12:45             ` Paolo Bonzini
  2 siblings, 1 reply; 128+ messages in thread
From: David Gibson @ 2011-09-20  3:40 UTC (permalink / raw)
  To: Anthony Liguori; +Cc: Peter Maydell, qemu-ppc@nongnu.org, qemu-devel Developers
On Wed, Sep 14, 2011 at 07:59:28AM -0500, Anthony Liguori wrote:
> On 09/14/2011 07:28 AM, Peter Maydell wrote:
> >On 14 September 2011 13:24, Alexander Graf<agraf@suse.de>  wrote:
> >>Am 14.09.2011 um 13:01 schrieb Peter Maydell<peter.maydell@linaro.org>:
> >>>I confess to not really understanding how we keep the git
> >>>submodules and the binary blobs in sync, but shouldn't there
> >>>be a reference in the commit message to the git commit hash
> >>>for the slof sources corresponding to this blob, and maybe
> >>>also an update to roms/SLOF here? (cf commit d67c3f2c for
> >>>example) ?
> >>
> >>Oh? Since I have absolutely no idea on git submodules, it might
> >>be helpful to add some description on how to do a blob update
> >>into README?
> >
> >Sounds like a good idea -- I think Anthony is the expert here.
> 
> You should be able to just checkout the desired version of the
> submodule (you may need to refetch from git.qemu.org), then build
Ah, yes, here's the point.  Updating git.qemu.org is not within my
power, so I can't myself construct a commit doing a submodule + binary
update in this fashion.
-- 
David Gibson			| I'll have my music baroque, and my code
david AT gibson.dropbear.id.au	| minimalist, thank you.  NOT _the_ _other_
				| _way_ _around_!
http://www.ozlabs.org/~dgibson
^ permalink raw reply	[flat|nested] 128+ messages in thread 
- * Re: [Qemu-devel] [Qemu-ppc] [PATCH 50/58] pseries: Update SLOF firmware image
  2011-09-20  3:40           ` [Qemu-devel] [Qemu-ppc] " David Gibson
@ 2011-09-24 12:45             ` Paolo Bonzini
  2011-09-27  1:01               ` David Gibson
  0 siblings, 1 reply; 128+ messages in thread
From: Paolo Bonzini @ 2011-09-24 12:45 UTC (permalink / raw)
  To: Anthony Liguori, Peter Maydell, qemu-devel Developers,
	qemu-ppc@nongnu.org
On 09/20/2011 05:40 AM, David Gibson wrote:
> Ah, yes, here's the point.  Updating git.qemu.org is not within my
> power, so I can't myself construct a commit doing a submodule + binary
> update in this fashion.
Sure you can.  A submodule checkout is just like any other git commit. 
A commit doing a submodule update is just saying "from now on, submodule 
roms/SLOF is attached to commit 12ab34cd".
First of all, you should configure your qemu tree so that it uses your 
official upstream repository instead of git.qemu.org's SLOF mirror:
     git config submodule.roms/SLOF.url git://github.com/dgibson/SLOF.git
     rm -rf roms/SLOF
     git submodule update roms/SLOF
Producing a valid patch is as simple as this:
     cd roms/SLOF
     git fetch origin            # if needed
     git checkout origin/master
     cd ..
     cp /path/to/SLOF.bin pc-bios/
     git commit pc-bios roms -m'pseries: Update SLOF firmware image'
Then, whoever updates git.qemu.org indeed has to pull manually from 
https://github.com/dgibson/SLOF to ensure that git.qemu.org's SLOF 
mirror does include the new commit.  But that doesn't prevent you from 
*submitting* the patch.
BTW, the above configuration steps should probably be done by everybody 
who's working with the pseries machine, so that they will have easy 
access to upstream SLOF bugfixes.
Paolo
^ permalink raw reply	[flat|nested] 128+ messages in thread
- * Re: [Qemu-devel] [Qemu-ppc] [PATCH 50/58] pseries: Update SLOF firmware image
  2011-09-24 12:45             ` Paolo Bonzini
@ 2011-09-27  1:01               ` David Gibson
  2011-09-27  6:39                 ` Alexander Graf
  0 siblings, 1 reply; 128+ messages in thread
From: David Gibson @ 2011-09-27  1:01 UTC (permalink / raw)
  To: Paolo Bonzini
  Cc: Peter Maydell, Anthony Liguori, qemu-ppc@nongnu.org, agraf,
	qemu-devel Developers
On Sat, Sep 24, 2011 at 02:45:20PM +0200, Paolo Bonzini wrote:
> On 09/20/2011 05:40 AM, David Gibson wrote:
> >Ah, yes, here's the point.  Updating git.qemu.org is not within my
> >power, so I can't myself construct a commit doing a submodule + binary
> >update in this fashion.
> 
> Sure you can.  A submodule checkout is just like any other git
> commit. A commit doing a submodule update is just saying "from now
> on, submodule roms/SLOF is attached to commit 12ab34cd".
> 
> First of all, you should configure your qemu tree so that it uses
> your official upstream repository instead of git.qemu.org's SLOF
> mirror:
> 
>     git config submodule.roms/SLOF.url git://github.com/dgibson/SLOF.git
>     rm -rf roms/SLOF
>     git submodule update roms/SLOF
> 
> Producing a valid patch is as simple as this:
> 
>     cd roms/SLOF
>     git fetch origin            # if needed
>     git checkout origin/master
>     cd ..
>     cp /path/to/SLOF.bin pc-bios/
>     git commit pc-bios roms -m'pseries: Update SLOF firmware image'
> 
> Then, whoever updates git.qemu.org indeed has to pull manually from
> https://github.com/dgibson/SLOF to ensure that git.qemu.org's SLOF
> mirror does include the new commit.  But that doesn't prevent you
> from *submitting* the patch.
> 
> BTW, the above configuration steps should probably be done by
> everybody who's working with the pseries machine, so that they will
> have easy access to upstream SLOF bugfixes.
Thanks for the recipe.
Alex, do you want me to submit a replacement SLOF update patch which
does this, or will you fix up the existing one in your ppc queue?
-- 
David Gibson			| I'll have my music baroque, and my code
david AT gibson.dropbear.id.au	| minimalist, thank you.  NOT _the_ _other_
				| _way_ _around_!
http://www.ozlabs.org/~dgibson
^ permalink raw reply	[flat|nested] 128+ messages in thread 
- * Re: [Qemu-devel] [Qemu-ppc] [PATCH 50/58] pseries: Update SLOF firmware image
  2011-09-27  1:01               ` David Gibson
@ 2011-09-27  6:39                 ` Alexander Graf
  2011-09-29  4:21                   ` David Gibson
  0 siblings, 1 reply; 128+ messages in thread
From: Alexander Graf @ 2011-09-27  6:39 UTC (permalink / raw)
  To: David Gibson
  Cc: Paolo Bonzini, Anthony Liguori, qemu-ppc@nongnu.org,
	qemu-devel Developers, Peter Maydell
Am 27.09.2011 um 03:01 schrieb David Gibson <dwg@au1.ibm.com>:
> On Sat, Sep 24, 2011 at 02:45:20PM +0200, Paolo Bonzini wrote:
>> On 09/20/2011 05:40 AM, David Gibson wrote:
>>> Ah, yes, here's the point.  Updating git.qemu.org is not within my
>>> power, so I can't myself construct a commit doing a submodule + binary
>>> update in this fashion.
>> 
>> Sure you can.  A submodule checkout is just like any other git
>> commit. A commit doing a submodule update is just saying "from now
>> on, submodule roms/SLOF is attached to commit 12ab34cd".
>> 
>> First of all, you should configure your qemu tree so that it uses
>> your official upstream repository instead of git.qemu.org's SLOF
>> mirror:
>> 
>>    git config submodule.roms/SLOF.url git://github.com/dgibson/SLOF.git
>>    rm -rf roms/SLOF
>>    git submodule update roms/SLOF
>> 
>> Producing a valid patch is as simple as this:
>> 
>>    cd roms/SLOF
>>    git fetch origin            # if needed
>>    git checkout origin/master
>>    cd ..
>>    cp /path/to/SLOF.bin pc-bios/
>>    git commit pc-bios roms -m'pseries: Update SLOF firmware image'
>> 
>> Then, whoever updates git.qemu.org indeed has to pull manually from
>> https://github.com/dgibson/SLOF to ensure that git.qemu.org's SLOF
>> mirror does include the new commit.  But that doesn't prevent you
>> from *submitting* the patch.
>> 
>> BTW, the above configuration steps should probably be done by
>> everybody who's working with the pseries machine, so that they will
>> have easy access to upstream SLOF bugfixes.
> 
> Thanks for the recipe.
> 
> Alex, do you want me to submit a replacement SLOF update patch which
> does this, or will you fix up the existing one in your ppc queue?
I would very much appreciate it if you did it :)
Alex
> 
^ permalink raw reply	[flat|nested] 128+ messages in thread 
- * Re: [Qemu-devel] [Qemu-ppc] [PATCH 50/58] pseries: Update SLOF firmware image
  2011-09-27  6:39                 ` Alexander Graf
@ 2011-09-29  4:21                   ` David Gibson
  0 siblings, 0 replies; 128+ messages in thread
From: David Gibson @ 2011-09-29  4:21 UTC (permalink / raw)
  To: Alexander Graf
  Cc: Paolo Bonzini, Anthony Liguori, qemu-ppc@nongnu.org,
	qemu-devel Developers, Peter Maydell
On Tue, Sep 27, 2011 at 08:39:51AM +0200, Alexander Graf wrote:
> 
> Am 27.09.2011 um 03:01 schrieb David Gibson <dwg@au1.ibm.com>:
> 
> > On Sat, Sep 24, 2011 at 02:45:20PM +0200, Paolo Bonzini wrote:
> >> On 09/20/2011 05:40 AM, David Gibson wrote:
> >>> Ah, yes, here's the point.  Updating git.qemu.org is not within my
> >>> power, so I can't myself construct a commit doing a submodule + binary
> >>> update in this fashion.
> >> 
> >> Sure you can.  A submodule checkout is just like any other git
> >> commit. A commit doing a submodule update is just saying "from now
> >> on, submodule roms/SLOF is attached to commit 12ab34cd".
> >> 
> >> First of all, you should configure your qemu tree so that it uses
> >> your official upstream repository instead of git.qemu.org's SLOF
> >> mirror:
> >> 
> >>    git config submodule.roms/SLOF.url git://github.com/dgibson/SLOF.git
> >>    rm -rf roms/SLOF
> >>    git submodule update roms/SLOF
> >> 
> >> Producing a valid patch is as simple as this:
> >> 
> >>    cd roms/SLOF
> >>    git fetch origin            # if needed
> >>    git checkout origin/master
> >>    cd ..
> >>    cp /path/to/SLOF.bin pc-bios/
> >>    git commit pc-bios roms -m'pseries: Update SLOF firmware image'
> >> 
> >> Then, whoever updates git.qemu.org indeed has to pull manually from
> >> https://github.com/dgibson/SLOF to ensure that git.qemu.org's SLOF
> >> mirror does include the new commit.  But that doesn't prevent you
> >> from *submitting* the patch.
> >> 
> >> BTW, the above configuration steps should probably be done by
> >> everybody who's working with the pseries machine, so that they will
> >> have easy access to upstream SLOF bugfixes.
> > 
> > Thanks for the recipe.
> > 
> > Alex, do you want me to submit a replacement SLOF update patch which
> > does this, or will you fix up the existing one in your ppc queue?
> 
> I would very much appreciate if you did it :)
Done.
-- 
David Gibson			| I'll have my music baroque, and my code
david AT gibson.dropbear.id.au	| minimalist, thank you.  NOT _the_ _other_
				| _way_ _around_!
http://www.ozlabs.org/~dgibson
^ permalink raw reply	[flat|nested] 128+ messages in thread 
 
 
 
 
 
 
 
 
 
- * [Qemu-devel] [PATCH 51/58] Gdbstub: handle read of fpscr
  2011-09-14  8:42 [Qemu-devel] [PULL 00/58] ppc patch queue 2011-09-14 Alexander Graf
                   ` (49 preceding siblings ...)
  2011-09-14  8:43 ` [Qemu-devel] [PATCH 50/58] pseries: Update SLOF firmware image Alexander Graf
@ 2011-09-14  8:43 ` Alexander Graf
  2011-09-14  8:43 ` [Qemu-devel] [PATCH 52/58] ppc405: use RAM_ADDR_FMT instead of %08lx Alexander Graf
                   ` (6 subsequent siblings)
  57 siblings, 0 replies; 128+ messages in thread
From: Alexander Graf @ 2011-09-14  8:43 UTC (permalink / raw)
  To: qemu-devel Developers
  Cc: Blue Swirl, qemu-ppc, Fabien Chouteau, Aurelien Jarno
From: Fabien Chouteau <chouteau@adacore.com>
Signed-off-by: Fabien Chouteau <chouteau@adacore.com>
Signed-off-by: Alexander Graf <agraf@suse.de>
---
 gdbstub.c                   |    2 +-
 target-ppc/translate_init.c |    3 +--
 2 files changed, 2 insertions(+), 3 deletions(-)
diff --git a/gdbstub.c b/gdbstub.c
index 90683a4..efe7b5f 100644
--- a/gdbstub.c
+++ b/gdbstub.c
@@ -733,7 +733,7 @@ static int cpu_gdb_read_register(CPUState *env, uint8_t *mem_buf, int n)
             {
                 if (gdb_has_xml)
                     return 0;
-                GET_REG32(0); /* fpscr */
+                GET_REG32(env->fpscr);
             }
         }
     }
diff --git a/target-ppc/translate_init.c b/target-ppc/translate_init.c
index 211f3bd..d09c7ca 100644
--- a/target-ppc/translate_init.c
+++ b/target-ppc/translate_init.c
@@ -9700,8 +9700,7 @@ static int gdb_get_float_reg(CPUState *env, uint8_t *mem_buf, int n)
         return 8;
     }
     if (n == 32) {
-        /* FPSCR not implemented  */
-        memset(mem_buf, 0, 4);
+        stl_p(mem_buf, env->fpscr);
         return 4;
     }
     return 0;
-- 
1.6.0.2
^ permalink raw reply related	[flat|nested] 128+ messages in thread
- * [Qemu-devel] [PATCH 52/58] ppc405: use RAM_ADDR_FMT instead of %08lx
  2011-09-14  8:42 [Qemu-devel] [PULL 00/58] ppc patch queue 2011-09-14 Alexander Graf
                   ` (50 preceding siblings ...)
  2011-09-14  8:43 ` [Qemu-devel] [PATCH 51/58] Gdbstub: handle read of fpscr Alexander Graf
@ 2011-09-14  8:43 ` Alexander Graf
  2011-09-14  8:43 ` [Qemu-devel] [PATCH 53/58] openpic: Unfold read_IRQreg Alexander Graf
                   ` (5 subsequent siblings)
  57 siblings, 0 replies; 128+ messages in thread
From: Alexander Graf @ 2011-09-14  8:43 UTC (permalink / raw)
  To: qemu-devel Developers
  Cc: Blue Swirl, qemu-ppc, Stefan Hajnoczi, Aurelien Jarno
From: Stefan Hajnoczi <stefanha@linux.vnet.ibm.com>
The RAM_ADDR_FMT macro hides the type of ram_addr_t so that format
strings can be safely used.  Make sure to use RAM_ADDR_FMT so that the
build works on 32-bit hosts with Xen enabled.  Whether Xen should affect
ppc TCG targets is questionable but a separate issue.
Signed-off-by: Stefan Hajnoczi <stefanha@linux.vnet.ibm.com>
Signed-off-by: Alexander Graf <agraf@suse.de>
---
 hw/ppc405_boards.c |    5 +++--
 1 files changed, 3 insertions(+), 2 deletions(-)
diff --git a/hw/ppc405_boards.c b/hw/ppc405_boards.c
index e6c8ac6..712a6be 100644
--- a/hw/ppc405_boards.c
+++ b/hw/ppc405_boards.c
@@ -213,7 +213,8 @@ static void ref405ep_init (ram_addr_t ram_size,
     sram_size = 512 * 1024;
     sram_offset = qemu_ram_alloc(NULL, "ef405ep.sram", sram_size);
 #ifdef DEBUG_BOARD_INIT
-    printf("%s: register SRAM at offset %08lx\n", __func__, sram_offset);
+    printf("%s: register SRAM at offset " RAM_ADDR_FMT "\n",
+           __func__, sram_offset);
 #endif
     cpu_register_physical_memory(0xFFF00000, sram_size,
                                  sram_offset | IO_MEM_RAM);
@@ -357,7 +358,7 @@ static void ref405ep_init (ram_addr_t ram_size,
 #ifdef DEBUG_BOARD_INIT
     printf("%s: Done\n", __func__);
 #endif
-    printf("bdloc %016lx\n", (unsigned long)bdloc);
+    printf("bdloc " RAM_ADDR_FMT "\n", bdloc);
 }
 
 static QEMUMachine ref405ep_machine = {
-- 
1.6.0.2
^ permalink raw reply related	[flat|nested] 128+ messages in thread
- * [Qemu-devel] [PATCH 53/58] openpic: Unfold read_IRQreg
  2011-09-14  8:42 [Qemu-devel] [PULL 00/58] ppc patch queue 2011-09-14 Alexander Graf
                   ` (51 preceding siblings ...)
  2011-09-14  8:43 ` [Qemu-devel] [PATCH 52/58] ppc405: use RAM_ADDR_FMT instead of %08lx Alexander Graf
@ 2011-09-14  8:43 ` Alexander Graf
  2011-09-14  8:43 ` [Qemu-devel] [PATCH 54/58] openpic: Unfold write_IRQreg Alexander Graf
                   ` (4 subsequent siblings)
  57 siblings, 0 replies; 128+ messages in thread
From: Alexander Graf @ 2011-09-14  8:43 UTC (permalink / raw)
  To: qemu-devel Developers; +Cc: Blue Swirl, qemu-ppc, Aurelien Jarno
The helper function read_IRQreg was always called with a specific argument on
the type of register to access. Inside the function we were simply doing a
switch on that constant argument again. It's a lot easier to just unfold this
into two separate functions and call each individually.
Reported-by: Blue Swirl <blauwirbel@gmail.com>
Signed-off-by: Alexander Graf <agraf@suse.de>
---
 hw/openpic.c |   56 +++++++++++++++++++++++++-------------------------------
 1 files changed, 25 insertions(+), 31 deletions(-)
diff --git a/hw/openpic.c b/hw/openpic.c
index 03e442b..fbd8837 100644
--- a/hw/openpic.c
+++ b/hw/openpic.c
@@ -472,20 +472,14 @@ static void openpic_reset (void *opaque)
     opp->glbc = 0x00000000;
 }
 
-static inline uint32_t read_IRQreg (openpic_t *opp, int n_IRQ, uint32_t reg)
+static inline uint32_t read_IRQreg_ide(openpic_t *opp, int n_IRQ)
 {
-    uint32_t retval;
-
-    switch (reg) {
-    case IRQ_IPVP:
-        retval = opp->src[n_IRQ].ipvp;
-        break;
-    case IRQ_IDE:
-        retval = opp->src[n_IRQ].ide;
-        break;
-    }
+    return opp->src[n_IRQ].ide;
+}
 
-    return retval;
+static inline uint32_t read_IRQreg_ipvp(openpic_t *opp, int n_IRQ)
+{
+    return opp->src[n_IRQ].ipvp;
 }
 
 static inline void write_IRQreg (openpic_t *opp, int n_IRQ,
@@ -523,10 +517,10 @@ static uint32_t read_doorbell_register (openpic_t *opp,
 
     switch (offset) {
     case DBL_IPVP_OFFSET:
-        retval = read_IRQreg(opp, IRQ_DBL0 + n_dbl, IRQ_IPVP);
+        retval = read_IRQreg_ipvp(opp, IRQ_DBL0 + n_dbl);
         break;
     case DBL_IDE_OFFSET:
-        retval = read_IRQreg(opp, IRQ_DBL0 + n_dbl, IRQ_IDE);
+        retval = read_IRQreg_ide(opp, IRQ_DBL0 + n_dbl);
         break;
     case DBL_DMR_OFFSET:
         retval = opp->doorbells[n_dbl].dmr;
@@ -564,10 +558,10 @@ static uint32_t read_mailbox_register (openpic_t *opp,
         retval = opp->mailboxes[n_mbx].mbr;
         break;
     case MBX_IVPR_OFFSET:
-        retval = read_IRQreg(opp, IRQ_MBX0 + n_mbx, IRQ_IPVP);
+        retval = read_IRQreg_ipvp(opp, IRQ_MBX0 + n_mbx);
         break;
     case MBX_DMR_OFFSET:
-        retval = read_IRQreg(opp, IRQ_MBX0 + n_mbx, IRQ_IDE);
+        retval = read_IRQreg_ide(opp, IRQ_MBX0 + n_mbx);
         break;
     }
 
@@ -695,7 +689,7 @@ static uint32_t openpic_gbl_read (void *opaque, target_phys_addr_t addr)
         {
             int idx;
             idx = (addr - 0x10A0) >> 4;
-            retval = read_IRQreg(opp, opp->irq_ipi0 + idx, IRQ_IPVP);
+            retval = read_IRQreg_ipvp(opp, opp->irq_ipi0 + idx);
         }
         break;
     case 0x10E0: /* SPVE */
@@ -765,10 +759,10 @@ static uint32_t openpic_timer_read (void *opaque, uint32_t addr)
         retval = opp->timers[idx].tibc;
         break;
     case 0x20: /* TIPV */
-        retval = read_IRQreg(opp, opp->irq_tim0 + idx, IRQ_IPVP);
+        retval = read_IRQreg_ipvp(opp, opp->irq_tim0 + idx);
         break;
     case 0x30: /* TIDE */
-        retval = read_IRQreg(opp, opp->irq_tim0 + idx, IRQ_IDE);
+        retval = read_IRQreg_ide(opp, opp->irq_tim0 + idx);
         break;
     }
     DPRINTF("%s: => %08x\n", __func__, retval);
@@ -809,10 +803,10 @@ static uint32_t openpic_src_read (void *opaque, uint32_t addr)
     idx = addr >> 5;
     if (addr & 0x10) {
         /* EXDE / IFEDE / IEEDE */
-        retval = read_IRQreg(opp, idx, IRQ_IDE);
+        retval = read_IRQreg_ide(opp, idx);
     } else {
         /* EXVP / IFEVP / IEEVP */
-        retval = read_IRQreg(opp, idx, IRQ_IPVP);
+        retval = read_IRQreg_ipvp(opp, idx);
     }
     DPRINTF("%s: => %08x\n", __func__, retval);
 
@@ -1368,13 +1362,13 @@ static uint32_t mpic_timer_read (void *opaque, target_phys_addr_t addr)
         retval = mpp->timers[idx].tibc;
         break;
     case 0x20: /* TIPV */
-        retval = read_IRQreg(mpp, MPIC_TMR_IRQ + idx, IRQ_IPVP);
+        retval = read_IRQreg_ipvp(mpp, MPIC_TMR_IRQ + idx);
         break;
     case 0x30: /* TIDR */
         if ((addr &0xF0) == 0XF0)
             retval = mpp->dst[cpu].tfrr;
         else
-            retval = read_IRQreg(mpp, MPIC_TMR_IRQ + idx, IRQ_IDE);
+            retval = read_IRQreg_ide(mpp, MPIC_TMR_IRQ + idx);
         break;
     }
     DPRINTF("%s: => %08x\n", __func__, retval);
@@ -1421,10 +1415,10 @@ static uint32_t mpic_src_ext_read (void *opaque, target_phys_addr_t addr)
         idx += (addr & 0xFFF0) >> 5;
         if (addr & 0x10) {
             /* EXDE / IFEDE / IEEDE */
-            retval = read_IRQreg(mpp, idx, IRQ_IDE);
+            retval = read_IRQreg_ide(mpp, idx);
         } else {
             /* EXVP / IFEVP / IEEVP */
-            retval = read_IRQreg(mpp, idx, IRQ_IPVP);
+            retval = read_IRQreg_ipvp(mpp, idx);
         }
         DPRINTF("%s: => %08x\n", __func__, retval);
     }
@@ -1471,10 +1465,10 @@ static uint32_t mpic_src_int_read (void *opaque, target_phys_addr_t addr)
         idx += (addr & 0xFFF0) >> 5;
         if (addr & 0x10) {
             /* EXDE / IFEDE / IEEDE */
-            retval = read_IRQreg(mpp, idx, IRQ_IDE);
+            retval = read_IRQreg_ide(mpp, idx);
         } else {
             /* EXVP / IFEVP / IEEVP */
-            retval = read_IRQreg(mpp, idx, IRQ_IPVP);
+            retval = read_IRQreg_ipvp(mpp, idx);
         }
         DPRINTF("%s: => %08x\n", __func__, retval);
     }
@@ -1521,10 +1515,10 @@ static uint32_t mpic_src_msg_read (void *opaque, target_phys_addr_t addr)
         idx += (addr & 0xFFF0) >> 5;
         if (addr & 0x10) {
             /* EXDE / IFEDE / IEEDE */
-            retval = read_IRQreg(mpp, idx, IRQ_IDE);
+            retval = read_IRQreg_ide(mpp, idx);
         } else {
             /* EXVP / IFEVP / IEEVP */
-            retval = read_IRQreg(mpp, idx, IRQ_IPVP);
+            retval = read_IRQreg_ipvp(mpp, idx);
         }
         DPRINTF("%s: => %08x\n", __func__, retval);
     }
@@ -1570,10 +1564,10 @@ static uint32_t mpic_src_msi_read (void *opaque, target_phys_addr_t addr)
         idx += (addr & 0xFFF0) >> 5;
         if (addr & 0x10) {
             /* EXDE / IFEDE / IEEDE */
-            retval = read_IRQreg(mpp, idx, IRQ_IDE);
+            retval = read_IRQreg_ide(mpp, idx);
         } else {
             /* EXVP / IFEVP / IEEVP */
-            retval = read_IRQreg(mpp, idx, IRQ_IPVP);
+            retval = read_IRQreg_ipvp(mpp, idx);
         }
         DPRINTF("%s: => %08x\n", __func__, retval);
     }
-- 
1.6.0.2
^ permalink raw reply related	[flat|nested] 128+ messages in thread
- * [Qemu-devel] [PATCH 54/58] openpic: Unfold write_IRQreg
  2011-09-14  8:42 [Qemu-devel] [PULL 00/58] ppc patch queue 2011-09-14 Alexander Graf
                   ` (52 preceding siblings ...)
  2011-09-14  8:43 ` [Qemu-devel] [PATCH 53/58] openpic: Unfold read_IRQreg Alexander Graf
@ 2011-09-14  8:43 ` Alexander Graf
  2011-09-14  8:43 ` [Qemu-devel] [PATCH 55/58] ppc: move ADB stuff from ppc_mac.h to adb.h Alexander Graf
                   ` (3 subsequent siblings)
  57 siblings, 0 replies; 128+ messages in thread
From: Alexander Graf @ 2011-09-14  8:43 UTC (permalink / raw)
  To: qemu-devel Developers; +Cc: Blue Swirl, qemu-ppc, Aurelien Jarno
The helper function write_IRQreg was always called with a specific argument on
the type of register to access. Inside the function we were simply doing a
switch on that constant argument again. It's a lot easier to just unfold this
into two separate functions and call each individually.
Reported-by: Blue Swirl <blauwirbel@gmail.com>
Signed-off-by: Alexander Graf <agraf@suse.de>
---
 hw/openpic.c |   79 +++++++++++++++++++++++++++------------------------------
 1 files changed, 37 insertions(+), 42 deletions(-)
diff --git a/hw/openpic.c b/hw/openpic.c
index fbd8837..43b8f27 100644
--- a/hw/openpic.c
+++ b/hw/openpic.c
@@ -482,30 +482,25 @@ static inline uint32_t read_IRQreg_ipvp(openpic_t *opp, int n_IRQ)
     return opp->src[n_IRQ].ipvp;
 }
 
-static inline void write_IRQreg (openpic_t *opp, int n_IRQ,
-                                 uint32_t reg, uint32_t val)
+static inline void write_IRQreg_ide(openpic_t *opp, int n_IRQ, uint32_t val)
 {
     uint32_t tmp;
 
-    switch (reg) {
-    case IRQ_IPVP:
-        /* NOTE: not fully accurate for special IRQs, but simple and
-           sufficient */
-        /* ACTIVITY bit is read-only */
-        opp->src[n_IRQ].ipvp =
-            (opp->src[n_IRQ].ipvp & 0x40000000) |
-            (val & 0x800F00FF);
-        openpic_update_irq(opp, n_IRQ);
-        DPRINTF("Set IPVP %d to 0x%08x -> 0x%08x\n",
-                n_IRQ, val, opp->src[n_IRQ].ipvp);
-        break;
-    case IRQ_IDE:
-        tmp = val & 0xC0000000;
-        tmp |= val & ((1ULL << MAX_CPU) - 1);
-        opp->src[n_IRQ].ide = tmp;
-        DPRINTF("Set IDE %d to 0x%08x\n", n_IRQ, opp->src[n_IRQ].ide);
-        break;
-    }
+    tmp = val & 0xC0000000;
+    tmp |= val & ((1ULL << MAX_CPU) - 1);
+    opp->src[n_IRQ].ide = tmp;
+    DPRINTF("Set IDE %d to 0x%08x\n", n_IRQ, opp->src[n_IRQ].ide);
+}
+
+static inline void write_IRQreg_ipvp(openpic_t *opp, int n_IRQ, uint32_t val)
+{
+    /* NOTE: not fully accurate for special IRQs, but simple and sufficient */
+    /* ACTIVITY bit is read-only */
+    opp->src[n_IRQ].ipvp = (opp->src[n_IRQ].ipvp & 0x40000000)
+                         | (val & 0x800F00FF);
+    openpic_update_irq(opp, n_IRQ);
+    DPRINTF("Set IPVP %d to 0x%08x -> 0x%08x\n", n_IRQ, val,
+            opp->src[n_IRQ].ipvp);
 }
 
 #if 0 // Code provision for Intel model
@@ -535,10 +530,10 @@ static void write_doorbell_register (penpic_t *opp, int n_dbl,
 {
     switch (offset) {
     case DBL_IVPR_OFFSET:
-        write_IRQreg(opp, IRQ_DBL0 + n_dbl, IRQ_IPVP, value);
+        write_IRQreg_ipvp(opp, IRQ_DBL0 + n_dbl, value);
         break;
     case DBL_IDE_OFFSET:
-        write_IRQreg(opp, IRQ_DBL0 + n_dbl, IRQ_IDE, value);
+        write_IRQreg_ide(opp, IRQ_DBL0 + n_dbl, value);
         break;
     case DBL_DMR_OFFSET:
         opp->doorbells[n_dbl].dmr = value;
@@ -576,10 +571,10 @@ static void write_mailbox_register (openpic_t *opp, int n_mbx,
         opp->mailboxes[n_mbx].mbr = value;
         break;
     case MBX_IVPR_OFFSET:
-        write_IRQreg(opp, IRQ_MBX0 + n_mbx, IRQ_IPVP, value);
+        write_IRQreg_ipvp(opp, IRQ_MBX0 + n_mbx, value);
         break;
     case MBX_DMR_OFFSET:
-        write_IRQreg(opp, IRQ_MBX0 + n_mbx, IRQ_IDE, value);
+        write_IRQreg_ide(opp, IRQ_MBX0 + n_mbx, value);
         break;
     }
 }
@@ -636,7 +631,7 @@ static void openpic_gbl_write (void *opaque, target_phys_addr_t addr, uint32_t v
         {
             int idx;
             idx = (addr - 0x10A0) >> 4;
-            write_IRQreg(opp, opp->irq_ipi0 + idx, IRQ_IPVP, val);
+            write_IRQreg_ipvp(opp, opp->irq_ipi0 + idx, val);
         }
         break;
     case 0x10E0: /* SPVE */
@@ -729,10 +724,10 @@ static void openpic_timer_write (void *opaque, uint32_t addr, uint32_t val)
         opp->timers[idx].tibc = val;
         break;
     case 0x20: /* TIVP */
-        write_IRQreg(opp, opp->irq_tim0 + idx, IRQ_IPVP, val);
+        write_IRQreg_ipvp(opp, opp->irq_tim0 + idx, val);
         break;
     case 0x30: /* TIDE */
-        write_IRQreg(opp, opp->irq_tim0 + idx, IRQ_IDE, val);
+        write_IRQreg_ide(opp, opp->irq_tim0 + idx, val);
         break;
     }
 }
@@ -782,10 +777,10 @@ static void openpic_src_write (void *opaque, uint32_t addr, uint32_t val)
     idx = addr >> 5;
     if (addr & 0x10) {
         /* EXDE / IFEDE / IEEDE */
-        write_IRQreg(opp, idx, IRQ_IDE, val);
+        write_IRQreg_ide(opp, idx, val);
     } else {
         /* EXVP / IFEVP / IEEVP */
-        write_IRQreg(opp, idx, IRQ_IPVP, val);
+        write_IRQreg_ipvp(opp, idx, val);
     }
 }
 
@@ -835,8 +830,8 @@ static void openpic_cpu_write_internal(void *opaque, target_phys_addr_t addr,
     case 0x70:
         idx = (addr - 0x40) >> 4;
         /* we use IDE as mask which CPUs to deliver the IPI to still. */
-        write_IRQreg(opp, opp->irq_ipi0 + idx, IRQ_IDE,
-                     opp->src[opp->irq_ipi0 + idx].ide | val);
+        write_IRQreg_ide(opp, opp->irq_ipi0 + idx,
+                         opp->src[opp->irq_ipi0 + idx].ide | val);
         openpic_set_irq(opp, opp->irq_ipi0 + idx, 1);
         openpic_set_irq(opp, opp->irq_ipi0 + idx, 0);
         break;
@@ -1330,13 +1325,13 @@ static void mpic_timer_write (void *opaque, target_phys_addr_t addr, uint32_t va
         mpp->timers[idx].tibc = val;
         break;
     case 0x20: /* GTIVPR */
-        write_IRQreg(mpp, MPIC_TMR_IRQ + idx, IRQ_IPVP, val);
+        write_IRQreg_ipvp(mpp, MPIC_TMR_IRQ + idx, val);
         break;
     case 0x30: /* GTIDR & TFRR */
         if ((addr & 0xF0) == 0xF0)
             mpp->dst[cpu].tfrr = val;
         else
-            write_IRQreg(mpp, MPIC_TMR_IRQ + idx, IRQ_IDE, val);
+            write_IRQreg_ide(mpp, MPIC_TMR_IRQ + idx, val);
         break;
     }
 }
@@ -1391,10 +1386,10 @@ static void mpic_src_ext_write (void *opaque, target_phys_addr_t addr,
         idx += (addr & 0xFFF0) >> 5;
         if (addr & 0x10) {
             /* EXDE / IFEDE / IEEDE */
-            write_IRQreg(mpp, idx, IRQ_IDE, val);
+            write_IRQreg_ide(mpp, idx, val);
         } else {
             /* EXVP / IFEVP / IEEVP */
-            write_IRQreg(mpp, idx, IRQ_IPVP, val);
+            write_IRQreg_ipvp(mpp, idx, val);
         }
     }
 }
@@ -1441,10 +1436,10 @@ static void mpic_src_int_write (void *opaque, target_phys_addr_t addr,
         idx += (addr & 0xFFF0) >> 5;
         if (addr & 0x10) {
             /* EXDE / IFEDE / IEEDE */
-            write_IRQreg(mpp, idx, IRQ_IDE, val);
+            write_IRQreg_ide(mpp, idx, val);
         } else {
             /* EXVP / IFEVP / IEEVP */
-            write_IRQreg(mpp, idx, IRQ_IPVP, val);
+            write_IRQreg_ipvp(mpp, idx, val);
         }
     }
 }
@@ -1491,10 +1486,10 @@ static void mpic_src_msg_write (void *opaque, target_phys_addr_t addr,
         idx += (addr & 0xFFF0) >> 5;
         if (addr & 0x10) {
             /* EXDE / IFEDE / IEEDE */
-            write_IRQreg(mpp, idx, IRQ_IDE, val);
+            write_IRQreg_ide(mpp, idx, val);
         } else {
             /* EXVP / IFEVP / IEEVP */
-            write_IRQreg(mpp, idx, IRQ_IPVP, val);
+            write_IRQreg_ipvp(mpp, idx, val);
         }
     }
 }
@@ -1541,10 +1536,10 @@ static void mpic_src_msi_write (void *opaque, target_phys_addr_t addr,
         idx += (addr & 0xFFF0) >> 5;
         if (addr & 0x10) {
             /* EXDE / IFEDE / IEEDE */
-            write_IRQreg(mpp, idx, IRQ_IDE, val);
+            write_IRQreg_ide(mpp, idx, val);
         } else {
             /* EXVP / IFEVP / IEEVP */
-            write_IRQreg(mpp, idx, IRQ_IPVP, val);
+            write_IRQreg_ipvp(mpp, idx, val);
         }
     }
 }
-- 
1.6.0.2
^ permalink raw reply related	[flat|nested] 128+ messages in thread
- * [Qemu-devel] [PATCH 55/58] ppc: move ADB stuff from ppc_mac.h to adb.h
  2011-09-14  8:42 [Qemu-devel] [PULL 00/58] ppc patch queue 2011-09-14 Alexander Graf
                   ` (53 preceding siblings ...)
  2011-09-14  8:43 ` [Qemu-devel] [PATCH 54/58] openpic: Unfold write_IRQreg Alexander Graf
@ 2011-09-14  8:43 ` Alexander Graf
  2011-09-14  8:43 ` [Qemu-devel] [PATCH 56/58] PPC: Fix via-cuda memory registration Alexander Graf
                   ` (2 subsequent siblings)
  57 siblings, 0 replies; 128+ messages in thread
From: Alexander Graf @ 2011-09-14  8:43 UTC (permalink / raw)
  To: qemu-devel Developers
  Cc: Blue Swirl, qemu-ppc, Laurent Vivier, Aurelien Jarno
From: Laurent Vivier <laurent@vivier.eu>
Allow ADB to be used on non-PPC Macintosh machines
Signed-off-by: Laurent Vivier <laurent@vivier.eu>
Signed-off-by: Alexander Graf <agraf@suse.de>
---
 hw/adb.c          |    2 +-
 hw/adb.h          |   67 +++++++++++++++++++++++++++++++++++++++++++++++++++++
 hw/cuda.c         |    1 +
 hw/ppc_mac.h      |   42 ---------------------------------
 hw/ppc_newworld.c |    1 +
 hw/ppc_oldworld.c |    1 +
 6 files changed, 71 insertions(+), 43 deletions(-)
 create mode 100644 hw/adb.h
diff --git a/hw/adb.c b/hw/adb.c
index 8dedbf8..aa15f55 100644
--- a/hw/adb.c
+++ b/hw/adb.c
@@ -22,7 +22,7 @@
  * THE SOFTWARE.
  */
 #include "hw.h"
-#include "ppc_mac.h"
+#include "adb.h"
 #include "console.h"
 
 /* debug ADB */
diff --git a/hw/adb.h b/hw/adb.h
new file mode 100644
index 0000000..b2a591c
--- /dev/null
+++ b/hw/adb.h
@@ -0,0 +1,67 @@
+/*
+ * QEMU ADB emulation shared definitions and prototypes
+ *
+ * Copyright (c) 2004-2007 Fabrice Bellard
+ * Copyright (c) 2007 Jocelyn Mayer
+ *
+ * Permission is hereby granted, free of charge, to any person obtaining a copy
+ * of this software and associated documentation files (the "Software"), to deal
+ * in the Software without restriction, including without limitation the rights
+ * to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
+ * copies of the Software, and to permit persons to whom the Software is
+ * furnished to do so, subject to the following conditions:
+ *
+ * The above copyright notice and this permission notice shall be included in
+ * all copies or substantial portions of the Software.
+ *
+ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
+ * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
+ * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL
+ * THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
+ * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
+ * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN
+ * THE SOFTWARE.
+ */
+
+#if !defined(__ADB_H__)
+#define __ADB_H__
+
+#define MAX_ADB_DEVICES 16
+
+#define ADB_MAX_OUT_LEN 16
+
+typedef struct ADBDevice ADBDevice;
+
+/* buf = NULL means polling */
+typedef int ADBDeviceRequest(ADBDevice *d, uint8_t *buf_out,
+                              const uint8_t *buf, int len);
+typedef int ADBDeviceReset(ADBDevice *d);
+
+struct ADBDevice {
+    struct ADBBusState *bus;
+    int devaddr;
+    int handler;
+    ADBDeviceRequest *devreq;
+    ADBDeviceReset *devreset;
+    void *opaque;
+};
+
+typedef struct ADBBusState {
+    ADBDevice devices[MAX_ADB_DEVICES];
+    int nb_devices;
+    int poll_index;
+} ADBBusState;
+
+int adb_request(ADBBusState *s, uint8_t *buf_out,
+                const uint8_t *buf, int len);
+int adb_poll(ADBBusState *s, uint8_t *buf_out);
+
+ADBDevice *adb_register_device(ADBBusState *s, int devaddr,
+                               ADBDeviceRequest *devreq,
+                               ADBDeviceReset *devreset,
+                               void *opaque);
+void adb_kbd_init(ADBBusState *bus);
+void adb_mouse_init(ADBBusState *bus);
+
+extern ADBBusState adb_bus;
+#endif /* !defined(__ADB_H__) */
diff --git a/hw/cuda.c b/hw/cuda.c
index 5c92d81..6f05975 100644
--- a/hw/cuda.c
+++ b/hw/cuda.c
@@ -24,6 +24,7 @@
  */
 #include "hw.h"
 #include "ppc_mac.h"
+#include "adb.h"
 #include "qemu-timer.h"
 #include "sysemu.h"
 
diff --git a/hw/ppc_mac.h b/hw/ppc_mac.h
index 7351bb6..af75e45 100644
--- a/hw/ppc_mac.h
+++ b/hw/ppc_mac.h
@@ -77,46 +77,4 @@ void macio_nvram_setup_bar(MacIONVRAMState *s, MemoryRegion *bar,
 void pmac_format_nvram_partition (MacIONVRAMState *nvr, int len);
 uint32_t macio_nvram_read (void *opaque, uint32_t addr);
 void macio_nvram_write (void *opaque, uint32_t addr, uint32_t val);
-
-/* adb.c */
-
-#define MAX_ADB_DEVICES 16
-
-#define ADB_MAX_OUT_LEN 16
-
-typedef struct ADBDevice ADBDevice;
-
-/* buf = NULL means polling */
-typedef int ADBDeviceRequest(ADBDevice *d, uint8_t *buf_out,
-                              const uint8_t *buf, int len);
-typedef int ADBDeviceReset(ADBDevice *d);
-
-struct ADBDevice {
-    struct ADBBusState *bus;
-    int devaddr;
-    int handler;
-    ADBDeviceRequest *devreq;
-    ADBDeviceReset *devreset;
-    void *opaque;
-};
-
-typedef struct ADBBusState {
-    ADBDevice devices[MAX_ADB_DEVICES];
-    int nb_devices;
-    int poll_index;
-} ADBBusState;
-
-int adb_request(ADBBusState *s, uint8_t *buf_out,
-                const uint8_t *buf, int len);
-int adb_poll(ADBBusState *s, uint8_t *buf_out);
-
-ADBDevice *adb_register_device(ADBBusState *s, int devaddr,
-                               ADBDeviceRequest *devreq,
-                               ADBDeviceReset *devreset,
-                               void *opaque);
-void adb_kbd_init(ADBBusState *bus);
-void adb_mouse_init(ADBBusState *bus);
-
-extern ADBBusState adb_bus;
-
 #endif /* !defined(__PPC_MAC_H__) */
diff --git a/hw/ppc_newworld.c b/hw/ppc_newworld.c
index fbd443d..5fb9359 100644
--- a/hw/ppc_newworld.c
+++ b/hw/ppc_newworld.c
@@ -49,6 +49,7 @@
 #include "hw.h"
 #include "ppc.h"
 #include "ppc_mac.h"
+#include "adb.h"
 #include "mac_dbdma.h"
 #include "nvram.h"
 #include "pc.h"
diff --git a/hw/ppc_oldworld.c b/hw/ppc_oldworld.c
index 235d2ef..3857075 100644
--- a/hw/ppc_oldworld.c
+++ b/hw/ppc_oldworld.c
@@ -26,6 +26,7 @@
 #include "hw.h"
 #include "ppc.h"
 #include "ppc_mac.h"
+#include "adb.h"
 #include "mac_dbdma.h"
 #include "nvram.h"
 #include "pc.h"
-- 
1.6.0.2
^ permalink raw reply related	[flat|nested] 128+ messages in thread
- * [Qemu-devel] [PATCH 56/58] PPC: Fix via-cuda memory registration
  2011-09-14  8:42 [Qemu-devel] [PULL 00/58] ppc patch queue 2011-09-14 Alexander Graf
                   ` (54 preceding siblings ...)
  2011-09-14  8:43 ` [Qemu-devel] [PATCH 55/58] ppc: move ADB stuff from ppc_mac.h to adb.h Alexander Graf
@ 2011-09-14  8:43 ` Alexander Graf
  2011-09-14  8:43 ` [Qemu-devel] [PATCH 57/58] PPC: Fix heathrow PIC to use little endian MMIO Alexander Graf
  2011-09-14  8:43 ` [Qemu-devel] [PATCH 58/58] KVM: Update kernel headers Alexander Graf
  57 siblings, 0 replies; 128+ messages in thread
From: Alexander Graf @ 2011-09-14  8:43 UTC (permalink / raw)
  To: qemu-devel Developers; +Cc: Blue Swirl, qemu-ppc, Aurelien Jarno
Commit 23c5e4ca (convert to memory API) broke the VIA Cuda emulation layer
by not registering the IO structs.
This patch registers them properly and thus makes -M g3beige and -M mac99
work again.
Tested-by: Andreas Färber <andreas.faerber@web.de>
Signed-off-by: Alexander Graf <agraf@suse.de>
---
 hw/cuda.c |   28 ++++++++++++++++------------
 1 files changed, 16 insertions(+), 12 deletions(-)
diff --git a/hw/cuda.c b/hw/cuda.c
index 6f05975..4077436 100644
--- a/hw/cuda.c
+++ b/hw/cuda.c
@@ -634,16 +634,20 @@ static uint32_t cuda_readl (void *opaque, target_phys_addr_t addr)
     return 0;
 }
 
-static CPUWriteMemoryFunc * const cuda_write[] = {
-    &cuda_writeb,
-    &cuda_writew,
-    &cuda_writel,
-};
-
-static CPUReadMemoryFunc * const cuda_read[] = {
-    &cuda_readb,
-    &cuda_readw,
-    &cuda_readl,
+static MemoryRegionOps cuda_ops = {
+    .old_mmio = {
+        .write = {
+            cuda_writeb,
+            cuda_writew,
+            cuda_writel,
+        },
+        .read = {
+            cuda_readb,
+            cuda_readw,
+            cuda_readl,
+        },
+    },
+    .endianness = DEVICE_NATIVE_ENDIAN,
 };
 
 static bool cuda_timer_exist(void *opaque, int version_id)
@@ -740,8 +744,8 @@ void cuda_init (MemoryRegion **cuda_mem, qemu_irq irq)
     s->tick_offset = (uint32_t)mktimegm(&tm) + RTC_OFFSET;
 
     s->adb_poll_timer = qemu_new_timer_ns(vm_clock, cuda_adb_poll, s);
-    cpu_register_io_memory(cuda_read, cuda_write, s,
-                                             DEVICE_NATIVE_ENDIAN);
+    memory_region_init_io(&s->mem, &cuda_ops, s, "cuda", 0x2000);
+
     *cuda_mem = &s->mem;
     vmstate_register(NULL, -1, &vmstate_cuda, s);
     qemu_register_reset(cuda_reset, s);
-- 
1.6.0.2
^ permalink raw reply related	[flat|nested] 128+ messages in thread
- * [Qemu-devel] [PATCH 57/58] PPC: Fix heathrow PIC to use little endian MMIO
  2011-09-14  8:42 [Qemu-devel] [PULL 00/58] ppc patch queue 2011-09-14 Alexander Graf
                   ` (55 preceding siblings ...)
  2011-09-14  8:43 ` [Qemu-devel] [PATCH 56/58] PPC: Fix via-cuda memory registration Alexander Graf
@ 2011-09-14  8:43 ` Alexander Graf
  2011-09-14  8:43 ` [Qemu-devel] [PATCH 58/58] KVM: Update kernel headers Alexander Graf
  57 siblings, 0 replies; 128+ messages in thread
From: Alexander Graf @ 2011-09-14  8:43 UTC (permalink / raw)
  To: qemu-devel Developers; +Cc: Blue Swirl, qemu-ppc, Aurelien Jarno
During the memory API conversion, the indication on little endianness of
MMIO for the heathrow PIC got dropped. This patch adds it back again.
Signed-off-by: Alexander Graf <agraf@suse.de>
---
 hw/heathrow_pic.c |    2 +-
 1 files changed, 1 insertions(+), 1 deletions(-)
diff --git a/hw/heathrow_pic.c b/hw/heathrow_pic.c
index 51996ab..16f48d1 100644
--- a/hw/heathrow_pic.c
+++ b/hw/heathrow_pic.c
@@ -126,7 +126,7 @@ static uint64_t pic_read(void *opaque, target_phys_addr_t addr,
 static const MemoryRegionOps heathrow_pic_ops = {
     .read = pic_read,
     .write = pic_write,
-    .endianness = DEVICE_NATIVE_ENDIAN,
+    .endianness = DEVICE_LITTLE_ENDIAN,
 };
 
 static void heathrow_pic_set_irq(void *opaque, int num, int level)
-- 
1.6.0.2
^ permalink raw reply related	[flat|nested] 128+ messages in thread
- * [Qemu-devel] [PATCH 58/58] KVM: Update kernel headers
  2011-09-14  8:42 [Qemu-devel] [PULL 00/58] ppc patch queue 2011-09-14 Alexander Graf
                   ` (56 preceding siblings ...)
  2011-09-14  8:43 ` [Qemu-devel] [PATCH 57/58] PPC: Fix heathrow PIC to use little endian MMIO Alexander Graf
@ 2011-09-14  8:43 ` Alexander Graf
  57 siblings, 0 replies; 128+ messages in thread
From: Alexander Graf @ 2011-09-14  8:43 UTC (permalink / raw)
  To: qemu-devel Developers; +Cc: Blue Swirl, qemu-ppc, Aurelien Jarno
Removes ABI-breaking HIOR parts - KVM patch to follow.
Signed-off-by: Alexander Graf <agraf@suse.de>
---
 linux-headers/asm-powerpc/kvm.h |   12 ++----------
 linux-headers/linux/kvm.h       |    1 -
 2 files changed, 2 insertions(+), 11 deletions(-)
diff --git a/linux-headers/asm-powerpc/kvm.h b/linux-headers/asm-powerpc/kvm.h
index 28eecf0..a635e22 100644
--- a/linux-headers/asm-powerpc/kvm.h
+++ b/linux-headers/asm-powerpc/kvm.h
@@ -149,12 +149,6 @@ struct kvm_regs {
 #define KVM_SREGS_E_UPDATE_DBSR		(1 << 3)
 
 /*
- * Book3S special bits to indicate contents in the struct by maintaining
- * backwards compatibility with older structs. If adding a new field,
- * please make sure to add a flag for that new field */
-#define KVM_SREGS_S_HIOR		(1 << 0)
-
-/*
  * In KVM_SET_SREGS, reserved/pad fields must be left untouched from a
  * previous KVM_GET_REGS.
  *
@@ -176,11 +170,9 @@ struct kvm_sregs {
 			} ppc64;
 			struct {
 				__u32 sr[16];
-				__u64 ibat[8];
-				__u64 dbat[8];
+				__u64 ibat[8]; 
+				__u64 dbat[8]; 
 			} ppc32;
-			__u64 flags; /* KVM_SREGS_S_ */
-			__u64 hior;
 		} s;
 		struct {
 			union {
diff --git a/linux-headers/linux/kvm.h b/linux-headers/linux/kvm.h
index 8bb6cde..6f5095c 100644
--- a/linux-headers/linux/kvm.h
+++ b/linux-headers/linux/kvm.h
@@ -554,7 +554,6 @@ struct kvm_ppc_pvinfo {
 #define KVM_CAP_PPC_SMT 64
 #define KVM_CAP_PPC_RMA	65
 #define KVM_CAP_MAX_VCPUS 66       /* returns max vcpus per vm */
-#define KVM_CAP_PPC_HIOR 67
 #define KVM_CAP_PPC_PAPR 68
 #define KVM_CAP_SW_TLB 69
 
-- 
1.6.0.2
^ permalink raw reply related	[flat|nested] 128+ messages in thread