[PATCH v4] x86: add etc/phys-bits fw

qemu-devel.nongnu.org archive mirror
 help / color / mirror / Atom feed

* [PATCH v4] x86: add etc/phys-bits fw_cfg file
@ 2022-09-22 10:14 Gerd Hoffmann
  2022-09-22 11:24 ` Daniel P. Berrangé
  0 siblings, 1 reply; 13+ messages in thread
From: Gerd Hoffmann @ 2022-09-22 10:14 UTC (permalink / raw)
  To: qemu-devel
  Cc: Sergio Lopez, Eduardo Habkost, Marcel Apfelbaum,
	Richard Henderson, kvm, Marcelo Tosatti, Paolo Bonzini,
	Michael S. Tsirkin, Gerd Hoffmann

In case phys bits are functional and can be used by the guest (aka
host-phys-bits=on) add a fw_cfg file carrying the value.  This can
be used by the guest firmware for address space configuration.

The value in the etc/phys-bits fw_cfg file should be identical to
the phys bits value published via cpuid leaf 0x80000008.

This is only enabled for 7.2+ machine types for live migration
compatibility reasons.

Signed-off-by: Gerd Hoffmann <kraxel@redhat.com>
Reviewed-by: Michael S. Tsirkin <mst@redhat.com>
---
 hw/i386/fw_cfg.h     |  1 +
 include/hw/i386/pc.h |  1 +
 hw/i386/fw_cfg.c     | 12 ++++++++++++
 hw/i386/pc.c         |  5 +++++
 hw/i386/pc_piix.c    |  2 ++
 hw/i386/pc_q35.c     |  2 ++
 6 files changed, 23 insertions(+)

diff --git a/hw/i386/fw_cfg.h b/hw/i386/fw_cfg.h
index 275f15c1c5e8..6ff198a6cb85 100644
--- a/hw/i386/fw_cfg.h
+++ b/hw/i386/fw_cfg.h
@@ -26,5 +26,6 @@ FWCfgState *fw_cfg_arch_create(MachineState *ms,
 void fw_cfg_build_smbios(MachineState *ms, FWCfgState *fw_cfg);
 void fw_cfg_build_feature_control(MachineState *ms, FWCfgState *fw_cfg);
 void fw_cfg_add_acpi_dsdt(Aml *scope, FWCfgState *fw_cfg);
+void fw_cfg_phys_bits(FWCfgState *fw_cfg);
 
 #endif
diff --git a/include/hw/i386/pc.h b/include/hw/i386/pc.h
index c95333514ed3..bedef1ee13c1 100644
--- a/include/hw/i386/pc.h
+++ b/include/hw/i386/pc.h
@@ -119,6 +119,7 @@ struct PCMachineClass {
     bool enforce_aligned_dimm;
     bool broken_reserved_end;
     bool enforce_amd_1tb_hole;
+    bool phys_bits_in_fw_cfg;
 
     /* generate legacy CPU hotplug AML */
     bool legacy_cpu_hotplug;
diff --git a/hw/i386/fw_cfg.c b/hw/i386/fw_cfg.c
index a283785a8de4..6a1f18925725 100644
--- a/hw/i386/fw_cfg.c
+++ b/hw/i386/fw_cfg.c
@@ -219,3 +219,15 @@ void fw_cfg_add_acpi_dsdt(Aml *scope, FWCfgState *fw_cfg)
     aml_append(dev, aml_name_decl("_CRS", crs));
     aml_append(scope, dev);
 }
+
+void fw_cfg_phys_bits(FWCfgState *fw_cfg)
+{
+    X86CPU *cpu = X86_CPU(first_cpu);
+    uint64_t phys_bits = cpu->phys_bits;
+
+    if (cpu->host_phys_bits) {
+        fw_cfg_add_file(fw_cfg, "etc/phys-bits",
+                        g_memdup2(&phys_bits, sizeof(phys_bits)),
+                        sizeof(phys_bits));
+    }
+}
diff --git a/hw/i386/pc.c b/hw/i386/pc.c
index 566accf7e60a..17ecc7fe4331 100644
--- a/hw/i386/pc.c
+++ b/hw/i386/pc.c
@@ -744,6 +744,7 @@ void pc_machine_done(Notifier *notifier, void *data)
 {
     PCMachineState *pcms = container_of(notifier,
                                         PCMachineState, machine_done);
+    PCMachineClass *pcmc = PC_MACHINE_GET_CLASS(pcms);
     X86MachineState *x86ms = X86_MACHINE(pcms);
 
     cxl_hook_up_pxb_registers(pcms->bus, &pcms->cxl_devices_state,
@@ -764,6 +765,9 @@ void pc_machine_done(Notifier *notifier, void *data)
         fw_cfg_build_feature_control(MACHINE(pcms), x86ms->fw_cfg);
         /* update FW_CFG_NB_CPUS to account for -device added CPUs */
         fw_cfg_modify_i16(x86ms->fw_cfg, FW_CFG_NB_CPUS, x86ms->boot_cpus);
+        if (pcmc->phys_bits_in_fw_cfg) {
+            fw_cfg_phys_bits(x86ms->fw_cfg);
+        }
     }
 }
 
@@ -1907,6 +1911,7 @@ static void pc_machine_class_init(ObjectClass *oc, void *data)
     pcmc->kvmclock_enabled = true;
     pcmc->enforce_aligned_dimm = true;
     pcmc->enforce_amd_1tb_hole = true;
+    pcmc->phys_bits_in_fw_cfg = true;
     /* BIOS ACPI tables: 128K. Other BIOS datastructures: less than 4K reported
      * to be used at the moment, 32K should be enough for a while.  */
     pcmc->acpi_data_size = 0x20000 + 0x8000;
diff --git a/hw/i386/pc_piix.c b/hw/i386/pc_piix.c
index 8043a250adf3..c6a4dbd5c0b0 100644
--- a/hw/i386/pc_piix.c
+++ b/hw/i386/pc_piix.c
@@ -447,9 +447,11 @@ DEFINE_I440FX_MACHINE(v7_2, "pc-i440fx-7.2", NULL,
 
 static void pc_i440fx_7_1_machine_options(MachineClass *m)
 {
+    PCMachineClass *pcmc = PC_MACHINE_CLASS(m);
     pc_i440fx_7_2_machine_options(m);
     m->alias = NULL;
     m->is_default = false;
+    pcmc->phys_bits_in_fw_cfg = false;
     compat_props_add(m->compat_props, hw_compat_7_1, hw_compat_7_1_len);
     compat_props_add(m->compat_props, pc_compat_7_1, pc_compat_7_1_len);
 }
diff --git a/hw/i386/pc_q35.c b/hw/i386/pc_q35.c
index 53eda50e818c..c2b56daa1550 100644
--- a/hw/i386/pc_q35.c
+++ b/hw/i386/pc_q35.c
@@ -384,8 +384,10 @@ DEFINE_Q35_MACHINE(v7_2, "pc-q35-7.2", NULL,
 
 static void pc_q35_7_1_machine_options(MachineClass *m)
 {
+    PCMachineClass *pcmc = PC_MACHINE_CLASS(m);
     pc_q35_7_2_machine_options(m);
     m->alias = NULL;
+    pcmc->phys_bits_in_fw_cfg = false;
     compat_props_add(m->compat_props, hw_compat_7_1, hw_compat_7_1_len);
     compat_props_add(m->compat_props, pc_compat_7_1, pc_compat_7_1_len);
 }
-- 
2.37.3



^ permalink raw reply related	[flat|nested] 13+ messages in thread

* Re: [PATCH v4] x86: add etc/phys-bits fw_cfg file
  2022-09-22 10:14 [PATCH v4] x86: add etc/phys-bits fw_cfg file Gerd Hoffmann
@ 2022-09-22 11:24 ` Daniel P. Berrangé
  2022-09-22 11:56   ` Michael S. Tsirkin
  2022-09-22 12:20   ` Gerd Hoffmann
  0 siblings, 2 replies; 13+ messages in thread
From: Daniel P. Berrangé @ 2022-09-22 11:24 UTC (permalink / raw)
  To: Gerd Hoffmann
  Cc: qemu-devel, Sergio Lopez, Eduardo Habkost, Marcel Apfelbaum,
	Richard Henderson, kvm, Marcelo Tosatti, Paolo Bonzini,
	Michael S. Tsirkin

On Thu, Sep 22, 2022 at 12:14:54PM +0200, Gerd Hoffmann wrote:
> In case phys bits are functional and can be used by the guest (aka
> host-phys-bits=on) add a fw_cfg file carrying the value.  This can
> be used by the guest firmware for address space configuration.
> 
> The value in the etc/phys-bits fw_cfg file should be identical to
> the phys bits value published via cpuid leaf 0x80000008.
> 
> This is only enabled for 7.2+ machine types for live migration
> compatibility reasons.

Is this going to have any implications for what mgmt apps must
take into account when selecting valid migration target hosts ?

Historically, apps have tended to ignore any checks for phys
bits between src/dst migration hosts and hoped for the best.

Will this new behaviour introduce / change any failure scenarios
where the target host has fewer phys bits than the src host, that
mgmt apps need to be made aware of ?

> 
> Signed-off-by: Gerd Hoffmann <kraxel@redhat.com>
> Reviewed-by: Michael S. Tsirkin <mst@redhat.com>
> ---
>  hw/i386/fw_cfg.h     |  1 +
>  include/hw/i386/pc.h |  1 +
>  hw/i386/fw_cfg.c     | 12 ++++++++++++
>  hw/i386/pc.c         |  5 +++++
>  hw/i386/pc_piix.c    |  2 ++
>  hw/i386/pc_q35.c     |  2 ++
>  6 files changed, 23 insertions(+)
> 
> diff --git a/hw/i386/fw_cfg.h b/hw/i386/fw_cfg.h
> index 275f15c1c5e8..6ff198a6cb85 100644
> --- a/hw/i386/fw_cfg.h
> +++ b/hw/i386/fw_cfg.h
> @@ -26,5 +26,6 @@ FWCfgState *fw_cfg_arch_create(MachineState *ms,
>  void fw_cfg_build_smbios(MachineState *ms, FWCfgState *fw_cfg);
>  void fw_cfg_build_feature_control(MachineState *ms, FWCfgState *fw_cfg);
>  void fw_cfg_add_acpi_dsdt(Aml *scope, FWCfgState *fw_cfg);
> +void fw_cfg_phys_bits(FWCfgState *fw_cfg);
>  
>  #endif
> diff --git a/include/hw/i386/pc.h b/include/hw/i386/pc.h
> index c95333514ed3..bedef1ee13c1 100644
> --- a/include/hw/i386/pc.h
> +++ b/include/hw/i386/pc.h
> @@ -119,6 +119,7 @@ struct PCMachineClass {
>      bool enforce_aligned_dimm;
>      bool broken_reserved_end;
>      bool enforce_amd_1tb_hole;
> +    bool phys_bits_in_fw_cfg;
>  
>      /* generate legacy CPU hotplug AML */
>      bool legacy_cpu_hotplug;
> diff --git a/hw/i386/fw_cfg.c b/hw/i386/fw_cfg.c
> index a283785a8de4..6a1f18925725 100644
> --- a/hw/i386/fw_cfg.c
> +++ b/hw/i386/fw_cfg.c
> @@ -219,3 +219,15 @@ void fw_cfg_add_acpi_dsdt(Aml *scope, FWCfgState *fw_cfg)
>      aml_append(dev, aml_name_decl("_CRS", crs));
>      aml_append(scope, dev);
>  }
> +
> +void fw_cfg_phys_bits(FWCfgState *fw_cfg)
> +{
> +    X86CPU *cpu = X86_CPU(first_cpu);
> +    uint64_t phys_bits = cpu->phys_bits;
> +
> +    if (cpu->host_phys_bits) {
> +        fw_cfg_add_file(fw_cfg, "etc/phys-bits",
> +                        g_memdup2(&phys_bits, sizeof(phys_bits)),
> +                        sizeof(phys_bits));
> +    }
> +}
> diff --git a/hw/i386/pc.c b/hw/i386/pc.c
> index 566accf7e60a..17ecc7fe4331 100644
> --- a/hw/i386/pc.c
> +++ b/hw/i386/pc.c
> @@ -744,6 +744,7 @@ void pc_machine_done(Notifier *notifier, void *data)
>  {
>      PCMachineState *pcms = container_of(notifier,
>                                          PCMachineState, machine_done);
> +    PCMachineClass *pcmc = PC_MACHINE_GET_CLASS(pcms);
>      X86MachineState *x86ms = X86_MACHINE(pcms);
>  
>      cxl_hook_up_pxb_registers(pcms->bus, &pcms->cxl_devices_state,
> @@ -764,6 +765,9 @@ void pc_machine_done(Notifier *notifier, void *data)
>          fw_cfg_build_feature_control(MACHINE(pcms), x86ms->fw_cfg);
>          /* update FW_CFG_NB_CPUS to account for -device added CPUs */
>          fw_cfg_modify_i16(x86ms->fw_cfg, FW_CFG_NB_CPUS, x86ms->boot_cpus);
> +        if (pcmc->phys_bits_in_fw_cfg) {
> +            fw_cfg_phys_bits(x86ms->fw_cfg);
> +        }
>      }
>  }
>  
> @@ -1907,6 +1911,7 @@ static void pc_machine_class_init(ObjectClass *oc, void *data)
>      pcmc->kvmclock_enabled = true;
>      pcmc->enforce_aligned_dimm = true;
>      pcmc->enforce_amd_1tb_hole = true;
> +    pcmc->phys_bits_in_fw_cfg = true;
>      /* BIOS ACPI tables: 128K. Other BIOS datastructures: less than 4K reported
>       * to be used at the moment, 32K should be enough for a while.  */
>      pcmc->acpi_data_size = 0x20000 + 0x8000;
> diff --git a/hw/i386/pc_piix.c b/hw/i386/pc_piix.c
> index 8043a250adf3..c6a4dbd5c0b0 100644
> --- a/hw/i386/pc_piix.c
> +++ b/hw/i386/pc_piix.c
> @@ -447,9 +447,11 @@ DEFINE_I440FX_MACHINE(v7_2, "pc-i440fx-7.2", NULL,
>  
>  static void pc_i440fx_7_1_machine_options(MachineClass *m)
>  {
> +    PCMachineClass *pcmc = PC_MACHINE_CLASS(m);
>      pc_i440fx_7_2_machine_options(m);
>      m->alias = NULL;
>      m->is_default = false;
> +    pcmc->phys_bits_in_fw_cfg = false;
>      compat_props_add(m->compat_props, hw_compat_7_1, hw_compat_7_1_len);
>      compat_props_add(m->compat_props, pc_compat_7_1, pc_compat_7_1_len);
>  }
> diff --git a/hw/i386/pc_q35.c b/hw/i386/pc_q35.c
> index 53eda50e818c..c2b56daa1550 100644
> --- a/hw/i386/pc_q35.c
> +++ b/hw/i386/pc_q35.c
> @@ -384,8 +384,10 @@ DEFINE_Q35_MACHINE(v7_2, "pc-q35-7.2", NULL,
>  
>  static void pc_q35_7_1_machine_options(MachineClass *m)
>  {
> +    PCMachineClass *pcmc = PC_MACHINE_CLASS(m);
>      pc_q35_7_2_machine_options(m);
>      m->alias = NULL;
> +    pcmc->phys_bits_in_fw_cfg = false;
>      compat_props_add(m->compat_props, hw_compat_7_1, hw_compat_7_1_len);
>      compat_props_add(m->compat_props, pc_compat_7_1, pc_compat_7_1_len);
>  }
> -- 
> 2.37.3
> 
> 

With regards,
Daniel
-- 
|: https://berrange.com      -o-    https://www.flickr.com/photos/dberrange :|
|: https://libvirt.org         -o-            https://fstop138.berrange.com :|
|: https://entangle-photo.org    -o-    https://www.instagram.com/dberrange :|



^ permalink raw reply	[flat|nested] 13+ messages in thread

* Re: [PATCH v4] x86: add etc/phys-bits fw_cfg file
  2022-09-22 11:24 ` Daniel P. Berrangé
@ 2022-09-22 11:56   ` Michael S. Tsirkin
  2022-09-22 12:20   ` Gerd Hoffmann
  1 sibling, 0 replies; 13+ messages in thread
From: Michael S. Tsirkin @ 2022-09-22 11:56 UTC (permalink / raw)
  To: Daniel P. Berrangé
  Cc: Gerd Hoffmann, qemu-devel, Sergio Lopez, Eduardo Habkost,
	Marcel Apfelbaum, Richard Henderson, kvm, Marcelo Tosatti,
	Paolo Bonzini

On Thu, Sep 22, 2022 at 12:24:09PM +0100, Daniel P. Berrangé wrote:
> On Thu, Sep 22, 2022 at 12:14:54PM +0200, Gerd Hoffmann wrote:
> > In case phys bits are functional and can be used by the guest (aka
> > host-phys-bits=on) add a fw_cfg file carrying the value.  This can
> > be used by the guest firmware for address space configuration.
> > 
> > The value in the etc/phys-bits fw_cfg file should be identical to
> > the phys bits value published via cpuid leaf 0x80000008.
> > 
> > This is only enabled for 7.2+ machine types for live migration
> > compatibility reasons.
> 
> Is this going to have any implications for what mgmt apps must
> take into account when selecting valid migration target hosts ?

I don't think this does anything by itself. It just tells the firmware
which value to use, since historically it ignored CPUID. I am still
debating with myself whether a boolean would be better.  Would
appreciate KVM maintainer's take on this. But in any case guests already
sometimes use CPUID (e.g. just grep for cpuid_maxphyaddr).
This value is just for firmware use.


> Historically, apps have tended to ignore any checks for phys
> bits between src/dst migration hosts and hoped for the best.
> 
> Will this new behaviour introduce / change any failure scenarios
> where the target host has fewer phys bits than the src host, that
> mgmt apps need to be made aware of ?
> 
> > 
> > Signed-off-by: Gerd Hoffmann <kraxel@redhat.com>
> > Reviewed-by: Michael S. Tsirkin <mst@redhat.com>
> > ---
> >  hw/i386/fw_cfg.h     |  1 +
> >  include/hw/i386/pc.h |  1 +
> >  hw/i386/fw_cfg.c     | 12 ++++++++++++
> >  hw/i386/pc.c         |  5 +++++
> >  hw/i386/pc_piix.c    |  2 ++
> >  hw/i386/pc_q35.c     |  2 ++
> >  6 files changed, 23 insertions(+)
> > 
> > diff --git a/hw/i386/fw_cfg.h b/hw/i386/fw_cfg.h
> > index 275f15c1c5e8..6ff198a6cb85 100644
> > --- a/hw/i386/fw_cfg.h
> > +++ b/hw/i386/fw_cfg.h
> > @@ -26,5 +26,6 @@ FWCfgState *fw_cfg_arch_create(MachineState *ms,
> >  void fw_cfg_build_smbios(MachineState *ms, FWCfgState *fw_cfg);
> >  void fw_cfg_build_feature_control(MachineState *ms, FWCfgState *fw_cfg);
> >  void fw_cfg_add_acpi_dsdt(Aml *scope, FWCfgState *fw_cfg);
> > +void fw_cfg_phys_bits(FWCfgState *fw_cfg);
> >  
> >  #endif
> > diff --git a/include/hw/i386/pc.h b/include/hw/i386/pc.h
> > index c95333514ed3..bedef1ee13c1 100644
> > --- a/include/hw/i386/pc.h
> > +++ b/include/hw/i386/pc.h
> > @@ -119,6 +119,7 @@ struct PCMachineClass {
> >      bool enforce_aligned_dimm;
> >      bool broken_reserved_end;
> >      bool enforce_amd_1tb_hole;
> > +    bool phys_bits_in_fw_cfg;
> >  
> >      /* generate legacy CPU hotplug AML */
> >      bool legacy_cpu_hotplug;
> > diff --git a/hw/i386/fw_cfg.c b/hw/i386/fw_cfg.c
> > index a283785a8de4..6a1f18925725 100644
> > --- a/hw/i386/fw_cfg.c
> > +++ b/hw/i386/fw_cfg.c
> > @@ -219,3 +219,15 @@ void fw_cfg_add_acpi_dsdt(Aml *scope, FWCfgState *fw_cfg)
> >      aml_append(dev, aml_name_decl("_CRS", crs));
> >      aml_append(scope, dev);
> >  }
> > +
> > +void fw_cfg_phys_bits(FWCfgState *fw_cfg)
> > +{
> > +    X86CPU *cpu = X86_CPU(first_cpu);
> > +    uint64_t phys_bits = cpu->phys_bits;
> > +
> > +    if (cpu->host_phys_bits) {
> > +        fw_cfg_add_file(fw_cfg, "etc/phys-bits",
> > +                        g_memdup2(&phys_bits, sizeof(phys_bits)),
> > +                        sizeof(phys_bits));
> > +    }
> > +}
> > diff --git a/hw/i386/pc.c b/hw/i386/pc.c
> > index 566accf7e60a..17ecc7fe4331 100644
> > --- a/hw/i386/pc.c
> > +++ b/hw/i386/pc.c
> > @@ -744,6 +744,7 @@ void pc_machine_done(Notifier *notifier, void *data)
> >  {
> >      PCMachineState *pcms = container_of(notifier,
> >                                          PCMachineState, machine_done);
> > +    PCMachineClass *pcmc = PC_MACHINE_GET_CLASS(pcms);
> >      X86MachineState *x86ms = X86_MACHINE(pcms);
> >  
> >      cxl_hook_up_pxb_registers(pcms->bus, &pcms->cxl_devices_state,
> > @@ -764,6 +765,9 @@ void pc_machine_done(Notifier *notifier, void *data)
> >          fw_cfg_build_feature_control(MACHINE(pcms), x86ms->fw_cfg);
> >          /* update FW_CFG_NB_CPUS to account for -device added CPUs */
> >          fw_cfg_modify_i16(x86ms->fw_cfg, FW_CFG_NB_CPUS, x86ms->boot_cpus);
> > +        if (pcmc->phys_bits_in_fw_cfg) {
> > +            fw_cfg_phys_bits(x86ms->fw_cfg);
> > +        }
> >      }
> >  }
> >  
> > @@ -1907,6 +1911,7 @@ static void pc_machine_class_init(ObjectClass *oc, void *data)
> >      pcmc->kvmclock_enabled = true;
> >      pcmc->enforce_aligned_dimm = true;
> >      pcmc->enforce_amd_1tb_hole = true;
> > +    pcmc->phys_bits_in_fw_cfg = true;
> >      /* BIOS ACPI tables: 128K. Other BIOS datastructures: less than 4K reported
> >       * to be used at the moment, 32K should be enough for a while.  */
> >      pcmc->acpi_data_size = 0x20000 + 0x8000;
> > diff --git a/hw/i386/pc_piix.c b/hw/i386/pc_piix.c
> > index 8043a250adf3..c6a4dbd5c0b0 100644
> > --- a/hw/i386/pc_piix.c
> > +++ b/hw/i386/pc_piix.c
> > @@ -447,9 +447,11 @@ DEFINE_I440FX_MACHINE(v7_2, "pc-i440fx-7.2", NULL,
> >  
> >  static void pc_i440fx_7_1_machine_options(MachineClass *m)
> >  {
> > +    PCMachineClass *pcmc = PC_MACHINE_CLASS(m);
> >      pc_i440fx_7_2_machine_options(m);
> >      m->alias = NULL;
> >      m->is_default = false;
> > +    pcmc->phys_bits_in_fw_cfg = false;
> >      compat_props_add(m->compat_props, hw_compat_7_1, hw_compat_7_1_len);
> >      compat_props_add(m->compat_props, pc_compat_7_1, pc_compat_7_1_len);
> >  }
> > diff --git a/hw/i386/pc_q35.c b/hw/i386/pc_q35.c
> > index 53eda50e818c..c2b56daa1550 100644
> > --- a/hw/i386/pc_q35.c
> > +++ b/hw/i386/pc_q35.c
> > @@ -384,8 +384,10 @@ DEFINE_Q35_MACHINE(v7_2, "pc-q35-7.2", NULL,
> >  
> >  static void pc_q35_7_1_machine_options(MachineClass *m)
> >  {
> > +    PCMachineClass *pcmc = PC_MACHINE_CLASS(m);
> >      pc_q35_7_2_machine_options(m);
> >      m->alias = NULL;
> > +    pcmc->phys_bits_in_fw_cfg = false;
> >      compat_props_add(m->compat_props, hw_compat_7_1, hw_compat_7_1_len);
> >      compat_props_add(m->compat_props, pc_compat_7_1, pc_compat_7_1_len);
> >  }
> > -- 
> > 2.37.3
> > 
> > 
> 
> With regards,
> Daniel
> -- 
> |: https://berrange.com      -o-    https://www.flickr.com/photos/dberrange :|
> |: https://libvirt.org         -o-            https://fstop138.berrange.com :|
> |: https://entangle-photo.org    -o-    https://www.instagram.com/dberrange :|



^ permalink raw reply	[flat|nested] 13+ messages in thread

* Re: [PATCH v4] x86: add etc/phys-bits fw_cfg file
  2022-09-22 11:24 ` Daniel P. Berrangé
  2022-09-22 11:56   ` Michael S. Tsirkin
@ 2022-09-22 12:20   ` Gerd Hoffmann
  2022-09-22 12:38     ` Paolo Bonzini
  1 sibling, 1 reply; 13+ messages in thread
From: Gerd Hoffmann @ 2022-09-22 12:20 UTC (permalink / raw)
  To: Daniel P. Berrangé
  Cc: qemu-devel, Sergio Lopez, Eduardo Habkost, Marcel Apfelbaum,
	Richard Henderson, kvm, Marcelo Tosatti, Paolo Bonzini,
	Michael S. Tsirkin

On Thu, Sep 22, 2022 at 12:24:09PM +0100, Daniel P. Berrangé wrote:
> On Thu, Sep 22, 2022 at 12:14:54PM +0200, Gerd Hoffmann wrote:
> > In case phys bits are functional and can be used by the guest (aka
> > host-phys-bits=on) add a fw_cfg file carrying the value.  This can
> > be used by the guest firmware for address space configuration.
> > 
> > The value in the etc/phys-bits fw_cfg file should be identical to
> > the phys bits value published via cpuid leaf 0x80000008.
> > 
> > This is only enabled for 7.2+ machine types for live migration
> > compatibility reasons.
> 
> Is this going to have any implications for what mgmt apps must
> take into account when selecting valid migration target hosts ?
> 
> Historically, apps have tended to ignore any checks for phys
> bits between src/dst migration hosts and hoped for the best.
> 
> Will this new behaviour introduce / change any failure scenarios
> where the target host has fewer phys bits than the src host, that
> mgmt apps need to be made aware of ?

No.  This will basically inform the guest that host-phys-bits has been
enabled (and pass the number of bits).  So the firmware can make use of
the available address space instead of trying to be as conservative as
possible to avoid going beyond the (unknown) limit.

The phys-bits config itself is not touched.

take care,
  Gerd



^ permalink raw reply	[flat|nested] 13+ messages in thread

* Re: [PATCH v4] x86: add etc/phys-bits fw_cfg file
  2022-09-22 12:20   ` Gerd Hoffmann
@ 2022-09-22 12:38     ` Paolo Bonzini
  2022-09-22 14:16       ` Gerd Hoffmann
  2022-09-22 20:33       ` Gerd Hoffmann
  0 siblings, 2 replies; 13+ messages in thread
From: Paolo Bonzini @ 2022-09-22 12:38 UTC (permalink / raw)
  To: Gerd Hoffmann
  Cc: Daniel P. Berrangé, qemu-devel, Sergio Lopez,
	Eduardo Habkost, Marcel Apfelbaum, Richard Henderson, kvm,
	Marcelo Tosatti, Michael S. Tsirkin

On Thu, Sep 22, 2022 at 2:21 PM Gerd Hoffmann <kraxel@redhat.com> wrote:
> No.  This will basically inform the guest that host-phys-bits has been
> enabled (and pass the number of bits).  So the firmware can make use of
> the available address space instead of trying to be as conservative as
> possible to avoid going beyond the (unknown) limit.

Intel processors that are not extremely old have host-phys-bits equal
to 39, 46 or 52. Older processors that had 36, in all likelihood,
didn't have IOMMUs (so no big 64-bit BARs).

AMD processors have had 48 for a while, though older consumer processors had 40.

QEMU has always used 40, though many downstream packages (IIRC RHEL
and Ubuntu) just use host-phys-bits = true when using KVM.

Would it work to:

1) set host-phys-bits to true on new machine types when not using TCG
(i.e. KVM / HVF / WHPX)

2) in the firmware treat 40 as if it were 39, to support old machine types?

Paolo

^ permalink raw reply	[flat|nested] 13+ messages in thread

* Re: [PATCH v4] x86: add etc/phys-bits fw_cfg file
  2022-09-22 12:38     ` Paolo Bonzini
@ 2022-09-22 14:16       ` Gerd Hoffmann
  2022-09-22 17:13         ` Jim Mattson
  2022-09-22 20:33       ` Gerd Hoffmann
  1 sibling, 1 reply; 13+ messages in thread
From: Gerd Hoffmann @ 2022-09-22 14:16 UTC (permalink / raw)
  To: Paolo Bonzini
  Cc: Daniel P. Berrangé, qemu-devel, Sergio Lopez,
	Eduardo Habkost, Marcel Apfelbaum, Richard Henderson, kvm,
	Marcelo Tosatti, Michael S. Tsirkin

On Thu, Sep 22, 2022 at 02:38:02PM +0200, Paolo Bonzini wrote:
> On Thu, Sep 22, 2022 at 2:21 PM Gerd Hoffmann <kraxel@redhat.com> wrote:
> > No.  This will basically inform the guest that host-phys-bits has been
> > enabled (and pass the number of bits).  So the firmware can make use of
> > the available address space instead of trying to be as conservative as
> > possible to avoid going beyond the (unknown) limit.
> 
> Intel processors that are not extremely old have host-phys-bits equal
> to 39, 46 or 52. Older processors that had 36, in all likelihood,
> didn't have IOMMUs (so no big 64-bit BARs).

Well, I happen to have a intel box with 36 physbits + iommu.

> 1) set host-phys-bits to true on new machine types when not using TCG
> (i.e. KVM / HVF / WHPX)

That is probably a good idea, but an independent problem.

Has live migration problems (when hosts have different phys bits),
which is IIRC the reason this hasn't happen yet.  Maybe that is solved
meanwhile the one way or another, I've seen some phys-bits changes in
libvirt recently ...

> 2) in the firmware treat 40 as if it were 39, to support old machine
> types?

The background of all this is that devices need more and more memory,
and the very conservative edk2 defaults are becoming increasingly
problematic.  So what I want do is scale things up with the address
space size.  Use 1/4 or 1/8 of the physical address space as 64bit
pci mmio window.  Likewise scale up the default pcie root port window
sizes, to have more room for hotplug.

For that to work the firmware obviously needs to know how much it
actually has, which is not the case.

Yes, the problematic cases are intel machines with 36 or 39.

Treating 40 as if it were 39 will explode with 36 cpus.

Treating 40 as if it were 36 will mostly work.  Will leave a big
chunk of address space unused.  Will cause regressions on guests
with > 32G of RAM.

Treating 40 as invalid and continue to use the current conservative
heuristic, otherwise treat phys-bits as valid might work.  Obvious
corner case is that it'll not catch broken manual configurations
(host-phys-bits=off,phys-bits=<larger-than-host>), only the broken
default.  Not sure how much of a problem that is in practice, maybe
it isn't.

I think I still prefer to explicitly communicate a reliable phys-bits
value to the guest somehow.

take care,
  Gerd

^ permalink raw reply	[flat|nested] 13+ messages in thread

* Re: [PATCH v4] x86: add etc/phys-bits fw_cfg file
  2022-09-22 14:16       ` Gerd Hoffmann
@ 2022-09-22 17:13         ` Jim Mattson
  2022-09-22 19:49           ` Paolo Bonzini
  0 siblings, 1 reply; 13+ messages in thread
From: Jim Mattson @ 2022-09-22 17:13 UTC (permalink / raw)
  To: Gerd Hoffmann
  Cc: Paolo Bonzini, Daniel P. Berrangé, qemu-devel, Sergio Lopez,
	Eduardo Habkost, Marcel Apfelbaum, Richard Henderson, kvm,
	Marcelo Tosatti, Michael S. Tsirkin

On Thu, Sep 22, 2022 at 7:16 AM Gerd Hoffmann <kraxel@redhat.com> wrote:
>
> On Thu, Sep 22, 2022 at 02:38:02PM +0200, Paolo Bonzini wrote:
> > On Thu, Sep 22, 2022 at 2:21 PM Gerd Hoffmann <kraxel@redhat.com> wrote:
> > > No.  This will basically inform the guest that host-phys-bits has been
> > > enabled (and pass the number of bits).  So the firmware can make use of
> > > the available address space instead of trying to be as conservative as
> > > possible to avoid going beyond the (unknown) limit.
> >
> > Intel processors that are not extremely old have host-phys-bits equal
> > to 39, 46 or 52. Older processors that had 36, in all likelihood,
> > didn't have IOMMUs (so no big 64-bit BARs).
>
> Well, I happen to have a intel box with 36 physbits + iommu.
>
> > 1) set host-phys-bits to true on new machine types when not using TCG
> > (i.e. KVM / HVF / WHPX)
>
> That is probably a good idea, but an independent problem.
>
> Has live migration problems (when hosts have different phys bits),
> which is IIRC the reason this hasn't happen yet.  Maybe that is solved
> meanwhile the one way or another, I've seen some phys-bits changes in
> libvirt recently ...
>
> > 2) in the firmware treat 40 as if it were 39, to support old machine
> > types?
>
> The background of all this is that devices need more and more memory,
> and the very conservative edk2 defaults are becoming increasingly
> problematic.  So what I want do is scale things up with the address
> space size.  Use 1/4 or 1/8 of the physical address space as 64bit
> pci mmio window.  Likewise scale up the default pcie root port window
> sizes, to have more room for hotplug.
>
> For that to work the firmware obviously needs to know how much it
> actually has, which is not the case.
>
> Yes, the problematic cases are intel machines with 36 or 39.
>
> Treating 40 as if it were 39 will explode with 36 cpus.
>
> Treating 40 as if it were 36 will mostly work.  Will leave a big
> chunk of address space unused.  Will cause regressions on guests
> with > 32G of RAM.
>
> Treating 40 as invalid and continue to use the current conservative
> heuristic, otherwise treat phys-bits as valid might work.  Obvious
> corner case is that it'll not catch broken manual configurations
> (host-phys-bits=off,phys-bits=<larger-than-host>), only the broken
> default.  Not sure how much of a problem that is in practice, maybe
> it isn't.
>
> I think I still prefer to explicitly communicate a reliable phys-bits
> value to the guest somehow.

On x86 hardware, KVM is incapable of emulating a guest physical width
that differs from the host physical width. There isn't support in the
hardware for it.


^ permalink raw reply	[flat|nested] 13+ messages in thread

* Re: [PATCH v4] x86: add etc/phys-bits fw_cfg file
  2022-09-22 17:13         ` Jim Mattson
@ 2022-09-22 19:49           ` Paolo Bonzini
  0 siblings, 0 replies; 13+ messages in thread
From: Paolo Bonzini @ 2022-09-22 19:49 UTC (permalink / raw)
  To: Jim Mattson
  Cc: Gerd Hoffmann, Daniel P. Berrangé, qemu-devel, Sergio Lopez,
	Eduardo Habkost, Marcel Apfelbaum, Richard Henderson, kvm,
	Marcelo Tosatti, Michael S. Tsirkin

On Thu, Sep 22, 2022 at 7:13 PM Jim Mattson <jmattson@google.com> wrote:
> > Treating 40 as invalid and continue to use the current conservative
> > heuristic, otherwise treat phys-bits as valid might work.  Obvious
> > corner case is that it'll not catch broken manual configurations
> > (host-phys-bits=off,phys-bits=<larger-than-host>), only the broken
> > default.  Not sure how much of a problem that is in practice, maybe
> > it isn't.
> >
> > I think I still prefer to explicitly communicate a reliable phys-bits
> > value to the guest somehow.
>
> On x86 hardware, KVM is incapable of emulating a guest physical width
> that differs from the host physical width. There isn't support in the
> hardware for it.

Indeed, everything else is a userspace bug. Especially since here
we're talking of host_maxphyaddr < guest_maxphyaddr, which is
completely impossible.

Paolo



^ permalink raw reply	[flat|nested] 13+ messages in thread

* Re: [PATCH v4] x86: add etc/phys-bits fw_cfg file
  2022-09-22 12:38     ` Paolo Bonzini
  2022-09-22 14:16       ` Gerd Hoffmann
@ 2022-09-22 20:33       ` Gerd Hoffmann
  2022-09-23  6:00         ` Paolo Bonzini
  1 sibling, 1 reply; 13+ messages in thread
From: Gerd Hoffmann @ 2022-09-22 20:33 UTC (permalink / raw)
  To: Paolo Bonzini
  Cc: Daniel P. Berrangé, qemu-devel, Sergio Lopez,
	Eduardo Habkost, Marcel Apfelbaum, Richard Henderson, kvm,
	Marcelo Tosatti, Michael S. Tsirkin

On Thu, Sep 22, 2022 at 02:38:02PM +0200, Paolo Bonzini wrote:
> On Thu, Sep 22, 2022 at 2:21 PM Gerd Hoffmann <kraxel@redhat.com> wrote:
> > No.  This will basically inform the guest that host-phys-bits has been
> > enabled (and pass the number of bits).  So the firmware can make use of
> > the available address space instead of trying to be as conservative as
> > possible to avoid going beyond the (unknown) limit.
> 
> Intel processors that are not extremely old have host-phys-bits equal
> to 39, 46 or 52. Older processors that had 36, in all likelihood,
> didn't have IOMMUs (so no big 64-bit BARs).
> 
> AMD processors have had 48 for a while, though older consumer processors had 40.

How reliable is the vendorid?

Given newer processors have more than 40 and for older ones we know
the possible values for the two relevant x86 vendors we could do
something along the lines of:

   phys-bits >= 41                   -> valid
   phys-bits == 40    + AuthenticAMD -> valid
   phys-bits == 36,39 + GenuineIntel -> valid
   everything else                   -> invalid

Does that look sensible to you?

take care,
  Gerd



^ permalink raw reply	[flat|nested] 13+ messages in thread

* Re: [PATCH v4] x86: add etc/phys-bits fw_cfg file
  2022-09-22 20:33       ` Gerd Hoffmann
@ 2022-09-23  6:00         ` Paolo Bonzini
  2022-09-23  6:23           ` Gerd Hoffmann
  0 siblings, 1 reply; 13+ messages in thread
From: Paolo Bonzini @ 2022-09-23  6:00 UTC (permalink / raw)
  To: Gerd Hoffmann
  Cc: Daniel P. Berrangé, qemu-devel, Sergio Lopez,
	Eduardo Habkost, Marcel Apfelbaum, Richard Henderson, kvm,
	Marcelo Tosatti, Michael S. Tsirkin

[-- Attachment #1: Type: text/plain, Size: 1543 bytes --]

Il gio 22 set 2022, 22:33 Gerd Hoffmann <kraxel@redhat.com> ha scritto:

> On Thu, Sep 22, 2022 at 02:38:02PM +0200, Paolo Bonzini wrote:
> > On Thu, Sep 22, 2022 at 2:21 PM Gerd Hoffmann <kraxel@redhat.com> wrote:
> > > No.  This will basically inform the guest that host-phys-bits has been
> > > enabled (and pass the number of bits).  So the firmware can make use of
> > > the available address space instead of trying to be as conservative as
> > > possible to avoid going beyond the (unknown) limit.
> >
> > Intel processors that are not extremely old have host-phys-bits equal
> > to 39, 46 or 52. Older processors that had 36, in all likelihood,
> > didn't have IOMMUs (so no big 64-bit BARs).
> >
> > AMD processors have had 48 for a while, though older consumer processors
> had 40.
>
> How reliable is the vendorid?
>

Pretty reliable. In principle it can be changed, but there's no good reason
to do it (especially in a long lived VM) and it requires manual command
line intervention.


> Given newer processors have more than 40 and for older ones we know
> the possible values for the two relevant x86 vendors we could do
> something along the lines of:
>
>    phys-bits >= 41                   -> valid
>    phys-bits == 40    + AuthenticAMD -> valid
>    phys-bits == 36,39 + GenuineIntel -> valid
>    everything else                   -> invalid
>
> Does that look sensible to you?
>

Yes, it does! Is phys-bits == 36 the same as invalid? If so that's even one
fewer special case to handle.

Paolo


> take care,
>   Gerd
>
>

[-- Attachment #2: Type: text/html, Size: 2615 bytes --]

^ permalink raw reply	[flat|nested] 13+ messages in thread

* Re: [PATCH v4] x86: add etc/phys-bits fw_cfg file
  2022-09-23  6:00         ` Paolo Bonzini
@ 2022-09-23  6:23           ` Gerd Hoffmann
  2022-10-07 13:44             ` Michael S. Tsirkin
  0 siblings, 1 reply; 13+ messages in thread
From: Gerd Hoffmann @ 2022-09-23  6:23 UTC (permalink / raw)
  To: Paolo Bonzini
  Cc: Daniel P. Berrangé, qemu-devel, Sergio Lopez,
	Eduardo Habkost, Marcel Apfelbaum, Richard Henderson, kvm,
	Marcelo Tosatti, Michael S. Tsirkin

  Hi,

> > Given newer processors have more than 40 and for older ones we know
> > the possible values for the two relevant x86 vendors we could do
> > something along the lines of:
> >
> >    phys-bits >= 41                   -> valid
> >    phys-bits == 40    + AuthenticAMD -> valid
> >    phys-bits == 36,39 + GenuineIntel -> valid
> >    everything else                   -> invalid
> >
> > Does that look sensible to you?
> >
> 
> Yes, it does! Is phys-bits == 36 the same as invalid?

'invalid' would continue to use the current guesswork codepath for
phys-bits.  Which will end up with phys-bits = 36 for smaller VMs, but
it can go beyond that in VMs with alot (32G or more) of memory.  That
logic assumes that physical machines with enough RAM for 32G+ guests
have a physical address space > 64G.

'phys-bits = 36' would be a hard limit.

So, it's not exactly the same but small VMs wouldn't see a difference.

take care,
  Gerd



^ permalink raw reply	[flat|nested] 13+ messages in thread

* Re: [PATCH v4] x86: add etc/phys-bits fw_cfg file
  2022-09-23  6:23           ` Gerd Hoffmann
@ 2022-10-07 13:44             ` Michael S. Tsirkin
  2022-10-10  7:30               ` Gerd Hoffmann
  0 siblings, 1 reply; 13+ messages in thread
From: Michael S. Tsirkin @ 2022-10-07 13:44 UTC (permalink / raw)
  To: Gerd Hoffmann
  Cc: Paolo Bonzini, Daniel P. Berrangé, qemu-devel, Sergio Lopez,
	Eduardo Habkost, Marcel Apfelbaum, Richard Henderson, kvm,
	Marcelo Tosatti

On Fri, Sep 23, 2022 at 08:23:12AM +0200, Gerd Hoffmann wrote:
>   Hi,
> 
> > > Given newer processors have more than 40 and for older ones we know
> > > the possible values for the two relevant x86 vendors we could do
> > > something along the lines of:
> > >
> > >    phys-bits >= 41                   -> valid
> > >    phys-bits == 40    + AuthenticAMD -> valid
> > >    phys-bits == 36,39 + GenuineIntel -> valid
> > >    everything else                   -> invalid
> > >
> > > Does that look sensible to you?
> > >
> > 
> > Yes, it does! Is phys-bits == 36 the same as invalid?
> 
> 'invalid' would continue to use the current guesswork codepath for
> phys-bits.  Which will end up with phys-bits = 36 for smaller VMs, but
> it can go beyond that in VMs with alot (32G or more) of memory.  That
> logic assumes that physical machines with enough RAM for 32G+ guests
> have a physical address space > 64G.
> 
> 'phys-bits = 36' would be a hard limit.
> 
> So, it's not exactly the same but small VMs wouldn't see a difference.
> 
> take care,
>   Gerd

I dropped the patch for now.

-- 
MST



^ permalink raw reply	[flat|nested] 13+ messages in thread

* Re: [PATCH v4] x86: add etc/phys-bits fw_cfg file
  2022-10-07 13:44             ` Michael S. Tsirkin
@ 2022-10-10  7:30               ` Gerd Hoffmann
  0 siblings, 0 replies; 13+ messages in thread
From: Gerd Hoffmann @ 2022-10-10  7:30 UTC (permalink / raw)
  To: Michael S. Tsirkin
  Cc: Paolo Bonzini, Daniel P. Berrangé, qemu-devel, Sergio Lopez,
	Eduardo Habkost, Marcel Apfelbaum, Richard Henderson, kvm,
	Marcelo Tosatti

  Hi,

> > > > Given newer processors have more than 40 and for older ones we know
> > > > the possible values for the two relevant x86 vendors we could do
> > > > something along the lines of:
> > > >
> > > >    phys-bits >= 41                   -> valid
> > > >    phys-bits == 40    + AuthenticAMD -> valid
> > > >    phys-bits == 36,39 + GenuineIntel -> valid
> > > >    everything else                   -> invalid

> I dropped the patch for now.

You can drop it forever.

For the mail archives and anyone interested:  The approach outlined
above appears to work well, patches just landed in edk2 master branch.
Next edk2 stable tag (2022-11) will have it.

take care,
  Gerd



^ permalink raw reply	[flat|nested] 13+ messages in thread

end of thread, other threads:[~2022-10-10 12:22 UTC | newest]

Thread overview: 13+ messages (download: mbox.gz follow: Atom feed
-- links below jump to the message on this page --
2022-09-22 10:14 [PATCH v4] x86: add etc/phys-bits fw_cfg file Gerd Hoffmann
2022-09-22 11:24 ` Daniel P. Berrangé
2022-09-22 11:56   ` Michael S. Tsirkin
2022-09-22 12:20   ` Gerd Hoffmann
2022-09-22 12:38     ` Paolo Bonzini
2022-09-22 14:16       ` Gerd Hoffmann
2022-09-22 17:13         ` Jim Mattson
2022-09-22 19:49           ` Paolo Bonzini
2022-09-22 20:33       ` Gerd Hoffmann
2022-09-23  6:00         ` Paolo Bonzini
2022-09-23  6:23           ` Gerd Hoffmann
2022-10-07 13:44             ` Michael S. Tsirkin
2022-10-10  7:30               ` Gerd Hoffmann

This is a public inbox, see mirroring instructions
for how to clone and mirror all data and code used for this inbox;
as well as URLs for NNTP newsgroup(s).