[PATCH v3 0/6] x86/HVM: load state checking

All of lore.kernel.org
 help / color / mirror / Atom feed

* [PATCH v3 0/6] x86/HVM: load state checking
@ 2023-11-28 10:32 Jan Beulich
  2023-11-28 10:33 ` [PATCH v3 1/6] x86/HVM: introduce hvm_get_entry() Jan Beulich
                   ` (5 more replies)
  0 siblings, 6 replies; 21+ messages in thread
From: Jan Beulich @ 2023-11-28 10:32 UTC (permalink / raw)
  To: xen-devel@lists.xenproject.org
  Cc: Andrew Cooper, Wei Liu, Roger Pau Monné

With the request to convert bounding to actual refusal, and then
doing so in new hooks, the two previously separate patches now
need to be in a series, with infrastructure work done first.
Clearly the checking in other load handlers could (and likely
wants to) be moved to separate check handlers as well, down the
road.

1: HVM: introduce hvm_get_entry()
2: HVM: split restore state checking from state loading
3: HVM: adjust save/restore hook registration for optional check handler
4: vPIT: check values loaded from state save record
5: vPIC: vpic_elcr_mask() master bit 2 control
6: vPIC: check values loaded from state save record

Jan


^ permalink raw reply	[flat|nested] 21+ messages in thread

* [PATCH v3 1/6] x86/HVM: introduce hvm_get_entry()
  2023-11-28 10:32 [PATCH v3 0/6] x86/HVM: load state checking Jan Beulich
@ 2023-11-28 10:33 ` Jan Beulich
  2023-11-28 10:34 ` [PATCH v3 2/6] x86/HVM: split restore state checking from state loading Jan Beulich
                   ` (4 subsequent siblings)
  5 siblings, 0 replies; 21+ messages in thread
From: Jan Beulich @ 2023-11-28 10:33 UTC (permalink / raw)
  To: xen-devel@lists.xenproject.org
  Cc: Andrew Cooper, Wei Liu, Roger Pau Monné

... to accompany hvm_read_entry() when actual copying isn't desirable.
This allows removing open-coded stream accesses from hpet_load(),
along with using the helper in hvm_load() itself.

Since arch_hvm_load()'s declaration would need changing, and since the
function is not used from elsewhere, purge the declaration. With that it
makes little sense to keep arch_hvm_save()'s around; convert that
function to static then at the same time.

In hpet_load() simplify the specific case of error return that's in
context anyway: There's no need to hold the lock when only updating a
local variable.

Signed-off-by: Jan Beulich <jbeulich@suse.com>
Acked-by: Andrew Cooper <andrew.cooper3@citrix.com>
---
v3: Rename to hvm_get_entry().
v2: New.

--- a/xen/arch/x86/hvm/hpet.c
+++ b/xen/arch/x86/hvm/hpet.c
@@ -637,7 +637,7 @@ static int cf_check hpet_save(struct vcp
 static int cf_check hpet_load(struct domain *d, hvm_domain_context_t *h)
 {
     HPETState *hp = domain_vhpet(d);
-    struct hvm_hw_hpet *rec;
+    const struct hvm_hw_hpet *rec;
     uint64_t cmp;
     uint64_t guest_time;
     int i;
@@ -645,17 +645,12 @@ static int cf_check hpet_load(struct dom
     if ( !has_vhpet(d) )
         return -ENODEV;
 
-    write_lock(&hp->lock);
-
     /* Reload the HPET registers */
-    if ( _hvm_check_entry(h, HVM_SAVE_CODE(HPET), HVM_SAVE_LENGTH(HPET), 1) )
-    {
-        write_unlock(&hp->lock);
+    rec = hvm_get_entry(HPET, h);
+    if ( !rec )
         return -EINVAL;
-    }
 
-    rec = (struct hvm_hw_hpet *)&h->data[h->cur];
-    h->cur += HVM_SAVE_LENGTH(HPET);
+    write_lock(&hp->lock);
 
 #define C(x) hp->hpet.x = rec->x
     C(capability);
--- a/xen/arch/x86/hvm/save.c
+++ b/xen/arch/x86/hvm/save.c
@@ -15,7 +15,7 @@
 
 #include <public/hvm/save.h>
 
-void arch_hvm_save(struct domain *d, struct hvm_save_header *hdr)
+static void arch_hvm_save(struct domain *d, struct hvm_save_header *hdr)
 {
     uint32_t eax, ebx, ecx, edx;
 
@@ -30,7 +30,7 @@ void arch_hvm_save(struct domain *d, str
     d->arch.hvm.sync_tsc = rdtsc();
 }
 
-int arch_hvm_load(struct domain *d, struct hvm_save_header *hdr)
+static int arch_hvm_load(struct domain *d, const struct hvm_save_header *hdr)
 {
     uint32_t eax, ebx, ecx, edx;
 
@@ -277,7 +277,7 @@ int hvm_save(struct domain *d, hvm_domai
 
 int hvm_load(struct domain *d, hvm_domain_context_t *h)
 {
-    struct hvm_save_header hdr;
+    const struct hvm_save_header *hdr;
     struct hvm_save_descriptor *desc;
     hvm_load_handler handler;
     struct vcpu *v;
@@ -286,11 +286,12 @@ int hvm_load(struct domain *d, hvm_domai
     if ( d->is_dying )
         return -EINVAL;
 
-    /* Read the save header, which must be first */
-    if ( hvm_load_entry(HEADER, h, &hdr) != 0 )
+    /* Get at the save header, which must be first */
+    hdr = hvm_get_entry(HEADER, h);
+    if ( !hdr )
         return -ENODATA;
 
-    rc = arch_hvm_load(d, &hdr);
+    rc = arch_hvm_load(d, hdr);
     if ( rc )
         return rc;
 
--- a/xen/arch/x86/include/asm/hvm/save.h
+++ b/xen/arch/x86/include/asm/hvm/save.h
@@ -39,6 +39,21 @@ void _hvm_write_entry(struct hvm_domain_
 int _hvm_check_entry(struct hvm_domain_context *h,
                      uint16_t type, uint32_t len, bool strict_length);
 
+/*
+ * Unmarshalling: check, then return pointer. Evaluates to non-NULL on success.
+ * This macro requires the save entry to be the same size as the dest structure.
+ */
+#define hvm_get_entry(x, h) ({                                  \
+    const void *ptr = NULL;                                     \
+    BUILD_BUG_ON(HVM_SAVE_HAS_COMPAT(x));                       \
+    if ( _hvm_check_entry(h, HVM_SAVE_CODE(x),                  \
+                          HVM_SAVE_LENGTH(x), true) == 0 )      \
+    {                                                           \
+        ptr = &(h)->data[(h)->cur];                             \
+        h->cur += HVM_SAVE_LENGTH(x);                           \
+    }                                                           \
+    ptr; })
+
 /* Unmarshalling: copy the contents in a type-safe way */
 void _hvm_read_entry(struct hvm_domain_context *h,
                      void *dest, uint32_t dest_len);
@@ -127,9 +142,4 @@ int hvm_save_one(struct domain *d, unsig
                  XEN_GUEST_HANDLE_64(uint8) handle, uint64_t *bufsz);
 int hvm_load(struct domain *d, hvm_domain_context_t *h);
 
-/* Arch-specific definitions. */
-struct hvm_save_header;
-void arch_hvm_save(struct domain *d, struct hvm_save_header *hdr);
-int arch_hvm_load(struct domain *d, struct hvm_save_header *hdr);
-
 #endif /* __XEN_HVM_SAVE_H__ */



^ permalink raw reply	[flat|nested] 21+ messages in thread

* [PATCH v3 2/6] x86/HVM: split restore state checking from state loading
  2023-11-28 10:32 [PATCH v3 0/6] x86/HVM: load state checking Jan Beulich
  2023-11-28 10:33 ` [PATCH v3 1/6] x86/HVM: introduce hvm_get_entry() Jan Beulich
@ 2023-11-28 10:34 ` Jan Beulich
  2023-12-04 17:27   ` Roger Pau Monné
  2023-11-28 10:34 ` [PATCH v3 3/6] x86/HVM: adjust save/restore hook registration for optional check handler Jan Beulich
                   ` (3 subsequent siblings)
  5 siblings, 1 reply; 21+ messages in thread
From: Jan Beulich @ 2023-11-28 10:34 UTC (permalink / raw)
  To: xen-devel@lists.xenproject.org
  Cc: Andrew Cooper, Wei Liu, Roger Pau Monné

..., at least as reasonably feasible without making a check hook
mandatory (in particular strict vs relaxed/zero-extend length checking
can't be done early this way).

Note that only one of the two uses of hvm_load() is accompanied with
hvm_check(). The other directly consumes hvm_save() output, which ought
to be well-formed. This means that while input data related checks don't
need repeating in the "load" function when already done by the "check"
one (albeit assertions to this effect may be desirable), domain state
related checks (e.g. has_xyz(d)) will be required in both places.

Suggested-by: Roger Pau Monné <roger.pau@citrix.com>
Signed-off-by: Jan Beulich <jbeulich@suse.com>
---
Do we really need all the copying involved in use of _hvm_read_entry()
(backing hvm_load_entry())? Zero-extending loads are likely easier to
handle that way, but for strict loads all we gain is a reduced risk of
unaligned accesses (compared to simply pointing into h->data[]).

Would the hvm_sr_handlers[] better use array_access_nospec()?
---
v2: New.

--- a/xen/arch/x86/domctl.c
+++ b/xen/arch/x86/domctl.c
@@ -379,6 +379,10 @@ long arch_do_domctl(
         if ( copy_from_guest(c.data, domctl->u.hvmcontext.buffer, c.size) != 0 )
             goto sethvmcontext_out;
 
+        ret = hvm_check(d, &c);
+        if ( ret )
+            goto sethvmcontext_out;
+
         domain_pause(d);
         ret = hvm_load(d, &c);
         domain_unpause(d);
--- a/xen/arch/x86/hvm/save.c
+++ b/xen/arch/x86/hvm/save.c
@@ -30,7 +30,8 @@ static void arch_hvm_save(struct domain
     d->arch.hvm.sync_tsc = rdtsc();
 }
 
-static int arch_hvm_load(struct domain *d, const struct hvm_save_header *hdr)
+static int arch_hvm_check(const struct domain *d,
+                          const struct hvm_save_header *hdr)
 {
     uint32_t eax, ebx, ecx, edx;
 
@@ -55,6 +56,11 @@ static int arch_hvm_load(struct domain *
                "(%#"PRIx32") and restored on another (%#"PRIx32").\n",
                d->domain_id, hdr->cpuid, eax);
 
+    return 0;
+}
+
+static void arch_hvm_load(struct domain *d, const struct hvm_save_header *hdr)
+{
     /* Restore guest's preferred TSC frequency. */
     if ( hdr->gtsc_khz )
         d->arch.tsc_khz = hdr->gtsc_khz;
@@ -66,13 +72,12 @@ static int arch_hvm_load(struct domain *
 
     /* VGA state is not saved/restored, so we nobble the cache. */
     d->arch.hvm.stdvga.cache = STDVGA_CACHE_DISABLED;
-
-    return 0;
 }
 
 /* List of handlers for various HVM save and restore types */
 static struct {
     hvm_save_handler save;
+    hvm_check_handler check;
     hvm_load_handler load;
     const char *name;
     size_t size;
@@ -88,6 +93,7 @@ void __init hvm_register_savevm(uint16_t
 {
     ASSERT(typecode <= HVM_SAVE_CODE_MAX);
     ASSERT(hvm_sr_handlers[typecode].save == NULL);
+    ASSERT(hvm_sr_handlers[typecode].check == NULL);
     ASSERT(hvm_sr_handlers[typecode].load == NULL);
     hvm_sr_handlers[typecode].save = save_state;
     hvm_sr_handlers[typecode].load = load_state;
@@ -275,6 +281,78 @@ int hvm_save(struct domain *d, hvm_domai
     return 0;
 }
 
+int hvm_check(const struct domain *d, hvm_domain_context_t *h)
+{
+    const struct hvm_save_header *hdr;
+    int rc;
+
+    if ( d->is_dying )
+        return -EINVAL;
+
+    /* Get at the save header, which must be first. */
+    hdr = hvm_get_entry(HEADER, h);
+    if ( !hdr )
+        return -ENODATA;
+
+    rc = arch_hvm_check(d, hdr);
+    if ( rc )
+        return rc;
+
+    for ( ; ; )
+    {
+        const struct hvm_save_descriptor *desc;
+        hvm_check_handler handler;
+
+        if ( h->size - h->cur < sizeof(*desc) )
+        {
+            /* Run out of data */
+            printk(XENLOG_G_ERR
+                   "HVM restore %pd: save did not end with a null entry\n",
+                   d);
+            return -ENODATA;
+        }
+
+        /* Read the typecode of the next entry and check for the end-marker. */
+        desc = (const void *)&h->data[h->cur];
+        if ( desc->typecode == HVM_SAVE_CODE(END) )
+        {
+            /* Reset cursor for hvm_load(). */
+            h->cur = 0;
+            return 0;
+        }
+
+        /* Find the handler for this entry. */
+        if ( desc->typecode >= ARRAY_SIZE(hvm_sr_handlers) ||
+             !hvm_sr_handlers[desc->typecode].name ||
+             !hvm_sr_handlers[desc->typecode].load )
+        {
+            printk(XENLOG_G_ERR "HVM restore %pd: unknown entry typecode %u\n",
+                   d, desc->typecode);
+            return -EINVAL;
+        }
+
+        /* Check the entry. */
+        handler = hvm_sr_handlers[desc->typecode].check;
+        if ( !handler )
+        {
+            if ( desc->length > h->size - h->cur - sizeof(*desc) )
+                return -ENODATA;
+            h->cur += sizeof(*desc) + desc->length;
+        }
+        else if ( (rc = handler(d, h)) )
+        {
+            printk(XENLOG_G_ERR
+                   "HVM restore %pd: failed to check %s:%u rc %d\n",
+                   d, hvm_sr_handlers[desc->typecode].name, desc->instance, rc);
+            return rc;
+        }
+
+        process_pending_softirqs();
+    }
+
+    /* Not reached */
+}
+
 int hvm_load(struct domain *d, hvm_domain_context_t *h)
 {
     const struct hvm_save_header *hdr;
@@ -291,9 +369,8 @@ int hvm_load(struct domain *d, hvm_domai
     if ( !hdr )
         return -ENODATA;
 
-    rc = arch_hvm_load(d, hdr);
-    if ( rc )
-        return rc;
+    ASSERT(!arch_hvm_check(d, hdr));
+    arch_hvm_load(d, hdr);
 
     /* Down all the vcpus: we only re-enable the ones that had state saved. */
     for_each_vcpu(d, v)
@@ -304,10 +381,7 @@ int hvm_load(struct domain *d, hvm_domai
     {
         if ( h->size - h->cur < sizeof(struct hvm_save_descriptor) )
         {
-            /* Run out of data */
-            printk(XENLOG_G_ERR
-                   "HVM%d restore: save did not end with a null entry\n",
-                   d->domain_id);
+            ASSERT_UNREACHABLE();
             return -ENODATA;
         }
 
@@ -320,8 +394,7 @@ int hvm_load(struct domain *d, hvm_domai
         if ( (desc->typecode > HVM_SAVE_CODE_MAX) ||
              ((handler = hvm_sr_handlers[desc->typecode].load) == NULL) )
         {
-            printk(XENLOG_G_ERR "HVM%d restore: unknown entry typecode %u\n",
-                   d->domain_id, desc->typecode);
+            ASSERT_UNREACHABLE();
             return -EINVAL;
         }
 
--- a/xen/arch/x86/include/asm/hvm/save.h
+++ b/xen/arch/x86/include/asm/hvm/save.h
@@ -103,6 +103,8 @@ static inline unsigned int hvm_load_inst
  * restoring.  Both return non-zero on error. */
 typedef int (*hvm_save_handler) (struct vcpu *v,
                                  hvm_domain_context_t *h);
+typedef int (*hvm_check_handler)(const struct domain *d,
+                                 hvm_domain_context_t *h);
 typedef int (*hvm_load_handler) (struct domain *d,
                                  hvm_domain_context_t *h);
 
@@ -140,6 +142,7 @@ size_t hvm_save_size(struct domain *d);
 int hvm_save(struct domain *d, hvm_domain_context_t *h);
 int hvm_save_one(struct domain *d, unsigned int typecode, unsigned int instance,
                  XEN_GUEST_HANDLE_64(uint8) handle, uint64_t *bufsz);
+int hvm_check(const struct domain *d, hvm_domain_context_t *h);
 int hvm_load(struct domain *d, hvm_domain_context_t *h);
 
 #endif /* __XEN_HVM_SAVE_H__ */



^ permalink raw reply	[flat|nested] 21+ messages in thread

* [PATCH v3 3/6] x86/HVM: adjust save/restore hook registration for optional check handler
  2023-11-28 10:32 [PATCH v3 0/6] x86/HVM: load state checking Jan Beulich
  2023-11-28 10:33 ` [PATCH v3 1/6] x86/HVM: introduce hvm_get_entry() Jan Beulich
  2023-11-28 10:34 ` [PATCH v3 2/6] x86/HVM: split restore state checking from state loading Jan Beulich
@ 2023-11-28 10:34 ` Jan Beulich
  2023-11-28 10:35 ` [PATCH v3 4/6] x86/vPIT: check values loaded from state save record Jan Beulich
                   ` (2 subsequent siblings)
  5 siblings, 0 replies; 21+ messages in thread
From: Jan Beulich @ 2023-11-28 10:34 UTC (permalink / raw)
  To: xen-devel@lists.xenproject.org
  Cc: Andrew Cooper, Wei Liu, Roger Pau Monné, Paul Durrant

Register NULL uniformly as a first step.

Signed-off-by: Jan Beulich <jbeulich@suse.com>
Acked-by: Andrew Cooper <andrew.cooper3@citrix.com>
---
v2: New.

--- a/xen/arch/x86/cpu/mcheck/vmce.c
+++ b/xen/arch/x86/cpu/mcheck/vmce.c
@@ -374,7 +374,7 @@ static int cf_check vmce_load_vcpu_ctxt(
     return err ?: vmce_restore_vcpu(v, &ctxt);
 }
 
-HVM_REGISTER_SAVE_RESTORE(VMCE_VCPU, vmce_save_vcpu_ctxt,
+HVM_REGISTER_SAVE_RESTORE(VMCE_VCPU, vmce_save_vcpu_ctxt, NULL,
                           vmce_load_vcpu_ctxt, 1, HVMSR_PER_VCPU);
 #endif
 
--- a/xen/arch/x86/emul-i8254.c
+++ b/xen/arch/x86/emul-i8254.c
@@ -458,7 +458,7 @@ static int cf_check pit_load(struct doma
     return rc;
 }
 
-HVM_REGISTER_SAVE_RESTORE(PIT, pit_save, pit_load, 1, HVMSR_PER_DOM);
+HVM_REGISTER_SAVE_RESTORE(PIT, pit_save, NULL, pit_load, 1, HVMSR_PER_DOM);
 #endif
 
 /* The intercept action for PIT DM retval: 0--not handled; 1--handled. */
--- a/xen/arch/x86/hvm/hpet.c
+++ b/xen/arch/x86/hvm/hpet.c
@@ -692,7 +692,7 @@ static int cf_check hpet_load(struct dom
     return 0;
 }
 
-HVM_REGISTER_SAVE_RESTORE(HPET, hpet_save, hpet_load, 1, HVMSR_PER_DOM);
+HVM_REGISTER_SAVE_RESTORE(HPET, hpet_save, NULL, hpet_load, 1, HVMSR_PER_DOM);
 
 static void hpet_set(HPETState *h)
 {
--- a/xen/arch/x86/hvm/hvm.c
+++ b/xen/arch/x86/hvm/hvm.c
@@ -793,7 +793,7 @@ static int cf_check hvm_load_tsc_adjust(
     return 0;
 }
 
-HVM_REGISTER_SAVE_RESTORE(TSC_ADJUST, hvm_save_tsc_adjust,
+HVM_REGISTER_SAVE_RESTORE(TSC_ADJUST, hvm_save_tsc_adjust, NULL,
                           hvm_load_tsc_adjust, 1, HVMSR_PER_VCPU);
 
 static int cf_check hvm_save_cpu_ctxt(struct vcpu *v, hvm_domain_context_t *h)
@@ -1189,7 +1189,7 @@ static int cf_check hvm_load_cpu_ctxt(st
     return 0;
 }
 
-HVM_REGISTER_SAVE_RESTORE(CPU, hvm_save_cpu_ctxt, hvm_load_cpu_ctxt, 1,
+HVM_REGISTER_SAVE_RESTORE(CPU, hvm_save_cpu_ctxt, NULL, hvm_load_cpu_ctxt, 1,
                           HVMSR_PER_VCPU);
 
 #define HVM_CPU_XSAVE_SIZE(xcr0) (offsetof(struct hvm_hw_cpu_xsave, \
@@ -1538,6 +1538,7 @@ static int __init cf_check hvm_register_
     hvm_register_savevm(CPU_XSAVE_CODE,
                         "CPU_XSAVE",
                         hvm_save_cpu_xsave_states,
+                        NULL,
                         hvm_load_cpu_xsave_states,
                         HVM_CPU_XSAVE_SIZE(xfeature_mask) +
                             sizeof(struct hvm_save_descriptor),
@@ -1546,6 +1547,7 @@ static int __init cf_check hvm_register_
     hvm_register_savevm(CPU_MSR_CODE,
                         "CPU_MSR",
                         hvm_save_cpu_msrs,
+                        NULL,
                         hvm_load_cpu_msrs,
                         HVM_CPU_MSR_SIZE(ARRAY_SIZE(msrs_to_send)) +
                             sizeof(struct hvm_save_descriptor),
--- a/xen/arch/x86/hvm/irq.c
+++ b/xen/arch/x86/hvm/irq.c
@@ -784,9 +784,9 @@ static int cf_check irq_load_link(struct
     return 0;
 }
 
-HVM_REGISTER_SAVE_RESTORE(PCI_IRQ, irq_save_pci, irq_load_pci,
+HVM_REGISTER_SAVE_RESTORE(PCI_IRQ, irq_save_pci, NULL, irq_load_pci,
                           1, HVMSR_PER_DOM);
-HVM_REGISTER_SAVE_RESTORE(ISA_IRQ, irq_save_isa, irq_load_isa,
+HVM_REGISTER_SAVE_RESTORE(ISA_IRQ, irq_save_isa, NULL, irq_load_isa,
                           1, HVMSR_PER_DOM);
-HVM_REGISTER_SAVE_RESTORE(PCI_LINK, irq_save_link, irq_load_link,
+HVM_REGISTER_SAVE_RESTORE(PCI_LINK, irq_save_link, NULL, irq_load_link,
                           1, HVMSR_PER_DOM);
--- a/xen/arch/x86/hvm/mtrr.c
+++ b/xen/arch/x86/hvm/mtrr.c
@@ -773,7 +773,7 @@ static int cf_check hvm_load_mtrr_msr(st
     return 0;
 }
 
-HVM_REGISTER_SAVE_RESTORE(MTRR, hvm_save_mtrr_msr, hvm_load_mtrr_msr, 1,
+HVM_REGISTER_SAVE_RESTORE(MTRR, hvm_save_mtrr_msr, NULL, hvm_load_mtrr_msr, 1,
                           HVMSR_PER_VCPU);
 
 void memory_type_changed(struct domain *d)
--- a/xen/arch/x86/hvm/pmtimer.c
+++ b/xen/arch/x86/hvm/pmtimer.c
@@ -300,7 +300,7 @@ static int cf_check acpi_load(struct dom
     return 0;
 }
 
-HVM_REGISTER_SAVE_RESTORE(PMTIMER, acpi_save, acpi_load,
+HVM_REGISTER_SAVE_RESTORE(PMTIMER, acpi_save, NULL, acpi_load,
                           1, HVMSR_PER_DOM);
 
 int pmtimer_change_ioport(struct domain *d, uint64_t version)
--- a/xen/arch/x86/hvm/rtc.c
+++ b/xen/arch/x86/hvm/rtc.c
@@ -797,7 +797,7 @@ static int cf_check rtc_load(struct doma
     return 0;
 }
 
-HVM_REGISTER_SAVE_RESTORE(RTC, rtc_save, rtc_load, 1, HVMSR_PER_DOM);
+HVM_REGISTER_SAVE_RESTORE(RTC, rtc_save, NULL, rtc_load, 1, HVMSR_PER_DOM);
 
 void rtc_reset(struct domain *d)
 {
--- a/xen/arch/x86/hvm/save.c
+++ b/xen/arch/x86/hvm/save.c
@@ -88,6 +88,7 @@ static struct {
 void __init hvm_register_savevm(uint16_t typecode,
                                 const char *name,
                                 hvm_save_handler save_state,
+                                hvm_check_handler check_state,
                                 hvm_load_handler load_state,
                                 size_t size, int kind)
 {
@@ -96,6 +97,7 @@ void __init hvm_register_savevm(uint16_t
     ASSERT(hvm_sr_handlers[typecode].check == NULL);
     ASSERT(hvm_sr_handlers[typecode].load == NULL);
     hvm_sr_handlers[typecode].save = save_state;
+    hvm_sr_handlers[typecode].check = check_state;
     hvm_sr_handlers[typecode].load = load_state;
     hvm_sr_handlers[typecode].name = name;
     hvm_sr_handlers[typecode].size = size;
--- a/xen/arch/x86/hvm/vioapic.c
+++ b/xen/arch/x86/hvm/vioapic.c
@@ -631,7 +631,8 @@ static int cf_check ioapic_load(struct d
     return 0;
 }
 
-HVM_REGISTER_SAVE_RESTORE(IOAPIC, ioapic_save, ioapic_load, 1, HVMSR_PER_DOM);
+HVM_REGISTER_SAVE_RESTORE(IOAPIC, ioapic_save, NULL, ioapic_load, 1,
+                          HVMSR_PER_DOM);
 
 void vioapic_reset(struct domain *d)
 {
--- a/xen/arch/x86/hvm/viridian/viridian.c
+++ b/xen/arch/x86/hvm/viridian/viridian.c
@@ -1145,7 +1145,7 @@ static int cf_check viridian_load_domain
     return 0;
 }
 
-HVM_REGISTER_SAVE_RESTORE(VIRIDIAN_DOMAIN, viridian_save_domain_ctxt,
+HVM_REGISTER_SAVE_RESTORE(VIRIDIAN_DOMAIN, viridian_save_domain_ctxt, NULL,
                           viridian_load_domain_ctxt, 1, HVMSR_PER_DOM);
 
 static int cf_check viridian_save_vcpu_ctxt(
@@ -1188,7 +1188,7 @@ static int cf_check viridian_load_vcpu_c
     return 0;
 }
 
-HVM_REGISTER_SAVE_RESTORE(VIRIDIAN_VCPU, viridian_save_vcpu_ctxt,
+HVM_REGISTER_SAVE_RESTORE(VIRIDIAN_VCPU, viridian_save_vcpu_ctxt, NULL,
                           viridian_load_vcpu_ctxt, 1, HVMSR_PER_VCPU);
 
 static int __init cf_check parse_viridian_version(const char *arg)
--- a/xen/arch/x86/hvm/vlapic.c
+++ b/xen/arch/x86/hvm/vlapic.c
@@ -1591,9 +1591,9 @@ static int cf_check lapic_load_regs(stru
     return 0;
 }
 
-HVM_REGISTER_SAVE_RESTORE(LAPIC, lapic_save_hidden,
+HVM_REGISTER_SAVE_RESTORE(LAPIC, lapic_save_hidden, NULL,
                           lapic_load_hidden, 1, HVMSR_PER_VCPU);
-HVM_REGISTER_SAVE_RESTORE(LAPIC_REGS, lapic_save_regs,
+HVM_REGISTER_SAVE_RESTORE(LAPIC_REGS, lapic_save_regs, NULL,
                           lapic_load_regs, 1, HVMSR_PER_VCPU);
 
 int vlapic_init(struct vcpu *v)
--- a/xen/arch/x86/hvm/vpic.c
+++ b/xen/arch/x86/hvm/vpic.c
@@ -449,7 +449,7 @@ static int cf_check vpic_load(struct dom
     return 0;
 }
 
-HVM_REGISTER_SAVE_RESTORE(PIC, vpic_save, vpic_load, 2, HVMSR_PER_DOM);
+HVM_REGISTER_SAVE_RESTORE(PIC, vpic_save, NULL, vpic_load, 2, HVMSR_PER_DOM);
 
 void vpic_reset(struct domain *d)
 {
--- a/xen/arch/x86/include/asm/hvm/save.h
+++ b/xen/arch/x86/include/asm/hvm/save.h
@@ -113,6 +113,7 @@ typedef int (*hvm_load_handler) (struct
 void hvm_register_savevm(uint16_t typecode,
                          const char *name, 
                          hvm_save_handler save_state,
+                         hvm_check_handler check_state,
                          hvm_load_handler load_state,
                          size_t size, int kind);
 
@@ -122,12 +123,13 @@ void hvm_register_savevm(uint16_t typeco
 
 /* Syntactic sugar around that function: specify the max number of
  * saves, and this calculates the size of buffer needed */
-#define HVM_REGISTER_SAVE_RESTORE(_x, _save, _load, _num, _k)             \
+#define HVM_REGISTER_SAVE_RESTORE(_x, _save, check, _load, _num, _k)      \
 static int __init cf_check __hvm_register_##_x##_save_and_restore(void)   \
 {                                                                         \
     hvm_register_savevm(HVM_SAVE_CODE(_x),                                \
                         #_x,                                              \
                         &_save,                                           \
+                        check,                                            \
                         &_load,                                           \
                         (_num) * (HVM_SAVE_LENGTH(_x)                     \
                                   + sizeof (struct hvm_save_descriptor)), \



^ permalink raw reply	[flat|nested] 21+ messages in thread

* [PATCH v3 4/6] x86/vPIT: check values loaded from state save record
  2023-11-28 10:32 [PATCH v3 0/6] x86/HVM: load state checking Jan Beulich
                   ` (2 preceding siblings ...)
  2023-11-28 10:34 ` [PATCH v3 3/6] x86/HVM: adjust save/restore hook registration for optional check handler Jan Beulich
@ 2023-11-28 10:35 ` Jan Beulich
  2023-12-04 17:46   ` Roger Pau Monné
  2023-11-28 10:35 ` [PATCH v3 5/6] x86/vPIC: vpic_elcr_mask() master bit 2 control Jan Beulich
  2023-11-28 10:36 ` [PATCH v3 6/6] x86/vPIC: check values loaded from state save record Jan Beulich
  5 siblings, 1 reply; 21+ messages in thread
From: Jan Beulich @ 2023-11-28 10:35 UTC (permalink / raw)
  To: xen-devel@lists.xenproject.org
  Cc: Andrew Cooper, Wei Liu, Roger Pau Monné

In particular pit_latch_status() and speaker_ioport_read() perform
calculations which assume in-bounds values. Several of the state save
record fields can hold wider ranges, though. Refuse to load values which
cannot result from normal operation, except mode, the init state of
which (see also below) cannot otherwise be reached.

Note that ->gate can only legitimately be zero for channel 2;
enforce that as well.

Adjust pit_reset()'s writing of ->mode as well, to not unduly affect
the value pit_latch_status() may calculate. The chosen mode of 7 is
still one which cannot be established by writing the control word. Note
that with or without this adjustment effectively all switch() statements
using mode as the control expression aren't quite right when the PIT is
still in that init state; there is an apparent assumption that before
these can sensibly be invoked, the guest would init the PIT (i.e. in
particular set the mode).

Signed-off-by: Jan Beulich <jbeulich@suse.com>
---
For mode we could refuse to load values in the [0x08,0xfe] range; I'm
not certain that's going to be overly helpful.

For count I was considering to clip the saved value to 16 bits (i.e. to
convert the internally used 0x10000 back to the architectural 0x0000),
but pit_save() doesn't easily lend itself to such a "fixup". If desired
perhaps better a separate change anyway.
---
v3: Slightly adjust two comments. Re-base over rename in earlier patch.
v2: Introduce separate checking function; switch to refusing to load
    bogus values. Re-base.

--- a/xen/arch/x86/emul-i8254.c
+++ b/xen/arch/x86/emul-i8254.c
@@ -47,6 +47,7 @@
 #define RW_STATE_MSB 2
 #define RW_STATE_WORD0 3
 #define RW_STATE_WORD1 4
+#define RW_STATE_NUM 5
 
 #define get_guest_time(v) \
    (is_hvm_vcpu(v) ? hvm_get_guest_time(v) : (u64)get_s_time())
@@ -427,6 +428,47 @@ static int cf_check pit_save(struct vcpu
     return rc;
 }
 
+static int cf_check pit_check(const struct domain *d, hvm_domain_context_t *h)
+{
+    const struct hvm_hw_pit *hw;
+    unsigned int i;
+
+    if ( !has_vpit(d) )
+        return -ENODEV;
+
+    hw = hvm_get_entry(PIT, h);
+    if ( !hw )
+        return -ENODATA;
+
+    /*
+     * Check to-be-loaded values are within valid range, for them to represent
+     * actually reachable state.  Uses of some of the values elsewhere assume
+     * this is the case.  Note that the channels' mode fields aren't checked;
+     * Xen prior to 4.19 might save them as 0xff.
+     */
+    if ( hw->speaker_data_on > 1 || hw->pad0 )
+        return -EDOM;
+
+    for ( i = 0; i < ARRAY_SIZE(hw->channels); ++i )
+    {
+        const struct hvm_hw_pit_channel *ch = &hw->channels[i];
+
+        if ( ch->count > 0x10000 ||
+             ch->count_latched >= RW_STATE_NUM ||
+             ch->read_state >= RW_STATE_NUM ||
+             ch->write_state >= RW_STATE_NUM ||
+             ch->rw_mode > RW_STATE_WORD0 ||
+             ch->gate > 1 ||
+             ch->bcd > 1 )
+            return -EDOM;
+
+        if ( i != 2 && !ch->gate )
+            return -EINVAL;
+    }
+
+    return 0;
+}
+
 static int cf_check pit_load(struct domain *d, hvm_domain_context_t *h)
 {
     PITState *pit = domain_vpit(d);
@@ -443,6 +485,14 @@ static int cf_check pit_load(struct doma
         goto out;
     }
     
+    for ( i = 0; i < ARRAY_SIZE(pit->hw.channels); ++i )
+    {
+        struct hvm_hw_pit_channel *ch = &pit->hw.channels[i];
+
+        if ( (ch->mode &= 7) > 5 )
+            ch->mode -= 4;
+    }
+
     /*
      * Recreate platform timers from hardware state.  There will be some 
      * time jitter here, but the wall-clock will have jumped massively, so 
@@ -458,7 +508,7 @@ static int cf_check pit_load(struct doma
     return rc;
 }
 
-HVM_REGISTER_SAVE_RESTORE(PIT, pit_save, NULL, pit_load, 1, HVMSR_PER_DOM);
+HVM_REGISTER_SAVE_RESTORE(PIT, pit_save, pit_check, pit_load, 1, HVMSR_PER_DOM);
 #endif
 
 /* The intercept action for PIT DM retval: 0--not handled; 1--handled. */
@@ -575,7 +625,7 @@ void pit_reset(struct domain *d)
     for ( i = 0; i < 3; i++ )
     {
         s = &pit->hw.channels[i];
-        s->mode = 0xff; /* the init mode */
+        s->mode = 7; /* unreachable sentinel */
         s->gate = (i != 2);
         pit_load_count(pit, i, 0);
     }



^ permalink raw reply	[flat|nested] 21+ messages in thread

* [PATCH v3 5/6] x86/vPIC: vpic_elcr_mask() master bit 2 control
  2023-11-28 10:32 [PATCH v3 0/6] x86/HVM: load state checking Jan Beulich
                   ` (3 preceding siblings ...)
  2023-11-28 10:35 ` [PATCH v3 4/6] x86/vPIT: check values loaded from state save record Jan Beulich
@ 2023-11-28 10:35 ` Jan Beulich
  2023-12-05 17:29   ` Roger Pau Monné
  2023-11-28 10:36 ` [PATCH v3 6/6] x86/vPIC: check values loaded from state save record Jan Beulich
  5 siblings, 1 reply; 21+ messages in thread
From: Jan Beulich @ 2023-11-28 10:35 UTC (permalink / raw)
  To: xen-devel@lists.xenproject.org
  Cc: Andrew Cooper, Wei Liu, Roger Pau Monné

Master bit 2 is treated specially: We force it set, but we don't expose
the bit being set to the guest. While right now the read and write
handling can easily use the fixed mask, the restore input checking that
is about to be put in place wants to use the inverted mask to prove that
no bits are unduly set. That will require master bit 2 to be set. Otoh
the read path requires the bit to be clear (the bit can have either
value for the use on the write path). Hence allow use sites control over
that bit.

Signed-off-by: Jan Beulich <jbeulich@suse.com>
---
v3: New, split from larger patch.
---
I'm certainly open to naming suggestions for the new macro parameter.
"mb2" can certainly be misleading as to Multiboot 2. Yet "master_bit_2"
is too long for my taste, not the least because of the macro then
needing to be split across lines.

--- a/xen/arch/x86/hvm/vpic.c
+++ b/xen/arch/x86/hvm/vpic.c
@@ -41,7 +41,7 @@
 #define vpic_lock(v)   spin_lock(__vpic_lock(v))
 #define vpic_unlock(v) spin_unlock(__vpic_lock(v))
 #define vpic_is_locked(v) spin_is_locked(__vpic_lock(v))
-#define vpic_elcr_mask(v) ((v)->is_master ? 0xf8 : 0xde)
+#define vpic_elcr_mask(v, mb2) ((v)->is_master ? 0xf8 | ((mb2) << 2) : 0xde)
 
 /* Return the highest priority found in mask. Return 8 if none. */
 #define VPIC_PRIO_NONE 8
@@ -387,7 +387,7 @@ static int cf_check vpic_intercept_elcr_
         if ( dir == IOREQ_WRITE )
         {
             /* Some IRs are always edge trig. Slave IR is always level trig. */
-            data = (*val >> shift) & vpic_elcr_mask(vpic);
+            data = (*val >> shift) & vpic_elcr_mask(vpic, 1);
             if ( vpic->is_master )
                 data |= 1 << 2;
             vpic->elcr = data;
@@ -395,7 +395,7 @@ static int cf_check vpic_intercept_elcr_
         else
         {
             /* Reader should not see hardcoded level-triggered slave IR. */
-            data = vpic->elcr & vpic_elcr_mask(vpic);
+            data = vpic->elcr & vpic_elcr_mask(vpic, 0);
             if ( !shift )
                 *val = data;
             else



^ permalink raw reply	[flat|nested] 21+ messages in thread

* [PATCH v3 6/6] x86/vPIC: check values loaded from state save record
  2023-11-28 10:32 [PATCH v3 0/6] x86/HVM: load state checking Jan Beulich
                   ` (4 preceding siblings ...)
  2023-11-28 10:35 ` [PATCH v3 5/6] x86/vPIC: vpic_elcr_mask() master bit 2 control Jan Beulich
@ 2023-11-28 10:36 ` Jan Beulich
  2023-12-05 17:41   ` Roger Pau Monné
  5 siblings, 1 reply; 21+ messages in thread
From: Jan Beulich @ 2023-11-28 10:36 UTC (permalink / raw)
  To: xen-devel@lists.xenproject.org
  Cc: Andrew Cooper, Wei Liu, Roger Pau Monné

Loading is_master from the state save record can lead to out-of-bounds
accesses via at least the two container_of() uses by vpic_domain() and
__vpic_lock(). Make sure the value is consistent with the instance being
loaded.

For ->int_output (which for whatever reason isn't a 1-bit bitfield),
besides bounds checking also take ->init_state into account.

For ELCR follow what vpic_intercept_elcr_io()'s write path and
vpic_reset() do, i.e. don't insist on the internal view of the value to
be saved.

Move the instance range check as well, leaving just an assertion in the
load handler.

Signed-off-by: Jan Beulich <jbeulich@suse.com>
---
v3: vpic_domain() fix and vpic_elcr_mask() adjustment split out. Re-base
    over rename in earlier patch.
v2: Introduce separate checking function; switch to refusing to load
    bogus values. Re-base.

--- a/xen/arch/x86/hvm/vpic.c
+++ b/xen/arch/x86/hvm/vpic.c
@@ -429,6 +429,38 @@ static int cf_check vpic_save(struct vcp
     return 0;
 }
 
+static int cf_check vpic_check(const struct domain *d, hvm_domain_context_t *h)
+{
+    unsigned int inst = hvm_load_instance(h);
+    const struct hvm_hw_vpic *s;
+
+    if ( !has_vpic(d) )
+        return -ENODEV;
+
+    /* Which PIC is this? */
+    if ( inst >= ARRAY_SIZE(d->arch.hvm.vpic) )
+        return -ENOENT;
+
+    s = hvm_get_entry(PIC, h);
+    if ( !s )
+        return -ENODATA;
+
+    /*
+     * Check to-be-loaded values are within valid range, for them to represent
+     * actually reachable state.  Uses of some of the values elsewhere assume
+     * this is the case.
+     */
+    if ( s->int_output > 1 )
+        return -EDOM;
+
+    if ( s->is_master != !inst ||
+         (s->int_output && s->init_state) ||
+         (s->elcr & ~vpic_elcr_mask(s, 1)) )
+        return -EINVAL;
+
+    return 0;
+}
+
 static int cf_check vpic_load(struct domain *d, hvm_domain_context_t *h)
 {
     struct hvm_hw_vpic *s;
@@ -438,18 +470,21 @@ static int cf_check vpic_load(struct dom
         return -ENODEV;
 
     /* Which PIC is this? */
-    if ( inst > 1 )
-        return -ENOENT;
+    ASSERT(inst < ARRAY_SIZE(d->arch.hvm.vpic));
     s = &d->arch.hvm.vpic[inst];
 
     /* Load the state */
     if ( hvm_load_entry(PIC, h, s) != 0 )
         return -EINVAL;
 
+    if ( s->is_master )
+        s->elcr |= 1 << 2;
+
     return 0;
 }
 
-HVM_REGISTER_SAVE_RESTORE(PIC, vpic_save, NULL, vpic_load, 2, HVMSR_PER_DOM);
+HVM_REGISTER_SAVE_RESTORE(PIC, vpic_save, vpic_check, vpic_load, 2,
+                          HVMSR_PER_DOM);
 
 void vpic_reset(struct domain *d)
 {



^ permalink raw reply	[flat|nested] 21+ messages in thread

* Re: [PATCH v3 2/6] x86/HVM: split restore state checking from state loading
  2023-11-28 10:34 ` [PATCH v3 2/6] x86/HVM: split restore state checking from state loading Jan Beulich
@ 2023-12-04 17:27   ` Roger Pau Monné
  2023-12-05  8:52     ` Jan Beulich
  0 siblings, 1 reply; 21+ messages in thread
From: Roger Pau Monné @ 2023-12-04 17:27 UTC (permalink / raw)
  To: Jan Beulich; +Cc: xen-devel@lists.xenproject.org, Andrew Cooper, Wei Liu

On Tue, Nov 28, 2023 at 11:34:04AM +0100, Jan Beulich wrote:
> ..., at least as reasonably feasible without making a check hook
> mandatory (in particular strict vs relaxed/zero-extend length checking
> can't be done early this way).
> 
> Note that only one of the two uses of hvm_load() is accompanied with
> hvm_check(). The other directly consumes hvm_save() output, which ought
> to be well-formed. This means that while input data related checks don't
> need repeating in the "load" function when already done by the "check"
> one (albeit assertions to this effect may be desirable), domain state
> related checks (e.g. has_xyz(d)) will be required in both places.
> 
> Suggested-by: Roger Pau Monné <roger.pau@citrix.com>
> Signed-off-by: Jan Beulich <jbeulich@suse.com>
> ---
> Do we really need all the copying involved in use of _hvm_read_entry()
> (backing hvm_load_entry()? Zero-extending loads are likely easier to
> handle that way, but for strict loads all we gain is a reduced risk of
> unaligned accesses (compared to simply pointing into h->data[]).

See below, but I wonder whether the checks could be performed as part
of hvm_load() without having to introduce a separate handler and loop
over the context entries.

> Would the hvm_sr_handlers[] better use array_access_nospec()?

Maybe?  Given this is a domctl I do wonder whether a domain already
having access to such interface won't have easier ways to leak data
from Xen.  Maybe for a disaggregated setup.

> ---
> v2: New.
> 
> --- a/xen/arch/x86/domctl.c
> +++ b/xen/arch/x86/domctl.c
> @@ -379,6 +379,10 @@ long arch_do_domctl(
>          if ( copy_from_guest(c.data, domctl->u.hvmcontext.buffer, c.size) != 0 )
>              goto sethvmcontext_out;
>  
> +        ret = hvm_check(d, &c);
> +        if ( ret )
> +            goto sethvmcontext_out;
> +
>          domain_pause(d);
>          ret = hvm_load(d, &c);
>          domain_unpause(d);
> --- a/xen/arch/x86/hvm/save.c
> +++ b/xen/arch/x86/hvm/save.c
> @@ -30,7 +30,8 @@ static void arch_hvm_save(struct domain
>      d->arch.hvm.sync_tsc = rdtsc();
>  }
>  
> -static int arch_hvm_load(struct domain *d, const struct hvm_save_header *hdr)
> +static int arch_hvm_check(const struct domain *d,
> +                          const struct hvm_save_header *hdr)
>  {
>      uint32_t eax, ebx, ecx, edx;
>  
> @@ -55,6 +56,11 @@ static int arch_hvm_load(struct domain *
>                 "(%#"PRIx32") and restored on another (%#"PRIx32").\n",
>                 d->domain_id, hdr->cpuid, eax);
>  
> +    return 0;
> +}
> +
> +static void arch_hvm_load(struct domain *d, const struct hvm_save_header *hdr)
> +{
>      /* Restore guest's preferred TSC frequency. */
>      if ( hdr->gtsc_khz )
>          d->arch.tsc_khz = hdr->gtsc_khz;
> @@ -66,13 +72,12 @@ static int arch_hvm_load(struct domain *
>  
>      /* VGA state is not saved/restored, so we nobble the cache. */
>      d->arch.hvm.stdvga.cache = STDVGA_CACHE_DISABLED;
> -
> -    return 0;
>  }
>  
>  /* List of handlers for various HVM save and restore types */
>  static struct {
>      hvm_save_handler save;
> +    hvm_check_handler check;
>      hvm_load_handler load;
>      const char *name;
>      size_t size;
> @@ -88,6 +93,7 @@ void __init hvm_register_savevm(uint16_t
>  {
>      ASSERT(typecode <= HVM_SAVE_CODE_MAX);
>      ASSERT(hvm_sr_handlers[typecode].save == NULL);
> +    ASSERT(hvm_sr_handlers[typecode].check == NULL);
>      ASSERT(hvm_sr_handlers[typecode].load == NULL);
>      hvm_sr_handlers[typecode].save = save_state;
>      hvm_sr_handlers[typecode].load = load_state;
> @@ -275,6 +281,78 @@ int hvm_save(struct domain *d, hvm_domai
>      return 0;
>  }
>  
> +int hvm_check(const struct domain *d, hvm_domain_context_t *h)
> +{
> +    const struct hvm_save_header *hdr;
> +    int rc;
> +
> +    if ( d->is_dying )
> +        return -EINVAL;
> +
> +    /* Get at the save header, which must be first. */
> +    hdr = hvm_get_entry(HEADER, h);
> +    if ( !hdr )
> +        return -ENODATA;
> +
> +    rc = arch_hvm_check(d, hdr);
> +    if ( rc )
> +        return rc;
> +
> +    for ( ; ; )
> +    {
> +        const struct hvm_save_descriptor *desc;
> +        hvm_check_handler handler;
> +
> +        if ( h->size - h->cur < sizeof(*desc) )
> +        {
> +            /* Run out of data */
> +            printk(XENLOG_G_ERR
> +                   "HVM restore %pd: save did not end with a null entry\n",
> +                   d);
> +            return -ENODATA;
> +        }
> +
> +        /* Read the typecode of the next entry and check for the end-marker. */
> +        desc = (const void *)&h->data[h->cur];
> +        if ( desc->typecode == HVM_SAVE_CODE(END) )
> +        {
> +            /* Reset cursor for hvm_load(). */
> +            h->cur = 0;
> +            return 0;
> +        }
> +
> +        /* Find the handler for this entry. */
> +        if ( desc->typecode >= ARRAY_SIZE(hvm_sr_handlers) ||
> +             !hvm_sr_handlers[desc->typecode].name ||
> +             !hvm_sr_handlers[desc->typecode].load )
> +        {
> +            printk(XENLOG_G_ERR "HVM restore %pd: unknown entry typecode %u\n",
> +                   d, desc->typecode);
> +            return -EINVAL;
> +        }
> +
> +        /* Check the entry. */
> +        handler = hvm_sr_handlers[desc->typecode].check;
> +        if ( !handler )
> +        {
> +            if ( desc->length > h->size - h->cur - sizeof(*desc) )
> +                return -ENODATA;
> +            h->cur += sizeof(*desc) + desc->length;
> +        }
> +        else if ( (rc = handler(d, h)) )
> +        {
> +            printk(XENLOG_G_ERR
> +                   "HVM restore %pd: failed to check %s:%u rc %d\n",
> +                   d, hvm_sr_handlers[desc->typecode].name, desc->instance, rc);
> +            return rc;
> +        }
> +
> +        process_pending_softirqs();

Looking at this, won't it be better to call the check() hooks inside
the hvm_load() function instead of duplicating the loop?

I realize that you only perform the checks when the state is loaded
from a domctl, but still seems quite a lot of code duplication for
little benefit.

hvm_load() could gain an extra parameter to select whether the input
must be checked or not, and that would avoid having to iterate twice
over the context.

> +    }
> +
> +    /* Not reached */

ASSERT_UNREACHABLE() maybe?

Thanks, Roger.


^ permalink raw reply	[flat|nested] 21+ messages in thread

* Re: [PATCH v3 4/6] x86/vPIT: check values loaded from state save record
  2023-11-28 10:35 ` [PATCH v3 4/6] x86/vPIT: check values loaded from state save record Jan Beulich
@ 2023-12-04 17:46   ` Roger Pau Monné
  0 siblings, 0 replies; 21+ messages in thread
From: Roger Pau Monné @ 2023-12-04 17:46 UTC (permalink / raw)
  To: Jan Beulich; +Cc: xen-devel@lists.xenproject.org, Andrew Cooper, Wei Liu

On Tue, Nov 28, 2023 at 11:35:18AM +0100, Jan Beulich wrote:
> In particular pit_latch_status() and speaker_ioport_read() perform
> calculations which assume in-bounds values. Several of the state save
> record fields can hold wider ranges, though. Refuse to load values which
> cannot result from normal operation, except mode, the init state of
> which (see also below) cannot otherwise be reached.
> 
> Note that ->gate should only be possible to be zero for channel 2;
> enforce that as well.
> 
> Adjust pit_reset()'s writing of ->mode as well, to not unduly affect
> the value pit_latch_status() may calculate. The chosen mode of 7 is
> still one which cannot be established by writing the control word. Note
> that with or without this adjustment effectively all switch() statements
> using mode as the control expression aren't quite right when the PIT is
> still in that init state; there is an apparent assumption that before
> these can sensibly be invoked, the guest would init the PIT (i.e. in
> particular set the mode).
> 
> Signed-off-by: Jan Beulich <jbeulich@suse.com>

Reviewed-by: Roger Pau Monné <roger.pau@citrix.com>

> ---
> For mode we could refuse to load values in the [0x08,0xfe] range; I'm

I'm missing something, why should we accept a 0xff mode?  Don't modes
go up to 7 at most (0b111, mode 3)?

> not certain that's going to be overly helpful.

I don't have a strong opinion.  Could be done in a separate change
anyway.  I guess since we are at it, it might be worth checking as
much as we can, even if it's not going to affect the logic.

> For count I was considering to clip the saved value to 16 bits (i.e. to
> convert the internally used 0x10000 back to the architectural 0x0000),
> but pit_save() doesn't easily lend itself to such a "fixup". If desired
> perhaps better a separate change anyway.

I would prefer a separate change iff you want to implement this.

> ---
> v3: Slightly adjust two comments. Re-base over rename in earlier patch.
> v2: Introduce separate checking function; switch to refusing to load
>     bogus values. Re-base.
> 
> --- a/xen/arch/x86/emul-i8254.c
> +++ b/xen/arch/x86/emul-i8254.c
> @@ -47,6 +47,7 @@
>  #define RW_STATE_MSB 2
>  #define RW_STATE_WORD0 3
>  #define RW_STATE_WORD1 4
> +#define RW_STATE_NUM 5
>  
>  #define get_guest_time(v) \
>     (is_hvm_vcpu(v) ? hvm_get_guest_time(v) : (u64)get_s_time())
> @@ -427,6 +428,47 @@ static int cf_check pit_save(struct vcpu
>      return rc;
>  }
>  
> +static int cf_check pit_check(const struct domain *d, hvm_domain_context_t *h)
> +{
> +    const struct hvm_hw_pit *hw;
> +    unsigned int i;
> +
> +    if ( !has_vpit(d) )
> +        return -ENODEV;
> +
> +    hw = hvm_get_entry(PIT, h);
> +    if ( !hw )
> +        return -ENODATA;
> +
> +    /*
> +     * Check to-be-loaded values are within valid range, for them to represent
> +     * actually reachable state.  Uses of some of the values elsewhere assume
> +     * this is the case.  Note that the channels' mode fields aren't checked;
> +     * Xen prior to 4.19 might save them as 0xff.

Oh, OK, so that explains the weird 0xff mode.

Thanks, Roger.


^ permalink raw reply	[flat|nested] 21+ messages in thread

* Re: [PATCH v3 2/6] x86/HVM: split restore state checking from state loading
  2023-12-04 17:27   ` Roger Pau Monné
@ 2023-12-05  8:52     ` Jan Beulich
  2023-12-05 14:29       ` Roger Pau Monné
  0 siblings, 1 reply; 21+ messages in thread
From: Jan Beulich @ 2023-12-05  8:52 UTC (permalink / raw)
  To: Roger Pau Monné
  Cc: xen-devel@lists.xenproject.org, Andrew Cooper, Wei Liu

On 04.12.2023 18:27, Roger Pau Monné wrote:
> On Tue, Nov 28, 2023 at 11:34:04AM +0100, Jan Beulich wrote:
>> ..., at least as reasonably feasible without making a check hook
>> mandatory (in particular strict vs relaxed/zero-extend length checking
>> can't be done early this way).
>>
>> Note that only one of the two uses of hvm_load() is accompanied with
>> hvm_check(). The other directly consumes hvm_save() output, which ought
>> to be well-formed. This means that while input data related checks don't
>> need repeating in the "load" function when already done by the "check"
>> one (albeit assertions to this effect may be desirable), domain state
>> related checks (e.g. has_xyz(d)) will be required in both places.
>>
>> Suggested-by: Roger Pau Monné <roger.pau@citrix.com>
>> Signed-off-by: Jan Beulich <jbeulich@suse.com>
>> ---
>> Do we really need all the copying involved in use of _hvm_read_entry()
>> (backing hvm_load_entry()? Zero-extending loads are likely easier to
>> handle that way, but for strict loads all we gain is a reduced risk of
>> unaligned accesses (compared to simply pointing into h->data[]).
> 
> See below, but I wonder whether the checks could be performed as part
> of hvm_load() without having to introduce a separate handler and loop
> over the context entries.

Specifically not. State loading (in the longer run) would better not fail
once started. (Imo it should have been this way from the beginning.) Only
then will the vCPU still be in a predictable state even after a possible
error.

>> Would the hvm_sr_handlers[] better use array_access_nospec()?
> 
> Maybe?  Given this is a domctl I do wonder whether a domain already
> having access to such interface won't have easier ways to leak data
> from Xen.  Maybe for a disaggregated setup.

Hmm, now we're in the middle - Andrew effectively said "no need to".

>> @@ -275,6 +281,78 @@ int hvm_save(struct domain *d, hvm_domai
>>      return 0;
>>  }
>>  
>> +int hvm_check(const struct domain *d, hvm_domain_context_t *h)
>> +{
>> +    const struct hvm_save_header *hdr;
>> +    int rc;
>> +
>> +    if ( d->is_dying )
>> +        return -EINVAL;
>> +
>> +    /* Get at the save header, which must be first. */
>> +    hdr = hvm_get_entry(HEADER, h);
>> +    if ( !hdr )
>> +        return -ENODATA;
>> +
>> +    rc = arch_hvm_check(d, hdr);
>> +    if ( rc )
>> +        return rc;
>> +
>> +    for ( ; ; )
>> +    {
>> +        const struct hvm_save_descriptor *desc;
>> +        hvm_check_handler handler;
>> +
>> +        if ( h->size - h->cur < sizeof(*desc) )
>> +        {
>> +            /* Run out of data */
>> +            printk(XENLOG_G_ERR
>> +                   "HVM restore %pd: save did not end with a null entry\n",
>> +                   d);
>> +            return -ENODATA;
>> +        }
>> +
>> +        /* Read the typecode of the next entry and check for the end-marker. */
>> +        desc = (const void *)&h->data[h->cur];
>> +        if ( desc->typecode == HVM_SAVE_CODE(END) )
>> +        {
>> +            /* Reset cursor for hvm_load(). */
>> +            h->cur = 0;
>> +            return 0;
>> +        }
>> +
>> +        /* Find the handler for this entry. */
>> +        if ( desc->typecode >= ARRAY_SIZE(hvm_sr_handlers) ||
>> +             !hvm_sr_handlers[desc->typecode].name ||
>> +             !hvm_sr_handlers[desc->typecode].load )
>> +        {
>> +            printk(XENLOG_G_ERR "HVM restore %pd: unknown entry typecode %u\n",
>> +                   d, desc->typecode);
>> +            return -EINVAL;
>> +        }
>> +
>> +        /* Check the entry. */
>> +        handler = hvm_sr_handlers[desc->typecode].check;
>> +        if ( !handler )
>> +        {
>> +            if ( desc->length > h->size - h->cur - sizeof(*desc) )
>> +                return -ENODATA;
>> +            h->cur += sizeof(*desc) + desc->length;
>> +        }
>> +        else if ( (rc = handler(d, h)) )
>> +        {
>> +            printk(XENLOG_G_ERR
>> +                   "HVM restore %pd: failed to check %s:%u rc %d\n",
>> +                   d, hvm_sr_handlers[desc->typecode].name, desc->instance, rc);
>> +            return rc;
>> +        }
>> +
>> +        process_pending_softirqs();
> 
> Looking at this, won't it be better to call the check() hooks inside
> the hvm_load() function instead of duplicating the loop?
> 
> I realize that you only perform the checks when the state is loaded
> from a domctl, but still seems quite a lot of code duplication for
> little benefit.
> 
> hvm_load() could gain an extra parameter to select whether the input
> must be checked or not, and that would avoid having to iterate twice
> over the context.

Well, see above.

>> +    }
>> +
>> +    /* Not reached */
> 
> ASSERT_UNREACHABLE() maybe?

Hmm, I'd find it kind of odd to have such here. While hvm_load() doesn't
have such either, perhaps that's not a meaningful reference. Adding this
would make me fear introducing a Misra violation (adding dead code).

Jan


^ permalink raw reply	[flat|nested] 21+ messages in thread

* Re: [PATCH v3 2/6] x86/HVM: split restore state checking from state loading
  2023-12-05  8:52     ` Jan Beulich
@ 2023-12-05 14:29       ` Roger Pau Monné
  2023-12-05 14:59         ` Jan Beulich
  0 siblings, 1 reply; 21+ messages in thread
From: Roger Pau Monné @ 2023-12-05 14:29 UTC (permalink / raw)
  To: Jan Beulich; +Cc: xen-devel@lists.xenproject.org, Andrew Cooper, Wei Liu

On Tue, Dec 05, 2023 at 09:52:31AM +0100, Jan Beulich wrote:
> On 04.12.2023 18:27, Roger Pau Monné wrote:
> > On Tue, Nov 28, 2023 at 11:34:04AM +0100, Jan Beulich wrote:
> >> ..., at least as reasonably feasible without making a check hook
> >> mandatory (in particular strict vs relaxed/zero-extend length checking
> >> can't be done early this way).
> >>
> >> Note that only one of the two uses of hvm_load() is accompanied with
> >> hvm_check(). The other directly consumes hvm_save() output, which ought
> >> to be well-formed. This means that while input data related checks don't
> >> need repeating in the "load" function when already done by the "check"
> >> one (albeit assertions to this effect may be desirable), domain state
> >> related checks (e.g. has_xyz(d)) will be required in both places.
> >>
> >> Suggested-by: Roger Pau Monné <roger.pau@citrix.com>
> >> Signed-off-by: Jan Beulich <jbeulich@suse.com>
> >> ---
> >> Do we really need all the copying involved in use of _hvm_read_entry()
> >> (backing hvm_load_entry()? Zero-extending loads are likely easier to
> >> handle that way, but for strict loads all we gain is a reduced risk of
> >> unaligned accesses (compared to simply pointing into h->data[]).
> > 
> > See below, but I wonder whether the checks could be performed as part
> > of hvm_load() without having to introduce a separate handler and loop
> > over the context entries.
> 
> Specifically not. State loading (in the longer run) would better not fail
> once started. (Imo it should have been this way from the beginning.) Only
> then will the vCPU still be in a predictable state even after a possible
> error.

Looking at the callers, does such predictable state after failure
matter?

One caller is a hypercall used by the toolstack at domain create,
failing can just lead to the domain being destroyed.  The other caller
is vm fork, which will also lead to the fork being destroyed if
context loading fails.

Maybe I'm overlooking something.

> >> Would the hvm_sr_handlers[] better use array_access_nospec()?
> > 
> > Maybe?  Given this is a domctl I do wonder whether a domain already
> > having access to such interface won't have easier ways to leak data
> > from Xen.  Maybe for a disaggregated setup.
> 
> Hmm, now we're in the middle - Andrew effectively said "no need to".

I'm certainly not an expert on whether array_access_nospec() should be
used, so if Andrew says no need, that's likely better advice.

Maybe the XSM check used in such disaggregated setups would already
stop speculation?

> >> @@ -275,6 +281,78 @@ int hvm_save(struct domain *d, hvm_domai
> >>      return 0;
> >>  }
> >>  
> >> +int hvm_check(const struct domain *d, hvm_domain_context_t *h)
> >> +{
> >> +    const struct hvm_save_header *hdr;
> >> +    int rc;
> >> +
> >> +    if ( d->is_dying )
> >> +        return -EINVAL;
> >> +
> >> +    /* Get at the save header, which must be first. */
> >> +    hdr = hvm_get_entry(HEADER, h);
> >> +    if ( !hdr )
> >> +        return -ENODATA;
> >> +
> >> +    rc = arch_hvm_check(d, hdr);
> >> +    if ( rc )
> >> +        return rc;
> >> +
> >> +    for ( ; ; )
> >> +    {
> >> +        const struct hvm_save_descriptor *desc;
> >> +        hvm_check_handler handler;
> >> +
> >> +        if ( h->size - h->cur < sizeof(*desc) )
> >> +        {
> >> +            /* Run out of data */
> >> +            printk(XENLOG_G_ERR
> >> +                   "HVM restore %pd: save did not end with a null entry\n",
> >> +                   d);
> >> +            return -ENODATA;
> >> +        }
> >> +
> >> +        /* Read the typecode of the next entry and check for the end-marker. */
> >> +        desc = (const void *)&h->data[h->cur];
> >> +        if ( desc->typecode == HVM_SAVE_CODE(END) )
> >> +        {
> >> +            /* Reset cursor for hvm_load(). */
> >> +            h->cur = 0;
> >> +            return 0;
> >> +        }
> >> +
> >> +        /* Find the handler for this entry. */
> >> +        if ( desc->typecode >= ARRAY_SIZE(hvm_sr_handlers) ||
> >> +             !hvm_sr_handlers[desc->typecode].name ||
> >> +             !hvm_sr_handlers[desc->typecode].load )
> >> +        {
> >> +            printk(XENLOG_G_ERR "HVM restore %pd: unknown entry typecode %u\n",
> >> +                   d, desc->typecode);
> >> +            return -EINVAL;
> >> +        }
> >> +
> >> +        /* Check the entry. */
> >> +        handler = hvm_sr_handlers[desc->typecode].check;
> >> +        if ( !handler )
> >> +        {
> >> +            if ( desc->length > h->size - h->cur - sizeof(*desc) )
> >> +                return -ENODATA;
> >> +            h->cur += sizeof(*desc) + desc->length;
> >> +        }
> >> +        else if ( (rc = handler(d, h)) )
> >> +        {
> >> +            printk(XENLOG_G_ERR
> >> +                   "HVM restore %pd: failed to check %s:%u rc %d\n",
> >> +                   d, hvm_sr_handlers[desc->typecode].name, desc->instance, rc);
> >> +            return rc;
> >> +        }
> >> +
> >> +        process_pending_softirqs();
> > 
> > Looking at this, won't it be better to call the check() hooks inside
> > the hvm_load() function instead of duplicating the loop?
> > 
> > I realize that you only perform the checks when the state is loaded
> > from a domctl, but still seems quite a lot of code duplication for
> > little benefit.
> > 
> > hvm_load() could gain an extra parameter to select whether the input
> > must be checked or not, and that would avoid having to iterate twice
> > over the context.
> 
> Well, see above.
> 
> >> +    }
> >> +
> >> +    /* Not reached */
> > 
> > ASSERT_UNREACHABLE() maybe?
> 
> Hmm, I'd find it kind of odd to have such here. While hvm_load() doesn't
> have such either, perhaps that's not a meaningful reference. Adding this
> would make me fear introducing a Misra violation (adding dead code).

But isn't this the purpose of ASSERT_UNREACHABLE() exactly?  IOW:
Misra will need an exception for all usage of ASSERT_UNREACHABLE()
already.

I think ASSERT_UNREACHABLE() is much better than a Not reached
comment: conveys the same information to readers of the code and has
a run-time consequence on debug builds.

Thanks, Roger.


^ permalink raw reply	[flat|nested] 21+ messages in thread

* Re: [PATCH v3 2/6] x86/HVM: split restore state checking from state loading
  2023-12-05 14:29       ` Roger Pau Monné
@ 2023-12-05 14:59         ` Jan Beulich
  2023-12-05 15:55           ` Roger Pau Monné
  0 siblings, 1 reply; 21+ messages in thread
From: Jan Beulich @ 2023-12-05 14:59 UTC (permalink / raw)
  To: Roger Pau Monné
  Cc: xen-devel@lists.xenproject.org, Andrew Cooper, Wei Liu

On 05.12.2023 15:29, Roger Pau Monné wrote:
> On Tue, Dec 05, 2023 at 09:52:31AM +0100, Jan Beulich wrote:
>> On 04.12.2023 18:27, Roger Pau Monné wrote:
>>> On Tue, Nov 28, 2023 at 11:34:04AM +0100, Jan Beulich wrote:
>>>> ..., at least as reasonably feasible without making a check hook
>>>> mandatory (in particular strict vs relaxed/zero-extend length checking
>>>> can't be done early this way).
>>>>
>>>> Note that only one of the two uses of hvm_load() is accompanied with
>>>> hvm_check(). The other directly consumes hvm_save() output, which ought
>>>> to be well-formed. This means that while input data related checks don't
>>>> need repeating in the "load" function when already done by the "check"
>>>> one (albeit assertions to this effect may be desirable), domain state
>>>> related checks (e.g. has_xyz(d)) will be required in both places.
>>>>
>>>> Suggested-by: Roger Pau Monné <roger.pau@citrix.com>
>>>> Signed-off-by: Jan Beulich <jbeulich@suse.com>
>>>> ---
>>>> Do we really need all the copying involved in use of _hvm_read_entry()
>>>> (backing hvm_load_entry()? Zero-extending loads are likely easier to
>>>> handle that way, but for strict loads all we gain is a reduced risk of
>>>> unaligned accesses (compared to simply pointing into h->data[]).
>>>
>>> See below, but I wonder whether the checks could be performed as part
>>> of hvm_load() without having to introduce a separate handler and loop
>>> over the context entries.
>>
>> Specifically not. State loading (in the longer run) would better not fail
>> once started. (Imo it should have been this way from the beginning.) Only
>> then will the vCPU still be in a predictable state even after a possible
>> error.
> 
> Looking at the callers, does such predictable state after failure
> matter?
> 
> One caller is an hypercall used by the toolstack at domain create,
> failing can just lead to the domain being destroyed.  The other caller
> is vm fork, which will also lead to the fork being destroyed if
> context loading fails.
> 
> Maybe I'm overlooking something.

You don't (I think), but existing callers necessarily have to behave the
way you describe. From an abstract perspective, though, failed state
loading would better allow a retry. And really I thought that when you
suggested to split checking from loading, you had exactly that in mind.

>>>> Would the hvm_sr_handlers[] better use array_access_nospec()?
>>>
>>> Maybe?  Given this is a domctl I do wonder whether a domain already
>>> having access to such interface won't have easier ways to leak data
>>> from Xen.  Maybe for a disaggregated setup.
>>
>> Hmm, now we're in the middle - Andrew effectively said "no need to".
> 
> I'm certainly not an expert on whether array_access_nospec() should be
> used, so if Andrew says no need, that's likely better advice.
> 
> Maybe the xsm check used in such desegregated setups would already
> stop speculation?

There's no XSM check anywhere near, and even if there was I don't see
how it would stop mis-speculation on those array accesses.

>>>> @@ -275,6 +281,78 @@ int hvm_save(struct domain *d, hvm_domai
>>>>      return 0;
>>>>  }
>>>>  
>>>> +int hvm_check(const struct domain *d, hvm_domain_context_t *h)
>>>> +{
>>>> +    const struct hvm_save_header *hdr;
>>>> +    int rc;
>>>> +
>>>> +    if ( d->is_dying )
>>>> +        return -EINVAL;
>>>> +
>>>> +    /* Get at the save header, which must be first. */
>>>> +    hdr = hvm_get_entry(HEADER, h);
>>>> +    if ( !hdr )
>>>> +        return -ENODATA;
>>>> +
>>>> +    rc = arch_hvm_check(d, hdr);
>>>> +    if ( rc )
>>>> +        return rc;
>>>> +
>>>> +    for ( ; ; )
>>>> +    {
>>>> +        const struct hvm_save_descriptor *desc;
>>>> +        hvm_check_handler handler;
>>>> +
>>>> +        if ( h->size - h->cur < sizeof(*desc) )
>>>> +        {
>>>> +            /* Run out of data */
>>>> +            printk(XENLOG_G_ERR
>>>> +                   "HVM restore %pd: save did not end with a null entry\n",
>>>> +                   d);
>>>> +            return -ENODATA;
>>>> +        }
>>>> +
>>>> +        /* Read the typecode of the next entry and check for the end-marker. */
>>>> +        desc = (const void *)&h->data[h->cur];
>>>> +        if ( desc->typecode == HVM_SAVE_CODE(END) )
>>>> +        {
>>>> +            /* Reset cursor for hvm_load(). */
>>>> +            h->cur = 0;
>>>> +            return 0;
>>>> +        }
>>>> +
>>>> +        /* Find the handler for this entry. */
>>>> +        if ( desc->typecode >= ARRAY_SIZE(hvm_sr_handlers) ||
>>>> +             !hvm_sr_handlers[desc->typecode].name ||
>>>> +             !hvm_sr_handlers[desc->typecode].load )
>>>> +        {
>>>> +            printk(XENLOG_G_ERR "HVM restore %pd: unknown entry typecode %u\n",
>>>> +                   d, desc->typecode);
>>>> +            return -EINVAL;
>>>> +        }
>>>> +
>>>> +        /* Check the entry. */
>>>> +        handler = hvm_sr_handlers[desc->typecode].check;
>>>> +        if ( !handler )
>>>> +        {
>>>> +            if ( desc->length > h->size - h->cur - sizeof(*desc) )
>>>> +                return -ENODATA;
>>>> +            h->cur += sizeof(*desc) + desc->length;
>>>> +        }
>>>> +        else if ( (rc = handler(d, h)) )
>>>> +        {
>>>> +            printk(XENLOG_G_ERR
>>>> +                   "HVM restore %pd: failed to check %s:%u rc %d\n",
>>>> +                   d, hvm_sr_handlers[desc->typecode].name, desc->instance, rc);
>>>> +            return rc;
>>>> +        }
>>>> +
>>>> +        process_pending_softirqs();
>>>
>>> Looking at this, won't it be better to call the check() hooks inside
>>> the hvm_load() function instead of duplicating the loop?
>>>
>>> I realize that you only perform the checks when the state is loaded
>>> from a domctl, but still seems quite a lot of code duplication for
>>> little benefit.
>>>
>>> hvm_load() could gain an extra parameter to select whether the input
>>> must be checked or not, and that would avoid having to iterate twice
>>> over the context.
>>
>> Well, see above.
>>
>>>> +    }
>>>> +
>>>> +    /* Not reached */
>>>
>>> ASSERT_UNREACHABLE() maybe?
>>
>> Hmm, I'd find it kind of odd to have such here. While hvm_load() doesn't
>> have such either, perhaps that's not a meaningful reference. Adding this
>> would make me fear introducing a Misra violation (adding dead code).
> 
> But isn't this the purpose of ASSERT_UNREACHABLE() exactly?  IOW:
> Misra will need an exception for all usage of ASSERT_UNREACHABLE()
> already.
> 
> I think ASSERT_UNREACHABLE() is much better than a Not reached
> comment: conveys the same information to readers of the code and has
> a run-time consequence on debug builds.

I see a difference between uses on paths where we assert that a certain
state cannot be reached (if all our logic is right) vs a case like the
one here where the compiler (or another tool) can actually prove that
the loop can't be exited the "normal" way.

Jan


^ permalink raw reply	[flat|nested] 21+ messages in thread

* Re: [PATCH v3 2/6] x86/HVM: split restore state checking from state loading
  2023-12-05 14:59         ` Jan Beulich
@ 2023-12-05 15:55           ` Roger Pau Monné
  2023-12-06  7:27             ` Jan Beulich
  0 siblings, 1 reply; 21+ messages in thread
From: Roger Pau Monné @ 2023-12-05 15:55 UTC (permalink / raw)
  To: Jan Beulich; +Cc: xen-devel@lists.xenproject.org, Andrew Cooper, Wei Liu

On Tue, Dec 05, 2023 at 03:59:13PM +0100, Jan Beulich wrote:
> On 05.12.2023 15:29, Roger Pau Monné wrote:
> > On Tue, Dec 05, 2023 at 09:52:31AM +0100, Jan Beulich wrote:
> >> On 04.12.2023 18:27, Roger Pau Monné wrote:
> >>> On Tue, Nov 28, 2023 at 11:34:04AM +0100, Jan Beulich wrote:
> >>>> ..., at least as reasonably feasible without making a check hook
> >>>> mandatory (in particular strict vs relaxed/zero-extend length checking
> >>>> can't be done early this way).
> >>>>
> >>>> Note that only one of the two uses of hvm_load() is accompanied with
> >>>> hvm_check(). The other directly consumes hvm_save() output, which ought
> >>>> to be well-formed. This means that while input data related checks don't
> >>>> need repeating in the "load" function when already done by the "check"
> >>>> one (albeit assertions to this effect may be desirable), domain state
> >>>> related checks (e.g. has_xyz(d)) will be required in both places.
> >>>>
> >>>> Suggested-by: Roger Pau Monné <roger.pau@citrix.com>
> >>>> Signed-off-by: Jan Beulich <jbeulich@suse.com>
> >>>> ---
> >>>> Do we really need all the copying involved in use of _hvm_read_entry()
> >>>> (backing hvm_load_entry()? Zero-extending loads are likely easier to
> >>>> handle that way, but for strict loads all we gain is a reduced risk of
> >>>> unaligned accesses (compared to simply pointing into h->data[]).
> >>>
> >>> See below, but I wonder whether the checks could be performed as part
> >>> of hvm_load() without having to introduce a separate handler and loop
> >>> over the context entries.
> >>
> >> Specifically not. State loading (in the longer run) would better not fail
> >> once started. (Imo it should have been this way from the beginning.) Only
> >> then will the vCPU still be in a predictable state even after a possible
> >> error.
> > 
> > Looking at the callers, does such predictable state after failure
> > matter?
> > 
> > One caller is an hypercall used by the toolstack at domain create,
> > failing can just lead to the domain being destroyed.  The other caller
> > is vm fork, which will also lead to the fork being destroyed if
> > context loading fails.
> > 
> > Maybe I'm overlooking something.
> 
> You don't (I think), but existing callers necessarily have to behave the
> way you describe. From an abstract perspective, though, failed state
> loading would better allow a retry. And really I thought that when you
> suggested to split checking from loading, you had exactly that in mind.

Not really TBH, because I didn't think that much about a possible
implementation when proposing it.

Maybe a suitable compromise would be to reset the state to the initial
(at domain build) one on failure?

I do dislike the duplicated loops, as it seems like a lot of duplicate
boilerplate code, and I have fears of it going out of sync.

> >>>> Would the hvm_sr_handlers[] better use array_access_nospec()?
> >>>
> >>> Maybe?  Given this is a domctl I do wonder whether a domain already
> >>> having access to such interface won't have easier ways to leak data
> >>> from Xen.  Maybe for a disaggregated setup.
> >>
> >> Hmm, now we're in the middle - Andrew effectively said "no need to".
> > 
> > I'm certainly not an expert on whether array_access_nospec() should be
> > used, so if Andrew says no need, that's likely better advice.
> > 
> > Maybe the xsm check used in such disaggregated setups would already
> > stop speculation?
> 
> There's no XSM check anywhere near, and even if there was I don't see
> how it would stop mis-speculation on those array accesses.

This being a slow path anyway, I don't think the extra
array_access_nospec() would make much of an impact, but again I have
to admit it's unclear to me when those are actually required, so I
might suggest adding them out of precaution.

> >>>> @@ -275,6 +281,78 @@ int hvm_save(struct domain *d, hvm_domai
> >>>>      return 0;
> >>>>  }
> >>>>  
> >>>> +int hvm_check(const struct domain *d, hvm_domain_context_t *h)
> >>>> +{
> >>>> +    const struct hvm_save_header *hdr;
> >>>> +    int rc;
> >>>> +
> >>>> +    if ( d->is_dying )
> >>>> +        return -EINVAL;
> >>>> +
> >>>> +    /* Get at the save header, which must be first. */
> >>>> +    hdr = hvm_get_entry(HEADER, h);
> >>>> +    if ( !hdr )
> >>>> +        return -ENODATA;
> >>>> +
> >>>> +    rc = arch_hvm_check(d, hdr);
> >>>> +    if ( rc )
> >>>> +        return rc;
> >>>> +
> >>>> +    for ( ; ; )
> >>>> +    {
> >>>> +        const struct hvm_save_descriptor *desc;
> >>>> +        hvm_check_handler handler;
> >>>> +
> >>>> +        if ( h->size - h->cur < sizeof(*desc) )
> >>>> +        {
> >>>> +            /* Run out of data */
> >>>> +            printk(XENLOG_G_ERR
> >>>> +                   "HVM restore %pd: save did not end with a null entry\n",
> >>>> +                   d);
> >>>> +            return -ENODATA;
> >>>> +        }
> >>>> +
> >>>> +        /* Read the typecode of the next entry and check for the end-marker. */
> >>>> +        desc = (const void *)&h->data[h->cur];
> >>>> +        if ( desc->typecode == HVM_SAVE_CODE(END) )
> >>>> +        {
> >>>> +            /* Reset cursor for hvm_load(). */
> >>>> +            h->cur = 0;
> >>>> +            return 0;
> >>>> +        }
> >>>> +
> >>>> +        /* Find the handler for this entry. */
> >>>> +        if ( desc->typecode >= ARRAY_SIZE(hvm_sr_handlers) ||
> >>>> +             !hvm_sr_handlers[desc->typecode].name ||
> >>>> +             !hvm_sr_handlers[desc->typecode].load )
> >>>> +        {
> >>>> +            printk(XENLOG_G_ERR "HVM restore %pd: unknown entry typecode %u\n",
> >>>> +                   d, desc->typecode);
> >>>> +            return -EINVAL;
> >>>> +        }
> >>>> +
> >>>> +        /* Check the entry. */
> >>>> +        handler = hvm_sr_handlers[desc->typecode].check;
> >>>> +        if ( !handler )
> >>>> +        {
> >>>> +            if ( desc->length > h->size - h->cur - sizeof(*desc) )
> >>>> +                return -ENODATA;
> >>>> +            h->cur += sizeof(*desc) + desc->length;
> >>>> +        }
> >>>> +        else if ( (rc = handler(d, h)) )
> >>>> +        {
> >>>> +            printk(XENLOG_G_ERR
> >>>> +                   "HVM restore %pd: failed to check %s:%u rc %d\n",
> >>>> +                   d, hvm_sr_handlers[desc->typecode].name, desc->instance, rc);
> >>>> +            return rc;
> >>>> +        }
> >>>> +
> >>>> +        process_pending_softirqs();
> >>>
> >>> Looking at this, won't it be better to call the check() hooks inside
> >>> the hvm_load() function instead of duplicating the loop?
> >>>
> >>> I realize that you only perform the checks when the state is loaded
> >>> from a domctl, but still seems quite a lot of code duplication for
> >>> little benefit.
> >>>
> >>> hvm_load() could gain an extra parameter to select whether the input
> >>> must be checked or not, and that would avoid having to iterate twice
> >>> over the context.
> >>
> >> Well, see above.
> >>
> >>>> +    }
> >>>> +
> >>>> +    /* Not reached */
> >>>
> >>> ASSERT_UNREACHABLE() maybe?
> >>
> >> Hmm, I'd find it kind of odd to have such here. While hvm_load() doesn't
> >> have such either, perhaps that's not a meaningful reference. Adding this
> >> would make me fear introducing a Misra violation (adding dead code).
> > 
> > But isn't this the purpose of ASSERT_UNREACHABLE() exactly?  IOW:
> > Misra will need an exception for all usage of ASSERT_UNREACHABLE()
> > already.
> > 
> > I think ASSERT_UNREACHABLE() is much better than a Not reached
> > comment: conveys the same information to readers of the code and has
> > a run-time consequence on debug builds.
> 
> I see a difference between uses on paths where we assert that a certain
> state cannot be reached (if all our logic is right) vs a case like the
> one here where the compiler (or another tool) can actually prove that
> the loop can't be exited the "normal" way.

Can't be exited with the current code, but the purpose of
ASSERT_UNREACHABLE() is also to ensure that future changes do
not break this condition.

Thanks, Roger.


^ permalink raw reply	[flat|nested] 21+ messages in thread

* Re: [PATCH v3 5/6] x86/vPIC: vpic_elcr_mask() master bit 2 control
  2023-11-28 10:35 ` [PATCH v3 5/6] x86/vPIC: vpic_elcr_mask() master bit 2 control Jan Beulich
@ 2023-12-05 17:29   ` Roger Pau Monné
  2023-12-06  7:22     ` Jan Beulich
  0 siblings, 1 reply; 21+ messages in thread
From: Roger Pau Monné @ 2023-12-05 17:29 UTC (permalink / raw)
  To: Jan Beulich; +Cc: xen-devel@lists.xenproject.org, Andrew Cooper, Wei Liu

On Tue, Nov 28, 2023 at 11:35:46AM +0100, Jan Beulich wrote:
> Master bit 2 is treated specially: We force it set, but we don't expose
> the bit being set to the guest. While right now the read and write
> handling can easily use the fixed mask, the restore input checking that
> is about to be put in place wants to use the inverted mask to prove that
> no bits are unduly set. That will require master bit 2 to be set. Otoh
> the read path requires the bit to be clear (the bit can have either
> value for the use on the write path). Hence allow use sites control over
> that bit.
> 
> Signed-off-by: Jan Beulich <jbeulich@suse.com>
> ---
> v3: New, split from larger patch.
> ---
> I'm certainly open to naming suggestions for the new macro parameter.
> "mb2" can certainly be misleading as to Multiboot 2. Yet "master_bit_2"
> is too long for my taste, not the least because of the macro then
> needing to be split across lines.

Let's leave it as mb2; given the context, I think it is difficult to
mistake this code for having anything to do with multiboot.

> 
> --- a/xen/arch/x86/hvm/vpic.c
> +++ b/xen/arch/x86/hvm/vpic.c
> @@ -41,7 +41,7 @@
>  #define vpic_lock(v)   spin_lock(__vpic_lock(v))
>  #define vpic_unlock(v) spin_unlock(__vpic_lock(v))
>  #define vpic_is_locked(v) spin_is_locked(__vpic_lock(v))
> -#define vpic_elcr_mask(v) ((v)->is_master ? 0xf8 : 0xde)
> +#define vpic_elcr_mask(v, mb2) ((v)->is_master ? 0xf8 | ((mb2) << 2) : 0xde)
>  
>  /* Return the highest priority found in mask. Return 8 if none. */
>  #define VPIC_PRIO_NONE 8
> @@ -387,7 +387,7 @@ static int cf_check vpic_intercept_elcr_
>          if ( dir == IOREQ_WRITE )
>          {
>              /* Some IRs are always edge trig. Slave IR is always level trig. */
> -            data = (*val >> shift) & vpic_elcr_mask(vpic);
> +            data = (*val >> shift) & vpic_elcr_mask(vpic, 1);

Not that it matters much, but I think you could use
vpic_elcr_mask(vpic, 0) to strictly keep the same behavior as
before?

>              if ( vpic->is_master )
>                  data |= 1 << 2;

Since the bit is forcefully set here anyway.

Regardless:

Reviewed-by: Roger Pau Monné <roger.pau@citrix.com>

Thanks, Roger.


^ permalink raw reply	[flat|nested] 21+ messages in thread

* Re: [PATCH v3 6/6] x86/vPIC: check values loaded from state save record
  2023-11-28 10:36 ` [PATCH v3 6/6] x86/vPIC: check values loaded from state save record Jan Beulich
@ 2023-12-05 17:41   ` Roger Pau Monné
  0 siblings, 0 replies; 21+ messages in thread
From: Roger Pau Monné @ 2023-12-05 17:41 UTC (permalink / raw)
  To: Jan Beulich; +Cc: xen-devel@lists.xenproject.org, Andrew Cooper, Wei Liu

On Tue, Nov 28, 2023 at 11:36:40AM +0100, Jan Beulich wrote:
> Loading is_master from the state save record can lead to out-of-bounds
> accesses via at least the two container_of() uses by vpic_domain() and
> __vpic_lock(). Make sure the value is consistent with the instance being
> loaded.
> 
> For ->int_output (which for whatever reason isn't a 1-bit bitfield),
> besides bounds checking also take ->init_state into account.
> 
> For ELCR follow what vpic_intercept_elcr_io()'s write path and
> vpic_reset() do, i.e. don't insist on the internal view of the value to
> be saved.
> 
> Move the instance range check as well, leaving just an assertion in the
> load handler.
> 
> Signed-off-by: Jan Beulich <jbeulich@suse.com>

Reviewed-by: Roger Pau Monné <roger.pau@citrix.com>

Thanks, Roger.


^ permalink raw reply	[flat|nested] 21+ messages in thread

* Re: [PATCH v3 5/6] x86/vPIC: vpic_elcr_mask() master bit 2 control
  2023-12-05 17:29   ` Roger Pau Monné
@ 2023-12-06  7:22     ` Jan Beulich
  0 siblings, 0 replies; 21+ messages in thread
From: Jan Beulich @ 2023-12-06  7:22 UTC (permalink / raw)
  To: Roger Pau Monné
  Cc: xen-devel@lists.xenproject.org, Andrew Cooper, Wei Liu

On 05.12.2023 18:29, Roger Pau Monné wrote:
> On Tue, Nov 28, 2023 at 11:35:46AM +0100, Jan Beulich wrote:
>> @@ -387,7 +387,7 @@ static int cf_check vpic_intercept_elcr_
>>          if ( dir == IOREQ_WRITE )
>>          {
>>              /* Some IRs are always edge trig. Slave IR is always level trig. */
>> -            data = (*val >> shift) & vpic_elcr_mask(vpic);
>> +            data = (*val >> shift) & vpic_elcr_mask(vpic, 1);
> 
> Not that it matters much, but I think you could use
> vpic_elcr_mask(vpic, 0) to strictly keep the same behavior as
> before?

Indeed, as also said in the description. Personally I view it as (slightly)
more logical to not mask off ...

>>              if ( vpic->is_master )
>>                  data |= 1 << 2;
> 
> Since the bit is forcefully set here anyway.

... and then set the bit, hence why I chose to go with 1.

> Regardless:
> 
> Reviewed-by: Roger Pau Monné <roger.pau@citrix.com>

Thanks.

Jan


^ permalink raw reply	[flat|nested] 21+ messages in thread

* Re: [PATCH v3 2/6] x86/HVM: split restore state checking from state loading
  2023-12-05 15:55           ` Roger Pau Monné
@ 2023-12-06  7:27             ` Jan Beulich
  2023-12-11 10:46               ` Roger Pau Monné
  0 siblings, 1 reply; 21+ messages in thread
From: Jan Beulich @ 2023-12-06  7:27 UTC (permalink / raw)
  To: Roger Pau Monné, Andrew Cooper
  Cc: xen-devel@lists.xenproject.org, Wei Liu

On 05.12.2023 16:55, Roger Pau Monné wrote:
> On Tue, Dec 05, 2023 at 03:59:13PM +0100, Jan Beulich wrote:
>> On 05.12.2023 15:29, Roger Pau Monné wrote:
>>> On Tue, Dec 05, 2023 at 09:52:31AM +0100, Jan Beulich wrote:
>>>> On 04.12.2023 18:27, Roger Pau Monné wrote:
>>>>> On Tue, Nov 28, 2023 at 11:34:04AM +0100, Jan Beulich wrote:
>>>>>> ..., at least as reasonably feasible without making a check hook
>>>>>> mandatory (in particular strict vs relaxed/zero-extend length checking
>>>>>> can't be done early this way).
>>>>>>
>>>>>> Note that only one of the two uses of hvm_load() is accompanied with
>>>>>> hvm_check(). The other directly consumes hvm_save() output, which ought
>>>>>> to be well-formed. This means that while input data related checks don't
>>>>>> need repeating in the "load" function when already done by the "check"
>>>>>> one (albeit assertions to this effect may be desirable), domain state
>>>>>> related checks (e.g. has_xyz(d)) will be required in both places.
>>>>>>
>>>>>> Suggested-by: Roger Pau Monné <roger.pau@citrix.com>
>>>>>> Signed-off-by: Jan Beulich <jbeulich@suse.com>
>>>>>> ---
>>>>>> Do we really need all the copying involved in use of _hvm_read_entry()
>>>>>> (backing hvm_load_entry()? Zero-extending loads are likely easier to
>>>>>> handle that way, but for strict loads all we gain is a reduced risk of
>>>>>> unaligned accesses (compared to simply pointing into h->data[]).
>>>>>
>>>>> See below, but I wonder whether the checks could be performed as part
>>>>> of hvm_load() without having to introduce a separate handler and loop
>>>>> over the context entries.
>>>>
>>>> Specifically not. State loading (in the longer run) would better not fail
>>>> once started. (Imo it should have been this way from the beginning.) Only
>>>> then will the vCPU still be in a predictable state even after a possible
>>>> error.
>>>
>>> Looking at the callers, does such predictable state after failure
>>> matter?
>>>
>>> One caller is an hypercall used by the toolstack at domain create,
>>> failing can just lead to the domain being destroyed.  The other caller
>>> is vm fork, which will also lead to the fork being destroyed if
>>> context loading fails.
>>>
>>> Maybe I'm overlooking something.
>>
>> You don't (I think), but existing callers necessarily have to behave the
>> way you describe. From an abstract perspective, though, failed state
>> loading would better allow a retry. And really I thought that when you
>> suggested to split checking from loading, you had exactly that in mind.
> 
> Not really TBH, because I didn't think that much on a possible
> implementation when proposing it.

But what else did you think of then in terms of separating checking from
loading?

> Maybe a suitable compromise would be to reset the state to the initial
> (at domain build) one on failure?

That's an option, sure.

> I do dislike the duplicated loops, as it seems like a lot of duplicate
> boilerplate code, and I have fears of it going out of sync.

There's a certain risk, yes, but that exists similarly with the save and
load sides imo.

Andrew - before I go and undo the v2 changes, what is your view?

Jan


^ permalink raw reply	[flat|nested] 21+ messages in thread

* Re: [PATCH v3 2/6] x86/HVM: split restore state checking from state loading
  2023-12-06  7:27             ` Jan Beulich
@ 2023-12-11 10:46               ` Roger Pau Monné
  2023-12-11 11:31                 ` Jan Beulich
  0 siblings, 1 reply; 21+ messages in thread
From: Roger Pau Monné @ 2023-12-11 10:46 UTC (permalink / raw)
  To: Jan Beulich; +Cc: Andrew Cooper, xen-devel@lists.xenproject.org, Wei Liu

On Wed, Dec 06, 2023 at 08:27:59AM +0100, Jan Beulich wrote:
> On 05.12.2023 16:55, Roger Pau Monné wrote:
> > On Tue, Dec 05, 2023 at 03:59:13PM +0100, Jan Beulich wrote:
> >> On 05.12.2023 15:29, Roger Pau Monné wrote:
> >>> On Tue, Dec 05, 2023 at 09:52:31AM +0100, Jan Beulich wrote:
> >>>> On 04.12.2023 18:27, Roger Pau Monné wrote:
> >>>>> On Tue, Nov 28, 2023 at 11:34:04AM +0100, Jan Beulich wrote:
> >>>>>> ..., at least as reasonably feasible without making a check hook
> >>>>>> mandatory (in particular strict vs relaxed/zero-extend length checking
> >>>>>> can't be done early this way).
> >>>>>>
> >>>>>> Note that only one of the two uses of hvm_load() is accompanied with
> >>>>>> hvm_check(). The other directly consumes hvm_save() output, which ought
> >>>>>> to be well-formed. This means that while input data related checks don't
> >>>>>> need repeating in the "load" function when already done by the "check"
> >>>>>> one (albeit assertions to this effect may be desirable), domain state
> >>>>>> related checks (e.g. has_xyz(d)) will be required in both places.
> >>>>>>
> >>>>>> Suggested-by: Roger Pau Monné <roger.pau@citrix.com>
> >>>>>> Signed-off-by: Jan Beulich <jbeulich@suse.com>
> >>>>>> ---
> >>>>>> Do we really need all the copying involved in use of _hvm_read_entry()
> >>>>>> (backing hvm_load_entry()? Zero-extending loads are likely easier to
> >>>>>> handle that way, but for strict loads all we gain is a reduced risk of
> >>>>>> unaligned accesses (compared to simply pointing into h->data[]).
> >>>>>
> >>>>> See below, but I wonder whether the checks could be performed as part
> >>>>> of hvm_load() without having to introduce a separate handler and loop
> >>>>> over the context entries.
> >>>>
> >>>> Specifically not. State loading (in the longer run) would better not fail
> >>>> once started. (Imo it should have been this way from the beginning.) Only
> >>>> then will the vCPU still be in a predictable state even after a possible
> >>>> error.
> >>>
> >>> Looking at the callers, does such predictable state after failure
> >>> matter?
> >>>
> >>> One caller is an hypercall used by the toolstack at domain create,
> >>> failing can just lead to the domain being destroyed.  The other caller
> >>> is vm fork, which will also lead to the fork being destroyed if
> >>> context loading fails.
> >>>
> >>> Maybe I'm overlooking something.
> >>
> >> You don't (I think), but existing callers necessarily have to behave the
> >> way you describe. From an abstract perspective, though, failed state
> >> loading would better allow a retry. And really I thought that when you
> >> suggested to split checking from loading, you had exactly that in mind.
> > 
> > Not really TBH, because I didn't think that much on a possible
> > implementation when proposing it.
> 
> But what else did you think of then in terms of separating checking from
> loading?

Just calling the check and load functions inside of the same loop was
my initial thought.

> > Maybe a suitable compromise would be to reset the state to the initial
> > (at domain build) one on failure?
> 
> That's an option, sure.
> 
> > I do dislike the duplicated loops, as it seems like a lot of duplicate
> > boilerplate code, and I have fears of it going out of sync.
> 
> There's a certain risk, yes, but that exists similarly with the save and
> load sides imo.

Hm, yes, albeit I have the feeling those are not as similar as the
proposed check and load loops.

Thanks, Roger.


^ permalink raw reply	[flat|nested] 21+ messages in thread

* Re: [PATCH v3 2/6] x86/HVM: split restore state checking from state loading
  2023-12-11 10:46               ` Roger Pau Monné
@ 2023-12-11 11:31                 ` Jan Beulich
  2023-12-11 12:43                   ` Roger Pau Monné
  0 siblings, 1 reply; 21+ messages in thread
From: Jan Beulich @ 2023-12-11 11:31 UTC (permalink / raw)
  To: Roger Pau Monné
  Cc: Andrew Cooper, xen-devel@lists.xenproject.org, Wei Liu

On 11.12.2023 11:46, Roger Pau Monné wrote:
> On Wed, Dec 06, 2023 at 08:27:59AM +0100, Jan Beulich wrote:
>> On 05.12.2023 16:55, Roger Pau Monné wrote:
>>> On Tue, Dec 05, 2023 at 03:59:13PM +0100, Jan Beulich wrote:
>>>> On 05.12.2023 15:29, Roger Pau Monné wrote:
>>>>> On Tue, Dec 05, 2023 at 09:52:31AM +0100, Jan Beulich wrote:
>>>>>> On 04.12.2023 18:27, Roger Pau Monné wrote:
>>>>>>> On Tue, Nov 28, 2023 at 11:34:04AM +0100, Jan Beulich wrote:
>>>>>>>> ..., at least as reasonably feasible without making a check hook
>>>>>>>> mandatory (in particular strict vs relaxed/zero-extend length checking
>>>>>>>> can't be done early this way).
>>>>>>>>
>>>>>>>> Note that only one of the two uses of hvm_load() is accompanied with
>>>>>>>> hvm_check(). The other directly consumes hvm_save() output, which ought
>>>>>>>> to be well-formed. This means that while input data related checks don't
>>>>>>>> need repeating in the "load" function when already done by the "check"
>>>>>>>> one (albeit assertions to this effect may be desirable), domain state
>>>>>>>> related checks (e.g. has_xyz(d)) will be required in both places.
>>>>>>>>
>>>>>>>> Suggested-by: Roger Pau Monné <roger.pau@citrix.com>
>>>>>>>> Signed-off-by: Jan Beulich <jbeulich@suse.com>
>>>>>>>> ---
>>>>>>>> Do we really need all the copying involved in use of _hvm_read_entry()
>>>>>>>> (backing hvm_load_entry()? Zero-extending loads are likely easier to
>>>>>>>> handle that way, but for strict loads all we gain is a reduced risk of
>>>>>>>> unaligned accesses (compared to simply pointing into h->data[]).
>>>>>>>
>>>>>>> See below, but I wonder whether the checks could be performed as part
>>>>>>> of hvm_load() without having to introduce a separate handler and loop
>>>>>>> over the context entries.
>>>>>>
>>>>>> Specifically not. State loading (in the longer run) would better not fail
>>>>>> once started. (Imo it should have been this way from the beginning.) Only
>>>>>> then will the vCPU still be in a predictable state even after a possible
>>>>>> error.
>>>>>
>>>>> Looking at the callers, does such predictable state after failure
>>>>> matter?
>>>>>
>>>>> One caller is an hypercall used by the toolstack at domain create,
>>>>> failing can just lead to the domain being destroyed.  The other caller
>>>>> is vm fork, which will also lead to the fork being destroyed if
>>>>> context loading fails.
>>>>>
>>>>> Maybe I'm overlooking something.
>>>>
>>>> You don't (I think), but existing callers necessarily have to behave the
>>>> way you describe. From an abstract perspective, though, failed state
>>>> loading would better allow a retry. And really I thought that when you
>>>> suggested to split checking from loading, you had exactly that in mind.
>>>
>>> Not really TBH, because I didn't think that much on a possible
>>> implementation when proposing it.
>>
>> But what else did you think of then in terms of separating checking from
>> loading?
> 
> Just calling the check and load functions inside of the same loop was
> my initial thought.

Okay, I was meanwhile also guessing that this might have been what you
thought of. I can go that route, but I wouldn't want to make it "and", but
"or" then, depending on a new boolean parameter to be passed to hvm_load().
IOW I'd still like to do all checking before all loading (in the longer
run, that is, once individual handlers have been adapted). Would that
be okay with you?

Jan


^ permalink raw reply	[flat|nested] 21+ messages in thread

* Re: [PATCH v3 2/6] x86/HVM: split restore state checking from state loading
  2023-12-11 11:31                 ` Jan Beulich
@ 2023-12-11 12:43                   ` Roger Pau Monné
  2023-12-11 13:15                     ` Jan Beulich
  0 siblings, 1 reply; 21+ messages in thread
From: Roger Pau Monné @ 2023-12-11 12:43 UTC (permalink / raw)
  To: Jan Beulich; +Cc: Andrew Cooper, xen-devel@lists.xenproject.org, Wei Liu

On Mon, Dec 11, 2023 at 12:31:11PM +0100, Jan Beulich wrote:
> On 11.12.2023 11:46, Roger Pau Monné wrote:
> > On Wed, Dec 06, 2023 at 08:27:59AM +0100, Jan Beulich wrote:
> >> On 05.12.2023 16:55, Roger Pau Monné wrote:
> >>> On Tue, Dec 05, 2023 at 03:59:13PM +0100, Jan Beulich wrote:
> >>>> On 05.12.2023 15:29, Roger Pau Monné wrote:
> >>>>> On Tue, Dec 05, 2023 at 09:52:31AM +0100, Jan Beulich wrote:
> >>>>>> On 04.12.2023 18:27, Roger Pau Monné wrote:
> >>>>>>> On Tue, Nov 28, 2023 at 11:34:04AM +0100, Jan Beulich wrote:
> >>>>>>>> ..., at least as reasonably feasible without making a check hook
> >>>>>>>> mandatory (in particular strict vs relaxed/zero-extend length checking
> >>>>>>>> can't be done early this way).
> >>>>>>>>
> >>>>>>>> Note that only one of the two uses of hvm_load() is accompanied with
> >>>>>>>> hvm_check(). The other directly consumes hvm_save() output, which ought
> >>>>>>>> to be well-formed. This means that while input data related checks don't
> >>>>>>>> need repeating in the "load" function when already done by the "check"
> >>>>>>>> one (albeit assertions to this effect may be desirable), domain state
> >>>>>>>> related checks (e.g. has_xyz(d)) will be required in both places.
> >>>>>>>>
> >>>>>>>> Suggested-by: Roger Pau Monné <roger.pau@citrix.com>
> >>>>>>>> Signed-off-by: Jan Beulich <jbeulich@suse.com>
> >>>>>>>> ---
> >>>>>>>> Do we really need all the copying involved in use of _hvm_read_entry()
> >>>>>>>> (backing hvm_load_entry()? Zero-extending loads are likely easier to
> >>>>>>>> handle that way, but for strict loads all we gain is a reduced risk of
> >>>>>>>> unaligned accesses (compared to simply pointing into h->data[]).
> >>>>>>>
> >>>>>>> See below, but I wonder whether the checks could be performed as part
> >>>>>>> of hvm_load() without having to introduce a separate handler and loop
> >>>>>>> over the context entries.
> >>>>>>
> >>>>>> Specifically not. State loading (in the longer run) would better not fail
> >>>>>> once started. (Imo it should have been this way from the beginning.) Only
> >>>>>> then will the vCPU still be in a predictable state even after a possible
> >>>>>> error.
> >>>>>
> >>>>> Looking at the callers, does such predictable state after failure
> >>>>> matter?
> >>>>>
> >>>>> One caller is an hypercall used by the toolstack at domain create,
> >>>>> failing can just lead to the domain being destroyed.  The other caller
> >>>>> is vm fork, which will also lead to the fork being destroyed if
> >>>>> context loading fails.
> >>>>>
> >>>>> Maybe I'm overlooking something.
> >>>>
> >>>> You don't (I think), but existing callers necessarily have to behave the
> >>>> way you describe. From an abstract perspective, though, failed state
> >>>> loading would better allow a retry. And really I thought that when you
> >>>> suggested to split checking from loading, you had exactly that in mind.
> >>>
> >>> Not really TBH, because I didn't think that much on a possible
> >>> implementation when proposing it.
> >>
> >> But what else did you think of then in terms of separating checking from
> >> loading?
> > 
> > Just calling the check and load functions inside of the same loop was
> > my initial thought.
> 
> Okay, I was meanwhile also guessing that this might have been what you
> thought of. I can go that route, but I wouldn't want to make it "and", but
> "or" then, depending on a new boolean parameter to be passed to hvm_load().
> IOW I'd still like to do all checking before all loading (in the longer
> run, that is i.e. after individual handlers have been adapted). Would that
> be okay with you?

Yes, that would be fine.  I assume you will introduce a 'dry run'
parameter then?

Thanks, Roger.


^ permalink raw reply	[flat|nested] 21+ messages in thread

* Re: [PATCH v3 2/6] x86/HVM: split restore state checking from state loading
  2023-12-11 12:43                   ` Roger Pau Monné
@ 2023-12-11 13:15                     ` Jan Beulich
  0 siblings, 0 replies; 21+ messages in thread
From: Jan Beulich @ 2023-12-11 13:15 UTC (permalink / raw)
  To: Roger Pau Monné
  Cc: Andrew Cooper, xen-devel@lists.xenproject.org, Wei Liu

On 11.12.2023 13:43, Roger Pau Monné wrote:
> On Mon, Dec 11, 2023 at 12:31:11PM +0100, Jan Beulich wrote:
>> On 11.12.2023 11:46, Roger Pau Monné wrote:
>>> On Wed, Dec 06, 2023 at 08:27:59AM +0100, Jan Beulich wrote:
>>>> On 05.12.2023 16:55, Roger Pau Monné wrote:
>>>>> On Tue, Dec 05, 2023 at 03:59:13PM +0100, Jan Beulich wrote:
>>>>>> On 05.12.2023 15:29, Roger Pau Monné wrote:
>>>>>>> On Tue, Dec 05, 2023 at 09:52:31AM +0100, Jan Beulich wrote:
>>>>>>>> On 04.12.2023 18:27, Roger Pau Monné wrote:
>>>>>>>>> On Tue, Nov 28, 2023 at 11:34:04AM +0100, Jan Beulich wrote:
>>>>>>>>>> ..., at least as reasonably feasible without making a check hook
>>>>>>>>>> mandatory (in particular strict vs relaxed/zero-extend length checking
>>>>>>>>>> can't be done early this way).
>>>>>>>>>>
>>>>>>>>>> Note that only one of the two uses of hvm_load() is accompanied with
>>>>>>>>>> hvm_check(). The other directly consumes hvm_save() output, which ought
>>>>>>>>>> to be well-formed. This means that while input data related checks don't
>>>>>>>>>> need repeating in the "load" function when already done by the "check"
>>>>>>>>>> one (albeit assertions to this effect may be desirable), domain state
>>>>>>>>>> related checks (e.g. has_xyz(d)) will be required in both places.
>>>>>>>>>>
>>>>>>>>>> Suggested-by: Roger Pau Monné <roger.pau@citrix.com>
>>>>>>>>>> Signed-off-by: Jan Beulich <jbeulich@suse.com>
>>>>>>>>>> ---
>>>>>>>>>> Do we really need all the copying involved in use of _hvm_read_entry()
>>>>>>>>>> (backing hvm_load_entry())? Zero-extending loads are likely easier to
>>>>>>>>>> handle that way, but for strict loads all we gain is a reduced risk of
>>>>>>>>>> unaligned accesses (compared to simply pointing into h->data[]).
>>>>>>>>>
>>>>>>>>> See below, but I wonder whether the checks could be performed as part
>>>>>>>>> of hvm_load() without having to introduce a separate handler and loop
>>>>>>>>> over the context entries.
>>>>>>>>
>>>>>>>> Specifically not. State loading (in the longer run) would better not fail
>>>>>>>> once started. (Imo it should have been this way from the beginning.) Only
>>>>>>>> then will the vCPU still be in a predictable state even after a possible
>>>>>>>> error.
>>>>>>>
>>>>>>> Looking at the callers, does such predictable state after failure
>>>>>>> matter?
>>>>>>>
>>>>>>> One caller is an hypercall used by the toolstack at domain create,
>>>>>>> failing can just lead to the domain being destroyed.  The other caller
>>>>>>> is vm fork, which will also lead to the fork being destroyed if
>>>>>>> context loading fails.
>>>>>>>
>>>>>>> Maybe I'm overlooking something.
>>>>>>
>>>>>> You don't (I think), but existing callers necessarily have to behave the
>>>>>> way you describe. From an abstract perspective, though, failed state
>>>>>> loading would better allow a retry. And really I thought that when you
>>>>>> suggested to split checking from loading, you had exactly that in mind.
>>>>>
>>>>> Not really TBH, because I didn't think that much on a possible
>>>>> implementation when proposing it.
>>>>
>>>> But what else did you think of then in terms of separating checking from
>>>> loading?
>>>
>>> Just calling the check and load functions inside of the same loop was
>>> my initial thought.
>>
>> Okay, I was meanwhile also guessing that this might have been what you
>> thought of. I can go that route, but I wouldn't want to make it "and", but
>> "or" then, depending on a new boolean parameter to be passed to hvm_load().
>> IOW I'd still like to do all checking before all loading (in the longer
>> run, that is i.e. after individual handlers have been adapted). Would that
>> be okay with you?
> 
> Yes, that would be fine.  I assume you will introduce a 'dry run'
> parameter then?

Something like that, yes. I considered and discarded (mentally) "dry run"
for naming though, as the functions performed really differ (to me "dry
run" would mean that all the same checking would be done again when doing
the "real" run). I was further considering "check", "check_only", "load",
and "real", but to be honest I don't really like any of them. So the
naming aspect is still pending.

Jan


^ permalink raw reply	[flat|nested] 21+ messages in thread

end of thread, other threads:[~2023-12-11 13:15 UTC | newest]

Thread overview: 21+ messages (download: mbox.gz / follow: Atom feed)
-- links below jump to the message on this page --
2023-11-28 10:32 [PATCH v3 0/6] x86/HVM: load state checking Jan Beulich
2023-11-28 10:33 ` [PATCH v3 1/6] x86/HVM: introduce hvm_get_entry() Jan Beulich
2023-11-28 10:34 ` [PATCH v3 2/6] x86/HVM: split restore state checking from state loading Jan Beulich
2023-12-04 17:27   ` Roger Pau Monné
2023-12-05  8:52     ` Jan Beulich
2023-12-05 14:29       ` Roger Pau Monné
2023-12-05 14:59         ` Jan Beulich
2023-12-05 15:55           ` Roger Pau Monné
2023-12-06  7:27             ` Jan Beulich
2023-12-11 10:46               ` Roger Pau Monné
2023-12-11 11:31                 ` Jan Beulich
2023-12-11 12:43                   ` Roger Pau Monné
2023-12-11 13:15                     ` Jan Beulich
2023-11-28 10:34 ` [PATCH v3 3/6] x86/HVM: adjust save/restore hook registration for optional check handler Jan Beulich
2023-11-28 10:35 ` [PATCH v3 4/6] x86/vPIT: check values loaded from state save record Jan Beulich
2023-12-04 17:46   ` Roger Pau Monné
2023-11-28 10:35 ` [PATCH v3 5/6] x86/vPIC: vpic_elcr_mask() master bit 2 control Jan Beulich
2023-12-05 17:29   ` Roger Pau Monné
2023-12-06  7:22     ` Jan Beulich
2023-11-28 10:36 ` [PATCH v3 6/6] x86/vPIC: check values loaded from state save record Jan Beulich
2023-12-05 17:41   ` Roger Pau Monné

This is an external index of several public inboxes,
see mirroring instructions on how to clone and mirror
all data and code used by this external index.