All of lore.kernel.org
 help / color / mirror / Atom feed
* [PATCH 0/3] xen/livepatch: improvements to loading
@ 2024-09-20  9:36 Roger Pau Monne
  2024-09-20  9:36 ` [PATCH 1/3] xen/livepatch: simplify and unify logic in prepare_payload() Roger Pau Monne
                   ` (3 more replies)
  0 siblings, 4 replies; 20+ messages in thread
From: Roger Pau Monne @ 2024-09-20  9:36 UTC (permalink / raw)
  To: xen-devel; +Cc: Roger Pau Monne, Ross Lagerwall, Jan Beulich, Andrew Cooper

Hello,

Following series attempts to improve the loading of livepatches,
specifically by doing the Xen build ID check ahead of processing any of
the sections in the livepatch payload (alternatives, bug frames...).

Thanks, Roger.

Roger Pau Monne (3):
  xen/livepatch: simplify and unify logic in prepare_payload()
  xen/livepatch: do build-id check earlier
  x86/alternatives: relax apply BUGs during runtime

 xen/arch/x86/alternative.c                 |  29 +++--
 xen/arch/x86/include/asm/alternative.h     |   3 +-
 xen/common/livepatch.c                     | 121 ++++++++++-----------
 xen/test/livepatch/Makefile                |   5 +
 xen/test/livepatch/xen_alternatives_fail.c |  29 +++++
 5 files changed, 114 insertions(+), 73 deletions(-)
 create mode 100644 xen/test/livepatch/xen_alternatives_fail.c

-- 
2.46.0



^ permalink raw reply	[flat|nested] 20+ messages in thread

* [PATCH 1/3] xen/livepatch: simplify and unify logic in prepare_payload()
  2024-09-20  9:36 [PATCH 0/3] xen/livepatch: improvements to loading Roger Pau Monne
@ 2024-09-20  9:36 ` Roger Pau Monne
  2024-09-22  9:19   ` Andrew Cooper
  2024-09-23 11:01   ` Jan Beulich
  2024-09-20  9:36 ` [PATCH 2/3] xen/livepatch: do Xen build-id check earlier Roger Pau Monne
                   ` (2 subsequent siblings)
  3 siblings, 2 replies; 20+ messages in thread
From: Roger Pau Monne @ 2024-09-20  9:36 UTC (permalink / raw)
  To: xen-devel; +Cc: Roger Pau Monne, Ross Lagerwall

The following sections: .note.gnu.build-id, .livepatch.xen_depends and
.livepatch.depends are mandatory and ensured to be present by
check_special_sections() before prepare_payload() is called.

Simplify the logic in prepare_payload() by introducing a generic function to
parse the sections that contain a buildid.  Note the function assumes the
buildid related section to always be present.

No functional change intended.

Signed-off-by: Roger Pau Monné <roger.pau@citrix.com>
---
 xen/common/livepatch.c | 106 ++++++++++++++++++-----------------------
 1 file changed, 46 insertions(+), 60 deletions(-)

diff --git a/xen/common/livepatch.c b/xen/common/livepatch.c
index d93a556bcda2..cea47ffe4c84 100644
--- a/xen/common/livepatch.c
+++ b/xen/common/livepatch.c
@@ -647,15 +647,37 @@ static inline int livepatch_check_expectations(const struct payload *payload)
     nhooks = __sec->sec->sh_size / sizeof(*hook);                                         \
 } while (0)
 
+static int fetch_buildid(const struct livepatch_elf_sec *sec,
+                         struct livepatch_build_id *id)
+{
+    const Elf_Note *n = sec->load_addr;
+    int rc;
+
+    ASSERT(sec);
+
+    if ( sec->sec->sh_size <= sizeof(*n) )
+        return -EINVAL;
+
+    rc = xen_build_id_check(n, sec->sec->sh_size, &id->p, &id->len);
+    if ( rc )
+        return rc;
+
+    if ( !id->len || !id->p )
+        return -EINVAL;
+
+   return 0;
+}
+
 static int prepare_payload(struct payload *payload,
                            struct livepatch_elf *elf)
 {
     const struct livepatch_elf_sec *sec;
+    const struct payload *data;
     unsigned int i;
     struct livepatch_func *funcs;
     struct livepatch_func *f;
     struct virtual_region *region;
-    const Elf_Note *n;
+    int rc;
 
     sec = livepatch_elf_sec_by_name(elf, ELF_LIVEPATCH_FUNC);
     if ( sec )
@@ -673,8 +695,6 @@ static int prepare_payload(struct payload *payload,
 
         for ( i = 0; i < payload->nfuncs; i++ )
         {
-            int rc;
-
             f = &(funcs[i]);
 
             if ( f->version != LIVEPATCH_PAYLOAD_VERSION )
@@ -717,69 +737,35 @@ static int prepare_payload(struct payload *payload,
     LIVEPATCH_ASSIGN_SINGLE_HOOK(elf, payload->hooks.revert.action, ELF_LIVEPATCH_REVERT_HOOK);
     LIVEPATCH_ASSIGN_SINGLE_HOOK(elf, payload->hooks.revert.post, ELF_LIVEPATCH_POSTREVERT_HOOK);
 
-    sec = livepatch_elf_sec_by_name(elf, ELF_BUILD_ID_NOTE);
-    if ( sec )
-    {
-        const struct payload *data;
-
-        n = sec->load_addr;
-
-        if ( sec->sec->sh_size <= sizeof(*n) )
-            return -EINVAL;
-
-        if ( xen_build_id_check(n, sec->sec->sh_size,
-                                &payload->id.p, &payload->id.len) )
-            return -EINVAL;
-
-        if ( !payload->id.len || !payload->id.p )
-            return -EINVAL;
+    rc = fetch_buildid(livepatch_elf_sec_by_name(elf, ELF_BUILD_ID_NOTE),
+                       &payload->id);
+    if ( rc )
+        return rc;
 
-        /* Make sure it is not a duplicate. */
-        list_for_each_entry ( data, &payload_list, list )
+    /* Make sure it is not a duplicate. */
+    list_for_each_entry ( data, &payload_list, list )
+    {
+        /* No way _this_ payload is on the list. */
+        ASSERT(data != payload);
+        if ( data->id.len == payload->id.len &&
+             !memcmp(data->id.p, payload->id.p, data->id.len) )
         {
-            /* No way _this_ payload is on the list. */
-            ASSERT(data != payload);
-            if ( data->id.len == payload->id.len &&
-                 !memcmp(data->id.p, payload->id.p, data->id.len) )
-            {
-                dprintk(XENLOG_DEBUG, LIVEPATCH "%s: Already loaded as %s!\n",
-                        elf->name, data->name);
-                return -EEXIST;
-            }
+            dprintk(XENLOG_DEBUG, LIVEPATCH "%s: Already loaded as %s!\n",
+                    elf->name, data->name);
+            return -EEXIST;
         }
     }
 
-    sec = livepatch_elf_sec_by_name(elf, ELF_LIVEPATCH_DEPENDS);
-    if ( sec )
-    {
-        n = sec->load_addr;
-
-        if ( sec->sec->sh_size <= sizeof(*n) )
-            return -EINVAL;
-
-        if ( xen_build_id_check(n, sec->sec->sh_size,
-                                &payload->dep.p, &payload->dep.len) )
-            return -EINVAL;
-
-        if ( !payload->dep.len || !payload->dep.p )
-            return -EINVAL;
-    }
-
-    sec = livepatch_elf_sec_by_name(elf, ELF_LIVEPATCH_XEN_DEPENDS);
-    if ( sec )
-    {
-        n = sec->load_addr;
-
-        if ( sec->sec->sh_size <= sizeof(*n) )
-            return -EINVAL;
-
-        if ( xen_build_id_check(n, sec->sec->sh_size,
-                                &payload->xen_dep.p, &payload->xen_dep.len) )
-            return -EINVAL;
+    rc = fetch_buildid(livepatch_elf_sec_by_name(elf, ELF_LIVEPATCH_DEPENDS),
+                       &payload->dep);
+    if ( rc )
+        return rc;
 
-        if ( !payload->xen_dep.len || !payload->xen_dep.p )
-            return -EINVAL;
-    }
+    rc = fetch_buildid(livepatch_elf_sec_by_name(elf,
+                                                 ELF_LIVEPATCH_XEN_DEPENDS),
+                       &payload->xen_dep);
+    if ( rc )
+        return rc;
 
     /* Setup the virtual region with proper data. */
     region = &payload->region;
-- 
2.46.0



^ permalink raw reply related	[flat|nested] 20+ messages in thread

* [PATCH 2/3] xen/livepatch: do Xen build-id check earlier
  2024-09-20  9:36 [PATCH 0/3] xen/livepatch: improvements to loading Roger Pau Monne
  2024-09-20  9:36 ` [PATCH 1/3] xen/livepatch: simplify and unify logic in prepare_payload() Roger Pau Monne
@ 2024-09-20  9:36 ` Roger Pau Monne
  2024-09-23 11:04   ` Andrew Cooper
  2024-09-23 11:04   ` Jan Beulich
  2024-09-20  9:36 ` [PATCH 2/3] xen/livepatch: do " Roger Pau Monne
  2024-09-20  9:36 ` [PATCH 3/3] x86/alternatives: relax apply BUGs during runtime Roger Pau Monne
  3 siblings, 2 replies; 20+ messages in thread
From: Roger Pau Monne @ 2024-09-20  9:36 UTC (permalink / raw)
  To: xen-devel; +Cc: Roger Pau Monne, Ross Lagerwall

The check against the expected Xen build ID should be done ahead of attempting
to apply the alternatives contained in the livepatch.

If the CPUID in the alternatives patching data is out of the scope of the
running Xen featureset the BUG() in _apply_alternatives() will trigger thus
bringing the system down.  Note the layout of struct alt_instr could also
change between versions.  It's also possible for struct exception_table_entry
to have changed format, hence possibly leading to other errors.

Move the Xen build ID check to be done ahead of any processing of the livepatch
payload sections.

Signed-off-by: Roger Pau Monné <roger.pau@citrix.com>
---
 xen/common/livepatch.c | 9 +++++----
 1 file changed, 5 insertions(+), 4 deletions(-)

diff --git a/xen/common/livepatch.c b/xen/common/livepatch.c
index cea47ffe4c84..3e4fce036a1c 100644
--- a/xen/common/livepatch.c
+++ b/xen/common/livepatch.c
@@ -767,6 +767,11 @@ static int prepare_payload(struct payload *payload,
     if ( rc )
         return rc;
 
+    /* Perform the Xen build-id check ahead of doing any more processing. */
+    rc = xen_build_id_dep(payload);
+    if ( rc )
+        return rc;
+
     /* Setup the virtual region with proper data. */
     region = &payload->region;
 
@@ -1099,10 +1104,6 @@ static int load_payload_data(struct payload *payload, void *raw, size_t len)
     if ( rc )
         goto out;
 
-    rc = xen_build_id_dep(payload);
-    if ( rc )
-        goto out;
-
     rc = build_symbol_table(payload, &elf);
     if ( rc )
         goto out;
-- 
2.46.0



^ permalink raw reply related	[flat|nested] 20+ messages in thread

* [PATCH 2/3] xen/livepatch: do build-id check earlier
  2024-09-20  9:36 [PATCH 0/3] xen/livepatch: improvements to loading Roger Pau Monne
  2024-09-20  9:36 ` [PATCH 1/3] xen/livepatch: simplify and unify logic in prepare_payload() Roger Pau Monne
  2024-09-20  9:36 ` [PATCH 2/3] xen/livepatch: do Xen build-id check earlier Roger Pau Monne
@ 2024-09-20  9:36 ` Roger Pau Monne
  2024-09-20  9:56   ` Roger Pau Monné
  2024-09-20  9:36 ` [PATCH 3/3] x86/alternatives: relax apply BUGs during runtime Roger Pau Monne
  3 siblings, 1 reply; 20+ messages in thread
From: Roger Pau Monne @ 2024-09-20  9:36 UTC (permalink / raw)
  To: xen-devel; +Cc: Roger Pau Monne, Ross Lagerwall

The check against the expected Xen build ID should be done ahead of attempting
to apply the alternatives contained in the livepatch.

If the CPUID in the alternatives patching data is out of the scope of the
running Xen featureset the BUG() in _apply_alternatives() will trigger thus
bringing the system down.  Note the layout of struct alt_instr could also
change between versions.  It's also possible for struct exception_table_entry
to have changed format, hence possibly leading to other errors.

Move the Xen build ID check to be done ahead of any processing of the livepatch
payload sections.

Signed-off-by: Roger Pau Monné <roger.pau@citrix.com>
---
 xen/common/livepatch.c | 9 +++++----
 1 file changed, 5 insertions(+), 4 deletions(-)

diff --git a/xen/common/livepatch.c b/xen/common/livepatch.c
index 8043954ad0fc..a20f9fb2294d 100644
--- a/xen/common/livepatch.c
+++ b/xen/common/livepatch.c
@@ -765,6 +765,11 @@ static int prepare_payload(struct payload *payload,
     if ( rc )
         return rc;
 
+    /* Perform the Xen build-id check ahead of doing any more processing. */
+    rc = xen_build_id_dep(payload);
+    if ( rc )
+        return rc;
+
     /* Setup the virtual region with proper data. */
     region = &payload->region;
 
@@ -1097,10 +1102,6 @@ static int load_payload_data(struct payload *payload, void *raw, size_t len)
     if ( rc )
         goto out;
 
-    rc = xen_build_id_dep(payload);
-    if ( rc )
-        goto out;
-
     rc = build_symbol_table(payload, &elf);
     if ( rc )
         goto out;
-- 
2.46.0



^ permalink raw reply related	[flat|nested] 20+ messages in thread

* [PATCH 3/3] x86/alternatives: relax apply BUGs during runtime
  2024-09-20  9:36 [PATCH 0/3] xen/livepatch: improvements to loading Roger Pau Monne
                   ` (2 preceding siblings ...)
  2024-09-20  9:36 ` [PATCH 2/3] xen/livepatch: do " Roger Pau Monne
@ 2024-09-20  9:36 ` Roger Pau Monne
  2024-09-23 11:12   ` Jan Beulich
  2024-09-23 11:17   ` Andrew Cooper
  3 siblings, 2 replies; 20+ messages in thread
From: Roger Pau Monne @ 2024-09-20  9:36 UTC (permalink / raw)
  To: xen-devel
  Cc: Roger Pau Monne, Jan Beulich, Andrew Cooper, Ross Lagerwall,
	Roger Pau Monné

alternatives is used both at boot time, and when loading livepatch payloads.
While for the former it makes sense to panic, it's not useful for the later, as
for livepatches it's possible to fail to load the livepatch if alternatives
cannot be resolved and continue operating normally.

Relax the BUGs in _apply_alternatives() to instead return an error code if
alternatives are applied after boot.

Add a dummy livepatch test so the new logic can be assessed, with the change
applied the loading now fails with:

alt table ffff82d040602024 -> ffff82d040602032
livepatch: xen_alternatives_fail applying alternatives failed: -22

Signed-off-by: Roger Pau Monné <roge.rpau@citrix.com>
---
 xen/arch/x86/alternative.c                 | 29 ++++++++++++++++------
 xen/arch/x86/include/asm/alternative.h     |  3 ++-
 xen/common/livepatch.c                     | 10 +++++++-
 xen/test/livepatch/Makefile                |  5 ++++
 xen/test/livepatch/xen_alternatives_fail.c | 29 ++++++++++++++++++++++
 5 files changed, 66 insertions(+), 10 deletions(-)
 create mode 100644 xen/test/livepatch/xen_alternatives_fail.c

diff --git a/xen/arch/x86/alternative.c b/xen/arch/x86/alternative.c
index 7824053c9d33..c0912cb12ea5 100644
--- a/xen/arch/x86/alternative.c
+++ b/xen/arch/x86/alternative.c
@@ -174,10 +174,13 @@ extern void *const __initdata_cf_clobber_end[];
  * The caller will set the "force" argument to true for the final
  * invocation, such that no CALLs/JMPs to NULL pointers will be left
  * around. See also the further comment below.
+ *
+ * Note the function cannot fail if system_state < SYS_STATE_active, it would
+ * panic instead.  The return value is only meaningful for runtime usage.
  */
-static void init_or_livepatch _apply_alternatives(struct alt_instr *start,
-                                                  struct alt_instr *end,
-                                                  bool force)
+static int init_or_livepatch _apply_alternatives(struct alt_instr *start,
+                                                 struct alt_instr *end,
+                                                 bool force)
 {
     struct alt_instr *a, *base;
 
@@ -198,9 +201,17 @@ static void init_or_livepatch _apply_alternatives(struct alt_instr *start,
         uint8_t buf[MAX_PATCH_LEN];
         unsigned int total_len = a->orig_len + a->pad_len;
 
-        BUG_ON(a->repl_len > total_len);
-        BUG_ON(total_len > sizeof(buf));
-        BUG_ON(a->cpuid >= NCAPINTS * 32);
+#define BUG_ON_BOOT(cond)                                       \
+    if ( system_state < SYS_STATE_active )                      \
+        BUG_ON(cond);                                           \
+    else if ( cond )                                            \
+        return -EINVAL;
+
+        BUG_ON_BOOT(a->repl_len > total_len);
+        BUG_ON_BOOT(total_len > sizeof(buf));
+        BUG_ON_BOOT(a->cpuid >= NCAPINTS * 32);
+
+#undef BUG_ON_BOOT
 
         /*
          * Detect sequences of alt_instr's patching the same origin site, and
@@ -356,12 +367,14 @@ static void init_or_livepatch _apply_alternatives(struct alt_instr *start,
 
         printk("altcall: Optimised away %u endbr64 instructions\n", clobbered);
     }
+
+    return 0;
 }
 
 #ifdef CONFIG_LIVEPATCH
-void apply_alternatives(struct alt_instr *start, struct alt_instr *end)
+int apply_alternatives(struct alt_instr *start, struct alt_instr *end)
 {
-    _apply_alternatives(start, end, true);
+    return _apply_alternatives(start, end, true);
 }
 #endif
 
diff --git a/xen/arch/x86/include/asm/alternative.h b/xen/arch/x86/include/asm/alternative.h
index a86eadfaecbd..83940e1849c6 100644
--- a/xen/arch/x86/include/asm/alternative.h
+++ b/xen/arch/x86/include/asm/alternative.h
@@ -24,7 +24,8 @@ struct __packed alt_instr {
 
 extern void add_nops(void *insns, unsigned int len);
 /* Similar to alternative_instructions except it can be run with IRQs enabled. */
-extern void apply_alternatives(struct alt_instr *start, struct alt_instr *end);
+extern int __must_check apply_alternatives(struct alt_instr *start,
+                                           struct alt_instr *end);
 extern void alternative_instructions(void);
 extern void alternative_branches(void);
 
diff --git a/xen/common/livepatch.c b/xen/common/livepatch.c
index 3e4fce036a1c..1b3a9dda52a7 100644
--- a/xen/common/livepatch.c
+++ b/xen/common/livepatch.c
@@ -882,7 +882,15 @@ static int prepare_payload(struct payload *payload,
                 return -EINVAL;
             }
         }
-        apply_alternatives(start, end);
+
+        rc = apply_alternatives(start, end);
+        if ( rc )
+        {
+            printk(XENLOG_ERR LIVEPATCH "%s applying alternatives failed: %d\n",
+                   elf->name, rc);
+            return rc;
+        }
+
     alt_done:;
 #else
         printk(XENLOG_ERR LIVEPATCH "%s: We don't support alternative patching\n",
diff --git a/xen/test/livepatch/Makefile b/xen/test/livepatch/Makefile
index 4caa9e24324e..c29529b50338 100644
--- a/xen/test/livepatch/Makefile
+++ b/xen/test/livepatch/Makefile
@@ -125,6 +125,11 @@ $(obj)/xen_action_hooks_norevert.o: $(obj)/config.h
 extra-y += xen_action_hooks_norevert.livepatch
 xen_action_hooks_norevert-objs := xen_action_hooks_norevert.o xen_hello_world_func.o note.o xen_note.o
 
+$(obj)/xen_alternatives_fail.o: $(obj)/config.h
+
+extra-y += xen_alternatives_fail.livepatch
+xen_alternatives_fail-objs := xen_alternatives_fail.o note.o xen_note.o
+
 EXPECT_BYTES_COUNT := 8
 CODE_GET_EXPECT=$(shell $(OBJDUMP) -d --insn-width=1 $(1) | sed -n -e '/<'$(2)'>:$$/,/^$$/ p' | tail -n +2 | head -n $(EXPECT_BYTES_COUNT) | awk '{$$0=$$2; printf "%s", substr($$0,length-1)}' | sed 's/.\{2\}/0x&,/g' | sed 's/^/{/;s/,$$/}/g')
 $(obj)/expect_config.h: $(objtree)/xen-syms
diff --git a/xen/test/livepatch/xen_alternatives_fail.c b/xen/test/livepatch/xen_alternatives_fail.c
new file mode 100644
index 000000000000..01d289095a31
--- /dev/null
+++ b/xen/test/livepatch/xen_alternatives_fail.c
@@ -0,0 +1,29 @@
+/*
+ * Copyright (c) 2024 Cloud Software Group.
+ *
+ */
+
+#include "config.h"
+#include <xen/lib.h>
+#include <xen/livepatch_payload.h>
+
+#include <asm/alternative.h>
+#include <asm/cpuid.h>
+
+void test_alternatives(void)
+{
+    alternative("", "", NCAPINTS * 32);
+}
+
+/* Set a hook so the loading logic in Xen don't consider the payload empty. */
+LIVEPATCH_LOAD_HOOK(test_alternatives);
+
+/*
+ * Local variables:
+ * mode: C
+ * c-file-style: "BSD"
+ * c-basic-offset: 4
+ * tab-width: 4
+ * indent-tabs-mode: nil
+ * End:
+ */
-- 
2.46.0



^ permalink raw reply related	[flat|nested] 20+ messages in thread

* Re: [PATCH 2/3] xen/livepatch: do build-id check earlier
  2024-09-20  9:36 ` [PATCH 2/3] xen/livepatch: do " Roger Pau Monne
@ 2024-09-20  9:56   ` Roger Pau Monné
  0 siblings, 0 replies; 20+ messages in thread
From: Roger Pau Monné @ 2024-09-20  9:56 UTC (permalink / raw)
  To: xen-devel; +Cc: Ross Lagerwall

Ignore this one, I forgot to cleanup my output folder before
re-generating the patch series.  The correct 2/3 is:

"xen/livepatch: do Xen build-id check earlier"

Thanks, Roger.


^ permalink raw reply	[flat|nested] 20+ messages in thread

* Re: [PATCH 1/3] xen/livepatch: simplify and unify logic in prepare_payload()
  2024-09-20  9:36 ` [PATCH 1/3] xen/livepatch: simplify and unify logic in prepare_payload() Roger Pau Monne
@ 2024-09-22  9:19   ` Andrew Cooper
  2024-09-23  7:46     ` Roger Pau Monné
  2024-09-23 11:01   ` Jan Beulich
  1 sibling, 1 reply; 20+ messages in thread
From: Andrew Cooper @ 2024-09-22  9:19 UTC (permalink / raw)
  To: Roger Pau Monne, xen-devel; +Cc: Ross Lagerwall

On 20/09/2024 11:36 am, Roger Pau Monne wrote:
> diff --git a/xen/common/livepatch.c b/xen/common/livepatch.c
> index d93a556bcda2..cea47ffe4c84 100644
> --- a/xen/common/livepatch.c
> +++ b/xen/common/livepatch.c
> @@ -647,15 +647,37 @@ static inline int livepatch_check_expectations(const struct payload *payload)
>      nhooks = __sec->sec->sh_size / sizeof(*hook);                                         \
>  } while (0)
>  
> +static int fetch_buildid(const struct livepatch_elf_sec *sec,
> +                         struct livepatch_build_id *id)

Is this really fetch?  I'd describe it as parse, more than fetch.

> +{
> +    const Elf_Note *n = sec->load_addr;
> +    int rc;
> +
> +    ASSERT(sec);

This needs to turn back into a runtime check.  Now, if a livepatch is
missing one of the sections, we'll dereference NULL below, rather than
leaving no data in the struct livepatch_build_id.

~Andrew


^ permalink raw reply	[flat|nested] 20+ messages in thread

* Re: [PATCH 1/3] xen/livepatch: simplify and unify logic in prepare_payload()
  2024-09-22  9:19   ` Andrew Cooper
@ 2024-09-23  7:46     ` Roger Pau Monné
  2024-09-23  9:43       ` Andrew Cooper
  0 siblings, 1 reply; 20+ messages in thread
From: Roger Pau Monné @ 2024-09-23  7:46 UTC (permalink / raw)
  To: Andrew Cooper; +Cc: xen-devel, Ross Lagerwall

On Sun, Sep 22, 2024 at 11:19:01AM +0200, Andrew Cooper wrote:
> On 20/09/2024 11:36 am, Roger Pau Monne wrote:
> > diff --git a/xen/common/livepatch.c b/xen/common/livepatch.c
> > index d93a556bcda2..cea47ffe4c84 100644
> > --- a/xen/common/livepatch.c
> > +++ b/xen/common/livepatch.c
> > @@ -647,15 +647,37 @@ static inline int livepatch_check_expectations(const struct payload *payload)
> >      nhooks = __sec->sec->sh_size / sizeof(*hook);                                         \
> >  } while (0)
> >  
> > +static int fetch_buildid(const struct livepatch_elf_sec *sec,
> > +                         struct livepatch_build_id *id)
> 
> Is this really fetch?  I'd describe it as parse, more than fetch.

I can indeed change the naming.  I've used fetch because it 'fetches'
the contents of livepatch_build_id.

> > +{
> > +    const Elf_Note *n = sec->load_addr;
> > +    int rc;
> > +
> > +    ASSERT(sec);
> 
> This needs to turn back into a runtime check.  Now, if a livepatch is
> missing one of the sections, we'll dereference NULL below, rather than
> leaving no data in the struct livepatch_build_id.

Loading should never get here without those sections being present,
check_special_sections() called earlier will return error if any of
the sections is not present, hence the ASSERT() is fine IMO.

I could do `if ( !sec ) { ASSERT_UNREACHABLE(); return -ENOENT; }`,
but given the code in check_special_sections() that checks the section
presence just ahead it seemed unnecessary convoluted.

Thanks, Roger.


^ permalink raw reply	[flat|nested] 20+ messages in thread

* Re: [PATCH 1/3] xen/livepatch: simplify and unify logic in prepare_payload()
  2024-09-23  7:46     ` Roger Pau Monné
@ 2024-09-23  9:43       ` Andrew Cooper
  2024-09-23 11:00         ` Jan Beulich
  0 siblings, 1 reply; 20+ messages in thread
From: Andrew Cooper @ 2024-09-23  9:43 UTC (permalink / raw)
  To: Roger Pau Monné; +Cc: xen-devel, Ross Lagerwall

On 23/09/2024 8:46 am, Roger Pau Monné wrote:
> On Sun, Sep 22, 2024 at 11:19:01AM +0200, Andrew Cooper wrote:
>> On 20/09/2024 11:36 am, Roger Pau Monne wrote:
>>> +{
>>> +    const Elf_Note *n = sec->load_addr;
>>> +    int rc;
>>> +
>>> +    ASSERT(sec);
>> This needs to turn back into a runtime check.  Now, if a livepatch is
>> missing one of the sections, we'll dereference NULL below, rather than
>> leaving no data in the struct livepatch_build_id.
> Loading should never get here without those sections being present,
> check_special_sections() called earlier will return error if any of
> the sections is not present, hence the ASSERT() is fine IMO.

Ah, in which case, can we please have:

/* Existence of note sections already confirmed in
check_special_sections() */
ASSERT(sec);

Just to show that someone did think about the provenance of the pointer,
and where to look to check.

With this and the rename, Reviewed-by: Andrew Cooper
<andrew.cooper3@citrix.com>


^ permalink raw reply	[flat|nested] 20+ messages in thread

* Re: [PATCH 1/3] xen/livepatch: simplify and unify logic in prepare_payload()
  2024-09-23  9:43       ` Andrew Cooper
@ 2024-09-23 11:00         ` Jan Beulich
  0 siblings, 0 replies; 20+ messages in thread
From: Jan Beulich @ 2024-09-23 11:00 UTC (permalink / raw)
  To: Andrew Cooper, Roger Pau Monné; +Cc: xen-devel, Ross Lagerwall

On 23.09.2024 11:43, Andrew Cooper wrote:
> On 23/09/2024 8:46 am, Roger Pau Monné wrote:
>> On Sun, Sep 22, 2024 at 11:19:01AM +0200, Andrew Cooper wrote:
>>> On 20/09/2024 11:36 am, Roger Pau Monne wrote:
>>>> +{
>>>> +    const Elf_Note *n = sec->load_addr;
>>>> +    int rc;
>>>> +
>>>> +    ASSERT(sec);
>>> This needs to turn back into a runtime check.  Now, if a livepatch is
>>> missing one of the sections, we'll dereference NULL below, rather than
>>> leaving no data in the struct livepatch_build_id.
>> Loading should never get here without those sections being present,
>> check_special_sections() called earlier will return error if any of
>> the sections is not present, hence the ASSERT() is fine IMO.
> 
> Ah, in which case, can we please have:
> 
> /* Existence of note sections already confirmed in
> check_special_sections() */
> ASSERT(sec);
> 
> Just to show that someone did think about the provenance of the pointer,
> and where to look to check.

Yet then sec was de-referenced already ahead of the assertion, which
static checkers may have to say something about.

Jan



^ permalink raw reply	[flat|nested] 20+ messages in thread

* Re: [PATCH 1/3] xen/livepatch: simplify and unify logic in prepare_payload()
  2024-09-20  9:36 ` [PATCH 1/3] xen/livepatch: simplify and unify logic in prepare_payload() Roger Pau Monne
  2024-09-22  9:19   ` Andrew Cooper
@ 2024-09-23 11:01   ` Jan Beulich
  2024-09-23 13:10     ` Roger Pau Monné
  1 sibling, 1 reply; 20+ messages in thread
From: Jan Beulich @ 2024-09-23 11:01 UTC (permalink / raw)
  To: Roger Pau Monne; +Cc: Ross Lagerwall, xen-devel

On 20.09.2024 11:36, Roger Pau Monne wrote:
> The following sections: .note.gnu.build-id, .livepatch.xen_depends and
> .livepatch.depends are mandatory and ensured to be present by
> check_special_sections() before prepare_payload() is called.
> 
> Simplify the logic in prepare_payload() by introducing a generic function to
> parse the sections that contain a buildid.  Note the function assumes the
> buildid related section to always be present.
> 
> No functional change intended.
> 
> Signed-off-by: Roger Pau Monné <roger.pau@citrix.com>
> ---
>  xen/common/livepatch.c | 106 ++++++++++++++++++-----------------------
>  1 file changed, 46 insertions(+), 60 deletions(-)
> 
> diff --git a/xen/common/livepatch.c b/xen/common/livepatch.c
> index d93a556bcda2..cea47ffe4c84 100644
> --- a/xen/common/livepatch.c
> +++ b/xen/common/livepatch.c
> @@ -647,15 +647,37 @@ static inline int livepatch_check_expectations(const struct payload *payload)
>      nhooks = __sec->sec->sh_size / sizeof(*hook);                                         \
>  } while (0)
>  
> +static int fetch_buildid(const struct livepatch_elf_sec *sec,
> +                         struct livepatch_build_id *id)
> +{
> +    const Elf_Note *n = sec->load_addr;
> +    int rc;
> +
> +    ASSERT(sec);
> +
> +    if ( sec->sec->sh_size <= sizeof(*n) )
> +        return -EINVAL;

Oh, after my reply to Andrew's reply, now looking at the actual change -
is it perhaps ASSERT(sec->sec) that was meant?

Jan


^ permalink raw reply	[flat|nested] 20+ messages in thread

* Re: [PATCH 2/3] xen/livepatch: do Xen build-id check earlier
  2024-09-20  9:36 ` [PATCH 2/3] xen/livepatch: do Xen build-id check earlier Roger Pau Monne
  2024-09-23 11:04   ` Andrew Cooper
@ 2024-09-23 11:04   ` Jan Beulich
  1 sibling, 0 replies; 20+ messages in thread
From: Jan Beulich @ 2024-09-23 11:04 UTC (permalink / raw)
  To: Roger Pau Monne; +Cc: Ross Lagerwall, xen-devel

On 20.09.2024 11:36, Roger Pau Monne wrote:
> The check against the expected Xen build ID should be done ahead of attempting
> to apply the alternatives contained in the livepatch.
> 
> If the CPUID in the alternatives patching data is out of the scope of the
> running Xen featureset the BUG() in _apply_alternatives() will trigger thus
> bringing the system down.

Sounds like there wants to be a Fixes: tag then?

Jan


^ permalink raw reply	[flat|nested] 20+ messages in thread

* Re: [PATCH 2/3] xen/livepatch: do Xen build-id check earlier
  2024-09-20  9:36 ` [PATCH 2/3] xen/livepatch: do Xen build-id check earlier Roger Pau Monne
@ 2024-09-23 11:04   ` Andrew Cooper
  2024-09-23 11:55     ` Andrew Cooper
  2024-09-23 16:02     ` Roger Pau Monné
  2024-09-23 11:04   ` Jan Beulich
  1 sibling, 2 replies; 20+ messages in thread
From: Andrew Cooper @ 2024-09-23 11:04 UTC (permalink / raw)
  To: Roger Pau Monne, xen-devel; +Cc: Ross Lagerwall

On 20/09/2024 10:36 am, Roger Pau Monne wrote:
> The check against the expected Xen build ID should be done ahead of attempting
> to apply the alternatives contained in the livepatch.
>
> If the CPUID in the alternatives patching data is out of the scope of the
> running Xen featureset the BUG() in _apply_alternatives() will trigger thus
> bringing the system down.  Note the layout of struct alt_instr could also
> change between versions.  It's also possible for struct exception_table_entry
> to have changed format, hence possibly leading to other errors.
>
> Move the Xen build ID check to be done ahead of any processing of the livepatch
> payload sections.
>
> Signed-off-by: Roger Pau Monné <roger.pau@citrix.com>
> ---
>  xen/common/livepatch.c | 9 +++++----
>  1 file changed, 5 insertions(+), 4 deletions(-)
>
> diff --git a/xen/common/livepatch.c b/xen/common/livepatch.c
> index cea47ffe4c84..3e4fce036a1c 100644
> --- a/xen/common/livepatch.c
> +++ b/xen/common/livepatch.c
> @@ -767,6 +767,11 @@ static int prepare_payload(struct payload *payload,
>      if ( rc )
>          return rc;
>  
> +    /* Perform the Xen build-id check ahead of doing any more processing. */
> +    rc = xen_build_id_dep(payload);
> +    if ( rc )
> +        return rc;
> +

While a step in the right direction, I think this needs to be moved far
earlier.  Even here, it's behind the processing of the livepatch func
state, which is something that can also change like alt_instr.

The buildid checks need to be as early as possible.  Looking through the
logic (which doesn't have great names/splits), I'd say the buildid
checks want to be between livepatch_elf_load() (which parses the
structure of the ELF), and move_payload() (which starts copying it into
place).

That would involve moving check_special_sections() too, but I think it's
the right thing to do.

Absolutely nothing good can come from continuing to process/setup a
livepatch identified as "not for this hypervisor".

~Andrew


^ permalink raw reply	[flat|nested] 20+ messages in thread

* Re: [PATCH 3/3] x86/alternatives: relax apply BUGs during runtime
  2024-09-20  9:36 ` [PATCH 3/3] x86/alternatives: relax apply BUGs during runtime Roger Pau Monne
@ 2024-09-23 11:12   ` Jan Beulich
  2024-09-23 11:17   ` Andrew Cooper
  1 sibling, 0 replies; 20+ messages in thread
From: Jan Beulich @ 2024-09-23 11:12 UTC (permalink / raw)
  To: Roger Pau Monne; +Cc: Andrew Cooper, Ross Lagerwall, xen-devel

On 20.09.2024 11:36, Roger Pau Monne wrote:
> @@ -198,9 +201,17 @@ static void init_or_livepatch _apply_alternatives(struct alt_instr *start,
>          uint8_t buf[MAX_PATCH_LEN];
>          unsigned int total_len = a->orig_len + a->pad_len;
>  
> -        BUG_ON(a->repl_len > total_len);
> -        BUG_ON(total_len > sizeof(buf));
> -        BUG_ON(a->cpuid >= NCAPINTS * 32);
> +#define BUG_ON_BOOT(cond)                                       \
> +    if ( system_state < SYS_STATE_active )                      \
> +        BUG_ON(cond);                                           \
> +    else if ( cond )                                            \
> +        return -EINVAL;
> +
> +        BUG_ON_BOOT(a->repl_len > total_len);
> +        BUG_ON_BOOT(total_len > sizeof(buf));
> +        BUG_ON_BOOT(a->cpuid >= NCAPINTS * 32);
> +
> +#undef BUG_ON_BOOT

BUG_ON() provides quite a bit of information to aid figuring what's wrong.
Without a log message in the livepatching case ...

> --- a/xen/common/livepatch.c
> +++ b/xen/common/livepatch.c
> @@ -882,7 +882,15 @@ static int prepare_payload(struct payload *payload,
>                  return -EINVAL;
>              }
>          }
> -        apply_alternatives(start, end);
> +
> +        rc = apply_alternatives(start, end);
> +        if ( rc )
> +        {
> +            printk(XENLOG_ERR LIVEPATCH "%s applying alternatives failed: %d\n",
> +                   elf->name, rc);

... this is all one would get. Since livepatching is a privileged operation,
log-spam also shouldn't be of concern. So I'd like to ask that at least some
detail (line number to start with) be provided.

Which however leads to a follow-on concern: Those string literals then
would also end up in LIVEPATCH=n binaries, so I'd like to further ask for
a suitable IS_ENABLED() check to prevent that happening.

Jan


^ permalink raw reply	[flat|nested] 20+ messages in thread

* Re: [PATCH 3/3] x86/alternatives: relax apply BUGs during runtime
  2024-09-20  9:36 ` [PATCH 3/3] x86/alternatives: relax apply BUGs during runtime Roger Pau Monne
  2024-09-23 11:12   ` Jan Beulich
@ 2024-09-23 11:17   ` Andrew Cooper
  2024-09-23 13:06     ` Roger Pau Monné
  1 sibling, 1 reply; 20+ messages in thread
From: Andrew Cooper @ 2024-09-23 11:17 UTC (permalink / raw)
  To: Roger Pau Monne, xen-devel
  Cc: Jan Beulich, Ross Lagerwall, Roger Pau Monné

On 20/09/2024 10:36 am, Roger Pau Monne wrote:
> alternatives is used both at boot time, and when loading livepatch payloads.
> While for the former it makes sense to panic, it's not useful for the later, as
> for livepatches it's possible to fail to load the livepatch if alternatives
> cannot be resolved and continue operating normally.
>
> Relax the BUGs in _apply_alternatives() to instead return an error code if
> alternatives are applied after boot.
>
> Add a dummy livepatch test so the new logic can be assessed, with the change
> applied the loading now fails with:
>
> alt table ffff82d040602024 -> ffff82d040602032
> livepatch: xen_alternatives_fail applying alternatives failed: -22
>
> Signed-off-by: Roger Pau Monné <roge.rpau@citrix.com>
> ---
>  xen/arch/x86/alternative.c                 | 29 ++++++++++++++++------
>  xen/arch/x86/include/asm/alternative.h     |  3 ++-
>  xen/common/livepatch.c                     | 10 +++++++-
>  xen/test/livepatch/Makefile                |  5 ++++
>  xen/test/livepatch/xen_alternatives_fail.c | 29 ++++++++++++++++++++++
>  5 files changed, 66 insertions(+), 10 deletions(-)
>  create mode 100644 xen/test/livepatch/xen_alternatives_fail.c
>
> diff --git a/xen/arch/x86/alternative.c b/xen/arch/x86/alternative.c
> index 7824053c9d33..c0912cb12ea5 100644
> --- a/xen/arch/x86/alternative.c
> +++ b/xen/arch/x86/alternative.c
> @@ -174,10 +174,13 @@ extern void *const __initdata_cf_clobber_end[];
>   * The caller will set the "force" argument to true for the final
>   * invocation, such that no CALLs/JMPs to NULL pointers will be left
>   * around. See also the further comment below.
> + *
> + * Note the function cannot fail if system_state < SYS_STATE_active, it would
> + * panic instead.  The return value is only meaningful for runtime usage.
>   */
> -static void init_or_livepatch _apply_alternatives(struct alt_instr *start,
> -                                                  struct alt_instr *end,
> -                                                  bool force)
> +static int init_or_livepatch _apply_alternatives(struct alt_instr *start,
> +                                                 struct alt_instr *end,
> +                                                 bool force)
>  {
>      struct alt_instr *a, *base;
>  
> @@ -198,9 +201,17 @@ static void init_or_livepatch _apply_alternatives(struct alt_instr *start,
>          uint8_t buf[MAX_PATCH_LEN];
>          unsigned int total_len = a->orig_len + a->pad_len;
>  
> -        BUG_ON(a->repl_len > total_len);
> -        BUG_ON(total_len > sizeof(buf));
> -        BUG_ON(a->cpuid >= NCAPINTS * 32);
> +#define BUG_ON_BOOT(cond)                                       \
> +    if ( system_state < SYS_STATE_active )                      \
> +        BUG_ON(cond);                                           \
> +    else if ( cond )                                            \
> +        return -EINVAL;
> +
> +        BUG_ON_BOOT(a->repl_len > total_len);
> +        BUG_ON_BOOT(total_len > sizeof(buf));
> +        BUG_ON_BOOT(a->cpuid >= NCAPINTS * 32);
> +
> +#undef BUG_ON_BOOT

The "error handling" before was horrible and this isn't really any better.

This function should always return int, printing more helpful info than
that (a printk() says a thousand things better than a BUG()), and
nmi_apply_alternatives can panic() rather than leaving these BUG()s here.

As a tangent, the __must_check doesn't seem to have been applied to
nmi_apply_alternatives(), but I'd suggest dropping the attribute; I
don't think it adds much.

> diff --git a/xen/test/livepatch/xen_alternatives_fail.c b/xen/test/livepatch/xen_alternatives_fail.c
> new file mode 100644
> index 000000000000..01d289095a31
> --- /dev/null
> +++ b/xen/test/livepatch/xen_alternatives_fail.c
> @@ -0,0 +1,29 @@
> +/*
> + * Copyright (c) 2024 Cloud Software Group.
> + *
> + */
> +
> +#include "config.h"
> +#include <xen/lib.h>
> +#include <xen/livepatch_payload.h>
> +
> +#include <asm/alternative.h>
> +#include <asm/cpuid.h>
> +
> +void test_alternatives(void)
> +{
> +    alternative("", "", NCAPINTS * 32);
> +}
> +
> +/* Set a hook so the loading logic in Xen don't consider the payload empty. */
> +LIVEPATCH_LOAD_HOOK(test_alternatives);
> +
> +/*
> + * Local variables:
> + * mode: C
> + * c-file-style: "BSD"
> + * c-basic-offset: 4
> + * tab-width: 4
> + * indent-tabs-mode: nil
> + * End:
> + */

The second half of the patch (new testcase) is all fine and good, but
should pass with patch 2 in place?  I'd suggest splitting it out.

~Andrew


^ permalink raw reply	[flat|nested] 20+ messages in thread

* Re: [PATCH 2/3] xen/livepatch: do Xen build-id check earlier
  2024-09-23 11:04   ` Andrew Cooper
@ 2024-09-23 11:55     ` Andrew Cooper
  2024-09-23 16:02     ` Roger Pau Monné
  1 sibling, 0 replies; 20+ messages in thread
From: Andrew Cooper @ 2024-09-23 11:55 UTC (permalink / raw)
  To: Roger Pau Monne, xen-devel; +Cc: Ross Lagerwall

On 23/09/2024 12:04 pm, Andrew Cooper wrote:
> On 20/09/2024 10:36 am, Roger Pau Monne wrote:
>> The check against the expected Xen build ID should be done ahead of attempting
>> to apply the alternatives contained in the livepatch.
>>
>> If the CPUID in the alternatives patching data is out of the scope of the
>> running Xen featureset the BUG() in _apply_alternatives() will trigger thus
>> bringing the system down.  Note the layout of struct alt_instr could also
>> change between versions.  It's also possible for struct exception_table_entry
>> to have changed format, hence possibly leading to other errors.
>>
>> Move the Xen build ID check to be done ahead of any processing of the livepatch
>> payload sections.
>>
>> Signed-off-by: Roger Pau Monné <roger.pau@citrix.com>
>> ---
>>  xen/common/livepatch.c | 9 +++++----
>>  1 file changed, 5 insertions(+), 4 deletions(-)
>>
>> diff --git a/xen/common/livepatch.c b/xen/common/livepatch.c
>> index cea47ffe4c84..3e4fce036a1c 100644
>> --- a/xen/common/livepatch.c
>> +++ b/xen/common/livepatch.c
>> @@ -767,6 +767,11 @@ static int prepare_payload(struct payload *payload,
>>      if ( rc )
>>          return rc;
>>  
>> +    /* Perform the Xen build-id check ahead of doing any more processing. */
>> +    rc = xen_build_id_dep(payload);
>> +    if ( rc )
>> +        return rc;
>> +
> While a step in the right direction, I think this needs to be moved far
> earlier.  Even here, it's behind the processing of the livepatch func
> state, which is something that can also change like alt_instr.
>
> The buildid checks need to be as early as possible.  Looking through the
> logic (which doesn't have great names/splits), I'd say the buildid
> checks want to be between livepatch_elf_load() (which parses the
> structure of the ELF), and move_payload() (which starts copying it into
> place).
>
> That would involve moving check_special_sections() too, but I think it's
> the right thing to do.
>
> Absolutely nothing good can come from continuing to process/setup a
> livepatch identified as "not for this hypervisor".

Further, we should probably fuzz this (KFX style).  I bet the
alternatives aren't the only now-reachable BUG()'s.

While we don't expect people to pass bad livepatches, this mess was
discovered by our CI system getting confused and passing the wrong (of
several otherwise good) livepatch for the currently running hypervisor.

~Andrew


^ permalink raw reply	[flat|nested] 20+ messages in thread

* Re: [PATCH 3/3] x86/alternatives: relax apply BUGs during runtime
  2024-09-23 11:17   ` Andrew Cooper
@ 2024-09-23 13:06     ` Roger Pau Monné
  2024-09-23 13:12       ` Andrew Cooper
  0 siblings, 1 reply; 20+ messages in thread
From: Roger Pau Monné @ 2024-09-23 13:06 UTC (permalink / raw)
  To: Andrew Cooper
  Cc: xen-devel, Jan Beulich, Ross Lagerwall, Roger Pau Monné

On Mon, Sep 23, 2024 at 12:17:51PM +0100, Andrew Cooper wrote:
> On 20/09/2024 10:36 am, Roger Pau Monne wrote:
> > alternatives is used both at boot time, and when loading livepatch payloads.
> > While for the former it makes sense to panic, it's not useful for the later, as
> > for livepatches it's possible to fail to load the livepatch if alternatives
> > cannot be resolved and continue operating normally.
> >
> > Relax the BUGs in _apply_alternatives() to instead return an error code if
> > alternatives are applied after boot.
> >
> > Add a dummy livepatch test so the new logic can be assessed, with the change
> > applied the loading now fails with:
> >
> > alt table ffff82d040602024 -> ffff82d040602032
> > livepatch: xen_alternatives_fail applying alternatives failed: -22
> >
> > Signed-off-by: Roger Pau Monné <roge.rpau@citrix.com>
> > ---
> >  xen/arch/x86/alternative.c                 | 29 ++++++++++++++++------
> >  xen/arch/x86/include/asm/alternative.h     |  3 ++-
> >  xen/common/livepatch.c                     | 10 +++++++-
> >  xen/test/livepatch/Makefile                |  5 ++++
> >  xen/test/livepatch/xen_alternatives_fail.c | 29 ++++++++++++++++++++++
> >  5 files changed, 66 insertions(+), 10 deletions(-)
> >  create mode 100644 xen/test/livepatch/xen_alternatives_fail.c
> >
> > diff --git a/xen/arch/x86/alternative.c b/xen/arch/x86/alternative.c
> > index 7824053c9d33..c0912cb12ea5 100644
> > --- a/xen/arch/x86/alternative.c
> > +++ b/xen/arch/x86/alternative.c
> > @@ -174,10 +174,13 @@ extern void *const __initdata_cf_clobber_end[];
> >   * The caller will set the "force" argument to true for the final
> >   * invocation, such that no CALLs/JMPs to NULL pointers will be left
> >   * around. See also the further comment below.
> > + *
> > + * Note the function cannot fail if system_state < SYS_STATE_active, it would
> > + * panic instead.  The return value is only meaningful for runtime usage.
> >   */
> > -static void init_or_livepatch _apply_alternatives(struct alt_instr *start,
> > -                                                  struct alt_instr *end,
> > -                                                  bool force)
> > +static int init_or_livepatch _apply_alternatives(struct alt_instr *start,
> > +                                                 struct alt_instr *end,
> > +                                                 bool force)
> >  {
> >      struct alt_instr *a, *base;
> >  
> > @@ -198,9 +201,17 @@ static void init_or_livepatch _apply_alternatives(struct alt_instr *start,
> >          uint8_t buf[MAX_PATCH_LEN];
> >          unsigned int total_len = a->orig_len + a->pad_len;
> >  
> > -        BUG_ON(a->repl_len > total_len);
> > -        BUG_ON(total_len > sizeof(buf));
> > -        BUG_ON(a->cpuid >= NCAPINTS * 32);
> > +#define BUG_ON_BOOT(cond)                                       \
> > +    if ( system_state < SYS_STATE_active )                      \
> > +        BUG_ON(cond);                                           \
> > +    else if ( cond )                                            \
> > +        return -EINVAL;
> > +
> > +        BUG_ON_BOOT(a->repl_len > total_len);
> > +        BUG_ON_BOOT(total_len > sizeof(buf));
> > +        BUG_ON_BOOT(a->cpuid >= NCAPINTS * 32);
> > +
> > +#undef BUG_ON_BOOT
> 
> The "error handling" before was horrible and this isn't really any better.
> 
> This function should always return int, printing more helpful info than
> that (a printk() says a thousand things better than a BUG()), and
> nmi_apply_alternatives can panic() rather than leaving these BUG()s here.

OK, will rework the logic here so it's the caller that panics (or not)
as necessary, and _apply_alternatives() always prints some error
message.

> As a tangent, the __must_check doesn't seem to have been applied to
> nmi_apply_alternatives(), but I'd suggest dropping the attribute; I
> don't think it adds much.

I didn't see the value in adding the attribute to
nmi_apply_alternatives(), as in that context _apply_alternatives()
would unconditionally panic instead of returning an error code.

> > diff --git a/xen/test/livepatch/xen_alternatives_fail.c b/xen/test/livepatch/xen_alternatives_fail.c
> > new file mode 100644
> > index 000000000000..01d289095a31
> > --- /dev/null
> > +++ b/xen/test/livepatch/xen_alternatives_fail.c
> > @@ -0,0 +1,29 @@
> > +/*
> > + * Copyright (c) 2024 Cloud Software Group.
> > + *
> > + */
> > +
> > +#include "config.h"
> > +#include <xen/lib.h>
> > +#include <xen/livepatch_payload.h>
> > +
> > +#include <asm/alternative.h>
> > +#include <asm/cpuid.h>
> > +
> > +void test_alternatives(void)
> > +{
> > +    alternative("", "", NCAPINTS * 32);
> > +}
> > +
> > +/* Set a hook so the loading logic in Xen don't consider the payload empty. */
> > +LIVEPATCH_LOAD_HOOK(test_alternatives);
> > +
> > +/*
> > + * Local variables:
> > + * mode: C
> > + * c-file-style: "BSD"
> > + * c-basic-offset: 4
> > + * tab-width: 4
> > + * indent-tabs-mode: nil
> > + * End:
> > + */
> 
> The second half of the patch (new testcase) is all fine and good, but
> should pass with patch 2 in place?  I'd suggest splitting it out.

No, not really.  The Xen buildid for this patch will be correctly set
to match the running one, but the alternatives feature CPUID is
explicitly set to an out of range value (NCAPINTS * 32) to trigger the
BUG_ON condition.

Further thinking about it, I think we should add a build time assert
that the feature parameters in the alternative calls are smaller than
NCAPINTS * 32.

Thanks, Roger.


^ permalink raw reply	[flat|nested] 20+ messages in thread

* Re: [PATCH 1/3] xen/livepatch: simplify and unify logic in prepare_payload()
  2024-09-23 11:01   ` Jan Beulich
@ 2024-09-23 13:10     ` Roger Pau Monné
  0 siblings, 0 replies; 20+ messages in thread
From: Roger Pau Monné @ 2024-09-23 13:10 UTC (permalink / raw)
  To: Jan Beulich; +Cc: Ross Lagerwall, xen-devel

On Mon, Sep 23, 2024 at 01:01:57PM +0200, Jan Beulich wrote:
> On 20.09.2024 11:36, Roger Pau Monne wrote:
> > The following sections: .note.gnu.build-id, .livepatch.xen_depends and
> > .livepatch.depends are mandatory and ensured to be present by
> > check_special_sections() before prepare_payload() is called.
> > 
> > Simplify the logic in prepare_payload() by introducing a generic function to
> > parse the sections that contain a buildid.  Note the function assumes the
> > buildid related section to always be present.
> > 
> > No functional change intended.
> > 
> > Signed-off-by: Roger Pau Monné <roger.pau@citrix.com>
> > ---
> >  xen/common/livepatch.c | 106 ++++++++++++++++++-----------------------
> >  1 file changed, 46 insertions(+), 60 deletions(-)
> > 
> > diff --git a/xen/common/livepatch.c b/xen/common/livepatch.c
> > index d93a556bcda2..cea47ffe4c84 100644
> > --- a/xen/common/livepatch.c
> > +++ b/xen/common/livepatch.c
> > @@ -647,15 +647,37 @@ static inline int livepatch_check_expectations(const struct payload *payload)
> >      nhooks = __sec->sec->sh_size / sizeof(*hook);                                         \
> >  } while (0)
> >  
> > +static int fetch_buildid(const struct livepatch_elf_sec *sec,
> > +                         struct livepatch_build_id *id)
> > +{
> > +    const Elf_Note *n = sec->load_addr;
> > +    int rc;
> > +
> > +    ASSERT(sec);
> > +
> > +    if ( sec->sec->sh_size <= sizeof(*n) )
> > +        return -EINVAL;
> 
> Oh, after my reply to Andrew's reply, now looking at the actual change -
> is it perhaps ASSERT(sec->sec) that was meant?

The original check before moving the code was against 'sec', not
'sec->sec'.  That's what I intending to retain with the assert.

I can do the `n = sec->load_addr` assign after the assert if that's
better analysis wise.

Thanks, Roger.


^ permalink raw reply	[flat|nested] 20+ messages in thread

* Re: [PATCH 3/3] x86/alternatives: relax apply BUGs during runtime
  2024-09-23 13:06     ` Roger Pau Monné
@ 2024-09-23 13:12       ` Andrew Cooper
  0 siblings, 0 replies; 20+ messages in thread
From: Andrew Cooper @ 2024-09-23 13:12 UTC (permalink / raw)
  To: Roger Pau Monné
  Cc: xen-devel, Jan Beulich, Ross Lagerwall, Roger Pau Monné

On 23/09/2024 2:06 pm, Roger Pau Monné wrote:
> On Mon, Sep 23, 2024 at 12:17:51PM +0100, Andrew Cooper wrote:
>> On 20/09/2024 10:36 am, Roger Pau Monne wrote:
>>> alternatives is used both at boot time, and when loading livepatch payloads.
>>> While for the former it makes sense to panic, it's not useful for the later, as
>>> for livepatches it's possible to fail to load the livepatch if alternatives
>>> cannot be resolved and continue operating normally.
>>>
>>> Relax the BUGs in _apply_alternatives() to instead return an error code if
>>> alternatives are applied after boot.
>>>
>>> Add a dummy livepatch test so the new logic can be assessed, with the change
>>> applied the loading now fails with:
>>>
>>> alt table ffff82d040602024 -> ffff82d040602032
>>> livepatch: xen_alternatives_fail applying alternatives failed: -22
>>>
>>> Signed-off-by: Roger Pau Monné <roge.rpau@citrix.com>
>>> ---
>>>  xen/arch/x86/alternative.c                 | 29 ++++++++++++++++------
>>>  xen/arch/x86/include/asm/alternative.h     |  3 ++-
>>>  xen/common/livepatch.c                     | 10 +++++++-
>>>  xen/test/livepatch/Makefile                |  5 ++++
>>>  xen/test/livepatch/xen_alternatives_fail.c | 29 ++++++++++++++++++++++
>>>  5 files changed, 66 insertions(+), 10 deletions(-)
>>>  create mode 100644 xen/test/livepatch/xen_alternatives_fail.c
>>>
>>> diff --git a/xen/arch/x86/alternative.c b/xen/arch/x86/alternative.c
>>> index 7824053c9d33..c0912cb12ea5 100644
>>> --- a/xen/arch/x86/alternative.c
>>> +++ b/xen/arch/x86/alternative.c
>>> @@ -174,10 +174,13 @@ extern void *const __initdata_cf_clobber_end[];
>>>   * The caller will set the "force" argument to true for the final
>>>   * invocation, such that no CALLs/JMPs to NULL pointers will be left
>>>   * around. See also the further comment below.
>>> + *
>>> + * Note the function cannot fail if system_state < SYS_STATE_active, it would
>>> + * panic instead.  The return value is only meaningful for runtime usage.
>>>   */
>>> -static void init_or_livepatch _apply_alternatives(struct alt_instr *start,
>>> -                                                  struct alt_instr *end,
>>> -                                                  bool force)
>>> +static int init_or_livepatch _apply_alternatives(struct alt_instr *start,
>>> +                                                 struct alt_instr *end,
>>> +                                                 bool force)
>>>  {
>>>      struct alt_instr *a, *base;
>>>  
>>> @@ -198,9 +201,17 @@ static void init_or_livepatch _apply_alternatives(struct alt_instr *start,
>>>          uint8_t buf[MAX_PATCH_LEN];
>>>          unsigned int total_len = a->orig_len + a->pad_len;
>>>  
>>> -        BUG_ON(a->repl_len > total_len);
>>> -        BUG_ON(total_len > sizeof(buf));
>>> -        BUG_ON(a->cpuid >= NCAPINTS * 32);
>>> +#define BUG_ON_BOOT(cond)                                       \
>>> +    if ( system_state < SYS_STATE_active )                      \
>>> +        BUG_ON(cond);                                           \
>>> +    else if ( cond )                                            \
>>> +        return -EINVAL;
>>> +
>>> +        BUG_ON_BOOT(a->repl_len > total_len);
>>> +        BUG_ON_BOOT(total_len > sizeof(buf));
>>> +        BUG_ON_BOOT(a->cpuid >= NCAPINTS * 32);
>>> +
>>> +#undef BUG_ON_BOOT
>> The "error handling" before was horrible and this isn't really any better.
>>
>> This function should always return int, printing more helpful info than
>> that (a printk() says a thousand things better than a BUG()), and
>> nmi_apply_alternatives can panic() rather than leaving these BUG()s here.
> OK, will rework the logic here so it's the caller that panics (or not)
> as necessary, and _apply_alternatives() always prints some error
> message.

Yes.  With alternatives now behind sensible checks in the livepatch
case, any failure here is relevant and wants printing.

>
>> As a tangent, the __must_check doesn't seem to have been applied to
>> nmi_apply_alternatives(), but I'd suggest dropping the attribute; I
>> don't think it adds much.
> I didn't see the value in adding the attribute to
> nmi_apply_alternatives(), as in that context _apply_alternatives()
> would unconditionally panic instead of returning an error code.

Ah, it was only apply_alternatives() you made __must_check, not
_apply_alternatives(), which is why there isn't a compile error in
nmi_apply_alternatives().

Still, I don't think the __must_check is much use.

>
>>> diff --git a/xen/test/livepatch/xen_alternatives_fail.c b/xen/test/livepatch/xen_alternatives_fail.c
>>> new file mode 100644
>>> index 000000000000..01d289095a31
>>> --- /dev/null
>>> +++ b/xen/test/livepatch/xen_alternatives_fail.c
>>> @@ -0,0 +1,29 @@
>>> +/*
>>> + * Copyright (c) 2024 Cloud Software Group.
>>> + *
>>> + */
>>> +
>>> +#include "config.h"
>>> +#include <xen/lib.h>
>>> +#include <xen/livepatch_payload.h>
>>> +
>>> +#include <asm/alternative.h>
>>> +#include <asm/cpuid.h>
>>> +
>>> +void test_alternatives(void)
>>> +{
>>> +    alternative("", "", NCAPINTS * 32);
>>> +}
>>> +
>>> +/* Set a hook so the loading logic in Xen don't consider the payload empty. */
>>> +LIVEPATCH_LOAD_HOOK(test_alternatives);
>>> +
>>> +/*
>>> + * Local variables:
>>> + * mode: C
>>> + * c-file-style: "BSD"
>>> + * c-basic-offset: 4
>>> + * tab-width: 4
>>> + * indent-tabs-mode: nil
>>> + * End:
>>> + */
>> The second half of the patch (new testcase) is all fine and good, but
>> should pass with patch 2 in place?  I'd suggest splitting it out.
> No, not really.  The Xen buildid for this patch will be correctly set
> to match the running one, but the alternatives feature CPUID is
> explicitly set to an out of range value (NCAPINTS * 32) to trigger the
> BUG_ON condition.

Ah yes.  Good point.

In which case it probably ought to stay in this patch.

> Further thinking about it, I think we should add a build time assert
> that the feature parameters in the alternative calls are smaller than
> NCAPINTS * 32.

A build check where?  It's quite hard to do in alternatives themselves.

~Andrew


^ permalink raw reply	[flat|nested] 20+ messages in thread

* Re: [PATCH 2/3] xen/livepatch: do Xen build-id check earlier
  2024-09-23 11:04   ` Andrew Cooper
  2024-09-23 11:55     ` Andrew Cooper
@ 2024-09-23 16:02     ` Roger Pau Monné
  1 sibling, 0 replies; 20+ messages in thread
From: Roger Pau Monné @ 2024-09-23 16:02 UTC (permalink / raw)
  To: Andrew Cooper; +Cc: xen-devel, Ross Lagerwall

On Mon, Sep 23, 2024 at 12:04:30PM +0100, Andrew Cooper wrote:
> On 20/09/2024 10:36 am, Roger Pau Monne wrote:
> > The check against the expected Xen build ID should be done ahead of attempting
> > to apply the alternatives contained in the livepatch.
> >
> > If the CPUID in the alternatives patching data is out of the scope of the
> > running Xen featureset the BUG() in _apply_alternatives() will trigger thus
> > bringing the system down.  Note the layout of struct alt_instr could also
> > change between versions.  It's also possible for struct exception_table_entry
> > to have changed format, hence possibly leading to other errors.
> >
> > Move the Xen build ID check to be done ahead of any processing of the livepatch
> > payload sections.
> >
> > Signed-off-by: Roger Pau Monné <roger.pau@citrix.com>
> > ---
> >  xen/common/livepatch.c | 9 +++++----
> >  1 file changed, 5 insertions(+), 4 deletions(-)
> >
> > diff --git a/xen/common/livepatch.c b/xen/common/livepatch.c
> > index cea47ffe4c84..3e4fce036a1c 100644
> > --- a/xen/common/livepatch.c
> > +++ b/xen/common/livepatch.c
> > @@ -767,6 +767,11 @@ static int prepare_payload(struct payload *payload,
> >      if ( rc )
> >          return rc;
> >  
> > +    /* Perform the Xen build-id check ahead of doing any more processing. */
> > +    rc = xen_build_id_dep(payload);
> > +    if ( rc )
> > +        return rc;
> > +
> 
> While a step in the right direction, I think this needs to be moved far
> earlier.  Even here, it's behind the processing of the livepatch func
> state, which is something that can also change like alt_instr.
> 
> The buildid checks need to be as early as possible.  Looking through the
> logic (which doesn't have great names/splits), I'd say the buildid
> checks want to be between livepatch_elf_load() (which parses the
> structure of the ELF), and move_payload() (which starts copying it into
> place).
> 
> That would involve moving check_special_sections() too, but I think it's
> the right thing to do.

My plan would be to move check_special_sections() ahead and expand its
logic to also check that the expected buildid matches the running
hypervisor one.

Thanks, Roger.


^ permalink raw reply	[flat|nested] 20+ messages in thread

end of thread, other threads:[~2024-09-23 16:02 UTC | newest]

Thread overview: 20+ messages (download: mbox.gz follow: Atom feed
-- links below jump to the message on this page --
2024-09-20  9:36 [PATCH 0/3] xen/livepatch: improvements to loading Roger Pau Monne
2024-09-20  9:36 ` [PATCH 1/3] xen/livepatch: simplify and unify logic in prepare_payload() Roger Pau Monne
2024-09-22  9:19   ` Andrew Cooper
2024-09-23  7:46     ` Roger Pau Monné
2024-09-23  9:43       ` Andrew Cooper
2024-09-23 11:00         ` Jan Beulich
2024-09-23 11:01   ` Jan Beulich
2024-09-23 13:10     ` Roger Pau Monné
2024-09-20  9:36 ` [PATCH 2/3] xen/livepatch: do Xen build-id check earlier Roger Pau Monne
2024-09-23 11:04   ` Andrew Cooper
2024-09-23 11:55     ` Andrew Cooper
2024-09-23 16:02     ` Roger Pau Monné
2024-09-23 11:04   ` Jan Beulich
2024-09-20  9:36 ` [PATCH 2/3] xen/livepatch: do " Roger Pau Monne
2024-09-20  9:56   ` Roger Pau Monné
2024-09-20  9:36 ` [PATCH 3/3] x86/alternatives: relax apply BUGs during runtime Roger Pau Monne
2024-09-23 11:12   ` Jan Beulich
2024-09-23 11:17   ` Andrew Cooper
2024-09-23 13:06     ` Roger Pau Monné
2024-09-23 13:12       ` Andrew Cooper

This is an external index of several public inboxes,
see mirroring instructions on how to clone and mirror
all data and code used by this external index.