From mboxrd@z Thu Jan  1 00:00:00 1970
From: Ross Lagerwall <ross.lagerwall@citrix.com>
Subject: Re: [PATCH v1 08/11] xsplice: Implement support for
 applying patches
Date: Thu, 5 Nov 2015 11:45:42 +0000
Message-ID: <563B4166.9000808@citrix.com>
References: <1446574568-9644-1-git-send-email-ross.lagerwall@citrix.com>
	<1446574568-9644-8-git-send-email-ross.lagerwall@citrix.com>
	<20151105031739.GE3949@x230.dumpdata.com>
Mime-Version: 1.0
Content-Type: text/plain; charset="us-ascii"; Format="flowed"
Content-Transfer-Encoding: 7bit
Return-path: <xen-devel-bounces@lists.xen.org>
In-Reply-To: <20151105031739.GE3949@x230.dumpdata.com>
List-Unsubscribe: <http://lists.xen.org/cgi-bin/mailman/options/xen-devel>,
	<mailto:xen-devel-request@lists.xen.org?subject=unsubscribe>
List-Post: <mailto:xen-devel@lists.xen.org>
List-Help: <mailto:xen-devel-request@lists.xen.org?subject=help>
List-Subscribe: <http://lists.xen.org/cgi-bin/mailman/listinfo/xen-devel>,
	<mailto:xen-devel-request@lists.xen.org?subject=subscribe>
Sender: xen-devel-bounces@lists.xen.org
Errors-To: xen-devel-bounces@lists.xen.org
To: Konrad Rzeszutek Wilk <konrad.wilk@oracle.com>
Cc: Kevin Tian <kevin.tian@intel.com>, Ian Campbell <ian.campbell@citrix.com>, Andrew Cooper <andrew.cooper3@citrix.com>, xen-devel@lists.xen.org, Jan Beulich <jbeulich@suse.com>, Stefano Stabellini <stefano.stabellini@citrix.com>, Jun Nakajima <jun.nakajima@intel.com>, Aravind Gopalakrishnan <Aravind.Gopalakrishnan@amd.com>, Boris Ostrovsky <boris.ostrovsky@oracle.com>, Suravee Suthikulpanit <suravee.suthikulpanit@amd.com>
List-Id: xen-devel@lists.xenproject.org

On 11/05/2015 03:17 AM, Konrad Rzeszutek Wilk wrote:
snip
>> diff --git a/xen/arch/x86/xsplice.c b/xen/arch/x86/xsplice.c
>> index dbff0d5..31e4124 100644
>> --- a/xen/arch/x86/xsplice.c
>> +++ b/xen/arch/x86/xsplice.c
>> @@ -3,6 +3,25 @@
>>   #include <xen/xsplice_elf.h>
>>   #include <xen/xsplice.h>
>>
>> +#define PATCH_INSN_SIZE 5
>> +
>> +void xsplice_apply_jmp(struct xsplice_patch_func *func)
>
> Don't we want for it to be 'int'

Only if an error is expected.

>> +{
>> +    uint32_t val;
>> +    uint8_t *old_ptr;
>> +
>> +    old_ptr = (uint8_t *)func->old_addr;
>> +    memcpy(func->undo, old_ptr, PATCH_INSN_SIZE);
>
> And perhaps use something which can catch an exception (#GP) so that
> this can error out?

Why would this fail?

>> +    *old_ptr++ = 0xe9; /* Relative jump */
>> +    val = func->new_addr - func->old_addr - PATCH_INSN_SIZE;
>> +    memcpy(old_ptr, &val, sizeof val);
>> +}
>> +
>> +void xsplice_revert_jmp(struct xsplice_patch_func *func)
>> +{
>> +    memcpy((void *)func->old_addr, func->undo, PATCH_INSN_SIZE);
>> +}
>> +
>>   int xsplice_verify_elf(uint8_t *data, ssize_t len)
>>   {
>>
>> diff --git a/xen/common/xsplice.c b/xen/common/xsplice.c
>> index 5e88c55..4476be5 100644
>> --- a/xen/common/xsplice.c
>> +++ b/xen/common/xsplice.c
>> @@ -11,16 +11,21 @@
>>   #include <xen/guest_access.h>
>>   #include <xen/stdbool.h>
>>   #include <xen/sched.h>
>> +#include <xen/softirq.h>
>>   #include <xen/lib.h>
>> +#include <xen/wait.h>
>>   #include <xen/xsplice_elf.h>
>>   #include <xen/xsplice.h>
>>   #include <public/sysctl.h>
>>
>>   #include <asm/event.h>
>> +#include <asm/nmi.h>
>>
>>   static DEFINE_SPINLOCK(payload_list_lock);
>>   static LIST_HEAD(payload_list);
>>
>> +static LIST_HEAD(applied_list);
>> +
>>   static unsigned int payload_cnt;
>>   static unsigned int payload_version = 1;
>>
>> @@ -29,15 +34,34 @@ struct payload {
>>       int32_t rc;         /* 0 or -EXX. */
>>
>>       struct list_head   list;   /* Linked to 'payload_list'. */
>> +    struct list_head   applied_list;   /* Linked to 'applied_list'. */
>>
>> +    struct xsplice_patch_func *funcs;
>> +    int nfuncs;
>
> unsigned int;
>
>>       void *module_address;
>>       size_t module_pages;
>>
>>       char  id[XEN_XSPLICE_NAME_SIZE + 1];          /* Name of it. */
>>   };
>>
>> +/* Defines an outstanding patching action. */
>> +struct xsplice_work
>> +{
>> +    atomic_t semaphore;          /* Used for rendezvous */
>> +    atomic_t irq_semaphore;      /* Used to signal all IRQs disabled */
>> +    struct payload *data;        /* The payload on which to act */
>> +    volatile bool_t do_work;     /* Signals work to do */
>> +    volatile bool_t ready;       /* Signals all CPUs synchronized */
>> +    uint32_t cmd;                /* Action request. XSPLICE_ACTION_* */
>
> Now since you have a pointer to 'data' can't you follow that for the
> cmd? Or at least the 'data->state'?

I moved cmd out of the payload and into xsplice_work since cmd is only 
needed when there is work to do.
data->state contains the current state of the payload (i.e. before the 
action has been performed) so it provides no indication of what command 
needs to be performed.

>
> Missing full stops.
>> +};
>> +
>> +static DEFINE_SPINLOCK(xsplice_work_lock);
>> +/* There can be only one outstanding patching action. */
>> +static struct xsplice_work xsplice_work;
>> +
>>   static int load_module(struct payload *payload, uint8_t *raw, ssize_t len);
>>   static void free_module(struct payload *payload);
>> +static int schedule_work(struct payload *data, uint32_t cmd);
>>
>>   static const char *state2str(int32_t state)
>>   {
>> @@ -341,28 +365,22 @@ static int xsplice_action(xen_sysctl_xsplice_action_t *action)
>>       case XSPLICE_ACTION_REVERT:
>>           if ( data->state == XSPLICE_STATE_APPLIED )
>>           {
>> -            /* No implementation yet. */
>> -            data->state = XSPLICE_STATE_CHECKED;
>> -            data->rc = 0;
>> -            rc = 0;
>> +            data->rc = -EAGAIN;
>> +            rc = schedule_work(data, action->cmd);
>>           }
>>           break;
>>       case XSPLICE_ACTION_APPLY:
>>           if ( (data->state == XSPLICE_STATE_CHECKED) )
>>           {
>> -            /* No implementation yet. */
>> -            data->state = XSPLICE_STATE_APPLIED;
>> -            data->rc = 0;
>> -            rc = 0;
>> +            data->rc = -EAGAIN;
>> +            rc = schedule_work(data, action->cmd);
>>           }
>>           break;
>>       case XSPLICE_ACTION_REPLACE:
>>           if ( data->state == XSPLICE_STATE_CHECKED )
>>           {
>> -            /* No implementation yet. */
>> -            data->state = XSPLICE_STATE_CHECKED;
>> -            data->rc = 0;
>> -            rc = 0;
>> +            data->rc = -EAGAIN;
>> +            rc = schedule_work(data, action->cmd);
>>           }
>>           break;
>>       default:
>> @@ -637,6 +655,24 @@ static int perform_relocs(struct xsplice_elf *elf)
>>       return 0;
>>   }
>>
>> +static int find_special_sections(struct payload *payload,
>> +                                 struct xsplice_elf *elf)
>> +{
>> +    struct xsplice_elf_sec *sec;
>> +
>> +    sec = xsplice_elf_sec_by_name(elf, ".xsplice.funcs");
>> +    if ( !sec )
>> +    {
>> +        printk(XENLOG_ERR ".xsplice.funcs is missing\n");
>> +        return -1;
>> +    }
>> +
>> +    payload->funcs = (struct xsplice_patch_func *)sec->load_addr;
>> +    payload->nfuncs = sec->sec->sh_size / (sizeof *payload->funcs);
>> +
>> +    return 0;
>> +}
>
> That looks like it should belong to another patch?

Why? The array of functions is specifically needed for applying a 
payload so the code belongs in this patch.

>> +
>>   static int load_module(struct payload *payload, uint8_t *raw, ssize_t len)
>>   {
>>       struct xsplice_elf elf;
>> @@ -662,6 +698,10 @@ static int load_module(struct payload *payload, uint8_t *raw, ssize_t len)
>>       if ( rc )
>>           goto err_module;
>>
>> +    rc = find_special_sections(payload, &elf);
>> +    if ( rc )
>> +        goto err_module;
>> +
>
> Ditto?
>>       return 0;
>>
>>    err_module:
>> @@ -672,6 +712,206 @@ static int load_module(struct payload *payload, uint8_t *raw, ssize_t len)
>>       return rc;
>>   }
>>
>> +
>> +/*
>> + * The following functions get the CPUs into an appropriate state and
>> + * apply (or revert) each of the module's functions.
>
> s/module/payload/
>
>> + */
>> +
>> +/*
>> + * This function is executed having all other CPUs with no stack and IRQs
>> + * disabled.
>
> Well, there is some stack. For example from 'cpu_idle' - you have the
> 'cpu_idle' on the stack.
>
>> + */
>> +static int apply_payload(struct payload *data)
>> +{
>> +    int i;
>
> unsigned int
>> +
>> +    printk(XENLOG_DEBUG "Applying payload: %s\n", data->id);
>> +
>> +    for ( i = 0; i < data->nfuncs; i++ )
>> +        xsplice_apply_jmp(data->funcs + i);
>
> And if this returns an error then we could skip adding
> it to the applied_list..

Only if an error is expected.

>> +
>
> Also the patching in Linux seems to do some icache purging.
> Should we use that?

I didn't see that when I looked for it. The alternatives patching in Xen 
doesn't purge the icache (afaict). We need feedback from an x86 
maintainer here.

>
>> +    list_add_tail(&data->applied_list, &applied_list);
>> +
>> +    return 0;
>> +}
>> +
>> +/*
>> + * This function is executed having all other CPUs with no stack and IRQs
>> + * disabled.
>> + */
>> +static int revert_payload(struct payload *data)
>> +{
>> +    int i;
>
> unsigned int i;
>> +
>> +    printk(XENLOG_DEBUG "Reverting payload: %s\n", data->id);
>> +
>> +    for ( i = 0; i < data->nfuncs; i++ )
>> +        xsplice_revert_jmp(data->funcs + i);
>> +
>> +    list_del(&data->applied_list);
>> +
>> +    return 0;
>> +}
>> +
>> +/* Must be holding the payload_list lock */
>
> Missing full stop.
>
> Should that lock be called something else now? (Because it is certainly
> not protecting the list anymore - but also the scheduling action).

Hmm...

>> +static int schedule_work(struct payload *data, uint32_t cmd)
>> +{
>> +    /* Fail if an operation is already scheduled */
>> +    if ( xsplice_work.do_work )
>> +        return -EAGAIN;
>> +
>
>> +    xsplice_work.cmd = cmd;
>> +    xsplice_work.data = data;
>> +    atomic_set(&xsplice_work.semaphore, 0);
>> +    atomic_set(&xsplice_work.irq_semaphore, 0);
>> +    xsplice_work.ready = false;
>> +    smp_mb();
>> +    xsplice_work.do_work = true;
>> +    smp_mb();
>
> So this is your 'GO GO' signal right? I think you may want
> to have 'smb_wmb()'

A full review of the memory barriers and synchronization is needed by 
someone more knowledgeable than me.

>> +
>> +    return 0;
>> +}
>> +
>
> /me laughs. What a way to 'fix' the NMI watchdog.

It comes directly from the alternatives code.

>
>> +static int mask_nmi_callback(const struct cpu_user_regs *regs, int cpu)
>> +{
>> +    return 1;
>> +}
>> +
>> +static void reschedule_fn(void *unused)
>> +{
>> +    smp_mb(); /* Synchronize with setting do_work */
>> +    raise_softirq(SCHEDULE_SOFTIRQ);
>> +}
>> +
>> +/*
>> + * The main function which manages the work of quiescing the system and
>> + * patching code.
>> + */
>> +void do_xsplice(void)
>> +{
>> +    int id;
>
> unsigned int id;
>> +    unsigned int total_cpus;
>> +    nmi_callback_t saved_nmi_callback;
>> +
>> +    /* Fast path: no work to do */
>
> Missing full stop.
>> +    if ( likely(!xsplice_work.do_work) )
>> +        return;
>> +
>> +    ASSERT(local_irq_is_enabled());
>> +
>> +    spin_lock(&xsplice_work_lock);
>> +    id = atomic_read(&xsplice_work.semaphore);
>> +    atomic_inc(&xsplice_work.semaphore);
>> +    spin_unlock(&xsplice_work_lock);
>
> Could you use 'atomic_inc_and_test' and then you can get
> rid of the spinlock.

OK.

>
>> +
>> +    total_cpus = num_online_cpus();
>
> Which could change across these invocations.. Perhaps
> during these calls we need to lock up CPU up/down code?

OK.

>
>> +
>> +    if ( id == 0 )
>> +    {
>
> Can you just make this its own function? Perhaps call it
> 'xsplice_do_single' or such?
>
>> +        s_time_t timeout, start;
>> +
>> +        /* Trigger other CPUs to execute do_xsplice */
>
> Missing full stop.
>> +        smp_call_function(reschedule_fn, NULL, 0);
>> +
>> +        /* Wait for other CPUs with a timeout */
>
> Missing full stop.
>> +        start = NOW();
>> +        timeout = start + MILLISECS(30);
>
> Nah. That should be gotten from the XSPLICE_ACTION_APPLY 'time'
> parameter - which has an 'timeout' in it.
>
>> +        while ( atomic_read(&xsplice_work.semaphore) != total_cpus &&
>> +                NOW() < timeout )
>> +            cpu_relax();
>> +
>> +        if ( atomic_read(&xsplice_work.semaphore) == total_cpus )
>> +        {
>> +            struct payload *data2;
>
> s/data2/data/ ?
>> +
>> +            /* "Mask" NMIs */
>> +            saved_nmi_callback = set_nmi_callback(mask_nmi_callback);
>> +
>> +            /* All CPUs are waiting, now signal to disable IRQs */
>> +            xsplice_work.ready = true;
>> +            smp_mb();
>> +
>> +            /* Wait for irqs to be disabled */
>> +            while ( atomic_read(&xsplice_work.irq_semaphore) != (total_cpus - 1) )
>> +                cpu_relax();
>> +
>> +            local_irq_disable();
>> +            /* Now this function should be the only one on any stack.
>> +             * No need to lock the payload list or applied list. */
>> +            switch ( xsplice_work.cmd )
>> +            {
>> +                case XSPLICE_ACTION_APPLY:
>> +                        xsplice_work.data->rc = apply_payload(xsplice_work.data);
>> +                        if ( xsplice_work.data->rc == 0 )
>> +                            xsplice_work.data->state = XSPLICE_STATE_APPLIED;
>> +                        break;
>> +                case XSPLICE_ACTION_REVERT:
>> +                        xsplice_work.data->rc = revert_payload(xsplice_work.data);
>> +                        if ( xsplice_work.data->rc == 0 )
>> +                            xsplice_work.data->state = XSPLICE_STATE_CHECKED;
>> +                        break;
>> +                case XSPLICE_ACTION_REPLACE:
>> +                        list_for_each_entry ( data2, &payload_list, list )
>> +                        {
>> +                            if ( data2->state != XSPLICE_STATE_APPLIED )
>> +                                continue;
>> +
>> +                            data2->rc = revert_payload(data2);
>> +                            if ( data2->rc == 0 )
>> +                                data2->state = XSPLICE_STATE_CHECKED;
>> +                            else
>> +                            {
>> +                                xsplice_work.data->rc = -EINVAL;
>
> Why not copy the error code (from data2->rc?)

No. Reverting a different payload updates the error code for that 
payload. The payload to-be-applied has failed because a dependent action 
has failed. This is not the same as the original error. The original 
error is visible through data2->rc.

>> +                                break;
>> +                            }
>> +                        }
>> +                        if ( xsplice_work.data->rc != -EINVAL )
>
> And here you can just check for zero.

No, because xsplice_work.data->rc is originally -EAGAIN (in progress). I 
suppose I could check for xsplice_work.data->rc == -EAGAIN.

>> +                        {
>> +                            xsplice_work.data->rc = apply_payload(xsplice_work.data);
>> +                            if ( xsplice_work.data->rc == 0 )
>> +                                xsplice_work.data->state = XSPLICE_STATE_APPLIED;
>> +                        }
>> +                        break;
>> +                default:
>> +                        xsplice_work.data->rc = -EINVAL;
>> +                        break;
>> +            }
>> +
>> +            local_irq_enable();
>> +            set_nmi_callback(saved_nmi_callback);
>> +        }
>> +        else
>> +        {
>> +            xsplice_work.data->rc = -EBUSY;
>> +        }
>> +
>> +        xsplice_work.do_work = 0;
>> +        smp_mb(); /* Synchronize with waiting CPUs */
>
> Missing full stop.
>> +    }
>> +    else
>> +    {
>> +        /* Wait for all CPUs to rendezvous */
>
> Missing full stop
>> +        while ( xsplice_work.do_work && !xsplice_work.ready )
>> +        {
>> +            cpu_relax();
>> +            smp_mb();
>> +        }
>> +
>> +        /* Disable IRQs and signal */
>
> Missing full stop.
>> +        local_irq_disable();
>> +        atomic_inc(&xsplice_work.irq_semaphore);
>> +
>> +        /* Wait for patching to complete */
>
> Missing full stop.
>> +        while ( xsplice_work.do_work )
>> +        {
>> +            cpu_relax();
>> +            smp_mb();
>> +        }
>> +        local_irq_enable();
>> +    }
>> +}
>> +
>>   static int __init xsplice_init(void)
>>   {
>>       register_keyhandler('x', xsplice_printall, "print xsplicing info", 1);
>> diff --git a/xen/include/asm-arm/nmi.h b/xen/include/asm-arm/nmi.h
>> index a60587e..82aff35 100644
>> --- a/xen/include/asm-arm/nmi.h
>> +++ b/xen/include/asm-arm/nmi.h
>> @@ -4,6 +4,19 @@
>>   #define register_guest_nmi_callback(a)  (-ENOSYS)
>>   #define unregister_guest_nmi_callback() (-ENOSYS)
>>
>> +typedef int (*nmi_callback_t)(const struct cpu_user_regs *regs, int cpu);
>> +
>> +/**
>> + * set_nmi_callback
>> + *
>> + * Set a handler for an NMI. Only one handler may be
>> + * set. Return the old nmi callback handler.
>> + */
>> +static inline nmi_callback_t set_nmi_callback(nmi_callback_t callback)
>> +{
>> +    return NULL;
>> +}
>> +
>>   #endif /* ASM_NMI_H */
>>   /*
>>    * Local variables:
>> diff --git a/xen/include/xen/xsplice.h b/xen/include/xen/xsplice.h
>> index a3946a3..507829c 100644
>> --- a/xen/include/xen/xsplice.h
>> +++ b/xen/include/xen/xsplice.h
>> @@ -11,7 +11,8 @@ struct xsplice_patch_func {
>>       unsigned long old_addr;
>>       unsigned long old_size;
>>       char *name;
>> -    unsigned char undo[8];
>> +    uint8_t undo[8];
>> +    uint8_t pad[56];
>
> This should be in a different patch. As part of the
> "xsplice: Implement payload loading"

Oops, that's what I intended.

-- 
Ross Lagerwall