[PATCH RFC V5 1/5] xen: Emulate with no writes

All of lore.kernel.org
 help / color / mirror / Atom feed

* [PATCH RFC V5 1/5] xen: Emulate with no writes
@ 2014-08-06 15:58 Razvan Cojocaru
  2014-08-06 15:58 ` [PATCH RFC V5 2/5] xen: Optimize introspection access to guest state Razvan Cojocaru
                   ` (4 more replies)
  0 siblings, 5 replies; 18+ messages in thread
From: Razvan Cojocaru @ 2014-08-06 15:58 UTC (permalink / raw)
  To: xen-devel
  Cc: kevin.tian, ian.campbell, Razvan Cojocaru, stefano.stabellini,
	andrew.cooper3, ian.jackson, JBeulich, eddie.dong, jun.nakajima

Added support for emulating an instruction with no memory writes.
Additionally, introduced hvm_emulate_one_full(), which inspects
possible return values from the hvm_emulate_one() functions
(EXCEPTION, UNHANDLEABLE) and acts on them.

Changes since V1:
 - Removed the Linux code that computes the length of an instruction.
 - Unused function parameters are no longer marked.
 - Refactored the code to eliminate redundancy.
 - Made the exception passed on to the guest by hvm_emulate_one_full()
   configurable.

Changes since V2:
 - Renamed hvmemul_write_dummy() to hvmemul_write_discard().
 - Fixed a comment (Xen coding style rules).
 - Renamed hvm_emulate_one_with_ops() to _hvm_emulate_one().
 - Changed stack variable hvm_emulate_ops_no_write into a static const.
 - Modified struct hvm_emulate_ctxt ctx initialization syntax.
 - Renamed unhandleable_trapnr to trapnr and unhandleable_errcode
   to errcode.
 - Changed errcode's type to unsigned int.
 - Removed hvm_emulate_one() loop that went on until it returned
   something other than X86EMUL_RETRY (to prevent potential
   blocking against the time calibration rendezvous).

Changes since V3:
 - The rep_ins, rep_movs and cmpxchg handlers are
   now also inactive.

Changes since V4:
 - Also discarding IO reads (dummy read_io handler).

Signed-off-by: Razvan Cojocaru <rcojocaru@bitdefender.com>
---
 xen/arch/x86/hvm/emulate.c        |  134 ++++++++++++++++++++++++++++++++++++-
 xen/include/asm-x86/hvm/emulate.h |    5 ++
 2 files changed, 136 insertions(+), 3 deletions(-)

diff --git a/xen/arch/x86/hvm/emulate.c b/xen/arch/x86/hvm/emulate.c
index eac159f..4f04536 100644
--- a/xen/arch/x86/hvm/emulate.c
+++ b/xen/arch/x86/hvm/emulate.c
@@ -688,6 +688,60 @@ static int hvmemul_write(
     return X86EMUL_OKAY;
 }
 
+static int hvmemul_write_discard(
+    enum x86_segment seg,
+    unsigned long offset,
+    void *p_data,
+    unsigned int bytes,
+    struct x86_emulate_ctxt *ctxt)
+{
+    /* Discarding the write. */
+    return X86EMUL_OKAY;
+}
+
+static int hvmemul_rep_ins_discard(
+    uint16_t src_port,
+    enum x86_segment dst_seg,
+    unsigned long dst_offset,
+    unsigned int bytes_per_rep,
+    unsigned long *reps,
+    struct x86_emulate_ctxt *ctxt)
+{
+    return X86EMUL_OKAY;
+}
+
+static int hvmemul_rep_movs_discard(
+   enum x86_segment src_seg,
+   unsigned long src_offset,
+   enum x86_segment dst_seg,
+   unsigned long dst_offset,
+   unsigned int bytes_per_rep,
+   unsigned long *reps,
+   struct x86_emulate_ctxt *ctxt)
+{
+    return X86EMUL_OKAY;
+}
+
+static int hvmemul_cmpxchg_discard(
+    enum x86_segment seg,
+    unsigned long offset,
+    void *p_old,
+    void *p_new,
+    unsigned int bytes,
+    struct x86_emulate_ctxt *ctxt)
+{
+    return X86EMUL_OKAY;
+}
+
+static int hvmemul_read_io_discard(
+    unsigned int port,
+    unsigned int bytes,
+    unsigned long *val,
+    struct x86_emulate_ctxt *ctxt)
+{
+    return X86EMUL_OKAY;
+}
+
 static int hvmemul_cmpxchg(
     enum x86_segment seg,
     unsigned long offset,
@@ -1138,8 +1192,33 @@ static const struct x86_emulate_ops hvm_emulate_ops = {
     .invlpg        = hvmemul_invlpg
 };
 
-int hvm_emulate_one(
-    struct hvm_emulate_ctxt *hvmemul_ctxt)
+static const struct x86_emulate_ops hvm_emulate_ops_no_write = {
+    .read          = hvmemul_read,
+    .insn_fetch    = hvmemul_insn_fetch,
+    .write         = hvmemul_write_discard,
+    .cmpxchg       = hvmemul_cmpxchg_discard,
+    .rep_ins       = hvmemul_rep_ins_discard,
+    .rep_outs      = hvmemul_rep_outs,
+    .rep_movs      = hvmemul_rep_movs_discard,
+    .read_segment  = hvmemul_read_segment,
+    .write_segment = hvmemul_write_segment,
+    .read_io       = hvmemul_read_io_discard,
+    .write_io      = hvmemul_write_io,
+    .read_cr       = hvmemul_read_cr,
+    .write_cr      = hvmemul_write_cr,
+    .read_msr      = hvmemul_read_msr,
+    .write_msr     = hvmemul_write_msr,
+    .wbinvd        = hvmemul_wbinvd,
+    .cpuid         = hvmemul_cpuid,
+    .inject_hw_exception = hvmemul_inject_hw_exception,
+    .inject_sw_interrupt = hvmemul_inject_sw_interrupt,
+    .get_fpu       = hvmemul_get_fpu,
+    .put_fpu       = hvmemul_put_fpu,
+    .invlpg        = hvmemul_invlpg
+};
+
+static int _hvm_emulate_one(struct hvm_emulate_ctxt *hvmemul_ctxt,
+    const struct x86_emulate_ops *ops)
 {
     struct cpu_user_regs *regs = hvmemul_ctxt->ctxt.regs;
     struct vcpu *curr = current;
@@ -1191,7 +1270,7 @@ int hvm_emulate_one(
     vio->mmio_retrying = vio->mmio_retry;
     vio->mmio_retry = 0;
 
-    rc = x86_emulate(&hvmemul_ctxt->ctxt, &hvm_emulate_ops);
+    rc = x86_emulate(&hvmemul_ctxt->ctxt, ops);
 
     if ( rc == X86EMUL_OKAY && vio->mmio_retry )
         rc = X86EMUL_RETRY;
@@ -1239,6 +1318,55 @@ int hvm_emulate_one(
     return X86EMUL_OKAY;
 }
 
+int hvm_emulate_one(
+    struct hvm_emulate_ctxt *hvmemul_ctxt)
+{
+    return _hvm_emulate_one(hvmemul_ctxt, &hvm_emulate_ops);
+}
+
+int hvm_emulate_one_no_write(
+    struct hvm_emulate_ctxt *hvmemul_ctxt)
+{
+    return _hvm_emulate_one(hvmemul_ctxt, &hvm_emulate_ops_no_write);
+}
+
+void hvm_emulate_one_full(bool_t nowrite, unsigned int trapnr,
+    unsigned int errcode)
+{
+    struct hvm_emulate_ctxt ctx = {{ 0 }};
+    int rc;
+
+    hvm_emulate_prepare(&ctx, guest_cpu_user_regs());
+
+    if ( nowrite )
+        rc = hvm_emulate_one_no_write(&ctx);
+    else
+        rc = hvm_emulate_one(&ctx);
+
+    switch ( rc )
+    {
+    case X86EMUL_UNHANDLEABLE:
+        gdprintk(XENLOG_DEBUG, "Emulation failed @ %04x:%lx: "
+               "%02x %02x %02x %02x %02x %02x %02x %02x %02x %02x\n",
+               hvmemul_get_seg_reg(x86_seg_cs, &ctx)->sel,
+               ctx.insn_buf_eip,
+               ctx.insn_buf[0], ctx.insn_buf[1],
+               ctx.insn_buf[2], ctx.insn_buf[3],
+               ctx.insn_buf[4], ctx.insn_buf[5],
+               ctx.insn_buf[6], ctx.insn_buf[7],
+               ctx.insn_buf[8], ctx.insn_buf[9]);
+        hvm_inject_hw_exception(trapnr, errcode);
+        break;
+    case X86EMUL_EXCEPTION:
+        if ( ctx.exn_pending )
+            hvm_inject_hw_exception(ctx.exn_vector, ctx.exn_error_code);
+        /* fall through */
+    default:
+        hvm_emulate_writeback(&ctx);
+        break;
+    }
+}
+
 void hvm_emulate_prepare(
     struct hvm_emulate_ctxt *hvmemul_ctxt,
     struct cpu_user_regs *regs)
diff --git a/xen/include/asm-x86/hvm/emulate.h b/xen/include/asm-x86/hvm/emulate.h
index 00a06cc..a74f310 100644
--- a/xen/include/asm-x86/hvm/emulate.h
+++ b/xen/include/asm-x86/hvm/emulate.h
@@ -37,6 +37,11 @@ struct hvm_emulate_ctxt {
 
 int hvm_emulate_one(
     struct hvm_emulate_ctxt *hvmemul_ctxt);
+int hvm_emulate_one_no_write(
+    struct hvm_emulate_ctxt *hvmemul_ctxt);
+void hvm_emulate_one_full(bool_t nowrite,
+    unsigned int trapnr,
+    unsigned int errcode);
 void hvm_emulate_prepare(
     struct hvm_emulate_ctxt *hvmemul_ctxt,
     struct cpu_user_regs *regs);
-- 
1.7.9.5

^ permalink raw reply related	[flat|nested] 18+ messages in thread

* [PATCH RFC V5 2/5] xen: Optimize introspection access to guest state
  2014-08-06 15:58 [PATCH RFC V5 1/5] xen: Emulate with no writes Razvan Cojocaru
@ 2014-08-06 15:58 ` Razvan Cojocaru
  2014-08-08 14:27   ` Jan Beulich
  2014-08-06 15:58 ` [PATCH RFC V5 3/5] xen: Force-enable relevant MSR events; optimize the number of sent MSR events Razvan Cojocaru
                   ` (3 subsequent siblings)
  4 siblings, 1 reply; 18+ messages in thread
From: Razvan Cojocaru @ 2014-08-06 15:58 UTC (permalink / raw)
  To: xen-devel
  Cc: kevin.tian, ian.campbell, Razvan Cojocaru, stefano.stabellini,
	andrew.cooper3, ian.jackson, JBeulich, eddie.dong, jun.nakajima

Speed optimization for introspection purposes: a handful of registers
are sent along with each mem_event. This requires enlargement of the
mem_event_request / mem_event_response stuctures, and additional code
to fill in relevant values. Since the EPT event processing code needs
more data than CR3 or MSR event processors, hvm_mem_event_fill_regs()
fills in less data than p2m_mem_event_fill_regs(), in order to avoid
overhead. Struct hvm_hw_cpu has been considered instead of the custom
struct mem_event_regs_st, but its size would cause quick filling up
of the mem_event ring buffer.

Changes since V1:
 - Replaced guest_x86_mode with cs_arbytes in the mem_event_regs_st
   structure.
 - Removed superfluous preprocessor check for __x86_64__ in
   p2m_mem_event_fill_regs().

Changes since V2:
 - Removed inline function compiler suggestions.
 - Removed superfluous memset(&ctxt, 0, sizeof(struct hvm_hw_cpu)).
 - Padded struct mem_event_regs_st.
 - Renamed mem_event_regs_t to mem_event_regs.
 - Readability changes.

Changes since V3:
 - Changed the name of VCPU variables referring to the current
   VCPU to "curr".
 - Renamed "mem_event_regs" to "x86_mem_event_regs" to make it
   explicit.

Changes since V4:
 - Renamed "x86_mem_event_regs" to "mem_event_regs_x86" and
   removed a typedef.

Signed-off-by: Razvan Cojocaru <rcojocaru@bitdefender.com>
---
 xen/arch/x86/hvm/hvm.c         |   33 +++++++++++++++++++++++
 xen/arch/x86/mm/p2m.c          |   57 ++++++++++++++++++++++++++++++++++++++++
 xen/include/public/mem_event.h |   39 +++++++++++++++++++++++++++
 3 files changed, 129 insertions(+)

diff --git a/xen/arch/x86/hvm/hvm.c b/xen/arch/x86/hvm/hvm.c
index e834406..77f4b36 100644
--- a/xen/arch/x86/hvm/hvm.c
+++ b/xen/arch/x86/hvm/hvm.c
@@ -6127,6 +6127,38 @@ int hvm_debug_op(struct vcpu *v, int32_t op)
     return rc;
 }
 
+static void hvm_mem_event_fill_regs(mem_event_request_t *req)
+{
+    struct cpu_user_regs *regs = guest_cpu_user_regs();
+    struct vcpu *curr = current;
+
+    req->x86_regs.rax = regs->eax;
+    req->x86_regs.rcx = regs->ecx;
+    req->x86_regs.rdx = regs->edx;
+    req->x86_regs.rbx = regs->ebx;
+    req->x86_regs.rsp = regs->esp;
+    req->x86_regs.rbp = regs->ebp;
+    req->x86_regs.rsi = regs->esi;
+    req->x86_regs.rdi = regs->edi;
+
+    req->x86_regs.r8  = regs->r8;
+    req->x86_regs.r9  = regs->r9;
+    req->x86_regs.r10 = regs->r10;
+    req->x86_regs.r11 = regs->r11;
+    req->x86_regs.r12 = regs->r12;
+    req->x86_regs.r13 = regs->r13;
+    req->x86_regs.r14 = regs->r14;
+    req->x86_regs.r15 = regs->r15;
+
+    req->x86_regs.rflags = regs->eflags;
+    req->x86_regs.rip    = regs->eip;
+
+    req->x86_regs.msr_efer = curr->arch.hvm_vcpu.guest_efer;
+    req->x86_regs.cr0 = curr->arch.hvm_vcpu.guest_cr[0];
+    req->x86_regs.cr3 = curr->arch.hvm_vcpu.guest_cr[3];
+    req->x86_regs.cr4 = curr->arch.hvm_vcpu.guest_cr[4];
+}
+
 static int hvm_memory_event_traps(long p, uint32_t reason,
                                   unsigned long value, unsigned long old, 
                                   bool_t gla_valid, unsigned long gla) 
@@ -6171,6 +6203,7 @@ static int hvm_memory_event_traps(long p, uint32_t reason,
         req.gla = old;
     }
     
+    hvm_mem_event_fill_regs(&req);
     mem_event_put_request(d, &d->mem_event->access, &req);
     
     return 1;
diff --git a/xen/arch/x86/mm/p2m.c b/xen/arch/x86/mm/p2m.c
index bca9f0f..069e869 100644
--- a/xen/arch/x86/mm/p2m.c
+++ b/xen/arch/x86/mm/p2m.c
@@ -1323,6 +1323,61 @@ void p2m_mem_paging_resume(struct domain *d)
     }
 }
 
+static void p2m_mem_event_fill_regs(mem_event_request_t *req)
+{
+    struct cpu_user_regs *regs = guest_cpu_user_regs();
+    struct segment_register seg;
+    struct hvm_hw_cpu ctxt;
+    struct vcpu *curr = current;
+
+    /* Architecture-specific vmcs/vmcb bits */
+    hvm_funcs.save_cpu_ctxt(curr, &ctxt);
+
+    req->x86_regs.rax = regs->eax;
+    req->x86_regs.rcx = regs->ecx;
+    req->x86_regs.rdx = regs->edx;
+    req->x86_regs.rbx = regs->ebx;
+    req->x86_regs.rsp = regs->esp;
+    req->x86_regs.rbp = regs->ebp;
+    req->x86_regs.rsi = regs->esi;
+    req->x86_regs.rdi = regs->edi;
+
+    req->x86_regs.r8  = regs->r8;
+    req->x86_regs.r9  = regs->r9;
+    req->x86_regs.r10 = regs->r10;
+    req->x86_regs.r11 = regs->r11;
+    req->x86_regs.r12 = regs->r12;
+    req->x86_regs.r13 = regs->r13;
+    req->x86_regs.r14 = regs->r14;
+    req->x86_regs.r15 = regs->r15;
+
+    req->x86_regs.rflags = regs->eflags;
+    req->x86_regs.rip    = regs->eip;
+
+    req->x86_regs.dr7 = curr->arch.debugreg[7];
+    req->x86_regs.cr0 = ctxt.cr0;
+    req->x86_regs.cr2 = ctxt.cr2;
+    req->x86_regs.cr3 = ctxt.cr3;
+    req->x86_regs.cr4 = ctxt.cr4;
+
+    req->x86_regs.sysenter_cs = ctxt.sysenter_cs;
+    req->x86_regs.sysenter_esp = ctxt.sysenter_esp;
+    req->x86_regs.sysenter_eip = ctxt.sysenter_eip;
+
+    req->x86_regs.msr_efer = ctxt.msr_efer;
+    req->x86_regs.msr_star = ctxt.msr_star;
+    req->x86_regs.msr_lstar = ctxt.msr_lstar;
+
+    hvm_get_segment_register(curr, x86_seg_fs, &seg);
+    req->x86_regs.fs_base = seg.base;
+
+    hvm_get_segment_register(curr, x86_seg_gs, &seg);
+    req->x86_regs.gs_base = seg.base;
+
+    hvm_get_segment_register(curr, x86_seg_cs, &seg);
+    req->x86_regs.cs_arbytes = seg.attr.bytes;
+}
+
 bool_t p2m_mem_access_check(paddr_t gpa, bool_t gla_valid, unsigned long gla, 
                           bool_t access_r, bool_t access_w, bool_t access_x,
                           mem_event_request_t **req_ptr)
@@ -1410,6 +1465,8 @@ bool_t p2m_mem_access_check(paddr_t gpa, bool_t gla_valid, unsigned long gla,
         req->access_x = access_x;
     
         req->vcpu_id = v->vcpu_id;
+
+        p2m_mem_event_fill_regs(req);
     }
 
     /* Pause the current VCPU */
diff --git a/xen/include/public/mem_event.h b/xen/include/public/mem_event.h
index 3831b41..bbd1636 100644
--- a/xen/include/public/mem_event.h
+++ b/xen/include/public/mem_event.h
@@ -48,6 +48,44 @@
 #define MEM_EVENT_REASON_MSR         7    /* MSR was hit: gfn is MSR value, gla is MSR address;
                                              does NOT honour HVMPME_onchangeonly */
 
+/* Using a custom struct (not hvm_hw_cpu) so as to not fill
+ * the mem_event ring buffer too quickly. */
+struct mem_event_regs_x86 {
+    uint64_t rax;
+    uint64_t rcx;
+    uint64_t rdx;
+    uint64_t rbx;
+    uint64_t rsp;
+    uint64_t rbp;
+    uint64_t rsi;
+    uint64_t rdi;
+    uint64_t r8;
+    uint64_t r9;
+    uint64_t r10;
+    uint64_t r11;
+    uint64_t r12;
+    uint64_t r13;
+    uint64_t r14;
+    uint64_t r15;
+    uint64_t rflags;
+    uint64_t dr7;
+    uint64_t rip;
+    uint64_t cr0;
+    uint64_t cr2;
+    uint64_t cr3;
+    uint64_t cr4;
+    uint64_t sysenter_cs;
+    uint64_t sysenter_esp;
+    uint64_t sysenter_eip;
+    uint64_t msr_efer;
+    uint64_t msr_star;
+    uint64_t msr_lstar;
+    uint64_t fs_base;
+    uint64_t gs_base;
+    uint32_t cs_arbytes;
+    uint32_t _pad;
+};
+
 typedef struct mem_event_st {
     uint32_t flags;
     uint32_t vcpu_id;
@@ -65,6 +103,7 @@ typedef struct mem_event_st {
     uint16_t available:12;
 
     uint16_t reason;
+    struct mem_event_regs_x86 x86_regs;
 } mem_event_request_t, mem_event_response_t;
 
 DEFINE_RING_TYPES(mem_event, mem_event_request_t, mem_event_response_t);
-- 
1.7.9.5

^ permalink raw reply related	[flat|nested] 18+ messages in thread

* [PATCH RFC V5 3/5] xen: Force-enable relevant MSR events; optimize the number of sent MSR events
  2014-08-06 15:58 [PATCH RFC V5 1/5] xen: Emulate with no writes Razvan Cojocaru
  2014-08-06 15:58 ` [PATCH RFC V5 2/5] xen: Optimize introspection access to guest state Razvan Cojocaru
@ 2014-08-06 15:58 ` Razvan Cojocaru
  2014-08-08 14:34   ` Jan Beulich
  2014-08-06 15:58 ` [PATCH RFC V5 4/5] xen, libxc: Request page fault injection via libxc Razvan Cojocaru
                   ` (2 subsequent siblings)
  4 siblings, 1 reply; 18+ messages in thread
From: Razvan Cojocaru @ 2014-08-06 15:58 UTC (permalink / raw)
  To: xen-devel
  Cc: kevin.tian, ian.campbell, Razvan Cojocaru, stefano.stabellini,
	andrew.cooper3, ian.jackson, JBeulich, eddie.dong, jun.nakajima

Vmx_disable_intercept_for_msr() will now refuse to disable interception of
MSRs needed for memory introspection. It is not possible to gate this on
mem_access being active for the domain, since by the time mem_access does
become active the interception for the interesting MSRs has already been
disabled (vmx_disable_intercept_for_msr() runs very early on).

Changes since V1:
 - Replaced printk() with gdprintk(XENLOG_DEBUG, ...).

Changes since V2:
 - Split a log line differently to keep it grepable.
 - Interception for relevant MSRs will be disabled only if
   mem_access is not enabled.
 - Since they end up being disabled early on (when mem_access
   is not enabled yet), re-enable interception when mem_access
   becomes active.

Changes since V3:
 - Removed log line stating that MSR interception cannot be disabled.
 - Removed superfluous #include <asm/hvm/vmx/vmcs.h>.
 - Moved VMX-specific code to vmx.c (as a new hvm_funcs member).

Signed-off-by: Razvan Cojocaru <rcojocaru@bitdefender.com>
---
 xen/arch/x86/hvm/vmx/vmcs.c   |   20 ++++++++++++++++++++
 xen/arch/x86/hvm/vmx/vmx.c    |   17 +++++++++++++++++
 xen/arch/x86/mm/mem_event.c   |    3 +++
 xen/include/asm-x86/hvm/hvm.h |    2 ++
 4 files changed, 42 insertions(+)

diff --git a/xen/arch/x86/hvm/vmx/vmcs.c b/xen/arch/x86/hvm/vmx/vmcs.c
index 8ffc562..2703c58 100644
--- a/xen/arch/x86/hvm/vmx/vmcs.c
+++ b/xen/arch/x86/hvm/vmx/vmcs.c
@@ -39,6 +39,7 @@
 #include <xen/keyhandler.h>
 #include <asm/shadow.h>
 #include <asm/tboot.h>
+#include <asm/mem_event.h>
 
 static bool_t __read_mostly opt_vpid_enabled = 1;
 boolean_param("vpid", opt_vpid_enabled);
@@ -695,11 +696,30 @@ static void vmx_set_host_env(struct vcpu *v)
 void vmx_disable_intercept_for_msr(struct vcpu *v, u32 msr, int type)
 {
     unsigned long *msr_bitmap = v->arch.hvm_vmx.msr_bitmap;
+    struct domain *d = v->domain;
 
     /* VMX MSR bitmap supported? */
     if ( msr_bitmap == NULL )
         return;
 
+    if ( mem_event_check_ring(&d->mem_event->access) )
+    {
+        /* Filter out MSR-s needed for memory introspection */
+        switch ( msr )
+        {
+        case MSR_IA32_SYSENTER_EIP:
+        case MSR_IA32_SYSENTER_ESP:
+        case MSR_IA32_SYSENTER_CS:
+        case MSR_IA32_MC0_CTL:
+        case MSR_STAR:
+        case MSR_LSTAR:
+            return;
+
+        default:
+            break;
+        }
+    }
+
     /*
      * See Intel PRM Vol. 3, 20.6.9 (MSR-Bitmap Address). Early manuals
      * have the write-low and read-high bitmap offsets the wrong way round.
diff --git a/xen/arch/x86/hvm/vmx/vmx.c b/xen/arch/x86/hvm/vmx/vmx.c
index 2caa04a..dfb0c95 100644
--- a/xen/arch/x86/hvm/vmx/vmx.c
+++ b/xen/arch/x86/hvm/vmx/vmx.c
@@ -1682,6 +1682,22 @@ void vmx_hypervisor_cpuid_leaf(uint32_t sub_idx,
         *eax |= XEN_HVM_CPUID_X2APIC_VIRT;
 }
 
+static void vmx_enable_intro_msr_interception(struct domain *d)
+{
+    struct vcpu *v;
+
+    /* Enable interception for MSRs needed for memory introspection. */
+    for_each_vcpu ( d, v )
+    {
+        vmx_enable_intercept_for_msr(v, MSR_IA32_SYSENTER_EIP, MSR_TYPE_W);
+        vmx_enable_intercept_for_msr(v, MSR_IA32_SYSENTER_ESP, MSR_TYPE_W);
+        vmx_enable_intercept_for_msr(v, MSR_IA32_SYSENTER_CS, MSR_TYPE_W);
+        vmx_enable_intercept_for_msr(v, MSR_IA32_MC0_CTL, MSR_TYPE_W);
+        vmx_enable_intercept_for_msr(v, MSR_STAR, MSR_TYPE_W);
+        vmx_enable_intercept_for_msr(v, MSR_LSTAR, MSR_TYPE_W);
+    }
+}
+
 static struct hvm_function_table __initdata vmx_function_table = {
     .name                 = "VMX",
     .cpu_up_prepare       = vmx_cpu_up_prepare,
@@ -1740,6 +1756,7 @@ static struct hvm_function_table __initdata vmx_function_table = {
     .handle_eoi           = vmx_handle_eoi,
     .nhvm_hap_walk_L1_p2m = nvmx_hap_walk_L1_p2m,
     .hypervisor_cpuid_leaf = vmx_hypervisor_cpuid_leaf,
+    .enable_intro_msr_interception = vmx_enable_intro_msr_interception,
 };
 
 const struct hvm_function_table * __init start_vmx(void)
diff --git a/xen/arch/x86/mm/mem_event.c b/xen/arch/x86/mm/mem_event.c
index ba7e71e..d5959d9 100644
--- a/xen/arch/x86/mm/mem_event.c
+++ b/xen/arch/x86/mm/mem_event.c
@@ -600,6 +600,9 @@ int mem_event_domctl(struct domain *d, xen_domctl_mem_event_op_t *mec,
             rc = mem_event_enable(d, mec, med, _VPF_mem_access, 
                                     HVM_PARAM_ACCESS_RING_PFN,
                                     mem_access_notification);
+
+            if ( rc == 0 && hvm_funcs.enable_intro_msr_interception )
+                hvm_funcs.enable_intro_msr_interception(d);
         }
         break;
 
diff --git a/xen/include/asm-x86/hvm/hvm.h b/xen/include/asm-x86/hvm/hvm.h
index 0ebd478..2a87e9b 100644
--- a/xen/include/asm-x86/hvm/hvm.h
+++ b/xen/include/asm-x86/hvm/hvm.h
@@ -205,6 +205,8 @@ struct hvm_function_table {
     void (*hypervisor_cpuid_leaf)(uint32_t sub_idx,
                                   uint32_t *eax, uint32_t *ebx,
                                   uint32_t *ecx, uint32_t *edx);
+
+    void (*enable_intro_msr_interception)(struct domain *d);
 };
 
 extern struct hvm_function_table hvm_funcs;
-- 
1.7.9.5

^ permalink raw reply related	[flat|nested] 18+ messages in thread

* [PATCH RFC V5 4/5] xen, libxc: Request page fault injection via libxc
  2014-08-06 15:58 [PATCH RFC V5 1/5] xen: Emulate with no writes Razvan Cojocaru
  2014-08-06 15:58 ` [PATCH RFC V5 2/5] xen: Optimize introspection access to guest state Razvan Cojocaru
  2014-08-06 15:58 ` [PATCH RFC V5 3/5] xen: Force-enable relevant MSR events; optimize the number of sent MSR events Razvan Cojocaru
@ 2014-08-06 15:58 ` Razvan Cojocaru
  2014-08-07  7:22   ` Razvan Cojocaru
  2014-08-08 14:48   ` Jan Beulich
  2014-08-06 15:58 ` [PATCH RFC V5 5/5] xen: Handle resumed instruction based on previous mem_event reply Razvan Cojocaru
  2014-08-08 14:25 ` [PATCH RFC V5 1/5] xen: Emulate with no writes Jan Beulich
  4 siblings, 2 replies; 18+ messages in thread
From: Razvan Cojocaru @ 2014-08-06 15:58 UTC (permalink / raw)
  To: xen-devel
  Cc: kevin.tian, ian.campbell, Razvan Cojocaru, stefano.stabellini,
	andrew.cooper3, ian.jackson, JBeulich, eddie.dong, jun.nakajima

Added new XEN_DOMCTL_set_pagefault_info hypercall, used by libxc's
new xc_domain_set_pagefault_info() function to set per-domain page
fault injection information. All a call does is set per-domain info,
and nothing actually happens until VMENTRY time, and then only if
all conditions are met (the guest is in user mode, the set value
matches CR3, and there are no other pending traps).
This mechanism allows bringing in swapped-out pages for inspection.

Changes since V2:
 - Removed superfluous memset(&ctxt, 0, sizeof(struct hvm_hw_cpu)).
 - Now checking SS.DPL instead of CS.DPL to see if the guest is in
   user mode.
 - Removed superfluous fault_info members initialization.
 - Now returning -EINVAL if !has_hvm_container_domain(d) on
   XEN_DOMCTL_set_pagefault_info.
 - Moved struct fault_info from struct domain to struct hvm_domain.
 - Collapsed explicit initialization of the fault_info members into a
   one-line assignment on the tools/libxc side.
 - Now using write_access as a proper bool instead of shifting and
   OR-ing it with PFEC_user_mode.

Changes since V3:
 - Now checking that is_hvm_domain(d) in do_domctl() (consistent with
   the test in the actual page fault injection code).
 - XEN_DOMCTL_set_pagefault_info now extends the proper range.
 - Added a likely() around the
   d->arch.hvm_domain.fault_info.virtual_address == 0 check.
 - Replaced hvm_funcs.save_cpu_ctxt() function call with lighter code.
 - Split the vmx.c page fault injection function into a check
   function and an inject function.

Changes since V4:
 - Added "valid" flag instead of relying on virtual address 0 as a
   special case.
 - Using #defines instead of magic constants in vmx_vmcs_save().
 - Padded struct xen_domctl_set_pagefault_info.
 - Coding style and readability changes.

Signed-off-by: Razvan Cojocaru <rcojocaru@bitdefender.com>
---
 tools/libxc/xc_domain.c          |   16 +++++++++++
 tools/libxc/xenctrl.h            |    4 +++
 xen/arch/x86/hvm/vmx/vmx.c       |   56 ++++++++++++++++++++++++++++++++++++++
 xen/common/domctl.c              |   28 +++++++++++++++++++
 xen/include/asm-x86/hvm/domain.h |    8 ++++++
 xen/include/public/domctl.h      |   15 ++++++++++
 6 files changed, 127 insertions(+)

diff --git a/tools/libxc/xc_domain.c b/tools/libxc/xc_domain.c
index 0230c6c..49f0c61 100644
--- a/tools/libxc/xc_domain.c
+++ b/tools/libxc/xc_domain.c
@@ -506,6 +506,22 @@ int xc_domain_hvm_setcontext(xc_interface *xch,
     return ret;
 }
 
+int xc_domain_set_pagefault_info(xc_interface *xch,
+                                 uint32_t domid,
+                                 xen_domctl_set_pagefault_info_t *info)
+{
+    DECLARE_DOMCTL;
+
+    if (info == NULL)
+        return -1;
+
+    domctl.cmd = XEN_DOMCTL_set_pagefault_info;
+    domctl.domain = (domid_t)domid;
+    domctl.u.set_pagefault_info = *info;
+
+    return do_domctl(xch, &domctl);
+}
+
 int xc_vcpu_getcontext(xc_interface *xch,
                        uint32_t domid,
                        uint32_t vcpu,
diff --git a/tools/libxc/xenctrl.h b/tools/libxc/xenctrl.h
index 5beb846..a913f10 100644
--- a/tools/libxc/xenctrl.h
+++ b/tools/libxc/xenctrl.h
@@ -803,6 +803,10 @@ int xc_domain_hvm_setcontext(xc_interface *xch,
 const char *xc_domain_get_native_protocol(xc_interface *xch,
                                           uint32_t domid);
 
+int xc_domain_set_pagefault_info(xc_interface *xch,
+                                 uint32_t domid,
+                                 xen_domctl_set_pagefault_info_t *info);
+
 /**
  * This function returns information about the execution context of a
  * particular vcpu of a domain.
diff --git a/xen/arch/x86/hvm/vmx/vmx.c b/xen/arch/x86/hvm/vmx/vmx.c
index dfb0c95..99bdf11 100644
--- a/xen/arch/x86/hvm/vmx/vmx.c
+++ b/xen/arch/x86/hvm/vmx/vmx.c
@@ -3114,6 +3114,59 @@ out:
         nvmx_idtv_handling();
 }
 
+static bool_t vmx_check_pf_injection(void)
+{
+    struct vcpu *curr = current;
+    struct domain *d = curr->domain;
+    struct segment_register seg;
+    unsigned long ev;
+    uint32_t pending_event = 0;
+
+    if ( !is_hvm_domain(d) ||
+         likely(d->arch.hvm_domain.fault_info.valid == 0) )
+        return 0;
+
+    hvm_get_segment_register(curr, x86_seg_ss, &seg);
+
+    if ( seg.attr.fields.dpl != 3 ) /* Guest is not in user mode */
+        return 0;
+
+
+    if ( curr->arch.hvm_vcpu.guest_cr[3]
+         != d->arch.hvm_domain.fault_info.address_space )
+        return 0;
+
+    vmx_vmcs_enter(curr);
+    __vmread(VM_ENTRY_INTR_INFO, &ev);
+
+    if ( (ev & INTR_INFO_VALID_MASK) &&
+         hvm_event_needs_reinjection(MASK_EXTR(ev, INTR_INFO_INTR_TYPE_MASK),
+                                     ev & INTR_INFO_VECTOR_MASK) )
+        pending_event = ev;
+
+    vmx_vmcs_exit(curr);
+    return (pending_event == 0);
+}
+
+static void vmx_inject_pf(void)
+{
+    struct vcpu *curr = current;
+    struct domain *d = curr->domain;
+    int errcode = PFEC_user_mode;
+    uint64_t virtual_address = d->arch.hvm_domain.fault_info.virtual_address;
+    uint32_t write_access = d->arch.hvm_domain.fault_info.write_access;
+
+    d->arch.hvm_domain.fault_info.address_space = 0;
+    d->arch.hvm_domain.fault_info.virtual_address = 0;
+    d->arch.hvm_domain.fault_info.write_access = 0;
+    d->arch.hvm_domain.fault_info.valid = 0;
+
+    if ( write_access )
+        errcode |= PFEC_write_access;
+
+    hvm_inject_page_fault(errcode, virtual_address);
+}
+
 void vmx_vmenter_helper(const struct cpu_user_regs *regs)
 {
     struct vcpu *curr = current;
@@ -3154,6 +3207,9 @@ void vmx_vmenter_helper(const struct cpu_user_regs *regs)
     if ( unlikely(need_flush) )
         vpid_sync_all();
 
+    if ( vmx_check_pf_injection() )
+        vmx_inject_pf();
+
  out:
     HVMTRACE_ND(VMENTRY, 0, 1/*cycles*/, 0, 0, 0, 0, 0, 0, 0);
 
diff --git a/xen/common/domctl.c b/xen/common/domctl.c
index c326aba..a86a6b7 100644
--- a/xen/common/domctl.c
+++ b/xen/common/domctl.c
@@ -967,6 +967,34 @@ long do_domctl(XEN_GUEST_HANDLE_PARAM(xen_domctl_t) u_domctl)
     }
     break;
 
+    case XEN_DOMCTL_set_pagefault_info:
+    {
+        struct domain *d;
+
+        ret = -ESRCH;
+        d = rcu_lock_domain_by_id(op->domain);
+        if ( d != NULL )
+        {
+            ret = -EINVAL;
+
+            if ( is_hvm_domain(d) )
+            {
+                d->arch.hvm_domain.fault_info.address_space =
+                    op->u.set_pagefault_info.address_space;
+                d->arch.hvm_domain.fault_info.virtual_address =
+                    op->u.set_pagefault_info.virtual_address;
+                d->arch.hvm_domain.fault_info.write_access =
+                    op->u.set_pagefault_info.write_access;
+                d->arch.hvm_domain.fault_info.valid = 1;
+
+                ret = 0;
+            }
+
+            rcu_unlock_domain(d);
+        }
+    }
+    break;
+
     default:
         ret = arch_do_domctl(op, d, u_domctl);
         break;
diff --git a/xen/include/asm-x86/hvm/domain.h b/xen/include/asm-x86/hvm/domain.h
index 291a2e0..b3c65a7 100644
--- a/xen/include/asm-x86/hvm/domain.h
+++ b/xen/include/asm-x86/hvm/domain.h
@@ -141,6 +141,14 @@ struct hvm_domain {
      */
     uint64_t sync_tsc;
 
+    /* Memory introspection page fault injection data. */
+    struct {
+        uint64_t address_space;
+        uint64_t virtual_address;
+        uint32_t write_access;
+        bool_t valid;
+    } fault_info;
+
     union {
         struct vmx_domain vmx;
         struct svm_domain svm;
diff --git a/xen/include/public/domctl.h b/xen/include/public/domctl.h
index 5b11bbf..21bdb82 100644
--- a/xen/include/public/domctl.h
+++ b/xen/include/public/domctl.h
@@ -936,6 +936,19 @@ typedef struct xen_domctl_vcpu_msrs xen_domctl_vcpu_msrs_t;
 DEFINE_XEN_GUEST_HANDLE(xen_domctl_vcpu_msrs_t);
 #endif
 
+/* XEN_DOMCTL_set_pagefault_info requests that a page fault occur at
+ * the next VMENTRY.
+ */
+struct xen_domctl_set_pagefault_info {
+    uint64_t address_space;
+    uint64_t virtual_address;
+    uint32_t write_access;
+    uint32_t _pad;
+};
+typedef struct xen_domctl_set_pagefault_info xen_domctl_set_pagefault_info_t;
+DEFINE_XEN_GUEST_HANDLE(xen_domctl_set_pagefault_info_t);
+
+
 struct xen_domctl {
     uint32_t cmd;
 #define XEN_DOMCTL_createdomain                   1
@@ -1008,6 +1021,7 @@ struct xen_domctl {
 #define XEN_DOMCTL_cacheflush                    71
 #define XEN_DOMCTL_get_vcpu_msrs                 72
 #define XEN_DOMCTL_set_vcpu_msrs                 73
+#define XEN_DOMCTL_set_pagefault_info            74
 #define XEN_DOMCTL_gdbsx_guestmemio            1000
 #define XEN_DOMCTL_gdbsx_pausevcpu             1001
 #define XEN_DOMCTL_gdbsx_unpausevcpu           1002
@@ -1068,6 +1082,7 @@ struct xen_domctl {
         struct xen_domctl_cacheflush        cacheflush;
         struct xen_domctl_gdbsx_pauseunp_vcpu gdbsx_pauseunp_vcpu;
         struct xen_domctl_gdbsx_domstatus   gdbsx_domstatus;
+        struct xen_domctl_set_pagefault_info set_pagefault_info;
         uint8_t                             pad[128];
     } u;
 };
-- 
1.7.9.5

^ permalink raw reply related	[flat|nested] 18+ messages in thread

* [PATCH RFC V5 5/5] xen: Handle resumed instruction based on previous mem_event reply
  2014-08-06 15:58 [PATCH RFC V5 1/5] xen: Emulate with no writes Razvan Cojocaru
                   ` (2 preceding siblings ...)
  2014-08-06 15:58 ` [PATCH RFC V5 4/5] xen, libxc: Request page fault injection via libxc Razvan Cojocaru
@ 2014-08-06 15:58 ` Razvan Cojocaru
  2014-08-08 14:54   ` Jan Beulich
  2014-08-08 14:25 ` [PATCH RFC V5 1/5] xen: Emulate with no writes Jan Beulich
  4 siblings, 1 reply; 18+ messages in thread
From: Razvan Cojocaru @ 2014-08-06 15:58 UTC (permalink / raw)
  To: xen-devel
  Cc: kevin.tian, ian.campbell, Razvan Cojocaru, stefano.stabellini,
	andrew.cooper3, ian.jackson, JBeulich, eddie.dong, jun.nakajima

In a scenario where a page fault that triggered a mem_event occured,
p2m_mem_access_check() will now be able to either 1) emulate the
current instruction, or 2) emulate it, but don't allow it to perform
any writes.

Changes since V1:
 - Removed the 'skip' code which required computing the current
   instruction length.
 - Removed the set_ad_bits() code that attempted to modify the
   'accessed' and 'dirty' bits for instructions that the emulator
   can't handle at the moment.

Changes since V2:
 - Moved the __vmread(EXIT_QUALIFICATION, &exit_qualification); code
   in vmx.c, accessible via hvm_funcs.
 - Incorporated changes by Andrew Cooper ("[PATCH 1/2] Xen/mem_event:
   Validate the response vcpu_id before acting on it."

Changes since V3:
 - Collapsed verbose lines into a single "else if()".
 - Changed an int to bool_t.
 - Fixed a minor coding style issue.
 - Now computing the first parameter to hvm_emulate_one_full()
   (replaced an "if" with a single call).
 - Added code comments about eip and gla reset (clarity issue).
 - Removed duplicate code by Andrew Cooper (introduced in V2,
   since committed).

Changes since V4:
 - Renamed "exited_by_pagefault" to "exited_by_nested_pagefault".
 - Folded two long lines.
 - Combined two "if" statements.
 - Moved "violation = 0;" to the XENMEM_access_rwx case.

Signed-off-by: Razvan Cojocaru <rcojocaru@bitdefender.com>
---
 xen/arch/x86/domain.c          |    3 ++
 xen/arch/x86/hvm/vmx/vmx.c     |   13 +++++++
 xen/arch/x86/mm/p2m.c          |   84 ++++++++++++++++++++++++++++++++++++++++
 xen/include/asm-x86/domain.h   |    9 +++++
 xen/include/asm-x86/hvm/hvm.h  |    2 +
 xen/include/public/mem_event.h |   12 +++---
 6 files changed, 118 insertions(+), 5 deletions(-)

diff --git a/xen/arch/x86/domain.c b/xen/arch/x86/domain.c
index f7e0e78..7b1dfe6 100644
--- a/xen/arch/x86/domain.c
+++ b/xen/arch/x86/domain.c
@@ -415,6 +415,9 @@ int vcpu_initialise(struct vcpu *v)
 
     v->arch.flags = TF_kernel_mode;
 
+    /* By default, do not emulate */
+    v->arch.mem_event.emulate_flags = 0;
+
     rc = mapcache_vcpu_init(v);
     if ( rc )
         return rc;
diff --git a/xen/arch/x86/hvm/vmx/vmx.c b/xen/arch/x86/hvm/vmx/vmx.c
index 99bdf11..c4e9310 100644
--- a/xen/arch/x86/hvm/vmx/vmx.c
+++ b/xen/arch/x86/hvm/vmx/vmx.c
@@ -1698,6 +1698,18 @@ static void vmx_enable_intro_msr_interception(struct domain *d)
     }
 }
 
+static bool_t vmx_exited_by_nested_pagefault(void)
+{
+    unsigned long exit_qualification;
+
+    __vmread(EXIT_QUALIFICATION, &exit_qualification);
+
+    if ( (exit_qualification & EPT_GLA_FAULT) == 0 )
+        return 0;
+
+    return 1;
+}
+
 static struct hvm_function_table __initdata vmx_function_table = {
     .name                 = "VMX",
     .cpu_up_prepare       = vmx_cpu_up_prepare,
@@ -1757,6 +1769,7 @@ static struct hvm_function_table __initdata vmx_function_table = {
     .nhvm_hap_walk_L1_p2m = nvmx_hap_walk_L1_p2m,
     .hypervisor_cpuid_leaf = vmx_hypervisor_cpuid_leaf,
     .enable_intro_msr_interception = vmx_enable_intro_msr_interception,
+    .exited_by_nested_pagefault  = vmx_exited_by_nested_pagefault,
 };
 
 const struct hvm_function_table * __init start_vmx(void)
diff --git a/xen/arch/x86/mm/p2m.c b/xen/arch/x86/mm/p2m.c
index 069e869..345b320 100644
--- a/xen/arch/x86/mm/p2m.c
+++ b/xen/arch/x86/mm/p2m.c
@@ -1391,6 +1391,7 @@ bool_t p2m_mem_access_check(paddr_t gpa, bool_t gla_valid, unsigned long gla,
     p2m_access_t p2ma;
     mem_event_request_t *req;
     int rc;
+    unsigned long eip = guest_cpu_user_regs()->eip;
 
     /* First, handle rx2rw conversion automatically.
      * These calls to p2m->set_entry() must succeed: we have the gfn
@@ -1443,6 +1444,36 @@ bool_t p2m_mem_access_check(paddr_t gpa, bool_t gla_valid, unsigned long gla,
             return 1;
         }
     }
+    else if ( hvm_funcs.exited_by_nested_pagefault &&
+        !hvm_funcs.exited_by_nested_pagefault() &&
+        v->arch.mem_event.emulate_flags == 0 ) /* don't send a mem_event */
+    {
+        v->arch.mem_event.emulate_flags = MEM_EVENT_FLAG_EMULATE;
+        v->arch.mem_event.gpa = gpa;
+        v->arch.mem_event.eip = eip;
+    }
+
+    /* The previous mem_event reply does not match the current state. */
+    if ( v->arch.mem_event.gpa != gpa || v->arch.mem_event.eip != eip )
+    {
+        /* Don't emulate the current instruction, send a new mem_event. */
+        v->arch.mem_event.emulate_flags = 0;
+
+        /* Make sure to mark the current state to match it again against
+         * the new mem_event about to be sent. */
+        v->arch.mem_event.gpa = gpa;
+        v->arch.mem_event.eip = eip;
+    }
+
+    if ( v->arch.mem_event.emulate_flags )
+    {
+        hvm_emulate_one_full((v->arch.mem_event.emulate_flags &
+                              MEM_EVENT_FLAG_EMULATE_NOWRITE) != 0,
+                             TRAP_invalid_op, HVM_DELIVER_NO_ERROR_CODE);
+
+        v->arch.mem_event.emulate_flags = 0;
+        return 1;
+    }
 
     *req_ptr = NULL;
     req = xzalloc(mem_event_request_t);
@@ -1495,6 +1526,59 @@ void p2m_mem_access_resume(struct domain *d)
 
         v = d->vcpu[rsp.vcpu_id];
 
+        /* Mark vcpu for skipping one instruction upon rescheduling */
+        if ( rsp.flags & MEM_EVENT_FLAG_EMULATE )
+        {
+            xenmem_access_t access;
+            bool_t violation = 1;
+
+            v->arch.mem_event.emulate_flags = 0;
+
+            if ( p2m_get_mem_access(d, rsp.gfn, &access) == 0 )
+            {
+                switch ( access )
+                {
+                case XENMEM_access_n:
+                case XENMEM_access_n2rwx:
+                default:
+                    violation = rsp.access_r || rsp.access_w || rsp.access_x;
+                    break;
+
+                case XENMEM_access_r:
+                    violation = rsp.access_w || rsp.access_x;
+                    break;
+
+                case XENMEM_access_w:
+                    violation = rsp.access_r || rsp.access_x;
+                    break;
+
+                case XENMEM_access_x:
+                    violation = rsp.access_r || rsp.access_w;
+                    break;
+
+                case XENMEM_access_rx:
+                case XENMEM_access_rx2rw:
+                    violation = rsp.access_w;
+                    break;
+
+                case XENMEM_access_wx:
+                    violation = rsp.access_r;
+                    break;
+
+                case XENMEM_access_rw:
+                    violation = rsp.access_x;
+                    break;
+
+                case XENMEM_access_rwx:
+                    violation = 0;
+                    break;
+                }
+            }
+
+            if ( violation )
+                v->arch.mem_event.emulate_flags = rsp.flags;
+        }
+
         /* Unpause domain */
         if ( rsp.flags & MEM_EVENT_FLAG_VCPU_PAUSED )
             mem_event_vcpu_unpause(v);
diff --git a/xen/include/asm-x86/domain.h b/xen/include/asm-x86/domain.h
index 83329ed..440aa81 100644
--- a/xen/include/asm-x86/domain.h
+++ b/xen/include/asm-x86/domain.h
@@ -458,6 +458,15 @@ struct arch_vcpu
 
     /* A secondary copy of the vcpu time info. */
     XEN_GUEST_HANDLE(vcpu_time_info_t) time_info_guest;
+
+    /* Should we emulate the next matching instruction on VCPU resume
+     * after a mem_event? */
+    struct {
+        uint32_t emulate_flags;
+        unsigned long gpa;
+        unsigned long eip;
+    } mem_event;
+
 } __cacheline_aligned;
 
 smap_check_policy_t smap_policy_change(struct vcpu *v,
diff --git a/xen/include/asm-x86/hvm/hvm.h b/xen/include/asm-x86/hvm/hvm.h
index 2a87e9b..abc36dd 100644
--- a/xen/include/asm-x86/hvm/hvm.h
+++ b/xen/include/asm-x86/hvm/hvm.h
@@ -207,6 +207,8 @@ struct hvm_function_table {
                                   uint32_t *ecx, uint32_t *edx);
 
     void (*enable_intro_msr_interception)(struct domain *d);
+
+    bool_t (*exited_by_nested_pagefault)(void);
 };
 
 extern struct hvm_function_table hvm_funcs;
diff --git a/xen/include/public/mem_event.h b/xen/include/public/mem_event.h
index bbd1636..49f1b84 100644
--- a/xen/include/public/mem_event.h
+++ b/xen/include/public/mem_event.h
@@ -31,11 +31,13 @@
 #include "io/ring.h"
 
 /* Memory event flags */
-#define MEM_EVENT_FLAG_VCPU_PAUSED  (1 << 0)
-#define MEM_EVENT_FLAG_DROP_PAGE    (1 << 1)
-#define MEM_EVENT_FLAG_EVICT_FAIL   (1 << 2)
-#define MEM_EVENT_FLAG_FOREIGN      (1 << 3)
-#define MEM_EVENT_FLAG_DUMMY        (1 << 4)
+#define MEM_EVENT_FLAG_VCPU_PAUSED     (1 << 0)
+#define MEM_EVENT_FLAG_DROP_PAGE       (1 << 1)
+#define MEM_EVENT_FLAG_EVICT_FAIL      (1 << 2)
+#define MEM_EVENT_FLAG_FOREIGN         (1 << 3)
+#define MEM_EVENT_FLAG_DUMMY           (1 << 4)
+#define MEM_EVENT_FLAG_EMULATE         (1 << 5)
+#define MEM_EVENT_FLAG_EMULATE_NOWRITE (1 << 6)
 
 /* Reasons for the memory event request */
 #define MEM_EVENT_REASON_UNKNOWN     0    /* typical reason */
-- 
1.7.9.5

^ permalink raw reply related	[flat|nested] 18+ messages in thread

* Re: [PATCH RFC V5 4/5] xen, libxc: Request page fault injection via libxc
  2014-08-06 15:58 ` [PATCH RFC V5 4/5] xen, libxc: Request page fault injection via libxc Razvan Cojocaru
@ 2014-08-07  7:22   ` Razvan Cojocaru
  2014-08-08 14:48   ` Jan Beulich
  1 sibling, 0 replies; 18+ messages in thread
From: Razvan Cojocaru @ 2014-08-07  7:22 UTC (permalink / raw)
  To: xen-devel
  Cc: kevin.tian, ian.campbell, stefano.stabellini, jun.nakajima,
	andrew.cooper3, ian.jackson, eddie.dong, JBeulich

On 08/06/2014 06:58 PM, Razvan Cojocaru wrote:
> Added new XEN_DOMCTL_set_pagefault_info hypercall, used by libxc's
> new xc_domain_set_pagefault_info() function to set per-domain page
> fault injection information. All a call does is set per-domain info,
> and nothing actually happens until VMENTRY time, and then only if
> all conditions are met (the guest is in user mode, the set value
> matches CR3, and there are no other pending traps).
> This mechanism allows bringing in swapped-out pages for inspection.
> 
> Changes since V2:
>  - Removed superfluous memset(&ctxt, 0, sizeof(struct hvm_hw_cpu)).
>  - Now checking SS.DPL instead of CS.DPL to see if the guest is in
>    user mode.
>  - Removed superfluous fault_info members initialization.
>  - Now returning -EINVAL if !has_hvm_container_domain(d) on
>    XEN_DOMCTL_set_pagefault_info.
>  - Moved struct fault_info from struct domain to struct hvm_domain.
>  - Collapsed explicit initialization of the fault_info members into a
>    one-line assignment on the tools/libxc side.
>  - Now using write_access as a proper bool instead of shifting and
>    OR-ing it with PFEC_user_mode.
> 
> Changes since V3:
>  - Now checking that is_hvm_domain(d) in do_domctl() (consistent with
>    the test in the actual page fault injection code).
>  - XEN_DOMCTL_set_pagefault_info now extends the proper range.
>  - Added a likely() around the
>    d->arch.hvm_domain.fault_info.virtual_address == 0 check.
>  - Replaced hvm_funcs.save_cpu_ctxt() function call with lighter code.
>  - Split the vmx.c page fault injection function into a check
>    function and an inject function.
> 
> Changes since V4:
>  - Added "valid" flag instead of relying on virtual address 0 as a
>    special case.
>  - Using #defines instead of magic constants in vmx_vmcs_save().
>  - Padded struct xen_domctl_set_pagefault_info.
>  - Coding style and readability changes.
> 
> Signed-off-by: Razvan Cojocaru <rcojocaru@bitdefender.com>

Sorry, forgot the

Acked-by: Ian Campbell <ian.campbell@citrix.com>

on this one.


Thanks,
Razvan Cojocaru

^ permalink raw reply	[flat|nested] 18+ messages in thread

* Re: [PATCH RFC V5 1/5] xen: Emulate with no writes
  2014-08-06 15:58 [PATCH RFC V5 1/5] xen: Emulate with no writes Razvan Cojocaru
                   ` (3 preceding siblings ...)
  2014-08-06 15:58 ` [PATCH RFC V5 5/5] xen: Handle resumed instruction based on previous mem_event reply Razvan Cojocaru
@ 2014-08-08 14:25 ` Jan Beulich
  2014-08-08 14:33   ` Razvan Cojocaru
  4 siblings, 1 reply; 18+ messages in thread
From: Jan Beulich @ 2014-08-08 14:25 UTC (permalink / raw)
  To: Razvan Cojocaru
  Cc: kevin.tian, ian.campbell, stefano.stabellini, andrew.cooper3,
	eddie.dong, xen-devel, jun.nakajima, ian.jackson

>>> On 06.08.14 at 17:58, <rcojocaru@bitdefender.com> wrote:
> Changes since V3:
>  - The rep_ins, rep_movs and cmpxchg handlers are
>    now also inactive.
> 
> Changes since V4:
>  - Also discarding IO reads (dummy read_io handler).

So you still allow I/O writes (individual and string)? Pretty odd I
would say.

Jan

^ permalink raw reply	[flat|nested] 18+ messages in thread

* Re: [PATCH RFC V5 2/5] xen: Optimize introspection access to guest state
  2014-08-06 15:58 ` [PATCH RFC V5 2/5] xen: Optimize introspection access to guest state Razvan Cojocaru
@ 2014-08-08 14:27   ` Jan Beulich
  0 siblings, 0 replies; 18+ messages in thread
From: Jan Beulich @ 2014-08-08 14:27 UTC (permalink / raw)
  To: Razvan Cojocaru
  Cc: kevin.tian, ian.campbell, stefano.stabellini, andrew.cooper3,
	eddie.dong, xen-devel, jun.nakajima, ian.jackson

>>> On 06.08.14 at 17:58, <rcojocaru@bitdefender.com> wrote:
> Speed optimization for introspection purposes: a handful of registers
> are sent along with each mem_event. This requires enlargement of the
> mem_event_request / mem_event_response stuctures, and additional code
> to fill in relevant values. Since the EPT event processing code needs
> more data than CR3 or MSR event processors, hvm_mem_event_fill_regs()
> fills in less data than p2m_mem_event_fill_regs(), in order to avoid
> overhead. Struct hvm_hw_cpu has been considered instead of the custom
> struct mem_event_regs_st, but its size would cause quick filling up
> of the mem_event ring buffer.
> 
> Changes since V1:
>  - Replaced guest_x86_mode with cs_arbytes in the mem_event_regs_st
>    structure.
>  - Removed superfluous preprocessor check for __x86_64__ in
>    p2m_mem_event_fill_regs().
> 
> Changes since V2:
>  - Removed inline function compiler suggestions.
>  - Removed superfluous memset(&ctxt, 0, sizeof(struct hvm_hw_cpu)).
>  - Padded struct mem_event_regs_st.
>  - Renamed mem_event_regs_t to mem_event_regs.
>  - Readability changes.
> 
> Changes since V3:
>  - Changed the name of VCPU variables referring to the current
>    VCPU to "curr".
>  - Renamed "mem_event_regs" to "x86_mem_event_regs" to make it
>    explicit.
> 
> Changes since V4:
>  - Renamed "x86_mem_event_regs" to "mem_event_regs_x86" and
>    removed a typedef.

Looks pretty okay to me now; the one desire left would be for you to
make all the new code you add const-correct.

Jan

^ permalink raw reply	[flat|nested] 18+ messages in thread

* Re: [PATCH RFC V5 1/5] xen: Emulate with no writes
  2014-08-08 14:25 ` [PATCH RFC V5 1/5] xen: Emulate with no writes Jan Beulich
@ 2014-08-08 14:33   ` Razvan Cojocaru
  2014-08-08 15:08     ` Jan Beulich
  0 siblings, 1 reply; 18+ messages in thread
From: Razvan Cojocaru @ 2014-08-08 14:33 UTC (permalink / raw)
  To: Jan Beulich
  Cc: kevin.tian, ian.campbell, stefano.stabellini, andrew.cooper3,
	eddie.dong, xen-devel, jun.nakajima, ian.jackson

On 08/08/2014 05:25 PM, Jan Beulich wrote:
>>>> On 06.08.14 at 17:58, <rcojocaru@bitdefender.com> wrote:
>> Changes since V3:
>>  - The rep_ins, rep_movs and cmpxchg handlers are
>>    now also inactive.
>>
>> Changes since V4:
>>  - Also discarding IO reads (dummy read_io handler).
> 
> So you still allow I/O writes (individual and string)? Pretty odd I
> would say.

Thanks for the comment! I did indeed miss that one, I'll add a dummy
write_io handler as well.


Thanks,
Razvan Cojocaru

^ permalink raw reply	[flat|nested] 18+ messages in thread

* Re: [PATCH RFC V5 3/5] xen: Force-enable relevant MSR events; optimize the number of sent MSR events
  2014-08-06 15:58 ` [PATCH RFC V5 3/5] xen: Force-enable relevant MSR events; optimize the number of sent MSR events Razvan Cojocaru
@ 2014-08-08 14:34   ` Jan Beulich
  2014-08-08 14:47     ` Razvan Cojocaru
  2014-08-11  8:57     ` Razvan Cojocaru
  0 siblings, 2 replies; 18+ messages in thread
From: Jan Beulich @ 2014-08-08 14:34 UTC (permalink / raw)
  To: Razvan Cojocaru
  Cc: kevin.tian, ian.campbell, stefano.stabellini, andrew.cooper3,
	eddie.dong, xen-devel, jun.nakajima, ian.jackson

>>> On 06.08.14 at 17:58, <rcojocaru@bitdefender.com> wrote:
> @@ -695,11 +696,30 @@ static void vmx_set_host_env(struct vcpu *v)
>  void vmx_disable_intercept_for_msr(struct vcpu *v, u32 msr, int type)
>  {
>      unsigned long *msr_bitmap = v->arch.hvm_vmx.msr_bitmap;
> +    struct domain *d = v->domain;
>  
>      /* VMX MSR bitmap supported? */
>      if ( msr_bitmap == NULL )
>          return;
>  
> +    if ( mem_event_check_ring(&d->mem_event->access) )
> +    {
> +        /* Filter out MSR-s needed for memory introspection */

I continue to be unconvinced that this code block's surrounding
conditional is as precise as possible: Your introspection code
surely isn't the only mem-event based mechanism. Yet you'd
impact guests in all other cases too.

> --- a/xen/arch/x86/hvm/vmx/vmx.c
> +++ b/xen/arch/x86/hvm/vmx/vmx.c
> @@ -1682,6 +1682,22 @@ void vmx_hypervisor_cpuid_leaf(uint32_t sub_idx,
>          *eax |= XEN_HVM_CPUID_X2APIC_VIRT;
>  }
>  
> +static void vmx_enable_intro_msr_interception(struct domain *d)

The "intro" in the name is surely odd: For one, it implies that _only_
introspection might be interested in doing this. And then it may
(without reading the comments inside the function) well be an
abbreviation for something else, e.g. "introduction".

> +{
> +    struct vcpu *v;
> +
> +    /* Enable interception for MSRs needed for memory introspection. */
> +    for_each_vcpu ( d, v )
> +    {
> +        vmx_enable_intercept_for_msr(v, MSR_IA32_SYSENTER_EIP, MSR_TYPE_W);
> +        vmx_enable_intercept_for_msr(v, MSR_IA32_SYSENTER_ESP, MSR_TYPE_W);
> +        vmx_enable_intercept_for_msr(v, MSR_IA32_SYSENTER_CS, MSR_TYPE_W);
> +        vmx_enable_intercept_for_msr(v, MSR_IA32_MC0_CTL, MSR_TYPE_W);
> +        vmx_enable_intercept_for_msr(v, MSR_STAR, MSR_TYPE_W);
> +        vmx_enable_intercept_for_msr(v, MSR_LSTAR, MSR_TYPE_W);

I also wonder whether the redundant enumeration of all these
MSRs couldn't be abstracted to just a single place.

> --- a/xen/arch/x86/mm/mem_event.c
> +++ b/xen/arch/x86/mm/mem_event.c
> @@ -600,6 +600,9 @@ int mem_event_domctl(struct domain *d, xen_domctl_mem_event_op_t *mec,
>              rc = mem_event_enable(d, mec, med, _VPF_mem_access, 
>                                      HVM_PARAM_ACCESS_RING_PFN,
>                                      mem_access_notification);
> +
> +            if ( rc == 0 && hvm_funcs.enable_intro_msr_interception )
> +                hvm_funcs.enable_intro_msr_interception(d);

Isn't the sequence of operations wrong here (leaving a window in
time where mem events are already enabled but the necessary MSRs
aren't being intercepted yet? Or was it that guests are being paused
while all this takes place?

Plus of course the same remark regarding the insufficient conditional
as above applies here.

Jan

^ permalink raw reply	[flat|nested] 18+ messages in thread

* Re: [PATCH RFC V5 3/5] xen: Force-enable relevant MSR events; optimize the number of sent MSR events
  2014-08-08 14:34   ` Jan Beulich
@ 2014-08-08 14:47     ` Razvan Cojocaru
  2014-08-08 15:11       ` Jan Beulich
  2014-08-11  8:57     ` Razvan Cojocaru
  1 sibling, 1 reply; 18+ messages in thread
From: Razvan Cojocaru @ 2014-08-08 14:47 UTC (permalink / raw)
  To: Jan Beulich
  Cc: kevin.tian, ian.campbell, stefano.stabellini, andrew.cooper3,
	eddie.dong, xen-devel, jun.nakajima, ian.jackson

On 08/08/2014 05:34 PM, Jan Beulich wrote:
>>>> On 06.08.14 at 17:58, <rcojocaru@bitdefender.com> wrote:
>> @@ -695,11 +696,30 @@ static void vmx_set_host_env(struct vcpu *v)
>>  void vmx_disable_intercept_for_msr(struct vcpu *v, u32 msr, int type)
>>  {
>>      unsigned long *msr_bitmap = v->arch.hvm_vmx.msr_bitmap;
>> +    struct domain *d = v->domain;
>>  
>>      /* VMX MSR bitmap supported? */
>>      if ( msr_bitmap == NULL )
>>          return;
>>  
>> +    if ( mem_event_check_ring(&d->mem_event->access) )
>> +    {
>> +        /* Filter out MSR-s needed for memory introspection */
> 
> I continue to be unconvinced that this code block's surrounding
> conditional is as precise as possible: Your introspection code
> surely isn't the only mem-event based mechanism. Yet you'd
> impact guests in all other cases too.

I agree, however I can't think of a way to be more specific without
introducing a special new parameter / bit when enabling mem_access.
If you feel that that would not be a problem, I'll add one.

>> --- a/xen/arch/x86/hvm/vmx/vmx.c
>> +++ b/xen/arch/x86/hvm/vmx/vmx.c
>> @@ -1682,6 +1682,22 @@ void vmx_hypervisor_cpuid_leaf(uint32_t sub_idx,
>>          *eax |= XEN_HVM_CPUID_X2APIC_VIRT;
>>  }
>>  
>> +static void vmx_enable_intro_msr_interception(struct domain *d)
> 
> The "intro" in the name is surely odd: For one, it implies that _only_
> introspection might be interested in doing this. And then it may
> (without reading the comments inside the function) well be an
> abbreviation for something else, e.g. "introduction".

It's no problem to either drop "intro" or expand it into
"introspection". Would one be preferable to the other?

>> +{
>> +    struct vcpu *v;
>> +
>> +    /* Enable interception for MSRs needed for memory introspection. */
>> +    for_each_vcpu ( d, v )
>> +    {
>> +        vmx_enable_intercept_for_msr(v, MSR_IA32_SYSENTER_EIP, MSR_TYPE_W);
>> +        vmx_enable_intercept_for_msr(v, MSR_IA32_SYSENTER_ESP, MSR_TYPE_W);
>> +        vmx_enable_intercept_for_msr(v, MSR_IA32_SYSENTER_CS, MSR_TYPE_W);
>> +        vmx_enable_intercept_for_msr(v, MSR_IA32_MC0_CTL, MSR_TYPE_W);
>> +        vmx_enable_intercept_for_msr(v, MSR_STAR, MSR_TYPE_W);
>> +        vmx_enable_intercept_for_msr(v, MSR_LSTAR, MSR_TYPE_W);
> 
> I also wonder whether the redundant enumeration of all these
> MSRs couldn't be abstracted to just a single place.

I'll add them to a const array and iterate through that.

>> --- a/xen/arch/x86/mm/mem_event.c
>> +++ b/xen/arch/x86/mm/mem_event.c
>> @@ -600,6 +600,9 @@ int mem_event_domctl(struct domain *d, xen_domctl_mem_event_op_t *mec,
>>              rc = mem_event_enable(d, mec, med, _VPF_mem_access, 
>>                                      HVM_PARAM_ACCESS_RING_PFN,
>>                                      mem_access_notification);
>> +
>> +            if ( rc == 0 && hvm_funcs.enable_intro_msr_interception )
>> +                hvm_funcs.enable_intro_msr_interception(d);
> 
> Isn't the sequence of operations wrong here (leaving a window in
> time where mem events are already enabled but the necessary MSRs
> aren't being intercepted yet? Or was it that guests are being paused
> while all this takes place?

The guest is paused, but that's a fair point. I'll look into it.


Thanks,
Razvan Cojocaru

^ permalink raw reply	[flat|nested] 18+ messages in thread

* Re: [PATCH RFC V5 4/5] xen, libxc: Request page fault injection via libxc
  2014-08-06 15:58 ` [PATCH RFC V5 4/5] xen, libxc: Request page fault injection via libxc Razvan Cojocaru
  2014-08-07  7:22   ` Razvan Cojocaru
@ 2014-08-08 14:48   ` Jan Beulich
  2014-08-08 14:55     ` Razvan Cojocaru
  2014-08-08 21:45     ` Andrei LUTAS
  1 sibling, 2 replies; 18+ messages in thread
From: Jan Beulich @ 2014-08-08 14:48 UTC (permalink / raw)
  To: Razvan Cojocaru
  Cc: kevin.tian, ian.campbell, stefano.stabellini, andrew.cooper3,
	eddie.dong, xen-devel, jun.nakajima, ian.jackson

> --- a/xen/arch/x86/hvm/vmx/vmx.c
> +++ b/xen/arch/x86/hvm/vmx/vmx.c
> @@ -3114,6 +3114,59 @@ out:
>          nvmx_idtv_handling();
>  }
>  
> +static bool_t vmx_check_pf_injection(void)
> +{
> +    struct vcpu *curr = current;
> +    struct domain *d = curr->domain;

currd please.

> +    struct segment_register seg;
> +    unsigned long ev;
> +    uint32_t pending_event = 0;
> +
> +    if ( !is_hvm_domain(d) ||
> +         likely(d->arch.hvm_domain.fault_info.valid == 0) )

The "valid" field being a boolean one, please use ! instead of == 0.

> +        return 0;
> +
> +    hvm_get_segment_register(curr, x86_seg_ss, &seg);

You can directly use the VMX variant here I think, or abstract out the
generic parts.

> +
> +    if ( seg.attr.fields.dpl != 3 ) /* Guest is not in user mode */
> +        return 0;
> +
> +

Double blank line.Didn't I ask you to clean up your patches in this
regard already?

> +    if ( curr->arch.hvm_vcpu.guest_cr[3]
> +         != d->arch.hvm_domain.fault_info.address_space )

Do you really mean to compare the full CR3 value here, rather than
just bits 12...51? In which case the address_space field likely would
better be a GPFN.

> +        return 0;
> +
> +    vmx_vmcs_enter(curr);
> +    __vmread(VM_ENTRY_INTR_INFO, &ev);
> +
> +    if ( (ev & INTR_INFO_VALID_MASK) &&
> +         hvm_event_needs_reinjection(MASK_EXTR(ev, INTR_INFO_INTR_TYPE_MASK),
> +                                     ev & INTR_INFO_VECTOR_MASK) )
> +        pending_event = ev;
> +
> +    vmx_vmcs_exit(curr);

If you pulled this up (I don't think it needs to be here), it would
perhaps indeed be worth abstracting out the non-VMX bits from
here to a generic function.

> +    return (pending_event == 0);

Pointless parentheses.

> +}
> +
> +static void vmx_inject_pf(void)
> +{
> +    struct vcpu *curr = current;
> +    struct domain *d = curr->domain;
> +    int errcode = PFEC_user_mode;
> +    uint64_t virtual_address = d->arch.hvm_domain.fault_info.virtual_address;
> +    uint32_t write_access = d->arch.hvm_domain.fault_info.write_access;
> +
> +    d->arch.hvm_domain.fault_info.address_space = 0;
> +    d->arch.hvm_domain.fault_info.virtual_address = 0;
> +    d->arch.hvm_domain.fault_info.write_access = 0;

Are these necessary? Because ...

> +    d->arch.hvm_domain.fault_info.valid = 0;

... I would hope that this one is properly guarding all uses of the
other fields.

> --- a/xen/common/domctl.c
> +++ b/xen/common/domctl.c
> @@ -967,6 +967,34 @@ long do_domctl(XEN_GUEST_HANDLE_PARAM(xen_domctl_t) u_domctl)
>      }
>      break;
>  
> +    case XEN_DOMCTL_set_pagefault_info:
> +    {
> +        struct domain *d;

There already is a suitable variable available throughout the entire
function.

> +
> +        ret = -ESRCH;
> +        d = rcu_lock_domain_by_id(op->domain);

And this is already being done at the function level too. If you're
porting patches from older versions, please pay attention to
changes in context other than just those immediately visible
during patch application.

Jan

^ permalink raw reply	[flat|nested] 18+ messages in thread

* Re: [PATCH RFC V5 5/5] xen: Handle resumed instruction based on previous mem_event reply
  2014-08-06 15:58 ` [PATCH RFC V5 5/5] xen: Handle resumed instruction based on previous mem_event reply Razvan Cojocaru
@ 2014-08-08 14:54   ` Jan Beulich
  0 siblings, 0 replies; 18+ messages in thread
From: Jan Beulich @ 2014-08-08 14:54 UTC (permalink / raw)
  To: Razvan Cojocaru
  Cc: kevin.tian, ian.campbell, stefano.stabellini, andrew.cooper3,
	eddie.dong, xen-devel, jun.nakajima, ian.jackson

>>> On 06.08.14 at 17:58, <rcojocaru@bitdefender.com> wrote:
> --- a/xen/arch/x86/hvm/vmx/vmx.c
> +++ b/xen/arch/x86/hvm/vmx/vmx.c
> @@ -1698,6 +1698,18 @@ static void vmx_enable_intro_msr_interception(struct domain *d)
>      }
>  }
>  
> +static bool_t vmx_exited_by_nested_pagefault(void)
> +{
> +    unsigned long exit_qualification;
> +
> +    __vmread(EXIT_QUALIFICATION, &exit_qualification);
> +
> +    if ( (exit_qualification & EPT_GLA_FAULT) == 0 )
> +        return 0;
> +
> +    return 1;
> +}

I think this and associated code will become unnecessary with the
changes Tamas is proposing.

> @@ -1443,6 +1444,36 @@ bool_t p2m_mem_access_check(paddr_t gpa, bool_t gla_valid, unsigned long gla,
>              return 1;
>          }
>      }
> +    else if ( hvm_funcs.exited_by_nested_pagefault &&
> +        !hvm_funcs.exited_by_nested_pagefault() &&
> +        v->arch.mem_event.emulate_flags == 0 ) /* don't send a mem_event */

Indentation. Also the condition again strikes me as odd: If these really
are supposed to be to &&s, then the last condition should be checked
earlier (as being the cheaper check).

Jan

^ permalink raw reply	[flat|nested] 18+ messages in thread

* Re: [PATCH RFC V5 4/5] xen, libxc: Request page fault injection via libxc
  2014-08-08 14:48   ` Jan Beulich
@ 2014-08-08 14:55     ` Razvan Cojocaru
  2014-08-08 21:45     ` Andrei LUTAS
  1 sibling, 0 replies; 18+ messages in thread
From: Razvan Cojocaru @ 2014-08-08 14:55 UTC (permalink / raw)
  To: Jan Beulich
  Cc: kevin.tian, ian.campbell, stefano.stabellini, andrew.cooper3,
	eddie.dong, xen-devel, jun.nakajima, ian.jackson

On 08/08/2014 05:48 PM, Jan Beulich wrote:
>> +    if ( seg.attr.fields.dpl != 3 ) /* Guest is not in user mode */
>> +        return 0;
>> +
>> +
> 
> Double blank line.Didn't I ask you to clean up your patches in this
> regard already?

Not me, but point taken. :)

>> +    if ( curr->arch.hvm_vcpu.guest_cr[3]
>> +         != d->arch.hvm_domain.fault_info.address_space )
> 
> Do you really mean to compare the full CR3 value here, rather than
> just bits 12...51? In which case the address_space field likely would
> better be a GPFN.

You're right, I'll compare the values shifted by PAGE_SHIFT.

>> +}
>> +
>> +static void vmx_inject_pf(void)
>> +{
>> +    struct vcpu *curr = current;
>> +    struct domain *d = curr->domain;
>> +    int errcode = PFEC_user_mode;
>> +    uint64_t virtual_address = d->arch.hvm_domain.fault_info.virtual_address;
>> +    uint32_t write_access = d->arch.hvm_domain.fault_info.write_access;
>> +
>> +    d->arch.hvm_domain.fault_info.address_space = 0;
>> +    d->arch.hvm_domain.fault_info.virtual_address = 0;
>> +    d->arch.hvm_domain.fault_info.write_access = 0;
> 
> Are these necessary? Because ...
> 
>> +    d->arch.hvm_domain.fault_info.valid = 0;
> 
> ... I would hope that this one is properly guarding all uses of the
> other fields.

No, they're not necessary anymore. Thank you for pointing that out.


Thanks,
Razvan Cojocaru

^ permalink raw reply	[flat|nested] 18+ messages in thread

* Re: [PATCH RFC V5 1/5] xen: Emulate with no writes
  2014-08-08 14:33   ` Razvan Cojocaru
@ 2014-08-08 15:08     ` Jan Beulich
  0 siblings, 0 replies; 18+ messages in thread
From: Jan Beulich @ 2014-08-08 15:08 UTC (permalink / raw)
  To: Razvan Cojocaru
  Cc: kevin.tian, ian.campbell, stefano.stabellini, andrew.cooper3,
	eddie.dong, xen-devel, jun.nakajima, ian.jackson

>>> On 08.08.14 at 16:33, <rcojocaru@bitdefender.com> wrote:
> On 08/08/2014 05:25 PM, Jan Beulich wrote:
>>>>> On 06.08.14 at 17:58, <rcojocaru@bitdefender.com> wrote:
>>> Changes since V3:
>>>  - The rep_ins, rep_movs and cmpxchg handlers are
>>>    now also inactive.
>>>
>>> Changes since V4:
>>>  - Also discarding IO reads (dummy read_io handler).
>> 
>> So you still allow I/O writes (individual and string)? Pretty odd I
>> would say.
> 
> Thanks for the comment! I did indeed miss that one, I'll add a dummy
> write_io handler as well.

And one for outs I suppose.

Jan

^ permalink raw reply	[flat|nested] 18+ messages in thread

* Re: [PATCH RFC V5 3/5] xen: Force-enable relevant MSR events; optimize the number of sent MSR events
  2014-08-08 14:47     ` Razvan Cojocaru
@ 2014-08-08 15:11       ` Jan Beulich
  0 siblings, 0 replies; 18+ messages in thread
From: Jan Beulich @ 2014-08-08 15:11 UTC (permalink / raw)
  To: Razvan Cojocaru
  Cc: kevin.tian, ian.campbell, stefano.stabellini, andrew.cooper3,
	eddie.dong, xen-devel, jun.nakajima, ian.jackson

>>> On 08.08.14 at 16:47, <rcojocaru@bitdefender.com> wrote:
> On 08/08/2014 05:34 PM, Jan Beulich wrote:
>>>>> On 06.08.14 at 17:58, <rcojocaru@bitdefender.com> wrote:
>>> @@ -695,11 +696,30 @@ static void vmx_set_host_env(struct vcpu *v)
>>>  void vmx_disable_intercept_for_msr(struct vcpu *v, u32 msr, int type)
>>>  {
>>>      unsigned long *msr_bitmap = v->arch.hvm_vmx.msr_bitmap;
>>> +    struct domain *d = v->domain;
>>>  
>>>      /* VMX MSR bitmap supported? */
>>>      if ( msr_bitmap == NULL )
>>>          return;
>>>  
>>> +    if ( mem_event_check_ring(&d->mem_event->access) )
>>> +    {
>>> +        /* Filter out MSR-s needed for memory introspection */
>> 
>> I continue to be unconvinced that this code block's surrounding
>> conditional is as precise as possible: Your introspection code
>> surely isn't the only mem-event based mechanism. Yet you'd
>> impact guests in all other cases too.
> 
> I agree, however I can't think of a way to be more specific without
> introducing a special new parameter / bit when enabling mem_access.
> If you feel that that would not be a problem, I'll add one.

I don't think it would cause a problem, but the mm/ maintainers
would be a better contact in that regard.

>>> --- a/xen/arch/x86/hvm/vmx/vmx.c
>>> +++ b/xen/arch/x86/hvm/vmx/vmx.c
>>> @@ -1682,6 +1682,22 @@ void vmx_hypervisor_cpuid_leaf(uint32_t sub_idx,
>>>          *eax |= XEN_HVM_CPUID_X2APIC_VIRT;
>>>  }
>>>  
>>> +static void vmx_enable_intro_msr_interception(struct domain *d)
>> 
>> The "intro" in the name is surely odd: For one, it implies that _only_
>> introspection might be interested in doing this. And then it may
>> (without reading the comments inside the function) well be an
>> abbreviation for something else, e.g. "introduction".
> 
> It's no problem to either drop "intro" or expand it into
> "introspection". Would one be preferable to the other?

Neither seems very desirable. While I can suggest one, I think a
more generic term (collectively applicable to the group of MSRs)
would be needed.

Jan

^ permalink raw reply	[flat|nested] 18+ messages in thread

* Re: [PATCH RFC V5 4/5] xen, libxc: Request page fault injection via libxc
  2014-08-08 14:48   ` Jan Beulich
  2014-08-08 14:55     ` Razvan Cojocaru
@ 2014-08-08 21:45     ` Andrei LUTAS
  1 sibling, 0 replies; 18+ messages in thread
From: Andrei LUTAS @ 2014-08-08 21:45 UTC (permalink / raw)
  To: Jan Beulich, Razvan Cojocaru
  Cc: kevin.tian, ian.campbell, stefano.stabellini, andrew.cooper3,
	eddie.dong, xen-devel, jun.nakajima, ian.jackson

On 8/8/2014 5:48 PM, Jan Beulich wrote:
> Do you really mean to compare the full CR3 value here, rather than
> just bits 12...51? In which case the address_space field likely would
> better be a GPFN.
Hello,

That is a good point, with the additional observation that only bits
31..5 should be tested when in PAE mode and of course, 31..12 when
PAE and LM are both disabled.

Best regards,
Andrei.

^ permalink raw reply	[flat|nested] 18+ messages in thread

* Re: [PATCH RFC V5 3/5] xen: Force-enable relevant MSR events; optimize the number of sent MSR events
  2014-08-08 14:34   ` Jan Beulich
  2014-08-08 14:47     ` Razvan Cojocaru
@ 2014-08-11  8:57     ` Razvan Cojocaru
  1 sibling, 0 replies; 18+ messages in thread
From: Razvan Cojocaru @ 2014-08-11  8:57 UTC (permalink / raw)
  To: Jan Beulich
  Cc: kevin.tian, ian.campbell, stefano.stabellini, andrew.cooper3,
	eddie.dong, xen-devel, jun.nakajima, ian.jackson

On 08/08/2014 05:34 PM, Jan Beulich wrote:
>>>> On 06.08.14 at 17:58, <rcojocaru@bitdefender.com> wrote:
>> --- a/xen/arch/x86/mm/mem_event.c
>> +++ b/xen/arch/x86/mm/mem_event.c
>> @@ -600,6 +600,9 @@ int mem_event_domctl(struct domain *d, xen_domctl_mem_event_op_t *mec,
>>              rc = mem_event_enable(d, mec, med, _VPF_mem_access, 
>>                                      HVM_PARAM_ACCESS_RING_PFN,
>>                                      mem_access_notification);
>> +
>> +            if ( rc == 0 && hvm_funcs.enable_intro_msr_interception )
>> +                hvm_funcs.enable_intro_msr_interception(d);
> 
> Isn't the sequence of operations wrong here (leaving a window in
> time where mem events are already enabled but the necessary MSRs
> aren't being intercepted yet? Or was it that guests are being paused
> while all this takes place?

I've looked into how this is implemented on the libxc side, and it would
appear that xc_mem_event_enable() (in xc_mem_event.c) does explicitly
pause the domain before calling xc_mem_event_control() (which ends up in
the code above), so it should not be a problem.


Thanks,
Razvan Cojocaru

^ permalink raw reply	[flat|nested] 18+ messages in thread

end of thread, other threads:[~2014-08-11  8:57 UTC | newest]

Thread overview: 18+ messages (download: mbox.gz follow: Atom feed
-- links below jump to the message on this page --
2014-08-06 15:58 [PATCH RFC V5 1/5] xen: Emulate with no writes Razvan Cojocaru
2014-08-06 15:58 ` [PATCH RFC V5 2/5] xen: Optimize introspection access to guest state Razvan Cojocaru
2014-08-08 14:27   ` Jan Beulich
2014-08-06 15:58 ` [PATCH RFC V5 3/5] xen: Force-enable relevant MSR events; optimize the number of sent MSR events Razvan Cojocaru
2014-08-08 14:34   ` Jan Beulich
2014-08-08 14:47     ` Razvan Cojocaru
2014-08-08 15:11       ` Jan Beulich
2014-08-11  8:57     ` Razvan Cojocaru
2014-08-06 15:58 ` [PATCH RFC V5 4/5] xen, libxc: Request page fault injection via libxc Razvan Cojocaru
2014-08-07  7:22   ` Razvan Cojocaru
2014-08-08 14:48   ` Jan Beulich
2014-08-08 14:55     ` Razvan Cojocaru
2014-08-08 21:45     ` Andrei LUTAS
2014-08-06 15:58 ` [PATCH RFC V5 5/5] xen: Handle resumed instruction based on previous mem_event reply Razvan Cojocaru
2014-08-08 14:54   ` Jan Beulich
2014-08-08 14:25 ` [PATCH RFC V5 1/5] xen: Emulate with no writes Jan Beulich
2014-08-08 14:33   ` Razvan Cojocaru
2014-08-08 15:08     ` Jan Beulich

This is an external index of several public inboxes,
see mirroring instructions on how to clone and mirror
all data and code used by this external index.