From: Aubrey Li <aubrey.li@intel.com>
To: tglx@linutronix.de, mingo@redhat.com, peterz@infradead.org,
hpa@zytor.com
Cc: ak@linux.intel.com, tim.c.chen@linux.intel.com,
arjan@linux.intel.com, aubrey.li@intel.com,
linux-kernel@vger.kernel.org,
Aubrey Li <aubrey.li@linux.intel.com>
Subject: [RFC PATCH v1 1/2] x86/fpu: detect AVX task
Date: Wed, 7 Nov 2018 02:23:09 +0800 [thread overview]
Message-ID: <1541528590-30296-1-git-send-email-aubrey.li@intel.com> (raw)
XSAVES and its variants use init optimization to reduce the amount of
data that they save to memory during context switch. Init optimization
uses the state component bitmap to denote whether a component is in its
init configuration. We use this information to detect whether a task
uses AVX instructions.
Signed-off-by: Aubrey Li <aubrey.li@linux.intel.com>
Cc: Peter Zijlstra <peterz@infradead.org>
Cc: Andi Kleen <ak@linux.intel.com>
Cc: Tim Chen <tim.c.chen@linux.intel.com>
Cc: Arjan van de Ven <arjan@linux.intel.com>
---
arch/x86/include/asm/fpu/internal.h | 97 +++++++++++++++++++++++++++----------
arch/x86/include/asm/fpu/types.h | 17 +++++++
2 files changed, 88 insertions(+), 26 deletions(-)
diff --git a/arch/x86/include/asm/fpu/internal.h b/arch/x86/include/asm/fpu/internal.h
index a38bf5a..5054a7d 100644
--- a/arch/x86/include/asm/fpu/internal.h
+++ b/arch/x86/include/asm/fpu/internal.h
@@ -74,6 +74,12 @@ static __always_inline __pure bool use_fxsr(void)
return static_cpu_has(X86_FEATURE_FXSR);
}
+static __always_inline __pure bool use_xgetbv1(void)
+{
+ return static_cpu_has(X86_FEATURE_OSXSAVE) && /* both feature bits must be set; gates XGETBV with ECX = 1 in update_avx_state() */
+ static_cpu_has(X86_FEATURE_XGETBV1);
+}
+
/*
* fpstate handling functions:
*/
@@ -103,6 +109,34 @@ static inline void fpstate_init_fxstate(struct fxregs_state *fx)
}
extern void fpstate_sanitize_xstate(struct fpu *fpu);
+/*
+ * MXCSR and XCR definitions:
+ */
+
+extern unsigned int mxcsr_feature_mask;
+
+#define XCR_XFEATURE_ENABLED_MASK 0x00000000
+#define XINUSE_STATE_BITMAP_INDEX 0x00000001 /* xgetbv() index read by update_avx_state(): XCR0 AND XINUSE */
+
+static inline u64 xgetbv(u32 index)
+{
+ u32 eax, edx; /* low and high 32-bit halves of the value read */
+
+ asm volatile(".byte 0x0f,0x01,0xd0" /* xgetbv, emitted as raw opcode bytes */
+ : "=a" (eax), "=d" (edx)
+ : "c" (index));
+ return eax + ((u64)edx << 32); /* combine the halves into one 64-bit result */
+}
+
+static inline void xsetbv(u32 index, u64 value)
+{
+ u32 eax = value; /* low 32 bits of value */
+ u32 edx = value >> 32; /* high 32 bits of value */
+
+ asm volatile(".byte 0x0f,0x01,0xd1" /* xsetbv, emitted as raw opcode bytes */
+ : : "a" (eax), "d" (edx), "c" (index));
+}
+
#define user_insn(insn, output, input...) \
({ \
int err; \
@@ -275,6 +309,42 @@ static inline void copy_fxregs_to_kernel(struct fpu *fpu)
: "D" (st), "m" (*st), "a" (lmask), "d" (hmask) \
: "memory")
+#define AVX_STATE_DECAY_COUNT 3
+
+/*
+ * This function is called during context switch (via
+ * copy_fpregs_to_fpstate()) to update the AVX component state in @avx.
+ */
+static inline void update_avx_state(struct avx_state *avx)
+{
+	/*
+	 * Check if XGETBV with ECX = 1 is supported. XGETBV with ECX = 1
+	 * returns the logical-AND of XCR0 and XINUSE. XINUSE is a bitmap
+	 * by which the processor tracks the status of various components.
+	 */
+	if (!use_xgetbv1()) {
+		avx->state = 0;
+		return;
+	}
+	/*
+	 * XINUSE is dynamic to track component state because VZEROUPPER
+	 * happens on every function end and resets the bitmap to the
+	 * initial configuration.
+	 *
+	 * State decay is introduced to solve the race condition between
+	 * context switch and a function end. State is aggressively set
+	 * once it's detected, but is only cleared after the decay count
+	 * has been consumed by AVX_STATE_DECAY_COUNT idle context switches.
+	 */
+	if (xgetbv(XINUSE_STATE_BITMAP_INDEX) & XFEATURE_MASK_Hi16_ZMM) {
+		avx->state = 1;
+		avx->decay_count = AVX_STATE_DECAY_COUNT;
+	} else {
+		/* Consume the decay budget first; clear only once it is 0. */
+		if (avx->decay_count)
+			avx->decay_count--;
+		else
+			avx->state = 0;
+	}
+}
/*
* This function is called only during boot time when x86 caps are not set
* up and alternative can not be used yet.
@@ -411,6 +481,7 @@ static inline int copy_fpregs_to_fpstate(struct fpu *fpu)
{
if (likely(use_xsave())) {
copy_xregs_to_kernel(&fpu->state.xsave);
+ update_avx_state(&fpu->avx);
return 1;
}
@@ -577,31 +648,5 @@ static inline void user_fpu_begin(void)
preempt_enable();
}
-/*
- * MXCSR and XCR definitions:
- */
-
-extern unsigned int mxcsr_feature_mask;
-
-#define XCR_XFEATURE_ENABLED_MASK 0x00000000
-
-static inline u64 xgetbv(u32 index)
-{
- u32 eax, edx;
-
- asm volatile(".byte 0x0f,0x01,0xd0" /* xgetbv */
- : "=a" (eax), "=d" (edx)
- : "c" (index));
- return eax + ((u64)edx << 32);
-}
-
-static inline void xsetbv(u32 index, u64 value)
-{
- u32 eax = value;
- u32 edx = value >> 32;
-
- asm volatile(".byte 0x0f,0x01,0xd1" /* xsetbv */
- : : "a" (eax), "d" (edx), "c" (index));
-}
#endif /* _ASM_X86_FPU_INTERNAL_H */
diff --git a/arch/x86/include/asm/fpu/types.h b/arch/x86/include/asm/fpu/types.h
index 202c539..39d5bc2 100644
--- a/arch/x86/include/asm/fpu/types.h
+++ b/arch/x86/include/asm/fpu/types.h
@@ -274,6 +274,15 @@ union fpregs_state {
};
/*
+ * This is per task AVX state data structure that indicates
+ * whether the task uses AVX instructions.
+ */
+struct avx_state {
+ unsigned int state; /* written by update_avx_state(): 1 when the tracked feature is seen in use, else 0 */
+ unsigned int decay_count; /* reloaded with AVX_STATE_DECAY_COUNT each time state is set */
+};
+
+/*
* Highest level per task FPU state data structure that
* contains the FPU register state plus various FPU
* state fields:
@@ -303,6 +312,14 @@ struct fpu {
unsigned char initialized;
/*
+ * @avx:
+ *
+ * This data structure indicates whether this context
+ * contains AVX states
+ */
+ struct avx_state avx;
+
+ /*
* @state:
*
* In-memory copy of all FPU registers that we save/restore
--
2.7.4
next reply other threads:[~2018-11-07 1:40 UTC|newest]
Thread overview: 11+ messages / expand[flat|nested] mbox.gz Atom feed top
2018-11-06 18:23 Aubrey Li [this message]
2018-11-06 18:23 ` [RFC PATCH v1 2/2] proc: add /proc/<pid>/thread_state Aubrey Li
2018-11-08 6:32 ` Ingo Molnar
2018-11-08 10:17 ` Peter Zijlstra
2018-11-08 14:03 ` Li, Aubrey
2018-11-12 5:31 ` Ingo Molnar
2018-11-12 7:53 ` Li, Aubrey
2018-11-12 8:56 ` Peter Zijlstra
2018-11-12 14:21 ` Arjan van de Ven
2018-11-07 17:41 ` [RFC PATCH v1 1/2] x86/fpu: detect AVX task Tim Chen
2018-11-08 0:31 ` Li, Aubrey
Reply instructions:
You may reply publicly to this message via plain-text email
using any one of the following methods:
* Save the following mbox file, import it into your mail client,
and reply-to-all from there: mbox
Avoid top-posting and favor interleaved quoting:
https://en.wikipedia.org/wiki/Posting_style#Interleaved_style
* Reply using the --to, --cc, and --in-reply-to
switches of git-send-email(1):
git send-email \
--in-reply-to=1541528590-30296-1-git-send-email-aubrey.li@intel.com \
--to=aubrey.li@intel.com \
--cc=ak@linux.intel.com \
--cc=arjan@linux.intel.com \
--cc=aubrey.li@linux.intel.com \
--cc=hpa@zytor.com \
--cc=linux-kernel@vger.kernel.org \
--cc=mingo@redhat.com \
--cc=peterz@infradead.org \
--cc=tglx@linutronix.de \
--cc=tim.c.chen@linux.intel.com \
/path/to/YOUR_REPLY
https://kernel.org/pub/software/scm/git/docs/git-send-email.html
* If your mail client supports setting the In-Reply-To header
via mailto: links, try the mailto: link
Be sure your reply has a Subject: header at the top and a blank line
before the message body.
This is a public inbox, see mirroring instructions
for how to clone and mirror all data and code used for this inbox