* Re: [RFC UNTESTED PATCH] eal_common_cpuflags: Fix %rbx corruption, and simplify the code
[not found] ` <20140320163921.GC7721-B26myB8xz7F8NnZeBjwnZQMhkBWG/bsMQH7oEaQurus@public.gmane.org>
@ 2014-03-20 17:02 ` Thomas Monjalon
2014-03-20 18:04 ` Neil Horman
` (4 subsequent siblings)
5 siblings, 0 replies; 21+ messages in thread
From: Thomas Monjalon @ 2014-03-20 17:02 UTC (permalink / raw)
To: Neil Horman, H. Peter Anvin; +Cc: dev-VfR2kkLFssw, H. Peter Anvin
Hi,
20/03/2014 12:39, Neil Horman :
> On Thu, Mar 20, 2014 at 08:53:50AM -0700, H. Peter Anvin wrote:
> > Neil Horman reported that on x86-64 the upper half of %rbx would get
> > clobbered when the code was compiled PIC or PIE, because the
> > i386-specific code to preserve %ebx was incorrectly compiled.
> >
> > However, the code is really way more complex than it needs to be. For
> > one thing, the CPUID instruction only needs %eax (leaf) and %ecx
> > (subleaf) as parameters, and since we are testing for bits, we might
> > as well list the bits explicitly. Furthermore, we can use an array
> > rather than doing a switch statement inside a structure.
> >
> > Reported-by: Neil Horman <nhorman-2XuSBdqkA4R54TAoqtyWWQ@public.gmane.org>
> > Signed-off-by: H. Peter Anvin <hpa-VuQAYsv1563Yd54FQh9/CA@public.gmane.org>
>
> Acked-by: Neil Horman <nhorman-2XuSBdqkA4R54TAoqtyWWQ@public.gmane.org>
This is a RFC UNTESTED patch.
So should I understand with this acknowledgement that you have tested it?
As a shared library? in 32-bit mode?
--
Thomas
^ permalink raw reply [flat|nested] 21+ messages in thread
* Re: [RFC UNTESTED PATCH] eal_common_cpuflags: Fix %rbx corruption, and simplify the code
[not found] ` <20140320163921.GC7721-B26myB8xz7F8NnZeBjwnZQMhkBWG/bsMQH7oEaQurus@public.gmane.org>
2014-03-20 17:02 ` Thomas Monjalon
@ 2014-03-20 18:04 ` Neil Horman
[not found] ` <20140320180443.GE7721-B26myB8xz7F8NnZeBjwnZQMhkBWG/bsMQH7oEaQurus@public.gmane.org>
2014-03-21 14:49 ` [PATCH v2] " Neil Horman
` (3 subsequent siblings)
5 siblings, 1 reply; 21+ messages in thread
From: Neil Horman @ 2014-03-20 18:04 UTC (permalink / raw)
To: H. Peter Anvin; +Cc: dev-VfR2kkLFssw, H. Peter Anvin
On Thu, Mar 20, 2014 at 12:39:21PM -0400, Neil Horman wrote:
> On Thu, Mar 20, 2014 at 08:53:50AM -0700, H. Peter Anvin wrote:
> > Neil Horman reported that on x86-64 the upper half of %rbx would get
> > clobbered when the code was compiled PIC or PIE, because the
> > i386-specific code to preserve %ebx was incorrectly compiled.
> >
> > However, the code is really way more complex than it needs to be. For
> > one thing, the CPUID instruction only needs %eax (leaf) and %ecx
> > (subleaf) as parameters, and since we are testing for bits, we might
> > as well list the bits explicitly. Furthermore, we can use an array
> > rather than doing a switch statement inside a structure.
> >
> > Reported-by: Neil Horman <nhorman-2XuSBdqkA4R54TAoqtyWWQ@public.gmane.org>
> > Signed-off-by: H. Peter Anvin <hpa-VuQAYsv1563Yd54FQh9/CA@public.gmane.org>
> Acked-by: Neil Horman <nhorman-2XuSBdqkA4R54TAoqtyWWQ@public.gmane.org>
>
Sorry, I'm just acking the proposed change, I've not tested it yet, though based
on our conversation, this is the right thing to do. I'll have test reports
shortly.
Neil
> > ---
> > lib/librte_eal/common/eal_common_cpuflags.c | 272 +++++++++++++---------------
> > 1 file changed, 121 insertions(+), 151 deletions(-)
> >
> > diff --git a/lib/librte_eal/common/eal_common_cpuflags.c b/lib/librte_eal/common/eal_common_cpuflags.c
> > index 1ebf78cc2a48..bf66ad9d94ec 100644
> > --- a/lib/librte_eal/common/eal_common_cpuflags.c
> > +++ b/lib/librte_eal/common/eal_common_cpuflags.c
> > @@ -54,21 +54,12 @@
> > */
> > enum cpu_register_t {
> > REG_EAX = 0,
> > - REG_EBX,
> > REG_ECX,
> > REG_EDX,
> > + REG_EBX,
> > };
> >
> > -/**
> > - * Parameters for CPUID instruction
> > - */
> > -struct cpuid_parameters_t {
> > - uint32_t eax;
> > - uint32_t ebx;
> > - uint32_t ecx;
> > - uint32_t edx;
> > - enum cpu_register_t return_register;
> > -};
> > +typedef uint32_t cpuid_registers_t[4];
> >
> > #define CPU_FLAG_NAME_MAX_LEN 64
> >
> > @@ -78,8 +69,10 @@ struct cpuid_parameters_t {
> > struct feature_entry {
> > enum rte_cpu_flag_t feature; /**< feature name */
> > char name[CPU_FLAG_NAME_MAX_LEN]; /**< String for printing */
> > - struct cpuid_parameters_t params; /**< cpuid parameters */
> > - uint32_t feature_mask; /**< bitmask for feature */
> > + uint32_t leaf; /**< cpuid leaf */
> > + uint32_t subleaf; /**< cpuid subleaf */
> > + uint32_t reg; /**< cpuid register */
> > + uint32_t bit; /**< cpuid register bit */
> > };
> >
> > #define FEAT_DEF(f) RTE_CPUFLAG_##f, #f
> > @@ -88,97 +81,97 @@ struct feature_entry {
> > * An array that holds feature entries
> > */
> > static const struct feature_entry cpu_feature_table[] = {
> > - {FEAT_DEF(SSE3), {0x1, 0, 0, 0, REG_ECX}, 0x00000001},
> > - {FEAT_DEF(PCLMULQDQ), {0x1, 0, 0, 0, REG_ECX}, 0x00000002},
> > - {FEAT_DEF(DTES64), {0x1, 0, 0, 0, REG_ECX}, 0x00000004},
> > - {FEAT_DEF(MONITOR), {0x1, 0, 0, 0, REG_ECX}, 0x00000008},
> > - {FEAT_DEF(DS_CPL), {0x1, 0, 0, 0, REG_ECX}, 0x00000010},
> > - {FEAT_DEF(VMX), {0x1, 0, 0, 0, REG_ECX}, 0x00000020},
> > - {FEAT_DEF(SMX), {0x1, 0, 0, 0, REG_ECX}, 0x00000040},
> > - {FEAT_DEF(EIST), {0x1, 0, 0, 0, REG_ECX}, 0x00000080},
> > - {FEAT_DEF(TM2), {0x1, 0, 0, 0, REG_ECX}, 0x00000100},
> > - {FEAT_DEF(SSSE3), {0x1, 0, 0, 0, REG_ECX}, 0x00000200},
> > - {FEAT_DEF(CNXT_ID), {0x1, 0, 0, 0, REG_ECX}, 0x00000400},
> > - {FEAT_DEF(FMA), {0x1, 0, 0, 0, REG_ECX}, 0x00001000},
> > - {FEAT_DEF(CMPXCHG16B), {0x1, 0, 0, 0, REG_ECX}, 0x00002000},
> > - {FEAT_DEF(XTPR), {0x1, 0, 0, 0, REG_ECX}, 0x00004000},
> > - {FEAT_DEF(PDCM), {0x1, 0, 0, 0, REG_ECX}, 0x00008000},
> > - {FEAT_DEF(PCID), {0x1, 0, 0, 0, REG_ECX}, 0x00020000},
> > - {FEAT_DEF(DCA), {0x1, 0, 0, 0, REG_ECX}, 0x00040000},
> > - {FEAT_DEF(SSE4_1), {0x1, 0, 0, 0, REG_ECX}, 0x00080000},
> > - {FEAT_DEF(SSE4_2), {0x1, 0, 0, 0, REG_ECX}, 0x00100000},
> > - {FEAT_DEF(X2APIC), {0x1, 0, 0, 0, REG_ECX}, 0x00200000},
> > - {FEAT_DEF(MOVBE), {0x1, 0, 0, 0, REG_ECX}, 0x00400000},
> > - {FEAT_DEF(POPCNT), {0x1, 0, 0, 0, REG_ECX}, 0x00800000},
> > - {FEAT_DEF(TSC_DEADLINE), {0x1, 0, 0, 0, REG_ECX}, 0x01000000},
> > - {FEAT_DEF(AES), {0x1, 0, 0, 0, REG_ECX}, 0x02000000},
> > - {FEAT_DEF(XSAVE), {0x1, 0, 0, 0, REG_ECX}, 0x04000000},
> > - {FEAT_DEF(OSXSAVE), {0x1, 0, 0, 0, REG_ECX}, 0x08000000},
> > - {FEAT_DEF(AVX), {0x1, 0, 0, 0, REG_ECX}, 0x10000000},
> > - {FEAT_DEF(F16C), {0x1, 0, 0, 0, REG_ECX}, 0x20000000},
> > - {FEAT_DEF(RDRAND), {0x1, 0, 0, 0, REG_ECX}, 0x40000000},
> > -
> > - {FEAT_DEF(FPU), {0x1, 0, 0, 0, REG_EDX}, 0x00000001},
> > - {FEAT_DEF(VME), {0x1, 0, 0, 0, REG_EDX}, 0x00000002},
> > - {FEAT_DEF(DE), {0x1, 0, 0, 0, REG_EDX}, 0x00000004},
> > - {FEAT_DEF(PSE), {0x1, 0, 0, 0, REG_EDX}, 0x00000008},
> > - {FEAT_DEF(TSC), {0x1, 0, 0, 0, REG_EDX}, 0x00000010},
> > - {FEAT_DEF(MSR), {0x1, 0, 0, 0, REG_EDX}, 0x00000020},
> > - {FEAT_DEF(PAE), {0x1, 0, 0, 0, REG_EDX}, 0x00000040},
> > - {FEAT_DEF(MCE), {0x1, 0, 0, 0, REG_EDX}, 0x00000080},
> > - {FEAT_DEF(CX8), {0x1, 0, 0, 0, REG_EDX}, 0x00000100},
> > - {FEAT_DEF(APIC), {0x1, 0, 0, 0, REG_EDX}, 0x00000200},
> > - {FEAT_DEF(SEP), {0x1, 0, 0, 0, REG_EDX}, 0x00000800},
> > - {FEAT_DEF(MTRR), {0x1, 0, 0, 0, REG_EDX}, 0x00001000},
> > - {FEAT_DEF(PGE), {0x1, 0, 0, 0, REG_EDX}, 0x00002000},
> > - {FEAT_DEF(MCA), {0x1, 0, 0, 0, REG_EDX}, 0x00004000},
> > - {FEAT_DEF(CMOV), {0x1, 0, 0, 0, REG_EDX}, 0x00008000},
> > - {FEAT_DEF(PAT), {0x1, 0, 0, 0, REG_EDX}, 0x00010000},
> > - {FEAT_DEF(PSE36), {0x1, 0, 0, 0, REG_EDX}, 0x00020000},
> > - {FEAT_DEF(PSN), {0x1, 0, 0, 0, REG_EDX}, 0x00040000},
> > - {FEAT_DEF(CLFSH), {0x1, 0, 0, 0, REG_EDX}, 0x00080000},
> > - {FEAT_DEF(DS), {0x1, 0, 0, 0, REG_EDX}, 0x00200000},
> > - {FEAT_DEF(ACPI), {0x1, 0, 0, 0, REG_EDX}, 0x00400000},
> > - {FEAT_DEF(MMX), {0x1, 0, 0, 0, REG_EDX}, 0x00800000},
> > - {FEAT_DEF(FXSR), {0x1, 0, 0, 0, REG_EDX}, 0x01000000},
> > - {FEAT_DEF(SSE), {0x1, 0, 0, 0, REG_EDX}, 0x02000000},
> > - {FEAT_DEF(SSE2), {0x1, 0, 0, 0, REG_EDX}, 0x04000000},
> > - {FEAT_DEF(SS), {0x1, 0, 0, 0, REG_EDX}, 0x08000000},
> > - {FEAT_DEF(HTT), {0x1, 0, 0, 0, REG_EDX}, 0x10000000},
> > - {FEAT_DEF(TM), {0x1, 0, 0, 0, REG_EDX}, 0x20000000},
> > - {FEAT_DEF(PBE), {0x1, 0, 0, 0, REG_EDX}, 0x80000000},
> > -
> > - {FEAT_DEF(DIGTEMP), {0x6, 0, 0, 0, REG_EAX}, 0x00000001},
> > - {FEAT_DEF(TRBOBST), {0x6, 0, 0, 0, REG_EAX}, 0x00000002},
> > - {FEAT_DEF(ARAT), {0x6, 0, 0, 0, REG_EAX}, 0x00000004},
> > - {FEAT_DEF(PLN), {0x6, 0, 0, 0, REG_EAX}, 0x00000010},
> > - {FEAT_DEF(ECMD), {0x6, 0, 0, 0, REG_EAX}, 0x00000020},
> > - {FEAT_DEF(PTM), {0x6, 0, 0, 0, REG_EAX}, 0x00000040},
> > -
> > - {FEAT_DEF(MPERF_APERF_MSR), {0x6, 0, 0, 0, REG_ECX}, 0x00000001},
> > - {FEAT_DEF(ACNT2), {0x6, 0, 0, 0, REG_ECX}, 0x00000002},
> > - {FEAT_DEF(ENERGY_EFF), {0x6, 0, 0, 0, REG_ECX}, 0x00000008},
> > -
> > - {FEAT_DEF(FSGSBASE), {0x7, 0, 0, 0, REG_EBX}, 0x00000001},
> > - {FEAT_DEF(BMI1), {0x7, 0, 0, 0, REG_EBX}, 0x00000004},
> > - {FEAT_DEF(HLE), {0x7, 0, 0, 0, REG_EBX}, 0x00000010},
> > - {FEAT_DEF(AVX2), {0x7, 0, 0, 0, REG_EBX}, 0x00000020},
> > - {FEAT_DEF(SMEP), {0x7, 0, 0, 0, REG_EBX}, 0x00000040},
> > - {FEAT_DEF(BMI2), {0x7, 0, 0, 0, REG_EBX}, 0x00000080},
> > - {FEAT_DEF(ERMS), {0x7, 0, 0, 0, REG_EBX}, 0x00000100},
> > - {FEAT_DEF(INVPCID), {0x7, 0, 0, 0, REG_EBX}, 0x00000400},
> > - {FEAT_DEF(RTM), {0x7, 0, 0, 0, REG_EBX}, 0x00000800},
> > -
> > - {FEAT_DEF(LAHF_SAHF), {0x80000001, 0, 0, 0, REG_ECX}, 0x00000001},
> > - {FEAT_DEF(LZCNT), {0x80000001, 0, 0, 0, REG_ECX}, 0x00000010},
> > -
> > - {FEAT_DEF(SYSCALL), {0x80000001, 0, 0, 0, REG_EDX}, 0x00000800},
> > - {FEAT_DEF(XD), {0x80000001, 0, 0, 0, REG_EDX}, 0x00100000},
> > - {FEAT_DEF(1GB_PG), {0x80000001, 0, 0, 0, REG_EDX}, 0x04000000},
> > - {FEAT_DEF(RDTSCP), {0x80000001, 0, 0, 0, REG_EDX}, 0x08000000},
> > - {FEAT_DEF(EM64T), {0x80000001, 0, 0, 0, REG_EDX}, 0x20000000},
> > -
> > - {FEAT_DEF(INVTSC), {0x80000007, 0, 0, 0, REG_EDX}, 0x00000100},
> > + {FEAT_DEF(SSE3), 0x00000001, 0, REG_ECX, 0},
> > + {FEAT_DEF(PCLMULQDQ), 0x00000001, 0, REG_ECX, 1},
> > + {FEAT_DEF(DTES64), 0x00000001, 0, REG_ECX, 2},
> > + {FEAT_DEF(MONITOR), 0x00000001, 0, REG_ECX, 3},
> > + {FEAT_DEF(DS_CPL), 0x00000001, 0, REG_ECX, 4},
> > + {FEAT_DEF(VMX), 0x00000001, 0, REG_ECX, 5},
> > + {FEAT_DEF(SMX), 0x00000001, 0, REG_ECX, 6},
> > + {FEAT_DEF(EIST), 0x00000001, 0, REG_ECX, 7},
> > + {FEAT_DEF(TM2), 0x00000001, 0, REG_ECX, 8},
> > + {FEAT_DEF(SSSE3), 0x00000001, 0, REG_ECX, 9},
> > + {FEAT_DEF(CNXT_ID), 0x00000001, 0, REG_ECX, 10},
> > + {FEAT_DEF(FMA), 0x00000001, 0, REG_ECX, 12},
> > + {FEAT_DEF(CMPXCHG16B), 0x00000001, 0, REG_ECX, 13},
> > + {FEAT_DEF(XTPR), 0x00000001, 0, REG_ECX, 14},
> > + {FEAT_DEF(PDCM), 0x00000001, 0, REG_ECX, 15},
> > + {FEAT_DEF(PCID), 0x00000001, 0, REG_ECX, 17},
> > + {FEAT_DEF(DCA), 0x00000001, 0, REG_ECX, 18},
> > + {FEAT_DEF(SSE4_1), 0x00000001, 0, REG_ECX, 19},
> > + {FEAT_DEF(SSE4_2), 0x00000001, 0, REG_ECX, 20},
> > + {FEAT_DEF(X2APIC), 0x00000001, 0, REG_ECX, 21},
> > + {FEAT_DEF(MOVBE), 0x00000001, 0, REG_ECX, 22},
> > + {FEAT_DEF(POPCNT), 0x00000001, 0, REG_ECX, 23},
> > + {FEAT_DEF(TSC_DEADLINE), 0x00000001, 0, REG_ECX, 24},
> > + {FEAT_DEF(AES), 0x00000001, 0, REG_ECX, 25},
> > + {FEAT_DEF(XSAVE), 0x00000001, 0, REG_ECX, 26},
> > + {FEAT_DEF(OSXSAVE), 0x00000001, 0, REG_ECX, 27},
> > + {FEAT_DEF(AVX), 0x00000001, 0, REG_ECX, 28},
> > + {FEAT_DEF(F16C), 0x00000001, 0, REG_ECX, 29},
> > + {FEAT_DEF(RDRAND), 0x00000001, 0, REG_ECX, 30},
> > +
> > + {FEAT_DEF(FPU), 0x00000001, 0, REG_EDX, 0},
> > + {FEAT_DEF(VME), 0x00000001, 0, REG_EDX, 1},
> > + {FEAT_DEF(DE), 0x00000001, 0, REG_EDX, 2},
> > + {FEAT_DEF(PSE), 0x00000001, 0, REG_EDX, 3},
> > + {FEAT_DEF(TSC), 0x00000001, 0, REG_EDX, 4},
> > + {FEAT_DEF(MSR), 0x00000001, 0, REG_EDX, 5},
> > + {FEAT_DEF(PAE), 0x00000001, 0, REG_EDX, 6},
> > + {FEAT_DEF(MCE), 0x00000001, 0, REG_EDX, 7},
> > + {FEAT_DEF(CX8), 0x00000001, 0, REG_EDX, 8},
> > + {FEAT_DEF(APIC), 0x00000001, 0, REG_EDX, 9},
> > + {FEAT_DEF(SEP), 0x00000001, 0, REG_EDX, 11},
> > + {FEAT_DEF(MTRR), 0x00000001, 0, REG_EDX, 12},
> > + {FEAT_DEF(PGE), 0x00000001, 0, REG_EDX, 13},
> > + {FEAT_DEF(MCA), 0x00000001, 0, REG_EDX, 14},
> > + {FEAT_DEF(CMOV), 0x00000001, 0, REG_EDX, 15},
> > + {FEAT_DEF(PAT), 0x00000001, 0, REG_EDX, 16},
> > + {FEAT_DEF(PSE36), 0x00000001, 0, REG_EDX, 17},
> > + {FEAT_DEF(PSN), 0x00000001, 0, REG_EDX, 18},
> > + {FEAT_DEF(CLFSH), 0x00000001, 0, REG_EDX, 19},
> > + {FEAT_DEF(DS), 0x00000001, 0, REG_EDX, 21},
> > + {FEAT_DEF(ACPI), 0x00000001, 0, REG_EDX, 22},
> > + {FEAT_DEF(MMX), 0x00000001, 0, REG_EDX, 23},
> > + {FEAT_DEF(FXSR), 0x00000001, 0, REG_EDX, 24},
> > + {FEAT_DEF(SSE), 0x00000001, 0, REG_EDX, 25},
> > + {FEAT_DEF(SSE2), 0x00000001, 0, REG_EDX, 26},
> > + {FEAT_DEF(SS), 0x00000001, 0, REG_EDX, 27},
> > + {FEAT_DEF(HTT), 0x00000001, 0, REG_EDX, 28},
> > + {FEAT_DEF(TM), 0x00000001, 0, REG_EDX, 29},
> > + {FEAT_DEF(PBE), 0x00000001, 0, REG_EDX, 31},
> > +
> > + {FEAT_DEF(DIGTEMP), 0x00000006, 0, REG_EAX, 0},
> > + {FEAT_DEF(TRBOBST), 0x00000006, 0, REG_EAX, 1},
> > + {FEAT_DEF(ARAT), 0x00000006, 0, REG_EAX, 2},
> > + {FEAT_DEF(PLN), 0x00000006, 0, REG_EAX, 4},
> > + {FEAT_DEF(ECMD), 0x00000006, 0, REG_EAX, 5},
> > + {FEAT_DEF(PTM), 0x00000006, 0, REG_EAX, 6},
> > +
> > + {FEAT_DEF(MPERF_APERF_MSR), 0x00000006, 0, REG_ECX, 0},
> > + {FEAT_DEF(ACNT2), 0x00000006, 0, REG_ECX, 1},
> > + {FEAT_DEF(ENERGY_EFF), 0x00000006, 0, REG_ECX, 3},
> > +
> > + {FEAT_DEF(FSGSBASE), 0x00000007, 0, REG_EBX, 0},
> > + {FEAT_DEF(BMI1), 0x00000007, 0, REG_EBX, 2},
> > + {FEAT_DEF(HLE), 0x00000007, 0, REG_EBX, 4},
> > + {FEAT_DEF(AVX2), 0x00000007, 0, REG_EBX, 5},
> > + {FEAT_DEF(SMEP), 0x00000007, 0, REG_EBX, 6},
> > + {FEAT_DEF(BMI2), 0x00000007, 0, REG_EBX, 7},
> > + {FEAT_DEF(ERMS), 0x00000007, 0, REG_EBX, 8},
> > + {FEAT_DEF(INVPCID), 0x00000007, 0, REG_EBX, 10},
> > + {FEAT_DEF(RTM), 0x00000007, 0, REG_EBX, 11},
> > +
> > + {FEAT_DEF(LAHF_SAHF), 0x80000001, 0, REG_ECX, 0},
> > + {FEAT_DEF(LZCNT), 0x80000001, 0, REG_ECX, 4},
> > +
> > + {FEAT_DEF(SYSCALL), 0x80000001, 0, REG_EDX, 11},
> > + {FEAT_DEF(XD), 0x80000001, 0, REG_EDX, 20},
> > + {FEAT_DEF(1GB_PG), 0x80000001, 0, REG_EDX, 26},
> > + {FEAT_DEF(RDTSCP), 0x80000001, 0, REG_EDX, 27},
> > + {FEAT_DEF(EM64T), 0x80000001, 0, REG_EDX, 29},
> > +
> > + {FEAT_DEF(INVTSC), 0x80000007, 0, REG_EDX, 8},
> > };
> >
> > /*
> > @@ -187,51 +180,25 @@ static const struct feature_entry cpu_feature_table[] = {
> > * This function, when compiled with GCC, will generate architecture-neutral
> > * code, as per GCC manual.
> > */
> > -static inline int
> > -rte_cpu_get_features(struct cpuid_parameters_t params)
> > +static inline void
> > +rte_cpu_get_features(uint32_t leaf, uint32_t subleaf, cpuid_registers_t *out)
> > {
> > - int eax, ebx, ecx, edx; /* registers */
> > -
> > -#ifndef __PIC__
> > - asm volatile ("cpuid"
> > - /* output */
> > - : "=a" (eax),
> > - "=b" (ebx),
> > - "=c" (ecx),
> > - "=d" (edx)
> > - /* input */
> > - : "a" (params.eax),
> > - "b" (params.ebx),
> > - "c" (params.ecx),
> > - "d" (params.edx));
> > +#if defined(__i386__) && defined(__PIC__)
> > + /* %ebx is a forbidden register if we compile with -fPIC or -fPIE */
> > + asm volatile("movl %%ebx,%0 ; cpuid ; xchgl %%ebx,%0"
> > + : "=r" (out[REG_EBX]),
> > + "=a" (out[REG_EAX]),
> > + "=c" (out[REG_ECX]),
> > + "=d" (out[REG_EDX])
> > + : "a" (leaf), "c" (subleaf));
> > #else
> > - asm volatile (
> > - "mov %%ebx, %%edi\n"
> > - "cpuid\n"
> > - "xchgl %%ebx, %%edi;\n"
> > - : "=a" (eax),
> > - "=D" (ebx),
> > - "=c" (ecx),
> > - "=d" (edx)
> > - /* input */
> > - : "a" (params.eax),
> > - "D" (params.ebx),
> > - "c" (params.ecx),
> > - "d" (params.edx));
> > + asm volatile("cpuid"
> > + : "=b" (out[REG_EBX]),
> > + "=a" (out[REG_EAX]),
> > + "=c" (out[REG_ECX]),
> > + "=d" (out[REG_EDX])
> > + : "a" (leaf), "c" (subleaf));
> > #endif
> > -
> > - switch (params.return_register) {
> > - case REG_EAX:
> > - return eax;
> > - case REG_EBX:
> > - return ebx;
> > - case REG_ECX:
> > - return ecx;
> > - case REG_EDX:
> > - return edx;
> > - default:
> > - return 0;
> > - }
> > }
> >
> > /*
> > @@ -240,17 +207,20 @@ rte_cpu_get_features(struct cpuid_parameters_t params)
> > int
> > rte_cpu_get_flag_enabled(enum rte_cpu_flag_t feature)
> > {
> > - int value;
> > + const struct feature_entry *feat;
> > + cpu_registers_t regs;
> >
> > if (feature >= RTE_CPUFLAG_NUMFLAGS)
> > /* Flag does not match anything in the feature tables */
> > return -ENOENT;
> >
> > - /* get value of the register containing the desired feature */
> > - value = rte_cpu_get_features(cpu_feature_table[feature].params);
> > + feat = &cpu_feature_table[feature];
> > +
> > + /* get the cpuid leaf containing the desired feature */
> > + rte_cpu_get_features(feat->leaf, feat->subleaf, &regs);
> >
> > /* check if the feature is enabled */
> > - return (cpu_feature_table[feature].feature_mask & value) > 0;
> > + return (regs[feat->reg] >> feat->bit) & 1;
> > }
> >
> > /**
> > @@ -273,7 +243,7 @@ rte_cpu_check_supported(void)
> > unsigned i;
> >
> > for (i = 0; i < sizeof(compile_time_flags)/sizeof(compile_time_flags[0]); i++)
> > - if (rte_cpu_get_flag_enabled(compile_time_flags[i]) < 1) {
> > + if (!rte_cpu_get_flag_enabled(compile_time_flags[i])) {
> > fprintf(stderr,
> > "ERROR: This system does not support \"%s\".\n"
> > "Please check that RTE_MACHINE is set correctly.\n",
> > --
> > 1.8.5.3
> >
> >
>
^ permalink raw reply [flat|nested] 21+ messages in thread
* [PATCH v2] eal_common_cpuflags: Fix %rbx corruption, and simplify the code
[not found] ` <20140320163921.GC7721-B26myB8xz7F8NnZeBjwnZQMhkBWG/bsMQH7oEaQurus@public.gmane.org>
2014-03-20 17:02 ` Thomas Monjalon
2014-03-20 18:04 ` Neil Horman
@ 2014-03-21 14:49 ` Neil Horman
[not found] ` <1395413340-27392-1-git-send-email-nhorman-2XuSBdqkA4R54TAoqtyWWQ@public.gmane.org>
2014-03-24 17:44 ` [PATCH v3] " Neil Horman
` (2 subsequent siblings)
5 siblings, 1 reply; 21+ messages in thread
From: Neil Horman @ 2014-03-21 14:49 UTC (permalink / raw)
To: dev-VfR2kkLFssw; +Cc: H. Peter Anvin
From: "H. Peter Anvin" <hpa-VuQAYsv1563Yd54FQh9/CA@public.gmane.org>
Neil Horman reported that on x86-64 the upper half of %rbx would get
clobbered when the code was compiled PIC or PIE, because the
i386-specific code to preserve %ebx was incorrectly compiled.
However, the code is really way more complex than it needs to be. For
one thing, the CPUID instruction only needs %eax (leaf) and %ecx
(subleaf) as parameters, and since we are testing for bits, we might
as well list the bits explicitly. Furthermore, we can use an array
rather than doing a switch statement inside a structure.
Reported-by: Neil Horman <nhorman-2XuSBdqkA4R54TAoqtyWWQ@public.gmane.org>
Signed-off-by: H. Peter Anvin <hpa-VuQAYsv1563Yd54FQh9/CA@public.gmane.org>
Tested-by: Neil Horman <nhorman-2XuSBdqkA4R54TAoqtyWWQ@public.gmane.org>
---
Change notes:
v2) Corrected build errors
Fixed cpuid_register_t reference passing
Fixed typedef name typo
---
lib/librte_eal/common/eal_common_cpuflags.c | 274 +++++++++++++---------------
1 file changed, 123 insertions(+), 151 deletions(-)
diff --git a/lib/librte_eal/common/eal_common_cpuflags.c b/lib/librte_eal/common/eal_common_cpuflags.c
index 1ebf78c..438d9c5 100644
--- a/lib/librte_eal/common/eal_common_cpuflags.c
+++ b/lib/librte_eal/common/eal_common_cpuflags.c
@@ -54,21 +54,12 @@
*/
enum cpu_register_t {
REG_EAX = 0,
- REG_EBX,
REG_ECX,
REG_EDX,
+ REG_EBX,
};
-/**
- * Parameters for CPUID instruction
- */
-struct cpuid_parameters_t {
- uint32_t eax;
- uint32_t ebx;
- uint32_t ecx;
- uint32_t edx;
- enum cpu_register_t return_register;
-};
+typedef uint32_t cpuid_registers_t[4];
#define CPU_FLAG_NAME_MAX_LEN 64
@@ -78,8 +69,10 @@ struct cpuid_parameters_t {
struct feature_entry {
enum rte_cpu_flag_t feature; /**< feature name */
char name[CPU_FLAG_NAME_MAX_LEN]; /**< String for printing */
- struct cpuid_parameters_t params; /**< cpuid parameters */
- uint32_t feature_mask; /**< bitmask for feature */
+ uint32_t leaf; /**< cpuid leaf */
+ uint32_t subleaf; /**< cpuid subleaf */
+ uint32_t reg; /**< cpuid register */
+ uint32_t bit; /**< cpuid register bit */
};
#define FEAT_DEF(f) RTE_CPUFLAG_##f, #f
@@ -88,97 +81,97 @@ struct feature_entry {
* An array that holds feature entries
*/
static const struct feature_entry cpu_feature_table[] = {
- {FEAT_DEF(SSE3), {0x1, 0, 0, 0, REG_ECX}, 0x00000001},
- {FEAT_DEF(PCLMULQDQ), {0x1, 0, 0, 0, REG_ECX}, 0x00000002},
- {FEAT_DEF(DTES64), {0x1, 0, 0, 0, REG_ECX}, 0x00000004},
- {FEAT_DEF(MONITOR), {0x1, 0, 0, 0, REG_ECX}, 0x00000008},
- {FEAT_DEF(DS_CPL), {0x1, 0, 0, 0, REG_ECX}, 0x00000010},
- {FEAT_DEF(VMX), {0x1, 0, 0, 0, REG_ECX}, 0x00000020},
- {FEAT_DEF(SMX), {0x1, 0, 0, 0, REG_ECX}, 0x00000040},
- {FEAT_DEF(EIST), {0x1, 0, 0, 0, REG_ECX}, 0x00000080},
- {FEAT_DEF(TM2), {0x1, 0, 0, 0, REG_ECX}, 0x00000100},
- {FEAT_DEF(SSSE3), {0x1, 0, 0, 0, REG_ECX}, 0x00000200},
- {FEAT_DEF(CNXT_ID), {0x1, 0, 0, 0, REG_ECX}, 0x00000400},
- {FEAT_DEF(FMA), {0x1, 0, 0, 0, REG_ECX}, 0x00001000},
- {FEAT_DEF(CMPXCHG16B), {0x1, 0, 0, 0, REG_ECX}, 0x00002000},
- {FEAT_DEF(XTPR), {0x1, 0, 0, 0, REG_ECX}, 0x00004000},
- {FEAT_DEF(PDCM), {0x1, 0, 0, 0, REG_ECX}, 0x00008000},
- {FEAT_DEF(PCID), {0x1, 0, 0, 0, REG_ECX}, 0x00020000},
- {FEAT_DEF(DCA), {0x1, 0, 0, 0, REG_ECX}, 0x00040000},
- {FEAT_DEF(SSE4_1), {0x1, 0, 0, 0, REG_ECX}, 0x00080000},
- {FEAT_DEF(SSE4_2), {0x1, 0, 0, 0, REG_ECX}, 0x00100000},
- {FEAT_DEF(X2APIC), {0x1, 0, 0, 0, REG_ECX}, 0x00200000},
- {FEAT_DEF(MOVBE), {0x1, 0, 0, 0, REG_ECX}, 0x00400000},
- {FEAT_DEF(POPCNT), {0x1, 0, 0, 0, REG_ECX}, 0x00800000},
- {FEAT_DEF(TSC_DEADLINE), {0x1, 0, 0, 0, REG_ECX}, 0x01000000},
- {FEAT_DEF(AES), {0x1, 0, 0, 0, REG_ECX}, 0x02000000},
- {FEAT_DEF(XSAVE), {0x1, 0, 0, 0, REG_ECX}, 0x04000000},
- {FEAT_DEF(OSXSAVE), {0x1, 0, 0, 0, REG_ECX}, 0x08000000},
- {FEAT_DEF(AVX), {0x1, 0, 0, 0, REG_ECX}, 0x10000000},
- {FEAT_DEF(F16C), {0x1, 0, 0, 0, REG_ECX}, 0x20000000},
- {FEAT_DEF(RDRAND), {0x1, 0, 0, 0, REG_ECX}, 0x40000000},
-
- {FEAT_DEF(FPU), {0x1, 0, 0, 0, REG_EDX}, 0x00000001},
- {FEAT_DEF(VME), {0x1, 0, 0, 0, REG_EDX}, 0x00000002},
- {FEAT_DEF(DE), {0x1, 0, 0, 0, REG_EDX}, 0x00000004},
- {FEAT_DEF(PSE), {0x1, 0, 0, 0, REG_EDX}, 0x00000008},
- {FEAT_DEF(TSC), {0x1, 0, 0, 0, REG_EDX}, 0x00000010},
- {FEAT_DEF(MSR), {0x1, 0, 0, 0, REG_EDX}, 0x00000020},
- {FEAT_DEF(PAE), {0x1, 0, 0, 0, REG_EDX}, 0x00000040},
- {FEAT_DEF(MCE), {0x1, 0, 0, 0, REG_EDX}, 0x00000080},
- {FEAT_DEF(CX8), {0x1, 0, 0, 0, REG_EDX}, 0x00000100},
- {FEAT_DEF(APIC), {0x1, 0, 0, 0, REG_EDX}, 0x00000200},
- {FEAT_DEF(SEP), {0x1, 0, 0, 0, REG_EDX}, 0x00000800},
- {FEAT_DEF(MTRR), {0x1, 0, 0, 0, REG_EDX}, 0x00001000},
- {FEAT_DEF(PGE), {0x1, 0, 0, 0, REG_EDX}, 0x00002000},
- {FEAT_DEF(MCA), {0x1, 0, 0, 0, REG_EDX}, 0x00004000},
- {FEAT_DEF(CMOV), {0x1, 0, 0, 0, REG_EDX}, 0x00008000},
- {FEAT_DEF(PAT), {0x1, 0, 0, 0, REG_EDX}, 0x00010000},
- {FEAT_DEF(PSE36), {0x1, 0, 0, 0, REG_EDX}, 0x00020000},
- {FEAT_DEF(PSN), {0x1, 0, 0, 0, REG_EDX}, 0x00040000},
- {FEAT_DEF(CLFSH), {0x1, 0, 0, 0, REG_EDX}, 0x00080000},
- {FEAT_DEF(DS), {0x1, 0, 0, 0, REG_EDX}, 0x00200000},
- {FEAT_DEF(ACPI), {0x1, 0, 0, 0, REG_EDX}, 0x00400000},
- {FEAT_DEF(MMX), {0x1, 0, 0, 0, REG_EDX}, 0x00800000},
- {FEAT_DEF(FXSR), {0x1, 0, 0, 0, REG_EDX}, 0x01000000},
- {FEAT_DEF(SSE), {0x1, 0, 0, 0, REG_EDX}, 0x02000000},
- {FEAT_DEF(SSE2), {0x1, 0, 0, 0, REG_EDX}, 0x04000000},
- {FEAT_DEF(SS), {0x1, 0, 0, 0, REG_EDX}, 0x08000000},
- {FEAT_DEF(HTT), {0x1, 0, 0, 0, REG_EDX}, 0x10000000},
- {FEAT_DEF(TM), {0x1, 0, 0, 0, REG_EDX}, 0x20000000},
- {FEAT_DEF(PBE), {0x1, 0, 0, 0, REG_EDX}, 0x80000000},
-
- {FEAT_DEF(DIGTEMP), {0x6, 0, 0, 0, REG_EAX}, 0x00000001},
- {FEAT_DEF(TRBOBST), {0x6, 0, 0, 0, REG_EAX}, 0x00000002},
- {FEAT_DEF(ARAT), {0x6, 0, 0, 0, REG_EAX}, 0x00000004},
- {FEAT_DEF(PLN), {0x6, 0, 0, 0, REG_EAX}, 0x00000010},
- {FEAT_DEF(ECMD), {0x6, 0, 0, 0, REG_EAX}, 0x00000020},
- {FEAT_DEF(PTM), {0x6, 0, 0, 0, REG_EAX}, 0x00000040},
-
- {FEAT_DEF(MPERF_APERF_MSR), {0x6, 0, 0, 0, REG_ECX}, 0x00000001},
- {FEAT_DEF(ACNT2), {0x6, 0, 0, 0, REG_ECX}, 0x00000002},
- {FEAT_DEF(ENERGY_EFF), {0x6, 0, 0, 0, REG_ECX}, 0x00000008},
-
- {FEAT_DEF(FSGSBASE), {0x7, 0, 0, 0, REG_EBX}, 0x00000001},
- {FEAT_DEF(BMI1), {0x7, 0, 0, 0, REG_EBX}, 0x00000004},
- {FEAT_DEF(HLE), {0x7, 0, 0, 0, REG_EBX}, 0x00000010},
- {FEAT_DEF(AVX2), {0x7, 0, 0, 0, REG_EBX}, 0x00000020},
- {FEAT_DEF(SMEP), {0x7, 0, 0, 0, REG_EBX}, 0x00000040},
- {FEAT_DEF(BMI2), {0x7, 0, 0, 0, REG_EBX}, 0x00000080},
- {FEAT_DEF(ERMS), {0x7, 0, 0, 0, REG_EBX}, 0x00000100},
- {FEAT_DEF(INVPCID), {0x7, 0, 0, 0, REG_EBX}, 0x00000400},
- {FEAT_DEF(RTM), {0x7, 0, 0, 0, REG_EBX}, 0x00000800},
-
- {FEAT_DEF(LAHF_SAHF), {0x80000001, 0, 0, 0, REG_ECX}, 0x00000001},
- {FEAT_DEF(LZCNT), {0x80000001, 0, 0, 0, REG_ECX}, 0x00000010},
-
- {FEAT_DEF(SYSCALL), {0x80000001, 0, 0, 0, REG_EDX}, 0x00000800},
- {FEAT_DEF(XD), {0x80000001, 0, 0, 0, REG_EDX}, 0x00100000},
- {FEAT_DEF(1GB_PG), {0x80000001, 0, 0, 0, REG_EDX}, 0x04000000},
- {FEAT_DEF(RDTSCP), {0x80000001, 0, 0, 0, REG_EDX}, 0x08000000},
- {FEAT_DEF(EM64T), {0x80000001, 0, 0, 0, REG_EDX}, 0x20000000},
-
- {FEAT_DEF(INVTSC), {0x80000007, 0, 0, 0, REG_EDX}, 0x00000100},
+ {FEAT_DEF(SSE3), 0x00000001, 0, REG_ECX, 0},
+ {FEAT_DEF(PCLMULQDQ), 0x00000001, 0, REG_ECX, 1},
+ {FEAT_DEF(DTES64), 0x00000001, 0, REG_ECX, 2},
+ {FEAT_DEF(MONITOR), 0x00000001, 0, REG_ECX, 3},
+ {FEAT_DEF(DS_CPL), 0x00000001, 0, REG_ECX, 4},
+ {FEAT_DEF(VMX), 0x00000001, 0, REG_ECX, 5},
+ {FEAT_DEF(SMX), 0x00000001, 0, REG_ECX, 6},
+ {FEAT_DEF(EIST), 0x00000001, 0, REG_ECX, 7},
+ {FEAT_DEF(TM2), 0x00000001, 0, REG_ECX, 8},
+ {FEAT_DEF(SSSE3), 0x00000001, 0, REG_ECX, 9},
+ {FEAT_DEF(CNXT_ID), 0x00000001, 0, REG_ECX, 10},
+ {FEAT_DEF(FMA), 0x00000001, 0, REG_ECX, 12},
+ {FEAT_DEF(CMPXCHG16B), 0x00000001, 0, REG_ECX, 13},
+ {FEAT_DEF(XTPR), 0x00000001, 0, REG_ECX, 14},
+ {FEAT_DEF(PDCM), 0x00000001, 0, REG_ECX, 15},
+ {FEAT_DEF(PCID), 0x00000001, 0, REG_ECX, 17},
+ {FEAT_DEF(DCA), 0x00000001, 0, REG_ECX, 18},
+ {FEAT_DEF(SSE4_1), 0x00000001, 0, REG_ECX, 19},
+ {FEAT_DEF(SSE4_2), 0x00000001, 0, REG_ECX, 20},
+ {FEAT_DEF(X2APIC), 0x00000001, 0, REG_ECX, 21},
+ {FEAT_DEF(MOVBE), 0x00000001, 0, REG_ECX, 22},
+ {FEAT_DEF(POPCNT), 0x00000001, 0, REG_ECX, 23},
+ {FEAT_DEF(TSC_DEADLINE), 0x00000001, 0, REG_ECX, 24},
+ {FEAT_DEF(AES), 0x00000001, 0, REG_ECX, 25},
+ {FEAT_DEF(XSAVE), 0x00000001, 0, REG_ECX, 26},
+ {FEAT_DEF(OSXSAVE), 0x00000001, 0, REG_ECX, 27},
+ {FEAT_DEF(AVX), 0x00000001, 0, REG_ECX, 28},
+ {FEAT_DEF(F16C), 0x00000001, 0, REG_ECX, 29},
+ {FEAT_DEF(RDRAND), 0x00000001, 0, REG_ECX, 30},
+
+ {FEAT_DEF(FPU), 0x00000001, 0, REG_EDX, 0},
+ {FEAT_DEF(VME), 0x00000001, 0, REG_EDX, 1},
+ {FEAT_DEF(DE), 0x00000001, 0, REG_EDX, 2},
+ {FEAT_DEF(PSE), 0x00000001, 0, REG_EDX, 3},
+ {FEAT_DEF(TSC), 0x00000001, 0, REG_EDX, 4},
+ {FEAT_DEF(MSR), 0x00000001, 0, REG_EDX, 5},
+ {FEAT_DEF(PAE), 0x00000001, 0, REG_EDX, 6},
+ {FEAT_DEF(MCE), 0x00000001, 0, REG_EDX, 7},
+ {FEAT_DEF(CX8), 0x00000001, 0, REG_EDX, 8},
+ {FEAT_DEF(APIC), 0x00000001, 0, REG_EDX, 9},
+ {FEAT_DEF(SEP), 0x00000001, 0, REG_EDX, 11},
+ {FEAT_DEF(MTRR), 0x00000001, 0, REG_EDX, 12},
+ {FEAT_DEF(PGE), 0x00000001, 0, REG_EDX, 13},
+ {FEAT_DEF(MCA), 0x00000001, 0, REG_EDX, 14},
+ {FEAT_DEF(CMOV), 0x00000001, 0, REG_EDX, 15},
+ {FEAT_DEF(PAT), 0x00000001, 0, REG_EDX, 16},
+ {FEAT_DEF(PSE36), 0x00000001, 0, REG_EDX, 17},
+ {FEAT_DEF(PSN), 0x00000001, 0, REG_EDX, 18},
+ {FEAT_DEF(CLFSH), 0x00000001, 0, REG_EDX, 19},
+ {FEAT_DEF(DS), 0x00000001, 0, REG_EDX, 21},
+ {FEAT_DEF(ACPI), 0x00000001, 0, REG_EDX, 22},
+ {FEAT_DEF(MMX), 0x00000001, 0, REG_EDX, 23},
+ {FEAT_DEF(FXSR), 0x00000001, 0, REG_EDX, 24},
+ {FEAT_DEF(SSE), 0x00000001, 0, REG_EDX, 25},
+ {FEAT_DEF(SSE2), 0x00000001, 0, REG_EDX, 26},
+ {FEAT_DEF(SS), 0x00000001, 0, REG_EDX, 27},
+ {FEAT_DEF(HTT), 0x00000001, 0, REG_EDX, 28},
+ {FEAT_DEF(TM), 0x00000001, 0, REG_EDX, 29},
+ {FEAT_DEF(PBE), 0x00000001, 0, REG_EDX, 31},
+
+ {FEAT_DEF(DIGTEMP), 0x00000006, 0, REG_EAX, 0},
+ {FEAT_DEF(TRBOBST), 0x00000006, 0, REG_EAX, 1},
+ {FEAT_DEF(ARAT), 0x00000006, 0, REG_EAX, 2},
+ {FEAT_DEF(PLN), 0x00000006, 0, REG_EAX, 4},
+ {FEAT_DEF(ECMD), 0x00000006, 0, REG_EAX, 5},
+ {FEAT_DEF(PTM), 0x00000006, 0, REG_EAX, 6},
+
+ {FEAT_DEF(MPERF_APERF_MSR), 0x00000006, 0, REG_ECX, 0},
+ {FEAT_DEF(ACNT2), 0x00000006, 0, REG_ECX, 1},
+ {FEAT_DEF(ENERGY_EFF), 0x00000006, 0, REG_ECX, 3},
+
+ {FEAT_DEF(FSGSBASE), 0x00000007, 0, REG_EBX, 0},
+ {FEAT_DEF(BMI1), 0x00000007, 0, REG_EBX, 2},
+ {FEAT_DEF(HLE), 0x00000007, 0, REG_EBX, 4},
+ {FEAT_DEF(AVX2), 0x00000007, 0, REG_EBX, 5},
+ {FEAT_DEF(SMEP), 0x00000007, 0, REG_EBX, 6},
+ {FEAT_DEF(BMI2), 0x00000007, 0, REG_EBX, 7},
+ {FEAT_DEF(ERMS), 0x00000007, 0, REG_EBX, 8},
+ {FEAT_DEF(INVPCID), 0x00000007, 0, REG_EBX, 10},
+ {FEAT_DEF(RTM), 0x00000007, 0, REG_EBX, 11},
+
+ {FEAT_DEF(LAHF_SAHF), 0x80000001, 0, REG_ECX, 0},
+ {FEAT_DEF(LZCNT), 0x80000001, 0, REG_ECX, 4},
+
+ {FEAT_DEF(SYSCALL), 0x80000001, 0, REG_EDX, 11},
+ {FEAT_DEF(XD), 0x80000001, 0, REG_EDX, 20},
+ {FEAT_DEF(1GB_PG), 0x80000001, 0, REG_EDX, 26},
+ {FEAT_DEF(RDTSCP), 0x80000001, 0, REG_EDX, 27},
+ {FEAT_DEF(EM64T), 0x80000001, 0, REG_EDX, 29},
+
+ {FEAT_DEF(INVTSC), 0x80000007, 0, REG_EDX, 8},
};
/*
@@ -187,51 +180,27 @@ static const struct feature_entry cpu_feature_table[] = {
* This function, when compiled with GCC, will generate architecture-neutral
* code, as per GCC manual.
*/
-static inline int
-rte_cpu_get_features(struct cpuid_parameters_t params)
+static inline void
+rte_cpu_get_features(uint32_t leaf, uint32_t subleaf, cpuid_registers_t out)
{
- int eax, ebx, ecx, edx; /* registers */
-
-#ifndef __PIC__
- asm volatile ("cpuid"
- /* output */
- : "=a" (eax),
- "=b" (ebx),
- "=c" (ecx),
- "=d" (edx)
- /* input */
- : "a" (params.eax),
- "b" (params.ebx),
- "c" (params.ecx),
- "d" (params.edx));
+#if defined(__i386__) && defined(__PIC__)
+ /* %ebx is a forbidden register if we compile with -fPIC or -fPIE */
+ asm volatile("movl %%ebx,%0 ; cpuid ; xchgl %%ebx,%0"
+ : "=r" (out[REG_EBX]),
+ "=a" (out[REG_EAX]),
+ "=c" (out[REG_ECX]),
+ "=d" (out[REG_EDX])
+ : "a" (leaf), "c" (subleaf));
#else
- asm volatile (
- "mov %%ebx, %%edi\n"
- "cpuid\n"
- "xchgl %%ebx, %%edi;\n"
- : "=a" (eax),
- "=D" (ebx),
- "=c" (ecx),
- "=d" (edx)
- /* input */
- : "a" (params.eax),
- "D" (params.ebx),
- "c" (params.ecx),
- "d" (params.edx));
-#endif
- switch (params.return_register) {
- case REG_EAX:
- return eax;
- case REG_EBX:
- return ebx;
- case REG_ECX:
- return ecx;
- case REG_EDX:
- return edx;
- default:
- return 0;
- }
+ asm volatile("cpuid"
+ : "=a" (out[REG_EAX]),
+ "=b" (out[REG_EBX]),
+ "=c" (out[REG_ECX]),
+ "=d" (out[REG_EDX])
+ : "a" (leaf), "c" (subleaf));
+
+#endif
}
/*
@@ -240,17 +209,20 @@ rte_cpu_get_features(struct cpuid_parameters_t params)
int
rte_cpu_get_flag_enabled(enum rte_cpu_flag_t feature)
{
- int value;
+ const struct feature_entry *feat;
+ cpuid_registers_t regs;
if (feature >= RTE_CPUFLAG_NUMFLAGS)
/* Flag does not match anything in the feature tables */
return -ENOENT;
- /* get value of the register containing the desired feature */
- value = rte_cpu_get_features(cpu_feature_table[feature].params);
+ feat = &cpu_feature_table[feature];
+
+ /* get the cpuid leaf containing the desired feature */
+ rte_cpu_get_features(feat->leaf, feat->subleaf, regs);
/* check if the feature is enabled */
- return (cpu_feature_table[feature].feature_mask & value) > 0;
+ return (regs[feat->reg] >> feat->bit) & 1;
}
/**
@@ -273,7 +245,7 @@ rte_cpu_check_supported(void)
unsigned i;
for (i = 0; i < sizeof(compile_time_flags)/sizeof(compile_time_flags[0]); i++)
- if (rte_cpu_get_flag_enabled(compile_time_flags[i]) < 1) {
+ if (!rte_cpu_get_flag_enabled(compile_time_flags[i])) {
fprintf(stderr,
"ERROR: This system does not support \"%s\".\n"
"Please check that RTE_MACHINE is set correctly.\n",
--
1.8.3.1
^ permalink raw reply related [flat|nested] 21+ messages in thread
* [PATCH v3] eal_common_cpuflags: Fix %rbx corruption, and simplify the code
[not found] ` <20140320163921.GC7721-B26myB8xz7F8NnZeBjwnZQMhkBWG/bsMQH7oEaQurus@public.gmane.org>
` (2 preceding siblings ...)
2014-03-21 14:49 ` [PATCH v2] " Neil Horman
@ 2014-03-24 17:44 ` Neil Horman
[not found] ` <1395683088-19687-1-git-send-email-nhorman-2XuSBdqkA4R54TAoqtyWWQ@public.gmane.org>
2014-03-25 17:03 ` [PATCH v4] " Neil Horman
2014-03-25 19:52 ` [PATCH v5] " Neil Horman
5 siblings, 1 reply; 21+ messages in thread
From: Neil Horman @ 2014-03-24 17:44 UTC (permalink / raw)
To: dev-VfR2kkLFssw; +Cc: H. Peter Anvin
Neil Horman reported that on x86-64 the upper half of %rbx would get
clobbered when the code was compiled PIC or PIE, because the
i386-specific code to preserve %ebx was incorrectly compiled.
However, the code is really way more complex than it needs to be. For
one thing, the CPUID instruction only needs %eax (leaf) and %ecx
(subleaf) as parameters, and since we are testing for bits, we might
as well list the bits explicitly. Furthermore, we can use an array
rather than doing a switch statement inside a structure.
Reported-by: Neil Horman <nhorman-2XuSBdqkA4R54TAoqtyWWQ@public.gmane.org>
Signed-off-by: H. Peter Anvin <hpa-VuQAYsv1563Yd54FQh9/CA@public.gmane.org>
Signed-off-by: Neil Horman <nhorman-2XuSBdqkA4R54TAoqtyWWQ@public.gmane.org>
---
Change notes:
v2) Corrected build errors
Fixed cpuid_register_t reference passing
Fixed typedef name typo
v3)
* Modified feature_entry struct to drop the name field, as it's unused
* Modified cpu_feature_table to use C99 initializers
* Updated FEAT_DEF macro to include all feature_entry fields
* Modified cpuid_reg enum to start at 1 rather than zero
* Added CPUID_REG macro to drop enum value by 1 during access
* Added check on feat->reg use to detect missing entries
* Fixed a bug in rte_cpu_check_supported in which negative errors are ignored
---
lib/librte_eal/common/eal_common_cpuflags.c | 281 +++++++++++++---------------
1 file changed, 134 insertions(+), 147 deletions(-)
diff --git a/lib/librte_eal/common/eal_common_cpuflags.c b/lib/librte_eal/common/eal_common_cpuflags.c
index 1ebf78c..9ee0490 100644
--- a/lib/librte_eal/common/eal_common_cpuflags.c
+++ b/lib/librte_eal/common/eal_common_cpuflags.c
@@ -53,22 +53,15 @@
* Enumeration of CPU registers
*/
enum cpu_register_t {
- REG_EAX = 0,
- REG_EBX,
+ REG_EAX = 1,
REG_ECX,
REG_EDX,
+ REG_EBX,
};
-/**
- * Parameters for CPUID instruction
- */
-struct cpuid_parameters_t {
- uint32_t eax;
- uint32_t ebx;
- uint32_t ecx;
- uint32_t edx;
- enum cpu_register_t return_register;
-};
+#define CPUID_REG(reg) (reg - 1)
+
+typedef uint32_t cpuid_registers_t[4];
#define CPU_FLAG_NAME_MAX_LEN 64
@@ -76,109 +69,111 @@ struct cpuid_parameters_t {
* Struct to hold a processor feature entry
*/
struct feature_entry {
- enum rte_cpu_flag_t feature; /**< feature name */
+ uint32_t leaf; /**< cpuid leaf */
+ uint32_t subleaf; /**< cpuid subleaf */
+ uint32_t reg; /**< cpuid register */
+ uint32_t bit; /**< cpuid register bit */
char name[CPU_FLAG_NAME_MAX_LEN]; /**< String for printing */
- struct cpuid_parameters_t params; /**< cpuid parameters */
- uint32_t feature_mask; /**< bitmask for feature */
};
-#define FEAT_DEF(f) RTE_CPUFLAG_##f, #f
+#define FEAT_DEF(name, leaf, subleaf, reg, bit) \
+ [RTE_CPUFLAG_##name] = {leaf, subleaf, reg, bit, #name },
/**
* An array that holds feature entries
*/
static const struct feature_entry cpu_feature_table[] = {
- {FEAT_DEF(SSE3), {0x1, 0, 0, 0, REG_ECX}, 0x00000001},
- {FEAT_DEF(PCLMULQDQ), {0x1, 0, 0, 0, REG_ECX}, 0x00000002},
- {FEAT_DEF(DTES64), {0x1, 0, 0, 0, REG_ECX}, 0x00000004},
- {FEAT_DEF(MONITOR), {0x1, 0, 0, 0, REG_ECX}, 0x00000008},
- {FEAT_DEF(DS_CPL), {0x1, 0, 0, 0, REG_ECX}, 0x00000010},
- {FEAT_DEF(VMX), {0x1, 0, 0, 0, REG_ECX}, 0x00000020},
- {FEAT_DEF(SMX), {0x1, 0, 0, 0, REG_ECX}, 0x00000040},
- {FEAT_DEF(EIST), {0x1, 0, 0, 0, REG_ECX}, 0x00000080},
- {FEAT_DEF(TM2), {0x1, 0, 0, 0, REG_ECX}, 0x00000100},
- {FEAT_DEF(SSSE3), {0x1, 0, 0, 0, REG_ECX}, 0x00000200},
- {FEAT_DEF(CNXT_ID), {0x1, 0, 0, 0, REG_ECX}, 0x00000400},
- {FEAT_DEF(FMA), {0x1, 0, 0, 0, REG_ECX}, 0x00001000},
- {FEAT_DEF(CMPXCHG16B), {0x1, 0, 0, 0, REG_ECX}, 0x00002000},
- {FEAT_DEF(XTPR), {0x1, 0, 0, 0, REG_ECX}, 0x00004000},
- {FEAT_DEF(PDCM), {0x1, 0, 0, 0, REG_ECX}, 0x00008000},
- {FEAT_DEF(PCID), {0x1, 0, 0, 0, REG_ECX}, 0x00020000},
- {FEAT_DEF(DCA), {0x1, 0, 0, 0, REG_ECX}, 0x00040000},
- {FEAT_DEF(SSE4_1), {0x1, 0, 0, 0, REG_ECX}, 0x00080000},
- {FEAT_DEF(SSE4_2), {0x1, 0, 0, 0, REG_ECX}, 0x00100000},
- {FEAT_DEF(X2APIC), {0x1, 0, 0, 0, REG_ECX}, 0x00200000},
- {FEAT_DEF(MOVBE), {0x1, 0, 0, 0, REG_ECX}, 0x00400000},
- {FEAT_DEF(POPCNT), {0x1, 0, 0, 0, REG_ECX}, 0x00800000},
- {FEAT_DEF(TSC_DEADLINE), {0x1, 0, 0, 0, REG_ECX}, 0x01000000},
- {FEAT_DEF(AES), {0x1, 0, 0, 0, REG_ECX}, 0x02000000},
- {FEAT_DEF(XSAVE), {0x1, 0, 0, 0, REG_ECX}, 0x04000000},
- {FEAT_DEF(OSXSAVE), {0x1, 0, 0, 0, REG_ECX}, 0x08000000},
- {FEAT_DEF(AVX), {0x1, 0, 0, 0, REG_ECX}, 0x10000000},
- {FEAT_DEF(F16C), {0x1, 0, 0, 0, REG_ECX}, 0x20000000},
- {FEAT_DEF(RDRAND), {0x1, 0, 0, 0, REG_ECX}, 0x40000000},
+ FEAT_DEF(SSE3, 0x00000001, 0, REG_ECX, 0)
+ FEAT_DEF(PCLMULQDQ, 0x00000001, 0, REG_ECX, 1)
+ FEAT_DEF(DTES64, 0x00000001, 0, REG_ECX, 2)
+ FEAT_DEF(MONITOR, 0x00000001, 0, REG_ECX, 3)
+ FEAT_DEF(DS_CPL, 0x00000001, 0, REG_ECX, 4)
+ FEAT_DEF(VMX, 0x00000001, 0, REG_ECX, 5)
+ FEAT_DEF(SMX, 0x00000001, 0, REG_ECX, 6)
+ FEAT_DEF(EIST, 0x00000001, 0, REG_ECX, 7)
+ FEAT_DEF(TM2, 0x00000001, 0, REG_ECX, 8)
+ FEAT_DEF(SSSE3, 0x00000001, 0, REG_ECX, 9)
+ FEAT_DEF(CNXT_ID, 0x00000001, 0, REG_ECX, 10)
+ FEAT_DEF(FMA, 0x00000001, 0, REG_ECX, 12)
+ FEAT_DEF(CMPXCHG16B, 0x00000001, 0, REG_ECX, 13)
+ FEAT_DEF(XTPR, 0x00000001, 0, REG_ECX, 14)
+ FEAT_DEF(PDCM, 0x00000001, 0, REG_ECX, 15)
+ FEAT_DEF(PCID, 0x00000001, 0, REG_ECX, 17)
+ FEAT_DEF(DCA, 0x00000001, 0, REG_ECX, 18)
+ FEAT_DEF(SSE4_1, 0x00000001, 0, REG_ECX, 19)
+ FEAT_DEF(SSE4_2, 0x00000001, 0, REG_ECX, 20)
+ FEAT_DEF(X2APIC, 0x00000001, 0, REG_ECX, 21)
+ FEAT_DEF(MOVBE, 0x00000001, 0, REG_ECX, 22)
+ FEAT_DEF(POPCNT, 0x00000001, 0, REG_ECX, 23)
+ FEAT_DEF(TSC_DEADLINE, 0x00000001, 0, REG_ECX, 24)
+ FEAT_DEF(AES, 0x00000001, 0, REG_ECX, 25)
+ FEAT_DEF(XSAVE, 0x00000001, 0, REG_ECX, 26)
+ FEAT_DEF(OSXSAVE, 0x00000001, 0, REG_ECX, 27)
+ FEAT_DEF(AVX, 0x00000001, 0, REG_ECX, 28)
+ FEAT_DEF(F16C, 0x00000001, 0, REG_ECX, 29)
+ FEAT_DEF(RDRAND, 0x00000001, 0, REG_ECX, 30)
- {FEAT_DEF(FPU), {0x1, 0, 0, 0, REG_EDX}, 0x00000001},
- {FEAT_DEF(VME), {0x1, 0, 0, 0, REG_EDX}, 0x00000002},
- {FEAT_DEF(DE), {0x1, 0, 0, 0, REG_EDX}, 0x00000004},
- {FEAT_DEF(PSE), {0x1, 0, 0, 0, REG_EDX}, 0x00000008},
- {FEAT_DEF(TSC), {0x1, 0, 0, 0, REG_EDX}, 0x00000010},
- {FEAT_DEF(MSR), {0x1, 0, 0, 0, REG_EDX}, 0x00000020},
- {FEAT_DEF(PAE), {0x1, 0, 0, 0, REG_EDX}, 0x00000040},
- {FEAT_DEF(MCE), {0x1, 0, 0, 0, REG_EDX}, 0x00000080},
- {FEAT_DEF(CX8), {0x1, 0, 0, 0, REG_EDX}, 0x00000100},
- {FEAT_DEF(APIC), {0x1, 0, 0, 0, REG_EDX}, 0x00000200},
- {FEAT_DEF(SEP), {0x1, 0, 0, 0, REG_EDX}, 0x00000800},
- {FEAT_DEF(MTRR), {0x1, 0, 0, 0, REG_EDX}, 0x00001000},
- {FEAT_DEF(PGE), {0x1, 0, 0, 0, REG_EDX}, 0x00002000},
- {FEAT_DEF(MCA), {0x1, 0, 0, 0, REG_EDX}, 0x00004000},
- {FEAT_DEF(CMOV), {0x1, 0, 0, 0, REG_EDX}, 0x00008000},
- {FEAT_DEF(PAT), {0x1, 0, 0, 0, REG_EDX}, 0x00010000},
- {FEAT_DEF(PSE36), {0x1, 0, 0, 0, REG_EDX}, 0x00020000},
- {FEAT_DEF(PSN), {0x1, 0, 0, 0, REG_EDX}, 0x00040000},
- {FEAT_DEF(CLFSH), {0x1, 0, 0, 0, REG_EDX}, 0x00080000},
- {FEAT_DEF(DS), {0x1, 0, 0, 0, REG_EDX}, 0x00200000},
- {FEAT_DEF(ACPI), {0x1, 0, 0, 0, REG_EDX}, 0x00400000},
- {FEAT_DEF(MMX), {0x1, 0, 0, 0, REG_EDX}, 0x00800000},
- {FEAT_DEF(FXSR), {0x1, 0, 0, 0, REG_EDX}, 0x01000000},
- {FEAT_DEF(SSE), {0x1, 0, 0, 0, REG_EDX}, 0x02000000},
- {FEAT_DEF(SSE2), {0x1, 0, 0, 0, REG_EDX}, 0x04000000},
- {FEAT_DEF(SS), {0x1, 0, 0, 0, REG_EDX}, 0x08000000},
- {FEAT_DEF(HTT), {0x1, 0, 0, 0, REG_EDX}, 0x10000000},
- {FEAT_DEF(TM), {0x1, 0, 0, 0, REG_EDX}, 0x20000000},
- {FEAT_DEF(PBE), {0x1, 0, 0, 0, REG_EDX}, 0x80000000},
+ FEAT_DEF(FPU, 0x00000001, 0, REG_EDX, 0)
+ FEAT_DEF(VME, 0x00000001, 0, REG_EDX, 1)
+ FEAT_DEF(DE, 0x00000001, 0, REG_EDX, 2)
+ FEAT_DEF(PSE, 0x00000001, 0, REG_EDX, 3)
+ FEAT_DEF(TSC, 0x00000001, 0, REG_EDX, 4)
+ FEAT_DEF(MSR, 0x00000001, 0, REG_EDX, 5)
+ FEAT_DEF(PAE, 0x00000001, 0, REG_EDX, 6)
+ FEAT_DEF(MCE, 0x00000001, 0, REG_EDX, 7)
+ FEAT_DEF(CX8, 0x00000001, 0, REG_EDX, 8)
+ FEAT_DEF(APIC, 0x00000001, 0, REG_EDX, 9)
+ FEAT_DEF(SEP, 0x00000001, 0, REG_EDX, 11)
+ FEAT_DEF(MTRR, 0x00000001, 0, REG_EDX, 12)
+ FEAT_DEF(PGE, 0x00000001, 0, REG_EDX, 13)
+ FEAT_DEF(MCA, 0x00000001, 0, REG_EDX, 14)
+ FEAT_DEF(CMOV, 0x00000001, 0, REG_EDX, 15)
+ FEAT_DEF(PAT, 0x00000001, 0, REG_EDX, 16)
+ FEAT_DEF(PSE36, 0x00000001, 0, REG_EDX, 17)
+ FEAT_DEF(PSN, 0x00000001, 0, REG_EDX, 18)
+ FEAT_DEF(CLFSH, 0x00000001, 0, REG_EDX, 19)
+ FEAT_DEF(DS, 0x00000001, 0, REG_EDX, 21)
+ FEAT_DEF(ACPI, 0x00000001, 0, REG_EDX, 22)
+ FEAT_DEF(MMX, 0x00000001, 0, REG_EDX, 23)
+ FEAT_DEF(FXSR, 0x00000001, 0, REG_EDX, 24)
+ FEAT_DEF(SSE, 0x00000001, 0, REG_EDX, 25)
+ FEAT_DEF(SSE2, 0x00000001, 0, REG_EDX, 26)
+ FEAT_DEF(SS, 0x00000001, 0, REG_EDX, 27)
+ FEAT_DEF(HTT, 0x00000001, 0, REG_EDX, 28)
+ FEAT_DEF(TM, 0x00000001, 0, REG_EDX, 29)
+ FEAT_DEF(PBE, 0x00000001, 0, REG_EDX, 31)
- {FEAT_DEF(DIGTEMP), {0x6, 0, 0, 0, REG_EAX}, 0x00000001},
- {FEAT_DEF(TRBOBST), {0x6, 0, 0, 0, REG_EAX}, 0x00000002},
- {FEAT_DEF(ARAT), {0x6, 0, 0, 0, REG_EAX}, 0x00000004},
- {FEAT_DEF(PLN), {0x6, 0, 0, 0, REG_EAX}, 0x00000010},
- {FEAT_DEF(ECMD), {0x6, 0, 0, 0, REG_EAX}, 0x00000020},
- {FEAT_DEF(PTM), {0x6, 0, 0, 0, REG_EAX}, 0x00000040},
+ FEAT_DEF(DIGTEMP, 0x00000006, 0, REG_EAX, 0)
+ FEAT_DEF(TRBOBST, 0x00000006, 0, REG_EAX, 1)
+ FEAT_DEF(ARAT, 0x00000006, 0, REG_EAX, 2)
+ FEAT_DEF(PLN, 0x00000006, 0, REG_EAX, 4)
+ FEAT_DEF(ECMD, 0x00000006, 0, REG_EAX, 5)
+ FEAT_DEF(PTM, 0x00000006, 0, REG_EAX, 6)
- {FEAT_DEF(MPERF_APERF_MSR), {0x6, 0, 0, 0, REG_ECX}, 0x00000001},
- {FEAT_DEF(ACNT2), {0x6, 0, 0, 0, REG_ECX}, 0x00000002},
- {FEAT_DEF(ENERGY_EFF), {0x6, 0, 0, 0, REG_ECX}, 0x00000008},
+ FEAT_DEF(MPERF_APERF_MSR, 0x00000006, 0, REG_ECX, 0)
+ FEAT_DEF(ACNT2, 0x00000006, 0, REG_ECX, 1)
+ FEAT_DEF(ENERGY_EFF, 0x00000006, 0, REG_ECX, 3)
- {FEAT_DEF(FSGSBASE), {0x7, 0, 0, 0, REG_EBX}, 0x00000001},
- {FEAT_DEF(BMI1), {0x7, 0, 0, 0, REG_EBX}, 0x00000004},
- {FEAT_DEF(HLE), {0x7, 0, 0, 0, REG_EBX}, 0x00000010},
- {FEAT_DEF(AVX2), {0x7, 0, 0, 0, REG_EBX}, 0x00000020},
- {FEAT_DEF(SMEP), {0x7, 0, 0, 0, REG_EBX}, 0x00000040},
- {FEAT_DEF(BMI2), {0x7, 0, 0, 0, REG_EBX}, 0x00000080},
- {FEAT_DEF(ERMS), {0x7, 0, 0, 0, REG_EBX}, 0x00000100},
- {FEAT_DEF(INVPCID), {0x7, 0, 0, 0, REG_EBX}, 0x00000400},
- {FEAT_DEF(RTM), {0x7, 0, 0, 0, REG_EBX}, 0x00000800},
+ FEAT_DEF(FSGSBASE, 0x00000007, 0, REG_EBX, 0)
+ FEAT_DEF(BMI1, 0x00000007, 0, REG_EBX, 2)
+ FEAT_DEF(HLE, 0x00000007, 0, REG_EBX, 4)
+ FEAT_DEF(AVX2, 0x00000007, 0, REG_EBX, 5)
+ FEAT_DEF(SMEP, 0x00000007, 0, REG_EBX, 6)
+ FEAT_DEF(BMI2, 0x00000007, 0, REG_EBX, 7)
+ FEAT_DEF(ERMS, 0x00000007, 0, REG_EBX, 8)
+ FEAT_DEF(INVPCID, 0x00000007, 0, REG_EBX, 10)
+ FEAT_DEF(RTM, 0x00000007, 0, REG_EBX, 11)
- {FEAT_DEF(LAHF_SAHF), {0x80000001, 0, 0, 0, REG_ECX}, 0x00000001},
- {FEAT_DEF(LZCNT), {0x80000001, 0, 0, 0, REG_ECX}, 0x00000010},
+ FEAT_DEF(LAHF_SAHF, 0x80000001, 0, REG_ECX, 0)
+ FEAT_DEF(LZCNT, 0x80000001, 0, REG_ECX, 4)
- {FEAT_DEF(SYSCALL), {0x80000001, 0, 0, 0, REG_EDX}, 0x00000800},
- {FEAT_DEF(XD), {0x80000001, 0, 0, 0, REG_EDX}, 0x00100000},
- {FEAT_DEF(1GB_PG), {0x80000001, 0, 0, 0, REG_EDX}, 0x04000000},
- {FEAT_DEF(RDTSCP), {0x80000001, 0, 0, 0, REG_EDX}, 0x08000000},
- {FEAT_DEF(EM64T), {0x80000001, 0, 0, 0, REG_EDX}, 0x20000000},
+ FEAT_DEF(SYSCALL, 0x80000001, 0, REG_EDX, 11)
+ FEAT_DEF(XD, 0x80000001, 0, REG_EDX, 20)
+ FEAT_DEF(1GB_PG, 0x80000001, 0, REG_EDX, 26)
+ FEAT_DEF(RDTSCP, 0x80000001, 0, REG_EDX, 27)
+ FEAT_DEF(EM64T, 0x80000001, 0, REG_EDX, 29)
- {FEAT_DEF(INVTSC), {0x80000007, 0, 0, 0, REG_EDX}, 0x00000100},
+ FEAT_DEF(INVTSC, 0x80000007, 0, REG_EDX, 8)
};
/*
@@ -187,51 +182,27 @@ static const struct feature_entry cpu_feature_table[] = {
* This function, when compiled with GCC, will generate architecture-neutral
* code, as per GCC manual.
*/
-static inline int
-rte_cpu_get_features(struct cpuid_parameters_t params)
+static inline void
+rte_cpu_get_features(uint32_t leaf, uint32_t subleaf, cpuid_registers_t out)
{
- int eax, ebx, ecx, edx; /* registers */
-
-#ifndef __PIC__
- asm volatile ("cpuid"
- /* output */
- : "=a" (eax),
- "=b" (ebx),
- "=c" (ecx),
- "=d" (edx)
- /* input */
- : "a" (params.eax),
- "b" (params.ebx),
- "c" (params.ecx),
- "d" (params.edx));
+#if defined(__i386__) && defined(__PIC__)
+ /* %ebx is a forbidden register if we compile with -fPIC or -fPIE */
+ asm volatile("movl %%ebx,%0 ; cpuid ; xchgl %%ebx,%0"
+ : "=r" (out[CPUID_REG(REG_EBX)]),
+ "=a" (out[CPUID_REG(REG_EAX)]),
+ "=c" (out[CPUID_REG(REG_ECX)]),
+ "=d" (out[CPUID_REG(REG_EDX)])
+ : "a" (leaf), "c" (subleaf));
#else
- asm volatile (
- "mov %%ebx, %%edi\n"
- "cpuid\n"
- "xchgl %%ebx, %%edi;\n"
- : "=a" (eax),
- "=D" (ebx),
- "=c" (ecx),
- "=d" (edx)
- /* input */
- : "a" (params.eax),
- "D" (params.ebx),
- "c" (params.ecx),
- "d" (params.edx));
-#endif
- switch (params.return_register) {
- case REG_EAX:
- return eax;
- case REG_EBX:
- return ebx;
- case REG_ECX:
- return ecx;
- case REG_EDX:
- return edx;
- default:
- return 0;
- }
+ asm volatile("cpuid"
+ : "=a" (out[CPUID_REG(REG_EAX)]),
+ "=b" (out[CPUID_REG(REG_EBX)]),
+ "=c" (out[CPUID_REG(REG_ECX)]),
+ "=d" (out[CPUID_REG(REG_EDX)])
+ : "a" (leaf), "c" (subleaf));
+
+#endif
}
/*
@@ -240,17 +211,24 @@ rte_cpu_get_features(struct cpuid_parameters_t params)
int
rte_cpu_get_flag_enabled(enum rte_cpu_flag_t feature)
{
- int value;
+ const struct feature_entry *feat;
+ cpuid_registers_t regs;
if (feature >= RTE_CPUFLAG_NUMFLAGS)
/* Flag does not match anything in the feature tables */
return -ENOENT;
- /* get value of the register containing the desired feature */
- value = rte_cpu_get_features(cpu_feature_table[feature].params);
+ feat = &cpu_feature_table[feature];
+
+ if (!feat->reg)
+ /* This entry in the table wasn't filled out! */
+ return -EFAULT;
+
+ /* get the cpuid leaf containing the desired feature */
+ rte_cpu_get_features(feat->leaf, feat->subleaf, regs);
/* check if the feature is enabled */
- return (cpu_feature_table[feature].feature_mask & value) > 0;
+ return (regs[CPUID_REG(feat->reg)] >> feat->bit) & 1;
}
/**
@@ -271,9 +249,18 @@ rte_cpu_check_supported(void)
RTE_COMPILE_TIME_CPUFLAGS
};
unsigned i;
+ int ret;
for (i = 0; i < sizeof(compile_time_flags)/sizeof(compile_time_flags[0]); i++)
- if (rte_cpu_get_flag_enabled(compile_time_flags[i]) < 1) {
+ ret = rte_cpu_get_flag_enabled(compile_time_flags[i]);
+
+ if (ret < 0) {
+ fprintf(stderr,
+ "ERROR: CPU feature flag lookup failed with error %d\n",
+ ret);
+ exit(1);
+ }
+ if (!ret) {
fprintf(stderr,
"ERROR: This system does not support \"%s\".\n"
"Please check that RTE_MACHINE is set correctly.\n",
--
1.8.3.1
^ permalink raw reply related [flat|nested] 21+ messages in thread
* [PATCH v4] eal_common_cpuflags: Fix %rbx corruption, and simplify the code
[not found] ` <20140320163921.GC7721-B26myB8xz7F8NnZeBjwnZQMhkBWG/bsMQH7oEaQurus@public.gmane.org>
` (3 preceding siblings ...)
2014-03-24 17:44 ` [PATCH v3] " Neil Horman
@ 2014-03-25 17:03 ` Neil Horman
[not found] ` <1395767000-28709-1-git-send-email-nhorman-2XuSBdqkA4R54TAoqtyWWQ@public.gmane.org>
2014-03-25 19:52 ` [PATCH v5] " Neil Horman
5 siblings, 1 reply; 21+ messages in thread
From: Neil Horman @ 2014-03-25 17:03 UTC (permalink / raw)
To: dev-VfR2kkLFssw; +Cc: H. Peter Anvin
Neil Horman reported that on x86-64 the upper half of %rbx would get
clobbered when the code was compiled PIC or PIE, because the
i386-specific code to preserve %ebx was incorrectly compiled.
However, the code is really way more complex than it needs to be. For
one thing, the CPUID instruction only needs %eax (leaf) and %ecx
(subleaf) as parameters, and since we are testing for bits, we might
as well list the bits explicitly. Furthermore, we can use an array
rather than doing a switch statement inside a structure.
Reported-by: Neil Horman <nhorman-2XuSBdqkA4R54TAoqtyWWQ@public.gmane.org>
Signed-off-by: H. Peter Anvin <hpa-VuQAYsv1563Yd54FQh9/CA@public.gmane.org>
---
Change notes:
v2) Corrected build errors
Fixed cpuid_register_t reference passing
Fixed typedef name typo
v3)
* Modified feature_entry struct to drop the name field, as it's unused
* Modified cpu_feature_table to use C99 initializers
* Updated FEAT_DEF macro to include all feature_entry fields
* Modified cpuid_reg enum to start at 1 rather than zero
* Added CPUID_REG macro to drop enum value by 1 during access
* Added check on feat->reg use to detect missing entries
* Fixed a bug in rte_cpu_check_supported in which negative errors are ignored
v4)
* Fixed sanity checks to not offset feat->reg and just check !feat->reg
* Added a check for the sanity of the leaf node
---
lib/librte_eal/common/eal_common_cpuflags.c | 287 ++++++++++++++--------------
1 file changed, 141 insertions(+), 146 deletions(-)
diff --git a/lib/librte_eal/common/eal_common_cpuflags.c b/lib/librte_eal/common/eal_common_cpuflags.c
index 1ebf78c..b61e271 100644
--- a/lib/librte_eal/common/eal_common_cpuflags.c
+++ b/lib/librte_eal/common/eal_common_cpuflags.c
@@ -54,21 +54,12 @@
*/
enum cpu_register_t {
REG_EAX = 0,
- REG_EBX,
REG_ECX,
REG_EDX,
+ REG_EBX,
};
-/**
- * Parameters for CPUID instruction
- */
-struct cpuid_parameters_t {
- uint32_t eax;
- uint32_t ebx;
- uint32_t ecx;
- uint32_t edx;
- enum cpu_register_t return_register;
-};
+typedef uint32_t cpuid_registers_t[4];
#define CPU_FLAG_NAME_MAX_LEN 64
@@ -76,109 +67,111 @@ struct cpuid_parameters_t {
* Struct to hold a processor feature entry
*/
struct feature_entry {
- enum rte_cpu_flag_t feature; /**< feature name */
+ uint32_t leaf; /**< cpuid leaf */
+ uint32_t subleaf; /**< cpuid subleaf */
+ uint32_t reg; /**< cpuid register */
+ uint32_t bit; /**< cpuid register bit */
char name[CPU_FLAG_NAME_MAX_LEN]; /**< String for printing */
- struct cpuid_parameters_t params; /**< cpuid parameters */
- uint32_t feature_mask; /**< bitmask for feature */
};
-#define FEAT_DEF(f) RTE_CPUFLAG_##f, #f
+#define FEAT_DEF(name, leaf, subleaf, reg, bit) \
+ [RTE_CPUFLAG_##name] = {leaf, subleaf, reg, bit, #name },
/**
* An array that holds feature entries
*/
static const struct feature_entry cpu_feature_table[] = {
- {FEAT_DEF(SSE3), {0x1, 0, 0, 0, REG_ECX}, 0x00000001},
- {FEAT_DEF(PCLMULQDQ), {0x1, 0, 0, 0, REG_ECX}, 0x00000002},
- {FEAT_DEF(DTES64), {0x1, 0, 0, 0, REG_ECX}, 0x00000004},
- {FEAT_DEF(MONITOR), {0x1, 0, 0, 0, REG_ECX}, 0x00000008},
- {FEAT_DEF(DS_CPL), {0x1, 0, 0, 0, REG_ECX}, 0x00000010},
- {FEAT_DEF(VMX), {0x1, 0, 0, 0, REG_ECX}, 0x00000020},
- {FEAT_DEF(SMX), {0x1, 0, 0, 0, REG_ECX}, 0x00000040},
- {FEAT_DEF(EIST), {0x1, 0, 0, 0, REG_ECX}, 0x00000080},
- {FEAT_DEF(TM2), {0x1, 0, 0, 0, REG_ECX}, 0x00000100},
- {FEAT_DEF(SSSE3), {0x1, 0, 0, 0, REG_ECX}, 0x00000200},
- {FEAT_DEF(CNXT_ID), {0x1, 0, 0, 0, REG_ECX}, 0x00000400},
- {FEAT_DEF(FMA), {0x1, 0, 0, 0, REG_ECX}, 0x00001000},
- {FEAT_DEF(CMPXCHG16B), {0x1, 0, 0, 0, REG_ECX}, 0x00002000},
- {FEAT_DEF(XTPR), {0x1, 0, 0, 0, REG_ECX}, 0x00004000},
- {FEAT_DEF(PDCM), {0x1, 0, 0, 0, REG_ECX}, 0x00008000},
- {FEAT_DEF(PCID), {0x1, 0, 0, 0, REG_ECX}, 0x00020000},
- {FEAT_DEF(DCA), {0x1, 0, 0, 0, REG_ECX}, 0x00040000},
- {FEAT_DEF(SSE4_1), {0x1, 0, 0, 0, REG_ECX}, 0x00080000},
- {FEAT_DEF(SSE4_2), {0x1, 0, 0, 0, REG_ECX}, 0x00100000},
- {FEAT_DEF(X2APIC), {0x1, 0, 0, 0, REG_ECX}, 0x00200000},
- {FEAT_DEF(MOVBE), {0x1, 0, 0, 0, REG_ECX}, 0x00400000},
- {FEAT_DEF(POPCNT), {0x1, 0, 0, 0, REG_ECX}, 0x00800000},
- {FEAT_DEF(TSC_DEADLINE), {0x1, 0, 0, 0, REG_ECX}, 0x01000000},
- {FEAT_DEF(AES), {0x1, 0, 0, 0, REG_ECX}, 0x02000000},
- {FEAT_DEF(XSAVE), {0x1, 0, 0, 0, REG_ECX}, 0x04000000},
- {FEAT_DEF(OSXSAVE), {0x1, 0, 0, 0, REG_ECX}, 0x08000000},
- {FEAT_DEF(AVX), {0x1, 0, 0, 0, REG_ECX}, 0x10000000},
- {FEAT_DEF(F16C), {0x1, 0, 0, 0, REG_ECX}, 0x20000000},
- {FEAT_DEF(RDRAND), {0x1, 0, 0, 0, REG_ECX}, 0x40000000},
+ FEAT_DEF(SSE3, 0x00000001, 0, REG_ECX, 0)
+ FEAT_DEF(PCLMULQDQ, 0x00000001, 0, REG_ECX, 1)
+ FEAT_DEF(DTES64, 0x00000001, 0, REG_ECX, 2)
+ FEAT_DEF(MONITOR, 0x00000001, 0, REG_ECX, 3)
+ FEAT_DEF(DS_CPL, 0x00000001, 0, REG_ECX, 4)
+ FEAT_DEF(VMX, 0x00000001, 0, REG_ECX, 5)
+ FEAT_DEF(SMX, 0x00000001, 0, REG_ECX, 6)
+ FEAT_DEF(EIST, 0x00000001, 0, REG_ECX, 7)
+ FEAT_DEF(TM2, 0x00000001, 0, REG_ECX, 8)
+ FEAT_DEF(SSSE3, 0x00000001, 0, REG_ECX, 9)
+ FEAT_DEF(CNXT_ID, 0x00000001, 0, REG_ECX, 10)
+ FEAT_DEF(FMA, 0x00000001, 0, REG_ECX, 12)
+ FEAT_DEF(CMPXCHG16B, 0x00000001, 0, REG_ECX, 13)
+ FEAT_DEF(XTPR, 0x00000001, 0, REG_ECX, 14)
+ FEAT_DEF(PDCM, 0x00000001, 0, REG_ECX, 15)
+ FEAT_DEF(PCID, 0x00000001, 0, REG_ECX, 17)
+ FEAT_DEF(DCA, 0x00000001, 0, REG_ECX, 18)
+ FEAT_DEF(SSE4_1, 0x00000001, 0, REG_ECX, 19)
+ FEAT_DEF(SSE4_2, 0x00000001, 0, REG_ECX, 20)
+ FEAT_DEF(X2APIC, 0x00000001, 0, REG_ECX, 21)
+ FEAT_DEF(MOVBE, 0x00000001, 0, REG_ECX, 22)
+ FEAT_DEF(POPCNT, 0x00000001, 0, REG_ECX, 23)
+ FEAT_DEF(TSC_DEADLINE, 0x00000001, 0, REG_ECX, 24)
+ FEAT_DEF(AES, 0x00000001, 0, REG_ECX, 25)
+ FEAT_DEF(XSAVE, 0x00000001, 0, REG_ECX, 26)
+ FEAT_DEF(OSXSAVE, 0x00000001, 0, REG_ECX, 27)
+ FEAT_DEF(AVX, 0x00000001, 0, REG_ECX, 28)
+ FEAT_DEF(F16C, 0x00000001, 0, REG_ECX, 29)
+ FEAT_DEF(RDRAND, 0x00000001, 0, REG_ECX, 30)
- {FEAT_DEF(FPU), {0x1, 0, 0, 0, REG_EDX}, 0x00000001},
- {FEAT_DEF(VME), {0x1, 0, 0, 0, REG_EDX}, 0x00000002},
- {FEAT_DEF(DE), {0x1, 0, 0, 0, REG_EDX}, 0x00000004},
- {FEAT_DEF(PSE), {0x1, 0, 0, 0, REG_EDX}, 0x00000008},
- {FEAT_DEF(TSC), {0x1, 0, 0, 0, REG_EDX}, 0x00000010},
- {FEAT_DEF(MSR), {0x1, 0, 0, 0, REG_EDX}, 0x00000020},
- {FEAT_DEF(PAE), {0x1, 0, 0, 0, REG_EDX}, 0x00000040},
- {FEAT_DEF(MCE), {0x1, 0, 0, 0, REG_EDX}, 0x00000080},
- {FEAT_DEF(CX8), {0x1, 0, 0, 0, REG_EDX}, 0x00000100},
- {FEAT_DEF(APIC), {0x1, 0, 0, 0, REG_EDX}, 0x00000200},
- {FEAT_DEF(SEP), {0x1, 0, 0, 0, REG_EDX}, 0x00000800},
- {FEAT_DEF(MTRR), {0x1, 0, 0, 0, REG_EDX}, 0x00001000},
- {FEAT_DEF(PGE), {0x1, 0, 0, 0, REG_EDX}, 0x00002000},
- {FEAT_DEF(MCA), {0x1, 0, 0, 0, REG_EDX}, 0x00004000},
- {FEAT_DEF(CMOV), {0x1, 0, 0, 0, REG_EDX}, 0x00008000},
- {FEAT_DEF(PAT), {0x1, 0, 0, 0, REG_EDX}, 0x00010000},
- {FEAT_DEF(PSE36), {0x1, 0, 0, 0, REG_EDX}, 0x00020000},
- {FEAT_DEF(PSN), {0x1, 0, 0, 0, REG_EDX}, 0x00040000},
- {FEAT_DEF(CLFSH), {0x1, 0, 0, 0, REG_EDX}, 0x00080000},
- {FEAT_DEF(DS), {0x1, 0, 0, 0, REG_EDX}, 0x00200000},
- {FEAT_DEF(ACPI), {0x1, 0, 0, 0, REG_EDX}, 0x00400000},
- {FEAT_DEF(MMX), {0x1, 0, 0, 0, REG_EDX}, 0x00800000},
- {FEAT_DEF(FXSR), {0x1, 0, 0, 0, REG_EDX}, 0x01000000},
- {FEAT_DEF(SSE), {0x1, 0, 0, 0, REG_EDX}, 0x02000000},
- {FEAT_DEF(SSE2), {0x1, 0, 0, 0, REG_EDX}, 0x04000000},
- {FEAT_DEF(SS), {0x1, 0, 0, 0, REG_EDX}, 0x08000000},
- {FEAT_DEF(HTT), {0x1, 0, 0, 0, REG_EDX}, 0x10000000},
- {FEAT_DEF(TM), {0x1, 0, 0, 0, REG_EDX}, 0x20000000},
- {FEAT_DEF(PBE), {0x1, 0, 0, 0, REG_EDX}, 0x80000000},
+ FEAT_DEF(FPU, 0x00000001, 0, REG_EDX, 0)
+ FEAT_DEF(VME, 0x00000001, 0, REG_EDX, 1)
+ FEAT_DEF(DE, 0x00000001, 0, REG_EDX, 2)
+ FEAT_DEF(PSE, 0x00000001, 0, REG_EDX, 3)
+ FEAT_DEF(TSC, 0x00000001, 0, REG_EDX, 4)
+ FEAT_DEF(MSR, 0x00000001, 0, REG_EDX, 5)
+ FEAT_DEF(PAE, 0x00000001, 0, REG_EDX, 6)
+ FEAT_DEF(MCE, 0x00000001, 0, REG_EDX, 7)
+ FEAT_DEF(CX8, 0x00000001, 0, REG_EDX, 8)
+ FEAT_DEF(APIC, 0x00000001, 0, REG_EDX, 9)
+ FEAT_DEF(SEP, 0x00000001, 0, REG_EDX, 11)
+ FEAT_DEF(MTRR, 0x00000001, 0, REG_EDX, 12)
+ FEAT_DEF(PGE, 0x00000001, 0, REG_EDX, 13)
+ FEAT_DEF(MCA, 0x00000001, 0, REG_EDX, 14)
+ FEAT_DEF(CMOV, 0x00000001, 0, REG_EDX, 15)
+ FEAT_DEF(PAT, 0x00000001, 0, REG_EDX, 16)
+ FEAT_DEF(PSE36, 0x00000001, 0, REG_EDX, 17)
+ FEAT_DEF(PSN, 0x00000001, 0, REG_EDX, 18)
+ FEAT_DEF(CLFSH, 0x00000001, 0, REG_EDX, 19)
+ FEAT_DEF(DS, 0x00000001, 0, REG_EDX, 21)
+ FEAT_DEF(ACPI, 0x00000001, 0, REG_EDX, 22)
+ FEAT_DEF(MMX, 0x00000001, 0, REG_EDX, 23)
+ FEAT_DEF(FXSR, 0x00000001, 0, REG_EDX, 24)
+ FEAT_DEF(SSE, 0x00000001, 0, REG_EDX, 25)
+ FEAT_DEF(SSE2, 0x00000001, 0, REG_EDX, 26)
+ FEAT_DEF(SS, 0x00000001, 0, REG_EDX, 27)
+ FEAT_DEF(HTT, 0x00000001, 0, REG_EDX, 28)
+ FEAT_DEF(TM, 0x00000001, 0, REG_EDX, 29)
+ FEAT_DEF(PBE, 0x00000001, 0, REG_EDX, 31)
- {FEAT_DEF(DIGTEMP), {0x6, 0, 0, 0, REG_EAX}, 0x00000001},
- {FEAT_DEF(TRBOBST), {0x6, 0, 0, 0, REG_EAX}, 0x00000002},
- {FEAT_DEF(ARAT), {0x6, 0, 0, 0, REG_EAX}, 0x00000004},
- {FEAT_DEF(PLN), {0x6, 0, 0, 0, REG_EAX}, 0x00000010},
- {FEAT_DEF(ECMD), {0x6, 0, 0, 0, REG_EAX}, 0x00000020},
- {FEAT_DEF(PTM), {0x6, 0, 0, 0, REG_EAX}, 0x00000040},
+ FEAT_DEF(DIGTEMP, 0x00000006, 0, REG_EAX, 0)
+ FEAT_DEF(TRBOBST, 0x00000006, 0, REG_EAX, 1)
+ FEAT_DEF(ARAT, 0x00000006, 0, REG_EAX, 2)
+ FEAT_DEF(PLN, 0x00000006, 0, REG_EAX, 4)
+ FEAT_DEF(ECMD, 0x00000006, 0, REG_EAX, 5)
+ FEAT_DEF(PTM, 0x00000006, 0, REG_EAX, 6)
- {FEAT_DEF(MPERF_APERF_MSR), {0x6, 0, 0, 0, REG_ECX}, 0x00000001},
- {FEAT_DEF(ACNT2), {0x6, 0, 0, 0, REG_ECX}, 0x00000002},
- {FEAT_DEF(ENERGY_EFF), {0x6, 0, 0, 0, REG_ECX}, 0x00000008},
+ FEAT_DEF(MPERF_APERF_MSR, 0x00000006, 0, REG_ECX, 0)
+ FEAT_DEF(ACNT2, 0x00000006, 0, REG_ECX, 1)
+ FEAT_DEF(ENERGY_EFF, 0x00000006, 0, REG_ECX, 3)
- {FEAT_DEF(FSGSBASE), {0x7, 0, 0, 0, REG_EBX}, 0x00000001},
- {FEAT_DEF(BMI1), {0x7, 0, 0, 0, REG_EBX}, 0x00000004},
- {FEAT_DEF(HLE), {0x7, 0, 0, 0, REG_EBX}, 0x00000010},
- {FEAT_DEF(AVX2), {0x7, 0, 0, 0, REG_EBX}, 0x00000020},
- {FEAT_DEF(SMEP), {0x7, 0, 0, 0, REG_EBX}, 0x00000040},
- {FEAT_DEF(BMI2), {0x7, 0, 0, 0, REG_EBX}, 0x00000080},
- {FEAT_DEF(ERMS), {0x7, 0, 0, 0, REG_EBX}, 0x00000100},
- {FEAT_DEF(INVPCID), {0x7, 0, 0, 0, REG_EBX}, 0x00000400},
- {FEAT_DEF(RTM), {0x7, 0, 0, 0, REG_EBX}, 0x00000800},
+ FEAT_DEF(FSGSBASE, 0x00000007, 0, REG_EBX, 0)
+ FEAT_DEF(BMI1, 0x00000007, 0, REG_EBX, 2)
+ FEAT_DEF(HLE, 0x00000007, 0, REG_EBX, 4)
+ FEAT_DEF(AVX2, 0x00000007, 0, REG_EBX, 5)
+ FEAT_DEF(SMEP, 0x00000007, 0, REG_EBX, 6)
+ FEAT_DEF(BMI2, 0x00000007, 0, REG_EBX, 7)
+ FEAT_DEF(ERMS, 0x00000007, 0, REG_EBX, 8)
+ FEAT_DEF(INVPCID, 0x00000007, 0, REG_EBX, 10)
+ FEAT_DEF(RTM, 0x00000007, 0, REG_EBX, 11)
- {FEAT_DEF(LAHF_SAHF), {0x80000001, 0, 0, 0, REG_ECX}, 0x00000001},
- {FEAT_DEF(LZCNT), {0x80000001, 0, 0, 0, REG_ECX}, 0x00000010},
+ FEAT_DEF(LAHF_SAHF, 0x80000001, 0, REG_ECX, 0)
+ FEAT_DEF(LZCNT, 0x80000001, 0, REG_ECX, 4)
- {FEAT_DEF(SYSCALL), {0x80000001, 0, 0, 0, REG_EDX}, 0x00000800},
- {FEAT_DEF(XD), {0x80000001, 0, 0, 0, REG_EDX}, 0x00100000},
- {FEAT_DEF(1GB_PG), {0x80000001, 0, 0, 0, REG_EDX}, 0x04000000},
- {FEAT_DEF(RDTSCP), {0x80000001, 0, 0, 0, REG_EDX}, 0x08000000},
- {FEAT_DEF(EM64T), {0x80000001, 0, 0, 0, REG_EDX}, 0x20000000},
+ FEAT_DEF(SYSCALL, 0x80000001, 0, REG_EDX, 11)
+ FEAT_DEF(XD, 0x80000001, 0, REG_EDX, 20)
+ FEAT_DEF(1GB_PG, 0x80000001, 0, REG_EDX, 26)
+ FEAT_DEF(RDTSCP, 0x80000001, 0, REG_EDX, 27)
+ FEAT_DEF(EM64T, 0x80000001, 0, REG_EDX, 29)
- {FEAT_DEF(INVTSC), {0x80000007, 0, 0, 0, REG_EDX}, 0x00000100},
+ FEAT_DEF(INVTSC, 0x80000007, 0, REG_EDX, 8)
};
/*
@@ -187,51 +180,27 @@ static const struct feature_entry cpu_feature_table[] = {
* This function, when compiled with GCC, will generate architecture-neutral
* code, as per GCC manual.
*/
-static inline int
-rte_cpu_get_features(struct cpuid_parameters_t params)
+static inline void
+rte_cpu_get_features(uint32_t leaf, uint32_t subleaf, cpuid_registers_t out)
{
- int eax, ebx, ecx, edx; /* registers */
-
-#ifndef __PIC__
- asm volatile ("cpuid"
- /* output */
- : "=a" (eax),
- "=b" (ebx),
- "=c" (ecx),
- "=d" (edx)
- /* input */
- : "a" (params.eax),
- "b" (params.ebx),
- "c" (params.ecx),
- "d" (params.edx));
+#if defined(__i386__) && defined(__PIC__)
+ /* %ebx is a forbidden register if we compile with -fPIC or -fPIE */
+ asm volatile("movl %%ebx,%0 ; cpuid ; xchgl %%ebx,%0"
+ : "=r" (out[REG_EBX]),
+ "=a" (out[REG_EAX]),
+ "=c" (out[REG_ECX]),
+ "=d" (out[REG_EDX])
+ : "a" (leaf), "c" (subleaf));
#else
- asm volatile (
- "mov %%ebx, %%edi\n"
- "cpuid\n"
- "xchgl %%ebx, %%edi;\n"
- : "=a" (eax),
- "=D" (ebx),
- "=c" (ecx),
- "=d" (edx)
- /* input */
- : "a" (params.eax),
- "D" (params.ebx),
- "c" (params.ecx),
- "d" (params.edx));
-#endif
- switch (params.return_register) {
- case REG_EAX:
- return eax;
- case REG_EBX:
- return ebx;
- case REG_ECX:
- return ecx;
- case REG_EDX:
- return edx;
- default:
- return 0;
- }
+ asm volatile("cpuid"
+ : "=a" (out[REG_EAX]),
+ "=b" (out[REG_EBX]),
+ "=c" (out[REG_ECX]),
+ "=d" (out[REG_EDX])
+ : "a" (leaf), "c" (subleaf));
+
+#endif
}
/*
@@ -240,17 +209,34 @@ rte_cpu_get_features(struct cpuid_parameters_t params)
int
rte_cpu_get_flag_enabled(enum rte_cpu_flag_t feature)
{
- int value;
+ const struct feature_entry *feat;
+ cpuid_registers_t regs;
+ static uint32_t max_leaf = 0;
+
+ if (!max_leaf) {
+ /* Get the max input leaf for this processor */
+ rte_cpu_get_features(0, 0, regs);
+ max_leaf = regs[REG_EAX];
+ }
if (feature >= RTE_CPUFLAG_NUMFLAGS)
/* Flag does not match anything in the feature tables */
return -ENOENT;
- /* get value of the register containing the desired feature */
- value = rte_cpu_get_features(cpu_feature_table[feature].params);
+ feat = &cpu_feature_table[feature];
+
+ if (!feat->leaf)
+ /* This entry in the table wasn't filled out! */
+ return -EFAULT;
+
+ if (feat->leaf > max_leaf)
+ return -EINVAL;
+
+ /* get the cpuid leaf containing the desired feature */
+ rte_cpu_get_features(feat->leaf, feat->subleaf, regs);
/* check if the feature is enabled */
- return (cpu_feature_table[feature].feature_mask & value) > 0;
+ return (regs[feat->reg] >> feat->bit) & 1;
}
/**
@@ -271,9 +257,18 @@ rte_cpu_check_supported(void)
RTE_COMPILE_TIME_CPUFLAGS
};
unsigned i;
+ int ret;
for (i = 0; i < sizeof(compile_time_flags)/sizeof(compile_time_flags[0]); i++)
- if (rte_cpu_get_flag_enabled(compile_time_flags[i]) < 1) {
+ ret = rte_cpu_get_flag_enabled(compile_time_flags[i]);
+
+ if (ret < 0) {
+ fprintf(stderr,
+ "ERROR: CPU feature flag lookup failed with error %d\n",
+ ret);
+ exit(1);
+ }
+ if (!ret) {
fprintf(stderr,
"ERROR: This system does not support \"%s\".\n"
"Please check that RTE_MACHINE is set correctly.\n",
--
1.8.3.1
^ permalink raw reply related [flat|nested] 21+ messages in thread
* [PATCH v5] eal_common_cpuflags: Fix %rbx corruption, and simplify the code
[not found] ` <20140320163921.GC7721-B26myB8xz7F8NnZeBjwnZQMhkBWG/bsMQH7oEaQurus@public.gmane.org>
` (4 preceding siblings ...)
2014-03-25 17:03 ` [PATCH v4] " Neil Horman
@ 2014-03-25 19:52 ` Neil Horman
[not found] ` <1395777179-4345-1-git-send-email-nhorman-2XuSBdqkA4R54TAoqtyWWQ@public.gmane.org>
5 siblings, 1 reply; 21+ messages in thread
From: Neil Horman @ 2014-03-25 19:52 UTC (permalink / raw)
To: dev-VfR2kkLFssw; +Cc: H. Peter Anvin
Neil Horman reported that on x86-64 the upper half of %rbx would get
clobbered when the code was compiled PIC or PIE, because the
i386-specific code to preserve %ebx was incorrectly compiled.
However, the code is really way more complex than it needs to be. For
one thing, the CPUID instruction only needs %eax (leaf) and %ecx
(subleaf) as parameters, and since we are testing for bits, we might
as well list the bits explicitly. Furthermore, we can use an array
rather than doing a switch statement inside a structure.
Reported-by: Neil Horman <nhorman-2XuSBdqkA4R54TAoqtyWWQ@public.gmane.org>
Signed-off-by: H. Peter Anvin <hpa-VuQAYsv1563Yd54FQh9/CA@public.gmane.org>
Signed-off-by: Neil Horman <nhorman-2XuSBdqkA4R54TAoqtyWWQ@public.gmane.org>
---
Change notes:
v2) Corrected build errors
Fixed cpuid_register_t reference passing
Fixed typedef name typo
v3)
* Modified feature_entry struct to drop the name field, as it's unused
* Modified cpu_feature_table to use C99 initializers
* Updated FEAT_DEF macro to include all feature_entry fields
* Modified cpuid_reg enum to start at 1 rather than zero
* Added CPUID_REG macro to drop enum value by 1 during access
* Added check on feat->reg use to detect missing entries
* Fixed a bug in rte_cpu_check_supported in which negative errors are ignored
v4)
* Fixed sanity checks to not offset feat->reg and just check !feat->reg
* Added a check for the sanity of the leaf node
v5)
* Fixed max leaf check to just return not supported rather than error
---
lib/librte_eal/common/eal_common_cpuflags.c | 281 ++++++++++++++--------------
1 file changed, 136 insertions(+), 145 deletions(-)
diff --git a/lib/librte_eal/common/eal_common_cpuflags.c b/lib/librte_eal/common/eal_common_cpuflags.c
index 1ebf78c..f9c1840 100644
--- a/lib/librte_eal/common/eal_common_cpuflags.c
+++ b/lib/librte_eal/common/eal_common_cpuflags.c
@@ -59,16 +59,7 @@ enum cpu_register_t {
REG_EDX,
};
-/**
- * Parameters for CPUID instruction
- */
-struct cpuid_parameters_t {
- uint32_t eax;
- uint32_t ebx;
- uint32_t ecx;
- uint32_t edx;
- enum cpu_register_t return_register;
-};
+typedef uint32_t cpuid_registers_t[4];
#define CPU_FLAG_NAME_MAX_LEN 64
@@ -76,109 +67,111 @@ struct cpuid_parameters_t {
* Struct to hold a processor feature entry
*/
struct feature_entry {
- enum rte_cpu_flag_t feature; /**< feature name */
+ uint32_t leaf; /**< cpuid leaf */
+ uint32_t subleaf; /**< cpuid subleaf */
+ uint32_t reg; /**< cpuid register */
+ uint32_t bit; /**< cpuid register bit */
char name[CPU_FLAG_NAME_MAX_LEN]; /**< String for printing */
- struct cpuid_parameters_t params; /**< cpuid parameters */
- uint32_t feature_mask; /**< bitmask for feature */
};
-#define FEAT_DEF(f) RTE_CPUFLAG_##f, #f
+#define FEAT_DEF(name, leaf, subleaf, reg, bit) \
+ [RTE_CPUFLAG_##name] = {leaf, subleaf, reg, bit, #name },
/**
* An array that holds feature entries
*/
static const struct feature_entry cpu_feature_table[] = {
- {FEAT_DEF(SSE3), {0x1, 0, 0, 0, REG_ECX}, 0x00000001},
- {FEAT_DEF(PCLMULQDQ), {0x1, 0, 0, 0, REG_ECX}, 0x00000002},
- {FEAT_DEF(DTES64), {0x1, 0, 0, 0, REG_ECX}, 0x00000004},
- {FEAT_DEF(MONITOR), {0x1, 0, 0, 0, REG_ECX}, 0x00000008},
- {FEAT_DEF(DS_CPL), {0x1, 0, 0, 0, REG_ECX}, 0x00000010},
- {FEAT_DEF(VMX), {0x1, 0, 0, 0, REG_ECX}, 0x00000020},
- {FEAT_DEF(SMX), {0x1, 0, 0, 0, REG_ECX}, 0x00000040},
- {FEAT_DEF(EIST), {0x1, 0, 0, 0, REG_ECX}, 0x00000080},
- {FEAT_DEF(TM2), {0x1, 0, 0, 0, REG_ECX}, 0x00000100},
- {FEAT_DEF(SSSE3), {0x1, 0, 0, 0, REG_ECX}, 0x00000200},
- {FEAT_DEF(CNXT_ID), {0x1, 0, 0, 0, REG_ECX}, 0x00000400},
- {FEAT_DEF(FMA), {0x1, 0, 0, 0, REG_ECX}, 0x00001000},
- {FEAT_DEF(CMPXCHG16B), {0x1, 0, 0, 0, REG_ECX}, 0x00002000},
- {FEAT_DEF(XTPR), {0x1, 0, 0, 0, REG_ECX}, 0x00004000},
- {FEAT_DEF(PDCM), {0x1, 0, 0, 0, REG_ECX}, 0x00008000},
- {FEAT_DEF(PCID), {0x1, 0, 0, 0, REG_ECX}, 0x00020000},
- {FEAT_DEF(DCA), {0x1, 0, 0, 0, REG_ECX}, 0x00040000},
- {FEAT_DEF(SSE4_1), {0x1, 0, 0, 0, REG_ECX}, 0x00080000},
- {FEAT_DEF(SSE4_2), {0x1, 0, 0, 0, REG_ECX}, 0x00100000},
- {FEAT_DEF(X2APIC), {0x1, 0, 0, 0, REG_ECX}, 0x00200000},
- {FEAT_DEF(MOVBE), {0x1, 0, 0, 0, REG_ECX}, 0x00400000},
- {FEAT_DEF(POPCNT), {0x1, 0, 0, 0, REG_ECX}, 0x00800000},
- {FEAT_DEF(TSC_DEADLINE), {0x1, 0, 0, 0, REG_ECX}, 0x01000000},
- {FEAT_DEF(AES), {0x1, 0, 0, 0, REG_ECX}, 0x02000000},
- {FEAT_DEF(XSAVE), {0x1, 0, 0, 0, REG_ECX}, 0x04000000},
- {FEAT_DEF(OSXSAVE), {0x1, 0, 0, 0, REG_ECX}, 0x08000000},
- {FEAT_DEF(AVX), {0x1, 0, 0, 0, REG_ECX}, 0x10000000},
- {FEAT_DEF(F16C), {0x1, 0, 0, 0, REG_ECX}, 0x20000000},
- {FEAT_DEF(RDRAND), {0x1, 0, 0, 0, REG_ECX}, 0x40000000},
+ FEAT_DEF(SSE3, 0x00000001, 0, REG_ECX, 0)
+ FEAT_DEF(PCLMULQDQ, 0x00000001, 0, REG_ECX, 1)
+ FEAT_DEF(DTES64, 0x00000001, 0, REG_ECX, 2)
+ FEAT_DEF(MONITOR, 0x00000001, 0, REG_ECX, 3)
+ FEAT_DEF(DS_CPL, 0x00000001, 0, REG_ECX, 4)
+ FEAT_DEF(VMX, 0x00000001, 0, REG_ECX, 5)
+ FEAT_DEF(SMX, 0x00000001, 0, REG_ECX, 6)
+ FEAT_DEF(EIST, 0x00000001, 0, REG_ECX, 7)
+ FEAT_DEF(TM2, 0x00000001, 0, REG_ECX, 8)
+ FEAT_DEF(SSSE3, 0x00000001, 0, REG_ECX, 9)
+ FEAT_DEF(CNXT_ID, 0x00000001, 0, REG_ECX, 10)
+ FEAT_DEF(FMA, 0x00000001, 0, REG_ECX, 12)
+ FEAT_DEF(CMPXCHG16B, 0x00000001, 0, REG_ECX, 13)
+ FEAT_DEF(XTPR, 0x00000001, 0, REG_ECX, 14)
+ FEAT_DEF(PDCM, 0x00000001, 0, REG_ECX, 15)
+ FEAT_DEF(PCID, 0x00000001, 0, REG_ECX, 17)
+ FEAT_DEF(DCA, 0x00000001, 0, REG_ECX, 18)
+ FEAT_DEF(SSE4_1, 0x00000001, 0, REG_ECX, 19)
+ FEAT_DEF(SSE4_2, 0x00000001, 0, REG_ECX, 20)
+ FEAT_DEF(X2APIC, 0x00000001, 0, REG_ECX, 21)
+ FEAT_DEF(MOVBE, 0x00000001, 0, REG_ECX, 22)
+ FEAT_DEF(POPCNT, 0x00000001, 0, REG_ECX, 23)
+ FEAT_DEF(TSC_DEADLINE, 0x00000001, 0, REG_ECX, 24)
+ FEAT_DEF(AES, 0x00000001, 0, REG_ECX, 25)
+ FEAT_DEF(XSAVE, 0x00000001, 0, REG_ECX, 26)
+ FEAT_DEF(OSXSAVE, 0x00000001, 0, REG_ECX, 27)
+ FEAT_DEF(AVX, 0x00000001, 0, REG_ECX, 28)
+ FEAT_DEF(F16C, 0x00000001, 0, REG_ECX, 29)
+ FEAT_DEF(RDRAND, 0x00000001, 0, REG_ECX, 30)
- {FEAT_DEF(FPU), {0x1, 0, 0, 0, REG_EDX}, 0x00000001},
- {FEAT_DEF(VME), {0x1, 0, 0, 0, REG_EDX}, 0x00000002},
- {FEAT_DEF(DE), {0x1, 0, 0, 0, REG_EDX}, 0x00000004},
- {FEAT_DEF(PSE), {0x1, 0, 0, 0, REG_EDX}, 0x00000008},
- {FEAT_DEF(TSC), {0x1, 0, 0, 0, REG_EDX}, 0x00000010},
- {FEAT_DEF(MSR), {0x1, 0, 0, 0, REG_EDX}, 0x00000020},
- {FEAT_DEF(PAE), {0x1, 0, 0, 0, REG_EDX}, 0x00000040},
- {FEAT_DEF(MCE), {0x1, 0, 0, 0, REG_EDX}, 0x00000080},
- {FEAT_DEF(CX8), {0x1, 0, 0, 0, REG_EDX}, 0x00000100},
- {FEAT_DEF(APIC), {0x1, 0, 0, 0, REG_EDX}, 0x00000200},
- {FEAT_DEF(SEP), {0x1, 0, 0, 0, REG_EDX}, 0x00000800},
- {FEAT_DEF(MTRR), {0x1, 0, 0, 0, REG_EDX}, 0x00001000},
- {FEAT_DEF(PGE), {0x1, 0, 0, 0, REG_EDX}, 0x00002000},
- {FEAT_DEF(MCA), {0x1, 0, 0, 0, REG_EDX}, 0x00004000},
- {FEAT_DEF(CMOV), {0x1, 0, 0, 0, REG_EDX}, 0x00008000},
- {FEAT_DEF(PAT), {0x1, 0, 0, 0, REG_EDX}, 0x00010000},
- {FEAT_DEF(PSE36), {0x1, 0, 0, 0, REG_EDX}, 0x00020000},
- {FEAT_DEF(PSN), {0x1, 0, 0, 0, REG_EDX}, 0x00040000},
- {FEAT_DEF(CLFSH), {0x1, 0, 0, 0, REG_EDX}, 0x00080000},
- {FEAT_DEF(DS), {0x1, 0, 0, 0, REG_EDX}, 0x00200000},
- {FEAT_DEF(ACPI), {0x1, 0, 0, 0, REG_EDX}, 0x00400000},
- {FEAT_DEF(MMX), {0x1, 0, 0, 0, REG_EDX}, 0x00800000},
- {FEAT_DEF(FXSR), {0x1, 0, 0, 0, REG_EDX}, 0x01000000},
- {FEAT_DEF(SSE), {0x1, 0, 0, 0, REG_EDX}, 0x02000000},
- {FEAT_DEF(SSE2), {0x1, 0, 0, 0, REG_EDX}, 0x04000000},
- {FEAT_DEF(SS), {0x1, 0, 0, 0, REG_EDX}, 0x08000000},
- {FEAT_DEF(HTT), {0x1, 0, 0, 0, REG_EDX}, 0x10000000},
- {FEAT_DEF(TM), {0x1, 0, 0, 0, REG_EDX}, 0x20000000},
- {FEAT_DEF(PBE), {0x1, 0, 0, 0, REG_EDX}, 0x80000000},
+ FEAT_DEF(FPU, 0x00000001, 0, REG_EDX, 0)
+ FEAT_DEF(VME, 0x00000001, 0, REG_EDX, 1)
+ FEAT_DEF(DE, 0x00000001, 0, REG_EDX, 2)
+ FEAT_DEF(PSE, 0x00000001, 0, REG_EDX, 3)
+ FEAT_DEF(TSC, 0x00000001, 0, REG_EDX, 4)
+ FEAT_DEF(MSR, 0x00000001, 0, REG_EDX, 5)
+ FEAT_DEF(PAE, 0x00000001, 0, REG_EDX, 6)
+ FEAT_DEF(MCE, 0x00000001, 0, REG_EDX, 7)
+ FEAT_DEF(CX8, 0x00000001, 0, REG_EDX, 8)
+ FEAT_DEF(APIC, 0x00000001, 0, REG_EDX, 9)
+ FEAT_DEF(SEP, 0x00000001, 0, REG_EDX, 11)
+ FEAT_DEF(MTRR, 0x00000001, 0, REG_EDX, 12)
+ FEAT_DEF(PGE, 0x00000001, 0, REG_EDX, 13)
+ FEAT_DEF(MCA, 0x00000001, 0, REG_EDX, 14)
+ FEAT_DEF(CMOV, 0x00000001, 0, REG_EDX, 15)
+ FEAT_DEF(PAT, 0x00000001, 0, REG_EDX, 16)
+ FEAT_DEF(PSE36, 0x00000001, 0, REG_EDX, 17)
+ FEAT_DEF(PSN, 0x00000001, 0, REG_EDX, 18)
+ FEAT_DEF(CLFSH, 0x00000001, 0, REG_EDX, 19)
+ FEAT_DEF(DS, 0x00000001, 0, REG_EDX, 21)
+ FEAT_DEF(ACPI, 0x00000001, 0, REG_EDX, 22)
+ FEAT_DEF(MMX, 0x00000001, 0, REG_EDX, 23)
+ FEAT_DEF(FXSR, 0x00000001, 0, REG_EDX, 24)
+ FEAT_DEF(SSE, 0x00000001, 0, REG_EDX, 25)
+ FEAT_DEF(SSE2, 0x00000001, 0, REG_EDX, 26)
+ FEAT_DEF(SS, 0x00000001, 0, REG_EDX, 27)
+ FEAT_DEF(HTT, 0x00000001, 0, REG_EDX, 28)
+ FEAT_DEF(TM, 0x00000001, 0, REG_EDX, 29)
+ FEAT_DEF(PBE, 0x00000001, 0, REG_EDX, 31)
- {FEAT_DEF(DIGTEMP), {0x6, 0, 0, 0, REG_EAX}, 0x00000001},
- {FEAT_DEF(TRBOBST), {0x6, 0, 0, 0, REG_EAX}, 0x00000002},
- {FEAT_DEF(ARAT), {0x6, 0, 0, 0, REG_EAX}, 0x00000004},
- {FEAT_DEF(PLN), {0x6, 0, 0, 0, REG_EAX}, 0x00000010},
- {FEAT_DEF(ECMD), {0x6, 0, 0, 0, REG_EAX}, 0x00000020},
- {FEAT_DEF(PTM), {0x6, 0, 0, 0, REG_EAX}, 0x00000040},
+ FEAT_DEF(DIGTEMP, 0x00000006, 0, REG_EAX, 0)
+ FEAT_DEF(TRBOBST, 0x00000006, 0, REG_EAX, 1)
+ FEAT_DEF(ARAT, 0x00000006, 0, REG_EAX, 2)
+ FEAT_DEF(PLN, 0x00000006, 0, REG_EAX, 4)
+ FEAT_DEF(ECMD, 0x00000006, 0, REG_EAX, 5)
+ FEAT_DEF(PTM, 0x00000006, 0, REG_EAX, 6)
- {FEAT_DEF(MPERF_APERF_MSR), {0x6, 0, 0, 0, REG_ECX}, 0x00000001},
- {FEAT_DEF(ACNT2), {0x6, 0, 0, 0, REG_ECX}, 0x00000002},
- {FEAT_DEF(ENERGY_EFF), {0x6, 0, 0, 0, REG_ECX}, 0x00000008},
+ FEAT_DEF(MPERF_APERF_MSR, 0x00000006, 0, REG_ECX, 0)
+ FEAT_DEF(ACNT2, 0x00000006, 0, REG_ECX, 1)
+ FEAT_DEF(ENERGY_EFF, 0x00000006, 0, REG_ECX, 3)
- {FEAT_DEF(FSGSBASE), {0x7, 0, 0, 0, REG_EBX}, 0x00000001},
- {FEAT_DEF(BMI1), {0x7, 0, 0, 0, REG_EBX}, 0x00000004},
- {FEAT_DEF(HLE), {0x7, 0, 0, 0, REG_EBX}, 0x00000010},
- {FEAT_DEF(AVX2), {0x7, 0, 0, 0, REG_EBX}, 0x00000020},
- {FEAT_DEF(SMEP), {0x7, 0, 0, 0, REG_EBX}, 0x00000040},
- {FEAT_DEF(BMI2), {0x7, 0, 0, 0, REG_EBX}, 0x00000080},
- {FEAT_DEF(ERMS), {0x7, 0, 0, 0, REG_EBX}, 0x00000100},
- {FEAT_DEF(INVPCID), {0x7, 0, 0, 0, REG_EBX}, 0x00000400},
- {FEAT_DEF(RTM), {0x7, 0, 0, 0, REG_EBX}, 0x00000800},
+ FEAT_DEF(FSGSBASE, 0x00000007, 0, REG_EBX, 0)
+ FEAT_DEF(BMI1, 0x00000007, 0, REG_EBX, 2)
+ FEAT_DEF(HLE, 0x00000007, 0, REG_EBX, 4)
+ FEAT_DEF(AVX2, 0x00000007, 0, REG_EBX, 5)
+ FEAT_DEF(SMEP, 0x00000007, 0, REG_EBX, 6)
+ FEAT_DEF(BMI2, 0x00000007, 0, REG_EBX, 7)
+ FEAT_DEF(ERMS, 0x00000007, 0, REG_EBX, 8)
+ FEAT_DEF(INVPCID, 0x00000007, 0, REG_EBX, 10)
+ FEAT_DEF(RTM, 0x00000007, 0, REG_EBX, 11)
- {FEAT_DEF(LAHF_SAHF), {0x80000001, 0, 0, 0, REG_ECX}, 0x00000001},
- {FEAT_DEF(LZCNT), {0x80000001, 0, 0, 0, REG_ECX}, 0x00000010},
+ FEAT_DEF(LAHF_SAHF, 0x80000001, 0, REG_ECX, 0)
+ FEAT_DEF(LZCNT, 0x80000001, 0, REG_ECX, 4)
- {FEAT_DEF(SYSCALL), {0x80000001, 0, 0, 0, REG_EDX}, 0x00000800},
- {FEAT_DEF(XD), {0x80000001, 0, 0, 0, REG_EDX}, 0x00100000},
- {FEAT_DEF(1GB_PG), {0x80000001, 0, 0, 0, REG_EDX}, 0x04000000},
- {FEAT_DEF(RDTSCP), {0x80000001, 0, 0, 0, REG_EDX}, 0x08000000},
- {FEAT_DEF(EM64T), {0x80000001, 0, 0, 0, REG_EDX}, 0x20000000},
+ FEAT_DEF(SYSCALL, 0x80000001, 0, REG_EDX, 11)
+ FEAT_DEF(XD, 0x80000001, 0, REG_EDX, 20)
+ FEAT_DEF(1GB_PG, 0x80000001, 0, REG_EDX, 26)
+ FEAT_DEF(RDTSCP, 0x80000001, 0, REG_EDX, 27)
+ FEAT_DEF(EM64T, 0x80000001, 0, REG_EDX, 29)
- {FEAT_DEF(INVTSC), {0x80000007, 0, 0, 0, REG_EDX}, 0x00000100},
+ FEAT_DEF(INVTSC, 0x80000007, 0, REG_EDX, 8)
};
/*
@@ -187,51 +180,27 @@ static const struct feature_entry cpu_feature_table[] = {
* This function, when compiled with GCC, will generate architecture-neutral
* code, as per GCC manual.
*/
-static inline int
-rte_cpu_get_features(struct cpuid_parameters_t params)
+static inline void
+rte_cpu_get_features(uint32_t leaf, uint32_t subleaf, cpuid_registers_t out)
{
- int eax, ebx, ecx, edx; /* registers */
-
-#ifndef __PIC__
- asm volatile ("cpuid"
- /* output */
- : "=a" (eax),
- "=b" (ebx),
- "=c" (ecx),
- "=d" (edx)
- /* input */
- : "a" (params.eax),
- "b" (params.ebx),
- "c" (params.ecx),
- "d" (params.edx));
+#if defined(__i386__) && defined(__PIC__)
+ /* %ebx is a forbidden register if we compile with -fPIC or -fPIE */
+ asm volatile("movl %%ebx,%0 ; cpuid ; xchgl %%ebx,%0"
+ : "=r" (out[REG_EBX]),
+ "=a" (out[REG_EAX]),
+ "=c" (out[REG_ECX]),
+ "=d" (out[REG_EDX])
+ : "a" (leaf), "c" (subleaf));
#else
- asm volatile (
- "mov %%ebx, %%edi\n"
- "cpuid\n"
- "xchgl %%ebx, %%edi;\n"
- : "=a" (eax),
- "=D" (ebx),
- "=c" (ecx),
- "=d" (edx)
- /* input */
- : "a" (params.eax),
- "D" (params.ebx),
- "c" (params.ecx),
- "d" (params.edx));
-#endif
- switch (params.return_register) {
- case REG_EAX:
- return eax;
- case REG_EBX:
- return ebx;
- case REG_ECX:
- return ecx;
- case REG_EDX:
- return edx;
- default:
- return 0;
- }
+ asm volatile("cpuid"
+ : "=a" (out[REG_EAX]),
+ "=b" (out[REG_EBX]),
+ "=c" (out[REG_ECX]),
+ "=d" (out[REG_EDX])
+ : "a" (leaf), "c" (subleaf));
+
+#endif
}
/*
@@ -240,17 +209,30 @@ rte_cpu_get_features(struct cpuid_parameters_t params)
int
rte_cpu_get_flag_enabled(enum rte_cpu_flag_t feature)
{
- int value;
+ const struct feature_entry *feat;
+ cpuid_registers_t regs;
+
if (feature >= RTE_CPUFLAG_NUMFLAGS)
/* Flag does not match anything in the feature tables */
return -ENOENT;
- /* get value of the register containing the desired feature */
- value = rte_cpu_get_features(cpu_feature_table[feature].params);
+ feat = &cpu_feature_table[feature];
+
+ if (!feat->leaf)
+ /* This entry in the table wasn't filled out! */
+ return -EFAULT;
+
+ rte_cpu_get_features(feat->leaf & 0xffff0000, 0, regs);
+ if (((regs[REG_EAX] ^ feat->leaf) & 0xffff0000) ||
+ regs[REG_EAX] < feat->leaf)
+ return 0;
+
+ /* get the cpuid leaf containing the desired feature */
+ rte_cpu_get_features(feat->leaf, feat->subleaf, regs);
/* check if the feature is enabled */
- return (cpu_feature_table[feature].feature_mask & value) > 0;
+ return (regs[feat->reg] >> feat->bit) & 1;
}
/**
@@ -271,9 +253,18 @@ rte_cpu_check_supported(void)
RTE_COMPILE_TIME_CPUFLAGS
};
unsigned i;
+ int ret;
for (i = 0; i < sizeof(compile_time_flags)/sizeof(compile_time_flags[0]); i++)
- if (rte_cpu_get_flag_enabled(compile_time_flags[i]) < 1) {
+ ret = rte_cpu_get_flag_enabled(compile_time_flags[i]);
+
+ if (ret < 0) {
+ fprintf(stderr,
+ "ERROR: CPU feature flag lookup failed with error %d\n",
+ ret);
+ exit(1);
+ }
+ if (!ret) {
fprintf(stderr,
"ERROR: This system does not support \"%s\".\n"
"Please check that RTE_MACHINE is set correctly.\n",
--
1.8.3.1
^ permalink raw reply related [flat|nested] 21+ messages in thread