[meta-oe][PATCH 2/2] opencv: disable broken Intel FP16 detection

All of lore.kernel.org
 help / color / mirror / Atom feed

From: Randy MacLeod <Randy.MacLeod@windriver.com>
To: <openembedded-devel@lists.openembedded.org>
Subject: [meta-oe][PATCH 2/2] opencv: disable broken Intel FP16 detection
Date: Tue, 2 May 2017 14:46:55 -0400	[thread overview]
Message-ID: <20170502184655.25127-2-Randy.MacLeod@windriver.com> (raw)
In-Reply-To: <20170502184655.25127-1-Randy.MacLeod@windriver.com>

With opencv-3.2, the configuration detects if the target supports
half-precision floating-point format. This fails to compile for some
Intel targets such as skylake with an error such as:
   error: '_mm_cvtph_ps' was not declared in this scope

The configuration used to work in opencv-3.1 so revert two commits to
drop the FP16 detection even though it may make opencv slower.
The only change in the configure log is:
   - FP16: Compiler support is available
   + FP16: Compiler support is not available

The patch should be dropped when a newer version of opencv that includes commit:
   https://github.com/opencv/opencv/commit/e5d9b608c47d54e43496041595025fa282fa9de5
is available. Backporting that fix was complicated.

Signed-off-by: Randy MacLeod <Randy.MacLeod@windriver.com>
---
 .../0001-Revert-cuda-fix-fp16-compilation.patch    |  27 +++
 ...vert-check-FP16-build-condition-correctly.patch | 245 +++++++++++++++++++++
 meta-oe/recipes-support/opencv/opencv_3.2.bb       |   2 +
 3 files changed, 274 insertions(+)
 create mode 100644 meta-oe/recipes-support/opencv/opencv/0001-Revert-cuda-fix-fp16-compilation.patch
 create mode 100644 meta-oe/recipes-support/opencv/opencv/0002-Revert-check-FP16-build-condition-correctly.patch

diff --git a/meta-oe/recipes-support/opencv/opencv/0001-Revert-cuda-fix-fp16-compilation.patch b/meta-oe/recipes-support/opencv/opencv/0001-Revert-cuda-fix-fp16-compilation.patch
new file mode 100644
index 0000000..507d796
--- /dev/null
+++ b/meta-oe/recipes-support/opencv/opencv/0001-Revert-cuda-fix-fp16-compilation.patch
@@ -0,0 +1,27 @@
+From 69f9707678190f6a0948a547dce948251f972676 Mon Sep 17 00:00:00 2001
+From: Randy MacLeod <Randy.MacLeod@windriver.com>
+Date: Wed, 26 Apr 2017 14:57:30 -0400
+Subject: [PATCH 1/2] Revert "cuda: fix fp16 compilation"
+
+This reverts commit 12e00827be40576b686ea4438a6e6ef85208743d.
+---
+ modules/core/include/opencv2/core/cvdef.h | 3 +--
+ 1 file changed, 1 insertion(+), 2 deletions(-)
+
+diff --git a/modules/core/include/opencv2/core/cvdef.h b/modules/core/include/opencv2/core/cvdef.h
+index 699b166..efc24ca 100644
+--- a/modules/core/include/opencv2/core/cvdef.h
++++ b/modules/core/include/opencv2/core/cvdef.h
+@@ -303,8 +303,7 @@ enum CpuFeatures {
+ #define CV_2PI 6.283185307179586476925286766559
+ #define CV_LOG2 0.69314718055994530941723212145818
+ 
+-#if defined __ARM_FP16_FORMAT_IEEE \
+-    && !defined __CUDACC__
++#if defined (__ARM_FP16_FORMAT_IEEE)
+ #  define CV_FP16_TYPE 1
+ #else
+ #  define CV_FP16_TYPE 0
+-- 
+2.9.3
+
diff --git a/meta-oe/recipes-support/opencv/opencv/0002-Revert-check-FP16-build-condition-correctly.patch b/meta-oe/recipes-support/opencv/opencv/0002-Revert-check-FP16-build-condition-correctly.patch
new file mode 100644
index 0000000..d1950a9
--- /dev/null
+++ b/meta-oe/recipes-support/opencv/opencv/0002-Revert-check-FP16-build-condition-correctly.patch
@@ -0,0 +1,245 @@
+From 9108e39e5584ef9b41f80751639b4ec72b3e9538 Mon Sep 17 00:00:00 2001
+From: Randy MacLeod <Randy.MacLeod@windriver.com>
+Date: Wed, 26 Apr 2017 15:00:32 -0400
+Subject: [PATCH 2/2] Revert "check FP16 build condition correctly"
+
+This reverts commit c7cb116dc08441fe56cf82d5b21f929e5b674c13.
+
+Fix up revert conflicts to take previous behaviour.
+---
+ cmake/OpenCVCompilerOptions.cmake         | 45 +++++++++--------------
+ modules/core/include/opencv2/core/cvdef.h |  2 +-
+ modules/core/src/convert.cpp              | 11 +++---
+ modules/core/test/test_intrin.cpp         | 60 ++++++++++++++-----------------
+ 4 files changed, 48 insertions(+), 70 deletions(-)
+
+diff --git a/cmake/OpenCVCompilerOptions.cmake b/cmake/OpenCVCompilerOptions.cmake
+index 5bb0479..4b19fdb 100644
+--- a/cmake/OpenCVCompilerOptions.cmake
++++ b/cmake/OpenCVCompilerOptions.cmake
+@@ -185,7 +185,7 @@ if(CMAKE_COMPILER_IS_GNUCXX)
+     add_extra_compiler_option("-mfp16-format=ieee")
+   endif(ARM)
+   if(ENABLE_NEON)
+-    add_extra_compiler_option("-mfpu=neon")
++    add_extra_compiler_option("-mfpu=neon-fp16")
+   endif()
+   if(ENABLE_VFPV3 AND NOT ENABLE_NEON)
+     add_extra_compiler_option("-mfpu=vfpv3")
+@@ -370,34 +370,6 @@ if(CMAKE_COMPILER_IS_GNUCXX AND CMAKE_OPENCV_GCC_VERSION_NUM GREATER 399)
+   add_extra_compiler_option(-fvisibility-inlines-hidden)
+ endif()
+ 
+-if(NOT OPENCV_FP16_DISABLE AND NOT IOS)
+-  if(ARM AND ENABLE_NEON)
+-    set(FP16_OPTION "-mfpu=neon-fp16")
+-  elseif((X86 OR X86_64) AND NOT MSVC AND ENABLE_AVX)
+-    set(FP16_OPTION "-mf16c")
+-  endif()
+-  try_compile(__VALID_FP16
+-    "${OpenCV_BINARY_DIR}"
+-    "${OpenCV_SOURCE_DIR}/cmake/checks/fp16.cpp"
+-    COMPILE_DEFINITIONS "-DCHECK_FP16" "${FP16_OPTION}"
+-    OUTPUT_VARIABLE TRY_OUT
+-    )
+-  if(NOT __VALID_FP16)
+-    if((X86 OR X86_64) AND NOT MSVC AND NOT ENABLE_AVX)
+-      # GCC enables AVX when mf16c is passed
+-      message(STATUS "FP16: Feature disabled")
+-    else()
+-      message(STATUS "FP16: Compiler support is not available")
+-    endif()
+-  else()
+-    message(STATUS "FP16: Compiler support is available")
+-    set(HAVE_FP16 1)
+-    if(NOT ${FP16_OPTION} STREQUAL "")
+-      add_extra_compiler_option(${FP16_OPTION})
+-    endif()
+-  endif()
+-endif()
+-
+ #combine all "extra" options
+ set(CMAKE_C_FLAGS           "${CMAKE_C_FLAGS} ${OPENCV_EXTRA_FLAGS} ${OPENCV_EXTRA_C_FLAGS}")
+ set(CMAKE_CXX_FLAGS         "${CMAKE_CXX_FLAGS} ${OPENCV_EXTRA_FLAGS} ${OPENCV_EXTRA_CXX_FLAGS}")
+@@ -450,6 +422,21 @@ if(MSVC)
+   endif()
+ endif()
+ 
++if(NOT OPENCV_FP16_DISABLE)
++  try_compile(__VALID_FP16
++    "${OpenCV_BINARY_DIR}"
++    "${OpenCV_SOURCE_DIR}/cmake/checks/fp16.cpp"
++    COMPILE_DEFINITIONS "-DCHECK_FP16"
++    OUTPUT_VARIABLE TRY_OUT
++    )
++  if(NOT __VALID_FP16)
++    message(STATUS "FP16: Compiler support is not available")
++  else()
++    message(STATUS "FP16: Compiler support is available")
++    set(HAVE_FP16 1)
++  endif()
++endif()
++
+ if(APPLE AND NOT CMAKE_CROSSCOMPILING AND NOT DEFINED ENV{LDFLAGS} AND EXISTS "/usr/local/lib")
+   link_directories("/usr/local/lib")
+ endif()
+diff --git a/modules/core/include/opencv2/core/cvdef.h b/modules/core/include/opencv2/core/cvdef.h
+index efc24ca..a10936b 100644
+--- a/modules/core/include/opencv2/core/cvdef.h
++++ b/modules/core/include/opencv2/core/cvdef.h
+@@ -312,7 +312,7 @@ enum CpuFeatures {
+ typedef union Cv16suf
+ {
+     short i;
+-#if CV_FP16_TYPE
++#if ( defined (__arm__) || defined (__aarch64__) ) && !defined (__CUDACC__) && ( defined (__GNUC__) && ( ( ( 4 <= __GNUC__ ) && ( 7 <= __GNUC__ ) ) || ( 5 <= __GNUC__ ) ) )
+     __fp16 h;
+ #endif
+     struct _fp16Format
+diff --git a/modules/core/src/convert.cpp b/modules/core/src/convert.cpp
+index e04d89e..46db26f 100644
+--- a/modules/core/src/convert.cpp
++++ b/modules/core/src/convert.cpp
+@@ -44,7 +44,6 @@
+ #include "precomp.hpp"
+ 
+ #include "opencl_kernels_core.hpp"
+-#include "opencv2/core/hal/intrin.hpp"
+ 
+ #include "opencv2/core/openvx/ovx_defs.hpp"
+ 
+@@ -4382,7 +4381,7 @@ struct Cvt_SIMD<float, int>
+ 
+ #endif
+ 
+-#if !CV_FP16_TYPE
++#if !( ( defined (__arm__) || defined (__aarch64__) ) && ( defined (__GNUC__) && ( ( ( 4 <= __GNUC__ ) && ( 7 <= __GNUC__ ) ) || ( 5 <= __GNUC__ ) ) ) )
+ // const numbers for floating points format
+ const unsigned int kShiftSignificand    = 13;
+ const unsigned int kMaskFp16Significand = 0x3ff;
+@@ -4390,7 +4389,7 @@ const unsigned int kBiasFp16Exponent    = 15;
+ const unsigned int kBiasFp32Exponent    = 127;
+ #endif
+ 
+-#if CV_FP16_TYPE
++#if ( defined (__arm__) || defined (__aarch64__) ) && ( defined (__GNUC__) && ( ( ( 4 <= __GNUC__ ) && ( 7 <= __GNUC__ ) ) || ( 5 <= __GNUC__ ) ) )
+ static float convertFp16SW(short fp16)
+ {
+     // Fp16 -> Fp32
+@@ -4452,7 +4451,7 @@ static float convertFp16SW(short fp16)
+ }
+ #endif
+ 
+-#if CV_FP16_TYPE
++#if ( defined (__arm__) || defined (__aarch64__) ) && ( defined (__GNUC__) && ( ( ( 4 <= __GNUC__ ) && ( 7 <= __GNUC__ ) ) || ( 5 <= __GNUC__ ) ) )
+ static short convertFp16SW(float fp32)
+ {
+     // Fp32 -> Fp16
+@@ -4560,7 +4559,7 @@ cvtScaleHalf_<float, short>( const float* src, size_t sstep, short* dst, size_t
+             if ( ( (intptr_t)dst & 0xf ) == 0 )
+ #endif
+             {
+-#if CV_FP16 && CV_SIMD128
++#if CV_FP16
+                 for ( ; x <= size.width - 4; x += 4)
+                 {
+                     v_float32x4 v_src = v_load(src + x);
+@@ -4606,7 +4605,7 @@ cvtScaleHalf_<short, float>( const short* src, size_t sstep, float* dst, size_t
+             if ( ( (intptr_t)src & 0xf ) == 0 )
+ #endif
+             {
+-#if CV_FP16 && CV_SIMD128
++#if CV_FP16
+                 for ( ; x <= size.width - 4; x += 4)
+                 {
+                     v_float16x4 v_src = v_load_f16(src + x);
+diff --git a/modules/core/test/test_intrin.cpp b/modules/core/test/test_intrin.cpp
+index 66b2083..7349d48 100644
+--- a/modules/core/test/test_intrin.cpp
++++ b/modules/core/test/test_intrin.cpp
+@@ -729,56 +729,48 @@ template<typename R> struct TheTest
+         return *this;
+     }
+ 
++#if CV_FP16
+     TheTest & test_loadstore_fp16()
+     {
+-#if CV_FP16
+         AlignedData<R> data;
+         AlignedData<R> out;
+ 
+-        if(checkHardwareSupport(CV_CPU_FP16))
+-        {
+-            // check if addresses are aligned and unaligned respectively
+-            EXPECT_EQ((size_t)0, (size_t)&data.a.d % 16);
+-            EXPECT_NE((size_t)0, (size_t)&data.u.d % 16);
+-            EXPECT_EQ((size_t)0, (size_t)&out.a.d % 16);
+-            EXPECT_NE((size_t)0, (size_t)&out.u.d % 16);
+-
+-            // check some initialization methods
+-            R r1 = data.u;
+-            R r2 = v_load_f16(data.a.d);
+-            R r3(r2);
+-            EXPECT_EQ(data.u[0], r1.get0());
+-            EXPECT_EQ(data.a[0], r2.get0());
+-            EXPECT_EQ(data.a[0], r3.get0());
+-
+-            // check some store methods
+-            out.a.clear();
+-            v_store_f16(out.a.d, r1);
+-            EXPECT_EQ(data.a, out.a);
+-        }
++        // check if addresses are aligned and unaligned respectively
++        EXPECT_EQ((size_t)0, (size_t)&data.a.d % 16);
++        EXPECT_NE((size_t)0, (size_t)&data.u.d % 16);
++        EXPECT_EQ((size_t)0, (size_t)&out.a.d % 16);
++        EXPECT_NE((size_t)0, (size_t)&out.u.d % 16);
++
++        // check some initialization methods
++        R r1 = data.u;
++        R r2 = v_load_f16(data.a.d);
++        R r3(r2);
++        EXPECT_EQ(data.u[0], r1.get0());
++        EXPECT_EQ(data.a[0], r2.get0());
++        EXPECT_EQ(data.a[0], r3.get0());
++
++        // check some store methods
++        out.a.clear();
++        v_store_f16(out.a.d, r1);
++        EXPECT_EQ(data.a, out.a);
+ 
+         return *this;
+-#endif
+     }
+ 
+     TheTest & test_float_cvt_fp16()
+     {
+-#if CV_FP16
+         AlignedData<v_float32x4> data;
+ 
+-        if(checkHardwareSupport(CV_CPU_FP16))
+-        {
+-            // check conversion
+-            v_float32x4 r1 = v_load(data.a.d);
+-            v_float16x4 r2 = v_cvt_f16(r1);
+-            v_float32x4 r3 = v_cvt_f32(r2);
+-            EXPECT_EQ(0x3c00, r2.get0());
+-            EXPECT_EQ(r3.get0(), r1.get0());
+-        }
++        // check conversion
++        v_float32x4 r1 = v_load(data.a.d);
++        v_float16x4 r2 = v_cvt_f16(r1);
++        v_float32x4 r3 = v_cvt_f32(r2);
++        EXPECT_EQ(0x3c00, r2.get0());
++        EXPECT_EQ(r3.get0(), r1.get0());
+ 
+         return *this;
+-#endif
+     }
++#endif
+ 
+ };
+ 
+-- 
+2.9.3
+
diff --git a/meta-oe/recipes-support/opencv/opencv_3.2.bb b/meta-oe/recipes-support/opencv/opencv_3.2.bb
index 2cff212..98b6b06 100644
--- a/meta-oe/recipes-support/opencv/opencv_3.2.bb
+++ b/meta-oe/recipes-support/opencv/opencv_3.2.bb
@@ -27,6 +27,8 @@ SRC_URI = "git://github.com/opencv/opencv.git;name=opencv \
     file://fixpkgconfig.patch \
     file://uselocalxfeatures.patch;patchdir=../contrib/ \
     file://useoeprotobuf.patch;patchdir=../contrib/ \
+    file://0001-Revert-cuda-fix-fp16-compilation.patch \
+    file://0002-Revert-check-FP16-build-condition-correctly.patch \
 "
 
 PV = "3.2+git${SRCPV}"
-- 
2.9.3

     prev parent reply	other threads:[~2017-05-02 18:47 UTC|newest]

Thread overview: 2+ messages / expand[flat|nested]  mbox.gz  Atom feed  top
2017-05-02 18:46 [meta-oe][PATCH 1/2] Revert "opencv: disable broken Intel FP16 detection" Randy MacLeod
2017-05-02 18:46 ` Randy MacLeod [this message]

find likely ancestor, descendant, or conflicting patches for this message:
( dfblob:507d796 dfblob:d1950a9 dfblob:2cff212 dfblob:98b6b06 )
 OR (
bs:"[meta-oe][PATCH 2/2] opencv: disable broken Intel FP16 detection" )
	(help)

Reply instructions:

You may reply publicly to this message via plain-text email
using any one of the following methods:

* Save the following mbox file, import it into your mail client,
  and reply-to-all from there: mbox

  Avoid top-posting and favor interleaved quoting:
  https://en.wikipedia.org/wiki/Posting_style#Interleaved_style

* Reply using the --to, --cc, and --in-reply-to
  switches of git-send-email(1):

  git send-email \
    --in-reply-to=20170502184655.25127-2-Randy.MacLeod@windriver.com \
    --to=randy.macleod@windriver.com \
    --cc=openembedded-devel@lists.openembedded.org \
    /path/to/YOUR_REPLY

  https://kernel.org/pub/software/scm/git/docs/git-send-email.html

* If your mail client supports setting the In-Reply-To header
  via mailto: links, try the mailto: link

Be sure your reply has a Subject: header at the top and a blank line before the message body.

This is an external index of several public inboxes,
see mirroring instructions on how to clone and mirror
all data and code used by this external index.