Skip to content

3.0-rc1 #105

New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

Merged
merged 71 commits into from
Oct 14, 2021
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
Show all changes
71 commits
Select commit Hold shift + click to select a range
32a3d47
Do some backup
Mar 7, 2021
dad5555
Do some backup
Mar 7, 2021
2a21e62
Do some backup
Mar 7, 2021
e36357e
Many fixes for C11 API
Mar 16, 2021
ea57cc7
Add support for C11 to tests
Mar 16, 2021
bcc489d
Rewrite pack.md
Mar 17, 2021
0faa4f4
Finish documentation on packs
Mar 17, 2021
9b89c33
Add += *= /= ... to C++ advanced API
Apr 6, 2021
47d8843
Forgot some files
Apr 6, 2021
8df5329
Add C++ advanced API aliases
Apr 6, 2021
f52d1e9
Git more files
Apr 7, 2021
b18d60b
Begin adding Sleef big math functions to NSIMD
Apr 11, 2021
6f8cb35
Add doc for +=, -=, ... and for function aliases
Apr 11, 2021
a1b3731
Backup
Apr 14, 2021
914161d
Backup before power cut
Apr 21, 2021
fa6f0bc
src/ulps.cpp
May 2, 2021
06b7300
More on ULPs
May 23, 2021
283d688
More on ULPs
May 25, 2021
6cb1bc7
Fixes in ULPs doc
May 29, 2021
bcfc597
finished with denormal in doc
May 29, 2021
b65952a
Add UFP related functions but no test for now
Jun 3, 2021
83eac95
Add support for scalar math functions
Jun 5, 2021
ce8a6ca
Add test for UFP
Jun 6, 2021
414bb1e
Integration of sine (ulp 3.5) from Sleef seems to work
Jun 8, 2021
ea5a28b
Backup
Jun 8, 2021
7cd9529
Compilation of all tests seems fine
Jun 9, 2021
eb7216a
Remove fixed_point module own operator list and make it use the centr…
Jun 19, 2021
f3f6129
Tests passed for CPU
Jun 20, 2021
b2039c0
It seems to work for Intel
Jun 21, 2021
e9fd654
New tests system
Jun 28, 2021
ed5847b
Add Sleef code
Jun 28, 2021
2855237
Add emulation layer for SIMD not supporting double
Jun 28, 2021
04ddbce
Fix support of SIMD emulation for doubles
Jun 29, 2021
720cb1c
Fix cmake build system
Jul 10, 2021
8344ac3
Improve andnot doc
Jul 10, 2021
da31868
Fix generation of Sleef files
Jul 10, 2021
13670ee
Replace SVE _z intrinsics by _x ones
Jul 11, 2021
e55575e
Fix SVE implementation
Jul 11, 2021
dc0877a
For backup: beginning of merging PPC from xberault
Jul 12, 2021
1dc0d7a
For backup: continuing merging PPC from xberault
Jul 14, 2021
3d53b39
More on VMX and VSX
Jul 15, 2021
9391233
Fix what_is_wrapped
Jul 16, 2021
043e821
Doc seems to be OK
Jul 16, 2021
9a1d343
VMX compiles but with a lot of warnings
Jul 16, 2021
be7558f
Refurbishing implementation of platform ppc
Jul 19, 2021
9ff0178
Refurbishing implementation of platform ppc
Jul 20, 2021
7c14568
Refurbishing implementation of platform ppc
Jul 21, 2021
679a5b3
Refurbishing implementation of platform ppc
Jul 26, 2021
7346a83
Refurbishing implementation of platform ppc
Jul 26, 2021
30ff72e
Fixes for PPC
Jul 26, 2021
9709764
No compilation with Xlc
Jul 27, 2021
4ee4d92
Fix warnings when compiling C++/PPC code + update CONTRIBUTING with PPC
Jul 27, 2021
f23b348
Update documentation
Jul 27, 2021
457fd88
Many fixes to ppc
Jul 28, 2021
7a0081d
Fixes for VSX
Jul 29, 2021
a13ad38
More fixes for PPC
Aug 1, 2021
de4df5c
Fix PPC
Aug 2, 2021
2cd07bb
Test
Aug 2, 2021
5cc48ed
Fix README
Aug 2, 2021
038b230
Beginning integrating oneAPI
Aug 3, 2021
f79895e
Backup oneAPI implementation
Aug 5, 2021
9e2c227
Freeze for WASM
Aug 6, 2021
b591841
Fix oneAPI SPMD
Aug 11, 2021
b2c013a
All tests are ok for oneAPI
Aug 11, 2021
b4a9068
Fix various things
Aug 12, 2021
5a2e067
Fixes for CUDA
Sep 8, 2021
a54075e
Fix for ROCm
Sep 15, 2021
93ddf6e
Add contributors
Sep 16, 2021
483d7b0
Update README
Sep 29, 2021
c99cd2e
Backup from time to time
Oct 13, 2021
76ab114
Gather all before rc1
Oct 14, 2021
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
160 changes: 142 additions & 18 deletions CMakeLists.txt
Original file line number Diff line number Diff line change
Expand Up @@ -21,7 +21,7 @@
# SOFTWARE.

cmake_minimum_required(VERSION 3.0.2)
project(NSIMD VERSION 2.2 LANGUAGES CXX)
project(NSIMD VERSION 3.0 LANGUAGES C CXX)

# -----------------------------------------------------------------------------
# First check that NSIMD code has been generated
Expand Down Expand Up @@ -62,8 +62,11 @@ function(nsimd_get_compiler_argument simd_ext argument)
set(mapping_sve512 "/DSVE512")
set(mapping_sve1024 "/DSVE1024")
set(mapping_sve2048 "/DSVE2048")
# PowerPC (VMX/VSX) and accelerator back-ends for the branch that uses
# `/D<NAME>`-style preprocessor definitions. Each mapping only defines the
# macro naming the SIMD extension; no architecture switch is added here.
set(mapping_vmx "/DVMX")
set(mapping_vsx "/DVSX")
set(mapping_cuda "/DCUDA")
set(mapping_rocm "/DROCM")
# Fixed: this used to read "/ONEAPI" (missing the `D`), which is not a
# preprocessor definition and disagreed with the "-DONEAPI" mapping used in
# the other branch.
set(mapping_oneapi "/DONEAPI")
else()
set(mapping_sse2 "-DSSE2;-msse2" )
set(mapping_sse42 "-DSSE42;-msse4.2" )
Expand All @@ -89,8 +92,11 @@ function(nsimd_get_compiler_argument simd_ext argument)
";-msve-vector-bits=1024")
# Each mapping is a CMake list (`;`-separated): one element per compiler
# argument.
# Fixed: "-DSVE2048" and "-march=..." were separated by a space, which made
# them a single list element and therefore a single (malformed) compiler
# argument. They are now distinct elements, like the other mappings.
set(mapping_sve2048 "-DSVE2048;-march=armv8.2-a+sve"
                    "-msve-vector-bits=2048")
# PowerPC: VMX is AltiVec, VSX is its extension; both target powerpc64le.
set(mapping_vmx "-DVMX;-mcpu=powerpc64le;-maltivec")
set(mapping_vsx "-DVSX;-mcpu=powerpc64le;-mvsx")
# Accelerator back-ends only need the macro naming the back-end.
set(mapping_cuda "-DCUDA")
set(mapping_rocm "-DROCM")
set(mapping_oneapi "-DONEAPI")
endif()
if (DEFINED mapping_${simd_ext})
set(${argument} "${mapping_${simd_ext}}" PARENT_SCOPE)
Expand All @@ -111,51 +117,109 @@ nsimd_get_compiler_argument(${simd} NSIMD_COMPILATION_OPTIONS)
# -----------------------------------------------------------------------------
# Object file selection

set(NSIMD_OBJS "fp16;memory;ulps;api_cpu")
set(NSIMD_OBJS "fp16;gpu;memory;api_cpu;rempitab;sleefsp;sleefdp")

if ("${simd}" STREQUAL "sse2")
set(NSIMD_OBJS "${NSIMD_OBJS};api_sse2")
set(NSIMD_OBJS "${NSIMD_OBJS};api_sse2;sleef_sse2_f32;sleef_sse2_f64")
elseif ("${simd}" STREQUAL "sse42")
set(NSIMD_OBJS "${NSIMD_OBJS};api_sse2;api_sse42")
set(NSIMD_OBJS "${NSIMD_OBJS};api_sse2;api_sse42;"
"sleef_sse2_f32;sleef_sse2_f64;"
"sleef_sse42_f32;sleef_sse42_f64")
elseif ("${simd}" STREQUAL "avx")
set(NSIMD_OBJS "${NSIMD_OBJS};api_sse2;api_sse42;api_avx")
set(NSIMD_OBJS "${NSIMD_OBJS};api_sse2;api_sse42;api_avx;"
"sleef_sse2_f32;sleef_sse2_f64;"
"sleef_sse42_f32;sleef_sse42_f64;"
"sleef_avx_f32;sleef_avx_f64")
elseif ("${simd}" STREQUAL "avx2")
set(NSIMD_OBJS "${NSIMD_OBJS};api_sse2;api_sse42;api_avx;api_avx2")
set(NSIMD_OBJS "${NSIMD_OBJS};api_sse2;api_sse42;api_avx;api_avx2;"
"sleef_sse2_f32;sleef_sse2_f64;"
"sleef_sse42_f32;sleef_sse42_f64;"
"sleef_avx_f32;sleef_avx_f64;"
"sleef_avx2_f32;sleef_avx2_f64")
elseif ("${simd}" STREQUAL "avx512_knl")
set(NSIMD_OBJS "${NSIMD_OBJS};api_sse2;api_sse42;api_avx;api_avx2"
";api_avx512_knl")
"sleef_sse2_f32;sleef_sse2_f64;"
"sleef_sse42_f32;sleef_sse42_f64;"
"sleef_avx_f32;sleef_avx_f64;"
"sleef_avx2_f32;sleef_avx2_f64;"
"api_avx512_knl;sleef_avx512_knl_f32;sleef_avx512_knl_f64")
elseif ("${simd}" STREQUAL "avx512_skylake")
set(NSIMD_OBJS "${NSIMD_OBJS};api_sse2;api_sse42;api_avx;api_avx2"
";api_avx512_skylake")
set(NSIMD_OBJS "${NSIMD_OBJS};api_sse2;api_sse42;api_avx;api_avx2;"
"api_avx512_skylake;sleef_avx512_skylake_f32;"
"sleef_sse2_f32;sleef_sse2_f64;"
"sleef_sse42_f32;sleef_sse42_f64;"
"sleef_avx_f32;sleef_avx_f64;"
"sleef_avx2_f32;sleef_avx2_f64;"
"sleef_avx512_skylake_f64")
elseif ("${simd}" STREQUAL "neon128")
set(NSIMD_OBJS "${NSIMD_OBJS};api_neon128")
set(NSIMD_OBJS "${NSIMD_OBJS};api_neon128;"
"sleef_neon128_f32;sleef_neon128_f64")
elseif ("${simd}" STREQUAL "aarch64")
set(NSIMD_OBJS "${NSIMD_OBJS};api_aarch64")
set(NSIMD_OBJS "${NSIMD_OBJS};api_aarch64;"
"sleef_aarch64_f32;sleef_aarch64_f64")
elseif ("${simd}" STREQUAL "sve")
set(NSIMD_OBJS "${NSIMD_OBJS};api_aarch64;api_sve")
set(NSIMD_OBJS "${NSIMD_OBJS};api_aarch64;api_sve;"
"sleef_aarch64_f32;sleef_aarch64_f64;"
"sleef_sve_f32;sleef_sve_f64")
elseif ("${simd}" STREQUAL "sve128")
set(NSIMD_OBJS "${NSIMD_OBJS};api_aarch64;api_sve128")
set(NSIMD_OBJS "${NSIMD_OBJS};api_aarch64;api_sve128;"
"sleef_aarch64_f32;sleef_aarch64_f64;"
"sleef_sve128_f32;sleef_sve128_f64")
elseif ("${simd}" STREQUAL "sve256")
set(NSIMD_OBJS "${NSIMD_OBJS};api_aarch64;api_sve256")
set(NSIMD_OBJS "${NSIMD_OBJS};api_aarch64;api_sve256;"
"sleef_aarch64_f32;sleef_aarch64_f64;"
"sleef_sve256_f32;sleef_sve256_f64")
elseif ("${simd}" STREQUAL "sve512")
set(NSIMD_OBJS "${NSIMD_OBJS};api_aarch64;api_sve512")
set(NSIMD_OBJS "${NSIMD_OBJS};api_aarch64;api_sve512;"
"sleef_aarch64_f32;sleef_aarch64_f64;"
"sleef_sve512_f32;sleef_sve512_f64")
elseif ("${simd}" STREQUAL "sve1024")
set(NSIMD_OBJS "${NSIMD_OBJS};api_aarch64;api_sve1024")
set(NSIMD_OBJS "${NSIMD_OBJS};api_aarch64;api_sve1024;"
"sleef_aarch64_f32;sleef_aarch64_f64;"
"sleef_sve1024_f32;sleef_sve1024_f64")
elseif ("${simd}" STREQUAL "sve2048")
set(NSIMD_OBJS "${NSIMD_OBJS};api_aarch64;api_sve2048")
set(NSIMD_OBJS "${NSIMD_OBJS};api_aarch64;api_sve2048;"
"sleef_aarch64_f32;sleef_aarch64_f64;"
"sleef_sve2048_f32;sleef_sve2048_f64")
elseif ("${simd}" STREQUAL "vmx")
  # VMX: the NSIMD API object plus the single- and double-precision Sleef
  # objects for that extension.
  set(NSIMD_OBJS "${NSIMD_OBJS};api_vmx;sleef_vmx_f32;sleef_vmx_f64")
elseif ("${simd}" STREQUAL "vsx")
  # VSX builds everything VMX builds, plus its own API and Sleef objects.
  # Fixed: the last entry used to be a second "sleef_vmx_f64", which both
  # duplicated a target name (each entry becomes an add_library() target) and
  # left the VSX double-precision Sleef object out of the library.
  set(NSIMD_OBJS "${NSIMD_OBJS};api_vmx;api_vsx;sleef_vmx_f32;sleef_vmx_f64;"
                 "sleef_vsx_f32;sleef_vsx_f64")
endif()

# -----------------------------------------------------------------------------
# Rules for building the library

set(NSIMD_LIB_DEPS "")
foreach(o ${NSIMD_OBJS})
add_library(${o} OBJECT src/${o}.cpp)
if (EXISTS "${CMAKE_CURRENT_SOURCE_DIR}/src/${o}.cpp")
add_library(${o} OBJECT "${CMAKE_CURRENT_SOURCE_DIR}/src/${o}.cpp")
elseif(EXISTS "${CMAKE_CURRENT_SOURCE_DIR}/src/${o}.c")
add_library(${o} OBJECT "${CMAKE_CURRENT_SOURCE_DIR}/src/${o}.c")
elseif(("${o}" STREQUAL "sleef_neon128_f64") OR
("${o}" STREQUAL "sleef_vmx_f64"))
add_library(${o} OBJECT
"${CMAKE_CURRENT_SOURCE_DIR}/src/sleefsimddp_emulation.c")
elseif("${o}" STREQUAL "sleef_vmx_f32")
add_library(${o} OBJECT
"${CMAKE_CURRENT_SOURCE_DIR}/src/sleefsimdsp_emulation.c")
elseif(o MATCHES "sleef_.*_f32")
add_library(${o} OBJECT "${CMAKE_CURRENT_SOURCE_DIR}/src/sleefsimdsp.c")
elseif(o MATCHES "sleef_.*_f64")
add_library(${o} OBJECT "${CMAKE_CURRENT_SOURCE_DIR}/src/sleefsimddp.c")
endif()
if (MSVC)
set(sleef_cflags "/DNDEBUG;/DDORENAME=1")
else()
set(sleef_cflags "-DNDEBUG;-DDORENAME=1")
endif()
# Settings shared by every object library `${o}`: build it as
# position-independent code and expose the project's public headers.
set_property(TARGET ${o} PROPERTY POSITION_INDEPENDENT_CODE ON)
target_include_directories(${o} PUBLIC "${CMAKE_CURRENT_SOURCE_DIR}/include")
if (MSVC)
  # Fixed: target_compile_definitions() takes the bare macro name. CMake only
  # strips a leading "-D", so the former "/D_CRT_SECURE_NO_WARNINGS" ended up
  # with "/D" as part of the macro name instead of defining
  # _CRT_SECURE_NO_WARNINGS.
  target_compile_definitions(${o} PUBLIC "_CRT_SECURE_NO_WARNINGS")
endif()
set(buf "")
if ("${o}" STREQUAL "api_sse2")
nsimd_get_compiler_argument("sse2" buf)
elseif ("${o}" STREQUAL "api_sse42")
Expand Down Expand Up @@ -184,15 +248,75 @@ foreach(o ${NSIMD_OBJS})
nsimd_get_compiler_argument("sve1024" buf)
elseif ("${o}" STREQUAL "api_sve2048")
nsimd_get_compiler_argument("sve2048" buf)
elseif ("${o}" STREQUAL "api_vmx")
nsimd_get_compiler_argument("vmx" buf)
elseif ("${o}" STREQUAL "api_vsx")
nsimd_get_compiler_argument("vsx" buf)
elseif ("${o}" STREQUAL "api_cuda")
nsimd_get_compiler_argument("cuda" buf)
elseif ("${o}" STREQUAL "api_rocm")
nsimd_get_compiler_argument("rocm" buf)
elseif ("${o}" STREQUAL "api_cpu")
nsimd_get_compiler_argument("cpu" buf)
elseif ("${o}" STREQUAL "rempitab")
list(APPEND buf "${sleef_cflags}")
elseif ("${o}" STREQUAL "sleefsp")
list(APPEND buf "${sleef_cflags}")
elseif ("${o}" STREQUAL "sleefdp")
list(APPEND buf "${sleef_cflags}")
elseif ("${o}" MATCHES "sleef_sse2_")
nsimd_get_compiler_argument("sse2" buf)
list(APPEND buf "-DNSIMD_SSE2;-DENABLE_SSE2=1;${sleef_cflags}")
elseif ("${o}" MATCHES "sleef_sse42_")
nsimd_get_compiler_argument("sse42" buf)
list(APPEND buf "-DNSIMD_SSE42;-DENABLE_SSE4=1;${sleef_cflags}")
elseif ("${o}" MATCHES "sleef_avx_")
nsimd_get_compiler_argument("avx" buf)
list(APPEND buf "-DNSIMD_AVX;-DENABLE_AVX=1;${sleef_cflags}")
elseif ("${o}" MATCHES "sleef_avx2_")
nsimd_get_compiler_argument("avx2" buf)
list(APPEND buf "-DNSIMD_AVX2;-DENABLE_AVX2=1;${sleef_cflags}")
elseif ("${o}" MATCHES "sleef_avx512_knl_")
nsimd_get_compiler_argument("avx512_knl" buf)
list(APPEND buf "-DNSIMD_AVX512_KNL;-DENABLE_AVX512F=1;${sleef_cflags}")
elseif ("${o}" MATCHES "sleef_avx512_skylake_")
nsimd_get_compiler_argument("avx512_skylake" buf)
list(APPEND buf
"-DNSIMD_AVX512_SKYLAKE;-DENABLE_AVX512F=1;${sleef_cflags}")
elseif ("${o}" MATCHES "sleef_neon128_")
nsimd_get_compiler_argument("neon128" buf)
list(APPEND buf "-DNSIMD_NEON128;-DENABLE_NEON32=1;${sleef_cflags}")
elseif ("${o}" MATCHES "sleef_aarch64_")
nsimd_get_compiler_argument("aarch64" buf)
list(APPEND buf "-DNSIMD_AARCH64;-DENABLE_ADVSIMD=1;${sleef_cflags}")
elseif ("${o}" MATCHES "sleef_sve_")
nsimd_get_compiler_argument("sve" buf)
list(APPEND buf "-DNSIMD_SVE;-DENABLE_SVE=1;${sleef_cflags}")
elseif ("${o}" MATCHES "sleef_sve128_")
nsimd_get_compiler_argument("sve128" buf)
list(APPEND buf "-DNSIMD_SVE128;-DENABLE_SVE=1;${sleef_cflags}")
elseif ("${o}" MATCHES "sleef_sve256_")
nsimd_get_compiler_argument("sve256" buf)
list(APPEND buf "-DNSIMD_SVE256;-DENABLE_SVE=1;${sleef_cflags}")
elseif ("${o}" MATCHES "sleef_sve512_")
nsimd_get_compiler_argument("sve512" buf)
list(APPEND buf "-DNSIMD_SVE512;-DENABLE_SVE=1;${sleef_cflags}")
elseif ("${o}" MATCHES "sleef_sve1024_")
nsimd_get_compiler_argument("sve1024" buf)
list(APPEND buf "-DNSIMD_SVE1024;-DENABLE_SVE=1;${sleef_cflags}")
elseif ("${o}" MATCHES "sleef_sve2048_")
nsimd_get_compiler_argument("sve2048" buf)
list(APPEND buf "-DNSIMD_SVE2048;-DENABLE_SVE=1;${sleef_cflags}")
elseif ("${o}" MATCHES "sleef_vmx_")
nsimd_get_compiler_argument("vmx" buf)
list(APPEND buf "-DNSIMD_VMX;-DENABLE_VSX=1;${sleef_cflags}")
elseif ("${o}" MATCHES "sleef_vsx_")
nsimd_get_compiler_argument("vsx" buf)
list(APPEND buf "-DNSIMD_VSX;-DENABLE_VSX=1;${sleef_cflags}")
else()
set(buf "")
endif()
message(STATUS "DEBUG: ${o} --> ${buf}")
if (NOT "${buf}" STREQUAL "")
target_compile_options(${o} PUBLIC "${buf}")
endif()
Expand Down
Loading