Skip to content

Commit 9d4b825

Browse files
gquintingquintin
and
gquintin
authored
3.0-rc1 (#105)
* Do some backup * Do some backup * Do some backup * Many fixes for C11 API * Add support for C11 to tests * Rewrite pack.md * Finish documentation on packs * Add += *= /= ... to C++ advanced API * Forgot some files * Add C++ advanced API aliases * Git more files * Begin adding Sleef big math functions to NSIMD * Add doc for +=, -=, ... and for function aliases * Backup * Backup before power cut * src/ulps.cpp * More on ULPs * More on ULPs * Fixes in ULPs doc * finished with denormal in doc * Add UFP related functions but no test for now * Add support for scalar math functions * Add test for UFP * Integration of sine (ulp 3.5) from Sleef seems to work * Backup * Compilation of all tests seems fine * Remove fixed_point module own operator list and make it use the central one * Tests passed for CPU * It seems to work for Initel * New tests system * Add Sleef code * Add emulation layer for SIMD not supporting double * Fix support of SIMD emulation for doubles * Fix cmake build system * Improve andnot doc * Fix generation of Sleef files * Replace SVE _z intrinsics by _x ones * Fix SVE implementation * For backup: beginning of merging PPC from xberault * For backup: continuing merging PPC from xberault * More on VMX and VSX * Fix what_is_wrapped * Doc seems to OK * VMX compiles but with a lot of warnings * Refurbishing implementation of platform ppc * Refurbishing implementation of platform ppc * Refurbishing implementation of platform ppc * Refurbishing implementation of platform ppc * Refurbishing implementation of platform ppc * Fixes for PPC * No compilation with Xlc * Fix warnings when compiling C++/PPC code + update CONTRIBUTING with PPC * Update documentation * Many fixes to ppc * Fixes for VSX * More fixes for PPC * Fix PPC * Test * Fix README * Beginning integrating oneAPI * Backup oneAPI implementation * Freeze for WASM * Fix oneAPI SPMD * All tests are ok for oneAPI * Fix various things * Fixes for CUDA * Fix for ROCm * Add contributors * Update README * 
Backup from time to time * Gather all before rc1 Co-authored-by: gquintin <[email protected]>
1 parent a50ce2f commit 9d4b825

Some content is hidden

Large Commits have some content hidden by default. Use the searchbox below for content that may be hidden.

101 files changed

+35061
-5415
lines changed

CMakeLists.txt

+142-18
Original file line numberDiff line numberDiff line change
@@ -21,7 +21,7 @@
2121
# SOFTWARE.
2222

2323
cmake_minimum_required(VERSION 3.0.2)
24-
project(NSIMD VERSION 2.2 LANGUAGES CXX)
24+
project(NSIMD VERSION 3.0 LANGUAGES C CXX)
2525

2626
# -----------------------------------------------------------------------------
2727
# First check that NSIMD code has been generated
@@ -62,8 +62,11 @@ function(nsimd_get_compiler_argument simd_ext argument)
6262
set(mapping_sve512 "/DSVE512")
6363
set(mapping_sve1024 "/DSVE1024")
6464
set(mapping_sve2048 "/DSVE2048")
65+
set(mapping_vmx "/DVMX")
66+
set(mapping_vsx "/DVSX")
6567
set(mapping_cuda "/DCUDA")
6668
set(mapping_rocm "/DROCM")
69+
set(mapping_oneapi "/DONEAPI")  # MSVC define, consistent with the other /D<NAME> mappings
6770
else()
6871
set(mapping_sse2 "-DSSE2;-msse2" )
6972
set(mapping_sse42 "-DSSE42;-msse4.2" )
@@ -89,8 +92,11 @@ function(nsimd_get_compiler_argument simd_ext argument)
8992
";-msve-vector-bits=1024")
9093
set(mapping_sve2048 "-DSVE2048 -march=armv8.2-a+sve"
9194
";-msve-vector-bits=2048")
95+
set(mapping_vmx "-DVMX;-mcpu=powerpc64le;-maltivec")
96+
set(mapping_vsx "-DVSX;-mcpu=powerpc64le;-mvsx")
9297
set(mapping_cuda "-DCUDA")
9398
set(mapping_rocm "-DROCM")
99+
set(mapping_oneapi "-DONEAPI")
94100
endif()
95101
if (DEFINED mapping_${simd_ext})
96102
set(${argument} "${mapping_${simd_ext}}" PARENT_SCOPE)
@@ -111,51 +117,109 @@ nsimd_get_compiler_argument(${simd} NSIMD_COMPILATION_OPTIONS)
111117
# -----------------------------------------------------------------------------
112118
# Object file selection
113119

114-
set(NSIMD_OBJS "fp16;memory;ulps;api_cpu")
120+
set(NSIMD_OBJS "fp16;gpu;memory;api_cpu;rempitab;sleefsp;sleefdp")
115121

116122
if ("${simd}" STREQUAL "sse2")
117-
set(NSIMD_OBJS "${NSIMD_OBJS};api_sse2")
123+
set(NSIMD_OBJS "${NSIMD_OBJS};api_sse2;sleef_sse2_f32;sleef_sse2_f64")
118124
elseif ("${simd}" STREQUAL "sse42")
119-
set(NSIMD_OBJS "${NSIMD_OBJS};api_sse2;api_sse42")
125+
set(NSIMD_OBJS "${NSIMD_OBJS};api_sse2;api_sse42;"
126+
"sleef_sse2_f32;sleef_sse2_f64;"
127+
"sleef_sse42_f32;sleef_sse42_f64")
120128
elseif ("${simd}" STREQUAL "avx")
121-
set(NSIMD_OBJS "${NSIMD_OBJS};api_sse2;api_sse42;api_avx")
129+
set(NSIMD_OBJS "${NSIMD_OBJS};api_sse2;api_sse42;api_avx;"
130+
"sleef_sse2_f32;sleef_sse2_f64;"
131+
"sleef_sse42_f32;sleef_sse42_f64;"
132+
"sleef_avx_f32;sleef_avx_f64")
122133
elseif ("${simd}" STREQUAL "avx2")
123-
set(NSIMD_OBJS "${NSIMD_OBJS};api_sse2;api_sse42;api_avx;api_avx2")
134+
set(NSIMD_OBJS "${NSIMD_OBJS};api_sse2;api_sse42;api_avx;api_avx2;"
135+
"sleef_sse2_f32;sleef_sse2_f64;"
136+
"sleef_sse42_f32;sleef_sse42_f64;"
137+
"sleef_avx_f32;sleef_avx_f64;"
138+
"sleef_avx2_f32;sleef_avx2_f64")
124139
elseif ("${simd}" STREQUAL "avx512_knl")
125140
set(NSIMD_OBJS "${NSIMD_OBJS};api_sse2;api_sse42;api_avx;api_avx2"
126-
";api_avx512_knl")
141+
"sleef_sse2_f32;sleef_sse2_f64;"
142+
"sleef_sse42_f32;sleef_sse42_f64;"
143+
"sleef_avx_f32;sleef_avx_f64;"
144+
"sleef_avx2_f32;sleef_avx2_f64;"
145+
"api_avx512_knl;sleef_avx512_knl_f32;sleef_avx512_knl_f64")
127146
elseif ("${simd}" STREQUAL "avx512_skylake")
128-
set(NSIMD_OBJS "${NSIMD_OBJS};api_sse2;api_sse42;api_avx;api_avx2"
129-
";api_avx512_skylake")
147+
set(NSIMD_OBJS "${NSIMD_OBJS};api_sse2;api_sse42;api_avx;api_avx2;"
148+
"api_avx512_skylake;sleef_avx512_skylake_f32;"
149+
"sleef_sse2_f32;sleef_sse2_f64;"
150+
"sleef_sse42_f32;sleef_sse42_f64;"
151+
"sleef_avx_f32;sleef_avx_f64;"
152+
"sleef_avx2_f32;sleef_avx2_f64;"
153+
"sleef_avx512_skylake_f64")
130154
elseif ("${simd}" STREQUAL "neon128")
131-
set(NSIMD_OBJS "${NSIMD_OBJS};api_neon128")
155+
set(NSIMD_OBJS "${NSIMD_OBJS};api_neon128;"
156+
"sleef_neon128_f32;sleef_neon128_f64")
132157
elseif ("${simd}" STREQUAL "aarch64")
133-
set(NSIMD_OBJS "${NSIMD_OBJS};api_aarch64")
158+
set(NSIMD_OBJS "${NSIMD_OBJS};api_aarch64;"
159+
"sleef_aarch64_f32;sleef_aarch64_f64")
134160
elseif ("${simd}" STREQUAL "sve")
135-
set(NSIMD_OBJS "${NSIMD_OBJS};api_aarch64;api_sve")
161+
set(NSIMD_OBJS "${NSIMD_OBJS};api_aarch64;api_sve;"
162+
"sleef_aarch64_f32;sleef_aarch64_f64;"
163+
"sleef_sve_f32;sleef_sve_f64")
136164
elseif ("${simd}" STREQUAL "sve128")
137-
set(NSIMD_OBJS "${NSIMD_OBJS};api_aarch64;api_sve128")
165+
set(NSIMD_OBJS "${NSIMD_OBJS};api_aarch64;api_sve128;"
166+
"sleef_aarch64_f32;sleef_aarch64_f64;"
167+
"sleef_sve128_f32;sleef_sve128_f64")
138168
elseif ("${simd}" STREQUAL "sve256")
139-
set(NSIMD_OBJS "${NSIMD_OBJS};api_aarch64;api_sve256")
169+
set(NSIMD_OBJS "${NSIMD_OBJS};api_aarch64;api_sve256;"
170+
"sleef_aarch64_f32;sleef_aarch64_f64;"
171+
"sleef_sve256_f32;sleef_sve256_f64")
140172
elseif ("${simd}" STREQUAL "sve512")
141-
set(NSIMD_OBJS "${NSIMD_OBJS};api_aarch64;api_sve512")
173+
set(NSIMD_OBJS "${NSIMD_OBJS};api_aarch64;api_sve512;"
174+
"sleef_aarch64_f32;sleef_aarch64_f64;"
175+
"sleef_sve512_f32;sleef_sve512_f64")
142176
elseif ("${simd}" STREQUAL "sve1024")
143-
set(NSIMD_OBJS "${NSIMD_OBJS};api_aarch64;api_sve1024")
177+
set(NSIMD_OBJS "${NSIMD_OBJS};api_aarch64;api_sve1024;"
178+
"sleef_aarch64_f32;sleef_aarch64_f64;"
179+
"sleef_sve1024_f32;sleef_sve1024_f64")
144180
elseif ("${simd}" STREQUAL "sve2048")
145-
set(NSIMD_OBJS "${NSIMD_OBJS};api_aarch64;api_sve2048")
181+
set(NSIMD_OBJS "${NSIMD_OBJS};api_aarch64;api_sve2048;"
182+
"sleef_aarch64_f32;sleef_aarch64_f64;"
183+
"sleef_sve2048_f32;sleef_sve2048_f64")
184+
elseif ("${simd}" STREQUAL "vmx")
185+
set(NSIMD_OBJS "${NSIMD_OBJS};api_vmx;sleef_vmx_f32;sleef_vmx_f64")
186+
elseif ("${simd}" STREQUAL "vsx")
187+
set(NSIMD_OBJS "${NSIMD_OBJS};api_vmx;api_vsx;sleef_vmx_f32;sleef_vmx_f64;"
188+
"sleef_vsx_f32;sleef_vsx_f64")  # VSX f64 object; sleef_vmx_f64 is already listed above
146189
endif()
147190

148191
# -----------------------------------------------------------------------------
149192
# Rules for building the library
150193

151194
set(NSIMD_LIB_DEPS "")
152195
foreach(o ${NSIMD_OBJS})
153-
add_library(${o} OBJECT src/${o}.cpp)
196+
if (EXISTS "${CMAKE_CURRENT_SOURCE_DIR}/src/${o}.cpp")
197+
add_library(${o} OBJECT "${CMAKE_CURRENT_SOURCE_DIR}/src/${o}.cpp")
198+
elseif(EXISTS "${CMAKE_CURRENT_SOURCE_DIR}/src/${o}.c")
199+
add_library(${o} OBJECT "${CMAKE_CURRENT_SOURCE_DIR}/src/${o}.c")
200+
elseif(("${o}" STREQUAL "sleef_neon128_f64") OR
201+
("${o}" STREQUAL "sleef_vmx_f64"))
202+
add_library(${o} OBJECT
203+
"${CMAKE_CURRENT_SOURCE_DIR}/src/sleefsimddp_emulation.c")
204+
elseif("${o}" STREQUAL "sleef_vmx_f32")
205+
add_library(${o} OBJECT
206+
"${CMAKE_CURRENT_SOURCE_DIR}/src/sleefsimdsp_emulation.c")
207+
elseif(o MATCHES "sleef_.*_f32")
208+
add_library(${o} OBJECT "${CMAKE_CURRENT_SOURCE_DIR}/src/sleefsimdsp.c")
209+
elseif(o MATCHES "sleef_.*_f64")
210+
add_library(${o} OBJECT "${CMAKE_CURRENT_SOURCE_DIR}/src/sleefsimddp.c")
211+
endif()
212+
if (MSVC)
213+
set(sleef_cflags "/DNDEBUG;/DDORENAME=1")
214+
else()
215+
set(sleef_cflags "-DNDEBUG;-DDORENAME=1")
216+
endif()
154217
set_property(TARGET ${o} PROPERTY POSITION_INDEPENDENT_CODE ON)
155218
target_include_directories(${o} PUBLIC "${CMAKE_CURRENT_SOURCE_DIR}/include")
156219
if (MSVC)
157220
target_compile_definitions(${o} PUBLIC "_CRT_SECURE_NO_WARNINGS")  # bare macro name: CMake adds the define flag itself
158221
endif()
222+
set(buf "")
159223
if ("${o}" STREQUAL "api_sse2")
160224
nsimd_get_compiler_argument("sse2" buf)
161225
elseif ("${o}" STREQUAL "api_sse42")
@@ -184,15 +248,75 @@ foreach(o ${NSIMD_OBJS})
184248
nsimd_get_compiler_argument("sve1024" buf)
185249
elseif ("${o}" STREQUAL "api_sve2048")
186250
nsimd_get_compiler_argument("sve2048" buf)
251+
elseif ("${o}" STREQUAL "api_vmx")
252+
nsimd_get_compiler_argument("vmx" buf)
253+
elseif ("${o}" STREQUAL "api_vsx")
254+
nsimd_get_compiler_argument("vsx" buf)
187255
elseif ("${o}" STREQUAL "api_cuda")
188256
nsimd_get_compiler_argument("cuda" buf)
189257
elseif ("${o}" STREQUAL "api_rocm")
190258
nsimd_get_compiler_argument("rocm" buf)
191259
elseif ("${o}" STREQUAL "api_cpu")
192260
nsimd_get_compiler_argument("cpu" buf)
261+
elseif ("${o}" STREQUAL "rempitab")
262+
list(APPEND buf "${sleef_cflags}")
263+
elseif ("${o}" STREQUAL "sleefsp")
264+
list(APPEND buf "${sleef_cflags}")
265+
elseif ("${o}" STREQUAL "sleefdp")
266+
list(APPEND buf "${sleef_cflags}")
267+
elseif ("${o}" MATCHES "sleef_sse2_")
268+
nsimd_get_compiler_argument("sse2" buf)
269+
list(APPEND buf "-DNSIMD_SSE2;-DENABLE_SSE2=1;${sleef_cflags}")
270+
elseif ("${o}" MATCHES "sleef_sse42_")
271+
nsimd_get_compiler_argument("sse42" buf)
272+
list(APPEND buf "-DNSIMD_SSE42;-DENABLE_SSE4=1;${sleef_cflags}")
273+
elseif ("${o}" MATCHES "sleef_avx_")
274+
nsimd_get_compiler_argument("avx" buf)
275+
list(APPEND buf "-DNSIMD_AVX;-DENABLE_AVX=1;${sleef_cflags}")
276+
elseif ("${o}" MATCHES "sleef_avx2_")
277+
nsimd_get_compiler_argument("avx2" buf)
278+
list(APPEND buf "-DNSIMD_AVX2;-DENABLE_AVX2=1;${sleef_cflags}")
279+
elseif ("${o}" MATCHES "sleef_avx512_knl_")
280+
nsimd_get_compiler_argument("avx512_knl" buf)
281+
list(APPEND buf "-DNSIMD_AVX512_KNL;-DENABLE_AVX512F=1;${sleef_cflags}")
282+
elseif ("${o}" MATCHES "sleef_avx512_skylake_")
283+
nsimd_get_compiler_argument("avx512_skylake" buf)
284+
list(APPEND buf
285+
"-DNSIMD_AVX512_SKYLAKE;-DENABLE_AVX512F=1;${sleef_cflags}")
286+
elseif ("${o}" MATCHES "sleef_neon128_")
287+
nsimd_get_compiler_argument("neon128" buf)
288+
list(APPEND buf "-DNSIMD_NEON128;-DENABLE_NEON32=1;${sleef_cflags}")
289+
elseif ("${o}" MATCHES "sleef_aarch64_")
290+
nsimd_get_compiler_argument("aarch64" buf)
291+
list(APPEND buf "-DNSIMD_AARCH64;-DENABLE_ADVSIMD=1;${sleef_cflags}")
292+
elseif ("${o}" MATCHES "sleef_sve_")
293+
nsimd_get_compiler_argument("sve" buf)
294+
list(APPEND buf "-DNSIMD_SVE;-DENABLE_SVE=1;${sleef_cflags}")
295+
elseif ("${o}" MATCHES "sleef_sve128_")
296+
nsimd_get_compiler_argument("sve128" buf)
297+
list(APPEND buf "-DNSIMD_SVE128;-DENABLE_SVE=1;${sleef_cflags}")
298+
elseif ("${o}" MATCHES "sleef_sve256_")
299+
nsimd_get_compiler_argument("sve256" buf)
300+
list(APPEND buf "-DNSIMD_SVE256;-DENABLE_SVE=1;${sleef_cflags}")
301+
elseif ("${o}" MATCHES "sleef_sve512_")
302+
nsimd_get_compiler_argument("sve512" buf)
303+
list(APPEND buf "-DNSIMD_SVE512;-DENABLE_SVE=1;${sleef_cflags}")
304+
elseif ("${o}" MATCHES "sleef_sve1024_")
305+
nsimd_get_compiler_argument("sve1024" buf)
306+
list(APPEND buf "-DNSIMD_SVE1024;-DENABLE_SVE=1;${sleef_cflags}")
307+
elseif ("${o}" MATCHES "sleef_sve2048_")
308+
nsimd_get_compiler_argument("sve2048" buf)
309+
list(APPEND buf "-DNSIMD_SVE2048;-DENABLE_SVE=1;${sleef_cflags}")
310+
elseif ("${o}" MATCHES "sleef_vmx_")
311+
nsimd_get_compiler_argument("vmx" buf)
312+
list(APPEND buf "-DNSIMD_VMX;-DENABLE_VSX=1;${sleef_cflags}")
313+
elseif ("${o}" MATCHES "sleef_vsx_")
314+
nsimd_get_compiler_argument("vsx" buf)
315+
list(APPEND buf "-DNSIMD_VSX;-DENABLE_VSX=1;${sleef_cflags}")
193316
else()
194317
set(buf "")
195318
endif()
319+
message(STATUS "DEBUG: ${o} --> ${buf}")
196320
if (NOT "${buf}" STREQUAL "")
197321
target_compile_options(${o} PUBLIC "${buf}")
198322
endif()

0 commit comments

Comments
 (0)