21
21
# SOFTWARE.
22
22
23
23
cmake_minimum_required (VERSION 3.0.2)
24
- project (NSIMD VERSION 2.2 LANGUAGES CXX)
24
+ project (NSIMD VERSION 3.0 LANGUAGES C CXX)
25
25
26
26
# -----------------------------------------------------------------------------
27
27
# First check that NSIMD code has been generated
@@ -62,8 +62,11 @@ function(nsimd_get_compiler_argument simd_ext argument)
62
62
set (mapping_sve512 "/DSVE512" )
63
63
set (mapping_sve1024 "/DSVE1024" )
64
64
set (mapping_sve2048 "/DSVE2048" )
65
+ set (mapping_vmx "/DVMX" )
66
+ set (mapping_vsx "/DVSX" )
65
67
set (mapping_cuda "/DCUDA" )
66
68
set (mapping_rocm "/DROCM" )
69
+ set (mapping_oneapi "/DONEAPI" ) # MSVC define flag: /D<macro>, same form as the sibling mappings
67
70
else ()
68
71
set (mapping_sse2 "-DSSE2;-msse2" )
69
72
set (mapping_sse42 "-DSSE42;-msse4.2" )
@@ -89,8 +92,11 @@ function(nsimd_get_compiler_argument simd_ext argument)
89
92
";-msve-vector-bits=1024" )
90
93
set (mapping_sve2048 "-DSVE2048 -march=armv8.2-a+sve"
91
94
";-msve-vector-bits=2048" )
95
+ set (mapping_vmx "-DVMX;-mcpu=powerpc64le;-maltivec" )
96
+ set (mapping_vsx "-DVSX;-mcpu=powerpc64le;-mvsx" )
92
97
set (mapping_cuda "-DCUDA" )
93
98
set (mapping_rocm "-DROCM" )
99
+ set (mapping_oneapi "-DONEAPI" )
94
100
endif ()
95
101
if (DEFINED mapping_${simd_ext} )
96
102
set (${argument} "${mapping_${simd_ext} }" PARENT_SCOPE)
@@ -111,51 +117,109 @@ nsimd_get_compiler_argument(${simd} NSIMD_COMPILATION_OPTIONS)
111
117
# -----------------------------------------------------------------------------
112
118
# Object file selection
113
119
114
- set (NSIMD_OBJS "fp16;memory;ulps; api_cpu" )
120
+ set (NSIMD_OBJS "fp16;gpu; memory;api_cpu;rempitab;sleefsp;sleefdp " )
115
121
116
122
if ("${simd} " STREQUAL "sse2" )
117
- set (NSIMD_OBJS "${NSIMD_OBJS} ;api_sse2" )
123
+ set (NSIMD_OBJS "${NSIMD_OBJS} ;api_sse2;sleef_sse2_f32;sleef_sse2_f64 " )
118
124
elseif ("${simd} " STREQUAL "sse42" )
119
- set (NSIMD_OBJS "${NSIMD_OBJS} ;api_sse2;api_sse42" )
125
+ set (NSIMD_OBJS "${NSIMD_OBJS} ;api_sse2;api_sse42;"
126
+ "sleef_sse2_f32;sleef_sse2_f64;"
127
+ "sleef_sse42_f32;sleef_sse42_f64" )
120
128
elseif ("${simd} " STREQUAL "avx" )
121
- set (NSIMD_OBJS "${NSIMD_OBJS} ;api_sse2;api_sse42;api_avx" )
129
+ set (NSIMD_OBJS "${NSIMD_OBJS} ;api_sse2;api_sse42;api_avx;"
130
+ "sleef_sse2_f32;sleef_sse2_f64;"
131
+ "sleef_sse42_f32;sleef_sse42_f64;"
132
+ "sleef_avx_f32;sleef_avx_f64" )
122
133
elseif ("${simd} " STREQUAL "avx2" )
123
- set (NSIMD_OBJS "${NSIMD_OBJS} ;api_sse2;api_sse42;api_avx;api_avx2" )
134
+ set (NSIMD_OBJS "${NSIMD_OBJS} ;api_sse2;api_sse42;api_avx;api_avx2;"
135
+ "sleef_sse2_f32;sleef_sse2_f64;"
136
+ "sleef_sse42_f32;sleef_sse42_f64;"
137
+ "sleef_avx_f32;sleef_avx_f64;"
138
+ "sleef_avx2_f32;sleef_avx2_f64" )
124
139
elseif ("${simd} " STREQUAL "avx512_knl" )
125
140
set (NSIMD_OBJS "${NSIMD_OBJS} ;api_sse2;api_sse42;api_avx;api_avx2"
126
- ";api_avx512_knl" )
141
+ "sleef_sse2_f32;sleef_sse2_f64;"
142
+ "sleef_sse42_f32;sleef_sse42_f64;"
143
+ "sleef_avx_f32;sleef_avx_f64;"
144
+ "sleef_avx2_f32;sleef_avx2_f64;"
145
+ "api_avx512_knl;sleef_avx512_knl_f32;sleef_avx512_knl_f64" )
127
146
elseif ("${simd} " STREQUAL "avx512_skylake" )
128
- set (NSIMD_OBJS "${NSIMD_OBJS} ;api_sse2;api_sse42;api_avx;api_avx2"
129
- ";api_avx512_skylake" )
147
+ set (NSIMD_OBJS "${NSIMD_OBJS} ;api_sse2;api_sse42;api_avx;api_avx2;"
148
+ "api_avx512_skylake;sleef_avx512_skylake_f32;"
149
+ "sleef_sse2_f32;sleef_sse2_f64;"
150
+ "sleef_sse42_f32;sleef_sse42_f64;"
151
+ "sleef_avx_f32;sleef_avx_f64;"
152
+ "sleef_avx2_f32;sleef_avx2_f64;"
153
+ "sleef_avx512_skylake_f64" )
130
154
elseif ("${simd} " STREQUAL "neon128" )
131
- set (NSIMD_OBJS "${NSIMD_OBJS} ;api_neon128" )
155
+ set (NSIMD_OBJS "${NSIMD_OBJS} ;api_neon128;"
156
+ "sleef_neon128_f32;sleef_neon128_f64" )
132
157
elseif ("${simd} " STREQUAL "aarch64" )
133
- set (NSIMD_OBJS "${NSIMD_OBJS} ;api_aarch64" )
158
+ set (NSIMD_OBJS "${NSIMD_OBJS} ;api_aarch64;"
159
+ "sleef_aarch64_f32;sleef_aarch64_f64" )
134
160
elseif ("${simd} " STREQUAL "sve" )
135
- set (NSIMD_OBJS "${NSIMD_OBJS} ;api_aarch64;api_sve" )
161
+ set (NSIMD_OBJS "${NSIMD_OBJS} ;api_aarch64;api_sve;"
162
+ "sleef_aarch64_f32;sleef_aarch64_f64;"
163
+ "sleef_sve_f32;sleef_sve_f64" )
136
164
elseif ("${simd} " STREQUAL "sve128" )
137
- set (NSIMD_OBJS "${NSIMD_OBJS} ;api_aarch64;api_sve128" )
165
+ set (NSIMD_OBJS "${NSIMD_OBJS} ;api_aarch64;api_sve128;"
166
+ "sleef_aarch64_f32;sleef_aarch64_f64;"
167
+ "sleef_sve128_f32;sleef_sve128_f64" )
138
168
elseif ("${simd} " STREQUAL "sve256" )
139
- set (NSIMD_OBJS "${NSIMD_OBJS} ;api_aarch64;api_sve256" )
169
+ set (NSIMD_OBJS "${NSIMD_OBJS} ;api_aarch64;api_sve256;"
170
+ "sleef_aarch64_f32;sleef_aarch64_f64;"
171
+ "sleef_sve256_f32;sleef_sve256_f64" )
140
172
elseif ("${simd} " STREQUAL "sve512" )
141
- set (NSIMD_OBJS "${NSIMD_OBJS} ;api_aarch64;api_sve512" )
173
+ set (NSIMD_OBJS "${NSIMD_OBJS} ;api_aarch64;api_sve512;"
174
+ "sleef_aarch64_f32;sleef_aarch64_f64;"
175
+ "sleef_sve512_f32;sleef_sve512_f64" )
142
176
elseif ("${simd} " STREQUAL "sve1024" )
143
- set (NSIMD_OBJS "${NSIMD_OBJS} ;api_aarch64;api_sve1024" )
177
+ set (NSIMD_OBJS "${NSIMD_OBJS} ;api_aarch64;api_sve1024;"
178
+ "sleef_aarch64_f32;sleef_aarch64_f64;"
179
+ "sleef_sve1024_f32;sleef_sve1024_f64" )
144
180
elseif ("${simd} " STREQUAL "sve2048" )
145
- set (NSIMD_OBJS "${NSIMD_OBJS} ;api_aarch64;api_sve2048" )
181
+ set (NSIMD_OBJS "${NSIMD_OBJS} ;api_aarch64;api_sve2048;"
182
+ "sleef_aarch64_f32;sleef_aarch64_f64;"
183
+ "sleef_sve2048_f32;sleef_sve2048_f64" )
184
+ elseif ("${simd} " STREQUAL "vmx" )
185
+ set (NSIMD_OBJS "${NSIMD_OBJS} ;api_vmx;sleef_vmx_f32;sleef_vmx_f64" )
186
+ elseif ("${simd} " STREQUAL "vsx" )
187
+ # VSX builds the VMX objects plus the VSX ones. Every entry becomes its own
+ # add_library() target in the loop below, so each name may appear only once.
+ set (NSIMD_OBJS "${NSIMD_OBJS} ;api_vmx;api_vsx;sleef_vmx_f32;sleef_vmx_f64;"
188
+ "sleef_vsx_f32;sleef_vsx_f64" )
146
189
endif ()
147
190
148
191
# -----------------------------------------------------------------------------
149
192
# Rules for building the library
150
193
151
194
set (NSIMD_LIB_DEPS "" )
152
195
foreach (o ${NSIMD_OBJS} )
153
- add_library (${o} OBJECT src/${o} .cpp)
196
+ if (EXISTS "${CMAKE_CURRENT_SOURCE_DIR} /src/${o} .cpp" )
197
+ add_library (${o} OBJECT "${CMAKE_CURRENT_SOURCE_DIR} /src/${o} .cpp" )
198
+ elseif (EXISTS "${CMAKE_CURRENT_SOURCE_DIR} /src/${o} .c" )
199
+ add_library (${o} OBJECT "${CMAKE_CURRENT_SOURCE_DIR} /src/${o} .c" )
200
+ elseif (("${o} " STREQUAL "sleef_neon128_f64" ) OR
201
+ ("${o} " STREQUAL "sleef_vmx_f64" ))
202
+ add_library (${o} OBJECT
203
+ "${CMAKE_CURRENT_SOURCE_DIR} /src/sleefsimddp_emulation.c" )
204
+ elseif ("${o} " STREQUAL "sleef_vmx_f32" )
205
+ add_library (${o} OBJECT
206
+ "${CMAKE_CURRENT_SOURCE_DIR} /src/sleefsimdsp_emulation.c" )
207
+ elseif (o MATCHES "sleef_.*_f32" )
208
+ add_library (${o} OBJECT "${CMAKE_CURRENT_SOURCE_DIR} /src/sleefsimdsp.c" )
209
+ elseif (o MATCHES "sleef_.*_f64" )
210
+ add_library (${o} OBJECT "${CMAKE_CURRENT_SOURCE_DIR} /src/sleefsimddp.c" )
211
+ endif ()
212
+ if (MSVC )
213
+ set (sleef_cflags "/DNDEBUG;/DDORENAME=1" )
214
+ else ()
215
+ set (sleef_cflags "-DNDEBUG;-DDORENAME=1" )
216
+ endif ()
154
217
set_property (TARGET ${o} PROPERTY POSITION_INDEPENDENT_CODE ON )
155
218
target_include_directories (${o} PUBLIC "${CMAKE_CURRENT_SOURCE_DIR} /include" )
156
219
if (MSVC )
157
220
# Pass the bare macro name: CMake emits the compiler's own define flag, and
# only a leading -D (not /D) would be stripped from the item.
target_compile_definitions (${o} PUBLIC "_CRT_SECURE_NO_WARNINGS" )
158
221
endif ()
222
+ set (buf "" )
159
223
if ("${o} " STREQUAL "api_sse2" )
160
224
nsimd_get_compiler_argument("sse2" buf)
161
225
elseif ("${o} " STREQUAL "api_sse42" )
@@ -184,15 +248,75 @@ foreach(o ${NSIMD_OBJS})
184
248
nsimd_get_compiler_argument("sve1024" buf)
185
249
elseif ("${o} " STREQUAL "api_sve2048" )
186
250
nsimd_get_compiler_argument("sve2048" buf)
251
+ elseif ("${o} " STREQUAL "api_vmx" )
252
+ nsimd_get_compiler_argument("vmx" buf)
253
+ elseif ("${o} " STREQUAL "api_vsx" )
254
+ nsimd_get_compiler_argument("vsx" buf)
187
255
elseif ("${o} " STREQUAL "api_cuda" )
188
256
nsimd_get_compiler_argument("cuda" buf)
189
257
elseif ("${o} " STREQUAL "api_rocm" )
190
258
nsimd_get_compiler_argument("rocm" buf)
191
259
elseif ("${o} " STREQUAL "api_cpu" )
192
260
nsimd_get_compiler_argument("cpu" buf)
261
+ elseif ("${o} " STREQUAL "rempitab" )
262
+ list (APPEND buf "${sleef_cflags} " )
263
+ elseif ("${o} " STREQUAL "sleefsp" )
264
+ list (APPEND buf "${sleef_cflags} " )
265
+ elseif ("${o} " STREQUAL "sleefdp" )
266
+ list (APPEND buf "${sleef_cflags} " )
267
+ elseif ("${o} " MATCHES "sleef_sse2_" )
268
+ nsimd_get_compiler_argument("sse2" buf)
269
+ list (APPEND buf "-DNSIMD_SSE2;-DENABLE_SSE2=1;${sleef_cflags} " )
270
+ elseif ("${o} " MATCHES "sleef_sse42_" )
271
+ nsimd_get_compiler_argument("sse42" buf)
272
+ list (APPEND buf "-DNSIMD_SSE42;-DENABLE_SSE4=1;${sleef_cflags} " )
273
+ elseif ("${o} " MATCHES "sleef_avx_" )
274
+ nsimd_get_compiler_argument("avx" buf)
275
+ list (APPEND buf "-DNSIMD_AVX;-DENABLE_AVX=1;${sleef_cflags} " )
276
+ elseif ("${o} " MATCHES "sleef_avx2_" )
277
+ nsimd_get_compiler_argument("avx2" buf)
278
+ list (APPEND buf "-DNSIMD_AVX2;-DENABLE_AVX2=1;${sleef_cflags} " )
279
+ elseif ("${o} " MATCHES "sleef_avx512_knl_" )
280
+ nsimd_get_compiler_argument("avx512_knl" buf)
281
+ list (APPEND buf "-DNSIMD_AVX512_KNL;-DENABLE_AVX512F=1;${sleef_cflags} " )
282
+ elseif ("${o} " MATCHES "sleef_avx512_skylake_" )
283
+ nsimd_get_compiler_argument("avx512_skylake" buf)
284
+ list (APPEND buf
285
+ "-DNSIMD_AVX512_SKYLAKE;-DENABLE_AVX512F=1;${sleef_cflags} " )
286
+ elseif ("${o} " MATCHES "sleef_neon128_" )
287
+ nsimd_get_compiler_argument("neon128" buf)
288
+ list (APPEND buf "-DNSIMD_NEON128;-DENABLE_NEON32=1;${sleef_cflags} " )
289
+ elseif ("${o} " MATCHES "sleef_aarch64_" )
290
+ nsimd_get_compiler_argument("aarch64" buf)
291
+ list (APPEND buf "-DNSIMD_AARCH64;-DENABLE_ADVSIMD=1;${sleef_cflags} " )
292
+ elseif ("${o} " MATCHES "sleef_sve_" )
293
+ nsimd_get_compiler_argument("sve" buf)
294
+ list (APPEND buf "-DNSIMD_SVE;-DENABLE_SVE=1;${sleef_cflags} " )
295
+ elseif ("${o} " MATCHES "sleef_sve128_" )
296
+ nsimd_get_compiler_argument("sve128" buf)
297
+ list (APPEND buf "-DNSIMD_SVE128;-DENABLE_SVE=1;${sleef_cflags} " )
298
+ elseif ("${o} " MATCHES "sleef_sve256_" )
299
+ nsimd_get_compiler_argument("sve256" buf)
300
+ list (APPEND buf "-DNSIMD_SVE256;-DENABLE_SVE=1;${sleef_cflags} " )
301
+ elseif ("${o} " MATCHES "sleef_sve512_" )
302
+ nsimd_get_compiler_argument("sve512" buf)
303
+ list (APPEND buf "-DNSIMD_SVE512;-DENABLE_SVE=1;${sleef_cflags} " )
304
+ elseif ("${o} " MATCHES "sleef_sve1024_" )
305
+ nsimd_get_compiler_argument("sve1024" buf)
306
+ list (APPEND buf "-DNSIMD_SVE1024;-DENABLE_SVE=1;${sleef_cflags} " )
307
+ elseif ("${o} " MATCHES "sleef_sve2048_" )
308
+ nsimd_get_compiler_argument("sve2048" buf)
309
+ list (APPEND buf "-DNSIMD_SVE2048;-DENABLE_SVE=1;${sleef_cflags} " )
310
+ elseif ("${o} " MATCHES "sleef_vmx_" )
311
+ nsimd_get_compiler_argument("vmx" buf)
312
+ list (APPEND buf "-DNSIMD_VMX;-DENABLE_VSX=1;${sleef_cflags} " )
313
+ elseif ("${o} " MATCHES "sleef_vsx_" )
314
+ nsimd_get_compiler_argument("vsx" buf)
315
+ list (APPEND buf "-DNSIMD_VSX;-DENABLE_VSX=1;${sleef_cflags} " )
193
316
else ()
194
317
set (buf "" )
195
318
endif ()
319
+ message (STATUS "DEBUG: ${o} --> ${buf} " )
196
320
if (NOT "${buf} " STREQUAL "" )
197
321
target_compile_options (${o} PUBLIC "${buf} " )
198
322
endif ()
0 commit comments