Skip to content

Commit 0672a56

Browse files
committed
Configure with parameters for specific CPUs
And remove fft_tuning files.
1 parent e6c2d76 commit 0672a56

Some content is hidden

Large Commits have some content hidden by default. Use the searchbox below for content that may be hidden.

44 files changed

+440
-395
lines changed

.gitignore

+1-1
Original file line numberDiff line numberDiff line change
@@ -12,7 +12,6 @@
1212
src/fmpz/fmpz.c
1313
src/config.h
1414
src/config.h.in
15-
src/fft_tuning.h
1615
src/flint.h
1716
src/flint-config.h
1817
src/gmpcompat.h
@@ -64,3 +63,4 @@ libtool
6463
flint.pc
6564
autom4te.cache/
6665
config.m4
66+
src/flint-mparam.h

CMakeLists.txt

+3-9
Original file line numberDiff line numberDiff line change
@@ -274,7 +274,7 @@ string(REGEX REPLACE "([A-Za-z0-9_-]+;|[A-Za-z0-9_-]+$)" "src/\\1" BUILD_DIRS "$
274274
# NOTE: Template directories are not supposed to be used.
275275

276276
set(_HEADERS
277-
NTL-interface.h flint.h longlong.h flint-config.h gmpcompat.h fft_tuning.h
277+
NTL-interface.h flint.h longlong.h flint-config.h gmpcompat.h flint-mparam.h
278278
profiler.h templates.h mpf-impl.h
279279
)
280280
string(REGEX REPLACE "([A-Za-z0-9_-]+\.h;|[A-Za-z0-9_-]+\.h$)" "src/\\1" HEADERS "${_HEADERS}")
@@ -352,16 +352,10 @@ endif()
352352

353353
if(CMAKE_SIZEOF_VOID_P EQUAL 8)
354354
configure_file(
355-
${CMAKE_CURRENT_SOURCE_DIR}/src/fft_tuning64.in
356-
${CMAKE_CURRENT_SOURCE_DIR}/src/fft_tuning.h
355+
${CMAKE_CURRENT_SOURCE_DIR}/src/mpn_extras/generic/flint-mparam.h
356+
${CMAKE_CURRENT_SOURCE_DIR}/src/flint-mparam.h
357357
COPYONLY
358358
)
359-
elseif(CMAKE_SIZEOF_VOID_P EQUAL 4)
360-
configure_file(
361-
${CMAKE_CURRENT_SOURCE_DIR}/src/fft_tuning32.in
362-
${CMAKE_CURRENT_SOURCE_DIR}/src/fft_tuning.h
363-
COPYONLY
364-
)
365359
endif()
366360

367361

Makefile.in

+17-31
Original file line numberDiff line numberDiff line change
@@ -68,9 +68,8 @@ WANT_DEPS:=@WANT_DEPS@
6868

6969
WANT_CXX:=@WANT_CXX@
7070

71-
WANT_ADX_ASSEMBLY:=@WANT_ADX_ASSEMBLY@
72-
WANT_ARMV8_ASSEMBLY:=@WANT_ARMV8_ASSEMBLY@
7371
WANT_ASSEMBLY:=@WANT_ASSEMBLY@
72+
ASM_PATH:=$(SRC_DIR)/mpn_extras/@ASM_PATH@
7473

7574
GMP_LIB_PATH:=@GMP_LIB_PATH@
7675
MPFR_LIB_PATH:=@MPFR_LIB_PATH@
@@ -136,9 +135,9 @@ endif
136135
CFG_FILES := \
137136
$(FLINT_DIR)/config.h $(SRC_DIR)/flint-config.h \
138137
$(FLINT_DIR)/config.log $(SRC_DIR)/flint.h \
139-
$(SRC_DIR)/fft_tuning.h $(FLINT_DIR)/flint.pc \
138+
$(SRC_DIR)/gmpcompat.h $(FLINT_DIR)/flint.pc \
140139
$(FLINT_DIR)/Makefile $(SRC_DIR)/fmpz/fmpz.c \
141-
$(SRC_DIR)/gmpcompat.h $(SRC_DIR)/config.m4
140+
$(SRC_DIR)/config.m4 $(SRC_DIR)/flint-mparam.h
142141

143142
################################################################################
144143
# directories
@@ -233,13 +232,9 @@ ifneq ($(COVERAGE), 0)
233232
BUILD_DIRS += \
234233
$(BUILD_DIR)/coverage
235234
endif
236-
ifeq ($(WANT_ADX_ASSEMBLY),1)
237-
BUILD_DIRS += \
238-
$(BUILD_DIR)/mpn_extras/broadwell
239-
endif
240-
ifeq ($(WANT_ARMV8_ASSEMBLY),1)
235+
ifeq ($(WANT_ASSEMBLY),1)
241236
BUILD_DIRS += \
242-
$(BUILD_DIR)/mpn_extras/arm64
237+
$(patsubst $(SRC_DIR)/%,$(BUILD_DIR)/%,$(ASM_PATH))
243238
endif
244239

245240
INSTALL_DIRS := \
@@ -254,13 +249,16 @@ endif
254249
################################################################################
255250

256251
SINGLE_HEADERS := \
257-
flint.h flint-config.h gmpcompat.h \
258-
profiler.h templates.h fft_tuning.h \
252+
flint.h flint-config.h flint-mparam.h \
253+
\
254+
profiler.h templates.h \
259255
\
260256
longlong.h longlong_asm_clang.h \
261257
longlong_asm_gcc.h longlong_div_gnu.h \
262258
longlong_msc_arm64.h longlong_msc_x86.h \
263259
\
260+
gmpcompat.h \
261+
\
264262
crt_helpers.h machine_vectors.h \
265263
\
266264
mpf-impl.h fq_zech_vec.h \
@@ -296,12 +294,7 @@ fmpz_lll_SOURCES := $(filter-out $(SRC_DIR)/fmpz_lll/babai.c $(SRC_DIR)/fmpz_lll
296294
SOURCES := $(foreach dir,$(DIRS),$($(dir)_SOURCES))
297295

298296
ifeq ($(WANT_ASSEMBLY),1)
299-
ifeq ($(WANT_ADX_ASSEMBLY),1)
300-
mpn_extras_ASM_SOURCES := $(wildcard $(SRC_DIR)/mpn_extras/broadwell/*.asm)
301-
endif
302-
ifeq ($(WANT_ARMV8_ASSEMBLY),1)
303-
mpn_extras_ASM_SOURCES := $(wildcard $(SRC_DIR)/mpn_extras/arm64/*.asm)
304-
endif
297+
mpn_extras_ASM_SOURCES := $(wildcard $(ASM_PATH)/*.asm)
305298
mpn_extras_PIC_S_SOURCES := $(patsubst $(SRC_DIR)/%.asm,$(BUILD_DIR)/%_pic.s,$(mpn_extras_ASM_SOURCES))
306299
mpn_extras_S_SOURCES := $(patsubst $(SRC_DIR)/%.asm,$(BUILD_DIR)/%.s,$(mpn_extras_ASM_SOURCES))
307300
ifneq ($(SHARED), 0)
@@ -584,18 +577,11 @@ DEPFLAGS = -MMD -MP -MF $(@:%=%.d)
584577
# generated sources
585578
################################################################################
586579

587-
ifeq ($(WANT_ADX_ASSEMBLY),1)
588-
$(BUILD_DIR)/%.s: $(SRC_DIR)/%.asm $(FLINT_DIR)/config.m4 $(SRC_DIR)/mpn_extras/asm-defs.m4 $(SRC_DIR)/mpn_extras/broadwell/x86_64-defs.m4 | $(BUILD_DIR)/mpn_extras/broadwell
589-
@$(M4) $< > $@
590-
591-
$(BUILD_DIR)/%_pic.s: $(SRC_DIR)/%.asm $(FLINT_DIR)/config.m4 $(SRC_DIR)/mpn_extras/asm-defs.m4 $(SRC_DIR)/mpn_extras/broadwell/x86_64-defs.m4 | $(BUILD_DIR)/mpn_extras/broadwell
592-
@$(M4) -DPIC $< > $@
593-
endif
594-
ifeq ($(WANT_ARMV8_ASSEMBLY),1)
595-
$(BUILD_DIR)/%.s: $(SRC_DIR)/%.asm $(FLINT_DIR)/config.m4 $(SRC_DIR)/mpn_extras/asm-defs.m4 $(SRC_DIR)/mpn_extras/arm64/arm64-defs.m4 | $(BUILD_DIR)/mpn_extras/arm64
580+
ifeq ($(WANT_ASSEMBLY),1)
581+
$(BUILD_DIR)/%.s: $(SRC_DIR)/%.asm $(FLINT_DIR)/config.m4 | $(BUILD_DIR)/mpn_extras/@ASM_PATH@
596582
@$(M4) $< > $@
597583

598-
$(BUILD_DIR)/%_pic.s: $(SRC_DIR)/%.asm $(FLINT_DIR)/config.m4 $(SRC_DIR)/mpn_extras/asm-defs.m4 $(SRC_DIR)/mpn_extras/arm64/arm64-defs.m4 | $(BUILD_DIR)/mpn_extras/arm64
584+
$(BUILD_DIR)/%_pic.s: $(SRC_DIR)/%.asm $(FLINT_DIR)/config.m4 | $(BUILD_DIR)/mpn_extras/@ASM_PATH@
599585
@$(M4) -DPIC $< > $@
600586
endif
601587

@@ -966,15 +952,15 @@ src/flint.h: src/flint.h.in config.status
966952
libtool: config.status
967953
./config.status $@
968954

969-
src/fft_tuning.h: @FFT_TUNING_IN@ config.status
970-
./config.status $@
971-
972955
src/fmpz/fmpz.c: @FMPZ_C_IN@ config.status
973956
./config.status $@
974957

975958
src/gmpcompat.h: src/@GMPCOMPAT_H_IN@ config.status
976959
./config.status $@
977960

961+
src/flint-mparam.h: src/mpn_extras/@PARAM_PATH@/flint-mparam.h config.status
962+
./config.status $@
963+
978964
################################################################################
979965
# maintainer stuff
980966
################################################################################

acinclude.m4

+60-141
Original file line numberDiff line numberDiff line change
@@ -88,15 +88,74 @@ define(X86_PATTERN,
8888
define(X86_64_PATTERN,
8989
[[athlon64-*-* | k8-*-* | k10-*-* | bobcat-*-* | jaguar*-*-* | bulldozer*-*-* | piledriver*-*-* | steamroller*-*-* | excavator*-*-* | zen*-*-* | pentium4-*-* | atom-*-* | silvermont-*-* | goldmont-*-* | tremont-*-* | core2-*-* | corei*-*-* | x86_64-*-* | nano-*-* | nehalem*-*-* | westmere*-*-* | sandybridge*-*-* | ivybridge*-*-* | haswell*-*-* | broadwell*-*-* | skylake*-*-* | kabylake*-*-* | icelake*-*-* | tigerlake*-*-* | rocketlake*-*-* | alderlake*-*-* | raptorlake*-*-*]])
9090

91+
define(X86_64_ADX_PATTERN,
92+
[[zen*-*-* | x86_64-v3*-*-* | broadwell*-*-* | skylake*-*-* | kabylake*-*-* | icelake*-*-* | tigerlake*-*-* | rocketlake*-*-* | alderlake*-*-* | raptorlake*-*-*]])
93+
9194
define(ARM64_PATTERN,
92-
[[aarch64-*-*]])
95+
[[armcortexa53-*-* | armcortexa53neon-*-* | armcortexa55-*-* | armcortexa55neon-*-* | armcortexa57-*-* | armcortexa57neon-*-* | armcortexa7[2-9]-*-* | armcortexa7[2-9]neon-*-* | armexynosm1-*-* | armthunderx-*-* | armxgene1-*-* | aarch64*-*-* | applem[1-9]*-*-* | armv8*-*-*]])
9396

9497
define(SLOW_VROUNDPD_PATTERN,
9598
[[haswell* | broadwell* | skylake* | kabylake* | icelake* | tigerlake* | rocketlake* | alderlake* | raptorlake*]])
9699

97100
define(FAST_VROUNDPD_PATTERN,
98101
[[znver[2-4]* | sandybridge* | ivybridge*]])
99102

103+
104+
dnl FLINT_CHECK_CPU_SET_T([action-if-true],[action-if-false])
105+
dnl -----------------------
106+
dnl Checks if cpu_set_t is supported.
107+
dnl
108+
dnl FIXME: Does this cover all BSD systems?
109+
110+
AC_DEFUN([FLINT_CHECK_CPU_SET_T],
111+
[AC_CACHE_CHECK([if cpu_set_t is supported],
112+
flint_cv_check_cpu_set_t,
113+
[flint_cv_check_cpu_set_t="no"
114+
AC_COMPILE_IFELSE([AC_LANG_PROGRAM([
115+
#define _GNU_SOURCE
116+
#include <sched.h>
117+
#include <pthread.h>
118+
],
119+
[cpu_set_t s;
120+
CPU_ZERO(&s);
121+
pthread_getaffinity_np(pthread_self(), sizeof(cpu_set_t), 0);])],
122+
[flint_cv_check_cpu_set_t="yes"])
123+
])
124+
AS_VAR_IF([flint_cv_check_cpu_set_t],"yes",
125+
[m4_default([$1], :)],
126+
[m4_default([$2], :)])
127+
])
128+
129+
130+
dnl FLINT_CHECK_NTL([action-if-true],[action-if-false])
131+
dnl -----------------------
132+
dnl Checks if linking with NTL works.
133+
134+
AC_DEFUN([FLINT_CHECK_NTL],
135+
[AC_REQUIRE([AC_PROG_CXX])
136+
AC_CACHE_CHECK([if linking with NTL works],
137+
flint_cv_check_ntl,
138+
[flint_cv_check_ntl="no"
139+
save_LIBS="$LIBS"
140+
LIBS="-lntl $LIBS"
141+
AC_LANG_PUSH([C++])
142+
AC_LINK_IFELSE([AC_LANG_PROGRAM(
143+
[[#include <NTL/ZZ.h>
144+
]], [NTL::ZZ a, b, c;
145+
std::cin >> a;
146+
std::cin >> b;
147+
c = (a+1)*(b+1);
148+
std::cout << c << "\n";])],
149+
[flint_cv_check_ntl="yes"])
150+
AC_LANG_POP([C++])
151+
LIBS="$save_LIBS"
152+
])
153+
AS_VAR_IF([flint_cv_check_ntl],"yes",
154+
[m4_default([$1], :)],
155+
[m4_default([$2], :)])
156+
])
157+
158+
100159
dnl FLINT_PREPROC_IFELSE(input,[action-if-true],[action-if-false])
101160
dnl -----------------------
102161
dnl Runs preprocessor with CFLAGS.
@@ -114,48 +173,6 @@ ac_cpp="$ac_cpp"
114173
])
115174

116175

117-
dnl FLINT_ARCH
118-
dnl -----------------------
119-
dnl Checks compiler for architectures.
120-
dnl
121-
dnl NOTE: This has to be called after all CFLAGS has been gathered.
122-
dnl
123-
dnl FIXME: Currently only Clang and GCC. Support more compilers?
124-
125-
AC_DEFUN([FLINT_ARCH],
126-
[AC_CACHE_CHECK([for host architecture],
127-
flint_cv_arch,
128-
[flint_cv_arch="unknown"
129-
is_gnu="no"
130-
AC_PREPROC_IFELSE([AC_LANG_SOURCE([
131-
#ifndef __GNUC__
132-
# error
133-
error
134-
#endif
135-
])],
136-
[is_gnu="yes"])
137-
138-
dnl We only know how to proceed with GCC or Clang
139-
AS_VAR_IF([is_gnu],"yes",[
140-
is_clang="no"
141-
AC_PREPROC_IFELSE([AC_LANG_SOURCE([
142-
#ifndef __clang__
143-
# error
144-
error
145-
#endif
146-
])],
147-
[is_clang="yes"])
148-
149-
AS_VAR_IF([is_clang],"yes",[
150-
flint_cv_arch=[`echo | $CC -v $CFLAGS -E - 2>&1 | grep "cc1" | sed -n 's/.*-target-cpu \([^ ]*\).*/\1/p'`]
151-
],[
152-
flint_cv_arch=[`echo | $CC -v $CFLAGS -E - 2>&1 | grep "cc1" | sed -n 's/.*-march=\([^ ]*\).*/\1/p'`]
153-
])
154-
])
155-
])
156-
])
157-
158-
159176
dnl FLINT_CHECK_GMP_H(MAJOR, MINOR, PATCHLEVEL)
160177
dnl -----------------------
161178
dnl Checks that gmp.h can be found and that its version fulfills the version
@@ -263,104 +280,6 @@ fi
263280
])
264281
])
265282

266-
dnl FLINT_SYSTEM_V_ABI([action-success][,action-fail])
267-
dnl -----------------------
268-
dnl Checks if System V ABI.
269-
dnl Do "action-success" if this succeeds, "action-fail" if not.
270-
271-
AC_DEFUN([FLINT_SYSTEM_V_ABI],
272-
[AC_CACHE_CHECK([if system uses System V ABI],
273-
flint_cv_system_v_abi,
274-
AC_PREPROC_IFELSE([AC_LANG_PROGRAM([],[
275-
#if !(defined(__APPLE__) || defined(__unix__) || defined(__FreeBSD__) || defined(__NetBSD__) || defined(__OpenBSD__) || defined(__bsdi__) || defined(__DragonFly__)) || defined(__CYGWIN__)
276-
#error Dead man
277-
error
278-
#endif
279-
])],
280-
[flint_cv_system_v_abi="yes"],
281-
[flint_cv_system_v_abi="no"]
282-
))
283-
AS_VAR_IF([flint_cv_system_v_abi],"yes",
284-
[m4_default([$1], :)],
285-
[m4_default([$2], :)])
286-
])
287-
288-
289-
dnl FLINT_CHECK_ADX([action-success][,action-fail])
290-
dnl -----------------------
291-
dnl Checks if CPU supports the ADX instruction set. Will only run if host CPU
292-
dnl is x86_64. Do "action-success" if this succeeds, "action-fail" if not.
293-
294-
AC_DEFUN([FLINT_CHECK_ADX],
295-
[AS_VAR_IF([host_cpu],"x86_64",
296-
[AC_CACHE_CHECK([if ADX instruction set is supported by CPU],
297-
flint_cv_check_adx,
298-
FLINT_PREPROC_IFELSE([AC_LANG_SOURCE([
299-
#if !defined(__ADX__)
300-
#error Dead man
301-
error
302-
#endif
303-
])],
304-
flint_cv_check_adx="yes",
305-
flint_cv_check_adx="no")
306-
)])
307-
AS_VAR_IF(flint_cv_check_adx,yes,
308-
[m4_default([$1], :)],
309-
[m4_default([$2], :)])
310-
])
311-
312-
313-
dnl FLINT_CHECK_ARMV8([action-success][,action-fail])
314-
dnl -----------------------
315-
dnl Checks if CPU supports the ARM v8-A instruction set. Will only run if host
316-
dnl CPU is aarch64. Do "action-success" if this succeeds, "action-fail" if not.
317-
318-
AC_DEFUN([FLINT_CHECK_ARMV8],
319-
[AS_VAR_IF([host_cpu],"aarch64",
320-
[AC_CACHE_CHECK([if ARM v8-A instruction set is supported by CPU],
321-
flint_cv_check_armv8a,
322-
FLINT_PREPROC_IFELSE([AC_LANG_SOURCE([
323-
#if __ARM_ARCH != 8
324-
#error Dead man
325-
error
326-
#endif
327-
])],
328-
flint_cv_check_armv8a="yes",
329-
flint_cv_check_armv8a="no")
330-
)])
331-
AS_VAR_IF(flint_cv_check_armv8a,yes,
332-
[m4_default([$1], :)],
333-
[m4_default([$2], :)])
334-
])
335-
336-
337-
dnl FLINT_HAVE_ASM([action-success][,action-fail])
338-
dnl -----------------------
339-
dnl Checks if system use FLINT's assembly.
340-
dnl Do "action-success" if this succeeds, "action-fail" if not.
341-
342-
AC_DEFUN([FLINT_HAVE_ASM],
343-
[AC_REQUIRE([FLINT_ABI])
344-
AC_REQUIRE([FLINT_SYSTEM_V_ABI])
345-
AC_REQUIRE([FLINT_CHECK_ADX])
346-
AC_REQUIRE([FLINT_CHECK_ARMV8])
347-
348-
AC_CACHE_CHECK([if system can use FLINT's assembly],
349-
flint_cv_have_asm,
350-
[flint_cv_have_asm="no"
351-
if test "$flint_cv_abi" = "64" && test "$flint_cv_system_v_abi" = "yes";
352-
then
353-
if test "$flint_cv_check_adx" = "yes" || test "$flint_cv_check_armv8a" = "yes";
354-
then
355-
flint_cv_have_asm="yes"
356-
fi
357-
fi])
358-
359-
AS_VAR_IF([flint_cv_have_asm],"yes",
360-
[m4_default([$1], :)],
361-
[m4_default([$2], :)])
362-
])
363-
364283

365284
dnl FLINT_HAVE_FFT_SMALL_ARM_H
366285
dnl -----------------------

0 commit comments

Comments
 (0)