Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

Implement function multi versioning in sysimg #21849

Merged
merged 9 commits into from
Oct 18, 2017
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
1 change: 1 addition & 0 deletions .travis.yml
Original file line number Diff line number Diff line change
Expand Up @@ -101,6 +101,7 @@ before_install:
export JULIA_CPU_CORES=2;
export JULIA_TEST_MAXRSS_MB=600;
TESTSTORUN="all --skip linalg/triangular subarray"; fi # TODO: re enable these if possible without timing out
- echo "override JULIA_CPU_TARGET=generic;native" >> Make.user
- git clone -q git://git.kitenet.net/moreutils
script:
- echo BUILDOPTS=$BUILDOPTS
Expand Down
5 changes: 0 additions & 5 deletions Make.inc
Original file line number Diff line number Diff line change
Expand Up @@ -81,11 +81,6 @@ HAVE_SSP := 0
WITH_GC_VERIFY := 0
WITH_GC_DEBUG_ENV := 0

# When set, give julia binaries CPUID specific names. This is useful in cluster environments
# with heterogeneous architectures. N.B.: will not be automatically rebuilt for all
# architectures if julia is updated.
CPUID_SPECIFIC_BINARIES ?= 0

# Prevent picking up $ARCH from the environment variables
ARCH:=

Expand Down
15 changes: 2 additions & 13 deletions Makefile
Original file line number Diff line number Diff line change
Expand Up @@ -102,17 +102,11 @@ julia-ui-release julia-ui-debug : julia-ui-% : julia-src-%
julia-inference : julia-base julia-ui-$(JULIA_BUILD_MODE) $(build_prefix)/.examples
@$(MAKE) $(QUIET_MAKE) -C $(BUILDROOT) $(build_private_libdir)/inference.ji JULIA_BUILD_MODE=$(JULIA_BUILD_MODE)

ifneq ($(CPUID_SPECIFIC_BINARIES), 0)
CPUID_TAG = _$(call exec,$(JULIA_EXECUTABLE) --cpuid)
else
CPUID_TAG =
endif

julia-sysimg-release : julia-inference julia-ui-release
@$(MAKE) $(QUIET_MAKE) -C $(BUILDROOT) $(build_private_libdir)/sys$(CPUID_TAG).$(SHLIB_EXT) JULIA_BUILD_MODE=release
@$(MAKE) $(QUIET_MAKE) -C $(BUILDROOT) $(build_private_libdir)/sys.$(SHLIB_EXT) JULIA_BUILD_MODE=release

julia-sysimg-debug : julia-inference julia-ui-debug
@$(MAKE) $(QUIET_MAKE) -C $(BUILDROOT) $(build_private_libdir)/sys-debug$(CPUID_TAG).$(SHLIB_EXT) JULIA_BUILD_MODE=debug
@$(MAKE) $(QUIET_MAKE) -C $(BUILDROOT) $(build_private_libdir)/sys-debug.$(SHLIB_EXT) JULIA_BUILD_MODE=debug

julia-debug julia-release : julia-% : julia-ui-% julia-sysimg-% julia-symlink julia-libccalltest

Expand Down Expand Up @@ -229,13 +223,8 @@ $$(build_private_libdir)/sys$1.o: $$(build_private_libdir)/inference.ji $$(JULIA
fi )
.SECONDARY: $(build_private_libdir)/sys$1.o
endef
ifneq ($(CPUID_SPECIFIC_BINARIES),0)
$(eval $(call sysimg_builder,_%,-O3,$(JULIA_EXECUTABLE_release)))
$(eval $(call sysimg_builder,-debug_%,-O0,$(JULIA_EXECUTABLE_debug)))
else
$(eval $(call sysimg_builder,,-O3,$(JULIA_EXECUTABLE_release)))
$(eval $(call sysimg_builder,-debug,-O0,$(JULIA_EXECUTABLE_debug)))
endif

$(build_depsbindir)/stringreplace: $(JULIAHOME)/contrib/stringreplace.c | $(build_depsbindir)
@$(call PRINT_CC, $(HOSTCC) -o $(build_depsbindir)/stringreplace $(JULIAHOME)/contrib/stringreplace.c)
Expand Down
2 changes: 0 additions & 2 deletions base/pkg/pkg.jl
Original file line number Diff line number Diff line change
Expand Up @@ -86,8 +86,6 @@ init(meta::AbstractString=DEFAULT_META, branch::AbstractString=META_BRANCH) = Di

function __init__()
vers = "v$(VERSION.major).$(VERSION.minor)"
vers = ccall(:jl_uses_cpuid_tag, Cint, ()) == 0 ? vers :
joinpath(vers,hex(ccall(:jl_cpuid_tag, UInt64, ()), 2*sizeof(UInt64)))
unshift!(Base.LOAD_CACHE_PATH, abspath(Dir._pkgroot(), "lib", vers))
end

Expand Down
1 change: 1 addition & 0 deletions contrib/windows/appveyor_build.sh
Original file line number Diff line number Diff line change
Expand Up @@ -53,6 +53,7 @@ else
echo 'LIBBLAS = -L$(JULIAHOME)/usr/bin -lopenblas' >> Make.user
echo 'LIBBLASNAME = libopenblas' >> Make.user
fi
echo "override JULIA_CPU_TARGET=generic;native" >> Make.user

# Set XC_HOST if in Cygwin or Linux
case $(uname) in
Expand Down
65 changes: 65 additions & 0 deletions doc/src/devdocs/sysimg.md
Original file line number Diff line number Diff line change
Expand Up @@ -38,3 +38,68 @@ and `force` set to `true`, one would execute:
```
julia build_sysimg.jl /tmp/sys core2 ~/userimg.jl --force
```

## System image optimized for multiple microarchitectures

The system image can be compiled simultaneously for multiple CPU microarchitectures
under the same instruction set architecture (ISA). Multiple versions of the same function
may be created with minimal dispatch points inserted into shared functions
in order to take advantage of different ISA extensions or other microarchitecture features.
The version that offers the best performance will be selected automatically at runtime
based on available features.

### Specifying multiple system image targets

A multi-microarchitecture system image can be enabled by passing multiple targets
during system image compilation. This can be done either with the `JULIA_CPU_TARGET` make option
or with the `-C` command line option when running the compilation command manually.
Multiple targets are separated by `;` in the option.
The syntax for each target is a CPU name followed by multiple features separated by `,`.
All features supported by LLVM are supported and a feature can be disabled with a `-` prefix.
(A `+` prefix is also allowed and ignored, to be consistent with LLVM syntax.)
Additionally, two special features are supported to control the function cloning behavior.

1. `clone_all`

By default, only functions that are the most likely to benefit from
the microarchitecture features will be cloned.
When `clone_all` is specified for a target, however,
**all** functions in the system image will be cloned for the target.
The negative form `-clone_all` can be used to prevent the built-in
heuristic from cloning all functions.

2. `base(<n>)`

Where `<n>` is a placeholder for a non-negative number (e.g. `base(0)`, `base(1)`).
By default, a partially cloned (i.e. not `clone_all`) target will use functions
from the default target (first one specified) if a function is not cloned.
This behavior can be changed by specifying a different base with the `base(<n>)` option.
The `n`th target (0-based) will be used as the base target instead of the default (`0`th) one.
The base target has to be either `0` or another `clone_all` target.
Specifying a non-`clone_all` target as the base target will cause an error.

### Implementation overview

This is a brief overview of the different parts involved in the implementation.
See the code comments for each component for more implementation details.

1. System image compilation

The parsing and cloning decisions are done in `src/processor*`.
We currently support cloning of functions based on the presence of loops, simd instructions,
or other math operations (e.g. fastmath, fma, muladd).
This information is passed on to `src/llvm-multiversioning.cpp` which does the actual cloning.
In addition to doing the cloning and inserting dispatch slots
(see comments in `MultiVersioning::runOnModule` for how this is done),
the pass also generates metadata so that the runtime can load and initialize the
system image correctly.
A detailed description of the metadata is available in `src/processor.h`.

2. System image loading

The loading and initialization of the system image is done in `src/processor*` by
parsing the metadata saved during system image generation.
Host feature detection and selection decisions are done in `src/processor_*.cpp`
depending on the ISA. The target selection will prefer exact CPU name match,
larger vector register size, and larger number of features.
An overview of this process is in `src/processor.cpp`.
17 changes: 8 additions & 9 deletions src/Makefile
Original file line number Diff line number Diff line change
Expand Up @@ -41,7 +41,7 @@ SRCS := \
simplevector APInt-C runtime_intrinsics runtime_ccall precompile \
threadgroup threading stackwalk gc gc-debug gc-pages method \
jlapi signal-handling safepoint jloptions timing subtype rtutils \
crc32c
crc32c processor

ifeq ($(USEMSVC), 1)
SRCS += getopt
Expand All @@ -52,7 +52,7 @@ LLVMLINK :=
ifeq ($(JULIACODEGEN),LLVM)
SRCS += codegen jitlayers disasm debuginfo llvm-simdloop llvm-ptls llvm-muladd \
llvm-late-gc-lowering llvm-lower-handlers llvm-gc-invariant-verifier \
llvm-propagate-addrspaces llvm-alloc-opt cgmemmgr
llvm-propagate-addrspaces llvm-multiversioning llvm-alloc-opt cgmemmgr
FLAGS += -I$(shell $(LLVM_CONFIG_HOST) --includedir)
LLVM_LIBS := all
ifeq ($(USE_POLLY),1)
Expand Down Expand Up @@ -109,10 +109,6 @@ SHIPFLAGS += $(FLAGS)
SHIPFLAGS += "-DJL_SYSTEM_IMAGE_PATH=\"$(build_private_libdir_rel)/sys.$(SHLIB_EXT)\""
DEBUGFLAGS += "-DJL_SYSTEM_IMAGE_PATH=\"$(build_private_libdir_rel)/sys-debug.$(SHLIB_EXT)\""

ifneq ($(CPUID_SPECIFIC_BINARIES), 0)
override CPPFLAGS += "-DCPUID_SPECIFIC_BINARIES=1"
endif

FLISP_EXECUTABLE_debug := $(BUILDDIR)/flisp/flisp-debug
FLISP_EXECUTABLE_release := $(BUILDDIR)/flisp/flisp
ifeq ($(OS),WINNT)
Expand Down Expand Up @@ -186,12 +182,15 @@ $(BUILDDIR)/julia_flisp.boot: $(addprefix $(SRCDIR)/,jlfrontend.scm flisp/aliase
# additional dependency links
$(BUILDDIR)/ast.o $(BUILDDIR)/ast.dbg.obj: $(BUILDDIR)/julia_flisp.boot.inc $(SRCDIR)/flisp/*.h
$(BUILDDIR)/codegen.o $(BUILDDIR)/codegen.dbg.obj: $(addprefix $(SRCDIR)/,\
intrinsics.cpp jitlayers.h intrinsics.h debuginfo.h codegen_shared.h cgutils.cpp ccall.cpp abi_*.cpp)
intrinsics.cpp jitlayers.h intrinsics.h debuginfo.h codegen_shared.h cgutils.cpp ccall.cpp abi_*.cpp processor.h)
$(BUILDDIR)/processor.o $(BUILDDIR)/processor.dbg.obj: $(addprefix $(SRCDIR)/,processor_*.cpp processor.h features_*.h)
$(BUILDDIR)/anticodegen.o $(BUILDDIR)/anticodegen.dbg.obj: $(SRCDIR)/intrinsics.h
$(BUILDDIR)/debuginfo.o $(BUILDDIR)/debuginfo.dbg.obj: $(SRCDIR)/debuginfo.h
$(BUILDDIR)/disasm.o $(BUILDDIR)/disasm.dbg.obj: $(SRCDIR)/debuginfo.h
$(BUILDDIR)/debuginfo.o $(BUILDDIR)/debuginfo.dbg.obj: \
$(addprefix $(SRCDIR)/,debuginfo.h processor.h)
$(BUILDDIR)/disasm.o $(BUILDDIR)/disasm.dbg.obj: $(SRCDIR)/debuginfo.h $(SRCDIR)/processor.h
$(BUILDDIR)/jitlayers.o $(BUILDDIR)/jitlayers.dbg.obj: $(SRCDIR)/jitlayers.h
$(BUILDDIR)/builtins.o $(BUILDDIR)/builtins.dbg.obj: $(SRCDIR)/table.c
$(BUILDDIR)/staticdata.o $(BUILDDIR)/staticdata.dbg.obj: $(SRCDIR)/processor.h
$(BUILDDIR)/gc.o $(BUILDDIR)/gc.dbg.obj: $(SRCDIR)/gc.h
$(BUILDDIR)/gc-debug.o $(BUILDDIR)/gc-debug.dbg.obj: $(SRCDIR)/gc.h
$(BUILDDIR)/gc-pages.o $(BUILDDIR)/gc-pages.dbg.obj: $(SRCDIR)/gc.h
Expand Down
4 changes: 2 additions & 2 deletions src/anticodegen.c
Original file line number Diff line number Diff line change
Expand Up @@ -36,10 +36,10 @@ int jl_getFunctionInfo(jl_frame_t **frames, uintptr_t pointer, int skipC, int no
return 0;
}

void jl_register_fptrs(uint64_t sysimage_base, const char *base, const int32_t *offsets,
void jl_register_fptrs(uint64_t sysimage_base, const struct _jl_sysimg_fptrs_t *fptrs,
jl_method_instance_t **linfos, size_t n)
{
(void)sysimage_base; (void)base; (void)offsets; (void)linfos; (void)n;
(void)sysimage_base; (void)fptrs; (void)linfos; (void)n;
}

void jl_compile_linfo(jl_method_instance_t *li) { }
Expand Down
Loading