
Commit 48275d0

Dlopen backend 5 (#779)
Major change to the backend that allows for pluggable versions of llama.cpp/ggml. This was squash-merged from the dlopen_backend_5 branch, where the history is preserved.
1 parent f4a1f73 commit 48275d0

22 files changed: +993 -327 lines
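
Big picture: instead of linking a single llama.cpp into llmodel, each model implementation (llamamodel, gptj, mpt) is now built as its own shared library against a pinned llama.cpp/ggml snapshot, and the core llmodel library loads a compatible one at runtime via dlopen/LoadLibrary (see the new dlhandle.h below). The following is only a rough sketch of what such runtime probing can look like; the directory scan and the exported get_model_name symbol are invented for illustration, and the commit's real selection logic lives in llmodel.cpp, which is not shown in this excerpt.

// Illustrative sketch only: enumerate candidate implementation libraries and
// ask each one to identify itself through a hypothetical exported entry point.
#include <filesystem>
#include <iostream>
#include <string>
#include <vector>
#include "dlhandle.h"

#ifndef LIB_FILE_EXT
#define LIB_FILE_EXT ".so"  // normally injected by CMake as CMAKE_SHARED_LIBRARY_SUFFIX
#endif

std::vector<Dlhandle> load_implementations(const std::string &dir) {
    std::vector<Dlhandle> impls;
    for (const auto &entry : std::filesystem::directory_iterator(dir)) {
        if (entry.path().extension() != LIB_FILE_EXT)
            continue;  // skip anything that is not a shared library
        try {
            Dlhandle lib(entry.path().string());
            // Hypothetical symbol; a library that does not export it is skipped.
            auto *name = lib.get<const char *()>("get_model_name");
            if (!name)
                continue;
            std::cout << "found implementation: " << name() << '\n';
            impls.push_back(std::move(lib));
        } catch (const Dlhandle::Exception &) {
            // Not a loadable library on this machine; ignore and keep scanning.
        }
    }
    return impls;
}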

.gitmodules

+8-2
@@ -1,3 +1,9 @@
-[submodule "llama.cpp"]
-    path = gpt4all-backend/llama.cpp
+[submodule "llama.cpp-230519"]
+    path = gpt4all-backend/llama.cpp-230519
+    url = https://github.com/ggerganov/llama.cpp.git
+[submodule "llama.cpp-230511"]
+    path = gpt4all-backend/llama.cpp-230511
     url = https://github.com/manyoso/llama.cpp.git
+[submodule "llama.cpp-mainline"]
+    path = gpt4all-backend/llama.cpp-mainline
+    url = https://github.com/ggerganov/llama.cpp.git

gpt4all-backend/CMakeLists.txt

+80-19
@@ -17,36 +17,97 @@ endif()
 include_directories("${CMAKE_CURRENT_BINARY_DIR}")
 
 set(LLMODEL_VERSION_MAJOR 0)
-set(LLMODEL_VERSION_MINOR 1)
-set(LLMODEL_VERSION_PATCH 1)
+set(LLMODEL_VERSION_MINOR 2)
+set(LLMODEL_VERSION_PATCH 0)
 set(LLMODEL_VERSION "${LLMODEL_VERSION_MAJOR}.${LLMODEL_VERSION_MINOR}.${LLMODEL_VERSION_PATCH}")
 project(llmodel VERSION ${LLMODEL_VERSION} LANGUAGES CXX C)
 
+set(CMAKE_CXX_STANDARD 20)
 set(CMAKE_CXX_STANDARD_REQUIRED ON)
+set(CMAKE_LIBRARY_OUTPUT_DIRECTORY ${CMAKE_RUNTIME_OUTPUT_DIRECTORY})
+set(BUILD_SHARED_LIBS ON)
 
-set(LLAMA_BUILD_EXAMPLES ON CACHE BOOL "llama: build examples" FORCE)
-set(BUILD_SHARED_LIBS ON FORCE)
+# Check for IPO support
+include(CheckIPOSupported)
+check_ipo_supported(RESULT IPO_SUPPORTED OUTPUT IPO_ERROR)
+if (NOT IPO_SUPPORTED)
+    message(WARNING "Interprocedural optimization is not supported by your toolchain! This will lead to bigger file sizes and worse performance: ${IPO_ERROR}")
+else()
+    message(STATUS "Interprocedural optimization support detected")
+endif()
+
+include(llama.cpp.cmake)
+
+set(BUILD_VARIANTS default avxonly)
 
 set(CMAKE_VERBOSE_MAKEFILE ON)
-if (GPT4ALL_AVX_ONLY)
-    set(LLAMA_AVX2 OFF CACHE BOOL "llama: enable AVX2" FORCE)
-    set(LLAMA_F16C OFF CACHE BOOL "llama: enable F16C" FORCE)
-    set(LLAMA_FMA OFF CACHE BOOL "llama: enable FMA" FORCE)
-endif()
 
-add_subdirectory(llama.cpp)
+# Go through each build variant
+foreach(BUILD_VARIANT IN LISTS BUILD_VARIANTS)
+    # Determine flags
+    if (BUILD_VARIANT STREQUAL avxonly)
+        set(GPT4ALL_ALLOW_NON_AVX NO)
+    else()
+        set(GPT4ALL_ALLOW_NON_AVX YES)
+    endif()
+    set(LLAMA_AVX2 ${GPT4ALL_ALLOW_NON_AVX})
+    set(LLAMA_F16C ${GPT4ALL_ALLOW_NON_AVX})
+    set(LLAMA_FMA ${GPT4ALL_ALLOW_NON_AVX})
+
+    # Include GGML
+    include_ggml(llama.cpp-mainline -mainline-${BUILD_VARIANT} ON)
+    include_ggml(llama.cpp-230511 -230511-${BUILD_VARIANT} ON)
+    include_ggml(llama.cpp-230519 -230519-${BUILD_VARIANT} ON)
+
+    # Function for preparing individual implementations
+    function(prepare_target TARGET_NAME BASE_LIB)
+        set(TARGET_NAME ${TARGET_NAME}-${BUILD_VARIANT})
+        message(STATUS "Configuring model implementation target ${TARGET_NAME}")
+        # Link to ggml/llama
+        target_link_libraries(${TARGET_NAME}
+            PUBLIC ${BASE_LIB}-${BUILD_VARIANT})
+        # Let it know about its build variant
+        target_compile_definitions(${TARGET_NAME}
+            PRIVATE GGML_BUILD_VARIANT="${BUILD_VARIANT}")
+        # Enable IPO if possible
+        set_property(TARGET ${TARGET_NAME}
+            PROPERTY INTERPROCEDURAL_OPTIMIZATION ${IPO_SUPPORTED})
+    endfunction()
+
+    # Add each individual implementations
+    add_library(llamamodel-mainline-${BUILD_VARIANT} SHARED
+        llamamodel.cpp)
+    target_compile_definitions(llamamodel-mainline-${BUILD_VARIANT} PRIVATE
+        LLAMA_VERSIONS=>=3 LLAMA_DATE=999999)
+    prepare_target(llamamodel-mainline llama-mainline)
+
+    add_library(llamamodel-230519-${BUILD_VARIANT} SHARED
+        llamamodel.cpp)
+    target_compile_definitions(llamamodel-230519-${BUILD_VARIANT} PRIVATE
+        LLAMA_VERSIONS===2 LLAMA_DATE=230519)
+    prepare_target(llamamodel-230519 llama-230519)
+
+    add_library(llamamodel-230511-${BUILD_VARIANT} SHARED
+        llamamodel.cpp)
+    target_compile_definitions(llamamodel-230511-${BUILD_VARIANT} PRIVATE
+        LLAMA_VERSIONS=<=1 LLAMA_DATE=230511)
+    prepare_target(llamamodel-230511 llama-230511)
+
+    add_library(gptj-${BUILD_VARIANT} SHARED
+        gptj.cpp utils.h utils.cpp)
+    prepare_target(gptj ggml-230511)
+
+    add_library(mpt-${BUILD_VARIANT} SHARED
+        mpt.cpp utils.h utils.cpp)
+    prepare_target(mpt ggml-230511)
+endforeach()
 
 add_library(llmodel
-    gptj.h gptj.cpp
-    llamamodel.h llamamodel.cpp
-    llama.cpp/examples/common.cpp
-    llmodel.h llmodel_c.h llmodel_c.cpp
-    mpt.h mpt.cpp
-    utils.h utils.cpp
+    llmodel.h llmodel.cpp
+    llmodel_c.h llmodel_c.cpp
+    dlhandle.h
 )
-
-target_link_libraries(llmodel
-    PRIVATE llama)
+target_compile_definitions(llmodel PRIVATE LIB_FILE_EXT="${CMAKE_SHARED_LIBRARY_SUFFIX}")
 
 set_target_properties(llmodel PROPERTIES
     VERSION ${PROJECT_VERSION}
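
Each implementation above is compiled once per build variant and per llama.cpp snapshot, with GGML_BUILD_VARIANT, LLAMA_VERSIONS and LLAMA_DATE injected as compile definitions so that a single llamamodel.cpp can know which snapshot and instruction-set variant it was built against. A minimal sketch of how such per-target definitions can be consumed follows; the exported names are hypothetical, not the symbols the commit actually defines.

// Illustrative only: one source file compiled several times with different
// per-target definitions can report which build it is.
#ifndef GGML_BUILD_VARIANT
#define GGML_BUILD_VARIANT "default"  // set by CMake: "default" or "avxonly"
#endif
#ifndef LLAMA_DATE
#define LLAMA_DATE 999999             // set by CMake: 230511, 230519, or 999999 for mainline
#endif

extern "C" {
// Plain C symbols are easy to resolve with Dlhandle::get() after dlopen().
const char *get_build_variant() { return GGML_BUILD_VARIANT; }
int get_llama_date() { return LLAMA_DATE; }
}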

gpt4all-backend/dlhandle.h

+101
@@ -0,0 +1,101 @@
+#ifndef DLHANDLE_H
+#define DLHANDLE_H
+#ifndef _WIN32
+#include <string>
+#include <stdexcept>
+#include <utility>
+#include <dlfcn.h>
+
+
+
+class Dlhandle {
+    void *chandle;
+
+public:
+    class Exception : public std::runtime_error {
+    public:
+        using std::runtime_error::runtime_error;
+    };
+
+    Dlhandle() : chandle(nullptr) {}
+    Dlhandle(const std::string& fpath, int flags = RTLD_LAZY) {
+        chandle = dlopen(fpath.c_str(), flags);
+        if (!chandle) {
+            throw Exception("dlopen(\""+fpath+"\"): "+dlerror());
+        }
+    }
+    Dlhandle(const Dlhandle& o) = delete;
+    Dlhandle(Dlhandle&& o) : chandle(o.chandle) {
+        o.chandle = nullptr;
+    }
+    ~Dlhandle() {
+        if (chandle) dlclose(chandle);
+    }
+
+    auto operator =(Dlhandle&& o) {
+        chandle = std::exchange(o.chandle, nullptr);
+    }
+
+    bool is_valid() const {
+        return chandle != nullptr;
+    }
+    operator bool() const {
+        return is_valid();
+    }
+
+    template<typename T>
+    T* get(const std::string& fname) {
+        auto fres = reinterpret_cast<T*>(dlsym(chandle, fname.c_str()));
+        return (dlerror()==NULL)?fres:nullptr;
+    }
+    auto get_fnc(const std::string& fname) {
+        return get<void*(...)>(fname);
+    }
+};
+#else
+#include <string>
+#include <exception>
+#include <stdexcept>
+#include <windows.h>
+#include <libloaderapi.h>
+
+
+
+class Dlhandle {
+    HMODULE chandle;
+
+public:
+    class Exception : public std::runtime_error {
+    public:
+        using std::runtime_error::runtime_error;
+    };
+
+    Dlhandle() : chandle(nullptr) {}
+    Dlhandle(const std::string& fpath) {
+        chandle = LoadLibraryA(fpath.c_str());
+        if (!chandle) {
+            throw Exception("dlopen(\""+fpath+"\"): Error");
+        }
+    }
+    Dlhandle(const Dlhandle& o) = delete;
+    Dlhandle(Dlhandle&& o) : chandle(o.chandle) {
+        o.chandle = nullptr;
+    }
+    ~Dlhandle() {
+        if (chandle) FreeLibrary(chandle);
+    }
+
+    bool is_valid() const {
+        return chandle != nullptr;
+    }
+
+    template<typename T>
+    T* get(const std::string& fname) {
+        return reinterpret_cast<T*>(GetProcAddress(chandle, fname.c_str()));
+    }
+    auto get_fnc(const std::string& fname) {
+        return get<void*(...)>(fname);
+    }
+};
+#endif
+#endif // DLHANDLE_H
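
For reference, a minimal usage sketch of the wrapper above (not part of the commit); the library path and the hello_world symbol are made up for illustration.

// Hypothetical caller: open a shared library, resolve a symbol, call it.
#include <iostream>
#include "dlhandle.h"

int main() {
    try {
        Dlhandle lib("./libexample-implementation.so");
        // Resolve a C symbol as a typed function pointer; returns nullptr on failure.
        auto *fn = lib.get<int(const char *)>("hello_world");
        if (fn)
            std::cout << "hello_world returned " << fn("backend") << '\n';
        else
            std::cerr << "symbol not found\n";
    } catch (const Dlhandle::Exception &e) {
        std::cerr << e.what() << '\n';  // dlopen/LoadLibrary failed
    }
    return 0;
}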
