Skip to content

Commit a913ad9

Browse files
committed
Merge remote-tracking branch 'upstream/concedo'
2 parents 46fa845 + 93c4b2a commit a913ad9

File tree

8 files changed

+176
-97
lines changed

8 files changed

+176
-97
lines changed

Diff for: Makefile

+8-3
Original file line numberDiff line numberDiff line change
@@ -75,6 +75,10 @@ endif
7575
ifeq ($(UNAME_S),Darwin)
7676
CFLAGS += -pthread
7777
CXXFLAGS += -pthread
78+
CLANG_VER = $(shell clang -v 2>&1 | head -n 1 | awk 'BEGIN {FS="[. ]"};{print $$1 $$2 $$4}')
79+
ifeq ($(CLANG_VER),Appleclang15)
80+
LDFLAGS += -ld_classic
81+
endif
7882
endif
7983
ifeq ($(UNAME_S),FreeBSD)
8084
CFLAGS += -pthread
@@ -116,7 +120,11 @@ ifeq ($(UNAME_M),$(filter $(UNAME_M),x86_64 i686))
116120
FULLCFLAGS += -mavx2 -msse3 -mfma -mf16c -mavx
117121
else
118122
# if not on windows, they are clearly building it themselves, so lets just use whatever is supported
123+
ifdef LLAMA_COLAB
124+
CFLAGS += -mavx2 -msse3 -mfma -mf16c -mavx
125+
else
119126
CFLAGS += -march=native -mtune=native
127+
endif
120128
endif
121129
endif
122130
ifneq ($(filter ppc64%,$(UNAME_M)),)
@@ -205,9 +213,6 @@ ifdef LLAMA_HIPBLAS
205213
LLAMA_CUDA_MMV_Y ?= 1
206214
LLAMA_CUDA_KQUANTS_ITER ?= 2
207215
HIPFLAGS += -DGGML_USE_HIPBLAS -DGGML_USE_CUBLAS $(shell $(ROCM_PATH)/bin/hipconfig -C)
208-
# ifdef LLAMA_CUDA_FORCE_DMMV
209-
# HIPFLAGS += -DGGML_CUDA_FORCE_DMMV
210-
# endif # LLAMA_CUDA_FORCE_DMMV
211216
HIPLDFLAGS += -L$(ROCM_PATH)/lib -Wl,-rpath=$(ROCM_PATH)/lib -lhipblas -lamdhip64 -lrocblas
212217
HIP_OBJS += ggml-cuda.o ggml_v2-cuda.o ggml_v2-cuda-legacy.o
213218
ggml-cuda.o: HIPFLAGS += $(addprefix --offload-arch=,$(GPU_TARGETS)) \

Diff for: cmake/FindSIMD.cmake

+100
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,100 @@
# Self-test C programs for the SIMD feature probes below.  Each snippet
# exercises an intrinsic from the instruction-set extension it is named
# after, so a successful compile-and-run of ${<type>_CODE} proves both
# compiler and host-CPU support for <type>.
include(CheckCSourceRuns)

set(AVX_CODE "
#include <immintrin.h>
int main()
{
    __m256 a;
    a = _mm256_set1_ps(0);
    return 0;
}
")

set(AVX512_CODE "
#include <immintrin.h>
int main()
{
    __m512i a = _mm512_set_epi8(0, 0, 0, 0, 0, 0, 0, 0,
                                0, 0, 0, 0, 0, 0, 0, 0,
                                0, 0, 0, 0, 0, 0, 0, 0,
                                0, 0, 0, 0, 0, 0, 0, 0,
                                0, 0, 0, 0, 0, 0, 0, 0,
                                0, 0, 0, 0, 0, 0, 0, 0,
                                0, 0, 0, 0, 0, 0, 0, 0,
                                0, 0, 0, 0, 0, 0, 0, 0);
    __m512i b = a;
    __mmask64 equality_mask = _mm512_cmp_epi8_mask(a, b, _MM_CMPINT_EQ);
    return 0;
}
")

set(AVX2_CODE "
#include <immintrin.h>
int main()
{
    __m256i a = {0};
    a = _mm256_abs_epi16(a);
    __m256i x;
    _mm256_extract_epi64(x, 0); // we rely on this in our AVX2 code
    return 0;
}
")

set(FMA_CODE "
#include <immintrin.h>
int main()
{
    __m256 acc = _mm256_setzero_ps();
    const __m256 d = _mm256_setzero_ps();
    const __m256 p = _mm256_setzero_ps();
    acc = _mm256_fmadd_ps( d, p, acc );
    return 0;
}
")
55+
macro(check_sse type flags)
56+
set(__FLAG_I 1)
57+
set(CMAKE_REQUIRED_FLAGS_SAVE ${CMAKE_REQUIRED_FLAGS})
58+
foreach (__FLAG ${flags})
59+
if (NOT ${type}_FOUND)
60+
set(CMAKE_REQUIRED_FLAGS ${__FLAG})
61+
check_c_source_runs("${${type}_CODE}" HAS_${type}_${__FLAG_I})
62+
if (HAS_${type}_${__FLAG_I})
63+
set(${type}_FOUND TRUE CACHE BOOL "${type} support")
64+
set(${type}_FLAGS "${__FLAG}" CACHE STRING "${type} flags")
65+
endif()
66+
math(EXPR __FLAG_I "${__FLAG_I}+1")
67+
endif()
68+
endforeach()
69+
set(CMAKE_REQUIRED_FLAGS ${CMAKE_REQUIRED_FLAGS_SAVE})
70+
71+
if (NOT ${type}_FOUND)
72+
set(${type}_FOUND FALSE CACHE BOOL "${type} support")
73+
set(${type}_FLAGS "" CACHE STRING "${type} flags")
74+
endif()
75+
76+
mark_as_advanced(${type}_FOUND ${type}_FLAGS)
77+
endmacro()
78+
# Run the probes and translate the cached *_FOUND results into the
# LLAMA_* switches consumed by the main build.
#
# flags are for MSVC only!  On other compilers the leading " " entry
# (an effectively empty flag) is what actually gets exercised.
#
# NOTE: the *_FOUND names are tested directly — if() auto-dereferences
# variable names, so the previous `if (NOT ${AVX_FOUND})` form expanded
# the value twice (and would error out on an unset/empty variable).
check_sse("AVX" " ;/arch:AVX")
if (AVX_FOUND)
    set(LLAMA_AVX ON)
else()
    set(LLAMA_AVX OFF)
endif()

# AVX2 support is only claimed when FMA also works, since the AVX2 code
# paths use fused multiply-add.
check_sse("AVX2" " ;/arch:AVX2")
check_sse("FMA" " ;/arch:AVX2")
if (AVX2_FOUND AND FMA_FOUND)
    set(LLAMA_AVX2 ON)
else()
    set(LLAMA_AVX2 OFF)
endif()

check_sse("AVX512" " ;/arch:AVX512")
if (AVX512_FOUND)
    set(LLAMA_AVX512 ON)
else()
    set(LLAMA_AVX512 OFF)
endif()

Diff for: colab.ipynb

+5-2
Original file line numberDiff line numberDiff line change
@@ -51,6 +51,7 @@
5151
"Model = \"https://huggingface.co/KoboldAI/LLaMA2-13B-Tiefighter-GGUF/resolve/main/LLaMA2-13B-Tiefighter.Q4_K_M.gguf\" #@param [\"https://huggingface.co/KoboldAI/LLaMA2-13B-Tiefighter-GGUF/resolve/main/LLaMA2-13B-Tiefighter.Q4_K_M.gguf\",\"https://huggingface.co/TheBloke/MythoMax-L2-13B-GGUF/resolve/main/mythomax-l2-13b.Q4_K_M.gguf\",\"https://huggingface.co/TheBloke/ReMM-SLERP-L2-13B-GGUF/resolve/main/remm-slerp-l2-13b.Q4_K_M.gguf\",\"https://huggingface.co/TheBloke/Xwin-LM-13B-v0.2-GGUF/resolve/main/xwin-lm-13b-v0.2.Q4_K_M.gguf\",\"https://huggingface.co/TheBloke/Stheno-L2-13B-GGUF/resolve/main/stheno-l2-13b.Q4_K_M.gguf\",\"https://huggingface.co/TheBloke/Mistral-7B-Instruct-v0.1-GGUF/resolve/main/mistral-7b-instruct-v0.1.Q4_K_S.gguf\"]{allow-input: true}\r\n",
5252
"Layers = 43 #@param [43]{allow-input: true}\r\n",
5353
"ContextSize = 4096 #@param [4096] {allow-input: true}\r\n",
54+
"ForceRebuild = False #@param {type:\"boolean\"}\r\n",
5455
"\r\n",
5556
"import os\r\n",
5657
"if not os.path.isfile(\"/opt/bin/nvidia-smi\"):\r\n",
@@ -61,13 +62,15 @@
6162
"%cd /content/koboldcpp\r\n",
6263
"kvers = !(cat koboldcpp.py | grep 'KcppVersion = ' | cut -d '\"' -f2)\r\n",
6364
"kvers = kvers[0]\r\n",
65+
"if ForceRebuild:\r\n",
66+
" kvers = \"force_rebuild\"\r\n",
6467
"!echo Finding prebuilt binary for {kvers}\r\n",
6568
"!wget -O dlfile.tmp https://kcppcolab.concedo.workers.dev/?{kvers} && mv dlfile.tmp koboldcpp_cublas.so\r\n",
6669
"!test -f koboldcpp_cublas.so && echo Prebuilt Binary Exists || echo Prebuilt Binary Does Not Exist\r\n",
67-
"!test -f koboldcpp_cublas.so && echo Build Skipped || make koboldcpp_cublas LLAMA_CUBLAS=1\r\n",
70+
"!test -f koboldcpp_cublas.so && echo Build Skipped || make koboldcpp_cublas LLAMA_CUBLAS=1 LLAMA_COLAB=1\r\n",
6871
"!cp koboldcpp_cublas.so koboldcpp_cublas.dat\r\n",
6972
"!apt install aria2 -y\r\n",
70-
"!aria2c -x 10 -o model.ggml --allow-overwrite=true --file-allocation=none $Model\r\n",
73+
"!aria2c -x 10 -o model.ggml --summary-interval=5 --download-result=default --allow-overwrite=true --file-allocation=none $Model\r\n",
7174
"!python koboldcpp.py model.ggml --usecublas 0 mmq --multiuser --gpulayers $Layers --contextsize $ContextSize --hordeconfig concedo 1 1 --remotetunnel\r\n"
7275
]
7376
}

0 commit comments

Comments (0)