
Commit d194586

Merge 'origin/master' into hipblas
2 parents: 2ab9d11 + 7f15c5c

20 files changed: +1092 −25 lines

.gitignore

+2
@@ -41,3 +41,5 @@ zig-out/
 zig-cache/
 
 ppl-*.txt
+
+examples/jeopardy/results.txt

CMakeLists.txt

+18 −2

@@ -67,6 +67,7 @@ endif()
 option(LLAMA_ACCELERATE "llama: enable Accelerate framework" ON)
 option(LLAMA_OPENBLAS "llama: use OpenBLAS" OFF)
 option(LLAMA_CUBLAS "llama: use cuBLAS" OFF)
+option(LLAMA_CLBLAST "llama: use CLBlast" OFF)
 option(LLAMA_HIPBLAS "llama: use hipBLAS" OFF)
 
 option(LLAMA_BUILD_TESTS "llama: build tests" ${LLAMA_STANDALONE})
@@ -169,6 +170,21 @@ if (LLAMA_CUBLAS)
     endif()
 endif()
 
+if (LLAMA_CLBLAST)
+    find_package(CLBlast)
+    if (CLBlast_FOUND)
+        message(STATUS "CLBlast found")
+
+        set(GGML_OPENCL_SOURCES ggml-opencl.c ggml-opencl.h)
+
+        add_compile_definitions(GGML_USE_CLBLAST)
+
+        set(LLAMA_EXTRA_LIBS ${LLAMA_EXTRA_LIBS} clblast)
+    else()
+        message(WARNING "CLBlast not found")
+    endif()
+endif()
+
 if (LLAMA_HIPBLAS)
     list(APPEND CMAKE_PREFIX_PATH /opt/rocm)
 
@@ -196,7 +212,6 @@ if (LLAMA_HIPBLAS)
     else()
         message(WARNING "hipBLAS or HIP not found. Try setting CMAKE_PREFIX_PATH=/opt/rocm")
     endif()
-
 endif()
 
 if (LLAMA_ALL_WARNINGS)
@@ -338,7 +353,8 @@ endif()
 add_library(ggml OBJECT
             ggml.c
             ggml.h
-            ${GGML_CUDA_SOURCES})
+            ${GGML_CUDA_SOURCES}
+            ${GGML_OPENCL_SOURCES})
 
 target_include_directories(ggml PUBLIC .)
 target_compile_features(ggml PUBLIC c_std_11) # don't bump
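To see how this hooks into a build, here is a minimal sketch of configuring with CLBlast enabled; the build directory layout and generator defaults are assumptions, not part of this commit:

```
# sketch only: assumes CLBlast is installed where CMake's find_package(CLBlast) can locate it
mkdir build
cd build
cmake .. -DLLAMA_CLBLAST=ON
cmake --build . --config Release
```

If find_package(CLBlast) fails, the new branch above only prints the "CLBlast not found" warning, the OpenCL sources are never added, and the build falls back to the plain configuration.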

Makefile

+9 −2

@@ -105,14 +105,21 @@ ifdef LLAMA_OPENBLAS
    LDFLAGS += -lopenblas
 endif
 ifdef LLAMA_CUBLAS
-   CFLAGS += -DGGML_USE_CUBLAS -I/usr/local/cuda/include
-   LDFLAGS += -lcublas -lculibos -lcudart -lcublasLt -lpthread -ldl -lrt -L/usr/local/cuda/lib64
+   CFLAGS += -DGGML_USE_CUBLAS -I/usr/local/cuda/include -I/opt/cuda/include -I$(CUDA_PATH)/targets/x86_64-linux/include
+   LDFLAGS += -lcublas -lculibos -lcudart -lcublasLt -lpthread -ldl -lrt -L/usr/local/cuda/lib64 -L/opt/cuda/lib64 -L$(CUDA_PATH)/targets/x86_64-linux/lib
    OBJS += ggml-cuda.o
    NVCC = nvcc
    NVCCFLAGS = --forward-unknown-to-host-compiler -arch=native
 ggml-cuda.o: ggml-cuda.cu ggml-cuda.h
    $(NVCC) $(NVCCFLAGS) $(CXXFLAGS) -Wno-pedantic -c $< -o $@
 endif
+ifdef LLAMA_CLBLAST
+   CFLAGS += -DGGML_USE_CLBLAST
+   LDFLAGS += -lclblast -lOpenCL
+   OBJS += ggml-opencl.o
+ggml-opencl.o: ggml-opencl.c ggml-opencl.h
+   $(CC) $(CFLAGS) -c $< -o $@
+endif
 ifdef LLAMA_HIPBLAS
    ROCM_PATH ?= /opt/rocm
    CC := $(ROCM_PATH)/llvm/bin/clang
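The corresponding Makefile-driven builds might look like this; the CUDA_PATH value is an assumption about a typical install and only matters when CUDA is not under /usr/local/cuda or /opt/cuda:

```
# OpenCL/CLBlast build: compiles ggml-opencl.o and links -lclblast -lOpenCL, per the block above
make LLAMA_CLBLAST=1

# cuBLAS build, pointing at a non-default CUDA location via the new CUDA_PATH search paths
make LLAMA_CUBLAS=1 CUDA_PATH=/usr/local/cuda
```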

README.md

+2 −4

@@ -9,10 +9,8 @@ Inference of [LLaMA](https://arxiv.org/abs/2302.13971) model in pure C/C++
 
 **Hot topics:**
 
+- [Roadmap May 2023](https://github.com/ggerganov/llama.cpp/discussions/1220)
 - [New quantization methods](https://github.com/ggerganov/llama.cpp#quantization)
-- [Added LoRA support](https://github.com/ggerganov/llama.cpp/pull/820)
-- [Add GPU support to ggml](https://github.com/ggerganov/llama.cpp/discussions/915)
-- [Roadmap Apr 2023](https://github.com/ggerganov/llama.cpp/discussions/784)
 
 ## Description
 
@@ -174,7 +172,7 @@ In order to build llama.cpp you have three different options.
 
 - On Windows:
 
-    1. Download the latest fortran version of [w64devkit](https://github.com/seeto/w64devkit/releases).
+    1. Download the latest fortran version of [w64devkit](https://github.com/skeeto/w64devkit/releases).
     2. Extract `w64devkit` on your pc.
     3. Run `w64devkit.exe`.
     4. Use the `cd` command to reach the `llama.cpp` folder.
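For completeness, a rough sketch of what follows step 4 in the Windows instructions; the repository path is a placeholder and the plain `make` invocation is the usual llama.cpp build, not something introduced by this diff:

```
# inside the w64devkit shell; the path is illustrative
cd C:/path/to/llama.cpp
make
```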

examples/chat-13B.sh

+2 −2

@@ -31,8 +31,6 @@ The transcript only includes text, it does not include markup like HTML and Mark
 
 $USER_NAME: Hello, $AI_NAME!
 $AI_NAME: Hello $USER_NAME! How may I help you today?
-$USER_NAME: What time is it?
-$AI_NAME: It is $(date +%H:%M).
 $USER_NAME: What year is it?
 $AI_NAME: We are in $(date +%Y).
 $USER_NAME: Please tell me the largest city in Europe.
@@ -50,4 +48,6 @@ $AI_NAME: The arguments are stored in process.argv.
 argv[3] is the second argument passed to the script and so on.
 $USER_NAME: Name a color.
 $AI_NAME: Blue
+$USER_NAME: What time is it?
+$AI_NAME: It is $(date +%H:%M).
 $USER_NAME:" "$@"

examples/common.cpp

+7

@@ -61,6 +61,12 @@ bool gpt_params_parse(int argc, char ** argv, gpt_params & params) {
                 break;
             }
             params.prompt = argv[i];
+        } else if (arg == "--session") {
+            if (++i >= argc) {
+                invalid_param = true;
+                break;
+            }
+            params.path_session = argv[i];
         } else if (arg == "-f" || arg == "--file") {
             if (++i >= argc) {
                 invalid_param = true;
@@ -228,6 +234,7 @@ void gpt_print_usage(int /*argc*/, char ** argv, const gpt_params & params) {
     fprintf(stderr, " -t N, --threads N number of threads to use during computation (default: %d)\n", params.n_threads);
     fprintf(stderr, " -p PROMPT, --prompt PROMPT\n");
     fprintf(stderr, " prompt to start generation with (default: empty)\n");
+    fprintf(stderr, " --session FNAME file to cache model state in (may be large!) (default: none)\n");
     fprintf(stderr, " --random-prompt start with a randomized prompt.\n");
     fprintf(stderr, " --in-prefix STRING string to prefix user inputs with (default: empty)\n");
     fprintf(stderr, " -f FNAME, --file FNAME\n");

examples/common.h

+1

@@ -31,6 +31,7 @@ struct gpt_params {
 
     std::string model = "models/lamma-7B/ggml-model.bin"; // model path
     std::string prompt = "";
+    std::string path_session = ""; // path to file for saving/loading model eval state
     std::string input_prefix = ""; // string to prefix user inputs with
     std::vector<std::string> antiprompt; // string upon seeing which more user input is prompted

examples/jeopardy/README.md

+21

@@ -0,0 +1,21 @@
+# llama.cpp/example/jeopardy
+
+This is pretty much just a straight port of aigoopy/llm-jeopardy/ with an added graph viewer.
+
+The jeopardy test can be used to compare the fact knowledge of different models and compare them to each other. This is in contrast to some other tests, which test logical deduction, creativity, writing skills, etc.
+
+
+Step 1: Open jeopardy.sh and modify the following:
+```
+MODEL=(path to your model)
+MODEL_NAME=(name of your model)
+prefix=(basically, if you use vicuna it's Human: , if you use something else it might be User: , etc)
+opts=(add -instruct here if needed for your model, or anything else you want to test out)
+```
+Step 2: Run `jeopardy.sh` from the llama.cpp folder
+
+Step 3: Repeat steps 1 and 2 until you have all the results you need.
+
+Step 4: Run `graph.py`, and follow the instructions. At the end, it will generate your final graph.
+
+Note: The Human bar is based off of the full, original 100 sample questions. If you modify the question count or questions, it will not be valid.
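As an illustration of Step 1, the variables might be filled in like this; the values simply mirror the defaults shipped in jeopardy.sh later in this commit:

```
MODEL=./models/ggml-vicuna-13b-1.1-q4_0.bin
MODEL_NAME=Vicuna
prefix="Human: "
opts="--temp 0 -n 80"
```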

examples/jeopardy/graph.py

+56

@@ -0,0 +1,56 @@
+import matplotlib.pyplot as plt
+import sys, os
+import csv
+
+labels = []
+numbers = []
+numEntries = 1
+
+rows = []
+
+def bar_chart(numbers, labels, pos):
+    plt.bar(pos, numbers, color='blue')
+    plt.xticks(ticks=pos, labels=labels)
+    plt.title("Jeopardy Results by Model")
+    plt.xlabel("Model")
+    plt.ylabel("Questions Correct")
+    plt.show()
+
+def calculatecorrect():
+    directory = os.fsencode("./examples/jeopardy/results/")
+    csv_reader = csv.reader(open("./examples/jeopardy/qasheet.csv", 'rt'), delimiter=',')
+    for row in csv_reader:
+        global rows
+        rows.append(row)
+    for listing in os.listdir(directory):
+        filename = os.fsdecode(listing)
+        if filename.endswith(".txt"):
+            file = open("./examples/jeopardy/results/" + filename, "rt")
+            global labels
+            global numEntries
+            global numbers
+            labels.append(filename[:-4])
+            numEntries += 1
+            i = 1
+            totalcorrect = 0
+            for line in file.readlines():
+                if line.strip() != "------":
+                    print(line)
+                else:
+                    print("Correct answer: " + rows[i][2] + "\n")
+                    i+=1
+                    print("Did the AI get the question right? (y/n)")
+                    if input() == "y":
+                        totalcorrect += 1
+            numbers.append(totalcorrect)
+
+
+
+if __name__ == '__main__':
+    calculatecorrect()
+    pos = list(range(numEntries))
+    labels.append("Human")
+    numbers.append(48.11)
+    bar_chart(numbers, labels, pos)
+    print(labels)
+    print(numbers)
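A short note on running the script above: every path in it is relative to the repository root, and it needs matplotlib; the pip package name below is the usual one and is an assumption, not stated by the commit:

```
# run from the llama.cpp root so ./examples/jeopardy/results/ and qasheet.csv resolve
pip install matplotlib
python3 ./examples/jeopardy/graph.py
# for each results/*.txt file it replays the answers and asks "Did the AI get the question right? (y/n)"
```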

examples/jeopardy/jeopardy.sh

+30

@@ -0,0 +1,30 @@
+#!/bin/bash
+set -e
+
+MODEL=./models/ggml-vicuna-13b-1.1-q4_0.bin
+MODEL_NAME=Vicuna
+
+# exec options
+prefix="Human: " # Ex. Vicuna uses "Human: "
+opts="--temp 0 -n 80" # additional flags
+nl='
+'
+introduction="You will be playing a game of Jeopardy. Simply answer the question in the correct format (Ex. What is Paris, or Who is George Washington)."
+
+# file options
+question_file=./examples/jeopardy/questions.txt
+touch ./examples/jeopardy/results/$MODEL_NAME.txt
+output_file=./examples/jeopardy/results/$MODEL_NAME.txt
+
+counter=1
+
+echo 'Running'
+while IFS= read -r question
+do
+  exe_cmd="./main -p "\"$prefix$introduction$nl$prefix$question\"" "$opts" -m ""\"$MODEL\""" >> ""\"$output_file\""
+  echo $counter
+  echo "Current Question: $question"
+  eval "$exe_cmd"
+  echo -e "\n------" >> $output_file
+  counter=$((counter+1))
+done < "$question_file"
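A sketch of invoking the script; the mkdir is an assumption about a fresh checkout, since the script only touches the results file, not the directory:

```
# run from the llama.cpp root so ./main and the relative question/result paths resolve
mkdir -p ./examples/jeopardy/results
./examples/jeopardy/jeopardy.sh
```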
