forked from ggml-org/llama.cpp
-
Notifications
You must be signed in to change notification settings - Fork 1
/
Copy path: tests.sh
executable file
·74 lines (59 loc) · 1.82 KB
/
tests.sh
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
#!/bin/bash
# Integration tests for gguf-split: split a model with several strategies
# (max tensors, no-tensor-in-metadata, max size), merge it back, and verify
# that every produced artifact still loads and generates with `main`.
#
# Usage:   tests.sh path_to_build_binary
# Example: tests.sh ../../build/bin
set -euo pipefail

if [ $# -lt 1 ]; then
  # Diagnostics belong on stderr so stdout stays clean for PASS markers.
  echo "usage: $0 path_to_build_binary" >&2
  echo "example: $0 ../../build/bin" >&2
  exit 1
fi

set -x

SPLIT="$1/gguf-split"
MAIN="$1/main"
WORK_PATH=/tmp
CUR_DIR=$(pwd)

# Sanity-check that a (sharded or merged) model loads and can generate.
# Arguments: $1 - path to a .gguf file (first shard for split models)
check_load() {
  "$MAIN" --model "$1" --random-prompt --n-predict 32
}

# 1. Get a model
(
  # Subshell so the cd does not affect the rest of the script.
  cd "$WORK_PATH"
  "$CUR_DIR"/../../scripts/hf.sh --repo ggml-org/models --file tinyllamas/stories15M.gguf
)
echo PASS

# 2. Split with max tensors strategy
"$SPLIT" --split-max-tensors 28 "$WORK_PATH"/stories15M.gguf "$WORK_PATH"/ggml-model-split
echo PASS
echo

# 2b. Test the sharded model is loading properly
check_load "$WORK_PATH"/ggml-model-split-00001-of-00003.gguf
echo PASS
echo

# 3. Merge
"$SPLIT" --merge "$WORK_PATH"/ggml-model-split-00001-of-00003.gguf "$WORK_PATH"/ggml-model-merge.gguf
echo PASS
echo

# 3b. Test the merged model is loading properly
check_load "$WORK_PATH"/ggml-model-merge.gguf
echo PASS
echo

# 4. Split with no tensor in metadata
"$SPLIT" --split-max-tensors 32 --no-tensor-in-metadata "$WORK_PATH"/ggml-model-merge.gguf "$WORK_PATH"/ggml-model-split-32-tensors
echo PASS
echo

# 4b. Test the sharded model is loading properly
check_load "$WORK_PATH"/ggml-model-split-32-tensors-00001-of-00003.gguf
echo PASS
echo

# 5. Merge
"$SPLIT" --merge "$WORK_PATH"/ggml-model-split-32-tensors-00001-of-00003.gguf "$WORK_PATH"/ggml-model-merge-2.gguf
echo PASS
echo

# 5b. Test the merged model is loading properly
check_load "$WORK_PATH"/ggml-model-merge-2.gguf
echo PASS
echo

# 6. Split with size strategy and no tensor in metadata
"$SPLIT" --split-max-size 40M "$WORK_PATH"/ggml-model-merge-2.gguf "$WORK_PATH"/ggml-model-split-40M
echo PASS
echo

# 6b. Test the sharded model is loading properly
check_load "$WORK_PATH"/ggml-model-split-40M-00001-of-00003.gguf
echo PASS
echo