
Commit 2956630

Merge 'origin/master' into hipblas

2 parents 0fe6384 + f048af0

File tree

12 files changed (+403, -189 lines)

.clang-tidy (+18)

@@ -0,0 +1,18 @@
+---
+Checks: >
+    bugprone-*,
+    -bugprone-easily-swappable-parameters,
+    -bugprone-implicit-widening-of-multiplication-result,
+    -bugprone-narrowing-conversions,
+    readability-*,
+    -readability-avoid-unconditional-preprocessor-if,
+    -readability-function-cognitive-complexity,
+    -readability-identifier-length,
+    -readability-implicit-bool-conversion,
+    -readability-magic-numbers,
+    -readability-uppercase-literal-suffix,
+    clang-analyzer-*,
+    -clang-analyzer-security.insecureAPI.DeprecatedOrUnsafeBufferHandling,
+    performance-*,
+    portability-*,
+FormatStyle: none
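For context, a minimal sketch (illustrative, not from the repository) of the pattern that the disabled bugprone-implicit-widening-of-multiplication-result check would otherwise flag. In a codebase full of 32-bit tensor-dimension products, the check fires on nearly every size computation, which is presumably why it is opted out here:

#include <cstdint>

// Flagged by the check: the multiply happens in 32-bit int and only
// the result is widened, so it can overflow before the conversion.
int64_t n_elements_flagged(int ne0, int ne1) {
    return ne0 * ne1;
}

// The suggested fix: widen one operand first so the multiplication
// itself is performed in 64 bits.
int64_t n_elements_ok(int ne0, int ne1) {
    return static_cast<int64_t>(ne0) * ne1;
}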

.github/workflows/tidy-post.yml (+20)

@@ -0,0 +1,20 @@
+name: clang-tidy review post comments
+
+on:
+  workflow_run:
+    workflows: ["clang-tidy-review"]
+    types:
+      - completed
+
+jobs:
+  build:
+    runs-on: ubuntu-latest
+
+    steps:
+      - uses: ZedThree/clang-tidy-review/[email protected]
+        # lgtm_comment_body, max_comments, and annotations need to be set on the posting workflow in a split setup
+        with:
+          # adjust options as necessary
+          lgtm_comment_body: ''
+          annotations: false
+          max_comments: 25

.github/workflows/tidy-review.yml (+23)

@@ -0,0 +1,23 @@
+name: clang-tidy-review
+
+on:
+  pull_request:
+    branches:
+      - master
+
+jobs:
+  clang-tidy-review:
+    runs-on: ubuntu-latest
+
+    steps:
+      - uses: actions/checkout@v3
+
+      - uses: ZedThree/[email protected]
+        id: review
+        with:
+          lgtm_comment_body: ''
+          build_dir: build
+          cmake_command: cmake . -B build -DCMAKE_EXPORT_COMPILE_COMMANDS=on
+          split_workflow: true
+
+      - uses: ZedThree/clang-tidy-review/[email protected]

.gitignore (+1)

@@ -16,6 +16,7 @@ build-debug/
 build-release/
 build-static/
 build-cublas/
+build-opencl/
 build-no-accel/
 build-sanitize-addr/
 build-sanitize-thread/

README.md (+3 -3)

@@ -9,7 +9,7 @@ Inference of [LLaMA](https://arxiv.org/abs/2302.13971) model in pure C/C++

 **Hot topics:**

-- Qauntization formats `Q4` and `Q5` have changed - requantize any old models [(info)](https://github.com/ggerganov/llama.cpp/pull/1405)
+- Quantization formats `Q4` and `Q5` have changed - requantize any old models [(info)](https://github.com/ggerganov/llama.cpp/pull/1405)
 - [Roadmap May 2023](https://github.com/ggerganov/llama.cpp/discussions/1220)

 <details>
@@ -333,12 +333,12 @@ Several quantization methods are supported. They differ in the resulting model d

 | Model | Measure      | F16    | Q4_0   | Q4_1   | Q5_0   | Q5_1   | Q8_0   |
 |------:|--------------|-------:|-------:|-------:|-------:|-------:|-------:|
-| 7B    | perplexity   | 5.9066 | 6.1620 | 6.0910 | 5.9862 | 5.9481 | 5.9069 |
+| 7B    | perplexity   | 5.9066 | 6.1565 | 6.0910 | 5.9862 | 5.9481 | 5.9069 |
 | 7B    | file size    | 13.0G  | 4.0G   | 4.8G   | 4.4G   | 4.8G   | 7.1G   |
 | 7B    | ms/tok @ 4th | 128    | 50     | 54     | 75     | 83     | 75     |
 | 7B    | ms/tok @ 8th | 123    | 44     | 52     | 53     | 58     | 72     |
 | 7B    | bits/weight  | 16.0   | 5.0    | 6.0    | 5.5    | 6.0    | 9.0    |
-| 13B   | perplexity   | 5.2543 | 5.3863 | 5.3607 | 5.2856 | 5.2706 | 5.2548 |
+| 13B   | perplexity   | 5.2543 | 5.3860 | 5.3607 | 5.2856 | 5.2706 | 5.2548 |
 | 13B   | file size    | 25.0G  | 7.6G   | 9.1G   | 8.4G   | 9.1G   | 14G    |
 | 13B   | ms/tok @ 4th | 239    | 93     | 101    | 150    | 164    | 141    |
 | 13B   | ms/tok @ 8th | 240    | 81     | 96     | 96     | 104    | 136    |

examples/common.cpp (+30 -26)

@@ -91,9 +91,13 @@ bool gpt_params_parse(int argc, char ** argv, gpt_params & params) {
     bool escape_prompt = false;
     std::string arg;
     gpt_params default_params;
+    const std::string arg_prefix = "--";

     for (int i = 1; i < argc; i++) {
         arg = argv[i];
+        if (arg.compare(0, arg_prefix.size(), arg_prefix) == 0) {
+            std::replace(arg.begin(), arg.end(), '_', '-');
+        }

         if (arg == "-s" || arg == "--seed") {
 #if defined(GGML_USE_CUBLAS)
@@ -141,27 +145,27 @@ bool gpt_params_parse(int argc, char ** argv, gpt_params & params) {
             if (params.prompt.back() == '\n') {
                 params.prompt.pop_back();
             }
-        } else if (arg == "-n" || arg == "--n_predict") {
+        } else if (arg == "-n" || arg == "--n-predict") {
             if (++i >= argc) {
                 invalid_param = true;
                 break;
             }
             params.n_predict = std::stoi(argv[i]);
-        } else if (arg == "--top_k") {
+        } else if (arg == "--top-k") {
             if (++i >= argc) {
                 invalid_param = true;
                 break;
             }
             params.top_k = std::stoi(argv[i]);
-        } else if (arg == "-c" || arg == "--ctx_size") {
+        } else if (arg == "-c" || arg == "--ctx-size") {
             if (++i >= argc) {
                 invalid_param = true;
                 break;
             }
             params.n_ctx = std::stoi(argv[i]);
-        } else if (arg == "--memory_f32") {
+        } else if (arg == "--memory-f32") {
             params.memory_f16 = false;
-        } else if (arg == "--top_p") {
+        } else if (arg == "--top-p") {
             if (++i >= argc) {
                 invalid_param = true;
                 break;
@@ -185,25 +189,25 @@ bool gpt_params_parse(int argc, char ** argv, gpt_params & params) {
                 break;
             }
             params.typical_p = std::stof(argv[i]);
-        } else if (arg == "--repeat_last_n") {
+        } else if (arg == "--repeat-last-n") {
             if (++i >= argc) {
                 invalid_param = true;
                 break;
             }
             params.repeat_last_n = std::stoi(argv[i]);
-        } else if (arg == "--repeat_penalty") {
+        } else if (arg == "--repeat-penalty") {
             if (++i >= argc) {
                 invalid_param = true;
                 break;
             }
             params.repeat_penalty = std::stof(argv[i]);
-        } else if (arg == "--frequency_penalty") {
+        } else if (arg == "--frequency-penalty") {
             if (++i >= argc) {
                 invalid_param = true;
                 break;
             }
             params.frequency_penalty = std::stof(argv[i]);
-        } else if (arg == "--presence_penalty") {
+        } else if (arg == "--presence-penalty") {
             if (++i >= argc) {
                 invalid_param = true;
                 break;
@@ -215,19 +219,19 @@ bool gpt_params_parse(int argc, char ** argv, gpt_params & params) {
                 break;
             }
             params.mirostat = std::stoi(argv[i]);
-        } else if (arg == "--mirostat_lr") {
+        } else if (arg == "--mirostat-lr") {
             if (++i >= argc) {
                 invalid_param = true;
                 break;
             }
             params.mirostat_eta = std::stof(argv[i]);
-        } else if (arg == "--mirostat_ent") {
+        } else if (arg == "--mirostat-ent") {
             if (++i >= argc) {
                 invalid_param = true;
                 break;
             }
             params.mirostat_tau = std::stof(argv[i]);
-        } else if (arg == "-b" || arg == "--batch_size") {
+        } else if (arg == "-b" || arg == "--batch-size") {
             if (++i >= argc) {
                 invalid_param = true;
                 break;
@@ -310,7 +314,7 @@ bool gpt_params_parse(int argc, char ** argv, gpt_params & params) {
                 invalid_param = true;
                 break;
             }
-        } else if (arg == "--n_parts") {
+        } else if (arg == "--n-parts") {
             if (++i >= argc) {
                 invalid_param = true;
                 break;
@@ -384,31 +388,31 @@ void gpt_print_usage(int /*argc*/, char ** argv, const gpt_params & params) {
     fprintf(stderr, "  --in-suffix STRING    string to suffix after user inputs with (default: empty)\n");
     fprintf(stderr, "  -f FNAME, --file FNAME\n");
     fprintf(stderr, "                        prompt file to start generation.\n");
-    fprintf(stderr, "  -n N, --n_predict N   number of tokens to predict (default: %d, -1 = infinity)\n", params.n_predict);
-    fprintf(stderr, "  --top_k N             top-k sampling (default: %d, 0 = disabled)\n", params.top_k);
-    fprintf(stderr, "  --top_p N             top-p sampling (default: %.1f, 1.0 = disabled)\n", (double)params.top_p);
+    fprintf(stderr, "  -n N, --n-predict N   number of tokens to predict (default: %d, -1 = infinity)\n", params.n_predict);
+    fprintf(stderr, "  --top-k N             top-k sampling (default: %d, 0 = disabled)\n", params.top_k);
+    fprintf(stderr, "  --top-p N             top-p sampling (default: %.1f, 1.0 = disabled)\n", (double)params.top_p);
     fprintf(stderr, "  --tfs N               tail free sampling, parameter z (default: %.1f, 1.0 = disabled)\n", (double)params.tfs_z);
     fprintf(stderr, "  --typical N           locally typical sampling, parameter p (default: %.1f, 1.0 = disabled)\n", (double)params.typical_p);
-    fprintf(stderr, "  --repeat_last_n N     last n tokens to consider for penalize (default: %d, 0 = disabled, -1 = ctx_size)\n", params.repeat_last_n);
-    fprintf(stderr, "  --repeat_penalty N    penalize repeat sequence of tokens (default: %.1f, 1.0 = disabled)\n", (double)params.repeat_penalty);
-    fprintf(stderr, "  --presence_penalty N  repeat alpha presence penalty (default: %.1f, 0.0 = disabled)\n", (double)params.presence_penalty);
-    fprintf(stderr, "  --frequency_penalty N repeat alpha frequency penalty (default: %.1f, 0.0 = disabled)\n", (double)params.frequency_penalty);
+    fprintf(stderr, "  --repeat-last-n N     last n tokens to consider for penalize (default: %d, 0 = disabled, -1 = ctx_size)\n", params.repeat_last_n);
+    fprintf(stderr, "  --repeat-penalty N    penalize repeat sequence of tokens (default: %.1f, 1.0 = disabled)\n", (double)params.repeat_penalty);
+    fprintf(stderr, "  --presence-penalty N  repeat alpha presence penalty (default: %.1f, 0.0 = disabled)\n", (double)params.presence_penalty);
+    fprintf(stderr, "  --frequency-penalty N repeat alpha frequency penalty (default: %.1f, 0.0 = disabled)\n", (double)params.frequency_penalty);
     fprintf(stderr, "  --mirostat N          use Mirostat sampling.\n");
     fprintf(stderr, "                        Top K, Nucleus, Tail Free and Locally Typical samplers are ignored if used.\n");
     fprintf(stderr, "                        (default: %d, 0 = disabled, 1 = Mirostat, 2 = Mirostat 2.0)\n", params.mirostat);
-    fprintf(stderr, "  --mirostat_lr N       Mirostat learning rate, parameter eta (default: %.1f)\n", (double)params.mirostat_eta);
-    fprintf(stderr, "  --mirostat_ent N      Mirostat target entropy, parameter tau (default: %.1f)\n", (double)params.mirostat_tau);
+    fprintf(stderr, "  --mirostat-lr N       Mirostat learning rate, parameter eta (default: %.1f)\n", (double)params.mirostat_eta);
+    fprintf(stderr, "  --mirostat-ent N      Mirostat target entropy, parameter tau (default: %.1f)\n", (double)params.mirostat_tau);
     fprintf(stderr, "  -l TOKEN_ID(+/-)BIAS, --logit-bias TOKEN_ID(+/-)BIAS\n");
     fprintf(stderr, "                        modifies the likelihood of token appearing in the completion,\n");
     fprintf(stderr, "                        i.e. `--logit-bias 15043+1` to increase likelihood of token ' Hello',\n");
     fprintf(stderr, "                        or `--logit-bias 15043-1` to decrease likelihood of token ' Hello'\n");
-    fprintf(stderr, "  -c N, --ctx_size N    size of the prompt context (default: %d)\n", params.n_ctx);
+    fprintf(stderr, "  -c N, --ctx-size N    size of the prompt context (default: %d)\n", params.n_ctx);
     fprintf(stderr, "  --ignore-eos          ignore end of stream token and continue generating (implies --logit-bias 2-inf)\n");
     fprintf(stderr, "  --no-penalize-nl      do not penalize newline token\n");
-    fprintf(stderr, "  --memory_f32          use f32 instead of f16 for memory key+value\n");
+    fprintf(stderr, "  --memory-f32          use f32 instead of f16 for memory key+value\n");
     fprintf(stderr, "  --temp N              temperature (default: %.1f)\n", (double)params.temp);
-    fprintf(stderr, "  --n_parts N           number of model parts (default: -1 = determine from dimensions)\n");
-    fprintf(stderr, "  -b N, --batch_size N  batch size for prompt processing (default: %d)\n", params.n_batch);
+    fprintf(stderr, "  --n-parts N           number of model parts (default: -1 = determine from dimensions)\n");
+    fprintf(stderr, "  -b N, --batch-size N  batch size for prompt processing (default: %d)\n", params.n_batch);
     fprintf(stderr, "  --perplexity          compute perplexity over the prompt\n");
     fprintf(stderr, "  --keep                number of tokens to keep from the initial prompt (default: %d, -1 = all)\n", params.n_keep);
     if (llama_mlock_supported()) {
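The functional change here is the underscore-to-hyphen normalization at the top of gpt_params_parse: long options are canonicalized before matching, so the renamed flags stay backward compatible (--top_k still resolves to --top-k). A minimal standalone sketch of the same idea (the normalize_arg helper is illustrative, not part of the repository):

#include <algorithm>
#include <iostream>
#include <string>

// Rewrite '_' to '-' in long options ("--" prefix) before matching,
// exactly as the diff above does; short options and bare arguments
// (e.g. filenames containing underscores) are left untouched.
static std::string normalize_arg(std::string arg) {
    const std::string arg_prefix = "--";
    if (arg.compare(0, arg_prefix.size(), arg_prefix) == 0) {
        std::replace(arg.begin(), arg.end(), '_', '-');
    }
    return arg;
}

int main() {
    std::cout << normalize_arg("--top_k")     << '\n'; // --top-k
    std::cout << normalize_arg("-n")          << '\n'; // -n (unchanged)
    std::cout << normalize_arg("my_file.txt") << '\n'; // my_file.txt (unchanged)
}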

examples/embedding/embedding.cpp (-3)

@@ -56,9 +56,6 @@ int main(int argc, char ** argv) {
     // tokenize the prompt
     auto embd_inp = ::llama_tokenize(ctx, params.prompt, true);

-    // determine newline token
-    auto llama_token_newline = ::llama_tokenize(ctx, "\n", false);
-
     if (params.verbose_prompt) {
         fprintf(stderr, "\n");
         fprintf(stderr, "%s: prompt: '%s'\n", __func__, params.prompt.c_str());

examples/main/main.cpp (+1 -1)

@@ -121,7 +121,7 @@ int main(int argc, char ** argv) {
     // uncomment the "used_mem" line in llama.cpp to see the results
     if (params.mem_test) {
         {
-            const std::vector<llama_token> tmp(params.n_batch, 0);
+            const std::vector<llama_token> tmp(params.n_batch, llama_token_bos());
             llama_eval(ctx, tmp.data(), tmp.size(), 0, params.n_threads);
         }
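Why this one-token change matters: id 0 is the unknown token in the LLaMA vocabulary, while real input starts with BOS (id 1), so seeding the memory-test batch with llama_token_bos() makes the warm-up evaluation exercise the path the model actually takes. A hedged sketch of the pattern, assuming the llama.h API of this era (no-argument llama_token_bos and a llama_eval taking tokens, n_tokens, n_past, n_threads):

#include <vector>
#include "llama.h"  // assumes the llama.cpp headers of this period

// Fill a dummy batch with the BOS token instead of 0 and run one
// evaluation, mirroring the mem_test block in the diff above.
static void mem_test_eval(llama_context * ctx, int n_batch, int n_threads) {
    const std::vector<llama_token> tmp(n_batch, llama_token_bos());
    llama_eval(ctx, tmp.data(), (int) tmp.size(), /*n_past=*/0, n_threads);
}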
