This repository was archived by the owner on Feb 6, 2024. It is now read-only.
File tree: 1 file changed, +17 −8 lines changed
Sources/llmfarm_core_cpp/llama
1 file changed, +17 −8 lines changed
Original file line number | Diff line number | Diff line change
@@ -1570,14 +1570,14 @@ static void llama_kv_cache_seq_shift(
1570
1570
1571
1571
for (uint32_t i = 0 ; i < cache.size ; ++i) {
1572
1572
if (cache.cells [i].has_seq_id (seq_id) && cache.cells [i].pos >= p0 && cache.cells [i].pos < p1) {
1573
+ cache.has_shift = true ;
1573
1574
cache.cells [i].pos += delta;
1575
+ cache.cells [i].delta += delta;
1576
+
1574
1577
if (cache.cells [i].pos < 0 ) {
1575
1578
cache.cells [i].pos = -1 ;
1576
1579
cache.cells [i].seq_id .clear ();
1577
1580
if (new_head == cache.size ) new_head = i;
1578
- } else {
1579
- cache.has_shift = true ;
1580
- cache.cells [i].delta = delta;
1581
1581
}
1582
1582
}
1583
1583
}
@@ -6320,11 +6320,20 @@ static int llama_decode_internal(
6320
6320
#endif
6321
6321
6322
6322
// update the kv ring buffer
6323
- lctx.kv_self .has_shift = false ;
6324
- lctx.kv_self .head += n_tokens;
6325
- // Ensure kv cache head points to a valid index.
6326
- if (lctx.kv_self .head >= lctx.kv_self .size ) {
6327
- lctx.kv_self .head = 0 ;
6323
+ {
6324
+ if (kv_self.has_shift ) {
6325
+ kv_self.has_shift = false ;
6326
+ for (uint32_t i = 0 ; i < kv_self.size ; ++i) {
6327
+ kv_self.cells [i].delta = 0 ;
6328
+ }
6329
+ }
6330
+
6331
+ kv_self.head += n_tokens;
6332
+
6333
+ // Ensure kv cache head points to a valid index.
6334
+ if (kv_self.head >= kv_self.size ) {
6335
+ kv_self.head = 0 ;
6336
+ }
6328
6337
}
6329
6338
6330
6339
#ifdef GGML_PERF
You can’t perform that action at this time.
0 commit comments