@@ -4970,10 +4970,10 @@ static struct ggml_cgraph * llama_build_graph(
         // allocate input tensors and set input data
         //
 
-        if (batch.token && !alloc_inp_tokens && strcmp(name, "inp_tokens") == 0) {
+        if (!alloc_inp_tokens && strcmp(name, "inp_tokens") == 0) {
             ggml_allocr_alloc(lctx.alloc, cur);
 
-            if (!ggml_allocr_is_measure(lctx.alloc)) {
+            if (!ggml_allocr_is_measure(lctx.alloc) && batch.token) {
                 const int64_t n_tokens = cur->ne[0];
 
                 memcpy(cur->data, batch.token, n_tokens*ggml_element_size(cur));
@@ -4982,10 +4982,10 @@ static struct ggml_cgraph * llama_build_graph(
             alloc_inp_tokens = true;
         }
 
-        if (batch.embd && !alloc_inp_embd && strcmp(name, "inp_embd") == 0) {
+        if (!alloc_inp_embd && strcmp(name, "inp_embd") == 0) {
             ggml_allocr_alloc(lctx.alloc, cur);
 
-            if (!ggml_allocr_is_measure(lctx.alloc)) {
+            if (!ggml_allocr_is_measure(lctx.alloc) && batch.embd) {
                 const int64_t n_embd   = cur->ne[0];
                 const int64_t n_tokens = cur->ne[1];
 
@@ -4995,10 +4995,10 @@ static struct ggml_cgraph * llama_build_graph(
             alloc_inp_embd = true;
         }
 
-        if (batch.pos && !alloc_inp_pos && strcmp(name, "inp_pos") == 0) {
+        if (!alloc_inp_pos && strcmp(name, "inp_pos") == 0) {
             ggml_allocr_alloc(lctx.alloc, cur);
 
-            if (!ggml_allocr_is_measure(lctx.alloc)) {
+            if (!ggml_allocr_is_measure(lctx.alloc) && batch.pos) {
                 const int64_t n_tokens = cur->ne[0];
 
                 int32_t * data = (int32_t *) cur->data;
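
The three hunks apply the same reordering: each named input tensor is now allocated whenever the graph callback sees it, and the corresponding batch.token / batch.embd / batch.pos pointer is only consulted at the data-copy step. Before the change, a batch that did not carry one of these inputs (for example batch.token == NULL when embeddings are supplied instead) skipped the allocation entirely. The standalone sketch below mirrors that control flow for the token case; fake_batch, fake_tensor and alloc_input are made-up stand-ins for illustration, not llama.cpp or ggml APIs.

```c
#include <stdbool.h>
#include <stdint.h>
#include <stdio.h>
#include <string.h>

// Hypothetical stand-in for llama_batch: token may be NULL when the batch
// provides embeddings instead of token ids.
struct fake_batch {
    const int32_t * token;
    int32_t         n_tokens;
};

// Hypothetical stand-in for a graph input tensor.
struct fake_tensor {
    const char * name;
    int32_t      data[32];
    bool         allocated;
};

// Mirrors the new ordering in the diff: reserve the tensor unconditionally,
// copy data into it only when the batch actually has it (and we are not in
// measure mode).
static void alloc_input(struct fake_tensor * cur, const struct fake_batch * batch, bool measure) {
    static bool alloc_inp_tokens = false; // one-shot flag, like alloc_inp_tokens in llama_build_graph

    if (!alloc_inp_tokens && strcmp(cur->name, "inp_tokens") == 0) {
        cur->allocated = true; // stand-in for ggml_allocr_alloc(): always happens now

        if (!measure && batch->token) { // data copy is the only part gated on batch.token
            memcpy(cur->data, batch->token, (size_t) batch->n_tokens*sizeof(int32_t));
        }

        alloc_inp_tokens = true;
    }
}

int main(void) {
    struct fake_tensor inp   = { .name = "inp_tokens" };
    struct fake_batch  batch = { .token = NULL, .n_tokens = 0 }; // batch without token ids

    alloc_input(&inp, &batch, /*measure =*/ false);

    // Prints "allocated: yes"; with the old ordering the tensor would not be allocated.
    printf("allocated: %s\n", inp.allocated ? "yes" : "no");
    return 0;
}
```

Compiling and running the sketch with any C99 compiler shows the input marked as allocated even though the batch carries no token data, which is the behaviour the reordering gives the real input tensors.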