@@ -387,7 +387,7 @@ void ggml_backend_event_wait(ggml_backend_t backend, ggml_backend_event_t event)
 
 // backend registry
 
-#define GGML_MAX_BACKENDS_REG 16
+#define GGML_SCHED_MAX_BACKENDS_REG 16
 
 struct ggml_backend_reg {
     char name[128];
@@ -396,7 +396,7 @@ struct ggml_backend_reg {
     void * user_data;
 };
 
-static struct ggml_backend_reg ggml_backend_registry[GGML_MAX_BACKENDS_REG];
+static struct ggml_backend_reg ggml_backend_registry[GGML_SCHED_MAX_BACKENDS_REG];
 static size_t ggml_backend_registry_count = 0;
 
 GGML_CALL static ggml_backend_t ggml_backend_reg_cpu_init(const char * params, void * user_data);
@@ -441,7 +441,7 @@ GGML_CALL static void ggml_backend_registry_init(void) {
 }
 
 GGML_CALL void ggml_backend_register(const char * name, ggml_backend_init_fn init_fn, ggml_backend_buffer_type_t default_buffer_type, void * user_data) {
-    GGML_ASSERT(ggml_backend_registry_count < GGML_MAX_BACKENDS_REG);
+    GGML_ASSERT(ggml_backend_registry_count < GGML_SCHED_MAX_BACKENDS_REG);
 
     size_t id = ggml_backend_registry_count;
 
@@ -993,16 +993,27 @@ static bool ggml_is_view_op(enum ggml_op op) {
 
 // scheduler
 
-#define GGML_MAX_BACKENDS 16
-#define GGML_MAX_SPLITS 256
-#define GGML_MAX_SPLIT_INPUTS 16
-#define GGML_MAX_COPIES 2
+#ifndef GGML_SCHED_MAX_BACKENDS
+#define GGML_SCHED_MAX_BACKENDS 16
+#endif
+
+#ifndef GGML_SCHED_MAX_SPLITS
+#define GGML_SCHED_MAX_SPLITS 256
+#endif
+
+#ifndef GGML_SCHED_MAX_SPLIT_INPUTS
+#define GGML_SCHED_MAX_SPLIT_INPUTS 16
+#endif
+
+#ifndef GGML_SCHED_MAX_COPIES
+#define GGML_SCHED_MAX_COPIES 4
+#endif
 
 struct ggml_backend_sched_split {
     int backend_id;
     int i_start;
     int i_end;
-    struct ggml_tensor * inputs[GGML_MAX_SPLIT_INPUTS];
+    struct ggml_tensor * inputs[GGML_SCHED_MAX_SPLIT_INPUTS];
     int n_inputs;
     // graph view of this split
     struct ggml_cgraph graph;
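Note: each limit is now wrapped in #ifndef, so a downstream build can raise or lower it without patching ggml-backend.c. A minimal sketch of a build-time override, assuming the macros are passed on the compiler command line (the values are illustrative, not defaults taken from this commit):

    /* Example override at build time:
     *     cc -DGGML_SCHED_MAX_COPIES=8 -DGGML_SCHED_MAX_SPLITS=512 -c ggml-backend.c
     * Without -D flags, the #ifndef guards above keep the defaults
     * (16 backends, 256 splits, 16 split inputs, 4 copies). */
    _Static_assert(GGML_SCHED_MAX_COPIES >= 1, "the scheduler needs at least one copy of each input");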
@@ -1014,15 +1025,15 @@ struct ggml_backend_sched {
 
     int n_backends;
 
-    ggml_backend_t backends[GGML_MAX_BACKENDS];
-    ggml_backend_buffer_type_t bufts[GGML_MAX_BACKENDS];
+    ggml_backend_t backends[GGML_SCHED_MAX_BACKENDS];
+    ggml_backend_buffer_type_t bufts[GGML_SCHED_MAX_BACKENDS];
     ggml_gallocr_t galloc;
 
     // hash keys of the nodes in the graph
     struct ggml_hash_set hash_set;
     // hash values
     int * tensor_backend_id;
-    struct ggml_tensor * (* tensor_copies)[GGML_MAX_BACKENDS][GGML_MAX_COPIES];
+    struct ggml_tensor * (* tensor_copies)[GGML_SCHED_MAX_BACKENDS][GGML_SCHED_MAX_COPIES];
 
     int * node_backend_ids; // [graph_size]
     int * leaf_backend_ids; // [graph_size]
@@ -1031,14 +1042,14 @@ struct ggml_backend_sched {
     struct ggml_cgraph * graph;
 
     // graph splits
-    struct ggml_backend_sched_split splits[GGML_MAX_SPLITS];
+    struct ggml_backend_sched_split splits[GGML_SCHED_MAX_SPLITS];
     int n_splits;
 
     // pipeline parallelism support
     int n_copies;
     int cur_copy;
-    ggml_backend_event_t events[GGML_MAX_BACKENDS][GGML_MAX_COPIES];
-    struct ggml_tensor * graph_inputs[GGML_MAX_SPLIT_INPUTS];
+    ggml_backend_event_t events[GGML_SCHED_MAX_BACKENDS][GGML_SCHED_MAX_COPIES];
+    struct ggml_tensor * graph_inputs[GGML_SCHED_MAX_SPLIT_INPUTS];
     int n_graph_inputs;
 
     struct ggml_context * ctx;
@@ -1047,12 +1058,12 @@ struct ggml_backend_sched {
     void * callback_eval_user_data;
 
     // align context_buffer to GGML_MEM_ALIGN
-    #ifdef _MSC_VER
+#ifdef _MSC_VER
     __declspec(align(GGML_MEM_ALIGN))
-    #else
+#else
     __attribute__((aligned(GGML_MEM_ALIGN)))
-    #endif
-    char context_buffer[GGML_MAX_SPLITS*GGML_MAX_SPLIT_INPUTS*2*sizeof(struct ggml_tensor) + sizeof(struct ggml_cgraph)];
+#endif
+    char context_buffer[GGML_SCHED_MAX_SPLITS*GGML_SCHED_MAX_SPLIT_INPUTS*2*sizeof(struct ggml_tensor) + sizeof(struct ggml_cgraph)];
 };
 
 #define hash_id(tensor) ggml_hash_find_or_insert(sched->hash_set, tensor)
@@ -1089,7 +1100,7 @@ static int ggml_backend_sched_backend_from_buffer(ggml_backend_sched_t sched, co
 }
 
 #if 0
-static char causes[GGML_DEFAULT_GRAPH_SIZE*16 + GGML_MAX_SPLITS*GGML_MAX_SPLIT_INPUTS][128]; // debug only
+static char causes[GGML_DEFAULT_GRAPH_SIZE*16 + GGML_SCHED_MAX_SPLITS*GGML_SCHED_MAX_SPLIT_INPUTS][128]; // debug only
 #define SET_CAUSE(node, ...) sprintf(causes[hash_id(node)], __VA_ARGS__)
 #define GET_CAUSE(node) causes[hash_id(node)]
 #else
@@ -1395,7 +1406,7 @@ static void ggml_backend_sched_split_graph(ggml_backend_sched_t sched, struct gg
         if (tensor_backend_id != cur_backend_id) {
             sched->splits[cur_split].i_end = i;
             cur_split++;
-            GGML_ASSERT(cur_split < GGML_MAX_SPLITS);
+            GGML_ASSERT(cur_split < GGML_SCHED_MAX_SPLITS);
             sched->splits[cur_split].backend_id = tensor_backend_id;
             sched->splits[cur_split].i_start = i;
             sched->splits[cur_split].n_inputs = 0;
@@ -1433,7 +1444,7 @@ static void ggml_backend_sched_split_graph(ggml_backend_sched_t sched, struct gg
                     SET_CAUSE(tensor_copy, "4.cpy");
                 }
                 int n_graph_inputs = sched->n_graph_inputs++;
-                GGML_ASSERT(n_graph_inputs < GGML_MAX_SPLIT_INPUTS);
+                GGML_ASSERT(n_graph_inputs < GGML_SCHED_MAX_SPLIT_INPUTS);
                 sched->graph_inputs[n_graph_inputs] = src;
             }
         }
@@ -1455,7 +1466,7 @@ static void ggml_backend_sched_split_graph(ggml_backend_sched_t sched, struct gg
                     SET_CAUSE(tensor_copy, "4.cpy");
                 }
                 int n_inputs = sched->splits[cur_split].n_inputs++;
-                GGML_ASSERT(n_inputs < GGML_MAX_SPLIT_INPUTS);
+                GGML_ASSERT(n_inputs < GGML_SCHED_MAX_SPLIT_INPUTS);
                 sched->splits[cur_split].inputs[n_inputs] = src;
             }
             node->src[j] = sched->tensor_copies[id][cur_backend_id][sched->cur_copy];
@@ -1507,7 +1518,7 @@ static void ggml_backend_sched_split_graph(ggml_backend_sched_t sched, struct gg
 
     // create copies of the graph for each split
     // TODO: avoid this copy
-    struct ggml_cgraph * graph_copy = ggml_new_graph_custom(sched->ctx, graph->n_nodes + sched->n_splits*GGML_MAX_SPLIT_INPUTS, false);
+    struct ggml_cgraph * graph_copy = ggml_new_graph_custom(sched->ctx, graph->n_nodes + sched->n_splits*GGML_SCHED_MAX_SPLIT_INPUTS, false);
     for (int i = 0; i < sched->n_splits; i++) {
         struct ggml_backend_sched_split * split = &sched->splits[i];
         split->graph = ggml_graph_view(graph, split->i_start, split->i_end);
@@ -1683,23 +1694,23 @@ ggml_backend_sched_t ggml_backend_sched_new(
         size_t graph_size,
         bool parallel) {
     GGML_ASSERT(n_backends > 0);
-    GGML_ASSERT(n_backends <= GGML_MAX_BACKENDS);
+    GGML_ASSERT(n_backends <= GGML_SCHED_MAX_BACKENDS);
     GGML_ASSERT(ggml_backend_is_cpu(backends[n_backends - 1])); // last backend must be CPU
 
     struct ggml_backend_sched * sched = calloc(sizeof(struct ggml_backend_sched), 1);
 
     // initialize hash table
-    sched->hash_set = ggml_hash_set_new(graph_size + GGML_MAX_SPLITS*GGML_MAX_SPLIT_INPUTS);
+    sched->hash_set = ggml_hash_set_new(graph_size + GGML_SCHED_MAX_SPLITS*GGML_SCHED_MAX_SPLIT_INPUTS);
     sched->tensor_backend_id = calloc(sizeof(sched->tensor_backend_id[0]), sched->hash_set.size);
     sched->tensor_copies = calloc(sizeof(sched->tensor_copies[0]), sched->hash_set.size);
     sched->node_backend_ids = calloc(sizeof(sched->node_backend_ids[0]), graph_size);
     sched->leaf_backend_ids = calloc(sizeof(sched->leaf_backend_ids[0]), graph_size);
 
     sched->n_backends = n_backends;
 
-    sched->n_copies = parallel ? GGML_MAX_COPIES : 1;
+    sched->n_copies = parallel ? GGML_SCHED_MAX_COPIES : 1;
 
-    GGML_ASSERT(sched->n_copies <= GGML_MAX_COPIES);
+    GGML_ASSERT(sched->n_copies <= GGML_SCHED_MAX_COPIES);
 
     for (int b = 0; b < n_backends; b++) {
         sched->backends[b] = backends[b];
@@ -1764,7 +1775,7 @@ bool ggml_backend_sched_reserve(ggml_backend_sched_t sched, struct ggml_cgraph *
 }
 
 bool ggml_backend_sched_alloc_graph(ggml_backend_sched_t sched, struct ggml_cgraph * graph) {
-    GGML_ASSERT((int)sched->hash_set.size >= graph->n_nodes + GGML_MAX_SPLITS*GGML_MAX_SPLIT_INPUTS);
+    GGML_ASSERT((int)sched->hash_set.size >= graph->n_nodes + GGML_SCHED_MAX_SPLITS*GGML_SCHED_MAX_SPLIT_INPUTS);
 
     ggml_backend_sched_split_graph(sched, graph);
 
@@ -1812,6 +1823,10 @@ int ggml_backend_sched_get_n_splits(ggml_backend_sched_t sched) {
     return sched->n_splits;
 }
 
+int ggml_backend_sched_get_n_copies(ggml_backend_sched_t sched) {
+    return sched->n_copies;
+}
+
 size_t ggml_backend_sched_get_buffer_size(ggml_backend_sched_t sched, ggml_backend_t backend) {
     int backend_index = ggml_backend_sched_backend_id(sched, backend);
     GGML_ASSERT(backend_index >= 0 && backend_index < sched->n_backends);
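The new ggml_backend_sched_get_n_copies() lets callers check whether the scheduler is keeping multiple copies of the graph inputs for pipeline parallelism. A minimal usage sketch, assuming the ggml_backend_sched_new() signature shown in the hunk above and a CPU-only setup (the program below is illustrative and not part of the commit):

    #include <stdio.h>
    #include "ggml.h"
    #include "ggml-backend.h"

    int main(void) {
        // One CPU backend, parallel == false: the scheduler keeps a single copy of
        // each input, so n_copies is expected to be 1. Passing true instead requests
        // GGML_SCHED_MAX_COPIES (4 after this change) copies for pipeline parallelism.
        ggml_backend_t cpu = ggml_backend_cpu_init();
        ggml_backend_sched_t sched = ggml_backend_sched_new(&cpu, NULL, 1, GGML_DEFAULT_GRAPH_SIZE, false);

        printf("n_copies = %d\n", ggml_backend_sched_get_n_copies(sched));

        ggml_backend_sched_free(sched);
        ggml_backend_free(cpu);
        return 0;
    }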