@@ -16626,39 +16626,50 @@ typedef pthread_t ggml_thread_t;
16626
16626
#endif
16627
16627
16628
16628
#ifdef __linux__
16629
- void set_numa_thread_affinity(int thread_n, int n_threads)
16630
- {
16631
- if (!ggml_is_numa()) { return; }
16629
+ void set_numa_thread_affinity(int thread_n, int n_threads) {
16630
+ if (!ggml_is_numa()) {
16631
+ return;
16632
+ }
16633
+
16632
16634
// run thread on node_num thread_n / (threads per node)
16633
- int node_num = thread_n / ((n_threads + g_state.numa.n_nodes - 1) / g_state.numa.n_nodes);
16635
+ const int node_num = thread_n / ((n_threads + g_state.numa.n_nodes - 1) / g_state.numa.n_nodes);
16634
16636
struct ggml_numa_node * node = &g_state.numa.nodes[node_num];
16635
16637
size_t setsize = CPU_ALLOC_SIZE(g_state.numa.total_cpus);
16638
+
16636
16639
cpu_set_t * cpus = CPU_ALLOC(g_state.numa.total_cpus);
16637
16640
CPU_ZERO_S(setsize, cpus);
16638
16641
for (size_t i = 0; i < node->n_cpus; ++i) {
16639
16642
CPU_SET_S(node->cpus[i], setsize, cpus);
16640
16643
}
16644
+
16641
16645
int rv = pthread_setaffinity_np(pthread_self(), setsize, cpus);
16642
16646
if (rv) {
16643
16647
fprintf(stderr, "warning: pthread_setaffinity_np() failed: %s\n",
16644
16648
strerror(rv));
16645
16649
}
16650
+
16646
16651
CPU_FREE(cpus);
16647
16652
}
16648
- void clear_numa_thread_affinity(void)
16649
- {
16650
- if (!ggml_is_numa()) { return; }
16653
+
16654
+ void clear_numa_thread_affinity(void) {
16655
+ if (!ggml_is_numa()) {
16656
+ return;
16657
+ }
16658
+
16651
16659
size_t setsize = CPU_ALLOC_SIZE(g_state.numa.total_cpus);
16660
+
16652
16661
cpu_set_t * cpus = CPU_ALLOC(g_state.numa.total_cpus);
16653
16662
CPU_ZERO_S(setsize, cpus);
16654
16663
for (unsigned i = 0; i < g_state.numa.total_cpus; ++i) {
16655
16664
CPU_SET_S(i, setsize, cpus);
16656
16665
}
16666
+
16657
16667
int rv = pthread_setaffinity_np(pthread_self(), setsize, cpus);
16658
16668
if (rv) {
16659
16669
fprintf(stderr, "warning: pthread_setaffinity_np() failed: %s\n",
16660
16670
strerror(rv));
16661
16671
}
16672
+
16662
16673
CPU_FREE(cpus);
16663
16674
}
16664
16675
#else
@@ -16699,10 +16710,12 @@ static void ggml_graph_compute_perf_stats_node(struct ggml_tensor * node, const
16699
16710
static thread_ret_t ggml_graph_compute_thread(void * data) {
16700
16711
struct ggml_compute_state * state = (struct ggml_compute_state *) data;
16701
16712
struct ggml_cgraph * cgraph = state->shared->cgraph;
16713
+
16702
16714
const int n_threads = state->shared->n_threads;
16703
16715
set_numa_thread_affinity(state->ith, n_threads);
16704
16716
16705
16717
int node_n = -1;
16718
+
16706
16719
while (true) {
16707
16720
if (atomic_fetch_sub(&state->shared->n_active, 1) == 1) {
16708
16721
// all other threads are finished and spinning
@@ -17165,6 +17178,7 @@ void ggml_graph_compute(struct ggml_context * ctx, struct ggml_cgraph * cgraph)
17165
17178
17166
17179
// this is a work thread too
17167
17180
ggml_graph_compute_thread(&workers[0]);
17181
+
17168
17182
// don't leave affinity set on the main thread
17169
17183
clear_numa_thread_affinity();
17170
17184
0 commit comments