1
1
/*
2
- * Tencent is pleased to support the open source community by making TKEStack available.
2
+ * Tencent is pleased to support the open source community by making TKEStack
3
+ * available.
3
4
*
4
5
* Copyright (C) 2012-2019 Tencent. All Rights Reserved.
5
6
*
6
- * Licensed under the Apache License, Version 2.0 (the "License"); you may not use
7
- * this file except in compliance with the License. You may obtain a copy of the
8
- * License at
7
+ * Licensed under the Apache License, Version 2.0 (the "License"); you may not
8
+ * use this file except in compliance with the License. You may obtain a copy of
9
+ * the License at
9
10
*
10
11
* https://opensource.org/licenses/Apache-2.0
11
12
*
@@ -99,8 +100,8 @@ typedef struct {
99
100
100
101
/** helper function */
101
102
int int_match (const void * a , const void * b ) {
102
- const int * ra = (const int * ) a ;
103
- const int * rb = (const int * ) b ;
103
+ const int * ra = (const int * )a ;
104
+ const int * rb = (const int * )b ;
104
105
105
106
if (* ra < * rb ) {
106
107
return -1 ;
@@ -167,7 +168,7 @@ static void rate_limiter(int grids, int blocks) {
167
168
LOGGER (5 , "launch kernel %d, curr core: %d" , kernel_size , g_cur_cuda_cores );
168
169
if (g_vcuda_config .enable ) {
169
170
do {
170
- CHECK :
171
+ CHECK :
171
172
before_cuda_cores = g_cur_cuda_cores ;
172
173
LOGGER (8 , "current core: %d" , g_cur_cuda_cores );
173
174
if (before_cuda_cores < 0 ) {
@@ -186,7 +187,7 @@ int delta(int up_limit, int user_current, int share) {
186
187
g_sm_num * g_sm_num * g_max_thread_per_sm * utilization_diff / 2560 ;
187
188
/* Accelerate cuda cores allocation when utilization varies widely */
188
189
if (utilization_diff > up_limit / 2 ) {
189
- increment = increment * utilization_diff * 2 / up_limit ;
190
+ increment = increment * utilization_diff * 2 / ( up_limit + 1 ) ;
190
191
}
191
192
192
193
if (user_current <= up_limit ) {
@@ -218,7 +219,7 @@ static void *utilization_watcher(void *arg UNUSED) {
218
219
while (1 ) {
219
220
nanosleep (& g_wait , NULL );
220
221
do {
221
- atomic_action (pid_path , get_used_gpu_utilization , (void * ) & top_result );
222
+ atomic_action (pid_path , get_used_gpu_utilization , (void * )& top_result );
222
223
} while (!top_result .valid );
223
224
224
225
sys_free = MAX_UTILIZATION - top_result .sys_current ;
@@ -259,9 +260,9 @@ static void *utilization_watcher(void *arg UNUSED) {
259
260
if (i % CHANGE_LIMIT_INTERVAL == 0 ) {
260
261
if (avg_sys_free * 2 / CHANGE_LIMIT_INTERVAL > USAGE_THRESHOLD ) {
261
262
up_limit = up_limit + g_vcuda_config .utilization / 10 >
262
- g_vcuda_config .limit
263
- ? g_vcuda_config .limit
264
- : up_limit + g_vcuda_config .utilization / 10 ;
263
+ g_vcuda_config .limit
264
+ ? g_vcuda_config .limit
265
+ : up_limit + g_vcuda_config .utilization / 10 ;
265
266
}
266
267
i = 0 ;
267
268
}
@@ -295,10 +296,11 @@ static void get_used_gpu_utilization(int fd, void *arg) {
295
296
unsigned int running_processes = MAX_PIDS ;
296
297
nvmlProcessInfo_t pids_on_device [MAX_PIDS ];
297
298
nvmlDevice_t dev ;
298
- utilization_t * top_result = (utilization_t * ) arg ;
299
+ utilization_t * top_result = (utilization_t * )arg ;
299
300
nvmlReturn_t ret ;
300
301
struct timeval cur ;
301
302
size_t microsec ;
303
+ int codec_util = 0 ;
302
304
303
305
int i ;
304
306
@@ -337,11 +339,20 @@ static void get_used_gpu_utilization(int fd, void *arg) {
337
339
for (i = 0 ; i < processes_num ; i ++ ) {
338
340
if (processes_sample [i ].timeStamp >= top_result -> checktime ) {
339
341
top_result -> valid = 1 ;
340
- top_result -> sys_current += processes_sample [i ].smUtil ;
342
+ top_result -> sys_current += GET_VALID_VALUE (processes_sample [i ].smUtil );
343
+
344
+ codec_util = GET_VALID_VALUE (processes_sample [i ].encUtil ) +
345
+ GET_VALID_VALUE (processes_sample [i ].decUtil );
346
+ top_result -> sys_current += CODEC_NORMALIZE (codec_util );
347
+
341
348
LOGGER (8 , "try to find %d from pid tables" , processes_sample [i ].pid );
342
349
if (likely (bsearch (& processes_sample [i ].pid , g_pids_table ,
343
- (size_t ) g_pids_table_size , sizeof (int ), int_match ))) {
344
- top_result -> user_current += processes_sample [i ].smUtil ;
350
+ (size_t )g_pids_table_size , sizeof (int ), int_match ))) {
351
+ top_result -> user_current += GET_VALID_VALUE (processes_sample [i ].smUtil );
352
+
353
+ codec_util = GET_VALID_VALUE (processes_sample [i ].encUtil ) +
354
+ GET_VALID_VALUE (processes_sample [i ].decUtil );
355
+ top_result -> user_current += CODEC_NORMALIZE (codec_util );
345
356
}
346
357
}
347
358
}
@@ -359,7 +370,7 @@ static void load_pids_table(int fd, void *arg UNUSED) {
359
370
int i = 0 ;
360
371
361
372
for (item = 0 ; item < MAX_PIDS ; item ++ ) {
362
- rsize = (int ) read (fd , g_pids_table + item , sizeof (int ));
373
+ rsize = (int )read (fd , g_pids_table + item , sizeof (int ));
363
374
if (unlikely (rsize != sizeof (int ))) {
364
375
break ;
365
376
}
@@ -415,7 +426,7 @@ static void get_used_gpu_memory(int fd, void *arg) {
415
426
}
416
427
417
428
for (i = 0 ; i < size_on_device ; i ++ ) {
418
- if (bsearch (& pids_on_device [i ].pid , g_pids_table , (size_t ) g_pids_table_size ,
429
+ if (bsearch (& pids_on_device [i ].pid , g_pids_table , (size_t )g_pids_table_size ,
419
430
sizeof (int ), int_match )) {
420
431
LOGGER (4 , "%d use memory: %lld" , pids_on_device [i ].pid ,
421
432
pids_on_device [i ].usedGpuMemory );
@@ -441,14 +452,14 @@ static void register_to_remote() {
441
452
& nvml_dev );
442
453
if (unlikely (ret )) {
443
454
LOGGER (FATAL , "can't find device 0, error %s" ,
444
- nvml_error ((nvmlReturn_t ) ret ));
455
+ nvml_error ((nvmlReturn_t )ret ));
445
456
}
446
457
447
458
ret = NVML_ENTRY_CALL (nvml_library_entry , nvmlDeviceGetPciInfo , nvml_dev ,
448
459
& pci_info );
449
460
if (unlikely (ret )) {
450
461
LOGGER (FATAL , "can't find device 0, error %s" ,
451
- nvml_error ((nvmlReturn_t ) ret ));
462
+ nvml_error ((nvmlReturn_t )ret ));
452
463
}
453
464
454
465
strncpy (g_vcuda_config .bus_id , pci_info .busId ,
@@ -467,22 +478,22 @@ static void initialization() {
467
478
ret = CUDA_ENTRY_CALL (cuda_library_entry , cuInit , 0 );
468
479
if (unlikely (ret )) {
469
480
LOGGER (FATAL , "cuInit error %s" ,
470
- cuda_error ((CUresult ) ret , & cuda_err_string ));
481
+ cuda_error ((CUresult )ret , & cuda_err_string ));
471
482
}
472
483
473
484
ret = CUDA_ENTRY_CALL (cuda_library_entry , cuDeviceGetAttribute , & g_sm_num ,
474
485
CU_DEVICE_ATTRIBUTE_MULTIPROCESSOR_COUNT , 0 );
475
486
if (unlikely (ret )) {
476
487
LOGGER (FATAL , "can't get processor number, error %s" ,
477
- cuda_error ((CUresult ) ret , & cuda_err_string ));
488
+ cuda_error ((CUresult )ret , & cuda_err_string ));
478
489
}
479
490
480
491
ret = CUDA_ENTRY_CALL (cuda_library_entry , cuDeviceGetAttribute ,
481
492
& g_max_thread_per_sm ,
482
493
CU_DEVICE_ATTRIBUTE_MAX_THREADS_PER_MULTIPROCESSOR , 0 );
483
494
if (unlikely (ret )) {
484
495
LOGGER (FATAL , "can't get max thread per processor, error %s" ,
485
- cuda_error ((CUresult ) ret , & cuda_err_string ));
496
+ cuda_error ((CUresult )ret , & cuda_err_string ));
486
497
}
487
498
488
499
g_total_cuda_cores = g_max_thread_per_sm * g_sm_num * FACTOR ;
@@ -535,7 +546,7 @@ CUresult cuMemAllocManaged(CUdeviceptr *dptr, size_t bytesize,
535
546
CUresult ret ;
536
547
537
548
if (g_vcuda_config .enable ) {
538
- atomic_action (pid_path , get_used_gpu_memory , (void * ) & used );
549
+ atomic_action (pid_path , get_used_gpu_memory , (void * )& used );
539
550
540
551
if (unlikely (used + request_size > g_vcuda_config .gpu_memory )) {
541
552
ret = CUDA_ERROR_OUT_OF_MEMORY ;
@@ -555,7 +566,7 @@ CUresult cuMemAlloc_v2(CUdeviceptr *dptr, size_t bytesize) {
555
566
CUresult ret ;
556
567
557
568
if (g_vcuda_config .enable ) {
558
- atomic_action (pid_path , get_used_gpu_memory , (void * ) & used );
569
+ atomic_action (pid_path , get_used_gpu_memory , (void * )& used );
559
570
560
571
if (unlikely (used + request_size > g_vcuda_config .gpu_memory )) {
561
572
ret = CUDA_ERROR_OUT_OF_MEMORY ;
@@ -574,7 +585,7 @@ CUresult cuMemAlloc(CUdeviceptr *dptr, size_t bytesize) {
574
585
CUresult ret ;
575
586
576
587
if (g_vcuda_config .enable ) {
577
- atomic_action (pid_path , get_used_gpu_memory , (void * ) & used );
588
+ atomic_action (pid_path , get_used_gpu_memory , (void * )& used );
578
589
579
590
if (unlikely (used + request_size > g_vcuda_config .gpu_memory )) {
580
591
ret = CUDA_ERROR_OUT_OF_MEMORY ;
@@ -595,7 +606,7 @@ CUresult cuMemAllocPitch_v2(CUdeviceptr *dptr, size_t *pPitch,
595
606
CUresult ret ;
596
607
597
608
if (g_vcuda_config .enable ) {
598
- atomic_action (pid_path , get_used_gpu_memory , (void * ) & used );
609
+ atomic_action (pid_path , get_used_gpu_memory , (void * )& used );
599
610
600
611
if (unlikely (used + request_size > g_vcuda_config .gpu_memory )) {
601
612
ret = CUDA_ERROR_OUT_OF_MEMORY ;
@@ -616,7 +627,7 @@ CUresult cuMemAllocPitch(CUdeviceptr *dptr, size_t *pPitch, size_t WidthInBytes,
616
627
CUresult ret ;
617
628
618
629
if (g_vcuda_config .enable ) {
619
- atomic_action (pid_path , get_used_gpu_memory , (void * ) & used );
630
+ atomic_action (pid_path , get_used_gpu_memory , (void * )& used );
620
631
621
632
if (unlikely (used + request_size > g_vcuda_config .gpu_memory )) {
622
633
ret = CUDA_ERROR_OUT_OF_MEMORY ;
@@ -655,7 +666,8 @@ static size_t get_array_base_size(int format) {
655
666
return base_size ;
656
667
}
657
668
658
- static CUresult cuArrayCreate_helper (const CUDA_ARRAY_DESCRIPTOR * pAllocateArray ) {
669
+ static CUresult cuArrayCreate_helper (
670
+ const CUDA_ARRAY_DESCRIPTOR * pAllocateArray ) {
659
671
size_t used = 0 ;
660
672
size_t base_size = 0 ;
661
673
size_t request_size = 0 ;
@@ -666,7 +678,7 @@ static CUresult cuArrayCreate_helper(const CUDA_ARRAY_DESCRIPTOR *pAllocateArray
666
678
request_size = base_size * pAllocateArray -> NumChannels *
667
679
pAllocateArray -> Height * pAllocateArray -> Width ;
668
680
669
- atomic_action (pid_path , get_used_gpu_memory , (void * ) & used );
681
+ atomic_action (pid_path , get_used_gpu_memory , (void * )& used );
670
682
671
683
if (unlikely (used + request_size > g_vcuda_config .gpu_memory )) {
672
684
ret = CUDA_ERROR_OUT_OF_MEMORY ;
@@ -708,7 +720,8 @@ CUresult cuArrayCreate(CUarray *pHandle,
708
720
return ret ;
709
721
}
710
722
711
- static CUresult cuArray3DCreate_helper (const CUDA_ARRAY3D_DESCRIPTOR * pAllocateArray ) {
723
+ static CUresult cuArray3DCreate_helper (
724
+ const CUDA_ARRAY3D_DESCRIPTOR * pAllocateArray ) {
712
725
size_t used = 0 ;
713
726
size_t base_size = 0 ;
714
727
size_t request_size = 0 ;
@@ -719,7 +732,7 @@ static CUresult cuArray3DCreate_helper(const CUDA_ARRAY3D_DESCRIPTOR *pAllocateA
719
732
request_size = base_size * pAllocateArray -> NumChannels *
720
733
pAllocateArray -> Height * pAllocateArray -> Width ;
721
734
722
- atomic_action (pid_path , get_used_gpu_memory , (void * ) & used );
735
+ atomic_action (pid_path , get_used_gpu_memory , (void * )& used );
723
736
724
737
if (unlikely (used + request_size > g_vcuda_config .gpu_memory )) {
725
738
ret = CUDA_ERROR_OUT_OF_MEMORY ;
@@ -775,7 +788,7 @@ CUresult cuMipmappedArrayCreate(
775
788
pMipmappedArrayDesc -> Height * pMipmappedArrayDesc -> Width *
776
789
pMipmappedArrayDesc -> Depth ;
777
790
778
- atomic_action (pid_path , get_used_gpu_memory , (void * ) & used );
791
+ atomic_action (pid_path , get_used_gpu_memory , (void * )& used );
779
792
780
793
if (unlikely (used + request_size > g_vcuda_config .gpu_memory )) {
781
794
ret = CUDA_ERROR_OUT_OF_MEMORY ;
@@ -813,7 +826,7 @@ CUresult cuMemGetInfo_v2(size_t *free, size_t *total) {
813
826
size_t used = 0 ;
814
827
815
828
if (g_vcuda_config .enable ) {
816
- atomic_action (pid_path , get_used_gpu_memory , (void * ) & used );
829
+ atomic_action (pid_path , get_used_gpu_memory , (void * )& used );
817
830
818
831
* total = g_vcuda_config .gpu_memory ;
819
832
* free =
@@ -829,7 +842,7 @@ CUresult cuMemGetInfo(size_t *free, size_t *total) {
829
842
size_t used = 0 ;
830
843
831
844
if (g_vcuda_config .enable ) {
832
- atomic_action (pid_path , get_used_gpu_memory , (void * ) & used );
845
+ atomic_action (pid_path , get_used_gpu_memory , (void * )& used );
833
846
834
847
* total = g_vcuda_config .gpu_memory ;
835
848
* free =
0 commit comments