Commit e890469

Merge pull request #4 from mYmNeo/dev_ffmpeg
Support ffmpeg video encoding and decoding QoS
2 parents 6f52b6d + 97a1701 commit e890469

2 files changed (+59 -41 lines)

include/hijack.h (+11 -6)
@@ -1,11 +1,12 @@
 /*
- * Tencent is pleased to support the open source community by making TKEStack available.
+ * Tencent is pleased to support the open source community by making TKEStack
+ * available.
  *
  * Copyright (C) 2012-2019 Tencent. All Rights Reserved.
  *
- * Licensed under the Apache License, Version 2.0 (the "License"); you may not use
- * this file except in compliance with the License. You may obtain a copy of the
- * License at
+ * Licensed under the Apache License, Version 2.0 (the "License"); you may not
+ * use this file except in compliance with the License. You may obtain a copy of
+ * the License at
  *
  * https://opensource.org/licenses/Apache-2.0
  *
@@ -101,6 +102,9 @@ extern "C" {
 #define CHANGE_LIMIT_INTERVAL (30)
 #define USAGE_THRESHOLD (5)
 
+#define GET_VALID_VALUE(x) (((x) >= 0 && (x) <= 100) ? (x) : 0)
+#define CODEC_NORMALIZE(x) (x * 85 / 100)
+
 typedef struct {
   void *fn_ptr;
   char *name;
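
Note on the two macros added above (an illustrative snippet, not part of the patch; the sample values are invented): GET_VALID_VALUE discards NVML readings that fall outside the 0-100 percent range, and CODEC_NORMALIZE weights the combined encoder/decoder load at 85% before it is folded into the SM utilization.

#include <stdio.h>

#define GET_VALID_VALUE(x) (((x) >= 0 && (x) <= 100) ? (x) : 0)
#define CODEC_NORMALIZE(x) (x * 85 / 100)

int main(void) {
  int sm_util = 120;                /* out-of-range sample -> treated as 0 */
  int enc_util = 40, dec_util = 20; /* encoder / decoder utilization in %  */

  int codec = GET_VALID_VALUE(enc_util) + GET_VALID_VALUE(dec_util); /* 60 */
  printf("sm=%d codec=%d weighted=%d\n",
         GET_VALID_VALUE(sm_util), codec, CODEC_NORMALIZE(codec));   /* 0 60 51 */
  return 0;
}

One caveat worth noting: CODEC_NORMALIZE does not parenthesize its argument, so it is only safe for plain variables such as codec_util, which is how the patch uses it.
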
@@ -165,13 +169,14 @@ int read_controller_configuration();
 void load_necessary_data();
 
 /**
- * Register data to remote controller to retrieve configuration
+ * Register data to remote controller to retrieve configuration
  *
  * @param bus_id bus is of GPU card
  * @param pod_uid pod uid of Pod
  * @param container_name container name of Pod
  */
-void register_to_remote_with_data(const char *bus_id, const char *pod_uid, const char *container_name);
+void register_to_remote_with_data(const char *bus_id, const char *pod_uid,
+                                  const char *container_name);
 
 /**
  * Tell whether we're using old method to find controller configuration path

src/hijack_call.c (+48 -35)
@@ -1,11 +1,12 @@
 /*
- * Tencent is pleased to support the open source community by making TKEStack available.
+ * Tencent is pleased to support the open source community by making TKEStack
+ * available.
  *
  * Copyright (C) 2012-2019 Tencent. All Rights Reserved.
  *
- * Licensed under the Apache License, Version 2.0 (the "License"); you may not use
- * this file except in compliance with the License. You may obtain a copy of the
- * License at
+ * Licensed under the Apache License, Version 2.0 (the "License"); you may not
+ * use this file except in compliance with the License. You may obtain a copy of
+ * the License at
  *
  * https://opensource.org/licenses/Apache-2.0
  *
@@ -99,8 +100,8 @@ typedef struct {
 
 /** helper function */
 int int_match(const void *a, const void *b) {
-  const int *ra = (const int *) a;
-  const int *rb = (const int *) b;
+  const int *ra = (const int *)a;
+  const int *rb = (const int *)b;
 
   if (*ra < *rb) {
     return -1;
@@ -167,7 +168,7 @@ static void rate_limiter(int grids, int blocks) {
   LOGGER(5, "launch kernel %d, curr core: %d", kernel_size, g_cur_cuda_cores);
   if (g_vcuda_config.enable) {
     do {
-CHECK:
+    CHECK:
       before_cuda_cores = g_cur_cuda_cores;
       LOGGER(8, "current core: %d", g_cur_cuda_cores);
       if (before_cuda_cores < 0) {
@@ -186,7 +187,7 @@ int delta(int up_limit, int user_current, int share) {
       g_sm_num * g_sm_num * g_max_thread_per_sm * utilization_diff / 2560;
   /* Accelerate cuda cores allocation when utilization vary widely */
   if (utilization_diff > up_limit / 2) {
-    increment = increment * utilization_diff * 2 / up_limit;
+    increment = increment * utilization_diff * 2 / (up_limit + 1);
   }
 
   if (user_current <= up_limit) {
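
Review note on the delta() hunk above: dividing by (up_limit + 1) instead of up_limit keeps the acceleration step defined when the configured limit is 0, at the cost of a marginally smaller boost for non-zero limits. A toy calculation with invented numbers (this helper is not the project's code, just a mirror of the patched expression):

#include <stdio.h>

/* Hypothetical mirror of the patched acceleration step. */
static int accelerate(int increment, int utilization_diff, int up_limit) {
  if (utilization_diff > up_limit / 2) {
    /* "+ 1" keeps the division defined even when up_limit == 0 */
    increment = increment * utilization_diff * 2 / (up_limit + 1);
  }
  return increment;
}

int main(void) {
  printf("%d\n", accelerate(100, 60, 50)); /* 100 * 60 * 2 / 51 = 235 */
  printf("%d\n", accelerate(100, 60, 0));  /* was a division by zero; now 12000 */
  return 0;
}
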
@@ -218,7 +219,7 @@ static void *utilization_watcher(void *arg UNUSED) {
   while (1) {
     nanosleep(&g_wait, NULL);
     do {
-      atomic_action(pid_path, get_used_gpu_utilization, (void *) &top_result);
+      atomic_action(pid_path, get_used_gpu_utilization, (void *)&top_result);
     } while (!top_result.valid);
 
     sys_free = MAX_UTILIZATION - top_result.sys_current;
@@ -259,9 +260,9 @@ static void *utilization_watcher(void *arg UNUSED) {
     if (i % CHANGE_LIMIT_INTERVAL == 0) {
       if (avg_sys_free * 2 / CHANGE_LIMIT_INTERVAL > USAGE_THRESHOLD) {
         up_limit = up_limit + g_vcuda_config.utilization / 10 >
-                       g_vcuda_config.limit
-                   ? g_vcuda_config.limit
-                   : up_limit + g_vcuda_config.utilization / 10;
+                           g_vcuda_config.limit
+                       ? g_vcuda_config.limit
+                       : up_limit + g_vcuda_config.utilization / 10;
       }
       i = 0;
     }
@@ -295,10 +296,11 @@ static void get_used_gpu_utilization(int fd, void *arg) {
   unsigned int running_processes = MAX_PIDS;
   nvmlProcessInfo_t pids_on_device[MAX_PIDS];
   nvmlDevice_t dev;
-  utilization_t *top_result = (utilization_t *) arg;
+  utilization_t *top_result = (utilization_t *)arg;
   nvmlReturn_t ret;
   struct timeval cur;
   size_t microsec;
+  int codec_util = 0;
 
   int i;
 
@@ -337,11 +339,20 @@ static void get_used_gpu_utilization(int fd, void *arg) {
   for (i = 0; i < processes_num; i++) {
     if (processes_sample[i].timeStamp >= top_result->checktime) {
       top_result->valid = 1;
-      top_result->sys_current += processes_sample[i].smUtil;
+      top_result->sys_current += GET_VALID_VALUE(processes_sample[i].smUtil);
+
+      codec_util = GET_VALID_VALUE(processes_sample[i].encUtil) +
+                   GET_VALID_VALUE(processes_sample[i].decUtil);
+      top_result->sys_current += CODEC_NORMALIZE(codec_util);
+
       LOGGER(8, "try to find %d from pid tables", processes_sample[i].pid);
       if (likely(bsearch(&processes_sample[i].pid, g_pids_table,
-                         (size_t) g_pids_table_size, sizeof(int), int_match))) {
-        top_result->user_current += processes_sample[i].smUtil;
+                         (size_t)g_pids_table_size, sizeof(int), int_match))) {
+        top_result->user_current += GET_VALID_VALUE(processes_sample[i].smUtil);
+
+        codec_util = GET_VALID_VALUE(processes_sample[i].encUtil) +
+                     GET_VALID_VALUE(processes_sample[i].decUtil);
+        top_result->user_current += CODEC_NORMALIZE(codec_util);
       }
     }
   }
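
To make the accounting change above easier to follow, here is a self-contained sketch of the per-sample bookkeeping. The struct is a stand-in that only mirrors the NVML process-utilization fields the patch touches (pid, smUtil, encUtil, decUtil); the numbers are invented and the snippet is illustrative, not the hijack code itself:

#include <stdio.h>

#define GET_VALID_VALUE(x) (((x) >= 0 && (x) <= 100) ? (x) : 0)
#define CODEC_NORMALIZE(x) (x * 85 / 100)

/* Stand-in for the NVML per-process sample fields used by the patch. */
typedef struct {
  int pid;
  int smUtil;  /* CUDA (SM) utilization, percent     */
  int encUtil; /* video encoder utilization, percent */
  int decUtil; /* video decoder utilization, percent */
} sample_t;

int main(void) {
  sample_t s = {.pid = 4242, .smUtil = 30, .encUtil = 25, .decUtil = 10};
  int sys_current = 0;

  /* SM load counts in full; encoder + decoder load is weighted at 85%. */
  sys_current += GET_VALID_VALUE(s.smUtil);
  int codec_util = GET_VALID_VALUE(s.encUtil) + GET_VALID_VALUE(s.decUtil);
  sys_current += CODEC_NORMALIZE(codec_util);

  printf("pid %d accounted utilization: %d\n", s.pid, sys_current); /* 30 + 29 = 59 */
  return 0;
}

The same two additions are applied to user_current when the pid is found in g_pids_table, so both the per-card and the per-container figures now include video engine load.
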
@@ -359,7 +370,7 @@ static void load_pids_table(int fd, void *arg UNUSED) {
   int i = 0;
 
   for (item = 0; item < MAX_PIDS; item++) {
-    rsize = (int) read(fd, g_pids_table + item, sizeof(int));
+    rsize = (int)read(fd, g_pids_table + item, sizeof(int));
     if (unlikely(rsize != sizeof(int))) {
       break;
     }
@@ -415,7 +426,7 @@ static void get_used_gpu_memory(int fd, void *arg) {
   }
 
   for (i = 0; i < size_on_device; i++) {
-    if (bsearch(&pids_on_device[i].pid, g_pids_table, (size_t) g_pids_table_size,
+    if (bsearch(&pids_on_device[i].pid, g_pids_table, (size_t)g_pids_table_size,
                 sizeof(int), int_match)) {
       LOGGER(4, "%d use memory: %lld", pids_on_device[i].pid,
              pids_on_device[i].usedGpuMemory);
@@ -441,14 +452,14 @@ static void register_to_remote() {
                         &nvml_dev);
   if (unlikely(ret)) {
     LOGGER(FATAL, "can't find device 0, error %s",
-           nvml_error((nvmlReturn_t) ret));
+           nvml_error((nvmlReturn_t)ret));
   }
 
   ret = NVML_ENTRY_CALL(nvml_library_entry, nvmlDeviceGetPciInfo, nvml_dev,
                         &pci_info);
   if (unlikely(ret)) {
     LOGGER(FATAL, "can't find device 0, error %s",
-           nvml_error((nvmlReturn_t) ret));
+           nvml_error((nvmlReturn_t)ret));
   }
 
   strncpy(g_vcuda_config.bus_id, pci_info.busId,
@@ -467,22 +478,22 @@ static void initialization() {
   ret = CUDA_ENTRY_CALL(cuda_library_entry, cuInit, 0);
   if (unlikely(ret)) {
     LOGGER(FATAL, "cuInit error %s",
-           cuda_error((CUresult) ret, &cuda_err_string));
+           cuda_error((CUresult)ret, &cuda_err_string));
   }
 
   ret = CUDA_ENTRY_CALL(cuda_library_entry, cuDeviceGetAttribute, &g_sm_num,
                         CU_DEVICE_ATTRIBUTE_MULTIPROCESSOR_COUNT, 0);
   if (unlikely(ret)) {
     LOGGER(FATAL, "can't get processor number, error %s",
-           cuda_error((CUresult) ret, &cuda_err_string));
+           cuda_error((CUresult)ret, &cuda_err_string));
   }
 
   ret = CUDA_ENTRY_CALL(cuda_library_entry, cuDeviceGetAttribute,
                         &g_max_thread_per_sm,
                         CU_DEVICE_ATTRIBUTE_MAX_THREADS_PER_MULTIPROCESSOR, 0);
   if (unlikely(ret)) {
     LOGGER(FATAL, "can't get max thread per processor, error %s",
-           cuda_error((CUresult) ret, &cuda_err_string));
+           cuda_error((CUresult)ret, &cuda_err_string));
   }
 
   g_total_cuda_cores = g_max_thread_per_sm * g_sm_num * FACTOR;
@@ -535,7 +546,7 @@ CUresult cuMemAllocManaged(CUdeviceptr *dptr, size_t bytesize,
   CUresult ret;
 
   if (g_vcuda_config.enable) {
-    atomic_action(pid_path, get_used_gpu_memory, (void *) &used);
+    atomic_action(pid_path, get_used_gpu_memory, (void *)&used);
 
     if (unlikely(used + request_size > g_vcuda_config.gpu_memory)) {
       ret = CUDA_ERROR_OUT_OF_MEMORY;
@@ -555,7 +566,7 @@ CUresult cuMemAlloc_v2(CUdeviceptr *dptr, size_t bytesize) {
   CUresult ret;
 
   if (g_vcuda_config.enable) {
-    atomic_action(pid_path, get_used_gpu_memory, (void *) &used);
+    atomic_action(pid_path, get_used_gpu_memory, (void *)&used);
 
     if (unlikely(used + request_size > g_vcuda_config.gpu_memory)) {
       ret = CUDA_ERROR_OUT_OF_MEMORY;
@@ -574,7 +585,7 @@ CUresult cuMemAlloc(CUdeviceptr *dptr, size_t bytesize) {
   CUresult ret;
 
   if (g_vcuda_config.enable) {
-    atomic_action(pid_path, get_used_gpu_memory, (void *) &used);
+    atomic_action(pid_path, get_used_gpu_memory, (void *)&used);
 
     if (unlikely(used + request_size > g_vcuda_config.gpu_memory)) {
       ret = CUDA_ERROR_OUT_OF_MEMORY;
@@ -595,7 +606,7 @@ CUresult cuMemAllocPitch_v2(CUdeviceptr *dptr, size_t *pPitch,
   CUresult ret;
 
   if (g_vcuda_config.enable) {
-    atomic_action(pid_path, get_used_gpu_memory, (void *) &used);
+    atomic_action(pid_path, get_used_gpu_memory, (void *)&used);
 
     if (unlikely(used + request_size > g_vcuda_config.gpu_memory)) {
       ret = CUDA_ERROR_OUT_OF_MEMORY;
@@ -616,7 +627,7 @@ CUresult cuMemAllocPitch(CUdeviceptr *dptr, size_t *pPitch, size_t WidthInBytes,
   CUresult ret;
 
   if (g_vcuda_config.enable) {
-    atomic_action(pid_path, get_used_gpu_memory, (void *) &used);
+    atomic_action(pid_path, get_used_gpu_memory, (void *)&used);
 
     if (unlikely(used + request_size > g_vcuda_config.gpu_memory)) {
       ret = CUDA_ERROR_OUT_OF_MEMORY;
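
The cuMemAlloc* hooks above all follow the same pattern: look up how much device memory the container's processes already hold, then fail the call with CUDA_ERROR_OUT_OF_MEMORY if the new request would push usage past the configured quota. A rough, self-contained sketch of that decision (over_quota and the sizes are invented for illustration; the real hooks obtain usage through atomic_action and get_used_gpu_memory):

#include <stddef.h>
#include <stdio.h>

/* Hypothetical stand-in for the per-container quota from g_vcuda_config. */
typedef struct {
  size_t gpu_memory; /* quota in bytes */
} config_t;

/* Returns 1 when an allocation should be rejected as over-quota. */
static int over_quota(const config_t *cfg, size_t used, size_t request_size) {
  return used + request_size > cfg->gpu_memory;
}

int main(void) {
  config_t cfg = {.gpu_memory = 1UL << 30}; /* 1 GiB quota          */
  size_t used = 900UL << 20;                /* 900 MiB already used */

  printf("%s\n", over_quota(&cfg, used, 100UL << 20) ? "reject" : "allow"); /* allow  */
  printf("%s\n", over_quota(&cfg, used, 200UL << 20) ? "reject" : "allow"); /* reject */
  return 0;
}
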
@@ -655,7 +666,8 @@ static size_t get_array_base_size(int format) {
   return base_size;
 }
 
-static CUresult cuArrayCreate_helper(const CUDA_ARRAY_DESCRIPTOR *pAllocateArray) {
+static CUresult cuArrayCreate_helper(
+    const CUDA_ARRAY_DESCRIPTOR *pAllocateArray) {
   size_t used = 0;
   size_t base_size = 0;
   size_t request_size = 0;
@@ -666,7 +678,7 @@ static CUresult cuArrayCreate_helper(const CUDA_ARRAY_DESCRIPTOR *pAllocateArray
   request_size = base_size * pAllocateArray->NumChannels *
                  pAllocateArray->Height * pAllocateArray->Width;
 
-  atomic_action(pid_path, get_used_gpu_memory, (void *) &used);
+  atomic_action(pid_path, get_used_gpu_memory, (void *)&used);
 
   if (unlikely(used + request_size > g_vcuda_config.gpu_memory)) {
     ret = CUDA_ERROR_OUT_OF_MEMORY;
@@ -708,7 +720,8 @@ CUresult cuArrayCreate(CUarray *pHandle,
   return ret;
 }
 
-static CUresult cuArray3DCreate_helper(const CUDA_ARRAY3D_DESCRIPTOR *pAllocateArray) {
+static CUresult cuArray3DCreate_helper(
+    const CUDA_ARRAY3D_DESCRIPTOR *pAllocateArray) {
   size_t used = 0;
   size_t base_size = 0;
   size_t request_size = 0;
@@ -719,7 +732,7 @@ static CUresult cuArray3DCreate_helper(const CUDA_ARRAY3D_DESCRIPTOR *pAllocateA
   request_size = base_size * pAllocateArray->NumChannels *
                  pAllocateArray->Height * pAllocateArray->Width;
 
-  atomic_action(pid_path, get_used_gpu_memory, (void *) &used);
+  atomic_action(pid_path, get_used_gpu_memory, (void *)&used);
 
   if (unlikely(used + request_size > g_vcuda_config.gpu_memory)) {
     ret = CUDA_ERROR_OUT_OF_MEMORY;
@@ -775,7 +788,7 @@ CUresult cuMipmappedArrayCreate(
                  pMipmappedArrayDesc->Height * pMipmappedArrayDesc->Width *
                  pMipmappedArrayDesc->Depth;
 
-  atomic_action(pid_path, get_used_gpu_memory, (void *) &used);
+  atomic_action(pid_path, get_used_gpu_memory, (void *)&used);
 
   if (unlikely(used + request_size > g_vcuda_config.gpu_memory)) {
     ret = CUDA_ERROR_OUT_OF_MEMORY;
@@ -813,7 +826,7 @@ CUresult cuMemGetInfo_v2(size_t *free, size_t *total) {
   size_t used = 0;
 
   if (g_vcuda_config.enable) {
-    atomic_action(pid_path, get_used_gpu_memory, (void *) &used);
+    atomic_action(pid_path, get_used_gpu_memory, (void *)&used);
 
     *total = g_vcuda_config.gpu_memory;
     *free =
@@ -829,7 +842,7 @@ CUresult cuMemGetInfo(size_t *free, size_t *total) {
   size_t used = 0;
 
   if (g_vcuda_config.enable) {
-    atomic_action(pid_path, get_used_gpu_memory, (void *) &used);
+    atomic_action(pid_path, get_used_gpu_memory, (void *)&used);
 
     *total = g_vcuda_config.gpu_memory;
     *free =
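
The final two hunks show that cuMemGetInfo and cuMemGetInfo_v2 report the container quota as *total and derive *free from the quota minus the memory currently attributed to the container. A minimal sketch of that reporting, assuming a hypothetical report_mem_info helper (illustrative only; the real hooks read usage via atomic_action):

#include <stddef.h>
#include <stdio.h>

/* Hypothetical helper: report quota-relative figures instead of device totals. */
static void report_mem_info(size_t quota, size_t used, size_t *free_b,
                            size_t *total_b) {
  *total_b = quota;
  *free_b = used >= quota ? 0 : quota - used; /* clamp so free never underflows */
}

int main(void) {
  size_t free_b = 0, total_b = 0;
  /* 4 GiB quota, 3 GiB used -> total 4096 MiB, free 1024 MiB */
  report_mem_info((size_t)4 << 30, (size_t)3 << 30, &free_b, &total_b);
  printf("total=%zu MiB free=%zu MiB\n", total_b >> 20, free_b >> 20);
  return 0;
}
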
