Skip to content

Commit 49254c2

Browse files
committed
add slicing
1 parent 19e4679 commit 49254c2

23 files changed

+212
-272
lines changed

README.md

+1
Original file line numberDiff line numberDiff line change
@@ -0,0 +1 @@
1+
# rlskedge

cluster.jl

+18-27
Original file line numberDiff line numberDiff line change
@@ -10,7 +10,7 @@ using .job
1010

1111
export ClusterEnv, QUEUE_SIZE, ZONES
1212

13-
const QUEUE_SIZE, ZONES = 128, 32
13+
const QUEUE_SIZE, ZONES, SLICE_SIZE = 128, 32, 50_000
1414

1515
struct Metrics
1616
avg_bounded_slowdown::Float32
@@ -23,6 +23,7 @@ mutable struct ClusterEnv <: AbstractEnv
2323
workload::Workload
2424
time::Int
2525
next_job_index::Int
26+
last_job_index::Int
2627
reward::Float32
2728
done::Bool
2829
cluster::Vector{Job}
@@ -33,15 +34,19 @@ mutable struct ClusterEnv <: AbstractEnv
3334
metrics::Union{Nothing, Metrics}
3435
end
3536

37+
choose_index(wl::Workload) = rand(1:length(wl.jobs) - SLICE_SIZE)
38+
3639
function RLBase.reset!(env::ClusterEnv)
37-
#workload = Workload(rand(WORKLOADS))
38-
#env.workload = workload
40+
workload = Workload(rand(WORKLOADS))
41+
env.workload = workload
42+
index = choose_index(workload)
3943
env.time = env.workload.jobs[1].submit_time
40-
env.next_job_index = 2
44+
env.next_job_index = index + 1
45+
env.last_job_index = index + SLICE_SIZE
4146
env.reward = 0
4247
env.done = false
4348
env.cluster = []
44-
env.queue = [env.workload.jobs[1]]
49+
env.queue = [env.workload.jobs[index]]
4550
env.are_pending_jobs = true
4651
env.available_cores = env.workload.cores
4752
env.utilization = []
@@ -50,34 +55,20 @@ end
5055

5156
function ClusterEnv()
5257
workload = Workload(rand(WORKLOADS))
58+
index = choose_index(workload)
5359
time = workload.jobs[1].submit_time
54-
next_job_index = 2
55-
reward = 0
56-
done = false
57-
cluster = []
58-
queue = [workload.jobs[1]]
59-
are_pending_jobs = true
60-
available_cores = workload.cores
61-
utilization = []
62-
metrics = nothing
63-
64-
ClusterEnv(workload, time, next_job_index, reward, done, cluster, queue, are_pending_jobs, available_cores, utilization, metrics)
65-
end
66-
67-
function ClusterEnv(index)
68-
workload = Workload(WORKLOADS[index])
69-
time = workload.jobs[1].submit_time
70-
next_job_index = 2
60+
next_job_index = index + 1
61+
last_job_index = index + SLICE_SIZE
7162
reward = 0
7263
done = false
7364
cluster = []
74-
queue = [workload.jobs[1]]
65+
queue = [workload.jobs[index]]
7566
are_pending_jobs = true
7667
available_cores = workload.cores
7768
utilization = []
7869
metrics = nothing
7970

80-
ClusterEnv(workload, time, next_job_index, reward, done, cluster, queue, are_pending_jobs, available_cores, utilization, metrics)
71+
ClusterEnv(workload, time, next_job_index, last_job_index, reward, done, cluster, queue, are_pending_jobs, available_cores, utilization, metrics)
8172
end
8273

8374
RLBase.action_space(env::ClusterEnv) = Base.OneTo(QUEUE_SIZE)
@@ -214,7 +205,7 @@ function (env::ClusterEnv)(action)
214205
end
215206

216207
# check for termination
217-
if env.next_job_index > length(env.workload.jobs) # no more pending jobs!
208+
if env.next_job_index > env.last_job_index # no more pending jobs!
218209
env.are_pending_jobs = false
219210
end
220211
# we've finished!
@@ -233,8 +224,8 @@ function (env::ClusterEnv)(action)
233224
bslds = [max((j.simulated_wait_time + j.simulated_run_time) / max(j.simulated_run_time, 10), 1) for j in env.workload.jobs]
234225
sum_bslds = +(bslds...)
235226
avg_bsld = sum_bslds / length(env.workload.jobs)
236-
# currently: negative average bounded slowdown
237-
env.reward = -avg_bsld
227+
# currently: SJF's bounded slowdown minus our average bounded slowdown (positive when we beat SJF)
228+
env.reward = env.workload.sjf_bsld - avg_bsld
238229
env.done = true
239230
env.metrics = Metrics(avg_bsld, avg_wait_time, max_wait_time, avg_utilization)
240231
break

data/ANL-Intrepid-2009-1.swf

+8-8
Original file line numberDiff line numberDiff line change
@@ -17,14 +17,14 @@
1717
; fcfs_average_wait: 15236.789f0
1818
; fcfs_max_wait: 260871
1919
; fcfs_utilization: 0.403677f0
20-
; rand_bounded_slowdown: ?
21-
; rand_average_wait: ?
22-
; rand_max_wait: ?
23-
; rand_utilization: ?
24-
; sjf_bounded_slowdown: ?
25-
; sjf_average_wait: ?
26-
; sjf_max_wait: ?
27-
; sjf_utilization: ?
20+
; rand_bounded_slowdown: 33.204773f0
21+
; rand_average_wait: 29764.49f0
22+
; rand_max_wait: 880932
23+
; rand_utilization: 0.4077924f0
24+
; sjf_bounded_slowdown: 6.728023f0
25+
; sjf_average_wait: 6257.9917f0
26+
; sjf_max_wait: 812701
27+
; sjf_utilization: 0.40214235f0
2828

2929
; Note: Scheduler is Cobalt (http://trac.mcs.anl.gov/projects/cobalt/)
3030
1 0 6680 7560 2048 -1 -1 2048 10800 -1 -1 1 -1 -1 1 -1 -1 -1

data/CEA-Curie-2011-2.1-cln.swf

+8-8
Original file line numberDiff line numberDiff line change
@@ -77,14 +77,14 @@
7777
; fcfs_average_wait: 23738.648f0
7878
; fcfs_max_wait: 181225
7979
; fcfs_utilization: 0.37693802f0
80-
; rand_bounded_slowdown: ?
81-
; rand_average_wait: ?
82-
; rand_max_wait: ?
83-
; rand_utilization: ?
84-
; sjf_bounded_slowdown: ?
85-
; sjf_average_wait: ?
86-
; sjf_max_wait: ?
87-
; sjf_utilization: ?
80+
; rand_bounded_slowdown: 951.0125f0
81+
; rand_average_wait: 34300.434f0
82+
; rand_max_wait: 448601
83+
; rand_utilization: 0.37612095f0
84+
; sjf_bounded_slowdown: 340.9844f0
85+
; sjf_average_wait: 14785.063f0
86+
; sjf_max_wait: 1055787
87+
; sjf_utilization: 0.37800074f0
8888

8989
272393 31656837 33492 86395 256 -1 -1 256 86400 -1 1 87 95 -1 -1 4 -1 -1
9090
272394 31664481 249861 1189 2048 -1 -1 2048 72000 -1 0 21 89 -1 -1 4 -1 -1

data/CTC-SP2-1996-3.1-cln.swf

+12-12
Original file line numberDiff line numberDiff line change
@@ -23,18 +23,18 @@
2323
; Queue: 3 piofs
2424
; Queue: 4 informix
2525

26-
; fcfs_bounded_slowdown: 679.31323f0
27-
; fcfs_average_wait: 23738.648f0
28-
; fcfs_max_wait: 181225
29-
; fcfs_utilization: 0.37693802f0
30-
; rand_bounded_slowdown: ?
31-
; rand_average_wait: ?
32-
; rand_max_wait: ?
33-
; rand_utilization: ?
34-
; sjf_bounded_slowdown: ?
35-
; sjf_average_wait: ?
36-
; sjf_max_wait: ?
37-
; sjf_utilization: ?
26+
; fcfs_bounded_slowdown: 7639.569f0
27+
; fcfs_average_wait: 1.3953139f6
28+
; fcfs_max_wait: 4140831
29+
; fcfs_utilization: 0.26490992f0
30+
; rand_bounded_slowdown: 2746.2961f0
31+
; rand_average_wait: 870947.9f0
32+
; rand_max_wait: 4445812
33+
; rand_utilization: 0.25893745f0
34+
; sjf_bounded_slowdown: 618.7664f0
35+
; sjf_average_wait: 237426.42f0
36+
; sjf_max_wait: 29790142
37+
; sjf_utilization: 0.24841218f0
3838

3939
;
4040
; Note: THIS IS A CLEANED VERSION OF THE LOG!

data/HPC2N-2002-2.2-cln.swf

+12-12
Original file line numberDiff line numberDiff line change
@@ -21,18 +21,18 @@
2121
; Partition: 1 DEFAULT
2222
; Partition: 2 ALL
2323

24-
; fcfs_bounded_slowdown: 679.31323f0
25-
; fcfs_average_wait: 23738.648f0
26-
; fcfs_max_wait: 181225
27-
; fcfs_utilization: 0.37693802f0
28-
; rand_bounded_slowdown: ?
29-
; rand_average_wait: ?
30-
; rand_max_wait: ?
31-
; rand_utilization: ?
32-
; sjf_bounded_slowdown: ?
33-
; sjf_average_wait: ?
34-
; sjf_max_wait: ?
35-
; sjf_utilization: ?
24+
; fcfs_bounded_slowdown: 161.85031f0
25+
; fcfs_average_wait: 17225.17f0
26+
; fcfs_max_wait: 1164283
27+
; fcfs_utilization: 0.39946938f0
28+
; rand_bounded_slowdown: 129.56932f0
29+
; rand_average_wait: 15103.185f0
30+
; rand_max_wait: 1625164
31+
; rand_utilization: 0.39942858f0
32+
; sjf_bounded_slowdown: 127.50789f0
33+
; sjf_average_wait: 12791.929f0
34+
; sjf_max_wait: 1631664
35+
; sjf_utilization: 0.39929992f0
3636

3737
;
3838
;

0 commit comments

Comments
 (0)