@@ -319,7 +319,7 @@ const WORKER_MONITOR_TASKS = Dict{Int,Task}()
319
319
const WORKER_MONITOR_CHANS = Dict {Int,Dict{UInt64,RemoteChannel}} ()
320
320
function init_proc (state, p, log_sink)
321
321
ctx = Context (Int[]; log_sink)
322
- timespan_start (ctx, :init_proc , p. pid, 0 )
322
+ timespan_start (ctx, :init_proc , (;worker = p. pid), nothing )
323
323
# Initialize pressure and capacity
324
324
gproc = OSProc (p. pid)
325
325
lock (state. lock) do
@@ -387,7 +387,7 @@ function init_proc(state, p, log_sink)
387
387
# Setup dynamic listener
388
388
dynamic_listener! (ctx, state, p. pid)
389
389
390
- timespan_finish (ctx, :init_proc , p. pid, 0 )
390
+ timespan_finish (ctx, :init_proc , (;worker = p. pid), nothing )
391
391
end
392
392
function _cleanup_proc (uid, log_sink)
393
393
empty! (CHUNK_CACHE) # FIXME : Should be keyed on uid!
@@ -403,14 +403,14 @@ end
403
403
function cleanup_proc (state, p, log_sink)
404
404
ctx = Context (Int[]; log_sink)
405
405
wid = p. pid
406
- timespan_start (ctx, :cleanup_proc , wid, 0 )
406
+ timespan_start (ctx, :cleanup_proc , (;worker = wid), nothing )
407
407
lock (WORKER_MONITOR_LOCK) do
408
408
if haskey (WORKER_MONITOR_CHANS, wid)
409
409
delete! (WORKER_MONITOR_CHANS[wid], state. uid)
410
410
end
411
411
end
412
412
remote_do (_cleanup_proc, wid, state. uid, log_sink)
413
- timespan_finish (ctx, :cleanup_proc , wid, 0 )
413
+ timespan_finish (ctx, :cleanup_proc , (;worker = wid), nothing )
414
414
end
415
415
416
416
" Process-local condition variable (and lock) indicating task completion."
@@ -458,24 +458,24 @@ function compute_dag(ctx, d::Thunk; options=SchedulerOptions())
458
458
459
459
master = OSProc (myid ())
460
460
461
- timespan_start (ctx, :scheduler_init , 0 , master)
461
+ timespan_start (ctx, :scheduler_init , nothing , master)
462
462
try
463
463
scheduler_init (ctx, state, d, options, deps)
464
464
finally
465
- timespan_finish (ctx, :scheduler_init , 0 , master)
465
+ timespan_finish (ctx, :scheduler_init , nothing , master)
466
466
end
467
467
468
468
value, errored = try
469
469
scheduler_run (ctx, state, d, options)
470
470
finally
471
471
# Always try to tear down the scheduler
472
- timespan_start (ctx, :scheduler_exit , 0 , master)
472
+ timespan_start (ctx, :scheduler_exit , nothing , master)
473
473
try
474
474
scheduler_exit (ctx, state, options)
475
475
catch err
476
476
@error " Error when tearing down scheduler" exception= (err,catch_backtrace ())
477
477
finally
478
- timespan_finish (ctx, :scheduler_exit , 0 , master)
478
+ timespan_finish (ctx, :scheduler_exit , nothing , master)
479
479
end
480
480
end
481
481
@@ -531,10 +531,10 @@ function scheduler_run(ctx, state::ComputeState, d::Thunk, options)
531
531
check_integrity (ctx)
532
532
533
533
isempty (state. running) && continue
534
- timespan_start (ctx, :take , 0 , 0 )
534
+ timespan_start (ctx, :take , nothing , nothing )
535
535
@dagdebug nothing :take " Waiting for results"
536
536
chan_value = take! (state. chan) # get result of completed thunk
537
- timespan_finish (ctx, :take , 0 , 0 )
537
+ timespan_finish (ctx, :take , nothing , nothing )
538
538
if chan_value isa RescheduleSignal
539
539
continue
540
540
end
@@ -549,13 +549,13 @@ function scheduler_run(ctx, state::ComputeState, d::Thunk, options)
549
549
@warn " Worker $(pid) died, rescheduling work"
550
550
551
551
# Remove dead worker from procs list
552
- timespan_start (ctx, :remove_procs , 0 , 0 )
552
+ timespan_start (ctx, :remove_procs , (;worker = pid), nothing )
553
553
remove_dead_proc! (ctx, state, gproc)
554
- timespan_finish (ctx, :remove_procs , 0 , 0 )
554
+ timespan_finish (ctx, :remove_procs , (;worker = pid), nothing )
555
555
556
- timespan_start (ctx, :handle_fault , 0 , 0 )
556
+ timespan_start (ctx, :handle_fault , (;worker = pid), nothing )
557
557
handle_fault (ctx, state, gproc)
558
- timespan_finish (ctx, :handle_fault , 0 , 0 )
558
+ timespan_finish (ctx, :handle_fault , (;worker = pid), nothing )
559
559
return # effectively `continue`
560
560
else
561
561
if something (ctx. options. allow_errors, false ) ||
@@ -590,9 +590,9 @@ function scheduler_run(ctx, state::ComputeState, d::Thunk, options)
590
590
end
591
591
end
592
592
593
- timespan_start (ctx, :finish , thunk_id, (;thunk_id))
593
+ timespan_start (ctx, :finish , (; thunk_id) , (;thunk_id))
594
594
finish_task! (ctx, state, node, thunk_failed)
595
- timespan_finish (ctx, :finish , thunk_id, (;thunk_id))
595
+ timespan_finish (ctx, :finish , (; thunk_id) , (;thunk_id))
596
596
597
597
delete_unused_tasks! (state)
598
598
end
@@ -675,13 +675,13 @@ function schedule!(ctx, state, procs=procs_to_use(ctx))
675
675
task = nothing
676
676
@label pop_task
677
677
if task != = nothing
678
- timespan_finish (ctx, :schedule , task. id, (;thunk_id= task. id))
678
+ timespan_finish (ctx, :schedule , (;thunk_id = task. id) , (;thunk_id= task. id))
679
679
end
680
680
if isempty (state. ready)
681
681
@goto fire_tasks
682
682
end
683
683
task = pop! (state. ready)
684
- timespan_start (ctx, :schedule , task. id, (;thunk_id= task. id))
684
+ timespan_start (ctx, :schedule , (;thunk_id = task. id) , (;thunk_id= task. id))
685
685
if haskey (state. cache, task)
686
686
if haskey (state. errored, task)
687
687
# An error was eagerly propagated to this task
@@ -869,7 +869,7 @@ function monitor_procs_changed!(ctx, state)
869
869
wait (ctx. proc_notify)
870
870
end
871
871
872
- timespan_start (ctx, :assign_procs , 0 , 0 )
872
+ timespan_start (ctx, :assign_procs , nothing , nothing )
873
873
874
874
# Load new set of procs
875
875
new_ps = procs_to_use (ctx)
@@ -897,7 +897,7 @@ function monitor_procs_changed!(ctx, state)
897
897
end
898
898
end
899
899
900
- timespan_finish (ctx, :assign_procs , 0 , 0 )
900
+ timespan_finish (ctx, :assign_procs , nothing , nothing )
901
901
old_ps = new_ps
902
902
end
903
903
end
@@ -982,9 +982,9 @@ function evict_chunks!(log_sink, chunks::Set{Chunk})
982
982
ctx = Context ([myid ()];log_sink)
983
983
for chunk in chunks
984
984
lock (TASK_SYNC) do
985
- timespan_start (ctx, :evict , myid (), (;data= chunk))
985
+ timespan_start (ctx, :evict , (;worker = myid () ), (;data= chunk))
986
986
haskey (CHUNK_CACHE, chunk) && delete! (CHUNK_CACHE, chunk)
987
- timespan_finish (ctx, :evict , myid (), (;data= chunk))
987
+ timespan_finish (ctx, :evict , (;worker = myid () ), (;data= chunk))
988
988
end
989
989
end
990
990
nothing
@@ -1061,15 +1061,15 @@ function fire_tasks!(ctx, thunks::Vector{<:Tuple}, (gproc, proc), state)
1061
1061
for ts in to_send
1062
1062
# TODO : errormonitor
1063
1063
@async begin
1064
- timespan_start (ctx, :fire , gproc. pid, 0 )
1064
+ timespan_start (ctx, :fire , (;worker = gproc. pid), nothing )
1065
1065
try
1066
1066
remotecall_wait (do_tasks, gproc. pid, proc, state. chan, [ts])
1067
1067
catch err
1068
1068
bt = catch_backtrace ()
1069
1069
thunk_id = ts[1 ]
1070
1070
put! (state. chan, (gproc. pid, proc, thunk_id, (CapturedException (err, bt), nothing )))
1071
1071
finally
1072
- timespan_finish (ctx, :fire , gproc. pid, 0 )
1072
+ timespan_finish (ctx, :fire , (;worker = gproc. pid), nothing )
1073
1073
end
1074
1074
end
1075
1075
end
@@ -1189,25 +1189,26 @@ function start_processor_runner!(istate::ProcessorInternalState, uid::UInt64, re
1189
1189
proc_occupancy = istate. proc_occupancy
1190
1190
time_pressure = istate. time_pressure
1191
1191
1192
+ wid = get_parent (to_proc). pid
1192
1193
work_to_do = false
1193
1194
while isopen (return_queue)
1194
1195
# Wait for new tasks
1195
1196
if ! work_to_do
1196
1197
@dagdebug nothing :processor " Waiting for tasks"
1197
- timespan_start (ctx, :proc_run_wait , to_proc, nothing )
1198
+ timespan_start (ctx, :proc_run_wait , (;worker = wid, processor = to_proc) , nothing )
1198
1199
wait (istate. reschedule)
1199
1200
@static if VERSION >= v " 1.9"
1200
1201
reset (istate. reschedule)
1201
1202
end
1202
- timespan_finish (ctx, :proc_run_wait , to_proc, nothing )
1203
+ timespan_finish (ctx, :proc_run_wait , (;worker = wid, processor = to_proc) , nothing )
1203
1204
if istate. done[]
1204
1205
return
1205
1206
end
1206
1207
end
1207
1208
1208
1209
# Fetch a new task to execute
1209
1210
@dagdebug nothing :processor " Trying to dequeue"
1210
- timespan_start (ctx, :proc_run_fetch , to_proc, nothing )
1211
+ timespan_start (ctx, :proc_run_fetch , (;worker = wid, processor = to_proc) , nothing )
1211
1212
work_to_do = false
1212
1213
task_and_occupancy = lock (istate. queue) do queue
1213
1214
# Only steal if there are multiple queued tasks, to prevent
@@ -1226,7 +1227,7 @@ function start_processor_runner!(istate::ProcessorInternalState, uid::UInt64, re
1226
1227
return queue_result
1227
1228
end
1228
1229
if task_and_occupancy === nothing
1229
- timespan_finish (ctx, :proc_run_fetch , to_proc, nothing )
1230
+ timespan_finish (ctx, :proc_run_fetch , (;worker = wid, processor = to_proc) , nothing )
1230
1231
1231
1232
@dagdebug nothing :processor " Failed to dequeue"
1232
1233
@@ -1241,7 +1242,7 @@ function start_processor_runner!(istate::ProcessorInternalState, uid::UInt64, re
1241
1242
@dagdebug nothing :processor " Trying to steal"
1242
1243
1243
1244
# Try to steal a task
1244
- timespan_start (ctx, :steal_local , to_proc, nothing )
1245
+ timespan_start (ctx, :steal_local , (;worker = wid, processor = to_proc) , nothing )
1245
1246
1246
1247
# Try to steal from local queues randomly
1247
1248
# TODO : Prioritize stealing from busiest processors
@@ -1276,12 +1277,12 @@ function start_processor_runner!(istate::ProcessorInternalState, uid::UInt64, re
1276
1277
from_proc = other_istate. proc
1277
1278
thunk_id = task[1 ]
1278
1279
@dagdebug thunk_id :processor " Stolen from $from_proc by $to_proc "
1279
- timespan_finish (ctx, :steal_local , to_proc, (;from_proc, thunk_id))
1280
+ timespan_finish (ctx, :steal_local , (;worker = wid, processor = to_proc) , (;from_proc, thunk_id))
1280
1281
# TODO : Keep stealing until we hit full occupancy?
1281
1282
@goto execute
1282
1283
end
1283
1284
end
1284
- timespan_finish (ctx, :steal_local , to_proc, nothing )
1285
+ timespan_finish (ctx, :steal_local , (;worker = wid, processor = to_proc) , nothing )
1285
1286
1286
1287
# TODO : Try to steal from remote queues
1287
1288
@@ -1293,7 +1294,7 @@ function start_processor_runner!(istate::ProcessorInternalState, uid::UInt64, re
1293
1294
task = task_spec[]
1294
1295
thunk_id = task[1 ]
1295
1296
time_util = task[2 ]
1296
- timespan_finish (ctx, :proc_run_fetch , to_proc, (;thunk_id, proc_occupancy= proc_occupancy[], task_occupancy))
1297
+ timespan_finish (ctx, :proc_run_fetch , (;worker = wid, processor = to_proc) , (;thunk_id, proc_occupancy= proc_occupancy[], task_occupancy))
1297
1298
@dagdebug thunk_id :processor " Dequeued task"
1298
1299
1299
1300
# Execute the task and return its result
@@ -1378,7 +1379,7 @@ function do_tasks(to_proc, return_queue, tasks)
1378
1379
for task in tasks
1379
1380
thunk_id = task[1 ]
1380
1381
occupancy = task[4 ]
1381
- timespan_start (ctx, :enqueue , (;to_proc, thunk_id), nothing )
1382
+ timespan_start (ctx, :enqueue , (;processor = to_proc, thunk_id), nothing )
1382
1383
should_launch = lock (TASK_SYNC) do
1383
1384
# Already running; don't try to re-launch
1384
1385
if ! (thunk_id in TASKS_RUNNING)
@@ -1390,7 +1391,7 @@ function do_tasks(to_proc, return_queue, tasks)
1390
1391
end
1391
1392
should_launch || continue
1392
1393
enqueue! (queue, TaskSpecKey (task), occupancy)
1393
- timespan_finish (ctx, :enqueue , (;to_proc, thunk_id), nothing )
1394
+ timespan_finish (ctx, :enqueue , (;processor = to_proc, thunk_id), nothing )
1394
1395
@dagdebug thunk_id :processor " Enqueued task"
1395
1396
end
1396
1397
end
@@ -1435,7 +1436,7 @@ function do_task(to_proc, task_desc)
1435
1436
to_storage_name = nameof (typeof (to_storage))
1436
1437
storage_cap = storage_capacity (to_storage)
1437
1438
1438
- timespan_start (ctx, :storage_wait , thunk_id, (;f, to_proc , device= typeof (to_storage)))
1439
+ timespan_start (ctx, :storage_wait , (; thunk_id, processor = to_proc), (;f, device= typeof (to_storage)))
1439
1440
real_time_util = Ref {UInt64} (0 )
1440
1441
real_alloc_util = UInt64 (0 )
1441
1442
if ! meta
@@ -1493,7 +1494,7 @@ function do_task(to_proc, task_desc)
1493
1494
break
1494
1495
end
1495
1496
end
1496
- timespan_finish (ctx, :storage_wait , thunk_id, (;f, to_proc , device= typeof (to_storage)))
1497
+ timespan_finish (ctx, :storage_wait , (; thunk_id, processor = to_proc), (;f, device= typeof (to_storage)))
1497
1498
1498
1499
@dagdebug thunk_id :execute " Moving data"
1499
1500
@@ -1507,7 +1508,7 @@ function do_task(to_proc, task_desc)
1507
1508
end
1508
1509
fetch_tasks = map (Iterators. zip (_data,_ids)) do (x, id)
1509
1510
@async begin
1510
- timespan_start (ctx, :move , (;thunk_id, id), (;f, id , data= x))
1511
+ timespan_start (ctx, :move , (;thunk_id, id, processor = to_proc ), (;f, data= x))
1511
1512
#= FIXME : This isn't valid if x is written to
1512
1513
x = if x isa Chunk
1513
1514
value = lock(TASK_SYNC) do
@@ -1553,7 +1554,7 @@ function do_task(to_proc, task_desc)
1553
1554
x = @invokelatest move (to_proc, x)
1554
1555
# end
1555
1556
@dagdebug thunk_id :move " Moved argument $id to $to_proc : $x "
1556
- timespan_finish (ctx, :move , (;thunk_id, id), (;f, id , data= x); tasks= [Base. current_task ()])
1557
+ timespan_finish (ctx, :move , (;thunk_id, id, processor = to_proc ), (;f, data= x); tasks= [Base. current_task ()])
1557
1558
return x
1558
1559
end
1559
1560
end
@@ -1587,7 +1588,7 @@ function do_task(to_proc, task_desc)
1587
1588
=#
1588
1589
1589
1590
real_time_util[] += est_time_util
1590
- timespan_start (ctx, :compute , thunk_id, (;f, to_proc ))
1591
+ timespan_start (ctx, :compute , (; thunk_id, processor = to_proc), (;f))
1591
1592
res = nothing
1592
1593
1593
1594
# Start counting time and GC allocations
@@ -1614,13 +1615,13 @@ function do_task(to_proc, task_desc)
1614
1615
# Check if result is safe to store
1615
1616
device = nothing
1616
1617
if ! (res isa Chunk)
1617
- timespan_start (ctx, :storage_safe_scan , thunk_id, (;T= typeof (res)))
1618
+ timespan_start (ctx, :storage_safe_scan , (; thunk_id, processor = to_proc) , (;T= typeof (res)))
1618
1619
device = if walk_storage_safe (res)
1619
1620
to_storage
1620
1621
else
1621
1622
MemPool. CPURAMDevice ()
1622
1623
end
1623
- timespan_finish (ctx, :storage_safe_scan , thunk_id, (;T= typeof (res)))
1624
+ timespan_finish (ctx, :storage_safe_scan , (; thunk_id, processor = to_proc) , (;T= typeof (res)))
1624
1625
end
1625
1626
1626
1627
# Construct result
@@ -1637,7 +1638,7 @@ function do_task(to_proc, task_desc)
1637
1638
threadtime = cputhreadtime () - threadtime_start
1638
1639
# FIXME : This is not a realistic measure of max. required memory
1639
1640
# gc_allocd = min(max(UInt64(Base.gc_num().allocd) - UInt64(gcnum_start.allocd), UInt64(0)), UInt64(1024^4))
1640
- timespan_finish (ctx, :compute , thunk_id, (;f, to_proc ))
1641
+ timespan_finish (ctx, :compute , (; thunk_id, processor = to_proc), (;f))
1641
1642
lock (TASK_SYNC) do
1642
1643
real_time_util[] -= est_time_util
1643
1644
pop! (TASKS_RUNNING, thunk_id)
0 commit comments