Skip to content

Commit b6a843d

Browse files
committed
[Profile] add builtin Allocs.print and formatting equivalents to Profile.print
Defines a converter for Allocs to the types implemented by Profile for printing, allowing flamegraph report generation of allocations with just the stdlib tooling. Make these methods public as well, so that users can access the documentation without getting warnings. Refs: #42768
1 parent 5db9dbd commit b6a843d

File tree

5 files changed

+257
-27
lines changed

5 files changed

+257
-27
lines changed

stdlib/Profile/docs/src/index.md

+1
Original file line numberDiff line numberDiff line change
@@ -106,6 +106,7 @@ The methods in `Profile.Allocs` are not exported and need to be called e.g. as `
106106

107107
```@docs
108108
Profile.Allocs.clear
109+
Profile.Allocs.print
109110
Profile.Allocs.fetch
110111
Profile.Allocs.start
111112
Profile.Allocs.stop

stdlib/Profile/src/Allocs.jl

+205-1
Original file line numberDiff line numberDiff line change
@@ -1,5 +1,12 @@
11
module Allocs
22

3+
global print # Allocs.print is separate from both Base.print and Profile.print
4+
public @profile,
5+
clear,
6+
print,
7+
fetch
8+
9+
using ..Profile: Profile, ProfileFormat, StackFrameTree, print_flat, print_tree
310
using Base.StackTraces: StackTrace, StackFrame, lookup
411
using Base: InterpreterIP
512

@@ -138,7 +145,7 @@ end
138145
# Without this, the Alloc's stacktrace prints for lines and lines and lines...
139146
function Base.show(io::IO, a::Alloc)
140147
stacktrace_sample = length(a.stacktrace) >= 1 ? "$(a.stacktrace[1]), ..." : ""
141-
print(io, "$Alloc($(a.type), $StackFrame[$stacktrace_sample], $(a.size))")
148+
Base.print(io, "$Alloc($(a.type), $StackFrame[$stacktrace_sample], $(a.size))")
142149
end
143150

144151
const BacktraceCache = Dict{BTElement,Vector{StackFrame}}
@@ -216,4 +223,201 @@ function stacktrace_memoized(
216223
return stack
217224
end
218225

226+
function warning_empty()
227+
@warn """
228+
There were no samples collected.
229+
Run your program longer (perhaps by running it multiple times),
230+
or adjust the frequency of samples to record every event with
231+
the `sample_rate=1.0` kwarg."""
232+
end
233+
234+
235+
"""
236+
Profile.Allocs.print([io::IO = stdout,] [data::AllocResults = fetch()]; kwargs...)
237+
238+
Prints profiling results to `io` (by default, `stdout`). If you do not
239+
supply a `data` vector, the internal buffer of accumulated backtraces
240+
will be used.
241+
242+
See `Profile.print` for an explanation of the valid keyword arguments.
243+
"""
244+
print(; kwargs...) =
245+
Profile.print(stdout, fetch(); kwargs...)
246+
print(io::IO; kwargs...) =
247+
Profile.print(io, fetch(); kwargs...)
248+
print(io::IO, data::AllocResults; kwargs...) =
249+
Profile.print(io, data; kwargs...)
250+
Profile.print(data::AllocResults; kwargs...) =
251+
Profile.print(stdout, data; kwargs...)
252+
253+
function Profile.print(io::IO,
254+
data::AllocResults,
255+
;
256+
format = :tree,
257+
C = false,
258+
#combine = true,
259+
maxdepth::Int = typemax(Int),
260+
mincount::Int = 0,
261+
noisefloor = 0,
262+
sortedby::Symbol = :filefuncline,
263+
groupby::Union{Symbol,AbstractVector{Symbol}} = :none,
264+
recur::Symbol = :off,
265+
)
266+
pf = ProfileFormat(;C, maxdepth, mincount, noisefloor, sortedby, recur)
267+
Profile.print(io, data, pf, format)
268+
return
269+
end
270+
271+
function Profile.print(io::IO, data::AllocResults, fmt::ProfileFormat, format::Symbol)
272+
cols::Int = Base.displaysize(io)[2]
273+
fmt.recur (:off, :flat, :flatc) || throw(ArgumentError("recur value not recognized"))
274+
data = data.allocs
275+
if format === :tree
276+
tree(io, data, cols, fmt)
277+
elseif format === :flat
278+
fmt.recur === :off || throw(ArgumentError("format flat only implements recur=:off"))
279+
flat(io, data, cols, fmt)
280+
else
281+
throw(ArgumentError("output format $(repr(format)) not recognized"))
282+
end
283+
nothing
284+
end
285+
286+
287+
function parse_flat(::Type{T}, data::Vector{Alloc}, C::Bool) where T
288+
lilist = StackFrame[]
289+
n = Int[]
290+
m = Int[]
291+
lilist_idx = Dict{T, Int}()
292+
recursive = Set{T}()
293+
totalbytes = 0
294+
for r in data
295+
first = true
296+
empty!(recursive)
297+
nb = r.size # or 1 for counting
298+
totalbytes += nb
299+
for frame in r.stacktrace
300+
!C && frame.from_c && continue
301+
key = (T === UInt64 ? ip : frame)
302+
idx = get!(lilist_idx, key, length(lilist) + 1)
303+
if idx > length(lilist)
304+
push!(recursive, key)
305+
push!(lilist, frame)
306+
push!(n, nb)
307+
push!(m, 0)
308+
elseif !(key in recursive)
309+
push!(recursive, key)
310+
n[idx] += nb
311+
end
312+
if first
313+
m[idx] += nb
314+
first = false
315+
end
316+
end
317+
end
318+
@assert length(lilist) == length(n) == length(m) == length(lilist_idx)
319+
return (lilist, n, m, totalbytes)
320+
end
321+
322+
function flat(io::IO, data::Vector{Alloc}, cols::Int, fmt::ProfileFormat)
323+
fmt.combine || error(ArgumentError("combine=false"))
324+
lilist, n, m, totalbytes = parse_flat(fmt.combine ? StackFrame : UInt64, data, fmt.C)
325+
filenamemap = Dict{Symbol,String}()
326+
if isempty(lilist)
327+
warning_empty()
328+
return true
329+
end
330+
print_flat(io, lilist, n, m, cols, filenamemap, fmt)
331+
Base.println(io, "Total snapshots: ", length(data))
332+
Base.println(io, "Total bytes: ", totalbytes)
333+
return false
334+
end
335+
336+
function tree!(root::StackFrameTree{T}, all::Vector{Alloc}, C::Bool, recur::Symbol) where {T}
337+
tops = Vector{StackFrameTree{T}}()
338+
build = Dict{T, StackFrameTree{T}}()
339+
for r in all
340+
first = true
341+
nb = r.size # or 1 for counting
342+
root.recur = 0
343+
root.count += nb
344+
parent = root
345+
for i in reverse(eachindex(r.stacktrace))
346+
frame = r.stacktrace[i]
347+
key = (T === UInt64 ? ip : frame)
348+
if (recur === :flat && !frame.from_c) || recur === :flatc
349+
# see if this frame already has a parent
350+
this = get!(build, frame, parent)
351+
if this !== parent
352+
# Rewind the `parent` tree back, if this exact ip (FIXME) was already present *higher* in the current tree
353+
push!(tops, parent)
354+
parent = this
355+
end
356+
end
357+
!C && frame.from_c && continue
358+
this = get!(StackFrameTree{T}, parent.down, key)
359+
if recur === :off || this.recur == 0
360+
this.frame = frame
361+
this.up = parent
362+
this.count += nb
363+
this.recur = 1
364+
else
365+
this.count_recur += 1
366+
end
367+
parent = this
368+
end
369+
parent.overhead += nb
370+
if recur !== :off
371+
# We mark all visited nodes to so we'll only count those branches
372+
# once for each backtrace. Reset that now for the next backtrace.
373+
empty!(build)
374+
push!(tops, parent)
375+
for top in tops
376+
while top.recur != 0
377+
top.max_recur < top.recur && (top.max_recur = top.recur)
378+
top.recur = 0
379+
top = top.up
380+
end
381+
end
382+
empty!(tops)
383+
end
384+
let this = parent
385+
while this !== root
386+
this.flat_count += nb
387+
this = this.up
388+
end
389+
end
390+
end
391+
function cleanup!(node::StackFrameTree)
392+
stack = [node]
393+
while !isempty(stack)
394+
node = pop!(stack)
395+
node.recur = 0
396+
empty!(node.builder_key)
397+
empty!(node.builder_value)
398+
append!(stack, values(node.down))
399+
end
400+
nothing
401+
end
402+
cleanup!(root)
403+
return root
404+
end
405+
406+
function tree(io::IO, data::Vector{Alloc}, cols::Int, fmt::ProfileFormat)
407+
fmt.combine || error(ArgumentError("combine=false"))
408+
if fmt.combine
409+
root = tree!(StackFrameTree{StackFrame}(), data, fmt.C, fmt.recur)
410+
else
411+
root = tree!(StackFrameTree{UInt64}(), data, fmt.C, fmt.recur)
412+
end
413+
print_tree(io, root, cols, fmt, false)
414+
if isempty(root.down)
415+
warning_empty()
416+
return true
417+
end
418+
Base.println(io, "Total snapshots: ", length(data))
419+
Base.println(io, "Total bytes: ", root.count)
420+
return false
421+
end
422+
219423
end

stdlib/Profile/src/Profile.jl

+20-6
Original file line numberDiff line numberDiff line change
@@ -5,6 +5,20 @@ Profiling support, main entry point is the [`@profile`](@ref) macro.
55
"""
66
module Profile
77

8+
global print
9+
public @profile,
10+
clear,
11+
print,
12+
fetch,
13+
retrieve,
14+
add_fake_meta,
15+
flatten,
16+
callers,
17+
init,
18+
take_heap_snapshot,
19+
clear_malloc_data,
20+
Allocs
21+
822
import Base.StackTraces: lookup, UNKNOWN, show_spec_linfo, StackFrame
923

1024
const nmeta = 4 # number of metadata fields per block (threadid, taskid, cpu_cycle_clock, thread_sleeping)
@@ -220,7 +234,7 @@ function print(io::IO,
220234

221235
pf = ProfileFormat(;C, combine, maxdepth, mincount, noisefloor, sortedby, recur)
222236
if groupby === :none
223-
print(io, data, lidict, pf, format, threads, tasks, false)
237+
print_group(io, data, lidict, pf, format, threads, tasks, false)
224238
else
225239
if !in(groupby, [:thread, :task, [:task, :thread], [:thread, :task]])
226240
error(ArgumentError("Unrecognized groupby option: $groupby. Options are :none (default), :task, :thread, [:task, :thread], or [:thread, :task]"))
@@ -244,7 +258,7 @@ function print(io::IO,
244258
printstyled(io, "Task $(Base.repr(taskid))$nl"; bold=true, color=Base.debug_color())
245259
for threadid in threadids
246260
printstyled(io, " Thread $threadid "; bold=true, color=Base.info_color())
247-
nosamples = print(io, data, lidict, pf, format, threadid, taskid, true)
261+
nosamples = print_group(io, data, lidict, pf, format, threadid, taskid, true)
248262
nosamples && (any_nosamples = true)
249263
println(io)
250264
end
@@ -262,7 +276,7 @@ function print(io::IO,
262276
printstyled(io, "Thread $threadid$nl"; bold=true, color=Base.info_color())
263277
for taskid in taskids
264278
printstyled(io, " Task $(Base.repr(taskid)) "; bold=true, color=Base.debug_color())
265-
nosamples = print(io, data, lidict, pf, format, threadid, taskid, true)
279+
nosamples = print_group(io, data, lidict, pf, format, threadid, taskid, true)
266280
nosamples && (any_nosamples = true)
267281
println(io)
268282
end
@@ -274,7 +288,7 @@ function print(io::IO,
274288
isempty(taskids) && (any_nosamples = true)
275289
for taskid in taskids
276290
printstyled(io, "Task $(Base.repr(taskid)) "; bold=true, color=Base.debug_color())
277-
nosamples = print(io, data, lidict, pf, format, threads, taskid, true)
291+
nosamples = print_group(io, data, lidict, pf, format, threads, taskid, true)
278292
nosamples && (any_nosamples = true)
279293
println(io)
280294
end
@@ -284,7 +298,7 @@ function print(io::IO,
284298
isempty(threadids) && (any_nosamples = true)
285299
for threadid in threadids
286300
printstyled(io, "Thread $threadid "; bold=true, color=Base.info_color())
287-
nosamples = print(io, data, lidict, pf, format, threadid, tasks, true)
301+
nosamples = print_group(io, data, lidict, pf, format, threadid, tasks, true)
288302
nosamples && (any_nosamples = true)
289303
println(io)
290304
end
@@ -306,7 +320,7 @@ See `Profile.print([io], data)` for an explanation of the valid keyword argument
306320
print(data::Vector{<:Unsigned} = fetch(), lidict::Union{LineInfoDict, LineInfoFlatDict} = getdict(data); kwargs...) =
307321
print(stdout, data, lidict; kwargs...)
308322

309-
function print(io::IO, data::Vector{<:Unsigned}, lidict::Union{LineInfoDict, LineInfoFlatDict}, fmt::ProfileFormat,
323+
function print_group(io::IO, data::Vector{<:Unsigned}, lidict::Union{LineInfoDict, LineInfoFlatDict}, fmt::ProfileFormat,
310324
format::Symbol, threads::Union{Int,AbstractVector{Int}}, tasks::Union{UInt,AbstractVector{UInt}},
311325
is_subsection::Bool = false)
312326
cols::Int = Base.displaysize(io)[2]

stdlib/Profile/test/allocs.jl

+21
Original file line numberDiff line numberDiff line change
@@ -1,6 +1,13 @@
11
using Test
22
using Profile: Allocs
33

4+
Allocs.clear()
5+
let iobuf = IOBuffer()
6+
for format in (:tree, :flat)
7+
Test.@test_logs (:warn, r"^There were no samples collected\.") Allocs.print(iobuf; format, C=true)
8+
end
9+
end
10+
411
@testset "alloc profiler doesn't segfault" begin
512
res = Allocs.@profile sample_rate=1.0 begin
613
# test the allocations during compilation
@@ -13,6 +20,20 @@ using Profile: Allocs
1320
@test first_alloc.size > 0
1421
@test length(first_alloc.stacktrace) > 0
1522
@test length(string(first_alloc.type)) > 0
23+
24+
# test printing options
25+
for options in ((format=:tree, C=true),
26+
(format=:tree, maxdepth=2),
27+
(format=:flat, C=true),
28+
(),
29+
(format=:flat, sortedby=:count),
30+
(format=:tree, recur=:flat),
31+
)
32+
iobuf = IOBuffer()
33+
Allocs.print(iobuf; options...)
34+
str = String(take!(iobuf))
35+
@test !isempty(str)
36+
end
1637
end
1738

1839
@testset "alloc profiler works when there are multiple tasks on multiple threads" begin

stdlib/Profile/test/runtests.jl

+10-20
Original file line numberDiff line numberDiff line change
@@ -38,28 +38,18 @@ let r = Profile.retrieve()
3838
end
3939
end
4040

41-
let iobuf = IOBuffer()
42-
Profile.print(iobuf, format=:tree, C=true)
43-
str = String(take!(iobuf))
44-
@test !isempty(str)
45-
truncate(iobuf, 0)
46-
Profile.print(iobuf, format=:tree, maxdepth=2)
47-
str = String(take!(iobuf))
48-
@test !isempty(str)
49-
truncate(iobuf, 0)
50-
Profile.print(iobuf, format=:flat, C=true)
51-
str = String(take!(iobuf))
52-
@test !isempty(str)
53-
truncate(iobuf, 0)
54-
Profile.print(iobuf)
55-
@test !isempty(String(take!(iobuf)))
56-
truncate(iobuf, 0)
57-
Profile.print(iobuf, format=:flat, sortedby=:count)
58-
@test !isempty(String(take!(iobuf)))
59-
Profile.print(iobuf, format=:tree, recur=:flat)
41+
# test printing options
42+
for options in ((format=:tree, C=true),
43+
(format=:tree, maxdepth=2),
44+
(format=:flat, C=true),
45+
(),
46+
(format=:flat, sortedby=:count),
47+
(format=:tree, recur=:flat),
48+
)
49+
iobuf = IOBuffer()
50+
Profile.print(iobuf; options...)
6051
str = String(take!(iobuf))
6152
@test !isempty(str)
62-
truncate(iobuf, 0)
6353
end
6454

6555
@testset "Profile.print() groupby options" begin

0 commit comments

Comments
 (0)