Skip to content

Commit 74028e4

Browse files
authored
Fully dynamic ETW controlled logging for ORT and QNN logs (#20537)
### Description Windows - Fully dynamic ETW controlled logging for ORT and QNN logs The logging support is documented here - https://onnxruntime.ai/docs/performance/tune-performance/logging_tracing.html#tracing---windows - https://onnxruntime.ai/docs/performance/tune-performance/profiling-tools.html#tracelogging-etw-windows-profiling Also add support for logging ORT SessionCreation on ETW CaptureState ### Motivation and Context The previous ETW support only worked if you enabled ETW before the session started. There can commonly be long-lived AI inference processes that need to be traced & debugged. This enables logging fully on the fly. Without this support a dev would have to end up killing a process or stopping a service in order to get tracing. We had to do this for a recent issue with QNN, and it was a bit painful to get the logs and it ruined the repro. ### Testing I tested with the following cases - Leaving default ORT run - Enabling ETW prior to start and leaving running for entire session + inferences, then stopping - Starting ORT session + inf, then enabling and stopping ETW - Start ORT session /w long running Inferences - wpr -start [ort.wprp](https://github.com/microsoft/onnxruntime/blob/e6228575e4d5866bdb831e76cc93e6c35af4de8b/ort.wprp#L4) -start [etw_provider.wprp](https://github.com/microsoft/onnxruntime/blob/e6228575e4d5866bdb831e76cc93e6c35af4de8b/onnxruntime/test/platform/windows/logging/etw_provider.wprp) - Wait a few seconds - wpr -stop ort.etl - Inferences are still running - Verify ONNXRuntimeLogEvent provider events are present and new SessionCreation_CaptureState event under Microsoft.ML.ONNXRuntime provider Related: #18882 #19428
1 parent f8b5c28 commit 74028e4

File tree

19 files changed

+501
-96
lines changed

19 files changed

+501
-96
lines changed

include/onnxruntime/core/common/logging/isink.h

+6-1
Original file line numberDiff line numberDiff line change
@@ -6,12 +6,15 @@
66
#include <string>
77

88
#include "core/common/logging/logging.h"
9+
#include "core/common/logging/sink_types.h"
910

1011
namespace onnxruntime {
1112
namespace logging {
1213
class ISink {
1314
public:
14-
ISink() = default;
15+
explicit ISink(SinkType type = SinkType::BaseSink) : type_(type) {}
16+
17+
SinkType GetType() const { return type_; }
1518

1619
/**
1720
Sends the message to the sink.
@@ -32,6 +35,8 @@ class ISink {
3235
virtual ~ISink() = default;
3336

3437
private:
38+
SinkType type_;
39+
3540
// Make Code Analysis happy by disabling all for now. Enable as needed.
3641
ORT_DISALLOW_COPY_ASSIGNMENT_AND_MOVE(ISink);
3742

include/onnxruntime/core/common/logging/logging.h

+26-6
Original file line numberDiff line numberDiff line change
@@ -14,10 +14,10 @@
1414
#include "core/common/common.h"
1515
#include "core/common/profiler_common.h"
1616
#include "core/common/logging/capture.h"
17-
#include "core/common/logging/severity.h"
18-
1917
#include "core/common/logging/macros.h"
20-
18+
#include "core/common/logging/severity.h"
19+
#include "core/common/logging/sink_types.h"
20+
#include "core/platform/ort_mutex.h"
2121
#include "date/date.h"
2222

2323
/*
@@ -167,6 +167,23 @@ class LoggingManager final {
167167
*/
168168
static bool HasDefaultLogger() { return nullptr != s_default_logger_; }
169169

170+
/**
171+
Gets the default instance of the LoggingManager.
172+
*/
173+
static LoggingManager* GetDefaultInstance();
174+
175+
/**
176+
Removes a Sink if one is present
177+
*/
178+
void RemoveSink(SinkType sinkType);
179+
180+
/**
181+
Adds a Sink to the current sink creating a CompositeSink if necessary
182+
Sinks types must be unique
183+
@param severity The severity level for the new Sink
184+
*/
185+
bool AddSinkOfType(SinkType sinkType, std::function<std::unique_ptr<ISink>()> sinkFactory, logging::Severity severity);
186+
170187
/**
171188
Change the minimum severity level for log messages to be output by the default logger.
172189
@param severity The severity.
@@ -214,7 +231,10 @@ class LoggingManager final {
214231
void CreateDefaultLogger(const std::string& logger_id);
215232

216233
std::unique_ptr<ISink> sink_;
217-
const Severity default_min_severity_;
234+
#ifdef _WIN32
235+
mutable OrtMutex sink_mutex_;
236+
#endif
237+
Severity default_min_severity_;
218238
const bool default_filter_user_data_;
219239
const int default_max_vlog_level_;
220240
bool owns_default_logger_;
@@ -362,8 +382,8 @@ unsigned int GetProcessId();
362382
/**
363383
If the ONNXRuntimeTraceLoggingProvider ETW Provider is enabled, then adds to the existing logger.
364384
*/
365-
std::unique_ptr<ISink> EnhanceLoggerWithEtw(std::unique_ptr<ISink> existingLogger, logging::Severity originalSeverity,
366-
logging::Severity etwSeverity);
385+
std::unique_ptr<ISink> EnhanceSinkWithEtw(std::unique_ptr<ISink> existingSink, logging::Severity originalSeverity,
386+
logging::Severity etwSeverity);
367387

368388
/**
369389
If the ONNXRuntimeTraceLoggingProvider ETW Provider is enabled, then can override the logging level.
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,11 @@
1+
#pragma once
2+
3+
namespace onnxruntime {
4+
namespace logging {
5+
enum class SinkType {
6+
BaseSink,
7+
CompositeSink,
8+
EtwSink
9+
};
10+
} // namespace logging
11+
} // namespace onnxruntime

onnxruntime/core/common/logging/logging.cc

+60-12
Original file line numberDiff line numberDiff line change
@@ -9,11 +9,11 @@
99
#include "core/common/exceptions.h"
1010
#include "core/common/logging/isink.h"
1111
#include "core/common/logging/logging.h"
12+
#include "core/common/logging/sinks/composite_sink.h"
1213

1314
#ifdef _WIN32
1415
#include <Windows.h>
1516
#include "core/platform/windows/logging/etw_sink.h"
16-
#include "core/common/logging/sinks/composite_sink.h"
1717
#else
1818
#include <unistd.h>
1919
#if defined(__MACH__) || defined(__wasm__) || defined(_AIX)
@@ -22,10 +22,10 @@
2222
#include <sys/syscall.h>
2323
#endif
2424
#endif
25-
#include "core/platform/ort_mutex.h"
2625

2726
#if __FreeBSD__
2827
#include <sys/thr.h> // Use thr_self() syscall under FreeBSD to get thread id
28+
#include "logging.h"
2929
#endif
3030

3131
namespace onnxruntime {
@@ -52,6 +52,10 @@ static std::atomic<void*>& DefaultLoggerManagerInstance() noexcept {
5252
return default_instance;
5353
}
5454

55+
LoggingManager* LoggingManager::GetDefaultInstance() {
56+
return static_cast<LoggingManager*>(DefaultLoggerManagerInstance().load());
57+
}
58+
5559
// GSL_SUPRESS(i.22) is broken. Ignore the warnings for the static local variables that are trivial
5660
// and should not have any destruction order issues via pragmas instead.
5761
// https://developercommunity.visualstudio.com/content/problem/249706/gslsuppress-does-not-work-for-i22-c-core-guideline.html
@@ -66,6 +70,7 @@ static OrtMutex& DefaultLoggerMutex() noexcept {
6670
}
6771

6872
Logger* LoggingManager::s_default_logger_ = nullptr;
73+
OrtMutex sink_mutex_;
6974

7075
#ifdef _MSC_VER
7176
#pragma warning(pop)
@@ -245,35 +250,78 @@ unsigned int GetProcessId() {
245250
#endif
246251
}
247252

248-
std::unique_ptr<ISink> EnhanceLoggerWithEtw(std::unique_ptr<ISink> existingLogger, logging::Severity originalSeverity,
249-
logging::Severity etwSeverity) {
253+
std::unique_ptr<ISink> EnhanceSinkWithEtw(std::unique_ptr<ISink> existing_sink, logging::Severity original_severity,
254+
logging::Severity etw_severity) {
250255
#ifdef _WIN32
251256
auto& manager = EtwRegistrationManager::Instance();
252257
if (manager.IsEnabled()) {
253258
auto compositeSink = std::make_unique<CompositeSink>();
254-
compositeSink->AddSink(std::move(existingLogger), originalSeverity);
255-
compositeSink->AddSink(std::make_unique<EtwSink>(), etwSeverity);
259+
compositeSink->AddSink(std::move(existing_sink), original_severity);
260+
compositeSink->AddSink(std::make_unique<EtwSink>(), etw_severity);
256261
return compositeSink;
257262
} else {
258-
return existingLogger;
263+
return existing_sink;
259264
}
260265
#else
261266
// On non-Windows platforms, just return the existing logger
262-
(void)originalSeverity;
263-
(void)etwSeverity;
264-
return existingLogger;
267+
(void)original_severity;
268+
(void)etw_severity;
269+
return existing_sink;
265270
#endif // _WIN32
266271
}
267272

268-
Severity OverrideLevelWithEtw(Severity originalSeverity) {
273+
Severity OverrideLevelWithEtw(Severity original_severity) {
269274
#ifdef _WIN32
270275
auto& manager = logging::EtwRegistrationManager::Instance();
271276
if (manager.IsEnabled() &&
272277
(manager.Keyword() & static_cast<ULONGLONG>(onnxruntime::logging::ORTTraceLoggingKeyword::Logs)) != 0) {
273278
return manager.MapLevelToSeverity();
274279
}
275280
#endif // _WIN32
276-
return originalSeverity;
281+
return original_severity;
282+
}
283+
284+
bool LoggingManager::AddSinkOfType(SinkType sink_type, std::function<std::unique_ptr<ISink>()> sinkFactory,
285+
logging::Severity severity) {
286+
std::lock_guard<OrtMutex> guard(sink_mutex_);
287+
if (sink_->GetType() != SinkType::CompositeSink) {
288+
// Current sink is not a composite, create a new composite sink and add the current sink to it
289+
auto new_composite = std::make_unique<CompositeSink>();
290+
new_composite->AddSink(std::move(sink_), default_min_severity_); // Move the current sink into the new composite
291+
sink_ = std::move(new_composite); // Now sink_ is pointing to the new composite
292+
}
293+
// Adjust the default minimum severity level to accommodate new sink needs
294+
default_min_severity_ = std::min(default_min_severity_, severity);
295+
if (s_default_logger_ != nullptr) {
296+
s_default_logger_->SetSeverity(default_min_severity_);
297+
}
298+
CompositeSink* current_composite = static_cast<CompositeSink*>(sink_.get());
299+
if (current_composite->HasType(sink_type)) {
300+
return false; // Sink of this type already exists, do not add another
301+
}
302+
303+
current_composite->AddSink(sinkFactory(), severity);
304+
return true;
305+
}
306+
307+
void LoggingManager::RemoveSink(SinkType sink_type) {
308+
std::lock_guard<OrtMutex> guard(sink_mutex_);
309+
310+
if (sink_->GetType() == SinkType::CompositeSink) {
311+
auto composite_sink = static_cast<CompositeSink*>(sink_.get());
312+
313+
Severity newSeverity = composite_sink->RemoveSink(sink_type);
314+
315+
if (composite_sink->HasOnlyOneSink()) {
316+
// If only one sink remains, replace the CompositeSink with this single sink
317+
sink_ = composite_sink->GetRemoveSingleSink();
318+
}
319+
320+
default_min_severity_ = newSeverity;
321+
if (s_default_logger_ != nullptr) {
322+
s_default_logger_->SetSeverity(default_min_severity_);
323+
}
324+
}
277325
}
278326

279327
} // namespace logging

onnxruntime/core/common/logging/sinks/composite_sink.h

+52-5
Original file line numberDiff line numberDiff line change
@@ -23,7 +23,17 @@ class CompositeSink : public ISink {
2323
/// Initializes a new instance of the <see cref="CompositeSink"/> class.
2424
/// Use AddSink to add sinks.
2525
/// </summary>
26-
CompositeSink() {}
26+
CompositeSink() : ISink(SinkType::CompositeSink) {}
27+
28+
/// <summary>
29+
/// Check if the composite sink contains a sink of the specified type.
30+
/// </summary>
31+
bool HasType(SinkType sink_type) const {
32+
return std::any_of(sinks_with_severity_.begin(), sinks_with_severity_.end(),
33+
[&](const auto& sink_pair) {
34+
return sink_pair.first->GetType() == sink_type;
35+
});
36+
}
2737

2838
/// <summary>
2939
/// Adds a sink. Takes ownership of the sink (so pass unique_ptr by value).
@@ -37,11 +47,48 @@ class CompositeSink : public ISink {
3747
}
3848

3949
/// <summary>
40-
/// Gets a const reference to the collection of sinks and min severity for that sink
50+
/// Remove a sink of the specified type.
51+
/// </summary>
52+
/// <param name="sink_type">Sink type to remove</param>
53+
/// <returns>Minimum severity of the remaining sinks</returns>
54+
logging::Severity RemoveSink(SinkType sink_type) {
55+
logging::Severity severity = Severity::kFATAL; // default if we end up with no sinks
56+
57+
// find entries to remove and the minimum severity of the remaining sinks
58+
auto entries_to_remove = std::remove_if(sinks_with_severity_.begin(), sinks_with_severity_.end(),
59+
[&](const auto& entry) {
60+
if (entry.first->GetType() == sink_type) {
61+
return true;
62+
} else {
63+
severity = std::min(severity, entry.second);
64+
return false;
65+
}
66+
});
67+
68+
sinks_with_severity_.erase(entries_to_remove, sinks_with_severity_.end());
69+
70+
return severity;
71+
}
72+
73+
/// <summary>
74+
/// Check if there's only one sink left
75+
/// </summary>
76+
/// <returns> True if only 1 sink remaining </returns>
77+
bool HasOnlyOneSink() const {
78+
return sinks_with_severity_.size() == 1;
79+
}
80+
81+
/// <summary>
82+
/// If one sink is remaining then returns it and empties the composite sink
4183
/// </summary>
42-
/// <returns>A const reference to the vector pair of unique_ptr to ISink and severity.</returns>
43-
const std::vector<std::pair<std::unique_ptr<ISink>, logging::Severity>>& GetSinks() const {
44-
return sinks_with_severity_;
84+
/// <returns> If one sink remains then returns the sink, otherwise nullptr </returns>
85+
std::unique_ptr<ISink> GetRemoveSingleSink() {
86+
if (HasOnlyOneSink()) {
87+
auto single_sink = std::move(sinks_with_severity_.begin()->first);
88+
sinks_with_severity_.clear();
89+
return single_sink;
90+
}
91+
return nullptr;
4592
}
4693

4794
private:

onnxruntime/core/platform/telemetry.cc

+2-1
Original file line numberDiff line numberDiff line change
@@ -55,7 +55,7 @@ void Telemetry::LogSessionCreation(uint32_t session_id, int64_t ir_version, cons
5555
const std::string& model_graph_name,
5656
const std::unordered_map<std::string, std::string>& model_metadata,
5757
const std::string& loadedFrom, const std::vector<std::string>& execution_provider_ids,
58-
bool use_fp16) const {
58+
bool use_fp16, bool captureState) const {
5959
ORT_UNUSED_PARAMETER(session_id);
6060
ORT_UNUSED_PARAMETER(ir_version);
6161
ORT_UNUSED_PARAMETER(model_producer_name);
@@ -67,6 +67,7 @@ void Telemetry::LogSessionCreation(uint32_t session_id, int64_t ir_version, cons
6767
ORT_UNUSED_PARAMETER(loadedFrom);
6868
ORT_UNUSED_PARAMETER(execution_provider_ids);
6969
ORT_UNUSED_PARAMETER(use_fp16);
70+
ORT_UNUSED_PARAMETER(captureState);
7071
}
7172

7273
void Telemetry::LogRuntimeError(uint32_t session_id, const common::Status& status, const char* file,

onnxruntime/core/platform/telemetry.h

+1-1
Original file line numberDiff line numberDiff line change
@@ -60,7 +60,7 @@ class Telemetry {
6060
const std::string& model_graph_name,
6161
const std::unordered_map<std::string, std::string>& model_metadata,
6262
const std::string& loadedFrom, const std::vector<std::string>& execution_provider_ids,
63-
bool use_fp16) const;
63+
bool use_fp16, bool captureState) const;
6464

6565
virtual void LogRuntimeError(uint32_t session_id, const common::Status& status, const char* file,
6666
const char* function, uint32_t line) const;

onnxruntime/core/platform/windows/logging/etw_sink.h

+1-1
Original file line numberDiff line numberDiff line change
@@ -31,7 +31,7 @@ namespace logging {
3131

3232
class EtwSink : public ISink {
3333
public:
34-
EtwSink() = default;
34+
EtwSink() : ISink(SinkType::EtwSink) {}
3535
~EtwSink() = default;
3636

3737
constexpr static const char* kEventName = "ONNXRuntimeLogEvent";

0 commit comments

Comments
 (0)