Skip to content
1 change: 1 addition & 0 deletions onnxruntime/__init__.py
Original file line number Diff line number Diff line change
Expand Up @@ -61,6 +61,7 @@
get_version_string, # noqa: F401
has_collective_ops, # noqa: F401
register_execution_provider_library, # noqa: F401
Comment thread
tianleiwu marked this conversation as resolved.
set_default_logger_callback, # noqa: F401
set_default_logger_severity, # noqa: F401
set_default_logger_verbosity, # noqa: F401
set_global_thread_pool_sizes, # noqa: F401
Expand Down
20 changes: 20 additions & 0 deletions onnxruntime/python/onnxruntime_pybind_module.cc
Original file line number Diff line number Diff line change
Expand Up @@ -7,6 +7,7 @@
#include "core/providers/get_execution_providers.h"
#include "onnxruntime_config.h"
#include "core/common/common.h"
#include "core/common/logging/logging.h"
#include "core/session/environment.h"
#include "core/session/ort_env.h"
#include "core/session/inference_session.h"
Expand Down Expand Up @@ -36,8 +37,27 @@ static Status CreateOrtEnv() {
Env::Default().GetTelemetryProvider().SetLanguageProjection(OrtLanguageProjection::ORT_PROJECTION_PYTHON);
OrtEnv::LoggingManagerConstructionInfo lm_info{nullptr, nullptr, ORT_LOGGING_LEVEL_WARNING, "Default"};
Status status;

// Detect whether a process-wide OrtEnv already exists. An embedding application may have
// created one via the C/C++ API before importing the Python module; in that case existing
// sessions/threads may already hold loggers backed by its LoggingManager, so tearing that
// manager down and replacing it below could invalidate those loggers (use-after-free).
// TryGetInstance() bumps the refcount only for the duration of this expression.
const bool env_preexisted = OrtEnv::TryGetInstance() != nullptr;

ort_env = OrtEnv::GetOrCreateInstance(lm_info, status, use_global_tp ? &global_tp_options : nullptr).release();
if (!status.IsOK()) return status;

// Only install the PythonCallbackSink when this module actually created the OrtEnv. When
// the env pre-existed we leave its LoggingManager untouched; g_python_callback_sink then
// stays null and set_default_logger_callback() reports that the sink is unavailable.
//
// This is safe at this point because no ORT sessions or background threads have
// been created yet, so there is no concurrent logging activity.
if (!env_preexisted) {
InstallPythonCallbackLoggingSink(*ort_env);
}

// Keep the ort_env alive, don't free it. It's ok to leak the memory.
#if !defined(__APPLE__) && !defined(ORT_MINIMAL_BUILD)
if (!InitProvidersSharedLibrary()) {
Expand Down
17 changes: 16 additions & 1 deletion onnxruntime/python/onnxruntime_pybind_module_functions.h
Original file line number Diff line number Diff line change
@@ -1,7 +1,10 @@
#pragma once
#include "onnxruntime_pybind.h"

#include <memory>

#include "core/common/logging/isink.h"
#include "core/framework/provider_options.h"

struct OrtEnv;

namespace onnxruntime {
class InferenceSession;
namespace python {
Expand All @@ -18,5 +21,17 @@
void addGlobalSchemaFunctions(pybind11::module& m);
void addOpSchemaSubmodule(pybind11::module& m);
void addOpKernelSubmodule(pybind11::module& m);

// Creates the PythonCallbackSink that wraps `platform_sink` and can be retargeted later via
// set_default_logger_callback().
//
// `platform_sink` is the sink that receives log records whenever no Python callback is
// installed (and when an installed callback raises).
//
// Returns the owning pointer to the new sink; pass it on to the LoggingManager. A non-owning
// pointer to the same sink is also stored internally so the callback can be updated later.
// The implementation touches Python reference counts, so call it with the GIL held.
std::unique_ptr<onnxruntime::logging::ISink> CreateAndRegisterPythonCallbackSink(
std::unique_ptr<onnxruntime::logging::ISink> platform_sink);

Check warning on line 29 in onnxruntime/python/onnxruntime_pybind_module_functions.h

View workflow job for this annotation

GitHub Actions / Optional Lint C++

[cpplint] reported by reviewdog 🐶 Add #include <memory> for unique_ptr<> [build/include_what_you_use] [4] Raw Output: onnxruntime/python/onnxruntime_pybind_module_functions.h:29: Add #include <memory> for unique_ptr<> [build/include_what_you_use] [4]

// Replaces the Default LoggingManager of ort_env with one backed by a PythonCallbackSink so
// that set_default_logger_callback() can route ORT log messages to a Python callable. The
// existing manager is torn down before the replacement is constructed, so only call this
// when this Python module created the OrtEnv and before any sessions/threads exist.
void InstallPythonCallbackLoggingSink(OrtEnv& ort_env);

} // namespace python
} // namespace onnxruntime
} // namespace onnxruntime
208 changes: 208 additions & 0 deletions onnxruntime/python/onnxruntime_pybind_state.cc
Original file line number Diff line number Diff line change
Expand Up @@ -30,6 +30,7 @@
#include "core/framework/TensorSeq.h"
#include "core/graph/graph_viewer.h"
#include "core/platform/env.h"
#include "core/platform/logging/make_platform_default_log_sink.h"
#include "core/providers/get_execution_providers.h"
#include "core/providers/providers.h"
#include "core/providers/tensorrt/tensorrt_provider_options.h"
Expand Down Expand Up @@ -99,6 +100,168 @@

using PyCallback = std::function<void(std::vector<py::object>, py::object user_data, std::string)>;

// Owning reference to the user-provided Python logging callback (Py_None when none is
// installed). Stored as a raw PyObject* rather than a py::object so that its reference is
// never released during static destruction / module unload, which can run after the Python
// interpreter has been finalized (a DECREF at that point can crash). The reference is
// intentionally leaked at process shutdown, mirroring how the global OrtEnv (ort_env) is
// handled. All INCREF/DECREF on this pointer happen while the GIL is held. Protected by
// g_logging_mutex.
static PyObject* g_user_logging_callback = nullptr;
static std::mutex g_logging_mutex;

// A logging sink that can dynamically switch between a Python callable and the platform
// default sink (e.g., stderr on Linux/macOS, OutputDebugString on Windows).
//
// An instance is created once at module import time and installed as the OrtEnv logging
// sink. Calling set_default_logger_callback() only updates the callable stored in
// g_user_logging_callback, so there is no need to rebuild the LoggingManager (and no risk of
// hitting the "Only one Default LoggingManager" singleton guard).
//
// The callable is invoked with:
//   (severity: int, category: str, logid: str, code_location: str, message: str)
//
// Every record that cannot be delivered to a Python callable (none installed, interpreter
// already finalized, callable raised) is forwarded to the wrapped platform sink instead, so
// log output is never silently dropped.
class PythonCallbackSink : public onnxruntime::logging::ISink {
 public:
  // Takes ownership of `platform_sink`, the fallback destination for log records.
  explicit PythonCallbackSink(std::unique_ptr<onnxruntime::logging::ISink> platform_sink)
      : platform_sink_(std::move(platform_sink)) {}

  // Replace the active callback. Pass a None py::object to revert to the platform sink.
  // Must be called with the GIL held.
  void SetCallback(py::object callback) {
    // The caller (the Python-exposed set_default_logger_callback) already holds the GIL, so
    // it is safe to mutate Python refcounts here. Steal the reference out of the py::object
    // so ownership transfers to the global, swap it under the mutex, then release the old
    // reference while the GIL is still held (and outside the mutex, because a DECREF may run
    // arbitrary Python code).
    PyObject* new_ref = callback.release().ptr();
    PyObject* old_ref = nullptr;
    {
      std::lock_guard<std::mutex> lock(g_logging_mutex);
      old_ref = g_user_logging_callback;
      g_user_logging_callback = new_ref;
    }
    Py_XDECREF(old_ref);
  }

  // Delivers one log record to the Python callback if one is installed, otherwise (or on
  // any failure) to the platform sink. May be called from non-Python threads.
  void SendImpl(const onnxruntime::logging::Timestamp& timestamp, const std::string& logger_id,
                const onnxruntime::logging::Capture& message) override {
    // Cheap pre-check: comparing the stored raw pointer against nullptr / Py_None does not
    // touch Python refcounts, so it is safe to do under the mutex alone without the GIL. This
    // lets us avoid acquiring the GIL on the common path where no Python callback is installed.
    // A null pointer is treated the same as Py_None (no callback installed).
    bool has_callback;
    {
      std::lock_guard<std::mutex> lock(g_logging_mutex);
      has_callback = g_user_logging_callback != nullptr && g_user_logging_callback != Py_None;
    }

    // This sink is owned by the LoggingManager of the intentionally leaked OrtEnv, so it can
    // be asked to log after the Python interpreter has been finalized (e.g. from destructors
    // running at process exit). At that point the GIL can no longer be acquired and the
    // stored callback must not be touched, so treat a finalized interpreter exactly like
    // "no callback installed". This is a best-effort guard: it does not cover the window in
    // which finalization is still in progress.
    if (!has_callback || !Py_IsInitialized()) {
      // Delegate to the platform sink (outside the lock to avoid holding it during
      // potentially blocking I/O).
      platform_sink_->Send(timestamp, logger_id, message);
      return;
    }

    // Snapshot the values we need (message is only valid while the Capture exists).
    int severity = static_cast<int>(message.Severity());
    const char* category = message.Category();
    std::string code_location = message.Location().ToString();
    const std::string msg = message.Message();

    // Acquire the GIL before touching the callback's refcount. Copying the callback
    // (Py_INCREF) and destroying the local copy (Py_DECREF) must both happen while the GIL is
    // held, otherwise we would mutate Python refcounts from a non-Python worker thread
    // (undefined behavior). py::gil_scoped_acquire is reentrant, so this is safe even when the
    // current thread already holds the GIL. The refcount-free pre-check above runs under the
    // mutex alone; whenever both locks are held the order is GIL -> g_logging_mutex (SetCallback
    // uses the same order), so there is no deadlock.
    py::gil_scoped_acquire acquire;

    // Re-read the callback under the lock now that the GIL is held. It may have been cleared
    // between the pre-check and here; if so, fall back to the platform sink. reinterpret_borrow
    // performs the Py_INCREF while the GIL is held.
    py::object cb;
    {
      std::lock_guard<std::mutex> lock(g_logging_mutex);
      if (g_user_logging_callback != nullptr && g_user_logging_callback != Py_None) {
        cb = py::reinterpret_borrow<py::object>(g_user_logging_callback);
      }
    }
    if (!cb) {
      platform_sink_->Send(timestamp, logger_id, message);
      return;
    }

    try {
      cb(severity, category, logger_id, code_location, msg);
    } catch (const py::error_already_set&) {
      // If the Python callback raises, fall back to the platform sink so ORT log messages are
      // not silently lost. Avoid recursive calls to the logger here.
      platform_sink_->Send(timestamp, logger_id, message);
    } catch (...) {
      // Any other C++ exception: best effort, ignore and delegate to platform sink.
      platform_sink_->Send(timestamp, logger_id, message);
    }
  }

 private:
  // Fallback destination used whenever a record is not delivered to a Python callable.
  std::unique_ptr<onnxruntime::logging::ISink> platform_sink_;
};

// The single PythonCallbackSink instance whose inner callback can be replaced at runtime.
// Owned by the LoggingManager that is embedded in the global OrtEnv; we keep a non-owning
// pointer so that set_default_logger_callback can reach it.
static PythonCallbackSink* g_python_callback_sink = nullptr;

// Builds the PythonCallbackSink around `platform_sink` and remembers it through the
// non-owning g_python_callback_sink pointer, which is how set_default_logger_callback()
// later reaches the sink to swap the Python callable. "Register" means only this
// bookkeeping; nothing is registered with the logging system itself. Ownership of the new
// sink is handed back to the caller.
std::unique_ptr<onnxruntime::logging::ISink> CreateAndRegisterPythonCallbackSink(
    std::unique_ptr<onnxruntime::logging::ISink> platform_sink) {
  auto callback_sink = std::make_unique<PythonCallbackSink>(std::move(platform_sink));

  // Start in the explicit "no callback installed" state by storing an owned reference to
  // Py_None (SendImpl treats a null pointer the same way, so this only makes the state
  // explicit). This is expected to run at module import time with the GIL held, which is
  // what makes touching the refcount here safe.
  {
    std::lock_guard<std::mutex> guard(g_logging_mutex);
    g_user_logging_callback = Py_None;
    Py_INCREF(g_user_logging_callback);
  }

  g_python_callback_sink = callback_sink.get();
  return callback_sink;
}

// Replaces the Default LoggingManager of the given OrtEnv with one backed by a
// PythonCallbackSink (wrapping the platform default sink and any ETW sink). This lets
// set_default_logger_callback() route ORT log messages to a user-provided Python callable
// without rebuilding the LoggingManager (a Default-type singleton).
//
// Must only be called when this Python module actually created the OrtEnv (i.e., no
// pre-existing env created by an embedding C/C++ app whose loggers may still be in use) and
// before any ORT sessions or background threads exist, so there is no concurrent logging.
//
// The sequence is:
//   1. Create the PythonCallbackSink wrapping the platform default sink (+ optional ETW).
//   2. SetLoggingManager(nullptr) destroys the existing Default-type manager. Its destructor
//      resets the internal singleton guard (DefaultLoggerManagerInstance atomic pointer in
//      logging.cc) to nullptr, allowing a new Default-type manager to be constructed. If that
//      implementation detail ever changes, this sequence will need to be revisited.
//   3. Construct and install a new LoggingManager backed by the PythonCallbackSink.
//
// Between steps 2 and 3 the env has no LoggingManager at all, which is another reason the
// "no concurrent logging" precondition matters.
void InstallPythonCallbackLoggingSink(OrtEnv& ort_env) {
  // Using-declarations instead of a using-directive: only the names needed here are pulled
  // into scope, which is what the project's cpplint configuration asks for.
  using onnxruntime::logging::EnhanceSinkWithEtw;
  using onnxruntime::logging::LoggingManager;
  using onnxruntime::logging::MakePlatformDefaultLogSink;
  using onnxruntime::logging::OverrideLevelWithEtw;
  using onnxruntime::logging::Severity;

  // Step 1: Python-aware sink around the platform default sink, optionally combined with ETW.
  auto python_sink = CreateAndRegisterPythonCallbackSink(MakePlatformDefaultLogSink());
  constexpr auto kDefaultSeverity = Severity::kWARNING;
  auto etw_severity = OverrideLevelWithEtw(kDefaultSeverity);
  auto combined_sink = EnhanceSinkWithEtw(std::move(python_sink), kDefaultSeverity, etw_severity);
  std::string logger_id{"Default"};

  // Step 2: destroy the old Default-type LoggingManager so a new one may be constructed.
  ort_env.SetLoggingManager(nullptr);

  // Step 3: install the replacement. The manager's minimum severity is the more verbose of
  // the default level and the ETW override so that neither destination misses records.
  ort_env.SetLoggingManager(std::make_unique<LoggingManager>(
      std::move(combined_sink),
      std::min(kDefaultSeverity, etw_severity),
      false,
      LoggingManager::InstanceType::Default,
      &logger_id));
}

struct AsyncResource {
std::vector<OrtValue> feeds;
std::vector<const OrtValue*> feeds_raw;
Expand Down Expand Up @@ -1629,6 +1792,51 @@
},
"Sets the default logging verbosity level. To activate the verbose log, "
"you need to set the default logging severity to 0:Verbose level.");
m.def(
    "set_default_logger_callback",
    // Python binding: installs `callback` as the ORT logging callback, or restores the
    // platform sink when `callback` is None. Invalid arguments are rejected before any
    // state is changed.
    [](py::object callback, int severity) {
      ORT_ENFORCE(severity >= 0 && severity <= 4,
                  "Invalid logging severity. 0:Verbose, 1:Info, 2:Warning, 3:Error, 4:Fatal");
      ORT_ENFORCE(g_python_callback_sink != nullptr,
                  "Python logging callback sink is not installed "
                  "(the ORT environment may have been created outside of Python)");

      // None means "reset to the platform sink". Decide this once, before `callback` is
      // moved into the sink below.
      const bool reset_requested = callback.is_none();
      if (!reset_requested) {
        ORT_ENFORCE(PyCallable_Check(callback.ptr()), "callback must be a callable");
      }

      // Swap the callable inside the already-installed sink; the LoggingManager itself is
      // left untouched.
      g_python_callback_sink->SetCallback(std::move(callback));

      // A reset must not silently overwrite a severity the user may have configured
      // separately via set_default_logger_severity(), so the severity is only applied when
      // a callback is being installed.
      if (reset_requested) {
        return;
      }
      GetEnv().GetLoggingManager()->SetDefaultLoggerSeverity(static_cast<logging::Severity>(severity));
    },
    py::arg("callback"),
    py::arg("severity") = static_cast<int>(ORT_LOGGING_LEVEL_WARNING),
    R"pbdoc(Register a Python callable as the global ORT logging callback.

The callback receives every log message produced by ORT at or above *severity*.
Pass ``None`` as the callback to restore the default platform logger (stderr on
Linux/macOS, ``OutputDebugString`` on Windows).

Args:
callback: A Python callable with the signature
``callback(severity: int, category: str, logid: str,
code_location: str, message: str) -> None``,
or ``None`` to reset to the default platform logger.
severity (int): Minimum log severity that will be forwarded to the
callback. 0=Verbose, 1=Info, 2=Warning (default), 3=Error, 4=Fatal.

Note:
The callback may be invoked from a non-Python thread; the GIL is
acquired automatically before each call.
)pbdoc");
m.def(
"get_all_providers", []() -> const std::vector<std::string>& { return GetAllExecutionProviderNames(); },
"Return list of Execution Providers that this version of Onnxruntime can support. "
Expand Down
73 changes: 73 additions & 0 deletions onnxruntime/test/python/onnxruntime_test_python.py
Original file line number Diff line number Diff line change
Expand Up @@ -268,6 +268,79 @@ def test_enabling_and_disabling_telemetry(self):
# may be no-op on certain Windows builds based on build configuration
onnxrt.enable_telemetry_events()

def test_set_default_logger_callback(self):
    # End-to-end check of onnxruntime.set_default_logger_callback: installing a callback,
    # receiving records with the documented signature, resetting with None, argument
    # validation, and surviving a callback that raises.

    # Verify that set_default_logger_callback is exposed in the onnxruntime namespace.
    self.assertTrue(callable(onnxrt.set_default_logger_callback))

    # The logging callback and severity are process-wide state. Register the restore steps
    # up front so they run even when an assertion below fails; otherwise a failure would
    # leave a test-local callback and the Verbose level installed for the rest of the
    # suite. Cleanups run in reverse order: the callback is reset first, then the default
    # (Warning) severity is restored.
    self.addCleanup(onnxrt.set_default_logger_severity, 2)
    self.addCleanup(onnxrt.set_default_logger_callback, None)

    # Setting a Python callable should succeed.
    messages = []

    def my_callback(severity, category, logid, code_location, message):
        messages.append((severity, category, logid, code_location, message))

    onnxrt.set_default_logger_callback(my_callback, severity=0)

    # Running inference while the callback is active should not crash.
    sess = onnxrt.InferenceSession(get_name("mul_1.onnx"), providers=["CPUExecutionProvider"])
    x = np.array([[1.0, 2.0], [3.0, 4.0], [5.0, 6.0]], dtype=np.float32)
    # mul_1.onnx has a single input "X" and output "Y" (Y = X * [[1,2],[3,4],[5,6]]).
    (res,) = sess.run(["Y"], {"X": x})
    np.testing.assert_allclose(res, x * x)

    # Creating a session at Verbose severity must have produced ORT log messages that
    # were actually routed to the callback (otherwise the callback is a no-op).
    self.assertGreater(len(messages), 0, "logging callback was never invoked")
    # Each record must match the documented (severity, category, logid, code_location,
    # message) signature.
    for severity, category, logid, code_location, message in messages:
        self.assertIsInstance(severity, int)
        self.assertIsInstance(category, str)
        self.assertIsInstance(logid, str)
        self.assertIsInstance(code_location, str)
        self.assertIsInstance(message, str)

    # Resetting to None restores the default platform logger and should not raise.
    onnxrt.set_default_logger_callback(None)
    # Restore the default (Warning) severity so the Verbose level set above does not leak
    # into the rest of the test / suite.
    onnxrt.set_default_logger_severity(2)

    # After reset, further inference still works and the old callback is no longer invoked.
    count_after_reset = len(messages)
    (res,) = sess.run(["Y"], {"X": x})
    np.testing.assert_allclose(res, x * x)
    self.assertEqual(len(messages), count_after_reset, "callback was invoked after being reset")

    # Lambda callables are accepted.
    onnxrt.set_default_logger_callback(lambda sev, cat, lid, loc, msg: None)

    # Boundary severity values (0=Verbose and 4=Fatal) should be accepted.
    onnxrt.set_default_logger_callback(my_callback, severity=0)
    onnxrt.set_default_logger_callback(my_callback, severity=4)

    # Invalid severity should raise.
    with self.assertRaises(RuntimeError):
        onnxrt.set_default_logger_callback(my_callback, severity=5)

    with self.assertRaises(RuntimeError):
        onnxrt.set_default_logger_callback(my_callback, severity=-1)

    # Non-callable should raise.
    with self.assertRaises(RuntimeError):
        onnxrt.set_default_logger_callback("not a callable")

    # A callback that raises should not crash the process.
    def raising_callback(severity, category, logid, code_location, message):
        raise ValueError("intentional error from callback")

    onnxrt.set_default_logger_callback(raising_callback, severity=0)
    # Create a session to trigger some ORT log output; it should not crash even though
    # the callback raises.
    onnxrt.InferenceSession(get_name("mul_1.onnx"), providers=["CPUExecutionProvider"])

    # No explicit teardown here: the cleanups registered at the top restore the platform
    # default logger and the default (Warning) severity on both success and failure.

def test_deserialization_from_path_object(self):
# path object is allowed
onnxrt.InferenceSession(pathlib.Path(get_name("mul_1.onnx")), providers=available_providers)
Expand Down
Loading
Loading