triton-inference-server · pskiran1 · Aug 7, 2025 · Jul 21, 2025 · Jul 21, 2025 · Jul 23, 2025
diff --git a/src/pb_stub.cc b/src/pb_stub.cc
@@ -1040,11 +1040,13 @@ Stub::~Stub()
 {
 #ifdef TRITON_ENABLE_GPU
   try {
-    CUDAHandler& cuda_api = CUDAHandler::getInstance();
-    for (auto& m :
-         shm_pool_->GetCUDAMemoryPoolManager()->CUDAPoolAddressMap()) {
-      if (m.second != nullptr) {
-        cuda_api.CloseCudaHandle(m.first, m.second);
+    if (shm_pool_ != nullptr) {
+      CUDAHandler& cuda_api = CUDAHandler::getInstance();
+      for (auto& m :
+           shm_pool_->GetCUDAMemoryPoolManager()->CUDAPoolAddressMap()) {
+        if (m.second != nullptr) {
+          cuda_api.CloseCudaHandle(m.first, m.second);
+        }
       }
     }
   }
@@ -1053,13 +1055,14 @@ Stub::~Stub()
   }
 #endif
 
-  {
+  // Clean up Python objects only if the interpreter is still initialized.
+  if (Py_IsInitialized()) {
     py::gil_scoped_acquire acquire;
     py::object async_event_loop_local(std::move(async_event_loop_));
     py::object background_futures_local(std::move(background_futures_));
     py::object model_instance_local(std::move(model_instance_));
   }
-  stub_instance_.reset();
+
   stub_message_queue_.reset();
   parent_message_queue_.reset();
   stub_to_parent_mq_.reset();
@@ -2030,6 +2033,7 @@ main(int argc, char** argv)
   catch (const PythonBackendException& pb_exception) {
     LOG_INFO << "Failed to preinitialize Python stub: " << pb_exception.what();
     logger.reset();
+    stub.reset();
     exit(1);
   }
 

diff --git a/src/stub_launcher.cc b/src/stub_launcher.cc
@@ -280,7 +280,9 @@ StubLauncher::Launch()
     // Push a dummy message to the message queue so that the stub
     // process is notified that it can release the object stored in
     // shared memory.
-    stub_message_queue_->Push(DUMMY_MESSAGE);
+    if (stub_message_queue_) {
+      stub_message_queue_->Push(DUMMY_MESSAGE);
+    }
 
     // If the model is not initialized, wait for the stub process to exit.
     if (!is_initialized_) {
@@ -299,11 +301,23 @@ StubLauncher::Launch()
   //
   // The reason it is broken into two steps is that creation of the health
   // monitoring thread may take longer which can make the server process think
-  // that the stub process is unhealthy and return early. Waiting until the
-  // health thread is spawn would make sure would prevent this issue.
-  parent_message_queue_->Pop();
+  // that the stub process is unhealthy and return early. Waiting for the stub's
+  // message with a longer timeout prevents this issue.
+  const uint64_t initialization_timeout_ms = 10000;  // 10 sec
+  LOG_MESSAGE(
+      TRITONSERVER_LOG_VERBOSE,
+      "Waiting for the stub health monitoring thread to start");
+
+  bi::managed_external_buffer::handle_t message;
+  auto err = ReceiveMessageFromStub(message, initialization_timeout_ms);
+  if (err != nullptr) {
+    KillStubProcess();
+  }
 
   if (stub_process_kind_ == "AUTOCOMPLETE_STUB") {
+    if (err != nullptr) {
+      throw BackendModelException(err);
+    }
     try {
       AutocompleteStubProcess();
     }
@@ -314,6 +328,7 @@ StubLauncher::Launch()
           TRITONSERVER_ErrorNew(TRITONSERVER_ERROR_INTERNAL, ex.what()));
     }
   } else if (stub_process_kind_ == "MODEL_INSTANCE_STUB") {
+    RETURN_IF_ERROR(err);
     RETURN_IF_ERROR(ModelInstanceStubProcess());
   } else {
     return TRITONSERVER_ErrorNew(
@@ -435,7 +450,9 @@ StubLauncher::Launch()
       // Push a dummy message to the message queue so that the stub
       // process is notified that it can release the object stored in
       // shared memory.
-      stub_message_queue_->Push(DUMMY_MESSAGE);
+      if (stub_message_queue_) {
+        stub_message_queue_->Push(DUMMY_MESSAGE);
+      }
 
       // If the model is not initialized, wait for the stub process to exit.
       if (!is_initialized_) {
@@ -456,11 +473,23 @@ StubLauncher::Launch()
     //
     // The reason it is broken into two steps is that creation of the health
     // monitoring thread may take longer which can make the server process think
-    // that the stub process is unhealthy and return early. Waiting until the
-    // health thread is spawn would prevent this issue.
-    parent_message_queue_->Pop();
+    // that the stub process is unhealthy and return early. Waiting for the
+    // stub's message with a longer timeout prevents this issue.
+    const uint64_t initialization_timeout_ms = 10000;  // 10 sec
+    LOG_MESSAGE(
+        TRITONSERVER_LOG_VERBOSE,
+        "Waiting for the stub health monitoring thread to start");
+
+    bi::managed_external_buffer::handle_t message;
+    auto err = ReceiveMessageFromStub(message, initialization_timeout_ms);
+    if (err != nullptr) {
+      KillStubProcess();
+    }
 
     if (stub_process_kind_ == "AUTOCOMPLETE_STUB") {
+      if (err != nullptr) {
+        throw BackendModelException(err);
+      }
       try {
         AutocompleteStubProcess();
       }
@@ -471,6 +500,7 @@ StubLauncher::Launch()
             TRITONSERVER_ErrorNew(TRITONSERVER_ERROR_INTERNAL, ex.what()));
       }
     } else if (stub_process_kind_ == "MODEL_INSTANCE_STUB") {
+      RETURN_IF_ERROR(err);
       RETURN_IF_ERROR(ModelInstanceStubProcess());
     } else {
       return TRITONSERVER_ErrorNew(
@@ -592,8 +622,13 @@ StubLauncher::ModelInstanceStubProcess()
   initialize_message->Args() = initialize_map_handle;
   stub_message_queue_->Push(initialize_message->ShmHandle());
 
+  const uint64_t initialization_timeout_ms = 5000;  // 5 sec
+  LOG_MESSAGE(
+      TRITONSERVER_LOG_VERBOSE,
+      "Waiting for the stub process initialization response");
+
   bi::managed_external_buffer::handle_t message;
-  RETURN_IF_ERROR(ReceiveMessageFromStub(message));
+  RETURN_IF_ERROR(ReceiveMessageFromStub(message, initialization_timeout_ms));
 
   std::unique_ptr<IPCMessage> initialize_response_message =
       IPCMessage::LoadFromSharedMemory(shm_pool_, message);
@@ -726,11 +761,11 @@ StubLauncher::KillStubProcess()
 
 TRITONSERVER_Error*
 StubLauncher::ReceiveMessageFromStub(
-    bi::managed_external_buffer::handle_t& message)
+    bi::managed_external_buffer::handle_t& message,
+    uint64_t timeout_miliseconds)
 {
   bool success = false;
   while (!success) {
-    uint64_t timeout_miliseconds = 1000;
     {
       boost::posix_time::ptime timeout =
           boost::get_system_time() +

diff --git a/src/stub_launcher.h b/src/stub_launcher.h
@@ -147,7 +147,8 @@ class StubLauncher {
 
   // Get a message from the stub process
   TRITONSERVER_Error* ReceiveMessageFromStub(
-      bi::managed_external_buffer::handle_t& message);
+      bi::managed_external_buffer::handle_t& message,
+      uint64_t timeout_miliseconds = 1000);
 
   // Wait for stub process
   void WaitForStubProcess();