diff --git a/src/pb_stub.cc b/src/pb_stub.cc index 76130f94..56048d78 100644 --- a/src/pb_stub.cc +++ b/src/pb_stub.cc @@ -1040,11 +1040,13 @@ Stub::~Stub() { #ifdef TRITON_ENABLE_GPU try { - CUDAHandler& cuda_api = CUDAHandler::getInstance(); - for (auto& m : - shm_pool_->GetCUDAMemoryPoolManager()->CUDAPoolAddressMap()) { - if (m.second != nullptr) { - cuda_api.CloseCudaHandle(m.first, m.second); + if (shm_pool_ != nullptr) { + CUDAHandler& cuda_api = CUDAHandler::getInstance(); + for (auto& m : + shm_pool_->GetCUDAMemoryPoolManager()->CUDAPoolAddressMap()) { + if (m.second != nullptr) { + cuda_api.CloseCudaHandle(m.first, m.second); + } } } } @@ -1053,13 +1055,14 @@ Stub::~Stub() } #endif - { + // Ensure the interpreter is active before trying to clean up. + if (Py_IsInitialized()) { py::gil_scoped_acquire acquire; py::object async_event_loop_local(std::move(async_event_loop_)); py::object background_futures_local(std::move(background_futures_)); py::object model_instance_local(std::move(model_instance_)); } - stub_instance_.reset(); + stub_message_queue_.reset(); parent_message_queue_.reset(); stub_to_parent_mq_.reset(); @@ -2030,6 +2033,7 @@ main(int argc, char** argv) catch (const PythonBackendException& pb_exception) { LOG_INFO << "Failed to preinitialize Python stub: " << pb_exception.what(); logger.reset(); + stub.reset(); exit(1); } diff --git a/src/stub_launcher.cc b/src/stub_launcher.cc index 828228e6..3bd01321 100644 --- a/src/stub_launcher.cc +++ b/src/stub_launcher.cc @@ -280,7 +280,9 @@ StubLauncher::Launch() // Push a dummy message to the message queue so that the stub // process is notified that it can release the object stored in // shared memory. - stub_message_queue_->Push(DUMMY_MESSAGE); + if (stub_message_queue_) { + stub_message_queue_->Push(DUMMY_MESSAGE); + } // If the model is not initialized, wait for the stub process to exit. if (!is_initialized_) { @@ -299,11 +301,23 @@ StubLauncher::Launch() // // The reason it is broken into two steps is that creation of the health // monitoring thread may take longer which can make the server process think - // that the stub process is unhealthy and return early. Waiting until the - // health thread is spawn would make sure would prevent this issue. - parent_message_queue_->Pop(); + // that the stub process is unhealthy and return early. Waiting with a longer + // timeout prevents this issue. + const uint64_t initialization_timeout_ms = 10000; // 10 sec + LOG_MESSAGE( + TRITONSERVER_LOG_VERBOSE, + "Waiting for the stub health monitoring thread to start"); + + bi::managed_external_buffer::handle_t message; + auto err = ReceiveMessageFromStub(message, initialization_timeout_ms); + if (err != nullptr) { + KillStubProcess(); + } if (stub_process_kind_ == "AUTOCOMPLETE_STUB") { + if (err != nullptr) { + throw BackendModelException(err); + } try { AutocompleteStubProcess(); } @@ -314,6 +328,7 @@ StubLauncher::Launch() TRITONSERVER_ErrorNew(TRITONSERVER_ERROR_INTERNAL, ex.what())); } } else if (stub_process_kind_ == "MODEL_INSTANCE_STUB") { + RETURN_IF_ERROR(err); RETURN_IF_ERROR(ModelInstanceStubProcess()); } else { return TRITONSERVER_ErrorNew( @@ -435,7 +450,9 @@ StubLauncher::Launch() // Push a dummy message to the message queue so that the stub // process is notified that it can release the object stored in // shared memory. - stub_message_queue_->Push(DUMMY_MESSAGE); + if (stub_message_queue_) { + stub_message_queue_->Push(DUMMY_MESSAGE); + } // If the model is not initialized, wait for the stub process to exit. if (!is_initialized_) { @@ -456,11 +473,23 @@ StubLauncher::Launch() // // The reason it is broken into two steps is that creation of the health // monitoring thread may take longer which can make the server process think - // that the stub process is unhealthy and return early. Waiting until the - // health thread is spawn would prevent this issue. - parent_message_queue_->Pop(); + // that the stub process is unhealthy and return early. Waiting with a + // longer timeout prevents this issue. + const uint64_t initialization_timeout_ms = 10000; // 10 sec + LOG_MESSAGE( + TRITONSERVER_LOG_VERBOSE, + "Waiting for the stub health monitoring thread to start"); + + bi::managed_external_buffer::handle_t message; + auto err = ReceiveMessageFromStub(message, initialization_timeout_ms); + if (err != nullptr) { + KillStubProcess(); + } if (stub_process_kind_ == "AUTOCOMPLETE_STUB") { + if (err != nullptr) { + throw BackendModelException(err); + } try { AutocompleteStubProcess(); } @@ -471,6 +500,7 @@ StubLauncher::Launch() TRITONSERVER_ErrorNew(TRITONSERVER_ERROR_INTERNAL, ex.what())); } } else if (stub_process_kind_ == "MODEL_INSTANCE_STUB") { + RETURN_IF_ERROR(err); RETURN_IF_ERROR(ModelInstanceStubProcess()); } else { return TRITONSERVER_ErrorNew( @@ -592,8 +622,13 @@ StubLauncher::ModelInstanceStubProcess() initialize_message->Args() = initialize_map_handle; stub_message_queue_->Push(initialize_message->ShmHandle()); + const uint64_t initialization_timeout_ms = 5000; // 5 sec + LOG_MESSAGE( + TRITONSERVER_LOG_VERBOSE, + "Waiting for the stub process initialization response"); + bi::managed_external_buffer::handle_t message; - RETURN_IF_ERROR(ReceiveMessageFromStub(message)); + RETURN_IF_ERROR(ReceiveMessageFromStub(message, initialization_timeout_ms)); std::unique_ptr initialize_response_message = IPCMessage::LoadFromSharedMemory(shm_pool_, message); @@ -726,11 +761,11 @@ StubLauncher::KillStubProcess() TRITONSERVER_Error* StubLauncher::ReceiveMessageFromStub( - bi::managed_external_buffer::handle_t& message) + bi::managed_external_buffer::handle_t& message, + uint64_t timeout_miliseconds) { bool success = false; while (!success) { - uint64_t timeout_miliseconds = 1000; { boost::posix_time::ptime timeout = boost::get_system_time() + diff --git a/src/stub_launcher.h b/src/stub_launcher.h index 6c8dd910..58cdcc61 100644 --- a/src/stub_launcher.h +++ b/src/stub_launcher.h @@ -147,7 +147,8 @@ class StubLauncher { // Get a message from the stub process TRITONSERVER_Error* ReceiveMessageFromStub( - bi::managed_external_buffer::handle_t& message); + bi::managed_external_buffer::handle_t& message, + uint64_t timeout_miliseconds = 1000); // Wait for stub process void WaitForStubProcess();