From 6273da7e6b71341b1c07142c27fbf8e73b48dc2f Mon Sep 17 00:00:00 2001 From: Joongi Kim Date: Fri, 2 Apr 2021 10:08:28 +0900 Subject: [PATCH] fix: Remove no longer necessary isolation level (#416) Backported-From: main Backported-To: 20.09 --- changes/416.fix | 1 + src/ai/backend/manager/scheduler/dispatcher.py | 2 +- 2 files changed, 2 insertions(+), 1 deletion(-) create mode 100644 changes/416.fix diff --git a/changes/416.fix b/changes/416.fix new file mode 100644 index 000000000..20af1d086 --- /dev/null +++ b/changes/416.fix @@ -0,0 +1 @@ +Fix a regression of spawning multi-node cluster sessions due to DB API changes related to setting transaction isolation levels diff --git a/src/ai/backend/manager/scheduler/dispatcher.py b/src/ai/backend/manager/scheduler/dispatcher.py index 925c46b2e..d2b8fd910 100644 --- a/src/ai/backend/manager/scheduler/dispatcher.py +++ b/src/ai/backend/manager/scheduler/dispatcher.py @@ -526,7 +526,7 @@ async def _schedule_multi_node_session( log_args = _log_args.get() agent_query_extra_conds = None kernel_agent_bindings: List[KernelAgentBinding] = [] - async with agent_db_conn.begin(isolation_level="REPEATABLE READ"): + async with agent_db_conn.begin(): # This outer transaction is rolled back when any exception occurs inside, # including scheduling failures of a kernel. # It ensures that occupied_slots are recovered when there are partial