From a8bab1ca1b50969f3075f3fcf05dcd5893ccc247 Mon Sep 17 00:00:00 2001
From: Sergey Chugunov
Date: Thu, 2 Apr 2026 16:01:11 +0300
Subject: [PATCH 1/9] IGNITE-28242 Move cp readlock up in call stack to ensure
correct lock acquisition ordering
---
.../processors/cache/GridCacheMapEntry.java | 4 ---
.../distributed/near/GridNearTxLocal.java | 35 +++++++++++--------
2 files changed, 21 insertions(+), 18 deletions(-)
diff --git a/modules/core/src/main/java/org/apache/ignite/internal/processors/cache/GridCacheMapEntry.java b/modules/core/src/main/java/org/apache/ignite/internal/processors/cache/GridCacheMapEntry.java
index da6f47073df88..4b2cb3ff27588 100644
--- a/modules/core/src/main/java/org/apache/ignite/internal/processors/cache/GridCacheMapEntry.java
+++ b/modules/core/src/main/java/org/apache/ignite/internal/processors/cache/GridCacheMapEntry.java
@@ -457,8 +457,6 @@ protected GridDhtLocalPartition localPartition() {
boolean deferred = false;
GridCacheVersion ver0 = null;
- cctx.shared().database().checkpointReadLock();
-
lockEntry();
try {
@@ -494,8 +492,6 @@ protected GridDhtLocalPartition localPartition() {
}
finally {
unlockEntry();
-
- cctx.shared().database().checkpointReadUnlock();
}
if (obsolete) {
diff --git a/modules/core/src/main/java/org/apache/ignite/internal/processors/cache/distributed/near/GridNearTxLocal.java b/modules/core/src/main/java/org/apache/ignite/internal/processors/cache/distributed/near/GridNearTxLocal.java
index 3c79a2f908364..a37492fffa4e5 100644
--- a/modules/core/src/main/java/org/apache/ignite/internal/processors/cache/distributed/near/GridNearTxLocal.java
+++ b/modules/core/src/main/java/org/apache/ignite/internal/processors/cache/distributed/near/GridNearTxLocal.java
@@ -2391,21 +2391,28 @@ private Collection enlistRead(
optimistic() ? accessPolicy(cacheCtx, txKey, expiryPlc) : null;
if (needReadVer) {
- getRes = primaryLocal(entry) ?
- entry.innerGetVersioned(
- null,
- this,
- /*metrics*/true,
- /*event*/true,
- null,
- resolveTaskName(),
- accessPlc,
- !deserializeBinary,
- null) : null;
+ cctx.database().checkpointReadLock();
- if (getRes != null) {
- val = getRes.value();
- readVer = getRes.version();
+ try {
+ getRes = primaryLocal(entry) ?
+ entry.innerGetVersioned(
+ null,
+ this,
+ /*metrics*/true,
+ /*event*/true,
+ null,
+ resolveTaskName(),
+ accessPlc,
+ !deserializeBinary,
+ null) : null;
+
+ if (getRes != null) {
+ val = getRes.value();
+ readVer = getRes.version();
+ }
+ }
+ finally {
+ cctx.database().checkpointReadUnlock();
}
}
else {
From b3c8d4e6610daeaa01d46ac00517b3ba3e2e76ff Mon Sep 17 00:00:00 2001
From: Sergey Chugunov
Date: Fri, 3 Apr 2026 15:15:58 +0300
Subject: [PATCH 2/9] IGNITE-28242 Add cp readlock to other places after test
report analysis
---
.../dht/GridDhtTxLocalAdapter.java | 24 ++++---
.../distributed/near/GridNearTxLocal.java | 64 ++++++++-----------
2 files changed, 43 insertions(+), 45 deletions(-)
diff --git a/modules/core/src/main/java/org/apache/ignite/internal/processors/cache/distributed/dht/GridDhtTxLocalAdapter.java b/modules/core/src/main/java/org/apache/ignite/internal/processors/cache/distributed/dht/GridDhtTxLocalAdapter.java
index e8dfb2b1f6fb7..d04b7250e2729 100644
--- a/modules/core/src/main/java/org/apache/ignite/internal/processors/cache/distributed/dht/GridDhtTxLocalAdapter.java
+++ b/modules/core/src/main/java/org/apache/ignite/internal/processors/cache/distributed/dht/GridDhtTxLocalAdapter.java
@@ -589,18 +589,24 @@ IgniteInternalFuture lockAllAsync(
if (txEntry == null) {
GridDhtCacheEntry cached;
- while (true) {
- try {
- cached = dhtCache.entryExx(key, topVer);
+ cctx.database().checkpointReadLock();
- cached.unswap(read);
+ try {
+ while (true) {
+ try {
+ cached = dhtCache.entryExx(key, topVer);
- break;
- }
- catch (GridCacheEntryRemovedException ignore) {
- if (log.isDebugEnabled())
- log.debug("Get removed entry: " + key);
+ cached.unswap(read);
+
+ break;
+ }
+ catch (GridCacheEntryRemovedException ignore) {
+ if (log.isDebugEnabled())
+ log.debug("Get removed entry: " + key);
+ }
}
+ } finally {
+ cctx.database().checkpointReadUnlock();
}
addActiveCache(dhtCache.context(), false);
diff --git a/modules/core/src/main/java/org/apache/ignite/internal/processors/cache/distributed/near/GridNearTxLocal.java b/modules/core/src/main/java/org/apache/ignite/internal/processors/cache/distributed/near/GridNearTxLocal.java
index a37492fffa4e5..194fbb1353d0c 100644
--- a/modules/core/src/main/java/org/apache/ignite/internal/processors/cache/distributed/near/GridNearTxLocal.java
+++ b/modules/core/src/main/java/org/apache/ignite/internal/processors/cache/distributed/near/GridNearTxLocal.java
@@ -1251,29 +1251,29 @@ private boolean enlistWriteEntry(GridCacheContext cacheCtx,
GridCacheEntryEx entry = entryEx(cacheCtx, txKey, entryTopVer != null ? entryTopVer : topologyVersion());
try {
- entry.unswap(false);
-
- // Check if lock is being explicitly acquired by the same thread.
- if (!implicit && cctx.kernalContext().config().isCacheSanityCheckEnabled() &&
- entry.lockedByThread(threadId, xidVer)) {
- throw new IgniteCheckedException("Cannot access key within transaction if lock is " +
- "externally held [key=" + CU.value(cacheKey, cacheCtx, false) +
- ", entry=" + entry +
- ", xidVer=" + xidVer +
- ", threadId=" + threadId +
- ", locNodeId=" + cctx.localNodeId() + ']');
- }
-
CacheObject old = null;
GridCacheVersion readVer = null;
- if (optimistic() && !implicit()) {
- try {
- if (needReadVer) {
- if (primaryLocal(entry)) {
- cctx.database().checkpointReadLock();
+ cctx.database().checkpointReadLock();
- try {
+ try {
+ entry.unswap(false);
+
+ // Check if lock is being explicitly acquired by the same thread.
+ if (!implicit && cctx.kernalContext().config().isCacheSanityCheckEnabled() &&
+ entry.lockedByThread(threadId, xidVer)) {
+ throw new IgniteCheckedException("Cannot access key within transaction if lock is " +
+ "externally held [key=" + CU.value(cacheKey, cacheCtx, false) +
+ ", entry=" + entry +
+ ", xidVer=" + xidVer +
+ ", threadId=" + threadId +
+ ", locNodeId=" + cctx.localNodeId() + ']');
+ }
+
+ if (optimistic() && !implicit()) {
+ try {
+ if (needReadVer) {
+ if (primaryLocal(entry)) {
EntryGetResult res = entry.innerGetVersioned(
null,
this,
@@ -1290,15 +1290,8 @@ private boolean enlistWriteEntry(GridCacheContext cacheCtx,
readVer = res.version();
}
}
- finally {
- cctx.database().checkpointReadUnlock();
- }
}
- }
- else {
- cctx.database().checkpointReadLock();
-
- try {
+ else {
old = entry.innerGet(
null,
this,
@@ -1310,19 +1303,18 @@ private boolean enlistWriteEntry(GridCacheContext cacheCtx,
null,
keepBinary);
}
- finally {
- cctx.database().checkpointReadUnlock();
- }
}
- }
- catch (ClusterTopologyCheckedException e) {
- entry.touch();
+ catch (ClusterTopologyCheckedException e) {
+ entry.touch();
- throw e;
+ throw e;
+ }
}
+ else
+ old = entry.rawGet();
+ } finally {
+ cctx.database().checkpointReadUnlock();
}
- else
- old = entry.rawGet();
final GridCacheOperation op = rmv ? DELETE :
entryProc != null ? TRANSFORM : old != null ? UPDATE : CREATE;
From a44645e67af64ae17c8faac8a74c05eacde7ef0f Mon Sep 17 00:00:00 2001
From: Sergey Chugunov
Date: Tue, 7 Apr 2026 16:36:02 +0300
Subject: [PATCH 3/9] IGNITE-28242 Fix checkstyle
---
.../cache/distributed/dht/GridDhtTxLocalAdapter.java | 3 ++-
.../processors/cache/distributed/near/GridNearTxLocal.java | 3 ++-
2 files changed, 4 insertions(+), 2 deletions(-)
diff --git a/modules/core/src/main/java/org/apache/ignite/internal/processors/cache/distributed/dht/GridDhtTxLocalAdapter.java b/modules/core/src/main/java/org/apache/ignite/internal/processors/cache/distributed/dht/GridDhtTxLocalAdapter.java
index d04b7250e2729..d7d73770c47c5 100644
--- a/modules/core/src/main/java/org/apache/ignite/internal/processors/cache/distributed/dht/GridDhtTxLocalAdapter.java
+++ b/modules/core/src/main/java/org/apache/ignite/internal/processors/cache/distributed/dht/GridDhtTxLocalAdapter.java
@@ -605,7 +605,8 @@ IgniteInternalFuture lockAllAsync(
log.debug("Get removed entry: " + key);
}
}
- } finally {
+ }
+ finally {
cctx.database().checkpointReadUnlock();
}
diff --git a/modules/core/src/main/java/org/apache/ignite/internal/processors/cache/distributed/near/GridNearTxLocal.java b/modules/core/src/main/java/org/apache/ignite/internal/processors/cache/distributed/near/GridNearTxLocal.java
index 194fbb1353d0c..0963117c827ea 100644
--- a/modules/core/src/main/java/org/apache/ignite/internal/processors/cache/distributed/near/GridNearTxLocal.java
+++ b/modules/core/src/main/java/org/apache/ignite/internal/processors/cache/distributed/near/GridNearTxLocal.java
@@ -1312,7 +1312,8 @@ private boolean enlistWriteEntry(GridCacheContext cacheCtx,
}
else
old = entry.rawGet();
- } finally {
+ }
+ finally {
cctx.database().checkpointReadUnlock();
}
From 340d06bbe80530b8d1b8dc8ab111974cc9907867 Mon Sep 17 00:00:00 2001
From: Sergey Chugunov
Date: Mon, 4 May 2026 16:15:11 +0300
Subject: [PATCH 4/9] IGNITE-28242 Add reproducer, add assertion for cp read
lock
---
.../processors/cache/GridCacheMapEntry.java | 2 +
.../TxPutTxGetCheckpointerDeadlockTest.java | 197 ++++++++++++++++++
.../testsuites/IgnitePdsTestSuite7.java | 3 +
3 files changed, 202 insertions(+)
create mode 100644 modules/core/src/test/java/org/apache/ignite/internal/processors/cache/persistence/db/TxPutTxGetCheckpointerDeadlockTest.java
diff --git a/modules/core/src/main/java/org/apache/ignite/internal/processors/cache/GridCacheMapEntry.java b/modules/core/src/main/java/org/apache/ignite/internal/processors/cache/GridCacheMapEntry.java
index 4b2cb3ff27588..a5275d349cee3 100644
--- a/modules/core/src/main/java/org/apache/ignite/internal/processors/cache/GridCacheMapEntry.java
+++ b/modules/core/src/main/java/org/apache/ignite/internal/processors/cache/GridCacheMapEntry.java
@@ -457,6 +457,8 @@ protected GridDhtLocalPartition localPartition() {
boolean deferred = false;
GridCacheVersion ver0 = null;
+ assert !checkExpire || cctx.shared().database().checkpointLockIsHeldByThread();
+
lockEntry();
try {
diff --git a/modules/core/src/test/java/org/apache/ignite/internal/processors/cache/persistence/db/TxPutTxGetCheckpointerDeadlockTest.java b/modules/core/src/test/java/org/apache/ignite/internal/processors/cache/persistence/db/TxPutTxGetCheckpointerDeadlockTest.java
new file mode 100644
index 0000000000000..73398049aae0f
--- /dev/null
+++ b/modules/core/src/test/java/org/apache/ignite/internal/processors/cache/persistence/db/TxPutTxGetCheckpointerDeadlockTest.java
@@ -0,0 +1,197 @@
+/*
+ * Licensed to the Apache Software Foundation (ASF) under one or more
+ * contributor license agreements. See the NOTICE file distributed with
+ * this work for additional information regarding copyright ownership.
+ * The ASF licenses this file to You under the Apache License, Version 2.0
+ * (the "License"); you may not use this file except in compliance with
+ * the License. You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+package org.apache.ignite.internal.processors.cache.persistence.db;
+
+import java.util.concurrent.atomic.AtomicBoolean;
+import java.util.concurrent.atomic.AtomicInteger;
+import java.util.concurrent.locks.ReentrantReadWriteLock;
+import org.apache.ignite.IgniteCache;
+import org.apache.ignite.cache.CacheAtomicityMode;
+import org.apache.ignite.cluster.ClusterState;
+import org.apache.ignite.configuration.CacheConfiguration;
+import org.apache.ignite.configuration.DataRegionConfiguration;
+import org.apache.ignite.configuration.DataStorageConfiguration;
+import org.apache.ignite.configuration.IgniteConfiguration;
+import org.apache.ignite.failure.StopNodeFailureHandler;
+import org.apache.ignite.internal.IgniteEx;
+import org.apache.ignite.internal.processors.cache.persistence.GridCacheDatabaseSharedManager;
+import org.apache.ignite.internal.processors.cache.persistence.IgniteCacheDatabaseSharedManager;
+import org.apache.ignite.internal.processors.cache.persistence.checkpoint.CheckpointManager;
+import org.apache.ignite.internal.processors.cache.persistence.checkpoint.CheckpointTimeoutLock;
+import org.apache.ignite.testframework.GridTestUtils;
+import org.apache.ignite.testframework.junits.common.GridCommonAbstractTest;
+import org.apache.ignite.transactions.Transaction;
+import org.junit.Test;
+
+import static org.apache.ignite.transactions.TransactionConcurrency.PESSIMISTIC;
+import static org.apache.ignite.transactions.TransactionIsolation.READ_COMMITTED;
+
+/**
+ * Verifies that there is no deadlock between GridCacheMapEntry.unswap() called by a tx put operation, a checkpointer
+ * requesting the cp write lock, and a parallel tx get operation.
+ *
+ * The root cause of the deadlock was a wrong locking order: the cp read lock was acquired while the GridCacheMapEntry
+ * instance was already locked by the first tx put operation.
+ */
+public class TxPutTxGetCheckpointerDeadlockTest extends GridCommonAbstractTest {
+ /** When {@code true}, {@link #afterTest()} interrupts the cp write lock switching threads. */
+ private final AtomicBoolean deadlockDetected = new AtomicBoolean(false);
+
+ /** Set to {@code true} in {@link #afterTest()}; presumably a stop signal for test threads (TODO confirm). */
+ private final AtomicBoolean testFinished = new AtomicBoolean(false);
+
+ /** Name prefix used by {@link #afterTest()} to find the threads that switch the cp write lock. */
+ private static final String CP_WRITE_LOCK_SWITCHING_THREAD_NAME = "cp-write-lock-switching-runner";
+
+ /** {@inheritDoc} Enables persistence on the default data region and installs a {@link StopNodeFailureHandler}. */
+ @Override protected IgniteConfiguration getConfiguration(String igniteInstanceName) throws Exception {
+ IgniteConfiguration cfg = super.getConfiguration(igniteInstanceName);
+
+ cfg.setDataStorageConfiguration(
+ new DataStorageConfiguration()
+ .setDefaultDataRegionConfiguration(
+ new DataRegionConfiguration()
+ .setPersistenceEnabled(true)
+ )
+ );
+
+ cfg.setFailureHandler(new StopNodeFailureHandler());
+
+ return cfg;
+ }
+
+ /** {@inheritDoc} Stops all grids and cleans the persistence directory before the test body runs. */
+ @Override protected void beforeTest() throws Exception {
+ stopAllGrids();
+
+ cleanPersistenceDir();
+ }
+
+ /** {@inheritDoc} Interrupts the cp write lock switching threads if a deadlock was flagged, then stops grids and cleans up. */
+ @SuppressWarnings({"deprecation"}) // NOTE(review): no deprecated JDK API is called here - confirm this is still needed.
+ @Override protected void afterTest() throws Exception {
+ if (deadlockDetected.get()) {
+ Thread.getAllStackTraces().keySet().stream()
+ .filter(t -> t.getName().startsWith(CP_WRITE_LOCK_SWITCHING_THREAD_NAME))
+ .forEach(Thread::interrupt);
+ }
+
+ testFinished.set(true);
+
+ stopAllGrids();
+
+ cleanPersistenceDir();
+ }
+
+ /**
+ * Tests for the absence of a deadlock between transactional cache operations and checkpointer write lock acquisition.
+ *
+ * This test simulates a scenario where:
+ * <ul>
+ * <li>One thread performs continuous transactional {@code put} operations on a cache entry.</li>
+ * <li>Another thread performs continuous transactional {@code get} operations on the same entry, potentially
+ * triggering unswapping of the entry under lock.</li>
+ * <li>A third thread repeatedly acquires and releases the checkpoint write lock, simulating checkpointer activity.</li>
+ * </ul>
+ *
+ * <p>
+ * The primary purpose is to verify that there is no deadlock caused by incorrect lock ordering,
+ * specifically when a transactional {@code put} holds a lock on a {@link org.apache.ignite.internal.processors.cache.GridCacheMapEntry}
+ * while attempting to acquire a checkpoint read lock, at the same time as the checkpointer
+ * tries to acquire checkpoint write lock.
+ *