diff --git a/modules/core/src/main/java/org/apache/ignite/internal/processors/cache/GridCacheMapEntry.java b/modules/core/src/main/java/org/apache/ignite/internal/processors/cache/GridCacheMapEntry.java index da6f47073df88..a5275d349cee3 100644 --- a/modules/core/src/main/java/org/apache/ignite/internal/processors/cache/GridCacheMapEntry.java +++ b/modules/core/src/main/java/org/apache/ignite/internal/processors/cache/GridCacheMapEntry.java @@ -457,7 +457,7 @@ protected GridDhtLocalPartition localPartition() { boolean deferred = false; GridCacheVersion ver0 = null; - cctx.shared().database().checkpointReadLock(); + assert !checkExpire || cctx.shared().database().checkpointLockIsHeldByThread(); lockEntry(); @@ -494,8 +494,6 @@ protected GridDhtLocalPartition localPartition() { } finally { unlockEntry(); - - cctx.shared().database().checkpointReadUnlock(); } if (obsolete) { diff --git a/modules/core/src/main/java/org/apache/ignite/internal/processors/cache/distributed/dht/GridDhtGetFuture.java b/modules/core/src/main/java/org/apache/ignite/internal/processors/cache/distributed/dht/GridDhtGetFuture.java index 5a7585353156f..57d97caf4291e 100644 --- a/modules/core/src/main/java/org/apache/ignite/internal/processors/cache/distributed/dht/GridDhtGetFuture.java +++ b/modules/core/src/main/java/org/apache/ignite/internal/processors/cache/distributed/dht/GridDhtGetFuture.java @@ -379,7 +379,14 @@ private IgniteInternalFuture> getAsync( boolean addReader = !e.deleted(); if (addReader) { - e.unswap(false); + cctx.shared().database().checkpointReadLock(); + + try { + e.unswap(false); + } + finally { + cctx.shared().database().checkpointReadUnlock(); + } // Entry will be removed on touch() if no data in cache, // but they could be loaded from store, diff --git a/modules/core/src/main/java/org/apache/ignite/internal/processors/cache/distributed/dht/GridDhtLockFuture.java b/modules/core/src/main/java/org/apache/ignite/internal/processors/cache/distributed/dht/GridDhtLockFuture.java index 7cba44d7ef5a4..9952a3c57653d 100644 --- a/modules/core/src/main/java/org/apache/ignite/internal/processors/cache/distributed/dht/GridDhtLockFuture.java +++ b/modules/core/src/main/java/org/apache/ignite/internal/processors/cache/distributed/dht/GridDhtLockFuture.java @@ -933,8 +933,14 @@ private void map(Iterable entries) { boolean needVal; try { - // Must unswap entry so that isNewLocked returns correct value. - e.unswap(false); + cctx.shared().database().checkpointReadLock(); + try { + // Must unswap entry so that isNewLocked returns correct value. + e.unswap(false); + } + finally { + cctx.shared().database().checkpointReadUnlock(); + } needVal = e.isNewLocked(); @@ -1060,7 +1066,14 @@ private void loadMissingFromStore() { for (GridDhtCacheEntry entry : entries) { try { - entry.unswap(false); + cctx.shared().database().checkpointReadLock(); + + try { + entry.unswap(false); + } + finally { + cctx.shared().database().checkpointReadUnlock(); + } if (!entry.hasValue()) loadMap.put(entry.key(), entry); diff --git a/modules/core/src/main/java/org/apache/ignite/internal/processors/cache/distributed/dht/GridDhtTxLocalAdapter.java b/modules/core/src/main/java/org/apache/ignite/internal/processors/cache/distributed/dht/GridDhtTxLocalAdapter.java index af3655f3b2ffb..f6f9a182ab886 100644 --- a/modules/core/src/main/java/org/apache/ignite/internal/processors/cache/distributed/dht/GridDhtTxLocalAdapter.java +++ b/modules/core/src/main/java/org/apache/ignite/internal/processors/cache/distributed/dht/GridDhtTxLocalAdapter.java @@ -620,19 +620,26 @@ IgniteInternalFuture lockAllAsync( if (txEntry == null) { GridDhtCacheEntry cached; - while (true) { - try { - cached = dhtCache.entryExx(key, topVer); + cctx.database().checkpointReadLock(); - cached.unswap(read); + try { + while (true) { + try { + cached = dhtCache.entryExx(key, topVer); - break; - } - catch (GridCacheEntryRemovedException ignore) { - if (log.isDebugEnabled()) - log.debug("Get removed entry: " + key); + cached.unswap(read); + + break; + } + catch (GridCacheEntryRemovedException ignore) { + if (log.isDebugEnabled()) + log.debug("Get removed entry: " + key); + } } } + finally { + cctx.database().checkpointReadUnlock(); + } addActiveCache(dhtCache.context(), false); diff --git a/modules/core/src/main/java/org/apache/ignite/internal/processors/cache/distributed/dht/GridDhtTxPrepareFuture.java b/modules/core/src/main/java/org/apache/ignite/internal/processors/cache/distributed/dht/GridDhtTxPrepareFuture.java index 7af5629a66818..873fb674f4d04 100644 --- a/modules/core/src/main/java/org/apache/ignite/internal/processors/cache/distributed/dht/GridDhtTxPrepareFuture.java +++ b/modules/core/src/main/java/org/apache/ignite/internal/processors/cache/distributed/dht/GridDhtTxPrepareFuture.java @@ -1212,11 +1212,18 @@ private IgniteInternalFuture forceRebalanceKeys(Map + * Root cause of the deadlock was wrong locking order when cp readlock is acquired under already locked GridCacheMapEntry instance + * by the first tx put op. + */ +public class TxPutTxGetCheckpointerDeadlockTest extends GridCommonAbstractTest { + /** */ + private final AtomicBoolean deadlockDetected = new AtomicBoolean(false); + + /** */ + private final AtomicBoolean testFinished = new AtomicBoolean(false); + + /** */ + private static final String CP_WRITE_LOCK_SWITCHING_THREAD_NAME = "cp-write-lock-switching-runner"; + + /** {@inheritDoc} */ + @Override protected IgniteConfiguration getConfiguration(String igniteInstanceName) throws Exception { + IgniteConfiguration cfg = super.getConfiguration(igniteInstanceName); + + cfg.setDataStorageConfiguration( + new DataStorageConfiguration() + .setDefaultDataRegionConfiguration( + new DataRegionConfiguration() + .setPersistenceEnabled(true) + ) + ); + + cfg.setFailureHandler(new StopNodeFailureHandler()); + + return cfg; + } + + /** {@inheritDoc} */ + @Override protected void beforeTest() throws Exception { + stopAllGrids(); + + cleanPersistenceDir(); + } + + /** {@inheritDoc} */ + @SuppressWarnings({"deprecation"}) + @Override protected void afterTest() throws Exception { + if (deadlockDetected.get()) { + Thread.getAllStackTraces().keySet().stream() + .filter(t -> t.getName().startsWith(CP_WRITE_LOCK_SWITCHING_THREAD_NAME)) + .forEach(Thread::interrupt); + } + + testFinished.set(true); + + stopAllGrids(); + + cleanPersistenceDir(); + } + + /** + * Tests for the absence of a deadlock between transactional cache operations and checkpointer write lock acquisition. + *

+ * This test simulates a scenario where: + *

    + *
  • One thread performs continuous transactional {@code put} operations on a cache entry.
  • + *
  • Another thread performs continuous transactional {@code get} operations on the same entry, potentially + * triggering unswapping of the entry under lock.
  • + *
  • A third thread repeatedly acquires and releases the checkpoint write lock, simulating checkpointer activity.
  • + *
+ *

+ *

+ * The primary purpose is to verify that there is no deadlock caused by incorrect lock ordering, + * specifically when a transactional {@code put} holds a lock on a {@link org.apache.ignite.internal.processors.cache.GridCacheMapEntry} + * while attempting to acquire a checkpoint read lock, at the same time as the checkpointer + * tries to acquire checkpoint write lock. + *

+ * + * @throws Exception If failed. + */ + @Test + public void testDeadlock() throws Exception { + IgniteEx ignite = startGrid(0); + + ignite.cluster().state(ClusterState.ACTIVE); + + IgniteCache cache = ignite + .getOrCreateCache(new CacheConfiguration<>("test").setAtomicityMode(CacheAtomicityMode.TRANSACTIONAL)); + + AtomicInteger cnt = new AtomicInteger(-1); + + GridTestUtils.runAsync( + () -> { + while (!testFinished.get()) { + try (Transaction tx = ignite.transactions().txStart(PESSIMISTIC, READ_COMMITTED)) { + cache.put(0, cnt.incrementAndGet()); + tx.commit(); + } + } + }, + "write-tx-runner" + ); + + GridTestUtils.runAsync( + () -> { + while (!testFinished.get()) { + try (Transaction tx = ignite.transactions().txStart(PESSIMISTIC, READ_COMMITTED)) { + cache.get(0); + } + } + }, + "read-tx-runner" + ); + + GridTestUtils.runAsync( + () -> { + IgniteCacheDatabaseSharedManager db = ignite.context().cache().context().database(); + CheckpointManager cpMgr = ((GridCacheDatabaseSharedManager)db).getCheckpointManager(); + CheckpointTimeoutLock timeoutLock = cpMgr.checkpointTimeoutLock(); + ReentrantReadWriteLock.WriteLock cpWriteLock = GridTestUtils.getFieldValue(timeoutLock, + "checkpointReadWriteLock", "checkpointLock", "writeLock"); + + while (!testFinished.get()) { + // an interruptible version of lock method is used to allow end the deadlock at the end of the test + cpWriteLock.lockInterruptibly(); + cpWriteLock.unlock(); + } + }, + CP_WRITE_LOCK_SWITCHING_THREAD_NAME + ); + + GridTestUtils.runAsync( + () -> { + int prevVal = -1; + + while (!testFinished.get()) { + int curVal = cnt.get(); + + if (curVal != -1 && curVal == prevVal) { + deadlockDetected.set(true); + + return; + } + else { + prevVal = curVal; + + doSleep(200); + } + } + }, + "progress-monitor" + ); + + assertTrue(GridTestUtils.waitForCondition(() -> cnt.get() != -1, 10_000L)); + assertFalse("Unexpected deadlock detected", GridTestUtils.waitForCondition(deadlockDetected::get, 10_000L)); + } +} diff --git a/modules/core/src/test/java/org/apache/ignite/testsuites/IgnitePdsTestSuite7.java b/modules/core/src/test/java/org/apache/ignite/testsuites/IgnitePdsTestSuite7.java index 7cd4bb16d5b30..627ad5e8fb39c 100644 --- a/modules/core/src/test/java/org/apache/ignite/testsuites/IgnitePdsTestSuite7.java +++ b/modules/core/src/test/java/org/apache/ignite/testsuites/IgnitePdsTestSuite7.java @@ -21,6 +21,7 @@ import java.util.Collection; import java.util.List; import org.apache.ignite.internal.processors.cache.persistence.EagerTtlTest; +import org.apache.ignite.internal.processors.cache.persistence.db.TxPutTxGetCheckpointerDeadlockTest; import org.apache.ignite.internal.processors.cache.persistence.wal.WalRotatedIdPartRecordTest; import org.apache.ignite.testframework.GridTestUtils; import org.apache.ignite.testframework.junits.DynamicSuite; @@ -47,6 +48,8 @@ public static List> suite(Collection ignoredTests) { GridTestUtils.addTestIfNeeded(suite, WalRotatedIdPartRecordTest.class, ignoredTests); + GridTestUtils.addTestIfNeeded(suite, TxPutTxGetCheckpointerDeadlockTest.class, ignoredTests); + return suite; } }