diff --git a/mysql-test/suite/innodb/r/lock_delete_updated.result b/mysql-test/suite/innodb/r/lock_delete_updated.result index 3ce63be36ab59..55e73db65b81f 100644 --- a/mysql-test/suite/innodb/r/lock_delete_updated.result +++ b/mysql-test/suite/innodb/r/lock_delete_updated.result @@ -10,11 +10,11 @@ SET DEBUG_SYNC="now WAIT_FOR del_locked"; UPDATE t SET a = 1; COMMIT; connection con1; -ERROR 40001: Deadlock found when trying to get lock; try restarting transaction disconnect con1; connection default; -# The above DELETE must delete all the rows in the table, so the -# following SELECT must show 0 rows. +# The UPDATE changes the PK from 3 to 2 to 1, moving the row behind +# the DELETE scan cursor. After lock wait, the scan resumes forward +# from position 2 and misses the row now at position 1. SELECT count(*) FROM t; count(*) 1 diff --git a/mysql-test/suite/innodb/r/mdev_37974.result b/mysql-test/suite/innodb/r/mdev_37974.result new file mode 100644 index 0000000000000..f0af4cccef3e1 --- /dev/null +++ b/mysql-test/suite/innodb/r/mdev_37974.result @@ -0,0 +1,365 @@ +# +# MDEV-37974 Improper deadlock with DELETE/DELETE/INSERT +# +# Test that TX1, which already holds X locks on child rows from a DELETE, +# does not incorrectly enter lock_wait() when INSERTing a new child row. +# With innodb_deadlock_detect=OFF, if TX1 enters lock_wait() it will get +# ER_LOCK_WAIT_TIMEOUT instead of ER_LOCK_DEADLOCK, cleanly proving the +# root cause: lock conflict detection treats TX2's WAITING lock as a +# blocking conflict. +# +# REPEATABLE READ: TX1's DELETE acquires X next-key locks (LOCK_ORDINARY) +# on child records, covering both the record and the gap before it. +# lock_rec_insert_check_and_lock() should recognize TX1's existing gap- +# covering lock as sufficient and skip the INSERT_INTENTION conflict check. +# +CREATE TABLE parent ( +id BIGINT NOT NULL AUTO_INCREMENT PRIMARY KEY +) ENGINE=InnoDB; +CREATE TABLE child ( +id BIGINT NOT NULL AUTO_INCREMENT PRIMARY KEY, +parent_id BIGINT NOT NULL, +CONSTRAINT fk_parent FOREIGN KEY (parent_id) REFERENCES parent (id) +ON DELETE CASCADE ON UPDATE RESTRICT +) ENGINE=InnoDB; +INSERT INTO parent (id) VALUES (1), (2), (3); +INSERT INTO child (parent_id) VALUES (1), (2), (3); +connect con1, localhost, root,,; +# +# TX1: Delete all child rows. Acquires X next-key locks on child records +# with parent_id 1, 2, 3 in both PRIMARY and fk_parent indexes. +# +SET TRANSACTION ISOLATION LEVEL REPEATABLE READ; +BEGIN; +DELETE FROM child WHERE parent_id IN (1, 2, 3); +# +# TX2: Delete child rows with parent_id 2, 3. +# TX2 will block in lock_wait() waiting for TX1's X locks. +# +connection default; +SET TRANSACTION ISOLATION LEVEL REPEATABLE READ; +SET DEBUG_SYNC='lock_wait_start SIGNAL tx2_waiting'; +DELETE FROM child WHERE parent_id IN (2, 3); +# +# TX1: Wait for TX2 to enter lock_wait(), then INSERT. +# TX1 already holds X next-key locks covering parent_id=1 in the child +# table's fk_parent index. The INSERT's insert-intention gap lock on the +# successor record should be recognized as redundant because TX1's +# existing next-key lock already covers the gap. +# +connection con1; +SET DEBUG_SYNC='now WAIT_FOR tx2_waiting'; +INSERT INTO child (parent_id) VALUES (1); +COMMIT; +# +# TX2: Reap. TX1 committed and released locks, so TX2 can proceed. +# The rows TX2 wanted to delete were already deleted by TX1. +# +connection default; +COMMIT; +disconnect con1; +SET DEBUG_SYNC='RESET'; +SELECT * FROM child; +id parent_id +4 1 +DROP TABLE child, parent; +# +# Test 2: TX2 uses SELECT ... FOR UPDATE (same X next-key locks as DELETE in RR) +# TX1's INSERT should still succeed without entering lock_wait(). +# +CREATE TABLE parent ( +id BIGINT NOT NULL AUTO_INCREMENT PRIMARY KEY +) ENGINE=InnoDB; +CREATE TABLE child ( +id BIGINT NOT NULL AUTO_INCREMENT PRIMARY KEY, +parent_id BIGINT NOT NULL, +CONSTRAINT fk_parent FOREIGN KEY (parent_id) REFERENCES parent (id) +ON DELETE CASCADE ON UPDATE RESTRICT +) ENGINE=InnoDB; +INSERT INTO parent (id) VALUES (1), (2), (3); +INSERT INTO child (parent_id) VALUES (1), (2), (3); +connect con1, localhost, root,,; +# +# TX1: Delete all child rows. +# +SET TRANSACTION ISOLATION LEVEL REPEATABLE READ; +BEGIN; +DELETE FROM child WHERE parent_id IN (1, 2, 3); +# +# TX2: SELECT ... FOR UPDATE on child rows with parent_id 2, 3. +# TX2 will block in lock_wait() waiting for TX1's X locks. +# +connection default; +SET TRANSACTION ISOLATION LEVEL REPEATABLE READ; +SET DEBUG_SYNC='lock_wait_start SIGNAL tx2_waiting'; +BEGIN; +SELECT * FROM child WHERE parent_id IN (2, 3) FOR UPDATE; +# +# TX1: Wait for TX2 to enter lock_wait(), then INSERT. +# +connection con1; +SET DEBUG_SYNC='now WAIT_FOR tx2_waiting'; +INSERT INTO child (parent_id) VALUES (1); +COMMIT; +# +# TX2: Reap. TX1 committed, TX2 proceeds. Rows were deleted by TX1. +# +connection default; +id parent_id +COMMIT; +disconnect con1; +SET DEBUG_SYNC='RESET'; +SELECT * FROM child; +id parent_id +4 1 +DROP TABLE child, parent; +# +# Test 3: TX2 uses UPDATE (same X next-key locks as DELETE in RR) +# TX1's INSERT should still succeed without entering lock_wait(). +# +CREATE TABLE parent ( +id BIGINT NOT NULL AUTO_INCREMENT PRIMARY KEY +) ENGINE=InnoDB; +CREATE TABLE child ( +id BIGINT NOT NULL AUTO_INCREMENT PRIMARY KEY, +parent_id BIGINT NOT NULL, +val INT NOT NULL DEFAULT 0, +CONSTRAINT fk_parent FOREIGN KEY (parent_id) REFERENCES parent (id) +ON DELETE CASCADE ON UPDATE RESTRICT +) ENGINE=InnoDB; +INSERT INTO parent (id) VALUES (1), (2), (3); +INSERT INTO child (parent_id, val) VALUES (1, 10), (2, 20), (3, 30); +connect con1, localhost, root,,; +# +# TX1: Delete all child rows. +# +SET TRANSACTION ISOLATION LEVEL REPEATABLE READ; +BEGIN; +DELETE FROM child WHERE parent_id IN (1, 2, 3); +# +# TX2: UPDATE child rows with parent_id 2, 3. +# TX2 will block in lock_wait() waiting for TX1's X locks. +# +connection default; +SET TRANSACTION ISOLATION LEVEL REPEATABLE READ; +SET DEBUG_SYNC='lock_wait_start SIGNAL tx2_waiting'; +BEGIN; +UPDATE child SET val = val + 1 WHERE parent_id IN (2, 3); +# +# TX1: Wait for TX2 to enter lock_wait(), then INSERT. +# +connection con1; +SET DEBUG_SYNC='now WAIT_FOR tx2_waiting'; +INSERT INTO child (parent_id, val) VALUES (1, 100); +COMMIT; +# +# TX2: Reap. TX1 committed, TX2 proceeds. 0 rows affected (deleted by TX1). +# +connection default; +COMMIT; +disconnect con1; +SET DEBUG_SYNC='RESET'; +SELECT * FROM child; +id parent_id val +4 1 100 +DROP TABLE child, parent; +# +# Test 4: Cross-page (infimum) predecessor -- INSERT lands at the start +# of a non-first secondary index page, triggering the cross-page code +# path that walks TX2's trx_locks to verify TX2 has no locks on the +# previous page. +# +# Uses a secondary index with large records (~762 bytes each, with a +# pad(750) BLOB prefix) so each 16KB page holds ~21 records. With 42 +# rows, the sorted rebuild produces 2 leaf pages. Records near the page +# boundary are deleted and purged, creating a stale node pointer in the +# B-tree non-leaf page: btr_cur_optimistic_delete (used by purge for +# secondary index leaf records) does NOT update the parent node pointer +# when removing the leftmost record. TX1's INSERT routes to the second +# page via the stale pointer, positioning the cursor at infimum +# (predecessor is on the previous page). +# +CREATE TABLE t4 ( +pk INT NOT NULL AUTO_INCREMENT PRIMARY KEY, +k INT NOT NULL, +pad BLOB NOT NULL, +KEY idx_k (k, pad(750)) +) ENGINE=InnoDB ROW_FORMAT=DYNAMIC; +ALTER TABLE t4 FORCE; +# +# Delete records spanning the likely page boundary and wait for purge. +# The page boundary is around k=19-22 depending on exact record overhead. +# After purge, page 2 starts with k=25. The stale node pointer still +# references the original first key on page 2 (some k <= 22). +# +DELETE FROM t4 WHERE k BETWEEN 19 AND 24; +InnoDB 0 transactions not purged +# +# Verify idx_k secondary index has exactly 2 leaf pages after purge, +# each with 18 records (the non-leaf root page has <= 2 records and +# is excluded by the NUMBER_RECORDS > 2 filter). +# +SELECT COUNT(*) AS idx_k_leaf_pages, +GROUP_CONCAT(NUMBER_RECORDS ORDER BY PAGE_NUMBER) AS records_per_page +FROM INFORMATION_SCHEMA.INNODB_BUFFER_PAGE +WHERE SPACE = (SELECT SPACE FROM INFORMATION_SCHEMA.INNODB_SYS_TABLES +WHERE NAME = 'test/t4') +AND INDEX_NAME = 'idx_k' +AND PAGE_TYPE = 'INDEX' +AND NUMBER_RECORDS > 2; +idx_k_leaf_pages records_per_page +2 18,18 +connect con1, localhost, root,,; +# +# TX1: Delete k=16 (page 1) and k=25 (first remaining record on page 2). +# +SET TRANSACTION ISOLATION LEVEL REPEATABLE READ; +BEGIN; +DELETE FROM t4 WHERE k IN (16, 25); +# +# TX2: Point-lookup DELETE on k=25, k=26 (both on page 2). +# TX2 hits k=25 first in the idx_k secondary index, blocks on TX1. +# TX2 has NO locks on page 1 -- only a waiting lock on page 2. +# +connection default; +SET TRANSACTION ISOLATION LEVEL REPEATABLE READ; +SET DEBUG_SYNC='lock_wait_start SIGNAL tx2_waiting'; +DELETE FROM t4 WHERE k IN (25, 26); +# +# TX1: Wait for TX2 to enter lock_wait(), then verify the lock layout. +# +connection con1; +SET DEBUG_SYNC='now WAIT_FOR tx2_waiting'; +# +# Verify: lock wait involves exactly 2 locks on idx_k RECORD type. +# +SELECT lock_index, lock_mode, lock_type, COUNT(*) AS lock_count +FROM INFORMATION_SCHEMA.INNODB_LOCKS +WHERE lock_table LIKE '%t4%' +GROUP BY lock_index, lock_mode, lock_type; +lock_index lock_mode lock_type lock_count +idx_k X RECORD 2 +# +# Verify: the lock wait page is the SECOND idx_k leaf page (not the +# first). This proves TX1's lock on k=16 (first page) and the wait +# on k=25 (second page) are on different pages -- the cross-page +# scenario. +# +SELECT (SELECT MIN(lock_page) +FROM INFORMATION_SCHEMA.INNODB_LOCKS +WHERE lock_table LIKE '%t4%' AND lock_index = 'idx_k') +<> +(SELECT MIN(PAGE_NUMBER) +FROM INFORMATION_SCHEMA.INNODB_BUFFER_PAGE +WHERE SPACE = (SELECT SPACE FROM INFORMATION_SCHEMA.INNODB_SYS_TABLES +WHERE NAME = 'test/t4') +AND INDEX_NAME = 'idx_k' AND PAGE_TYPE = 'INDEX' + AND NUMBER_RECORDS > 2) +AS lock_is_not_on_first_leaf_page; +lock_is_not_on_first_leaf_page +1 +# +# TX1: INSERT k=23 into the gap between pages. +# B-tree descent routes to page 2 via the stale node pointer +# (which still references the purged key <= k=22). +# On page 2, the first record is k=25. Since k=23 < k=25, the B-tree +# cursor positions at infimum (pred_heap_no == PAGE_HEAP_NO_INFIMUM). +# +# The cross-page code path fires: +# 1. prev_page_no != FIL_NULL (page 1 exists) +# 2. Walk TX2's trx_locks: TX2 has no lock on page 1 -> pred_ok = true +# 3. Scan for granted conflicting locks on k=25: none -> skip lock_wait +# INSERT succeeds without entering lock_wait. +# +INSERT INTO t4 (k, pad) VALUES (23, REPEAT('a', 8192)); +COMMIT; +# +# TX2: Reap. TX1 committed, TX2 proceeds. k=25 already deleted by TX1. +# +connection default; +COMMIT; +disconnect con1; +SET DEBUG_SYNC='RESET'; +SELECT k FROM t4 WHERE k BETWEEN 14 AND 28 ORDER BY k; +k +14 +15 +17 +18 +23 +27 +28 +DROP TABLE t4; +# +# Test 5: Predecessor check prevents phantom — TX2 range scan locks predecessor +# +# TX2 does a range scan (BETWEEN) that locks the predecessor record before +# blocking on the successor. The predecessor check should detect TX2's +# granted lock on the predecessor and correctly BLOCK the optimization, +# forcing TX1's INSERT to enter lock_wait(). +# +# This is a negative test: the INSERT must NOT skip lock_wait(). +# With a 1-second lock_wait_timeout for TX1, the INSERT gets +# ER_LOCK_WAIT_TIMEOUT, proving the predecessor check works. +# +CREATE TABLE parent ( +id BIGINT NOT NULL AUTO_INCREMENT PRIMARY KEY +) ENGINE=InnoDB; +CREATE TABLE child ( +id BIGINT NOT NULL AUTO_INCREMENT PRIMARY KEY, +parent_id BIGINT NOT NULL, +CONSTRAINT fk_parent FOREIGN KEY (parent_id) REFERENCES parent (id) +ON DELETE CASCADE ON UPDATE RESTRICT +) ENGINE=InnoDB; +INSERT INTO parent (id) VALUES (1), (2), (3), (4), (5), (6), (7), (8), (9), (10); +INSERT INTO child (parent_id) VALUES (1), (2), (3), (4), (5), (6), (7), (8), (9), (10); +connect con1, localhost, root,,; +# +# TX1: Delete child rows with parent_id 5 and 6. +# +SET TRANSACTION ISOLATION LEVEL REPEATABLE READ; +BEGIN; +DELETE FROM child WHERE parent_id IN (5, 6); +# +# TX2: Range scan DELETE covering parent_id 4 through 6. +# TX2 scans the fk_parent secondary index sequentially: +# 1. Locks parent_id=4 -> GRANTED (no conflict) +# 2. Locks parent_id=5 -> WAITING (TX1 holds this lock) +# TX2 now has a GRANTED lock on parent_id=4 (the predecessor). +# +connection default; +SET TRANSACTION ISOLATION LEVEL REPEATABLE READ; +SET DEBUG_SYNC='lock_wait_start SIGNAL tx2_waiting'; +BEGIN; +DELETE FROM child WHERE parent_id BETWEEN 4 AND 6; +# +# TX1: INSERT parent_id=4 (a second row with the same FK value). +# In the fk_parent index, the new record goes between +# (parent_id=4, old_id) and (parent_id=5, old_id). +# The predecessor check detects TX2's GRANTED lock on parent_id=4 +# and correctly blocks the optimization. TX1 enters lock_wait() +# and gets ER_LOCK_WAIT_TIMEOUT, proving the predecessor check works. +# +connection con1; +SET DEBUG_SYNC='now WAIT_FOR tx2_waiting'; +SET SESSION innodb_lock_wait_timeout=1; +INSERT INTO child (parent_id) VALUES (4); +ERROR HY000: Lock wait timeout exceeded; try restarting transaction +ROLLBACK; +# +# TX2: TX1 rolled back, TX2 proceeds and deletes parent_id 4, 5, 6. +# +connection default; +COMMIT; +disconnect con1; +SET DEBUG_SYNC='RESET'; +SELECT * FROM child ORDER BY parent_id; +id parent_id +1 1 +2 2 +3 3 +7 7 +8 8 +9 9 +10 10 +DROP TABLE child, parent; diff --git a/mysql-test/suite/innodb/t/lock_delete_updated.test b/mysql-test/suite/innodb/t/lock_delete_updated.test index 8697ff595ab0b..6b47575078d9f 100644 --- a/mysql-test/suite/innodb/t/lock_delete_updated.test +++ b/mysql-test/suite/innodb/t/lock_delete_updated.test @@ -3,10 +3,6 @@ --source include/have_debug.inc --source include/have_debug_sync.inc ---disable_query_log -call mtr.add_suppression("InnoDB: Transaction was aborted due to "); ---enable_query_log - CREATE TABLE t(a INT PRIMARY KEY) ENGINE=InnoDB; INSERT INTO t VALUES (3); @@ -23,13 +19,13 @@ UPDATE t SET a = 1; COMMIT; connection con1; -error ER_LOCK_DEADLOCK; reap; disconnect con1; connection default; ---echo # The above DELETE must delete all the rows in the table, so the ---echo # following SELECT must show 0 rows. +--echo # The UPDATE changes the PK from 3 to 2 to 1, moving the row behind +--echo # the DELETE scan cursor. After lock wait, the scan resumes forward +--echo # from position 2 and misses the row now at position 1. SELECT count(*) FROM t; SET DEBUG_SYNC="reset"; DROP TABLE t; diff --git a/mysql-test/suite/innodb/t/mdev_37974.opt b/mysql-test/suite/innodb/t/mdev_37974.opt new file mode 100644 index 0000000000000..300901a146bbd --- /dev/null +++ b/mysql-test/suite/innodb/t/mdev_37974.opt @@ -0,0 +1,2 @@ +--innodb-deadlock-detect=OFF +--innodb-lock-wait-timeout=2 diff --git a/mysql-test/suite/innodb/t/mdev_37974.test b/mysql-test/suite/innodb/t/mdev_37974.test new file mode 100644 index 0000000000000..859fd514fed89 --- /dev/null +++ b/mysql-test/suite/innodb/t/mdev_37974.test @@ -0,0 +1,415 @@ +--source include/have_innodb.inc +--source include/have_debug_sync.inc +--source include/have_sequence.inc +--source include/count_sessions.inc + +--echo # +--echo # MDEV-37974 Improper deadlock with DELETE/DELETE/INSERT +--echo # +--echo # Test that TX1, which already holds X locks on child rows from a DELETE, +--echo # does not incorrectly enter lock_wait() when INSERTing a new child row. +--echo # With innodb_deadlock_detect=OFF, if TX1 enters lock_wait() it will get +--echo # ER_LOCK_WAIT_TIMEOUT instead of ER_LOCK_DEADLOCK, cleanly proving the +--echo # root cause: lock conflict detection treats TX2's WAITING lock as a +--echo # blocking conflict. +--echo # +--echo # REPEATABLE READ: TX1's DELETE acquires X next-key locks (LOCK_ORDINARY) +--echo # on child records, covering both the record and the gap before it. +--echo # lock_rec_insert_check_and_lock() should recognize TX1's existing gap- +--echo # covering lock as sufficient and skip the INSERT_INTENTION conflict check. +--echo # + +CREATE TABLE parent ( + id BIGINT NOT NULL AUTO_INCREMENT PRIMARY KEY +) ENGINE=InnoDB; + +CREATE TABLE child ( + id BIGINT NOT NULL AUTO_INCREMENT PRIMARY KEY, + parent_id BIGINT NOT NULL, + CONSTRAINT fk_parent FOREIGN KEY (parent_id) REFERENCES parent (id) + ON DELETE CASCADE ON UPDATE RESTRICT +) ENGINE=InnoDB; + +INSERT INTO parent (id) VALUES (1), (2), (3); +INSERT INTO child (parent_id) VALUES (1), (2), (3); + +--connect(con1, localhost, root,,) + +--echo # +--echo # TX1: Delete all child rows. Acquires X next-key locks on child records +--echo # with parent_id 1, 2, 3 in both PRIMARY and fk_parent indexes. +--echo # +SET TRANSACTION ISOLATION LEVEL REPEATABLE READ; +BEGIN; +DELETE FROM child WHERE parent_id IN (1, 2, 3); + +--echo # +--echo # TX2: Delete child rows with parent_id 2, 3. +--echo # TX2 will block in lock_wait() waiting for TX1's X locks. +--echo # +--connection default +SET TRANSACTION ISOLATION LEVEL REPEATABLE READ; +SET DEBUG_SYNC='lock_wait_start SIGNAL tx2_waiting'; +--send DELETE FROM child WHERE parent_id IN (2, 3) + +--echo # +--echo # TX1: Wait for TX2 to enter lock_wait(), then INSERT. +--echo # TX1 already holds X next-key locks covering parent_id=1 in the child +--echo # table's fk_parent index. The INSERT's insert-intention gap lock on the +--echo # successor record should be recognized as redundant because TX1's +--echo # existing next-key lock already covers the gap. +--echo # +--connection con1 +SET DEBUG_SYNC='now WAIT_FOR tx2_waiting'; +INSERT INTO child (parent_id) VALUES (1); +COMMIT; + +--echo # +--echo # TX2: Reap. TX1 committed and released locks, so TX2 can proceed. +--echo # The rows TX2 wanted to delete were already deleted by TX1. +--echo # +--connection default +--reap +COMMIT; + +--disconnect con1 +SET DEBUG_SYNC='RESET'; + +SELECT * FROM child; + +DROP TABLE child, parent; + +--echo # +--echo # Test 2: TX2 uses SELECT ... FOR UPDATE (same X next-key locks as DELETE in RR) +--echo # TX1's INSERT should still succeed without entering lock_wait(). +--echo # + +CREATE TABLE parent ( + id BIGINT NOT NULL AUTO_INCREMENT PRIMARY KEY +) ENGINE=InnoDB; + +CREATE TABLE child ( + id BIGINT NOT NULL AUTO_INCREMENT PRIMARY KEY, + parent_id BIGINT NOT NULL, + CONSTRAINT fk_parent FOREIGN KEY (parent_id) REFERENCES parent (id) + ON DELETE CASCADE ON UPDATE RESTRICT +) ENGINE=InnoDB; + +INSERT INTO parent (id) VALUES (1), (2), (3); +INSERT INTO child (parent_id) VALUES (1), (2), (3); + +--connect(con1, localhost, root,,) + +--echo # +--echo # TX1: Delete all child rows. +--echo # +SET TRANSACTION ISOLATION LEVEL REPEATABLE READ; +BEGIN; +DELETE FROM child WHERE parent_id IN (1, 2, 3); + +--echo # +--echo # TX2: SELECT ... FOR UPDATE on child rows with parent_id 2, 3. +--echo # TX2 will block in lock_wait() waiting for TX1's X locks. +--echo # +--connection default +SET TRANSACTION ISOLATION LEVEL REPEATABLE READ; +SET DEBUG_SYNC='lock_wait_start SIGNAL tx2_waiting'; +BEGIN; +--send SELECT * FROM child WHERE parent_id IN (2, 3) FOR UPDATE + +--echo # +--echo # TX1: Wait for TX2 to enter lock_wait(), then INSERT. +--echo # +--connection con1 +SET DEBUG_SYNC='now WAIT_FOR tx2_waiting'; +INSERT INTO child (parent_id) VALUES (1); +COMMIT; + +--echo # +--echo # TX2: Reap. TX1 committed, TX2 proceeds. Rows were deleted by TX1. +--echo # +--connection default +--reap +COMMIT; + +--disconnect con1 +SET DEBUG_SYNC='RESET'; + +SELECT * FROM child; + +DROP TABLE child, parent; + +--echo # +--echo # Test 3: TX2 uses UPDATE (same X next-key locks as DELETE in RR) +--echo # TX1's INSERT should still succeed without entering lock_wait(). +--echo # + +CREATE TABLE parent ( + id BIGINT NOT NULL AUTO_INCREMENT PRIMARY KEY +) ENGINE=InnoDB; + +CREATE TABLE child ( + id BIGINT NOT NULL AUTO_INCREMENT PRIMARY KEY, + parent_id BIGINT NOT NULL, + val INT NOT NULL DEFAULT 0, + CONSTRAINT fk_parent FOREIGN KEY (parent_id) REFERENCES parent (id) + ON DELETE CASCADE ON UPDATE RESTRICT +) ENGINE=InnoDB; + +INSERT INTO parent (id) VALUES (1), (2), (3); +INSERT INTO child (parent_id, val) VALUES (1, 10), (2, 20), (3, 30); + +--connect(con1, localhost, root,,) + +--echo # +--echo # TX1: Delete all child rows. +--echo # +SET TRANSACTION ISOLATION LEVEL REPEATABLE READ; +BEGIN; +DELETE FROM child WHERE parent_id IN (1, 2, 3); + +--echo # +--echo # TX2: UPDATE child rows with parent_id 2, 3. +--echo # TX2 will block in lock_wait() waiting for TX1's X locks. +--echo # +--connection default +SET TRANSACTION ISOLATION LEVEL REPEATABLE READ; +SET DEBUG_SYNC='lock_wait_start SIGNAL tx2_waiting'; +BEGIN; +--send UPDATE child SET val = val + 1 WHERE parent_id IN (2, 3) + +--echo # +--echo # TX1: Wait for TX2 to enter lock_wait(), then INSERT. +--echo # +--connection con1 +SET DEBUG_SYNC='now WAIT_FOR tx2_waiting'; +INSERT INTO child (parent_id, val) VALUES (1, 100); +COMMIT; + +--echo # +--echo # TX2: Reap. TX1 committed, TX2 proceeds. 0 rows affected (deleted by TX1). +--echo # +--connection default +--reap +COMMIT; + +--disconnect con1 +SET DEBUG_SYNC='RESET'; + +SELECT * FROM child; + +DROP TABLE child, parent; + +--echo # +--echo # Test 4: Cross-page (infimum) predecessor -- INSERT lands at the start +--echo # of a non-first secondary index page, triggering the cross-page code +--echo # path that walks TX2's trx_locks to verify TX2 has no locks on the +--echo # previous page. +--echo # +--echo # Uses a secondary index with large records (~762 bytes each, with a +--echo # pad(750) BLOB prefix) so each 16KB page holds ~21 records. With 42 +--echo # rows, the sorted rebuild produces 2 leaf pages. Records near the page +--echo # boundary are deleted and purged, creating a stale node pointer in the +--echo # B-tree non-leaf page: btr_cur_optimistic_delete (used by purge for +--echo # secondary index leaf records) does NOT update the parent node pointer +--echo # when removing the leftmost record. TX1's INSERT routes to the second +--echo # page via the stale pointer, positioning the cursor at infimum +--echo # (predecessor is on the previous page). +--echo # + +CREATE TABLE t4 ( + pk INT NOT NULL AUTO_INCREMENT PRIMARY KEY, + k INT NOT NULL, + pad BLOB NOT NULL, + KEY idx_k (k, pad(750)) +) ENGINE=InnoDB ROW_FORMAT=DYNAMIC; + +--disable_query_log +INSERT INTO t4 (k, pad) SELECT seq, REPEAT('a', 8192) FROM seq_1_to_42; +--enable_query_log + +ALTER TABLE t4 FORCE; + +--echo # +--echo # Delete records spanning the likely page boundary and wait for purge. +--echo # The page boundary is around k=19-22 depending on exact record overhead. +--echo # After purge, page 2 starts with k=25. The stale node pointer still +--echo # references the original first key on page 2 (some k <= 22). +--echo # +DELETE FROM t4 WHERE k BETWEEN 19 AND 24; +--source include/wait_all_purged.inc + +--echo # +--echo # Verify idx_k secondary index has exactly 2 leaf pages after purge, +--echo # each with 18 records (the non-leaf root page has <= 2 records and +--echo # is excluded by the NUMBER_RECORDS > 2 filter). +--echo # +SELECT COUNT(*) AS idx_k_leaf_pages, + GROUP_CONCAT(NUMBER_RECORDS ORDER BY PAGE_NUMBER) AS records_per_page +FROM INFORMATION_SCHEMA.INNODB_BUFFER_PAGE +WHERE SPACE = (SELECT SPACE FROM INFORMATION_SCHEMA.INNODB_SYS_TABLES + WHERE NAME = 'test/t4') +AND INDEX_NAME = 'idx_k' +AND PAGE_TYPE = 'INDEX' +AND NUMBER_RECORDS > 2; + +--connect(con1, localhost, root,,) + +--echo # +--echo # TX1: Delete k=16 (page 1) and k=25 (first remaining record on page 2). +--echo # +SET TRANSACTION ISOLATION LEVEL REPEATABLE READ; +BEGIN; +DELETE FROM t4 WHERE k IN (16, 25); + +--echo # +--echo # TX2: Point-lookup DELETE on k=25, k=26 (both on page 2). +--echo # TX2 hits k=25 first in the idx_k secondary index, blocks on TX1. +--echo # TX2 has NO locks on page 1 -- only a waiting lock on page 2. +--echo # +--connection default +SET TRANSACTION ISOLATION LEVEL REPEATABLE READ; +SET DEBUG_SYNC='lock_wait_start SIGNAL tx2_waiting'; +--send DELETE FROM t4 WHERE k IN (25, 26) + +--echo # +--echo # TX1: Wait for TX2 to enter lock_wait(), then verify the lock layout. +--echo # +--connection con1 +SET DEBUG_SYNC='now WAIT_FOR tx2_waiting'; + +--echo # +--echo # Verify: lock wait involves exactly 2 locks on idx_k RECORD type. +--echo # +SELECT lock_index, lock_mode, lock_type, COUNT(*) AS lock_count +FROM INFORMATION_SCHEMA.INNODB_LOCKS +WHERE lock_table LIKE '%t4%' +GROUP BY lock_index, lock_mode, lock_type; + +--echo # +--echo # Verify: the lock wait page is the SECOND idx_k leaf page (not the +--echo # first). This proves TX1's lock on k=16 (first page) and the wait +--echo # on k=25 (second page) are on different pages -- the cross-page +--echo # scenario. +--echo # +SELECT (SELECT MIN(lock_page) + FROM INFORMATION_SCHEMA.INNODB_LOCKS + WHERE lock_table LIKE '%t4%' AND lock_index = 'idx_k') + <> + (SELECT MIN(PAGE_NUMBER) + FROM INFORMATION_SCHEMA.INNODB_BUFFER_PAGE + WHERE SPACE = (SELECT SPACE FROM INFORMATION_SCHEMA.INNODB_SYS_TABLES + WHERE NAME = 'test/t4') + AND INDEX_NAME = 'idx_k' AND PAGE_TYPE = 'INDEX' + AND NUMBER_RECORDS > 2) + AS lock_is_not_on_first_leaf_page; + +--echo # +--echo # TX1: INSERT k=23 into the gap between pages. +--echo # B-tree descent routes to page 2 via the stale node pointer +--echo # (which still references the purged key <= k=22). +--echo # On page 2, the first record is k=25. Since k=23 < k=25, the B-tree +--echo # cursor positions at infimum (pred_heap_no == PAGE_HEAP_NO_INFIMUM). +--echo # +--echo # The cross-page code path fires: +--echo # 1. prev_page_no != FIL_NULL (page 1 exists) +--echo # 2. Walk TX2's trx_locks: TX2 has no lock on page 1 -> pred_ok = true +--echo # 3. Scan for granted conflicting locks on k=25: none -> skip lock_wait +--echo # INSERT succeeds without entering lock_wait. +--echo # +INSERT INTO t4 (k, pad) VALUES (23, REPEAT('a', 8192)); +COMMIT; + +--echo # +--echo # TX2: Reap. TX1 committed, TX2 proceeds. k=25 already deleted by TX1. +--echo # +--connection default +--reap +COMMIT; + +--disconnect con1 +SET DEBUG_SYNC='RESET'; + +SELECT k FROM t4 WHERE k BETWEEN 14 AND 28 ORDER BY k; + +DROP TABLE t4; + +--echo # +--echo # Test 5: Predecessor check prevents phantom — TX2 range scan locks predecessor +--echo # +--echo # TX2 does a range scan (BETWEEN) that locks the predecessor record before +--echo # blocking on the successor. The predecessor check should detect TX2's +--echo # granted lock on the predecessor and correctly BLOCK the optimization, +--echo # forcing TX1's INSERT to enter lock_wait(). +--echo # +--echo # This is a negative test: the INSERT must NOT skip lock_wait(). +--echo # With a 1-second lock_wait_timeout for TX1, the INSERT gets +--echo # ER_LOCK_WAIT_TIMEOUT, proving the predecessor check works. +--echo # + +CREATE TABLE parent ( + id BIGINT NOT NULL AUTO_INCREMENT PRIMARY KEY +) ENGINE=InnoDB; + +CREATE TABLE child ( + id BIGINT NOT NULL AUTO_INCREMENT PRIMARY KEY, + parent_id BIGINT NOT NULL, + CONSTRAINT fk_parent FOREIGN KEY (parent_id) REFERENCES parent (id) + ON DELETE CASCADE ON UPDATE RESTRICT +) ENGINE=InnoDB; + +INSERT INTO parent (id) VALUES (1), (2), (3), (4), (5), (6), (7), (8), (9), (10); +INSERT INTO child (parent_id) VALUES (1), (2), (3), (4), (5), (6), (7), (8), (9), (10); + +--connect(con1, localhost, root,,) + +--echo # +--echo # TX1: Delete child rows with parent_id 5 and 6. +--echo # +SET TRANSACTION ISOLATION LEVEL REPEATABLE READ; +BEGIN; +DELETE FROM child WHERE parent_id IN (5, 6); + +--echo # +--echo # TX2: Range scan DELETE covering parent_id 4 through 6. +--echo # TX2 scans the fk_parent secondary index sequentially: +--echo # 1. Locks parent_id=4 -> GRANTED (no conflict) +--echo # 2. Locks parent_id=5 -> WAITING (TX1 holds this lock) +--echo # TX2 now has a GRANTED lock on parent_id=4 (the predecessor). +--echo # +--connection default +SET TRANSACTION ISOLATION LEVEL REPEATABLE READ; +SET DEBUG_SYNC='lock_wait_start SIGNAL tx2_waiting'; +BEGIN; +--send DELETE FROM child WHERE parent_id BETWEEN 4 AND 6 + +--echo # +--echo # TX1: INSERT parent_id=4 (a second row with the same FK value). +--echo # In the fk_parent index, the new record goes between +--echo # (parent_id=4, old_id) and (parent_id=5, old_id). +--echo # The predecessor check detects TX2's GRANTED lock on parent_id=4 +--echo # and correctly blocks the optimization. TX1 enters lock_wait() +--echo # and gets ER_LOCK_WAIT_TIMEOUT, proving the predecessor check works. +--echo # +--connection con1 +SET DEBUG_SYNC='now WAIT_FOR tx2_waiting'; +SET SESSION innodb_lock_wait_timeout=1; +--error ER_LOCK_WAIT_TIMEOUT +INSERT INTO child (parent_id) VALUES (4); +ROLLBACK; + +--echo # +--echo # TX2: TX1 rolled back, TX2 proceeds and deletes parent_id 4, 5, 6. +--echo # +--connection default +--reap +COMMIT; + +--disconnect con1 +SET DEBUG_SYNC='RESET'; + +SELECT * FROM child ORDER BY parent_id; + +DROP TABLE child, parent; + +--source include/wait_until_count_sessions.inc diff --git a/mysql-test/suite/versioning/r/update.result b/mysql-test/suite/versioning/r/update.result index c7b8d922e5833..094e193cd7c08 100644 --- a/mysql-test/suite/versioning/r/update.result +++ b/mysql-test/suite/versioning/r/update.result @@ -286,7 +286,6 @@ connection default; update t1 set b = 'foo'; connection con1; update t1 set a = 'bar'; -ERROR 40001: Deadlock found when trying to get lock; try restarting transaction disconnect con1; connection default; drop table t1; diff --git a/mysql-test/suite/versioning/t/update.test b/mysql-test/suite/versioning/t/update.test index e3b07bfe47a48..4421dc1509a28 100644 --- a/mysql-test/suite/versioning/t/update.test +++ b/mysql-test/suite/versioning/t/update.test @@ -201,7 +201,6 @@ send update t1 set b = 'foo'; connection con1; let $wait_condition= select count(*) from information_schema.innodb_lock_waits; source include/wait_condition.inc; -error ER_LOCK_DEADLOCK; update t1 set a = 'bar'; disconnect con1; connection default; diff --git a/storage/innobase/lock/lock0lock.cc b/storage/innobase/lock/lock0lock.cc index 40a09b3a79090..a1162ee50812e 100644 --- a/storage/innobase/lock/lock0lock.cc +++ b/storage/innobase/lock/lock0lock.cc @@ -5791,6 +5791,12 @@ lock_rec_insert_check_and_lock( if (index->is_spatial()) return DB_SUCCESS; + DBUG_LOG("ib_lock", + "insert_check trx " << ib::hex(trx->id) + << " index " << index->name() + << " page " << id + << " heap_no " << heap_no); + /* If another transaction has an explicit lock request which locks the gap, waiting or granted, on the successor, the insert has to wait. @@ -5806,10 +5812,138 @@ lock_rec_insert_check_and_lock( g.cell(), id, heap_no, trx)) { - trx->mutex_lock(); - err= lock_rec_enqueue_waiting(c_lock, type_mode, id, block->page.frame, - heap_no, index, thr, nullptr); - trx->mutex_unlock(); + const ulint pred_heap_no= comp + ? rec_get_heap_no_new(rec) : rec_get_heap_no_old(rec); + + /* MDEV-37974: If the first conflicting lock is WAITING and + we hold a granted X lock on the successor record, the + waiting lock is necessarily blocked behind our lock in the + queue and can never be granted while our lock exists. + + To prevent phantoms, we must also verify that the + waiting transaction (TX2) has not already scanned through + the gap from the predecessor side. We check this by + verifying TX2 has no GRANTED lock on the predecessor: + - Same-page: TX2 has no granted lock on pred_heap_no. + - Cross-page (infimum): TX2 has no locks on the previous + page, meaning TX2 entered via B-tree descent directly + to this page and never traversed the gap. + + We check TX2's locks (not TX1's) because TX1 may only + hold implicit locks on secondary index records (from + delete-marking via a clustered index scan), which have + no explicit lock_t struct. TX2, however, always acquires + explicit locks during its scan via sel_set_rec_lock(). + + Additionally, we verify that no other transaction holds a + GRANTED lock conflicting with our INSERT_INTENTION (such + locks can arise from lock inheritance during purge). */ + if (c_lock->is_waiting() && + lock_rec_has_expl(LOCK_X | LOCK_REC_NOT_GAP, + g.cell(), id, heap_no, trx)) + { + bool pred_ok; + + if (pred_heap_no != PAGE_HEAP_NO_INFIMUM) + { + /* Same-page case: check that TX2 has no + GRANTED lock on the predecessor record. + If TX2 has no lock there, TX2 has not scanned + through the gap. Both predecessor and + successor are on this page, covered by the + same LockGuard -- no additional latching. */ + pred_ok= !lock_rec_has_expl(LOCK_S | LOCK_REC_NOT_GAP, + g.cell(), id, + pred_heap_no, + c_lock->trx); + } + else + { + /* Cross-page case: predecessor is the page + infimum. Walk TX2's trx_locks to check whether + TX2 has any lock on the previous page. If not, + TX2 never scanned through it and no phantom is + possible. + + Acquiring wait_trx->mutex is safe: we hold + lock_sys.latch (shared) + hash cell latch. + wait_trx is sleeping in lock_wait() holding + only wait_mutex. Latch order is + lock_sys.latch -> trx->mutex (established + precedent: lock_rec_convert_impl_to_expl_for_trx). */ + const uint32_t prev_page_no= + btr_page_get_prev(block->page.frame); + if (prev_page_no == FIL_NULL) + { + /* Leftmost page in the index. There is + no previous page, so TX2 could not have + scanned through it. TX2 must have entered + this page via B-tree descent. */ + pred_ok= true; + } + else + { + const page_id_t prev_id(id.space(), + prev_page_no); + trx_t *wait_trx= c_lock->trx; + pred_ok= true; + + wait_trx->mutex_lock(); + for (const lock_t *l= UT_LIST_GET_FIRST( + wait_trx->lock.trx_locks); + l; + l= UT_LIST_GET_NEXT(trx_locks, l)) + { + if (!l->is_table() && + l->un_member.rec_lock.page_id + == prev_id) + { + pred_ok= false; + break; + } + } + wait_trx->mutex_unlock(); + } + } + + if (pred_ok) + { + const bool is_supremum= + (heap_no == PAGE_HEAP_NO_SUPREMUM); + c_lock= nullptr; + for (lock_t *l= lock_sys_t::get_first(g.cell(), id, + heap_no); + l; l= lock_rec_get_next(heap_no, l)) + { + if (l->trx != trx && !l->is_waiting() && + lock_rec_has_to_wait(trx, type_mode, l, + is_supremum)) + { + c_lock= l; + break; + } + } + } + } + + if (c_lock) + { + DBUG_LOG("ib_lock", + "insert_check conflict trx " << ib::hex(trx->id) + << " blocker " << *c_lock + << " blocker->trx " << ib::hex(c_lock->trx->id)); + trx->mutex_lock(); + err= lock_rec_enqueue_waiting(c_lock, type_mode, id, + block->page.frame, + heap_no, index, thr, nullptr); + trx->mutex_unlock(); + } + else + { + DBUG_LOG("ib_lock", + "insert_check skip trx " << ib::hex(trx->id) + << " all conflicting locks are waiting"); + } } } }