@@ -25,7 +25,8 @@ use std::sync::Arc;
2525
2626use datafusion:: arrow:: array:: { Int32Array , StringArray } ;
2727use datafusion:: prelude:: SessionContext ;
28- use paimon:: { CatalogOptions , FileSystemCatalog , Options } ;
28+ use paimon:: catalog:: Identifier ;
29+ use paimon:: { Catalog , CatalogOptions , FileSystemCatalog , Options } ;
2930use paimon_datafusion:: { PaimonCatalogProvider , PaimonRelationPlanner , PaimonSqlHandler } ;
3031use tempfile:: TempDir ;
3132
@@ -1146,3 +1147,112 @@ async fn test_pk_multiple_value_columns() {
11461147 ]
11471148 ) ;
11481149}
1150+
1151+ // ======================= FirstRow Engine: INSERT OVERWRITE =======================
1152+
1153+ /// INSERT OVERWRITE on a partitioned FirstRow-engine PK table should delete
1154+ /// level-0 files. Before the fix, `skip_level_zero` was applied in the overwrite
1155+ /// scan path, causing level-0 files to survive the overwrite.
1156+ ///
1157+ /// Verifies via TableScan (scan_all_files) that the overwrite correctly produces
1158+ /// delete entries for level-0 files, leaving only the new file per partition.
1159+ #[ tokio:: test]
1160+ async fn test_pk_first_row_insert_overwrite ( ) {
1161+ let ( _tmp, catalog) = create_test_env ( ) ;
1162+ let handler = create_handler ( catalog. clone ( ) ) ;
1163+ handler
1164+ . sql ( "CREATE SCHEMA paimon.test_db" )
1165+ . await
1166+ . expect ( "CREATE SCHEMA failed" ) ;
1167+
1168+ handler
1169+ . sql (
1170+ "CREATE TABLE paimon.test_db.t_fr_ow (
1171+ dt STRING, id INT NOT NULL, name STRING,
1172+ PRIMARY KEY (dt, id)
1173+ ) PARTITIONED BY (dt STRING)
1174+ WITH ('bucket' = '1', 'merge-engine' = 'first-row')" ,
1175+ )
1176+ . await
1177+ . unwrap ( ) ;
1178+
1179+ // First commit: two partitions, creates level-0 files
1180+ handler
1181+ . sql (
1182+ "INSERT INTO paimon.test_db.t_fr_ow VALUES \
1183+ ('2024-01-01', 1, 'alice'), ('2024-01-01', 2, 'bob'), \
1184+ ('2024-01-02', 3, 'carol')",
1185+ )
1186+ . await
1187+ . unwrap ( )
1188+ . collect ( )
1189+ . await
1190+ . unwrap ( ) ;
1191+
1192+ // Verify via scan_all_files: 2 level-0 files (one per partition)
1193+ let table = catalog
1194+ . get_table ( & Identifier :: new ( "test_db" , "t_fr_ow" ) )
1195+ . await
1196+ . unwrap ( ) ;
1197+ let plan = table
1198+ . new_read_builder ( )
1199+ . new_scan ( )
1200+ . with_scan_all_files ( )
1201+ . plan ( )
1202+ . await
1203+ . unwrap ( ) ;
1204+ let file_count: usize = plan. splits ( ) . iter ( ) . map ( |s| s. data_files ( ) . len ( ) ) . sum ( ) ;
1205+ assert_eq ! ( file_count, 2 , "After INSERT: 2 level-0 files (one per partition)" ) ;
1206+
1207+ // INSERT OVERWRITE partition 2024-01-01 — must delete old level-0 file
1208+ handler
1209+ . sql ( "INSERT OVERWRITE paimon.test_db.t_fr_ow VALUES ('2024-01-01', 10, 'new_alice')" )
1210+ . await
1211+ . unwrap ( )
1212+ . collect ( )
1213+ . await
1214+ . unwrap ( ) ;
1215+
1216+ let table = catalog
1217+ . get_table ( & Identifier :: new ( "test_db" , "t_fr_ow" ) )
1218+ . await
1219+ . unwrap ( ) ;
1220+ let plan = table
1221+ . new_read_builder ( )
1222+ . new_scan ( )
1223+ . with_scan_all_files ( )
1224+ . plan ( )
1225+ . await
1226+ . unwrap ( ) ;
1227+ let file_count: usize = plan. splits ( ) . iter ( ) . map ( |s| s. data_files ( ) . len ( ) ) . sum ( ) ;
1228+ assert_eq ! (
1229+ file_count, 2 ,
1230+ "After OVERWRITE: 2 files (1 replaced for 2024-01-01 + 1 unchanged for 2024-01-02)"
1231+ ) ;
1232+
1233+ // Second overwrite on the same partition — no stale files should accumulate
1234+ handler
1235+ . sql ( "INSERT OVERWRITE paimon.test_db.t_fr_ow VALUES ('2024-01-01', 20, 'newer_alice')" )
1236+ . await
1237+ . unwrap ( )
1238+ . collect ( )
1239+ . await
1240+ . unwrap ( ) ;
1241+
1242+ let table = catalog
1243+ . get_table ( & Identifier :: new ( "test_db" , "t_fr_ow" ) )
1244+ . await
1245+ . unwrap ( ) ;
1246+ let plan = table
1247+ . new_read_builder ( )
1248+ . new_scan ( )
1249+ . with_scan_all_files ( )
1250+ . plan ( )
1251+ . await
1252+ . unwrap ( ) ;
1253+ let file_count: usize = plan. splits ( ) . iter ( ) . map ( |s| s. data_files ( ) . len ( ) ) . sum ( ) ;
1254+ assert_eq ! (
1255+ file_count, 2 ,
1256+ "After second OVERWRITE: still 2 files (no stale level-0 files accumulated)"
1257+ ) ;
1258+ }
0 commit comments