@@ -32,6 +32,13 @@ impl Truncate {
3232 pub fn new ( width : u32 ) -> Self {
3333 Self { width }
3434 }
35+
/// Returns the longest prefix of `s` containing at most `max_chars`
/// Unicode scalar values (`char`s).
///
/// The cut is made at a `char` boundary taken from `char_indices`, so
/// slicing never lands in the middle of a multi-byte UTF-8 sequence.
/// If `s` has `max_chars` characters or fewer, `s` is returned unchanged.
fn truncate_str_by_char(s: &str, max_chars: usize) -> &str {
    // Every `char` occupies at least one byte, so a string whose byte length
    // already fits within `max_chars` cannot contain more than `max_chars`
    // characters; skip the scan entirely in that case.
    if s.len() <= max_chars {
        return s;
    }
    // `nth(max_chars)` yields the byte offset of the first character past the
    // limit; `None` means the string has no such character.
    match s.char_indices().nth(max_chars) {
        None => s,
        Some((idx, _)) => &s[..idx],
    }
}
3542}
3643
3744impl TransformFunction for Truncate {
@@ -79,7 +86,7 @@ impl TransformFunction for Truncate {
7986 . downcast_ref :: < arrow_array:: StringArray > ( )
8087 . unwrap ( )
8188 . iter ( )
82- . map ( |v| v. map ( |v| & v [ .. len] ) ) ,
89+ . map ( |v| v. map ( |v| Self :: truncate_str_by_char ( v , len) ) ) ,
8390 ) ;
8491 Ok ( Arc :: new ( res) )
8592 }
@@ -91,7 +98,7 @@ impl TransformFunction for Truncate {
9198 . downcast_ref :: < arrow_array:: LargeStringArray > ( )
9299 . unwrap ( )
93100 . iter ( )
94- . map ( |v| v. map ( |v| & v [ .. len] ) ) ,
101+ . map ( |v| v. map ( |v| Self :: truncate_str_by_char ( v , len) ) ) ,
95102 ) ;
96103 Ok ( Arc :: new ( res) )
97104 }
@@ -112,7 +119,7 @@ mod test {
112119
113120 // Test case ref from: https://iceberg.apache.org/spec/#truncate-transform-details
114121 #[ test]
115- fn test_truncate ( ) {
122+ fn test_truncate_simple ( ) {
116123 // test truncate int
117124 let input = Arc :: new ( Int32Array :: from ( vec ! [ 1 , -1 ] ) ) ;
118125 let res = super :: Truncate :: new ( 10 ) . transform ( input) . unwrap ( ) ;
@@ -174,4 +181,37 @@ mod test {
174181 "ice"
175182 ) ;
176183 }
184+
#[test]
fn test_string_truncate() {
    // Multi-byte characters: truncation must count characters, not bytes.
    let test1 = "イロハニホヘト";
    let test1_2_expected = "イロ";
    assert_eq!(
        super::Truncate::truncate_str_by_char(test1, 2),
        test1_2_expected
    );

    let test1_3_expected = "イロハ";
    assert_eq!(
        super::Truncate::truncate_str_by_char(test1, 3),
        test1_3_expected
    );

    // Mixed scripts in a single string.
    let test2 = "щщаεはчωいにπάほхεろへσκζ";
    let test2_7_expected = "щщаεはчω";
    assert_eq!(
        super::Truncate::truncate_str_by_char(test2, 7),
        test2_7_expected
    );

    // Exactly two characters requested from a two-character string: the
    // input must come back unchanged.
    let test3 = "\u{FFFF}\u{FFFF}";
    assert_eq!(super::Truncate::truncate_str_by_char(test3, 2), test3);

    // Code points above U+FFFF still count as a single character each.
    let test4 = "\u{10000}\u{10000}";
    let test4_1_expected = "\u{10000}";
    assert_eq!(
        super::Truncate::truncate_str_by_char(test4, 1),
        test4_1_expected
    );
}
177217}
0 commit comments