From 8c23920a528f0f7b2cf9bb8645b9b9e365f9f20a Mon Sep 17 00:00:00 2001
From: FidelSch <honoratofidel@protonmail.com>
Date: Tue, 20 Jan 2026 17:21:55 -0300
Subject: [PATCH 1/3] fold: read input in chunks

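Read input in fixed-size chunks instead of whole lines, so that input
that does not contain newlines can be processed without first buffering
all of it in memory.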
---
 src/uu/fold/src/fold.rs | 223 ++++++++++++++++++++++++++++------------
 1 file changed, 159 insertions(+), 64 deletions(-)

diff --git a/src/uu/fold/src/fold.rs b/src/uu/fold/src/fold.rs
index 2eb97933180..a232ddb70d9 100644
--- a/src/uu/fold/src/fold.rs
+++ b/src/uu/fold/src/fold.rs
@@ -7,7 +7,7 @@
 
 use clap::{Arg, ArgAction, Command};
 use std::fs::File;
-use std::io::{BufRead, BufReader, BufWriter, Read, Write, stdin, stdout};
+use std::io::{BufReader, BufWriter, Read, Write, stdin, stdout};
 use std::path::Path;
 use unicode_width::UnicodeWidthChar;
 use uucore::display::Quotable;
@@ -19,6 +19,7 @@ const TAB_WIDTH: usize = 8;
 const NL: u8 = b'\n';
 const CR: u8 = b'\r';
 const TAB: u8 = b'\t';
+const READ_CHUNK_SIZE: usize = 8192; // 8KB chunks
 
 mod options {
     pub const BYTES: &str = "bytes";
@@ -186,65 +187,105 @@ fn fold_file_bytewise<T: Read, W: Write>(
     width: usize,
     output: &mut W,
 ) -> UResult<()> {
-    let mut line = Vec::new();
+    let mut buffer = Vec::new();
+    let mut chunk_buf = vec![0u8; READ_CHUNK_SIZE];
 
     loop {
-        if file
-            .read_until(NL, &mut line)
-            .map_err_context(|| translate!("fold-error-readline"))?
-            == 0
-        {
+        let bytes_read = file
+            .read(&mut chunk_buf)
+            .map_err_context(|| translate!("fold-error-readline"))?;
+
+        if bytes_read == 0 {
+            // EOF: process any remaining data as a line without newline
+            if !buffer.is_empty() {
+                process_bytewise_line(&buffer, spaces, width, output)?;
+            }
             break;
         }
 
-        if line == [NL] {
-            output.write_all(&[NL])?;
-            line.truncate(0);
-            continue;
+        // Append chunk to buffer
+        buffer.extend_from_slice(&chunk_buf[..bytes_read]);
+
+        // Process complete lines one at a time
+        let mut start = 0;
+        for i in 0..buffer.len() {
+            if buffer[i] == NL {
+                // Process line including the newline
+                process_bytewise_line(&buffer[start..=i], spaces, width, output)?;
+                start = i + 1;
+            }
         }
 
-        let len = line.len();
-        let mut i = 0;
-
-        while i < len {
-            let width = if len - i >= width { width } else { len - i };
-            let slice = {
-                let slice = &line[i..i + width];
-                if spaces && i + width < len {
-                    match slice
-                        .iter()
-                        .enumerate()
-                        .rev()
-                        .find(|(_, c)| c.is_ascii_whitespace() && **c != CR)
-                    {
-                        Some((m, _)) => &slice[..=m],
-                        None => slice,
-                    }
-                } else {
-                    slice
-                }
-            };
+        // Remove processed data from buffer
+        if start > 0 {
+            buffer.drain(..start);
+        }
 
-            // Don't duplicate trailing newlines: if the slice is "\n", the
-            // previous iteration folded just before the end of the line and
-            // has already printed this newline.
-            if slice == [NL] {
-                break;
-            }
+        // If buffer is getting too large without a newline, flush all of it
+        // as though it were a complete line (no remainder is kept back)
+        if buffer.len() >= READ_CHUNK_SIZE * 2 {
+            // NOTE(review): the fold position is not carried over to the next call
+            let process_len = buffer.len();
+            process_bytewise_line(&buffer[..process_len], spaces, width, output)?;
+            buffer.clear();
+        }
+    }
 
-            i += slice.len();
+    Ok(())
+}
 
-            let at_eol = i >= len;
+fn process_bytewise_line<W: Write>(
+    line: &[u8],
+    spaces: bool,
+    width: usize,
+    output: &mut W,
+) -> UResult<()> {
+    if line.is_empty() {
+        return Ok(());
+    }
+
+    if line == [NL] {
+        output.write_all(&[NL])?;
+        return Ok(());
+    }
 
-            if at_eol {
-                output.write_all(slice)?;
+    let len = line.len();
+    let mut i = 0;
+
+    while i < len {
+        let segment_width = if len - i >= width { width } else { len - i };
+        let slice = {
+            let slice = &line[i..i + segment_width];
+            if spaces && i + segment_width < len {
+                match slice
+                    .iter()
+                    .enumerate()
+                    .rev()
+                    .find(|(_, c)| c.is_ascii_whitespace() && **c != CR)
+                {
+                    Some((m, _)) => &slice[..=m],
+                    None => slice,
+                }
             } else {
-                output.write_all(slice)?;
-                output.write_all(&[NL])?;
+                slice
             }
+        };
+
+        // Don't duplicate trailing newlines: if the slice is "\n", the
+        // previous iteration folded just before the end of the line and
+        // has already printed this newline.
+        if slice == [NL] {
+            break;
         }
 
-        line.truncate(0);
+        i += slice.len();
+
+        let at_eol = i >= len;
+
+        output.write_all(slice)?;
+        if !at_eol {
+            output.write_all(&[NL])?;
+        }
     }
 
     Ok(())
@@ -572,36 +613,90 @@ fn fold_file<T: Read, W: Write>(
     mode: WidthMode,
     writer: &mut W,
 ) -> UResult<()> {
-    let mut line = Vec::new();
+    let mut buffer = Vec::new();
     let mut output = Vec::new();
     let mut col_count = 0;
     let mut last_space = None;
+    let mut chunk_buf = vec![0u8; READ_CHUNK_SIZE];
 
     loop {
-        if file
-            .read_until(NL, &mut line)
-            .map_err_context(|| translate!("fold-error-readline"))?
-            == 0
-        {
+        let bytes_read = file
+            .read(&mut chunk_buf)
+            .map_err_context(|| translate!("fold-error-readline"))?;
+
+        if bytes_read == 0 {
+            // EOF: process any remaining data as a line without newline
+            if !buffer.is_empty() {
+                let mut ctx = FoldContext {
+                    spaces,
+                    width,
+                    mode,
+                    writer,
+                    output: &mut output,
+                    col_count: &mut col_count,
+                    last_space: &mut last_space,
+                };
+
+                match std::str::from_utf8(&buffer) {
+                    Ok(s) => process_utf8_line(s, &mut ctx)?,
+                    Err(_) => process_non_utf8_line(&buffer, &mut ctx)?,
+                }
+            }
             break;
         }
 
-        let mut ctx = FoldContext {
-            spaces,
-            width,
-            mode,
-            writer,
-            output: &mut output,
-            col_count: &mut col_count,
-            last_space: &mut last_space,
-        };
+        buffer.extend_from_slice(&chunk_buf[..bytes_read]);
+
+        // Process complete lines one at a time
+        let mut start = 0;
+        for i in 0..buffer.len() {
+            if buffer[i] == NL {
+                // Process line including the newline
+                let line_data = &buffer[start..=i];
+
+                let mut ctx = FoldContext {
+                    spaces,
+                    width,
+                    mode,
+                    writer,
+                    output: &mut output,
+                    col_count: &mut col_count,
+                    last_space: &mut last_space,
+                };
 
-        match std::str::from_utf8(&line) {
-            Ok(s) => process_utf8_line(s, &mut ctx)?,
-            Err(_) => process_non_utf8_line(&line, &mut ctx)?,
+                match std::str::from_utf8(line_data) {
+                    Ok(s) => process_utf8_line(s, &mut ctx)?,
+                    Err(_) => process_non_utf8_line(line_data, &mut ctx)?,
+                }
+
+                start = i + 1;
+            }
         }
 
-        line.clear();
+        // Remove processed data from buffer
+        if start > 0 {
+            buffer.drain(..start);
+        }
+
+        // If buffer is getting too large without a newline, process it anyway
+        if buffer.len() >= READ_CHUNK_SIZE * 2 {
+            let mut ctx = FoldContext {
+                spaces,
+                width,
+                mode,
+                writer,
+                output: &mut output,
+                col_count: &mut col_count,
+                last_space: &mut last_space,
+            };
+
+            match std::str::from_utf8(&buffer) {
+                Ok(s) => process_utf8_line(s, &mut ctx)?,
+                Err(_) => process_non_utf8_line(&buffer, &mut ctx)?,
+            }
+
+            buffer.clear();
+        }
     }
 
     if !output.is_empty() {

From 98da70f7631c6e2326efea6b8bf0fcdc767f10da Mon Sep 17 00:00:00 2001
From: FidelSch <honoratofidel@protonmail.com>
Date: Wed, 21 Jan 2026 09:57:25 -0300
Subject: [PATCH 2/3] fold: handle multi-byte characters at chunk borders

---
 src/uu/fold/src/fold.rs | 75 +++++++++++++++++++++++++++++++++--------
 1 file changed, 61 insertions(+), 14 deletions(-)

diff --git a/src/uu/fold/src/fold.rs b/src/uu/fold/src/fold.rs
index a232ddb70d9..6e6d92a6de1 100644
--- a/src/uu/fold/src/fold.rs
+++ b/src/uu/fold/src/fold.rs
@@ -295,6 +295,49 @@ fn next_tab_stop(col_count: usize) -> usize {
     col_count + TAB_WIDTH - col_count % TAB_WIDTH
 }
 
+/// Find the start of an incomplete UTF-8 sequence at the end of a buffer.
+/// Returns the index where the incomplete sequence starts, or `buffer.len()` if none is found.
+fn find_incomplete_utf8_start(buffer: &[u8]) -> usize {
+    let len = buffer.len();
+
+    // A truncated UTF-8 sequence is at most 3 bytes, so only the last 3 bytes are checked
+    // UTF-8 continuation bytes start with 0b10xxxxxx
+    // UTF-8 start bytes: 0b0xxxxxxx (1-byte), 0b110xxxxx (2-byte), 0b1110xxxx (3-byte), 0b11110xxx (4-byte)
+
+    for i in (len.saturating_sub(3)..len).rev() {
+        let byte = buffer[i];
+
+        // Check if this is a UTF-8 start byte
+        if byte & 0b10000000 == 0 {
+            // Single-byte character (ASCII), complete
+            return len;
+        } else if byte & 0b11000000 == 0b11000000 {
+            // This is a UTF-8 start byte
+            let expected_len = if byte & 0b11100000 == 0b11000000 {
+                2
+            } else if byte & 0b11110000 == 0b11100000 {
+                3
+            } else if byte & 0b11111000 == 0b11110000 {
+                4
+            } else {
+                // Invalid UTF-8 start byte
+                return len;
+            };
+
+            let actual_len = len - i;
+            if actual_len < expected_len {
+                // Incomplete sequence found
+                return i;
+            } else {
+                // Complete sequence
+                return len;
+            }
+        }
+    }
+
+    len
+}
+
 fn compute_col_count(buffer: &[u8], mode: WidthMode) -> usize {
     match mode {
         WidthMode::Characters => std::str::from_utf8(buffer)
@@ -680,22 +723,26 @@ fn fold_file<T: Read, W: Write>(
 
         // If buffer is getting too large without a newline, process it anyway
         if buffer.len() >= READ_CHUNK_SIZE * 2 {
-            let mut ctx = FoldContext {
-                spaces,
-                width,
-                mode,
-                writer,
-                output: &mut output,
-                col_count: &mut col_count,
-                last_space: &mut last_space,
-            };
+            let process_up_to = find_incomplete_utf8_start(&buffer);
 
-            match std::str::from_utf8(&buffer) {
-                Ok(s) => process_utf8_line(s, &mut ctx)?,
-                Err(_) => process_non_utf8_line(&buffer, &mut ctx)?,
-            }
+            if process_up_to > 0 {
+                let mut ctx = FoldContext {
+                    spaces,
+                    width,
+                    mode,
+                    writer,
+                    output: &mut output,
+                    col_count: &mut col_count,
+                    last_space: &mut last_space,
+                };
 
-            buffer.clear();
+                match std::str::from_utf8(&buffer[..process_up_to]) {
+                    Ok(s) => process_utf8_line(s, &mut ctx)?,
+                    Err(_) => process_non_utf8_line(&buffer[..process_up_to], &mut ctx)?,
+                }
+
+                buffer.drain(..process_up_to);
+            }
         }
     }
 

From 0a1ca4794f4088ca60a1b6a5c68f35afd9b02286 Mon Sep 17 00:00:00 2001
From: FidelSch <honoratofidel@protonmail.com>
Date: Wed, 21 Jan 2026 09:57:25 -0300
Subject: [PATCH 3/3] fold: handle multi-byte characters at chunk borders

---
 src/uu/fold/src/fold.rs | 74 +++++++++++++++++++++++++++++++++--------
 1 file changed, 60 insertions(+), 14 deletions(-)

diff --git a/src/uu/fold/src/fold.rs b/src/uu/fold/src/fold.rs
index a232ddb70d9..d12188be98b 100644
--- a/src/uu/fold/src/fold.rs
+++ b/src/uu/fold/src/fold.rs
@@ -295,6 +295,48 @@ fn next_tab_stop(col_count: usize) -> usize {
     col_count + TAB_WIDTH - col_count % TAB_WIDTH
 }
 
+/// Find the start of an incomplete UTF-8 sequence at the end of a buffer.
+/// Returns the index where the incomplete sequence starts, or `buffer.len()` if none is found.
+fn find_incomplete_utf8_start(buffer: &[u8]) -> usize {
+    let len = buffer.len();
+
+    // A truncated UTF-8 sequence is at most 3 bytes, so only the last 3 bytes are checked
+    // UTF-8 continuation bytes start with 0b10xxxxxx
+    // UTF-8 start bytes: 0b0xxxxxxx (1-byte), 0b110xxxxx (2-byte), 0b1110xxxx (3-byte), 0b11110xxx (4-byte)
+
+    for i in (len.saturating_sub(3)..len).rev() {
+        let byte = buffer[i];
+
+        // Check if this is a UTF-8 start byte
+        if byte & 0b10000000 == 0 {
+            // Single-byte character (ASCII), complete
+            return len;
+        } else if byte & 0b11000000 == 0b11000000 {
+            // This is a UTF-8 start byte
+            let expected_len = if byte & 0b11100000 == 0b11000000 {
+                2
+            } else if byte & 0b11110000 == 0b11100000 {
+                3
+            } else if byte & 0b11111000 == 0b11110000 {
+                4
+            } else {
+                // Invalid UTF-8 start byte
+                return len;
+            };
+
+            let actual_len = len - i;
+            if actual_len < expected_len {
+                // Incomplete sequence found
+                return i;
+            }
+
+            return len;
+        }
+    }
+
+    len
+}
+
 fn compute_col_count(buffer: &[u8], mode: WidthMode) -> usize {
     match mode {
         WidthMode::Characters => std::str::from_utf8(buffer)
@@ -680,22 +722,26 @@ fn fold_file<T: Read, W: Write>(
 
         // If buffer is getting too large without a newline, process it anyway
         if buffer.len() >= READ_CHUNK_SIZE * 2 {
-            let mut ctx = FoldContext {
-                spaces,
-                width,
-                mode,
-                writer,
-                output: &mut output,
-                col_count: &mut col_count,
-                last_space: &mut last_space,
-            };
+            let process_up_to = find_incomplete_utf8_start(&buffer);
 
-            match std::str::from_utf8(&buffer) {
-                Ok(s) => process_utf8_line(s, &mut ctx)?,
-                Err(_) => process_non_utf8_line(&buffer, &mut ctx)?,
-            }
+            if process_up_to > 0 {
+                let mut ctx = FoldContext {
+                    spaces,
+                    width,
+                    mode,
+                    writer,
+                    output: &mut output,
+                    col_count: &mut col_count,
+                    last_space: &mut last_space,
+                };
 
-            buffer.clear();
+                match std::str::from_utf8(&buffer[..process_up_to]) {
+                    Ok(s) => process_utf8_line(s, &mut ctx)?,
+                    Err(_) => process_non_utf8_line(&buffer[..process_up_to], &mut ctx)?,
+                }
+
+                buffer.drain(..process_up_to);
+            }
         }
     }