From 0a34ac365b66ff71bb97fe76366f19627db13961 Mon Sep 17 00:00:00 2001
From: Cocoon-Break <54054995+kuishou68@users.noreply.github.com>
Date: Fri, 10 Apr 2026 08:05:55 +0000
Subject: [PATCH] fix(chunking): preserve sentence order in NlpSentenceChunking

Using list(set(sens)) destroys sentence order and incorrectly deduplicates.
Fix: return the sentences list directly.
---
 crawl4ai/chunking_strategy.py | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/crawl4ai/chunking_strategy.py b/crawl4ai/chunking_strategy.py
index f46cb667c..31f8abdb5 100644
--- a/crawl4ai/chunking_strategy.py
+++ b/crawl4ai/chunking_strategy.py
@@ -86,7 +86,7 @@ def chunk(self, text: str) -> list:
         sentences = sent_tokenize(text)
         sens = [sent.strip() for sent in sentences]
 
-        return list(set(sens))
+        return sens
 
 
 # Topic-based segmentation using TextTiling