Skip to content

Commit c2a1eda

Browse files
authored
Merge pull request #2086 from bcgov/feature/AB#32007-AddPdfExtractionSupport-Optimization
AB#32007 Apply bounded append memory optimization to PDF text extraction
2 parents 7ec9247 + 88d5f91 commit c2a1eda

1 file changed

Lines changed: 3 additions & 13 deletions

File tree

applications/Unity.GrantManager/src/Unity.GrantManager.Application/AI/TextExtractionService.cs

Lines changed: 3 additions & 13 deletions
Original file line number | Diff line number | Diff line change
@@ -131,24 +131,14 @@ private string ExtractTextFromPdfFile(string fileName, byte[] fileContent)
131131

132132
foreach (var pageText in document.GetPages().Select(page => page.Text))
133133
{
134-
if (builder.Length >= MaxExtractedTextLength)
134+
var limitReached = AppendWithLimit(builder, pageText, MaxExtractedTextLength, Environment.NewLine);
135+
if (limitReached)
135136
{
136137
break;
137138
}
138-
139-
if (!string.IsNullOrWhiteSpace(pageText))
140-
{
141-
builder.AppendLine(pageText);
142-
}
143-
}
144-
145-
var text = builder.ToString();
146-
if (text.Length > MaxExtractedTextLength)
147-
{
148-
text = text.Substring(0, MaxExtractedTextLength);
149139
}
150140

151-
return text;
141+
return builder.ToString();
152142
}
153143
catch (Exception ex)
154144
{

0 commit comments

Comments (0)