From 286e9c833a561e65fbcb6dee6f386fde8a3b0ae4 Mon Sep 17 00:00:00 2001
From: Jacob Smith
Date: Thu, 30 Apr 2026 16:13:23 -0700
Subject: [PATCH 1/2] AB#32302 stream attachment summaries for lower memory

Replace the byte[]-based attachment extraction pipeline with a streaming
one to avoid full in-memory buffering of CHEFS attachments when
generating AI summaries.

- ISubmissionAppService gains GetChefsFileAttachmentStream returning a
  ChefsFileAttachmentStream (Stream + content-type) backed by a temp
  file with FileOptions.DeleteOnClose. Implementation uses
  HttpCompletionOption.ResponseHeadersRead so the HTTP layer streams the
  response body directly to disk instead of buffering it. Temp file is
  cleaned up on copy/open failure.
- IResilientHttpRequest.HttpAsync gains an optional HttpCompletionOption
  parameter (default ResponseContentRead preserves all existing
  callers).
- ITextExtractionService.ExtractTextAsync now takes a Stream. The byte[]
  overload, the byte[] extractor dictionary, and the per-format byte[]
  private methods are removed. PDF/Word/Excel/PowerPoint extractors
  consume the stream directly; the text-file extractor reads
  incrementally via StreamReader.
- AttachmentSummaryRequest drops byte[] FileContent and gains string?
  ExtractedText. AttachmentSummaryService streams the file, extracts
  text once, and passes ExtractedText to the AI runtime.
- OpenAIRuntimeService.GenerateAttachmentSummaryAsync uses
  request.ExtractedText directly; the byte[] extraction fallback,
  sizeBytes payload field, and ITextExtractionService dependency are
  removed.

Build clean. Application.Tests 337/337 and Web.Tests 16/16 pass.

Co-authored-by: Copilot <223556219+Copilot@users.noreply.github.com> --- .../AI/Extraction/ITextExtractionService.cs | 3 +- .../AI/Requests/AttachmentSummaryRequest.cs | 6 +- .../AI/Extraction/TextExtractionService.cs | 157 +++++++++--------- .../AI/Operations/AttachmentSummaryService.cs | 29 ++-- .../AI/Runtime/OpenAIRuntimeService.cs | 26 ++- .../Http/IResilientHttpRequest.cs | 1 + .../Http/ResilientHttpRequest.cs | 8 +- .../Intakes/ChefsFileAttachmentStream.cs | 28 ++++ .../Intakes/ISubmissionAppService.cs | 6 + .../Intakes/SubmissionAppService.cs | 81 +++++++++ 10 files changed, 227 insertions(+), 118 deletions(-) create mode 100644 applications/Unity.GrantManager/src/Unity.GrantManager.Application.Contracts/Intakes/ChefsFileAttachmentStream.cs diff --git a/applications/Unity.GrantManager/modules/Unity.AI/src/Unity.AI.Application.Contracts/AI/Extraction/ITextExtractionService.cs b/applications/Unity.GrantManager/modules/Unity.AI/src/Unity.AI.Application.Contracts/AI/Extraction/ITextExtractionService.cs index d1c4d9f992..5ad9336026 100644 --- a/applications/Unity.GrantManager/modules/Unity.AI/src/Unity.AI.Application.Contracts/AI/Extraction/ITextExtractionService.cs +++ b/applications/Unity.GrantManager/modules/Unity.AI/src/Unity.AI.Application.Contracts/AI/Extraction/ITextExtractionService.cs @@ -1,9 +1,10 @@ +using System.IO; using System.Threading.Tasks; namespace Unity.AI.Extraction { public interface ITextExtractionService { - Task ExtractTextAsync(string fileName, byte[] fileContent, string contentType); + Task ExtractTextAsync(string fileName, Stream fileContent, string contentType); } } diff --git a/applications/Unity.GrantManager/modules/Unity.AI/src/Unity.AI.Application.Contracts/AI/Requests/AttachmentSummaryRequest.cs b/applications/Unity.GrantManager/modules/Unity.AI/src/Unity.AI.Application.Contracts/AI/Requests/AttachmentSummaryRequest.cs index e1703a01ff..4731a6e631 100644 --- 
a/applications/Unity.GrantManager/modules/Unity.AI/src/Unity.AI.Application.Contracts/AI/Requests/AttachmentSummaryRequest.cs +++ b/applications/Unity.GrantManager/modules/Unity.AI/src/Unity.AI.Application.Contracts/AI/Requests/AttachmentSummaryRequest.cs @@ -7,12 +7,12 @@ public class AttachmentSummaryRequest [JsonPropertyName("fileName")] public string FileName { get; set; } = string.Empty; - [JsonPropertyName("fileContent")] - public byte[] FileContent { get; set; } = System.Array.Empty(); - [JsonPropertyName("contentType")] public string ContentType { get; set; } = "application/octet-stream"; + [JsonPropertyName("extractedText")] + public string? ExtractedText { get; set; } + [JsonPropertyName("promptVersion")] public string? PromptVersion { get; set; } } diff --git a/applications/Unity.GrantManager/modules/Unity.AI/src/Unity.AI.Application/AI/Extraction/TextExtractionService.cs b/applications/Unity.GrantManager/modules/Unity.AI/src/Unity.AI.Application/AI/Extraction/TextExtractionService.cs index 8d91759dce..e7490cfacc 100644 --- a/applications/Unity.GrantManager/modules/Unity.AI/src/Unity.AI.Application/AI/Extraction/TextExtractionService.cs +++ b/applications/Unity.GrantManager/modules/Unity.AI/src/Unity.AI.Application/AI/Extraction/TextExtractionService.cs @@ -26,30 +26,17 @@ public partial class TextExtractionService : ITextExtractionService, ITransientD private const int MaxDocxTableCellsPerRow = 50; private const int MaxPowerPointSlides = 200; private readonly ILogger _logger; - private readonly Dictionary> _extractorsByExtension; public TextExtractionService(ILogger logger) { _logger = logger; - _extractorsByExtension = new Dictionary>(StringComparer.OrdinalIgnoreCase) - { - [".txt"] = (_, content) => ExtractTextFromTextFile(content), - [".csv"] = (_, content) => ExtractTextFromTextFile(content), - [".json"] = (_, content) => ExtractTextFromTextFile(content), - [".xml"] = (_, content) => ExtractTextFromTextFile(content), - [".pdf"] = 
ExtractTextFromPdfFile, - [".docx"] = ExtractTextFromWordDocx, - [".xls"] = ExtractTextFromExcelFile, - [".xlsx"] = ExtractTextFromExcelFile, - [".pptx"] = ExtractTextFromPowerPointFile - }; } - public Task ExtractTextAsync(string fileName, byte[] fileContent, string contentType) + public Task ExtractTextAsync(string fileName, Stream fileContent, string contentType) { - if (fileContent == null || fileContent.Length == 0) + if (fileContent == null) { - _logger.LogDebug("File content is empty for {FileName}", fileName); + _logger.LogDebug("File content stream is null for {FileName}", fileName); return Task.FromResult(string.Empty); } @@ -64,48 +51,23 @@ public Task ExtractTextAsync(string fileName, byte[] fileContent, string return Task.FromResult(string.Empty); } - if (_extractorsByExtension.TryGetValue(extension, out var extractor)) + var rawText = extension switch { - var rawText = extractor(fileName, fileContent); - return Task.FromResult(NormalizeAndLimitText(rawText, fileName)); - } - - if (normalizedContentType.Contains("text/")) - { - var rawText = ExtractTextFromTextFile(fileContent); - return Task.FromResult(NormalizeAndLimitText(rawText, fileName)); - } - - if (normalizedContentType.Contains("pdf")) - { - var rawText = ExtractTextFromPdfFile(fileName, fileContent); - return Task.FromResult(NormalizeAndLimitText(rawText, fileName)); - } - - if (normalizedContentType.Contains("word") || - normalizedContentType.Contains("msword") || - normalizedContentType.Contains("officedocument.wordprocessingml")) - { - var rawText = ExtractTextFromWordDocx(fileName, fileContent); - return Task.FromResult(NormalizeAndLimitText(rawText, fileName)); - } - - if (normalizedContentType.Contains("excel") || normalizedContentType.Contains("spreadsheet")) + ".txt" or ".csv" or ".json" or ".xml" => ExtractTextFromTextFile(fileContent), + ".pdf" => ExtractTextFromPdfFile(fileName, fileContent), + ".docx" => ExtractTextFromWordDocx(fileName, fileContent), + ".xls" or ".xlsx" => 
ExtractTextFromExcelFile(fileName, fileContent), + ".pptx" => ExtractTextFromPowerPointFile(fileName, fileContent), + _ => ExtractByContentType(fileName, fileContent, normalizedContentType) + }; + + if (string.IsNullOrEmpty(rawText)) { - var rawText = ExtractTextFromExcelFile(fileName, fileContent); - return Task.FromResult(NormalizeAndLimitText(rawText, fileName)); + _logger.LogDebug("No text extraction available for content type {ContentType} with extension {Extension}", + contentType, extension); } - if (normalizedContentType.Contains("presentation") || - normalizedContentType.Contains("powerpoint")) - { - var rawText = ExtractTextFromPowerPointFile(fileName, fileContent); - return Task.FromResult(NormalizeAndLimitText(rawText, fileName)); - } - - _logger.LogDebug("No text extraction available for content type {ContentType} with extension {Extension}", - contentType, extension); - return Task.FromResult(string.Empty); + return Task.FromResult(NormalizeAndLimitText(rawText, fileName)); } catch (Exception ex) { @@ -114,25 +76,64 @@ public Task ExtractTextAsync(string fileName, byte[] fileContent, string } } - private string ExtractTextFromTextFile(byte[] fileContent) + private string ExtractByContentType(string fileName, Stream fileContent, string normalizedContentType) { - try + if (normalizedContentType.Contains("text/")) { - var text = Encoding.UTF8.GetString(fileContent); + return ExtractTextFromTextFile(fileContent); + } + if (normalizedContentType.Contains("pdf")) + { + return ExtractTextFromPdfFile(fileName, fileContent); + } + if (normalizedContentType.Contains("word") || + normalizedContentType.Contains("msword") || + normalizedContentType.Contains("officedocument.wordprocessingml")) + { + return ExtractTextFromWordDocx(fileName, fileContent); + } + if (normalizedContentType.Contains("excel") || normalizedContentType.Contains("spreadsheet")) + { + return ExtractTextFromExcelFile(fileName, fileContent); + } + if 
(normalizedContentType.Contains("presentation") || normalizedContentType.Contains("powerpoint")) + { + return ExtractTextFromPowerPointFile(fileName, fileContent); + } + return string.Empty; + } - if (text.Contains('\uFFFD')) - { - text = Encoding.ASCII.GetString(fileContent); - } + private static void RewindIfPossible(Stream stream) + { + if (stream.CanSeek) + { + stream.Position = 0; + } + } - if (text.Length > MaxExtractedTextLength) + private string ExtractTextFromTextFile(Stream fileContent) + { + try + { + RewindIfPossible(fileContent); + using var reader = new StreamReader(fileContent, Encoding.UTF8, detectEncodingFromByteOrderMarks: true, bufferSize: 4096, leaveOpen: true); + var buffer = new char[Math.Min(MaxExtractedTextLength, 8192)]; + var builder = new StringBuilder(capacity: Math.Min(MaxExtractedTextLength, 8192)); + int read; + while ((read = reader.Read(buffer, 0, buffer.Length)) > 0) { - text = text.Substring(0, MaxExtractedTextLength); - _logger.LogDebug("Truncated text content to {MaxLength} characters", MaxExtractedTextLength); + var remaining = MaxExtractedTextLength - builder.Length; + if (remaining <= 0) break; + builder.Append(buffer, 0, Math.Min(read, remaining)); + if (builder.Length >= MaxExtractedTextLength) + { + _logger.LogDebug("Truncated text content to {MaxLength} characters", MaxExtractedTextLength); + break; + } } - _logger.LogDebug("Extracted {CharacterCount} characters from text-based content.", text.Length); - return text; + _logger.LogDebug("Extracted {CharacterCount} characters from text-based content.", builder.Length); + return builder.ToString(); } catch (Exception ex) { @@ -141,12 +142,12 @@ private string ExtractTextFromTextFile(byte[] fileContent) } } - private string ExtractTextFromPdfFile(string fileName, byte[] fileContent) + private string ExtractTextFromPdfFile(string fileName, Stream fileContent) { try { - using var stream = new MemoryStream(fileContent, writable: false); - using var document = 
PdfDocument.Open(stream); + RewindIfPossible(fileContent); + using var document = PdfDocument.Open(fileContent); var builder = new StringBuilder(); var processedPageCount = 0; var pageTexts = document.GetPages() @@ -177,12 +178,12 @@ private string ExtractTextFromPdfFile(string fileName, byte[] fileContent) } } - private string ExtractTextFromWordDocx(string fileName, byte[] fileContent) + private string ExtractTextFromWordDocx(string fileName, Stream fileContent) { try { - using var stream = new MemoryStream(fileContent, writable: false); - using var document = new XWPFDocument(stream); + RewindIfPossible(fileContent); + using var document = new XWPFDocument(fileContent); var builder = new StringBuilder(); var processedParagraphCount = AppendDocxParagraphText(document, builder); var processedTableRowCount = AppendDocxTableText(document, builder); @@ -268,12 +269,12 @@ private static int AppendDocxTableText(XWPFDocument document, StringBuilder buil return processedTableRowCount; } - private string ExtractTextFromExcelFile(string fileName, byte[] fileContent) + private string ExtractTextFromExcelFile(string fileName, Stream fileContent) { try { - using var stream = new MemoryStream(fileContent, writable: false); - using var workbook = WorkbookFactory.Create(stream); + RewindIfPossible(fileContent); + using var workbook = WorkbookFactory.Create(fileContent); var builder = new StringBuilder(); var sheetCount = Math.Min(workbook.NumberOfSheets, MaxExcelSheets); var processedSheetCount = 0; @@ -314,12 +315,12 @@ private string ExtractTextFromExcelFile(string fileName, byte[] fileContent) } } - private string ExtractTextFromPowerPointFile(string fileName, byte[] fileContent) + private string ExtractTextFromPowerPointFile(string fileName, Stream fileContent) { try { - using var stream = new MemoryStream(fileContent, writable: false); - using var archive = new ZipArchive(stream, ZipArchiveMode.Read, leaveOpen: false); + RewindIfPossible(fileContent); + using var archive = 
new ZipArchive(fileContent, ZipArchiveMode.Read, leaveOpen: true); var builder = new StringBuilder(); var slideEntries = GetOrderedPowerPointSlideEntries(archive) .Take(MaxPowerPointSlides); diff --git a/applications/Unity.GrantManager/modules/Unity.AI/src/Unity.AI.Application/AI/Operations/AttachmentSummaryService.cs b/applications/Unity.GrantManager/modules/Unity.AI/src/Unity.AI.Application/AI/Operations/AttachmentSummaryService.cs index 1225970df5..b3a74688e9 100644 --- a/applications/Unity.GrantManager/modules/Unity.AI/src/Unity.AI.Application/AI/Operations/AttachmentSummaryService.cs +++ b/applications/Unity.GrantManager/modules/Unity.AI/src/Unity.AI.Application/AI/Operations/AttachmentSummaryService.cs @@ -3,6 +3,7 @@ using System.Collections.Generic; using System.Linq; using System.Threading.Tasks; +using Unity.AI.Extraction; using Unity.AI.Requests; using Unity.GrantManager.Applications; using Unity.GrantManager.Intakes; @@ -13,23 +14,25 @@ namespace Unity.AI.Operations; public class AttachmentSummaryService( IApplicationChefsFileAttachmentRepository applicationChefsFileAttachmentRepository, ISubmissionAppService submissionAppService, + ITextExtractionService textExtractionService, IAIService aiService, ILogger logger) : IAttachmentSummaryService, ITransientDependency { - private const string DefaultContentType = "application/octet-stream"; private const string SummaryGenerationFailedMessage = "AI summary generation failed."; public async Task GenerateAndSaveAsync(Guid attachmentId, string? promptVersion = null) { var attachment = await applicationChefsFileAttachmentRepository.GetAsync(attachmentId); var fileName = string.IsNullOrWhiteSpace(attachment.FileName) ? 
"unknown" : attachment.FileName; - var (fileContent, contentType) = await GetAttachmentContentForSummaryAsync(attachment, fileName); + + await using var attachmentStream = await OpenAttachmentStreamAsync(attachment, fileName); + var extractedText = await textExtractionService.ExtractTextAsync(fileName, attachmentStream.Content, attachmentStream.ContentType); var summaryResponse = await aiService.GenerateAttachmentSummaryAsync(new AttachmentSummaryRequest { FileName = fileName, - FileContent = fileContent, - ContentType = contentType, + ContentType = attachmentStream.ContentType, + ExtractedText = extractedText, PromptVersion = promptVersion, }); @@ -68,7 +71,7 @@ public async Task> GenerateForApplicationAsync(Guid applicationId, return await GenerateAndSaveAsync(attachmentIds, promptVersion); } - private async Task<(byte[] Content, string ContentType)> GetAttachmentContentForSummaryAsync(ApplicationChefsFileAttachment attachment, string fileName) + private async Task OpenAttachmentStreamAsync(ApplicationChefsFileAttachment attachment, string fileName) { if (!Guid.TryParse(attachment.ChefsSubmissionId, out var submissionId) || !Guid.TryParse(attachment.ChefsFileId, out var fileId)) @@ -76,21 +79,13 @@ public async Task> GenerateForApplicationAsync(Guid applicationId, logger.LogWarning( "Attachment {AttachmentId} has invalid CHEFS IDs. Falling back to metadata-only summary generation.", attachment.Id); - return (Array.Empty(), DefaultContentType); + return ChefsFileAttachmentStream.Empty; } try { - var fileDto = await submissionAppService.GetChefsFileAttachment(submissionId, fileId, fileName); - if (fileDto?.Content == null) - { - logger.LogWarning( - "Attachment {AttachmentId} has no retrievable content. Falling back to metadata-only summary generation.", - attachment.Id); - return (Array.Empty(), DefaultContentType); - } - - return (fileDto.Content, string.IsNullOrWhiteSpace(fileDto.ContentType) ? 
DefaultContentType : fileDto.ContentType); + var stream = await submissionAppService.GetChefsFileAttachmentStream(submissionId, fileId, fileName); + return stream ?? ChefsFileAttachmentStream.Empty; } catch (Exception ex) { @@ -98,7 +93,7 @@ public async Task> GenerateForApplicationAsync(Guid applicationId, ex, "Failed retrieving CHEFS content for attachment {AttachmentId}. Falling back to metadata-only summary generation.", attachment.Id); - return (Array.Empty(), DefaultContentType); + return ChefsFileAttachmentStream.Empty; } } } diff --git a/applications/Unity.GrantManager/modules/Unity.AI/src/Unity.AI.Application/AI/Runtime/OpenAIRuntimeService.cs b/applications/Unity.GrantManager/modules/Unity.AI/src/Unity.AI.Application/AI/Runtime/OpenAIRuntimeService.cs index d9886d1f97..7d7ad49e34 100644 --- a/applications/Unity.GrantManager/modules/Unity.AI/src/Unity.AI.Application/AI/Runtime/OpenAIRuntimeService.cs +++ b/applications/Unity.GrantManager/modules/Unity.AI/src/Unity.AI.Application/AI/Runtime/OpenAIRuntimeService.cs @@ -6,7 +6,6 @@ using System.Linq; using System.Text.Json; using System.Threading.Tasks; -using Unity.AI.Extraction; using Unity.AI.Models; using Unity.AI.Prompts; using Unity.AI.Requests; @@ -20,7 +19,6 @@ public class OpenAIRuntimeService : IAIService, ITransientDependency { private readonly IConfiguration _configuration; private readonly ILogger _logger; - private readonly ITextExtractionService _textExtractionService; private readonly OpenAITransportService _openAITransportService; private readonly OpenAIConfigurationResolver _openAIConfigurationResolver; private const string ApplicationAnalysisPromptType = AIPromptTypes.ApplicationAnalysis; @@ -51,13 +49,11 @@ public class OpenAIRuntimeService : IAIService, ITransientDependency public OpenAIRuntimeService( IConfiguration configuration, ILogger logger, - ITextExtractionService textExtractionService, OpenAITransportService openAITransportService, OpenAIConfigurationResolver 
openAIConfigurationResolver) { _configuration = configuration; _logger = logger; - _textExtractionService = textExtractionService; _openAITransportService = openAITransportService; _openAIConfigurationResolver = openAIConfigurationResolver; } @@ -132,19 +128,18 @@ public async Task GenerateAttachmentSummaryAsync(Atta { ArgumentNullException.ThrowIfNull(request); var fileName = request.FileName ?? string.Empty; - var fileContent = request.FileContent ?? Array.Empty(); var contentType = request.ContentType ?? "application/octet-stream"; var promptVersion = OpenAIPromptRenderer.ResolvePromptVersion(request.PromptVersion ?? ResolvePromptVersionSetting(AttachmentSummaryPromptType)); try { - var extractedText = await _textExtractionService.ExtractTextAsync(fileName, fileContent, contentType); + var extractedText = request.ExtractedText; var prompt = OpenAIPromptRenderer.BuildAttachmentSummarySystemPrompt(promptVersion); var attachmentText = string.IsNullOrWhiteSpace(extractedText) ? null : extractedText; if (attachmentText != null) { - _logger.LogDebug("Extracted {TextLength} characters from {FileName}", extractedText.Length, fileName); + _logger.LogDebug("Received {TextLength} extracted characters for {FileName}", attachmentText.Length, fileName); } else { @@ -155,21 +150,20 @@ public async Task GenerateAttachmentSummaryAsync(Atta { name = fileName, contentType, - sizeBytes = fileContent.Length, text = attachmentText }; var attachment = JsonSerializer.Serialize(attachmentPayload, JsonLogOptions); var contentToAnalyze = OpenAIPromptRenderer.BuildAttachmentSummaryUserPrompt(promptVersion, attachment); await LogPromptInputAsync(AttachmentSummaryPromptType, promptVersion, prompt, contentToAnalyze); - var result = await GenerateWithRetryAsync( - () => _openAITransportService.GenerateSummaryAsync( - contentToAnalyze, - prompt, - AttachmentSummaryCompletionTokens, - operationName: AttachmentSummaryPromptType, - promptVersion: promptVersion, - fileName: fileName), + var result 
= await GenerateWithRetryAsync( + () => _openAITransportService.GenerateSummaryAsync( + contentToAnalyze, + prompt, + AttachmentSummaryCompletionTokens, + operationName: AttachmentSummaryPromptType, + promptVersion: promptVersion, + fileName: fileName), AIProviderPayloadValidator.IsValidAttachmentSummaryText, "attachment summary"); await LogPromptOutputAsync(AttachmentSummaryPromptType, promptVersion, result.CaptureOutput); diff --git a/applications/Unity.GrantManager/modules/Unity.SharedKernel/Http/IResilientHttpRequest.cs b/applications/Unity.GrantManager/modules/Unity.SharedKernel/Http/IResilientHttpRequest.cs index 8e83068427..ea461b2d2c 100644 --- a/applications/Unity.GrantManager/modules/Unity.SharedKernel/Http/IResilientHttpRequest.cs +++ b/applications/Unity.GrantManager/modules/Unity.SharedKernel/Http/IResilientHttpRequest.cs @@ -17,6 +17,7 @@ Task HttpAsync( object? body = null, string? authToken = null, (string username, string password)? basicAuth = null, + HttpCompletionOption completionOption = HttpCompletionOption.ResponseContentRead, CancellationToken cancellationToken = default); /// diff --git a/applications/Unity.GrantManager/modules/Unity.SharedKernel/Http/ResilientHttpRequest.cs b/applications/Unity.GrantManager/modules/Unity.SharedKernel/Http/ResilientHttpRequest.cs index 3933d93c2f..109c0a816d 100644 --- a/applications/Unity.GrantManager/modules/Unity.SharedKernel/Http/ResilientHttpRequest.cs +++ b/applications/Unity.GrantManager/modules/Unity.SharedKernel/Http/ResilientHttpRequest.cs @@ -113,10 +113,11 @@ public async Task HttpAsync( object? body = null, string? authToken = null, (string username, string password)? 
basicAuth = null, + HttpCompletionOption completionOption = HttpCompletionOption.ResponseContentRead, CancellationToken cancellationToken = default) { return await SendWithClientAsync( - _httpClient, httpVerb, resource, body, authToken, basicAuth, cancellationToken); + _httpClient, httpVerb, resource, body, authToken, basicAuth, completionOption, cancellationToken); } @@ -137,7 +138,7 @@ public Task HttpAsyncSecured( EnsureMutualTlsClient(certPath, certPassword); return SendWithClientAsync( - _mtlsClient!, httpVerb, resource, body, authToken, basicAuth, cancellationToken); + _mtlsClient!, httpVerb, resource, body, authToken, basicAuth, HttpCompletionOption.ResponseContentRead, cancellationToken); } @@ -191,6 +192,7 @@ private async Task SendWithClientAsync( object? body, string? authToken, (string username, string password)? basicAuth, + HttpCompletionOption completionOption, CancellationToken cancellationToken) { // Build final URL @@ -208,7 +210,7 @@ private async Task SendWithClientAsync( using var requestMessage = BuildRequestMessage(httpVerb, fullUrl, body, authToken, basicAuth); - return await client.SendAsync(requestMessage, ct) + return await client.SendAsync(requestMessage, completionOption, ct) .ConfigureAwait(false); }, cancellationToken); diff --git a/applications/Unity.GrantManager/src/Unity.GrantManager.Application.Contracts/Intakes/ChefsFileAttachmentStream.cs b/applications/Unity.GrantManager/src/Unity.GrantManager.Application.Contracts/Intakes/ChefsFileAttachmentStream.cs new file mode 100644 index 0000000000..ed9f19f4be --- /dev/null +++ b/applications/Unity.GrantManager/src/Unity.GrantManager.Application.Contracts/Intakes/ChefsFileAttachmentStream.cs @@ -0,0 +1,28 @@ +using System; +using System.IO; +using System.Threading.Tasks; + +namespace Unity.GrantManager.Intakes; + +/// +/// Stream of a CHEFS file attachment plus its content type. +/// The Content stream owns its underlying temp file; dispose to release. 
+/// +public sealed class ChefsFileAttachmentStream : IDisposable, IAsyncDisposable +{ + public Stream Content { get; } + public string ContentType { get; } + + public ChefsFileAttachmentStream(Stream content, string contentType) + { + Content = content ?? throw new ArgumentNullException(nameof(content)); + ContentType = string.IsNullOrWhiteSpace(contentType) ? "application/octet-stream" : contentType; + } + + public static ChefsFileAttachmentStream Empty { get; } = + new(Stream.Null, "application/octet-stream"); + + public void Dispose() => Content.Dispose(); + + public ValueTask DisposeAsync() => Content.DisposeAsync(); +} diff --git a/applications/Unity.GrantManager/src/Unity.GrantManager.Application.Contracts/Intakes/ISubmissionAppService.cs b/applications/Unity.GrantManager/src/Unity.GrantManager.Application.Contracts/Intakes/ISubmissionAppService.cs index 618cf756ac..da04b9c730 100644 --- a/applications/Unity.GrantManager/src/Unity.GrantManager.Application.Contracts/Intakes/ISubmissionAppService.cs +++ b/applications/Unity.GrantManager/src/Unity.GrantManager.Application.Contracts/Intakes/ISubmissionAppService.cs @@ -33,4 +33,10 @@ public interface ISubmissionAppService : IApplicationService /// File name of the chefs attachment /// BlobDto Task GetChefsFileAttachment(Guid? formSubmissionId, Guid? chefsFileAttachmentId, string name); + + /// + /// Get a CHEFS file attachment as a Stream backed by a temp file (deleted on close). + /// Avoids buffering the full file in managed memory. + /// + Task GetChefsFileAttachmentStream(Guid? formSubmissionId, Guid? 
chefsFileAttachmentId, string name); } diff --git a/applications/Unity.GrantManager/src/Unity.GrantManager.Application/Intakes/SubmissionAppService.cs b/applications/Unity.GrantManager/src/Unity.GrantManager.Application/Intakes/SubmissionAppService.cs index 7d2faae6a8..a00e104e9c 100644 --- a/applications/Unity.GrantManager/src/Unity.GrantManager.Application/Intakes/SubmissionAppService.cs +++ b/applications/Unity.GrantManager/src/Unity.GrantManager.Application/Intakes/SubmissionAppService.cs @@ -1,6 +1,7 @@ using Microsoft.AspNetCore.Authorization; using System; using System.Collections.Generic; +using System.IO; using System.Linq; using System.Text.Json; using System.Text.Json.Serialization; @@ -111,6 +112,86 @@ public async Task GetChefsFileAttachment(Guid? formSubmissionId, Guid? return new BlobDto { Name = name, Content = contentBytes, ContentType = contentType }; } + [AllowAnonymous] + public async Task GetChefsFileAttachmentStream(Guid? formSubmissionId, Guid? chefsFileAttachmentId, string name) + { + if (formSubmissionId == null) + { + throw new ApiException(400, "Missing required parameter 'formId' when calling GetSubmission"); + } + + if (chefsFileAttachmentId == null) + { + throw new ApiException(400, "Missing required parameter 'chefsFileAttachmentId' when calling GetFileAttachment"); + } + + ApplicationForm? applicationForm = await GetApplicationFormBySubmissionId(formSubmissionId) ?? 
throw new ApiException(400, "Missing Form configuration"); + if (applicationForm.ChefsApplicationFormGuid == null) + { + throw new ApiException(400, "Missing CHEFS form Id"); + } + + if (applicationForm.ApiKey == null) + { + throw new ApiException(400, "Missing CHEFS Api Key"); + } + + string chefsApi = await endpointManagementAppService.GetChefsApiBaseUrlAsync(); + string url = $"{chefsApi}/files/{chefsFileAttachmentId}"; + var decryptedApiKey = stringEncryptionService.Decrypt(applicationForm.ApiKey!); + + using var response = await resilientRestClient.HttpAsync( + HttpMethod.Get, + url, + null, + null, + basicAuth: (applicationForm.ChefsApplicationFormGuid!, decryptedApiKey ?? string.Empty), + completionOption: HttpCompletionOption.ResponseHeadersRead + ); + + if (((int)response.StatusCode) != 200) + { + var errorContent = response.Content != null ? await response.Content.ReadAsStringAsync() : string.Empty; + throw new ApiException((int)response.StatusCode, "Error calling GetChefsFileAttachment: " + errorContent, response.ReasonPhrase ?? $"{response.StatusCode}"); + } + + var contentType = response.Content?.Headers?.ContentType?.MediaType ?? "application/octet-stream"; + var extension = !string.IsNullOrEmpty(name) ? 
Path.GetExtension(Uri.UnescapeDataString(name)) : string.Empty; + var tempPath = Path.Combine(Path.GetTempPath(), $"{Guid.NewGuid():N}{extension}"); + + try + { + await using (var writeStream = new FileStream(tempPath, FileMode.CreateNew, FileAccess.Write, FileShare.None, 81920, FileOptions.Asynchronous | FileOptions.SequentialScan)) + await using (var contentStream = await response.Content!.ReadAsStreamAsync()) + { + await contentStream.CopyToAsync(writeStream); + } + + var readStream = new FileStream(tempPath, FileMode.Open, FileAccess.Read, FileShare.Read, 81920, FileOptions.Asynchronous | FileOptions.SequentialScan | FileOptions.DeleteOnClose); + return new ChefsFileAttachmentStream(readStream, contentType); + } + catch + { + TryDeleteTempFile(tempPath); + throw; + } + } + + private static void TryDeleteTempFile(string tempPath) + { + try + { + if (File.Exists(tempPath)) + { + File.Delete(tempPath); + } + } + catch + { + // Best-effort cleanup; never throw from cleanup path. + } + } + public async Task GetApplicationFormBySubmissionId(Guid? 
formSubmissionId) { From e12d3e40deb18e6536c79bc40c5ada628f324efb Mon Sep 17 00:00:00 2001 From: Jacob Smith Date: Wed, 6 May 2026 10:03:42 -0700 Subject: [PATCH 2/2] AB#32302 keep attachment streams internal --- .../AI/Operations/AttachmentSummaryService.cs | 4 +- .../IChefsFileAttachmentStreamProvider.cs | 9 ++ .../Intakes/ISubmissionAppService.cs | 5 - .../ChefsFileAttachmentStreamProvider.cs | 102 ++++++++++++++++++ .../Intakes/SubmissionAppService.cs | 96 ++--------------- .../AttachmentSummaryServiceTests.cs | 77 +++++++++++++ 6 files changed, 197 insertions(+), 96 deletions(-) create mode 100644 applications/Unity.GrantManager/src/Unity.GrantManager.Application.Contracts/Intakes/IChefsFileAttachmentStreamProvider.cs create mode 100644 applications/Unity.GrantManager/src/Unity.GrantManager.Application/Intakes/ChefsFileAttachmentStreamProvider.cs create mode 100644 applications/Unity.GrantManager/test/Unity.GrantManager.Application.Tests/AI/Operations/AttachmentSummaryServiceTests.cs diff --git a/applications/Unity.GrantManager/modules/Unity.AI/src/Unity.AI.Application/AI/Operations/AttachmentSummaryService.cs b/applications/Unity.GrantManager/modules/Unity.AI/src/Unity.AI.Application/AI/Operations/AttachmentSummaryService.cs index b3a74688e9..b103aedb73 100644 --- a/applications/Unity.GrantManager/modules/Unity.AI/src/Unity.AI.Application/AI/Operations/AttachmentSummaryService.cs +++ b/applications/Unity.GrantManager/modules/Unity.AI/src/Unity.AI.Application/AI/Operations/AttachmentSummaryService.cs @@ -13,7 +13,7 @@ namespace Unity.AI.Operations; public class AttachmentSummaryService( IApplicationChefsFileAttachmentRepository applicationChefsFileAttachmentRepository, - ISubmissionAppService submissionAppService, + IChefsFileAttachmentStreamProvider chefsFileAttachmentStreamProvider, ITextExtractionService textExtractionService, IAIService aiService, ILogger logger) : IAttachmentSummaryService, ITransientDependency @@ -84,7 +84,7 @@ private async Task 
OpenAttachmentStreamAsync(Applicat try { - var stream = await submissionAppService.GetChefsFileAttachmentStream(submissionId, fileId, fileName); + var stream = await chefsFileAttachmentStreamProvider.OpenAsync(submissionId, fileId, fileName); return stream ?? ChefsFileAttachmentStream.Empty; } catch (Exception ex) diff --git a/applications/Unity.GrantManager/src/Unity.GrantManager.Application.Contracts/Intakes/IChefsFileAttachmentStreamProvider.cs b/applications/Unity.GrantManager/src/Unity.GrantManager.Application.Contracts/Intakes/IChefsFileAttachmentStreamProvider.cs new file mode 100644 index 0000000000..36bc4b778b --- /dev/null +++ b/applications/Unity.GrantManager/src/Unity.GrantManager.Application.Contracts/Intakes/IChefsFileAttachmentStreamProvider.cs @@ -0,0 +1,9 @@ +using System; +using System.Threading.Tasks; + +namespace Unity.GrantManager.Intakes; + +public interface IChefsFileAttachmentStreamProvider +{ + Task OpenAsync(Guid formSubmissionId, Guid chefsFileAttachmentId, string name); +} diff --git a/applications/Unity.GrantManager/src/Unity.GrantManager.Application.Contracts/Intakes/ISubmissionAppService.cs b/applications/Unity.GrantManager/src/Unity.GrantManager.Application.Contracts/Intakes/ISubmissionAppService.cs index da04b9c730..784fa67f15 100644 --- a/applications/Unity.GrantManager/src/Unity.GrantManager.Application.Contracts/Intakes/ISubmissionAppService.cs +++ b/applications/Unity.GrantManager/src/Unity.GrantManager.Application.Contracts/Intakes/ISubmissionAppService.cs @@ -34,9 +34,4 @@ public interface ISubmissionAppService : IApplicationService /// BlobDto Task GetChefsFileAttachment(Guid? formSubmissionId, Guid? chefsFileAttachmentId, string name); - /// - /// Get a CHEFS file attachment as a Stream backed by a temp file (deleted on close). - /// Avoids buffering the full file in managed memory. - /// - Task GetChefsFileAttachmentStream(Guid? formSubmissionId, Guid? 
chefsFileAttachmentId, string name); } diff --git a/applications/Unity.GrantManager/src/Unity.GrantManager.Application/Intakes/ChefsFileAttachmentStreamProvider.cs b/applications/Unity.GrantManager/src/Unity.GrantManager.Application/Intakes/ChefsFileAttachmentStreamProvider.cs new file mode 100644 index 0000000000..d043403326 --- /dev/null +++ b/applications/Unity.GrantManager/src/Unity.GrantManager.Application/Intakes/ChefsFileAttachmentStreamProvider.cs @@ -0,0 +1,102 @@ +using System; +using System.IO; +using System.Net.Http; +using System.Threading.Tasks; +using Unity.GrantManager.Applications; +using Unity.GrantManager.Integrations; +using Unity.Modules.Shared.Http; +using Volo.Abp.DependencyInjection; +using Volo.Abp.Domain.Repositories; +using Volo.Abp.Security.Encryption; + +namespace Unity.GrantManager.Intakes; + +public class ChefsFileAttachmentStreamProvider( + IApplicationFormSubmissionRepository applicationFormSubmissionRepository, + IRepository applicationFormRepository, + IEndpointManagementAppService endpointManagementAppService, + IResilientHttpRequest resilientRestClient, + IStringEncryptionService stringEncryptionService) + : IChefsFileAttachmentStreamProvider, ITransientDependency +{ + public async Task OpenAsync(Guid formSubmissionId, Guid chefsFileAttachmentId, string name) + { + var applicationForm = await GetApplicationFormBySubmissionId(formSubmissionId) + ?? 
throw new ApiException(400, "Missing Form configuration"); + + if (applicationForm.ChefsApplicationFormGuid == null) + { + throw new ApiException(400, "Missing CHEFS form Id"); + } + + if (applicationForm.ApiKey == null) + { + throw new ApiException(400, "Missing CHEFS Api Key"); + } + + var chefsApi = await endpointManagementAppService.GetChefsApiBaseUrlAsync(); + var url = $"{chefsApi}/files/{chefsFileAttachmentId}"; + var decryptedApiKey = stringEncryptionService.Decrypt(applicationForm.ApiKey); + + using var response = await resilientRestClient.HttpAsync( + HttpMethod.Get, + url, + null, + null, + basicAuth: (applicationForm.ChefsApplicationFormGuid, decryptedApiKey ?? string.Empty), + completionOption: HttpCompletionOption.ResponseHeadersRead + ); + + if (((int)response.StatusCode) != 200) + { + var errorContent = response.Content != null ? await response.Content.ReadAsStringAsync() : string.Empty; + throw new ApiException((int)response.StatusCode, "Error calling GetChefsFileAttachment: " + errorContent, response.ReasonPhrase ?? $"{response.StatusCode}"); + } + + var contentType = response.Content?.Headers?.ContentType?.MediaType ?? "application/octet-stream"; + var extension = !string.IsNullOrEmpty(name) ? 
Path.GetExtension(Uri.UnescapeDataString(name)) : string.Empty; + var tempPath = Path.Combine(Path.GetTempPath(), $"{Guid.NewGuid():N}{extension}"); + + try + { + await using (var writeStream = new FileStream(tempPath, FileMode.CreateNew, FileAccess.Write, FileShare.None, 81920, FileOptions.Asynchronous | FileOptions.SequentialScan)) + await using (var contentStream = await response.Content!.ReadAsStreamAsync()) + { + await contentStream.CopyToAsync(writeStream); + } + + var readStream = new FileStream(tempPath, FileMode.Open, FileAccess.Read, FileShare.Read, 81920, FileOptions.Asynchronous | FileOptions.SequentialScan | FileOptions.DeleteOnClose); + return new ChefsFileAttachmentStream(readStream, contentType); + } + catch + { + TryDeleteTempFile(tempPath); + throw; + } + } + + private async Task GetApplicationFormBySubmissionId(Guid formSubmissionId) + { + var submission = await applicationFormSubmissionRepository.FirstOrDefaultAsync( + x => x.ChefsSubmissionGuid == formSubmissionId.ToString()); + + return submission == null + ? null + : await applicationFormRepository.FirstOrDefaultAsync(x => x.Id == submission.ApplicationFormId); + } + + private static void TryDeleteTempFile(string tempPath) + { + try + { + if (File.Exists(tempPath)) + { + File.Delete(tempPath); + } + } + catch + { + // Best-effort cleanup; never throw from cleanup path. 
+ } + } +} diff --git a/applications/Unity.GrantManager/src/Unity.GrantManager.Application/Intakes/SubmissionAppService.cs b/applications/Unity.GrantManager/src/Unity.GrantManager.Application/Intakes/SubmissionAppService.cs index a00e104e9c..66efab4511 100644 --- a/applications/Unity.GrantManager/src/Unity.GrantManager.Application/Intakes/SubmissionAppService.cs +++ b/applications/Unity.GrantManager/src/Unity.GrantManager.Application/Intakes/SubmissionAppService.cs @@ -1,10 +1,9 @@ -using Microsoft.AspNetCore.Authorization; -using System; -using System.Collections.Generic; -using System.IO; -using System.Linq; -using System.Text.Json; -using System.Text.Json.Serialization; +using Microsoft.AspNetCore.Authorization; +using System; +using System.Collections.Generic; +using System.Linq; +using System.Text.Json; +using System.Text.Json.Serialization; using System.Threading.Tasks; using Unity.GrantManager.Applications; using Unity.GrantManager.Attachments; @@ -112,88 +111,7 @@ public async Task GetChefsFileAttachment(Guid? formSubmissionId, Guid? return new BlobDto { Name = name, Content = contentBytes, ContentType = contentType }; } - [AllowAnonymous] - public async Task GetChefsFileAttachmentStream(Guid? formSubmissionId, Guid? chefsFileAttachmentId, string name) - { - if (formSubmissionId == null) - { - throw new ApiException(400, "Missing required parameter 'formId' when calling GetSubmission"); - } - - if (chefsFileAttachmentId == null) - { - throw new ApiException(400, "Missing required parameter 'chefsFileAttachmentId' when calling GetFileAttachment"); - } - - ApplicationForm? applicationForm = await GetApplicationFormBySubmissionId(formSubmissionId) ?? 
throw new ApiException(400, "Missing Form configuration"); - if (applicationForm.ChefsApplicationFormGuid == null) - { - throw new ApiException(400, "Missing CHEFS form Id"); - } - - if (applicationForm.ApiKey == null) - { - throw new ApiException(400, "Missing CHEFS Api Key"); - } - - string chefsApi = await endpointManagementAppService.GetChefsApiBaseUrlAsync(); - string url = $"{chefsApi}/files/{chefsFileAttachmentId}"; - var decryptedApiKey = stringEncryptionService.Decrypt(applicationForm.ApiKey!); - - using var response = await resilientRestClient.HttpAsync( - HttpMethod.Get, - url, - null, - null, - basicAuth: (applicationForm.ChefsApplicationFormGuid!, decryptedApiKey ?? string.Empty), - completionOption: HttpCompletionOption.ResponseHeadersRead - ); - - if (((int)response.StatusCode) != 200) - { - var errorContent = response.Content != null ? await response.Content.ReadAsStringAsync() : string.Empty; - throw new ApiException((int)response.StatusCode, "Error calling GetChefsFileAttachment: " + errorContent, response.ReasonPhrase ?? $"{response.StatusCode}"); - } - - var contentType = response.Content?.Headers?.ContentType?.MediaType ?? "application/octet-stream"; - var extension = !string.IsNullOrEmpty(name) ? 
Path.GetExtension(Uri.UnescapeDataString(name)) : string.Empty; - var tempPath = Path.Combine(Path.GetTempPath(), $"{Guid.NewGuid():N}{extension}"); - - try - { - await using (var writeStream = new FileStream(tempPath, FileMode.CreateNew, FileAccess.Write, FileShare.None, 81920, FileOptions.Asynchronous | FileOptions.SequentialScan)) - await using (var contentStream = await response.Content!.ReadAsStreamAsync()) - { - await contentStream.CopyToAsync(writeStream); - } - - var readStream = new FileStream(tempPath, FileMode.Open, FileAccess.Read, FileShare.Read, 81920, FileOptions.Asynchronous | FileOptions.SequentialScan | FileOptions.DeleteOnClose); - return new ChefsFileAttachmentStream(readStream, contentType); - } - catch - { - TryDeleteTempFile(tempPath); - throw; - } - } - - private static void TryDeleteTempFile(string tempPath) - { - try - { - if (File.Exists(tempPath)) - { - File.Delete(tempPath); - } - } - catch - { - // Best-effort cleanup; never throw from cleanup path. - } - } - - - public async Task GetApplicationFormBySubmissionId(Guid? formSubmissionId) + public async Task GetApplicationFormBySubmissionId(Guid? formSubmissionId) { ApplicationForm? 
applicationFormData = new(); diff --git a/applications/Unity.GrantManager/test/Unity.GrantManager.Application.Tests/AI/Operations/AttachmentSummaryServiceTests.cs b/applications/Unity.GrantManager/test/Unity.GrantManager.Application.Tests/AI/Operations/AttachmentSummaryServiceTests.cs new file mode 100644 index 0000000000..90d79cf6df --- /dev/null +++ b/applications/Unity.GrantManager/test/Unity.GrantManager.Application.Tests/AI/Operations/AttachmentSummaryServiceTests.cs @@ -0,0 +1,77 @@ +using Microsoft.Extensions.Logging.Abstractions; +using NSubstitute; +using Shouldly; +using System; +using System.IO; +using System.Threading.Tasks; +using Unity.AI; +using Unity.AI.Extraction; +using Unity.AI.Operations; +using Unity.AI.Requests; +using Unity.AI.Responses; +using Unity.GrantManager.Applications; +using Unity.GrantManager.Intakes; +using Xunit; +using Xunit.Abstractions; + +namespace Unity.GrantManager.AI.Operations; + +public class AttachmentSummaryServiceTests : GrantManagerApplicationTestBase +{ + public AttachmentSummaryServiceTests(ITestOutputHelper outputHelper) : base(outputHelper) + { + } + + [Fact] + public async Task GenerateAndSaveAsync_Uses_Streamed_Attachment_Text() + { + var attachmentId = Guid.NewGuid(); + var submissionId = Guid.NewGuid(); + var fileId = Guid.NewGuid(); + var stream = new MemoryStream([1, 2, 3]); + AttachmentSummaryRequest? 
capturedRequest = null; + + var attachment = new ApplicationChefsFileAttachment + { + ApplicationId = Guid.NewGuid(), + FileName = "test.txt", + ChefsSubmissionId = submissionId.ToString(), + ChefsFileId = fileId.ToString() + }; + + var attachmentRepository = Substitute.For(); + attachmentRepository.GetAsync(attachmentId).Returns(attachment); + + var streamProvider = Substitute.For(); + streamProvider.OpenAsync(submissionId, fileId, "test.txt") + .Returns(new ChefsFileAttachmentStream(stream, "text/plain")); + + var textExtractionService = Substitute.For(); + textExtractionService.ExtractTextAsync("test.txt", stream, "text/plain") + .Returns("extracted text"); + + var aiService = Substitute.For(); + aiService.GenerateAttachmentSummaryAsync(Arg.Do(request => capturedRequest = request)) + .Returns(new AttachmentSummaryResponse { Summary = "summary text" }); + + var service = new AttachmentSummaryService( + attachmentRepository, + streamProvider, + textExtractionService, + aiService, + NullLogger.Instance); + + var result = await service.GenerateAndSaveAsync(attachmentId, "v1"); + + result.ShouldBe("summary text"); + capturedRequest.ShouldNotBeNull(); + capturedRequest.FileName.ShouldBe("test.txt"); + capturedRequest.ContentType.ShouldBe("text/plain"); + capturedRequest.ExtractedText.ShouldBe("extracted text"); + capturedRequest.PromptVersion.ShouldBe("v1"); + attachment.AISummary.ShouldBe("summary text"); + + await attachmentRepository.Received(1).UpdateAsync(attachment); + stream.CanRead.ShouldBeFalse(); + } +}