Skip to content

Commit 97c842b

Browse files
authored
Merge pull request #17 from managedcode/copilot/fix-a5739cef-4fa8-47cd-abad-93850f97a530
Fix async/await: Remove unnecessary Task.Run usage in document converters
2 parents 27e1aa7 + 563a2d6 commit 97c842b

4 files changed

Lines changed: 69 additions & 84 deletions

File tree

src/MarkItDown/Converters/DocxConverter.cs

Lines changed: 8 additions & 11 deletions
Original file line number | Diff line number | Diff line change
@@ -89,22 +89,19 @@ public async Task<DocumentConverterResult> ConvertAsync(Stream stream, StreamInf
8989
}
9090
}
9191

92-
private static async Task<string> ExtractTextFromDocxAsync(Stream stream, CancellationToken cancellationToken)
92+
private static Task<string> ExtractTextFromDocxAsync(Stream stream, CancellationToken cancellationToken)
9393
{
9494
var result = new StringBuilder();
9595

96-
await Task.Run(() =>
97-
{
98-
using var wordDocument = WordprocessingDocument.Open(stream, false);
99-
var body = wordDocument.MainDocumentPart?.Document?.Body;
96+
using var wordDocument = WordprocessingDocument.Open(stream, false);
97+
var body = wordDocument.MainDocumentPart?.Document?.Body;
10098

101-
if (body != null)
102-
{
103-
ProcessBodyElements(body, result, cancellationToken);
104-
}
105-
}, cancellationToken);
99+
if (body != null)
100+
{
101+
ProcessBodyElements(body, result, cancellationToken);
102+
}
106103

107-
return result.ToString().Trim();
104+
return Task.FromResult(result.ToString().Trim());
108105
}
109106

110107
private static void ProcessBodyElements(Body body, StringBuilder result, CancellationToken cancellationToken)

src/MarkItDown/Converters/PdfConverter.cs

Lines changed: 36 additions & 42 deletions
Original file line number | Diff line number | Diff line change
@@ -271,33 +271,30 @@ private sealed class PdfPigTextExtractor : IPdfTextExtractor
271271
{
272272
public Task<string> ExtractTextAsync(byte[] pdfBytes, CancellationToken cancellationToken)
273273
{
274-
return Task.Run(() =>
275-
{
276-
var builder = new StringBuilder();
277-
278-
using var pdfDocument = PdfDocument.Open(pdfBytes);
274+
var builder = new StringBuilder();
279275

280-
for (var pageNumber = 1; pageNumber <= pdfDocument.NumberOfPages; pageNumber++)
281-
{
282-
cancellationToken.ThrowIfCancellationRequested();
283-
var page = pdfDocument.GetPage(pageNumber);
284-
var pageText = page.Text;
276+
using var pdfDocument = PdfDocument.Open(pdfBytes);
285277

286-
if (string.IsNullOrWhiteSpace(pageText))
287-
{
288-
continue;
289-
}
278+
for (var pageNumber = 1; pageNumber <= pdfDocument.NumberOfPages; pageNumber++)
279+
{
280+
cancellationToken.ThrowIfCancellationRequested();
281+
var page = pdfDocument.GetPage(pageNumber);
282+
var pageText = page.Text;
290283

291-
if (builder.Length > 0)
292-
{
293-
builder.AppendLine("\n---\n");
294-
}
284+
if (string.IsNullOrWhiteSpace(pageText))
285+
{
286+
continue;
287+
}
295288

296-
builder.AppendLine(pageText.Trim());
289+
if (builder.Length > 0)
290+
{
291+
builder.AppendLine("\n---\n");
297292
}
298293

299-
return builder.ToString();
300-
}, cancellationToken);
294+
builder.AppendLine(pageText.Trim());
295+
}
296+
297+
return Task.FromResult(builder.ToString());
301298
}
302299
}
303300

@@ -322,34 +319,31 @@ public Task<IReadOnlyList<string>> RenderImagesAsync(byte[] pdfBytes, Cancellati
322319
[SupportedOSPlatform("ios")]
323320
private static Task<IReadOnlyList<string>> RenderOnSupportedPlatformsAsync(byte[] pdfBytes, CancellationToken cancellationToken)
324321
{
325-
return Task.Run(() =>
322+
var images = new List<string>();
323+
var options = new RenderOptions
326324
{
327-
var images = new List<string>();
328-
var options = new RenderOptions
329-
{
330-
Dpi = 144,
331-
WithAnnotations = true,
332-
WithAspectRatio = true,
333-
AntiAliasing = PdfAntiAliasing.All,
334-
};
325+
Dpi = 144,
326+
WithAnnotations = true,
327+
WithAspectRatio = true,
328+
AntiAliasing = PdfAntiAliasing.All,
329+
};
335330

336331
#pragma warning disable CA1416
337-
foreach (var bitmap in Conversion.ToImages(pdfBytes, password: null, options))
332+
foreach (var bitmap in Conversion.ToImages(pdfBytes, password: null, options))
333+
{
334+
cancellationToken.ThrowIfCancellationRequested();
335+
using var bmp = bitmap;
336+
using var data = bmp.Encode(SKEncodedImageFormat.Png, quality: 90);
337+
if (data is null)
338338
{
339-
cancellationToken.ThrowIfCancellationRequested();
340-
using var bmp = bitmap;
341-
using var data = bmp.Encode(SKEncodedImageFormat.Png, quality: 90);
342-
if (data is null)
343-
{
344-
continue;
345-
}
346-
347-
images.Add(Convert.ToBase64String(data.Span));
339+
continue;
348340
}
341+
342+
images.Add(Convert.ToBase64String(data.Span));
343+
}
349344
#pragma warning restore CA1416
350345

351-
return (IReadOnlyList<string>)images;
352-
}, cancellationToken);
346+
return Task.FromResult<IReadOnlyList<string>>(images);
353347
}
354348
}
355349
}

src/MarkItDown/Converters/PptxConverter.cs

Lines changed: 13 additions & 16 deletions
Original file line number | Diff line number | Diff line change
@@ -91,31 +91,28 @@ public async Task<DocumentConverterResult> ConvertAsync(Stream stream, StreamInf
9191
}
9292
}
9393

94-
private static async Task<string> ExtractContentFromPptxAsync(Stream stream, CancellationToken cancellationToken)
94+
private static Task<string> ExtractContentFromPptxAsync(Stream stream, CancellationToken cancellationToken)
9595
{
9696
var result = new StringBuilder();
9797

98-
await Task.Run(() =>
98+
using var presentationDocument = PresentationDocument.Open(stream, false);
99+
var presentationPart = presentationDocument.PresentationPart;
100+
101+
if (presentationPart?.Presentation?.SlideIdList != null)
99102
{
100-
using var presentationDocument = PresentationDocument.Open(stream, false);
101-
var presentationPart = presentationDocument.PresentationPart;
103+
var slideCount = 0;
102104

103-
if (presentationPart?.Presentation?.SlideIdList != null)
105+
foreach (var slideId in presentationPart.Presentation.SlideIdList.Elements<SlideId>())
104106
{
105-
var slideCount = 0;
107+
cancellationToken.ThrowIfCancellationRequested();
106108

107-
foreach (var slideId in presentationPart.Presentation.SlideIdList.Elements<SlideId>())
108-
{
109-
cancellationToken.ThrowIfCancellationRequested();
110-
111-
slideCount++;
112-
var slidePart = (SlidePart)presentationPart.GetPartById(slideId.RelationshipId!);
113-
ProcessSlide(slidePart, slideCount, result);
114-
}
109+
slideCount++;
110+
var slidePart = (SlidePart)presentationPart.GetPartById(slideId.RelationshipId!);
111+
ProcessSlide(slidePart, slideCount, result);
115112
}
116-
}, cancellationToken);
113+
}
117114

118-
return result.ToString().Trim();
115+
return Task.FromResult(result.ToString().Trim());
119116
}
120117

121118
private static void ProcessSlide(SlidePart slidePart, int slideNumber, StringBuilder result)

src/MarkItDown/Converters/XlsxConverter.cs

Lines changed: 12 additions & 15 deletions
Original file line number | Diff line number | Diff line change
@@ -89,28 +89,25 @@ public async Task<DocumentConverterResult> ConvertAsync(Stream stream, StreamInf
8989
}
9090
}
9191

92-
private static async Task<string> ExtractDataFromXlsxAsync(Stream stream, CancellationToken cancellationToken)
92+
private static Task<string> ExtractDataFromXlsxAsync(Stream stream, CancellationToken cancellationToken)
9393
{
9494
var result = new StringBuilder();
9595

96-
await Task.Run(() =>
96+
using var spreadsheetDocument = SpreadsheetDocument.Open(stream, false);
97+
var workbookPart = spreadsheetDocument.WorkbookPart;
98+
99+
if (workbookPart?.Workbook?.Sheets != null)
97100
{
98-
using var spreadsheetDocument = SpreadsheetDocument.Open(stream, false);
99-
var workbookPart = spreadsheetDocument.WorkbookPart;
100-
101-
if (workbookPart?.Workbook?.Sheets != null)
101+
foreach (var sheet in workbookPart.Workbook.Sheets.Elements<Sheet>())
102102
{
103-
foreach (var sheet in workbookPart.Workbook.Sheets.Elements<Sheet>())
104-
{
105-
cancellationToken.ThrowIfCancellationRequested();
106-
107-
var worksheetPart = (WorksheetPart)workbookPart.GetPartById(sheet.Id!);
108-
ProcessWorksheet(worksheetPart, sheet.Name?.Value ?? "Sheet", result, workbookPart);
109-
}
103+
cancellationToken.ThrowIfCancellationRequested();
104+
105+
var worksheetPart = (WorksheetPart)workbookPart.GetPartById(sheet.Id!);
106+
ProcessWorksheet(worksheetPart, sheet.Name?.Value ?? "Sheet", result, workbookPart);
110107
}
111-
}, cancellationToken);
108+
}
112109

113-
return result.ToString().Trim();
110+
return Task.FromResult(result.ToString().Trim());
114111
}
115112

116113
private static void ProcessWorksheet(WorksheetPart worksheetPart, string sheetName, StringBuilder result, WorkbookPart workbookPart)

0 commit comments

Comments (0)