From 86d1b1e70b7f8ed73bb7a211b68bbf8903fc7a35 Mon Sep 17 00:00:00 2001 From: Peter Chapman Date: Thu, 16 Apr 2026 14:15:12 +1200 Subject: [PATCH 1/6] Add support for per-chapter remarks --- .../Corpora/ParatextProjectTextUpdaterBase.cs | 2 +- .../Corpora/UpdateUsfmParserHandler.cs | 70 +++++++++++++---- .../Corpora/UpdateUsfmParserHandlerTests.cs | 77 ++++++++++++++++++- 3 files changed, 129 insertions(+), 20 deletions(-) diff --git a/src/SIL.Machine/Corpora/ParatextProjectTextUpdaterBase.cs b/src/SIL.Machine/Corpora/ParatextProjectTextUpdaterBase.cs index 5b0731c4..13b71b84 100644 --- a/src/SIL.Machine/Corpora/ParatextProjectTextUpdaterBase.cs +++ b/src/SIL.Machine/Corpora/ParatextProjectTextUpdaterBase.cs @@ -29,7 +29,7 @@ public string UpdateUsfm( UpdateUsfmMarkerBehavior styleBehavior = UpdateUsfmMarkerBehavior.Strip, IEnumerable preserveParagraphStyles = null, IEnumerable updateBlockHandlers = null, - IEnumerable remarks = null, + IEnumerable<(int, string)> remarks = null, Func errorHandler = null, bool compareSegments = false ) diff --git a/src/SIL.Machine/Corpora/UpdateUsfmParserHandler.cs b/src/SIL.Machine/Corpora/UpdateUsfmParserHandler.cs index 76a59336..1a891d4c 100644 --- a/src/SIL.Machine/Corpora/UpdateUsfmParserHandler.cs +++ b/src/SIL.Machine/Corpora/UpdateUsfmParserHandler.cs @@ -60,7 +60,7 @@ public class UpdateUsfmParserHandler : ScriptureRefUsfmParserHandlerBase private readonly HashSet _preserveParagraphStyles; private readonly Stack _updateBlocks; private readonly Stack _updateBlockHandlers; - private readonly List _remarks; + private readonly List<(int, string)> _remarks; private readonly Stack _replace; private int _tokenIndex; private readonly Func _errorHandler; @@ -76,7 +76,7 @@ public UpdateUsfmParserHandler( UpdateUsfmMarkerBehavior styleBehavior = UpdateUsfmMarkerBehavior.Strip, IEnumerable preserveParagraphStyles = null, IEnumerable updateBlockHandlers = null, - IEnumerable remarks = null, + IEnumerable<(int, string)> remarks = null, Func errorHandler = null, bool compareSegments = false ) @@ -107,7 +107,7 @@ public UpdateUsfmParserHandler( preserveParagraphStyles == null ? new HashSet { "r", "rem" } : new HashSet(preserveParagraphStyles); - _remarks = remarks?.ToList() ?? new List(); + _remarks = remarks?.ToList() ?? new List<(int, string)>(); _errorHandler = errorHandler; if (_errorHandler == null) _errorHandler = (error) => false; @@ -433,26 +433,66 @@ public string GetUsfm(string stylesheetFileName = "usfm.sty") public string GetUsfm(UsfmStylesheet stylesheet) { var tokenizer = new UsfmTokenizer(stylesheet); - List tokens = new List(_tokens); - if (_remarks.Count() > 0) + var tokens = new List(_tokens); + if (_remarks.Count > 0) { - var remarkTokens = new List(); - foreach (string remark in _remarks) + var remarkTokensByChapter = new Dictionary>(); + foreach ((int chapterNum, string remark) in _remarks) { - remarkTokens.Add(new UsfmToken(UsfmTokenType.Paragraph, "rem", null, null)); - remarkTokens.Add(new UsfmToken(remark)); + // Add the remark tokens for each chapter that is to have remarks + if (!remarkTokensByChapter.TryGetValue(chapterNum, out List chapterTokens)) + { + chapterTokens = new List(); + remarkTokensByChapter.Add(chapterNum, chapterTokens); + } + + chapterTokens.Add(new UsfmToken(UsfmTokenType.Paragraph, "rem", null, null)); + chapterTokens.Add(new UsfmToken(remark)); } if (tokens.Count > 0) { - int index = 0; - HashSet markersToSkip = new HashSet() { "id", "ide", "rem" }; - while (markersToSkip.Contains(tokens[index].Marker)) + foreach (KeyValuePair> remarkTokens in remarkTokensByChapter) { - index++; - if (tokens.Count > index && tokens[index].Type == UsfmTokenType.Text) + int index; + HashSet markersToSkip; + if (remarkTokens.Key == 0) + { + // Add the remarks at the top level of the USFM, + // after the book id, encode, and any initial comments + index = 0; + markersToSkip = new HashSet { "id", "ide", "rem" }; + } + else + { + // Add the remarks just after the specified chapter + index = tokens.FindIndex(t => + t.Type == UsfmTokenType.Chapter + && int.TryParse(t.Data, out int chapterNumber) + && chapterNumber == remarkTokens.Key + ); + if (index == -1) + continue; index++; + markersToSkip = new HashSet(); + } + + if (index >= tokens.Count) + { + // The remark insertion point is at the very end + tokens.AddRange(remarkTokens.Value); + } + else + { + while (markersToSkip.Contains(tokens[index].Marker)) + { + index++; + if (tokens.Count > index && tokens[index].Type == UsfmTokenType.Text) + index++; + } + + tokens.InsertRange(index, remarkTokens.Value); + } } - tokens.InsertRange(index, remarkTokens); } } diff --git a/tests/SIL.Machine.Tests/Corpora/UpdateUsfmParserHandlerTests.cs b/tests/SIL.Machine.Tests/Corpora/UpdateUsfmParserHandlerTests.cs index 9b5219c3..e6f6ac65 100644 --- a/tests/SIL.Machine.Tests/Corpora/UpdateUsfmParserHandlerTests.cs +++ b/tests/SIL.Machine.Tests/Corpora/UpdateUsfmParserHandlerTests.cs @@ -1380,7 +1380,7 @@ public void GetUsfm_IdTags() } [Test] - public void GetUsfm_PreferExisting_AddRemark() + public void GetUsfm_PreferExisting_AddRemarkToStart() { var rows = new List { @@ -1400,7 +1400,7 @@ public void GetUsfm_PreferExisting_AddRemark() rows, usfm, textBehavior: UpdateUsfmTextBehavior.PreferExisting, - remarks: ["New remark"] + remarks: [(0, "New remark")] ); string result = @"\id MAT - Test @@ -1419,7 +1419,7 @@ public void GetUsfm_PreferExisting_AddRemark() rows, target, textBehavior: UpdateUsfmTextBehavior.PreferExisting, - remarks: ["New remark 2"] + remarks: [(0, "New remark 2")] ); result = @"\id MAT - Test @@ -1436,6 +1436,75 @@ public void GetUsfm_PreferExisting_AddRemark() AssertUsfmEquals(target, result); } + [Test] + public void GetUsfm_PreferExisting_AddRemarkToChapter() + { + var rows = new List + { + new UpdateUsfmRow(ScrRef("MAT 2:1"), "Update 1"), + new UpdateUsfmRow(ScrRef("MAT 2:2"), "Update 2"), + }; + string usfm = + @"\id MAT - Test +\ide UTF-8 +\c 1 +\v 1 Chapter 1, Verse 1 +\c 2 +\rem Existing remark +\v 1 Some text +\v 2 +\v 3 Other text +\c 3 +"; + string target = UpdateUsfm( + rows, + usfm, + textBehavior: UpdateUsfmTextBehavior.PreferExisting, + remarks: [(2, "New remark"), (3, "Last remark"), (4, "Remark for missing chapter")] + ); + string result = + @"\id MAT - Test +\ide UTF-8 +\c 1 +\v 1 Chapter 1, Verse 1 +\c 2 +\rem New remark +\rem Existing remark +\v 1 Some text +\v 2 Update 2 +\v 3 Other text +\c 3 +\rem Last remark +"; + + AssertUsfmEquals(target, result); + + target = UpdateUsfm( + rows, + target, + textBehavior: UpdateUsfmTextBehavior.PreferExisting, + remarks: [(1, "New remark 2"), (2, "New remark 3")] + ); + result = + @"\id MAT - Test +\ide UTF-8 +\c 1 +\rem New remark 2 +\v 1 Chapter 1, Verse 1 +\c 2 +\rem New remark 3 +\rem New remark +\rem Existing remark +\v 1 Some text +\v 2 Update 2 +\v 3 Other text +\c 3 +\rem Last remark +"; + + AssertUsfmEquals(target, result); + } + [Test] public void UpdateBlock_FootnoteInPublishedChapterNumber() { @@ -1536,7 +1605,7 @@ private static string UpdateUsfm( UpdateUsfmMarkerBehavior styleBehavior = UpdateUsfmMarkerBehavior.Strip, IEnumerable? preserveParagraphStyles = null, IEnumerable? usfmUpdateBlockHandlers = null, - IEnumerable? remarks = null, + IEnumerable<(int, string)>? remarks = null, bool compareSegments = false ) { From e941fc09ec7f163692edea9c311b2998a8563ccd Mon Sep 17 00:00:00 2001 From: Peter Chapman Date: Thu, 23 Apr 2026 14:57:49 +1200 Subject: [PATCH 2/6] Port of per-chapter USFM filtering --- .../Corpora/ParatextProjectTextUpdaterBase.cs | 6 +- src/SIL.Machine/Corpora/UsfmTokenizer.cs | 54 +++++++++++++- .../Corpora/UpdateUsfmParserHandlerTests.cs | 72 ++++++++++++++++++- 3 files changed, 128 insertions(+), 4 deletions(-) diff --git a/src/SIL.Machine/Corpora/ParatextProjectTextUpdaterBase.cs b/src/SIL.Machine/Corpora/ParatextProjectTextUpdaterBase.cs index 13b71b84..241e2330 100644 --- a/src/SIL.Machine/Corpora/ParatextProjectTextUpdaterBase.cs +++ b/src/SIL.Machine/Corpora/ParatextProjectTextUpdaterBase.cs @@ -22,6 +22,7 @@ ParatextProjectSettings settings public string UpdateUsfm( string bookId, IReadOnlyList rows, + IReadOnlyList chapters = null, string fullName = null, UpdateUsfmTextBehavior textBehavior = UpdateUsfmTextBehavior.PreferExisting, UpdateUsfmMarkerBehavior paragraphBehavior = UpdateUsfmMarkerBehavior.Preserve, @@ -59,7 +60,10 @@ public string UpdateUsfm( ); try { - UsfmParser.Parse(usfm, handler, _settings.Stylesheet, _settings.Versification); + var tokenizer = new UsfmTokenizer(_settings.Stylesheet); + IReadOnlyList tokens = tokenizer.Tokenize(usfm, filterTokensByChapter: chapters); + var parser = new UsfmParser(tokens, handler, _settings.Stylesheet, _settings.Versification); + parser.ProcessTokens(); return handler.GetUsfm(_settings.Stylesheet); } catch (Exception ex) diff --git a/src/SIL.Machine/Corpora/UsfmTokenizer.cs b/src/SIL.Machine/Corpora/UsfmTokenizer.cs index f07886df..a2ab70f4 100644 --- a/src/SIL.Machine/Corpora/UsfmTokenizer.cs +++ b/src/SIL.Machine/Corpora/UsfmTokenizer.cs @@ -37,7 +37,11 @@ public UsfmTokenizer(UsfmStylesheet stylesheet, RtlReferenceOrder rtlReferenceOr public UsfmStylesheet Stylesheet { get; } public RtlReferenceOrder RtlReferenceOrder { get; } - public IReadOnlyList Tokenize(string usfm, bool preserveWhitespace = false) + public IReadOnlyList Tokenize( + string usfm, + bool preserveWhitespace = false, + IReadOnlyList filterTokensByChapter = null + ) { List tokens = new List(); @@ -409,7 +413,7 @@ public IReadOnlyList Tokenize(string usfm, bool preserveWhitespace = } } - return tokens; + return FilterTokensByChapter(tokens, filterTokensByChapter); } public string Detokenize(IEnumerable tokens, bool tokensHaveWhitespace = false) @@ -534,6 +538,52 @@ public string Detokenize(IEnumerable tokens, bool tokensHaveWhitespac return usfm.ToString(); } + /// + /// Filters tokens by the specified chapters. + /// + /// The tokens. + /// The chapters. If null, all tokens are returned. + /// The filtered tokens. + private static IReadOnlyList FilterTokensByChapter( + IReadOnlyList tokens, + IReadOnlyList chapters = null + ) + { + if (chapters is null) + return tokens; + + var tokensWithinChapters = new List(); + bool inChapter = false; + bool inIdMarker = false; + + for (int index = 0; index < tokens.Count; index++) + { + UsfmToken token = tokens[index]; + if (index == 0 && token.Marker == "id") + { + inIdMarker = true; + if (chapters.Contains(1)) + inChapter = true; + } + else if (inIdMarker && token.Marker != null && token.Marker != "id") + { + inIdMarker = false; + } + else if (token.Type == UsfmTokenType.Chapter) + { + inChapter = + !string.IsNullOrEmpty(token.Data) + && int.TryParse(token.Data, out int chapter) + && chapters.Contains(chapter); + } + + if (inIdMarker || inChapter) + tokensWithinChapters.Add(token); + } + + return tokensWithinChapters; + } + /// /// Gets the next word in the usfm and advances the index past it /// diff --git a/tests/SIL.Machine.Tests/Corpora/UpdateUsfmParserHandlerTests.cs b/tests/SIL.Machine.Tests/Corpora/UpdateUsfmParserHandlerTests.cs index e6f6ac65..9f0b0025 100644 --- a/tests/SIL.Machine.Tests/Corpora/UpdateUsfmParserHandlerTests.cs +++ b/tests/SIL.Machine.Tests/Corpora/UpdateUsfmParserHandlerTests.cs @@ -1590,6 +1590,71 @@ public void UpdateBlock_FootnoteAtStartOfChapterWithPrecedingText() ); } + [Test] + public void FilterChapters() + { + string usfm = + @"\id MAT - Test +\h Matthew +\c 1 +\v 1 Some text +\v 2 +\v 3 Other text +\c 2 +\v 1 Some text +\c 3 +\v 1 Some text +\c 4 +\v 1 Some text +"; + + string target = UpdateUsfm(source: usfm, chapters: [2, 4]); + + string result = + @"\id MAT - Test +\c 2 +\v 1 Some text +\c 4 +\v 1 Some text +"; + + AssertUsfmEquals(target, result); + } + + [Test] + public void FilterChapters_WithChapterOneAndHeader() + { + string usfm = + @"\id MAT - Test +\h Matthew +\c 1 +\v 1 Some text +\v 2 +\v 3 Other text +\c 2 +\v 1 Some text +\c 3 +\v 1 Some text +\c 4 +\v 1 Some text +"; + + string target = UpdateUsfm(source: usfm, chapters: [1, 3]); + + string result = + @"\id MAT - Test +\h Matthew +\c 1 +\v 1 Some text +\v 2 +\v 3 Other text +\c 3 +\v 1 Some text +"; + + AssertUsfmEquals(target, result); + } + private static ScriptureRef[] ScrRef(params string[] refs) { return refs.Select(r => ScriptureRef.Parse(r)).ToArray(); @@ -1598,6 +1663,7 @@ private static ScriptureRef[] ScrRef(params string[] refs) private static string UpdateUsfm( IReadOnlyList? rows = null, string? source = null, + IReadOnlyList? chapters = null, string? idText = null, UpdateUsfmTextBehavior textBehavior = UpdateUsfmTextBehavior.PreferNew, UpdateUsfmMarkerBehavior paragraphBehavior = UpdateUsfmMarkerBehavior.Preserve, @@ -1615,6 +1681,7 @@ private static string UpdateUsfm( return updater.UpdateUsfm( "MAT", rows, + chapters, idText, textBehavior, paragraphBehavior, @@ -1643,7 +1710,10 @@ private static string UpdateUsfm( (_) => false, compareSegments ); - UsfmParser.Parse(source, updater); + var tokenizer = new UsfmTokenizer(); + IReadOnlyList tokens = tokenizer.Tokenize(source, filterTokensByChapter: chapters); + var parser = new UsfmParser(tokens, updater); + parser.ProcessTokens(); return updater.GetUsfm(); } } From c3c5ca603f08c4a19733dde5a6947925f8c69044 Mon Sep 17 00:00:00 2001 From: Peter Chapman Date: Tue, 28 Apr 2026 14:26:39 +1200 Subject: [PATCH 3/6] Update to match machine.py --- .../Corpora/ParatextProjectTextUpdaterBase.cs | 51 ++++++- .../Corpora/UpdateUsfmParserHandler.cs | 2 +- src/SIL.Machine/Corpora/UsfmTokenizer.cs | 54 +------- .../Corpora/UpdateUsfmParserHandlerTests.cs | 129 +++++++++++------- 4 files changed, 134 insertions(+), 102 deletions(-) diff --git a/src/SIL.Machine/Corpora/ParatextProjectTextUpdaterBase.cs b/src/SIL.Machine/Corpora/ParatextProjectTextUpdaterBase.cs index 241e2330..48736ed6 100644 --- a/src/SIL.Machine/Corpora/ParatextProjectTextUpdaterBase.cs +++ b/src/SIL.Machine/Corpora/ParatextProjectTextUpdaterBase.cs @@ -1,6 +1,7 @@ using System; using System.Collections.Generic; using System.IO; +using System.Linq; using System.Text; namespace SIL.Machine.Corpora @@ -61,7 +62,8 @@ public string UpdateUsfm( try { var tokenizer = new UsfmTokenizer(_settings.Stylesheet); - IReadOnlyList tokens = tokenizer.Tokenize(usfm, filterTokensByChapter: chapters); + IReadOnlyList tokens = tokenizer.Tokenize(usfm); + tokens = FilterTokensByChapter(tokens, chapters); var parser = new UsfmParser(tokens, handler, _settings.Stylesheet, _settings.Versification); parser.ProcessTokens(); return handler.GetUsfm(_settings.Stylesheet); @@ -77,6 +79,53 @@ public string UpdateUsfm( } } + /// + /// Filters tokens by the specified chapters. + /// + /// The tokens. + /// The chapters. If null, all tokens are returned. + /// The filtered tokens. + /// This is marked internal so test classes can use it. + internal static IReadOnlyList FilterTokensByChapter( + IReadOnlyList tokens, + IReadOnlyList chapters = null + ) + { + if (chapters is null) + return tokens; + + var tokensWithinChapters = new List(); + bool inChapter = false; + bool inIdMarker = false; + + for (int index = 0; index < tokens.Count; index++) + { + UsfmToken token = tokens[index]; + if (index == 0 && token.Marker == "id") + { + inIdMarker = true; + if (chapters.Contains(1)) + inChapter = true; + } + else if (inIdMarker && token.Marker != null && token.Marker != "id") + { + inIdMarker = false; + } + else if (token.Type == UsfmTokenType.Chapter) + { + inChapter = + !string.IsNullOrEmpty(token.Data) + && int.TryParse(token.Data, out int chapter) + && chapters.Contains(chapter); + } + + if (inIdMarker || inChapter) + tokensWithinChapters.Add(token); + } + + return tokensWithinChapters; + } + private bool Exists(string fileName) => _paratextProjectFileHandler.Exists(fileName); private Stream Open(string fileName) => _paratextProjectFileHandler.Open(fileName); diff --git a/src/SIL.Machine/Corpora/UpdateUsfmParserHandler.cs b/src/SIL.Machine/Corpora/UpdateUsfmParserHandler.cs index 1a891d4c..a5687439 100644 --- a/src/SIL.Machine/Corpora/UpdateUsfmParserHandler.cs +++ b/src/SIL.Machine/Corpora/UpdateUsfmParserHandler.cs @@ -473,7 +473,7 @@ public string GetUsfm(UsfmStylesheet stylesheet) if (index == -1) continue; index++; - markersToSkip = new HashSet(); + markersToSkip = new HashSet { "rem" }; } if (index >= tokens.Count) diff --git a/src/SIL.Machine/Corpora/UsfmTokenizer.cs b/src/SIL.Machine/Corpora/UsfmTokenizer.cs index a2ab70f4..f07886df 100644 --- a/src/SIL.Machine/Corpora/UsfmTokenizer.cs +++ b/src/SIL.Machine/Corpora/UsfmTokenizer.cs @@ -37,11 +37,7 @@ public UsfmTokenizer(UsfmStylesheet stylesheet, RtlReferenceOrder rtlReferenceOr public UsfmStylesheet Stylesheet { get; } public RtlReferenceOrder RtlReferenceOrder { get; } - public IReadOnlyList Tokenize( - string usfm, - bool preserveWhitespace = false, - IReadOnlyList filterTokensByChapter = null - ) + public IReadOnlyList Tokenize(string usfm, bool preserveWhitespace = false) { List tokens = new List(); @@ -413,7 +409,7 @@ public IReadOnlyList Tokenize( } } - return FilterTokensByChapter(tokens, filterTokensByChapter); + return tokens; } public string Detokenize(IEnumerable tokens, bool tokensHaveWhitespace = false) @@ -538,52 +534,6 @@ public string Detokenize(IEnumerable tokens, bool tokensHaveWhitespac return usfm.ToString(); } - /// - /// Filters tokens by the specified chapters. - /// - /// The tokens. - /// The chapters. If null, all tokens are returned. - /// The filtered tokens. - private static IReadOnlyList FilterTokensByChapter( - IReadOnlyList tokens, - IReadOnlyList chapters = null - ) - { - if (chapters is null) - return tokens; - - var tokensWithinChapters = new List(); - bool inChapter = false; - bool inIdMarker = false; - - for (int index = 0; index < tokens.Count; index++) - { - UsfmToken token = tokens[index]; - if (index == 0 && token.Marker == "id") - { - inIdMarker = true; - if (chapters.Contains(1)) - inChapter = true; - } - else if (inIdMarker && token.Marker != null && token.Marker != "id") - { - inIdMarker = false; - } - else if (token.Type == UsfmTokenType.Chapter) - { - inChapter = - !string.IsNullOrEmpty(token.Data) - && int.TryParse(token.Data, out int chapter) - && chapters.Contains(chapter); - } - - if (inIdMarker || inChapter) - tokensWithinChapters.Add(token); - } - - return tokensWithinChapters; - } - /// /// Gets the next word in the usfm and advances the index past it /// diff --git a/tests/SIL.Machine.Tests/Corpora/UpdateUsfmParserHandlerTests.cs b/tests/SIL.Machine.Tests/Corpora/UpdateUsfmParserHandlerTests.cs index 9f0b0025..177e6e2c 100644 --- a/tests/SIL.Machine.Tests/Corpora/UpdateUsfmParserHandlerTests.cs +++ b/tests/SIL.Machine.Tests/Corpora/UpdateUsfmParserHandlerTests.cs @@ -1380,13 +1380,14 @@ public void GetUsfm_IdTags() } [Test] - public void GetUsfm_PreferExisting_AddRemarkToStart() + public void GetUsfm_PassRemark() { var rows = new List { new UpdateUsfmRow(ScrRef("MAT 1:1"), "Update 1"), new UpdateUsfmRow(ScrRef("MAT 1:2"), "Update 2"), }; + string usfm = @"\id MAT - Test \ide UTF-8 @@ -1395,111 +1396,116 @@ public void GetUsfm_PreferExisting_AddRemarkToStart() \v 1 Some text \v 2 \v 3 Other text +\c 2 +\rem Existing remark +\v 1 More text +\c 3 "; + string target = UpdateUsfm( rows, usfm, textBehavior: UpdateUsfmTextBehavior.PreferExisting, - remarks: [(0, "New remark")] + remarks: [(0, "New remark 0"), (1, "New remark 1"), (2, "New remark 2"), (3, "New remark 3")] ); + string result = @"\id MAT - Test \ide UTF-8 \rem Existing remark -\rem New remark +\rem New remark 0 \c 1 +\rem New remark 1 \v 1 Some text \v 2 Update 2 \v 3 Other text -"; - - AssertUsfmEquals(target, result); - - target = UpdateUsfm( - rows, - target, - textBehavior: UpdateUsfmTextBehavior.PreferExisting, - remarks: [(0, "New remark 2")] - ); - result = - @"\id MAT - Test -\ide UTF-8 +\c 2 \rem Existing remark -\rem New remark \rem New remark 2 -\c 1 -\v 1 Some text -\v 2 Update 2 -\v 3 Other text +\v 1 More text +\c 3 +\rem New remark 3 "; AssertUsfmEquals(target, result); } [Test] - public void GetUsfm_PreferExisting_AddRemarkToChapter() + public void GetUsfm_PassRemark0_NoExistingRemark() { var rows = new List { - new UpdateUsfmRow(ScrRef("MAT 2:1"), "Update 1"), - new UpdateUsfmRow(ScrRef("MAT 2:2"), "Update 2"), + new UpdateUsfmRow(ScrRef("MAT 1:1"), "Update 1"), + new UpdateUsfmRow(ScrRef("MAT 1:2"), "Update 2"), }; + string usfm = @"\id MAT - Test \ide UTF-8 \c 1 -\v 1 Chapter 1, Verse 1 -\c 2 -\rem Existing remark \v 1 Some text \v 2 \v 3 Other text -\c 3 "; + string target = UpdateUsfm( rows, usfm, textBehavior: UpdateUsfmTextBehavior.PreferExisting, - remarks: [(2, "New remark"), (3, "Last remark"), (4, "Remark for missing chapter")] + remarks: [(0, "New remark 0")] ); + string result = @"\id MAT - Test \ide UTF-8 +\rem New remark 0 \c 1 -\v 1 Chapter 1, Verse 1 -\c 2 -\rem New remark -\rem Existing remark \v 1 Some text \v 2 Update 2 \v 3 Other text -\c 3 -\rem Last remark "; AssertUsfmEquals(target, result); + } - target = UpdateUsfm( + [Test] + public void GetUsfm_MultipleRemarksSameChapter() + { + var rows = new List + { + new UpdateUsfmRow(ScrRef("MAT 1:1"), "Update 1"), + new UpdateUsfmRow(ScrRef("MAT 1:2"), "Update 2"), + }; + + string usfm = + @"\id MAT - Test +\ide UTF-8 +\rem Existing remark +\c 1 +\v 1 Some text +\v 2 +\v 3 Other text +"; + + string target = UpdateUsfm( rows, - target, + usfm, textBehavior: UpdateUsfmTextBehavior.PreferExisting, - remarks: [(1, "New remark 2"), (2, "New remark 3")] + remarks: [(0, "New remark 0.1"), (0, "New remark 0.2"), (1, "New remark 1.1"), (1, "New remark 1.2")] ); - result = + + string result = @"\id MAT - Test \ide UTF-8 -\c 1 -\rem New remark 2 -\v 1 Chapter 1, Verse 1 -\c 2 -\rem New remark 3 -\rem New remark \rem Existing remark +\rem New remark 0.1 +\rem New remark 0.2 +\c 1 +\rem New remark 1.1 +\rem New remark 1.2 \v 1 Some text \v 2 Update 2 \v 3 Other text -\c 3 -\rem Last remark "; AssertUsfmEquals(target, result); @@ -1655,6 +1661,32 @@ public void FilterChapters_WithChapterOneAndHeader() AssertUsfmEquals(target, result); } + [Test] + public void FilterChapters_WithBadChapterReference() + { + string usfm = + @"\id MAT - Test +\c 1. +\v 1 Some text +\c 2. +\v 1 Some text +\c 3 +\v 1 Some text with good chapter reference +\c 4 +\v 1 Some text with good chapter reference +"; + + string target = UpdateUsfm(source: usfm, chapters: [2, 4]); + + string result = + @"\id MAT - Test +\c 4 +\v 1 Some text with good chapter reference +"; + + AssertUsfmEquals(target, result); + } + private static ScriptureRef[] ScrRef(params string[] refs) { return refs.Select(r => ScriptureRef.Parse(r)).ToArray(); @@ -1711,7 +1743,8 @@ private static string UpdateUsfm( compareSegments ); var tokenizer = new UsfmTokenizer(); - IReadOnlyList tokens = tokenizer.Tokenize(source, filterTokensByChapter: chapters); + IReadOnlyList tokens = tokenizer.Tokenize(source); + tokens = ParatextProjectTextUpdaterBase.FilterTokensByChapter(tokens, chapters); var parser = new UsfmParser(tokens, updater); parser.ProcessTokens(); return updater.GetUsfm(); From 4ba48e6bb8a6c8a0a74aa5eebde12707f5183150 Mon Sep 17 00:00:00 2001 From: Peter Chapman Date: Mon, 11 May 2026 09:43:55 +1200 Subject: [PATCH 4/6] Update to match machine.py main branch --- .../Corpora/ParatextProjectTextUpdaterBase.cs | 3 +- src/SIL.Machine/Corpora/UsfmParser.cs | 30 +++++++++++++++ .../MemoryParatextProjectFileHandler.cs | 15 ++------ .../MemoryParatextProjectTextUpdater.cs | 4 ++ .../Corpora/UpdateUsfmParserHandlerTests.cs | 37 ++++++++++--------- 5 files changed, 59 insertions(+), 30 deletions(-) create mode 100644 tests/SIL.Machine.Tests/Corpora/MemoryParatextProjectTextUpdater.cs diff --git a/src/SIL.Machine/Corpora/ParatextProjectTextUpdaterBase.cs b/src/SIL.Machine/Corpora/ParatextProjectTextUpdaterBase.cs index 48736ed6..e38e528f 100644 --- a/src/SIL.Machine/Corpora/ParatextProjectTextUpdaterBase.cs +++ b/src/SIL.Machine/Corpora/ParatextProjectTextUpdaterBase.cs @@ -64,8 +64,7 @@ public string UpdateUsfm( var tokenizer = new UsfmTokenizer(_settings.Stylesheet); IReadOnlyList tokens = tokenizer.Tokenize(usfm); tokens = FilterTokensByChapter(tokens, chapters); - var parser = new UsfmParser(tokens, handler, _settings.Stylesheet, _settings.Versification); - parser.ProcessTokens(); + UsfmParser.Parse(tokens, handler, _settings.Stylesheet, _settings.Versification); return handler.GetUsfm(_settings.Stylesheet); } catch (Exception ex) diff --git a/src/SIL.Machine/Corpora/UsfmParser.cs b/src/SIL.Machine/Corpora/UsfmParser.cs index d40e288a..e7282f9b 100644 --- a/src/SIL.Machine/Corpora/UsfmParser.cs +++ b/src/SIL.Machine/Corpora/UsfmParser.cs @@ -26,6 +26,17 @@ public static void Parse( Parse(usfm, handler, new UsfmStylesheet(stylesheetFileName), versification, preserveWhitespace); } + public static void Parse( + IReadOnlyList tokens, + IUsfmParserHandler handler, + string stylesheetFileName = "usfm.sty", + ScrVers versification = null, + bool preserveWhitespace = false + ) + { + Parse(tokens, handler, new UsfmStylesheet(stylesheetFileName), versification, preserveWhitespace); + } + public static void Parse( string usfm, IUsfmParserHandler handler, @@ -45,6 +56,25 @@ public static void Parse( parser.ProcessTokens(); } + public static void Parse( + IReadOnlyList tokens, + IUsfmParserHandler handler, + UsfmStylesheet stylesheet, + ScrVers versification = null, + bool preserveWhitespace = false + ) + { + var parser = new UsfmParser( + tokens, + handler, + stylesheet ?? new UsfmStylesheet("usfm.sty"), + versification, + preserveWhitespace + ); + + parser.ProcessTokens(); + } + private static readonly Regex OptBreakSplitter = new Regex("(//)", RegexOptions.Compiled); public UsfmParser( diff --git a/tests/SIL.Machine.Tests/Corpora/MemoryParatextProjectFileHandler.cs b/tests/SIL.Machine.Tests/Corpora/MemoryParatextProjectFileHandler.cs index b0858686..9fc826ca 100644 --- a/tests/SIL.Machine.Tests/Corpora/MemoryParatextProjectFileHandler.cs +++ b/tests/SIL.Machine.Tests/Corpora/MemoryParatextProjectFileHandler.cs @@ -7,22 +7,15 @@ public class MemoryParatextProjectFileHandler(IDictionary? files { public IDictionary Files { get; } = files ?? new Dictionary(); - public UsfmStylesheet CreateStylesheet(string fileName) - { - if (fileName is "usfm.sty" or "usfm_sb.sty") - return new UsfmStylesheet(fileName); - throw new NotImplementedException(); - } + public UsfmStylesheet CreateStylesheet(string fileName) => + fileName is "usfm.sty" or "usfm_sb.sty" ? new UsfmStylesheet(fileName) : throw new NotImplementedException(); public bool Exists(string fileName) { return Files.ContainsKey(fileName); } - public string Find(string extension) - { - throw new NotImplementedException(); - } + public string? Find(string extension) => Files.Keys.FirstOrDefault(item => item.EndsWith(extension)); public Stream? Open(string fileName) { @@ -66,5 +59,5 @@ public class DefaultParatextProjectSettings( translationType, parentGuid, parentName - ) { } + ); } diff --git a/tests/SIL.Machine.Tests/Corpora/MemoryParatextProjectTextUpdater.cs b/tests/SIL.Machine.Tests/Corpora/MemoryParatextProjectTextUpdater.cs new file mode 100644 index 00000000..b313a9f9 --- /dev/null +++ b/tests/SIL.Machine.Tests/Corpora/MemoryParatextProjectTextUpdater.cs @@ -0,0 +1,4 @@ +namespace SIL.Machine.Corpora; + +public class MemoryParatextProjectTextUpdater(IDictionary? files, ParatextProjectSettings settings) + : ParatextProjectTextUpdaterBase(new MemoryParatextProjectFileHandler(files), settings); diff --git a/tests/SIL.Machine.Tests/Corpora/UpdateUsfmParserHandlerTests.cs b/tests/SIL.Machine.Tests/Corpora/UpdateUsfmParserHandlerTests.cs index 177e6e2c..6b4b18f9 100644 --- a/tests/SIL.Machine.Tests/Corpora/UpdateUsfmParserHandlerTests.cs +++ b/tests/SIL.Machine.Tests/Corpora/UpdateUsfmParserHandlerTests.cs @@ -1382,11 +1382,11 @@ public void GetUsfm_IdTags() [Test] public void GetUsfm_PassRemark() { - var rows = new List - { + List rows = + [ new UpdateUsfmRow(ScrRef("MAT 1:1"), "Update 1"), new UpdateUsfmRow(ScrRef("MAT 1:2"), "Update 2"), - }; + ]; string usfm = @"\id MAT - Test @@ -1433,11 +1433,11 @@ public void GetUsfm_PassRemark() [Test] public void GetUsfm_PassRemark0_NoExistingRemark() { - var rows = new List - { + List rows = + [ new UpdateUsfmRow(ScrRef("MAT 1:1"), "Update 1"), new UpdateUsfmRow(ScrRef("MAT 1:2"), "Update 2"), - }; + ]; string usfm = @"\id MAT - Test @@ -1471,11 +1471,11 @@ public void GetUsfm_PassRemark0_NoExistingRemark() [Test] public void GetUsfm_MultipleRemarksSameChapter() { - var rows = new List - { + List rows = + [ new UpdateUsfmRow(ScrRef("MAT 1:1"), "Update 1"), new UpdateUsfmRow(ScrRef("MAT 1:2"), "Update 2"), - }; + ]; string usfm = @"\id MAT - Test @@ -1707,11 +1707,12 @@ private static string UpdateUsfm( bool compareSegments = false ) { + const string BookId = "MAT"; if (source is null) { var updater = new FileParatextProjectTextUpdater(CorporaTestHelpers.UsfmTestProjectPath); return updater.UpdateUsfm( - "MAT", + BookId, rows, chapters, idText, @@ -1729,8 +1730,16 @@ private static string UpdateUsfm( else { source = source.Trim().ReplaceLineEndings("\r\n") + "\r\n"; - var updater = new UpdateUsfmParserHandler( + var settings = new MemoryParatextProjectFileHandler.DefaultParatextProjectSettings( + fileNameForm: BookId, + fileNameSuffix: string.Empty + ); + var files = new Dictionary { [BookId] = source }; + var updater = new MemoryParatextProjectTextUpdater(files, settings); + return updater.UpdateUsfm( + BookId, rows, + chapters, idText, textBehavior, paragraphBehavior, @@ -1742,12 +1751,6 @@ private static string UpdateUsfm( (_) => false, compareSegments ); - var tokenizer = new UsfmTokenizer(); - IReadOnlyList tokens = tokenizer.Tokenize(source); - tokens = ParatextProjectTextUpdaterBase.FilterTokensByChapter(tokens, chapters); - var parser = new UsfmParser(tokens, updater); - parser.ProcessTokens(); - return updater.GetUsfm(); } } From 6399bc0118e31bd34e132ad38b8db62163d9190a Mon Sep 17 00:00:00 2001 From: Peter Chapman Date: Mon, 11 May 2026 09:49:04 +1200 Subject: [PATCH 5/6] Refactor DefaultParatextProjectSettings to its own file --- .../Corpora/DefaultParatextProjectSettings.cs | 41 +++++++++++++++++++ .../MemoryParatextProjectFileHandler.cs | 38 ----------------- .../MemoryParatextProjectTermsParser.cs | 2 +- .../MemoryParatextProjectTextUpdater.cs | 10 ++++- ...ratextProjectVersificationErrorDetector.cs | 4 +- .../ParatextProjectTermsParserTests.cs | 10 ++--- .../ParatextProjectVersificationErrorTests.cs | 8 +--- .../Corpora/UpdateUsfmParserHandlerTests.cs | 5 +-- ...yParatextProjectQuoteConventionDetector.cs | 4 +- 9 files changed, 62 insertions(+), 60 deletions(-) create mode 100644 tests/SIL.Machine.Tests/Corpora/DefaultParatextProjectSettings.cs diff --git a/tests/SIL.Machine.Tests/Corpora/DefaultParatextProjectSettings.cs b/tests/SIL.Machine.Tests/Corpora/DefaultParatextProjectSettings.cs new file mode 100644 index 00000000..ed24d9e5 --- /dev/null +++ b/tests/SIL.Machine.Tests/Corpora/DefaultParatextProjectSettings.cs @@ -0,0 +1,41 @@ +using System.Text; +using SIL.Scripture; + +namespace SIL.Machine.Corpora; + +public class DefaultParatextProjectSettings( + string id = "Id", + string name = "Test", + string fullName = "TestProject", + Encoding? encoding = null, + ScrVers? versification = null, + UsfmStylesheet? stylesheet = null, + string fileNamePrefix = "", + string fileNameForm = "41MAT", + string fileNameSuffix = "Test.SFM", + string biblicalTermsListType = "Project", + string biblicalTermsProjectName = "Test", + string biblicalTermsFileName = "ProjectBiblicalTerms.xml", + string languageCode = "en", + string translationType = "Standard", + string? parentGuid = null, + string? parentName = null +) + : ParatextProjectSettings( + id, + name, + fullName, + encoding ?? Encoding.UTF8, + versification ?? ScrVers.English, + stylesheet ?? new UsfmStylesheet("usfm.sty"), + fileNamePrefix, + fileNameForm, + fileNameSuffix, + biblicalTermsListType, + biblicalTermsProjectName, + biblicalTermsFileName, + languageCode, + translationType, + parentGuid, + parentName + ); diff --git a/tests/SIL.Machine.Tests/Corpora/MemoryParatextProjectFileHandler.cs b/tests/SIL.Machine.Tests/Corpora/MemoryParatextProjectFileHandler.cs index 9fc826ca..4b4f5fba 100644 --- a/tests/SIL.Machine.Tests/Corpora/MemoryParatextProjectFileHandler.cs +++ b/tests/SIL.Machine.Tests/Corpora/MemoryParatextProjectFileHandler.cs @@ -1,5 +1,4 @@ using System.Text; -using SIL.Scripture; namespace SIL.Machine.Corpora; @@ -23,41 +22,4 @@ public bool Exists(string fileName) return null; return new MemoryStream(Encoding.UTF8.GetBytes(contents)); } - - public class DefaultParatextProjectSettings( - string id = "Id", - string name = "Test", - string fullName = "TestProject", - Encoding? encoding = null, - ScrVers? versification = null, - UsfmStylesheet? stylesheet = null, - string fileNamePrefix = "", - string fileNameForm = "41MAT", - string fileNameSuffix = "Test.SFM", - string biblicalTermsListType = "Project", - string biblicalTermsProjectName = "Test", - string biblicalTermsFileName = "ProjectBiblicalTerms.xml", - string languageCode = "en", - string translationType = "Standard", - string? parentGuid = null, - string? parentName = null - ) - : ParatextProjectSettings( - id, - name, - fullName, - encoding ?? Encoding.UTF8, - versification ?? ScrVers.English, - stylesheet ?? new UsfmStylesheet("usfm.sty"), - fileNamePrefix, - fileNameForm, - fileNameSuffix, - biblicalTermsListType, - biblicalTermsProjectName, - biblicalTermsFileName, - languageCode, - translationType, - parentGuid, - parentName - ); } diff --git a/tests/SIL.Machine.Tests/Corpora/MemoryParatextProjectTermsParser.cs b/tests/SIL.Machine.Tests/Corpora/MemoryParatextProjectTermsParser.cs index a8c4c7c8..ffb815d7 100644 --- a/tests/SIL.Machine.Tests/Corpora/MemoryParatextProjectTermsParser.cs +++ b/tests/SIL.Machine.Tests/Corpora/MemoryParatextProjectTermsParser.cs @@ -3,5 +3,5 @@ namespace SIL.Machine.Corpora; public class MemoryParatextProjectTermsParser(IDictionary? files, ParatextProjectSettings? settings) : ParatextProjectTermsParserBase( new MemoryParatextProjectFileHandler(files), - settings ?? new MemoryParatextProjectFileHandler.DefaultParatextProjectSettings() + settings ?? new DefaultParatextProjectSettings() ) { } diff --git a/tests/SIL.Machine.Tests/Corpora/MemoryParatextProjectTextUpdater.cs b/tests/SIL.Machine.Tests/Corpora/MemoryParatextProjectTextUpdater.cs index b313a9f9..721fc20a 100644 --- a/tests/SIL.Machine.Tests/Corpora/MemoryParatextProjectTextUpdater.cs +++ b/tests/SIL.Machine.Tests/Corpora/MemoryParatextProjectTextUpdater.cs @@ -1,4 +1,10 @@ namespace SIL.Machine.Corpora; -public class MemoryParatextProjectTextUpdater(IDictionary? files, ParatextProjectSettings settings) - : ParatextProjectTextUpdaterBase(new MemoryParatextProjectFileHandler(files), settings); +public class MemoryParatextProjectTextUpdater( + IDictionary? files = null, + ParatextProjectSettings? settings = null +) + : ParatextProjectTextUpdaterBase( + new MemoryParatextProjectFileHandler(files), + settings ?? new DefaultParatextProjectSettings() + ); diff --git a/tests/SIL.Machine.Tests/Corpora/MemoryParatextProjectVersificationErrorDetector.cs b/tests/SIL.Machine.Tests/Corpora/MemoryParatextProjectVersificationErrorDetector.cs index d8f00008..c624d5c9 100644 --- a/tests/SIL.Machine.Tests/Corpora/MemoryParatextProjectVersificationErrorDetector.cs +++ b/tests/SIL.Machine.Tests/Corpora/MemoryParatextProjectVersificationErrorDetector.cs @@ -6,5 +6,5 @@ public class MemoryParatextProjectVersificationErrorDetector( ) : ParatextProjectVersificationErrorDetectorBase( new MemoryParatextProjectFileHandler(files), - settings ?? new MemoryParatextProjectFileHandler.DefaultParatextProjectSettings() - ) { } + settings ?? new DefaultParatextProjectSettings() + ); diff --git a/tests/SIL.Machine.Tests/Corpora/ParatextProjectTermsParserTests.cs b/tests/SIL.Machine.Tests/Corpora/ParatextProjectTermsParserTests.cs index 95ec0675..03d0f58a 100644 --- a/tests/SIL.Machine.Tests/Corpora/ParatextProjectTermsParserTests.cs +++ b/tests/SIL.Machine.Tests/Corpora/ParatextProjectTermsParserTests.cs @@ -45,7 +45,7 @@ public void TestGetKeyTermsFromTermsRenderings() public void TestGetKeyTermsFromTermsLocalizations_NoTermRenderings() { var env = new TestEnvironment( - new MemoryParatextProjectFileHandler.DefaultParatextProjectSettings( + new DefaultParatextProjectSettings( biblicalTermsListType: "Major", biblicalTermsFileName: "BiblicalTerms.xml" ), @@ -60,7 +60,7 @@ public void TestGetKeyTermsFromTermsLocalizations_NoTermRenderings() public void TestGetKeyTermsFromTermsLocalizations_NoTermRenderings_DoNotUseTermGlosses() { var env = new TestEnvironment( - new MemoryParatextProjectFileHandler.DefaultParatextProjectSettings( + new DefaultParatextProjectSettings( biblicalTermsListType: "Major", biblicalTermsFileName: "BiblicalTerms.xml" ), @@ -74,7 +74,7 @@ public void TestGetKeyTermsFromTermsLocalizations_NoTermRenderings_DoNotUseTermG public void TestGetKeyTermsFromTermsLocalizations() { var env = new TestEnvironment( - new MemoryParatextProjectFileHandler.DefaultParatextProjectSettings( + new DefaultParatextProjectSettings( biblicalTermsListType: "Major", biblicalTermsFileName: "BiblicalTerms.xml", languageCode: "fr" @@ -90,7 +90,7 @@ public void TestGetKeyTermsFromTermsLocalizations() public void TestGetKeyTermsFromTermsLocalizations_FilterByChapters() { var env = new TestEnvironment( - new MemoryParatextProjectFileHandler.DefaultParatextProjectSettings( + new DefaultParatextProjectSettings( biblicalTermsListType: "Major", biblicalTermsFileName: "BiblicalTerms.xml", languageCode: "fr" @@ -113,7 +113,7 @@ public void TestGetKeyTermsFromTermsLocalizations_FilterByChapters() public void TestGetKeyTermsFromTermsLocalizations_TermRenderingsExists_PreferLocalization() { var env = new TestEnvironment( - new MemoryParatextProjectFileHandler.DefaultParatextProjectSettings( + new DefaultParatextProjectSettings( biblicalTermsListType: "Major", biblicalTermsFileName: "BiblicalTerms.xml" ), diff --git a/tests/SIL.Machine.Tests/Corpora/ParatextProjectVersificationErrorTests.cs b/tests/SIL.Machine.Tests/Corpora/ParatextProjectVersificationErrorTests.cs index af462680..b4678fd9 100644 --- a/tests/SIL.Machine.Tests/Corpora/ParatextProjectVersificationErrorTests.cs +++ b/tests/SIL.Machine.Tests/Corpora/ParatextProjectVersificationErrorTests.cs @@ -212,9 +212,7 @@ public void GetUsfmVersificationErrors_ExtraVerseSegment() public void GetUsfmVersificationErrors_MissingVerseSegment() { var env = new TestEnvironment( - settings: new MemoryParatextProjectFileHandler.DefaultParatextProjectSettings( - versification: GetCustomVersification(@"*3JN 1:13,a,b") - ), + settings: new DefaultParatextProjectSettings(versification: GetCustomVersification(@"*3JN 1:13,a,b")), files: new Dictionary() { { @@ -270,9 +268,7 @@ public void GetUsfmVersificationErrors_IgnoreNonCanonicals() public void GetUsfmVersificationErrors_ExtraVerse_ExcludedInCustomVrs() { var env = new TestEnvironment( - settings: new MemoryParatextProjectFileHandler.DefaultParatextProjectSettings( - versification: GetCustomVersification(@"-3JN 1:13") - ), + settings: new DefaultParatextProjectSettings(versification: GetCustomVersification(@"-3JN 1:13")), files: new Dictionary() { { diff --git a/tests/SIL.Machine.Tests/Corpora/UpdateUsfmParserHandlerTests.cs b/tests/SIL.Machine.Tests/Corpora/UpdateUsfmParserHandlerTests.cs index 6b4b18f9..1a73c6e1 100644 --- a/tests/SIL.Machine.Tests/Corpora/UpdateUsfmParserHandlerTests.cs +++ b/tests/SIL.Machine.Tests/Corpora/UpdateUsfmParserHandlerTests.cs @@ -1730,10 +1730,7 @@ private static string UpdateUsfm( else { source = source.Trim().ReplaceLineEndings("\r\n") + "\r\n"; - var settings = new MemoryParatextProjectFileHandler.DefaultParatextProjectSettings( - fileNameForm: BookId, - fileNameSuffix: string.Empty - ); + var settings = new DefaultParatextProjectSettings(fileNameForm: BookId, fileNameSuffix: string.Empty); var files = new Dictionary { [BookId] = source }; var updater = new MemoryParatextProjectTextUpdater(files, settings); return updater.UpdateUsfm( diff --git a/tests/SIL.Machine.Tests/PunctuationAnalysis/MemoryParatextProjectQuoteConventionDetector.cs b/tests/SIL.Machine.Tests/PunctuationAnalysis/MemoryParatextProjectQuoteConventionDetector.cs index 6116b8f8..afd7009d 100644 --- a/tests/SIL.Machine.Tests/PunctuationAnalysis/MemoryParatextProjectQuoteConventionDetector.cs +++ b/tests/SIL.Machine.Tests/PunctuationAnalysis/MemoryParatextProjectQuoteConventionDetector.cs @@ -8,5 +8,5 @@ public class MemoryParatextProjectQuoteConventionDetector( ) : ParatextProjectQuoteConventionDetector( new MemoryParatextProjectFileHandler(files), - settings ?? new MemoryParatextProjectFileHandler.DefaultParatextProjectSettings() - ) { } + settings ?? new DefaultParatextProjectSettings() + ); From eb42d8d8fbabd49620b2579f1e557db508d6bae5 Mon Sep 17 00:00:00 2001 From: Peter Chapman Date: Mon, 11 May 2026 10:05:20 +1200 Subject: [PATCH 6/6] Clean up UpdateUsfmParserHandlerTests formatting --- .../Corpora/UpdateUsfmParserHandlerTests.cs | 333 ++++++++---------- 1 file changed, 151 insertions(+), 182 deletions(-) diff --git a/tests/SIL.Machine.Tests/Corpora/UpdateUsfmParserHandlerTests.cs b/tests/SIL.Machine.Tests/Corpora/UpdateUsfmParserHandlerTests.cs index 1a73c6e1..d168f8c6 100644 --- a/tests/SIL.Machine.Tests/Corpora/UpdateUsfmParserHandlerTests.cs +++ b/tests/SIL.Machine.Tests/Corpora/UpdateUsfmParserHandlerTests.cs @@ -9,10 +9,7 @@ public class UpdateUsfmParserHandlerTests [Test] public void GetUsfm_Verse_CharStyle() { - var rows = new List - { - new UpdateUsfmRow(ScrRef("MAT 1:1"), "First verse of the first chapter."), - }; + List rows = [new UpdateUsfmRow(ScrRef("MAT 1:1"), "First verse of the first chapter.")]; string target = UpdateUsfm(rows); Assert.That(target, Contains.Substring("\\id MAT - Test\r\n")); @@ -34,11 +31,11 @@ public void GetUsfm_IdText() [Test] public void GetUsfm_StripAllText() { - var rows = new List - { + List rows = + [ new UpdateUsfmRow(ScrRef("MAT 1:1"), "Update 1"), new UpdateUsfmRow(ScrRef("MAT 1:3"), "Update 3"), - }; + ]; string usfm = @"\id MAT - Test \c 1 @@ -103,12 +100,12 @@ public void GetUsfm_StripAllText() [Test] public void GetUsfm_StripParagraphs_PreserveParagraphStyles() { - var rows = new List - { + List rows = + [ new UpdateUsfmRow(ScrRef("MAT 1:0/1:rem"), "New remark"), new UpdateUsfmRow(ScrRef("MAT 1:0/3:ip"), "Another new remark"), new UpdateUsfmRow(ScrRef("MAT 1:1"), "Update 1"), - }; + ]; string usfm = @"\id MAT \c 1 @@ -157,11 +154,11 @@ public void GetUsfm_StripParagraphs_PreserveParagraphStyles() [Test] public void GetUsfm_PreserveParagraphs() { - var rows = new List - { + List rows = + [ new UpdateUsfmRow(ScrRef("MAT 1:0/1:rem"), "Update remark"), new UpdateUsfmRow(ScrRef("MAT 1:1"), "Update 1"), - }; + ]; string usfm = @"\id MAT \c 1 @@ -204,7 +201,7 @@ public void GetUsfm_PreserveParagraphs() [Test] public void GetUsfm_ParagraphInVerse() { - var rows = new List { new UpdateUsfmRow(ScrRef("MAT 1:1"), "Update 1") }; + List rows = [new UpdateUsfmRow(ScrRef("MAT 1:1"), "Update 1")]; string usfm = @"\id MAT - Test \c 1 @@ -248,11 +245,11 @@ public void GetUsfm_ParagraphInVerse() [Test] public void GetUsfm_PreferExisting() { - var rows = new List - { + List rows = + [ new UpdateUsfmRow(ScrRef("MAT 1:1"), "Update 1"), new UpdateUsfmRow(ScrRef("MAT 1:2"), "Update 2"), - }; + ]; string usfm = @"\id MAT - Test \c 1 @@ -274,11 +271,11 @@ public void GetUsfm_PreferExisting() [Test] public void GetUsfm_PreferRows() { - var rows = new List - { + List rows = + [ new UpdateUsfmRow(ScrRef("MAT 1:6"), "Text 6"), new UpdateUsfmRow(ScrRef("MAT 1:7"), "Text 7"), - }; + ]; string target = UpdateUsfm(rows, textBehavior: UpdateUsfmTextBehavior.PreferNew); Assert.That(target, Contains.Substring("\\id MAT - Test\r\n")); Assert.That(target, Contains.Substring("\\v 6 Text 6\r\n")); @@ -288,10 +285,7 @@ public void GetUsfm_PreferRows() [Test] public void GetUsfm_Verse_StripNote() { - var rows = new List - { - new UpdateUsfmRow(ScrRef("MAT 2:1"), "First verse of the second chapter."), - }; + List rows = [new UpdateUsfmRow(ScrRef("MAT 2:1"), "First verse of the second chapter.")]; string target = UpdateUsfm(rows, embedBehavior: UpdateUsfmMarkerBehavior.Strip); Assert.That(target, Contains.Substring("\\v 1 First verse of the second chapter.\r\n")); @@ -300,7 +294,7 @@ public void GetUsfm_Verse_StripNote() [Test] public void GetUsfm_Verse_ReplaceWithNote() { - var rows = new List { new UpdateUsfmRow(ScrRef("MAT 1:1"), "updated text") }; + List rows = [new UpdateUsfmRow(ScrRef("MAT 1:1"), "updated text")]; string usfm = @"\id MAT - Test \c 1 @@ -318,10 +312,7 @@ public void GetUsfm_Verse_ReplaceWithNote() [Test] public void GetUsfm_Verse_RowVerseSegment() { - var rows = new List - { - new UpdateUsfmRow(ScrRef("MAT 2:1a"), "First verse of the second chapter."), - }; + List rows = [new UpdateUsfmRow(ScrRef("MAT 2:1a"), "First verse of the second chapter.")]; string target = UpdateUsfm(rows); Assert.That( @@ -335,10 +326,7 @@ public void GetUsfm_Verse_RowVerseSegment() [Test] public void GetUsfm_Verse_UsfmVerseSegment() { - var rows = new List - { - new UpdateUsfmRow(ScrRef("MAT 2:7"), "Seventh verse of the second chapter."), - }; + List rows = [new UpdateUsfmRow(ScrRef("MAT 2:7"), "Seventh verse of the second chapter.")]; string target = UpdateUsfm(rows); Assert.That(target, Contains.Substring("\\v 7a Seventh verse of the second chapter.\r\n")); @@ -347,10 +335,7 @@ public void GetUsfm_Verse_UsfmVerseSegment() [Test] public void GetUsfm_Verse_MultipleParas() { - var rows = new List - { - new UpdateUsfmRow(ScrRef("MAT 1:2"), "Second verse of the first chapter."), - }; + List rows = [new UpdateUsfmRow(ScrRef("MAT 1:2"), "Second verse of the first chapter.")]; string target = UpdateUsfm(rows); Assert.That( @@ -364,10 +349,7 @@ public void GetUsfm_Verse_MultipleParas() [Test] public void GetUsfm_Verse_Table() { - var rows = new List - { - new UpdateUsfmRow(ScrRef("MAT 2:9"), "Ninth verse of the second chapter."), - }; + List rows = [new UpdateUsfmRow(ScrRef("MAT 2:9"), "Ninth verse of the second chapter.")]; string target = UpdateUsfm(rows); Assert.That(target, Contains.Substring("\\v 9 Ninth verse of the second chapter. \\tcr2 \\tc3 \\tcr4\r\n")); @@ -376,13 +358,13 @@ public void GetUsfm_Verse_Table() [Test] public void GetUsfm_Verse_RangeSingleRowMultipleVerses() { - var rows = new List - { + List rows = + [ new UpdateUsfmRow( ScrRef("MAT 2:11", "MAT 2:12"), "Eleventh verse of the second chapter. Twelfth verse of the second chapter." ), - }; + ]; string target = UpdateUsfm(rows); Assert.That( @@ -396,10 +378,7 @@ public void GetUsfm_Verse_RangeSingleRowMultipleVerses() [Test] public void GetUsfm_Verse_RangeSingleRowSingleVerse() { - var rows = new List - { - new UpdateUsfmRow(ScrRef("MAT 2:11"), "Eleventh verse of the second chapter."), - }; + List rows = [new UpdateUsfmRow(ScrRef("MAT 2:11"), "Eleventh verse of the second chapter.")]; string target = UpdateUsfm(rows); Assert.That(target, Contains.Substring("\\v 11-12 Eleventh verse of the second chapter.\r\n")); @@ -408,11 +387,11 @@ public void GetUsfm_Verse_RangeSingleRowSingleVerse() [Test] public void GetUsfm_Verse_RangeMultipleRowsSingleVerse() { - var rows = new List - { + List rows = + [ new UpdateUsfmRow(ScrRef("MAT 2:11"), "Eleventh verse of the second chapter."), new UpdateUsfmRow(ScrRef("MAT 2:12"), "Twelfth verse of the second chapter."), - }; + ]; string target = UpdateUsfm(rows); Assert.That( @@ -426,12 +405,12 @@ public void GetUsfm_Verse_RangeMultipleRowsSingleVerse() [Test] public void GetUsfm_MergeVerseSegments() { - var rows = new List - { + List rows = + [ new UpdateUsfmRow(ScrRef("MAT 2:2"), "Verse 2."), new UpdateUsfmRow(ScrRef("MAT 2:2a"), "Verse 2a."), new UpdateUsfmRow(ScrRef("MAT 2:2b"), "Verse 2b."), - }; + ]; string target = UpdateUsfm(rows); Assert.That(target, Contains.Substring("\\v 2-3 Verse 2. Verse 2a. Verse 2b.\r\n")); @@ -440,11 +419,11 @@ public void GetUsfm_MergeVerseSegments() [Test] public void GetUsfm_Verse_OptBreak() { - var rows = new List - { + List rows = + [ new UpdateUsfmRow(ScrRef("MAT 2:2"), "Second verse of the second chapter."), new UpdateUsfmRow(ScrRef("MAT 2:3"), "Third verse of the second chapter."), - }; + ]; string target = UpdateUsfm(rows, embedBehavior: UpdateUsfmMarkerBehavior.Strip); Assert.That( @@ -456,10 +435,7 @@ public void GetUsfm_Verse_OptBreak() [Test] public void GetUsfm_Verse_Milestone() { - var rows = new List - { - new UpdateUsfmRow(ScrRef("MAT 2:10"), "Tenth verse of the second chapter."), - }; + List rows = [new UpdateUsfmRow(ScrRef("MAT 2:10"), "Tenth verse of the second chapter.")]; string target = UpdateUsfm(rows); Assert.That( @@ -471,10 +447,7 @@ public void GetUsfm_Verse_Milestone() [Test] public void GetUsfm_Verse_Unmatched() { - var rows = new List - { - new UpdateUsfmRow(ScrRef("MAT 1:3"), "Third verse of the first chapter."), - }; + List rows = [new UpdateUsfmRow(ScrRef("MAT 1:3"), "Third verse of the first chapter.")]; string target = UpdateUsfm(rows); Assert.That(target, Contains.Substring("\\v 3 Third verse of the first chapter.\r\n")); @@ -483,7 +456,7 @@ public void GetUsfm_Verse_Unmatched() [Test] public void GetUsfm_NonVerse_CharStyle() { - var rows = new List { new UpdateUsfmRow(ScrRef("MAT 2:0/3:s1"), "The second chapter.") }; + List rows = [new UpdateUsfmRow(ScrRef("MAT 2:0/3:s1"), "The second chapter.")]; string target = UpdateUsfm(rows); Assert.That(target, Contains.Substring("\\s1 The second chapter.\r\n")); @@ -492,7 +465,7 @@ public void GetUsfm_NonVerse_CharStyle() [Test] public void GetUsfm_NonVerse_Paragraph() { - var rows = new List { new UpdateUsfmRow(ScrRef("MAT 1:0/8:s"), "The first chapter.") }; + List rows = [new UpdateUsfmRow(ScrRef("MAT 1:0/8:s"), "The first chapter.")]; string target = UpdateUsfm(rows); Assert.That(target, Contains.Substring("\\s The first chapter.\r\n")); @@ -501,14 +474,14 @@ public void GetUsfm_NonVerse_Paragraph() [Test] public void GetUsfm_NonVerse_Relaxed() { - var rows = new List - { + List rows = + [ new UpdateUsfmRow(ScrRef("MAT 1:0/s"), "The first chapter."), new UpdateUsfmRow(ScrRef("MAT 1:1"), "First verse of the first chapter."), new UpdateUsfmRow(ScrRef("MAT 2:0/tr/tc1"), "The first cell of the table."), new UpdateUsfmRow(ScrRef("MAT 2:0/tr/tc2"), "The second cell of the table."), new UpdateUsfmRow(ScrRef("MAT 2:0/tr/tc1"), "The third cell of the table."), - }; + ]; string target = UpdateUsfm(rows); Assert.That(target, Contains.Substring("\\s The first chapter.\r\n")); @@ -531,10 +504,10 @@ public void GetUsfm_NonVerse_Relaxed() [Test] public void GetUsfm_NonVerse_Sidebar() { - var rows = new List - { + List rows = + [ new UpdateUsfmRow(ScrRef("MAT 2:3/1:esb/1:ms"), "The first paragraph of the sidebar."), - }; + ]; string target = UpdateUsfm(rows); Assert.That(target, Contains.Substring("\\ms The first paragraph of the sidebar.\r\n")); @@ -543,11 +516,11 @@ public void GetUsfm_NonVerse_Sidebar() [Test] public void GetUsfm_NonVerse_Table() { - var rows = new List - { + List rows = + [ new UpdateUsfmRow(ScrRef("MAT 2:0/1:tr/1:tc1"), "The first cell of the table."), new UpdateUsfmRow(ScrRef("MAT 2:0/2:tr/1:tc1"), "The third cell of the table."), - }; + ]; string target = UpdateUsfm(rows); Assert.That( @@ -563,10 +536,10 @@ public void GetUsfm_NonVerse_Table() [Test] public void GetUsfm_NonVerse_OptBreak() { - var rows = new List - { + List rows = + [ new UpdateUsfmRow(ScrRef("MAT 2:3/1:esb/2:p"), "The second paragraph of the sidebar."), - }; + ]; string target = UpdateUsfm(rows); Assert.That(target, Contains.Substring("\\p The second paragraph of the sidebar.\r\n")); @@ -575,7 +548,7 @@ public void GetUsfm_NonVerse_OptBreak() [Test] public void GetUsfm_NonVerse_Milestone() { - var rows = new List { new UpdateUsfmRow(ScrRef("MAT 2:7a/1:s"), "A new section header.") }; + List rows = [new UpdateUsfmRow(ScrRef("MAT 2:7a/1:s"), "A new section header.")]; string target = UpdateUsfm(rows); Assert.That(target, Contains.Substring("\\s A new section header. \\ts-s\\*\r\n")); @@ -584,7 +557,7 @@ public void GetUsfm_NonVerse_Milestone() [Test] public void GetUsfm_NonVerse_SkipNote() { - var rows = new List { new UpdateUsfmRow(ScrRef("MAT 1:0/3:ip"), "The introductory paragraph.") }; + List rows = [new UpdateUsfmRow(ScrRef("MAT 1:0/3:ip"), "The introductory paragraph.")]; string target = UpdateUsfm(rows, embedBehavior: UpdateUsfmMarkerBehavior.Strip); Assert.That(target, Contains.Substring("\\ip The introductory paragraph.\r\n")); @@ -593,7 +566,7 @@ public void GetUsfm_NonVerse_SkipNote() [Test] public void GetUsfm_NonVerse_ReplaceWithNote() { - var rows = new List { new UpdateUsfmRow(ScrRef("MAT 1:0/3:ip"), "The introductory paragraph.") }; + List rows = [new UpdateUsfmRow(ScrRef("MAT 1:0/3:ip"), "The introductory paragraph.")]; string target = UpdateUsfm(rows); Assert.That( @@ -605,10 +578,7 @@ public void GetUsfm_NonVerse_ReplaceWithNote() [Test] public void GetUsfm_Verse_DoubleVaVp() { - var rows = new List - { - new UpdateUsfmRow(ScrRef("MAT 3:1"), "Updating later in the book to start."), - }; + List rows = [new UpdateUsfmRow(ScrRef("MAT 3:1"), "Updating later in the book to start.")]; string target = UpdateUsfm(rows); Assert.That(target, Contains.Substring("\\id MAT - Test\r\n")); @@ -621,7 +591,7 @@ public void GetUsfm_Verse_DoubleVaVp() [Test] public void GetUsfm_Verse_LastSegment() { - var rows = new List { new UpdateUsfmRow(ScrRef("MAT 1:1"), "Updating the last verse.") }; + List rows = [new UpdateUsfmRow(ScrRef("MAT 1:1"), "Updating the last verse.")]; string usfm = @"\id MAT - Test \c 1 @@ -644,15 +614,15 @@ public void GetUsfm_Verse_LastSegment() [Test] public void GetUsfm_Verse_UpdateRowsBeforeText() { - var rows = new List - { + List rows = + [ new UpdateUsfmRow(ScrRef("GEN 1:1"), "Update rows before the start"), new UpdateUsfmRow(ScrRef("GEN 1:2"), "Update rows before the start"), new UpdateUsfmRow(ScrRef("GEN 1:3"), "Update rows before the start"), new UpdateUsfmRow(ScrRef("GEN 1:4"), "Update rows before the start"), new UpdateUsfmRow(ScrRef("GEN 1:5"), "Update rows before the start"), new UpdateUsfmRow(ScrRef("MAT 1:0/3:ip"), "The introductory paragraph."), - }; + ]; string target = UpdateUsfm(rows); Assert.That( @@ -664,11 +634,11 @@ public void GetUsfm_Verse_UpdateRowsBeforeText() [Test] public void GetUsfm_StripParagraphs() { - var rows = new List - { + List rows = + [ new UpdateUsfmRow(ScrRef("MAT 1:0/2:p"), "Update Paragraph"), new UpdateUsfmRow(ScrRef("MAT 1:1"), "Update Verse 1"), - }; + ]; string usfm = @"\id MAT - Test @@ -697,7 +667,7 @@ public void GetUsfm_StripParagraphs() AssertUsfmEquals(target, resultP); target = UpdateUsfm(rows, usfm, paragraphBehavior: UpdateUsfmMarkerBehavior.Strip); - string resultS = + string results = @"\id MAT - Test \c 1 \p This is a paragraph before any verses @@ -706,16 +676,16 @@ public void GetUsfm_StripParagraphs() \p \v 2 Hello World "; - AssertUsfmEquals(target, resultS); + AssertUsfmEquals(target, results); } [Test] public void GetUsfm_PreservationRawStrings() { - var rows = new List - { + List rows = + [ new UpdateUsfmRow(ScrRef("MAT 1:1"), @"Update all in one row \f \fr 1.1 \ft Some note \f*"), - }; + ]; string usfm = @"\id MAT - Test @@ -735,7 +705,7 @@ public void GetUsfm_PreservationRawStrings() [Test] public void GetUsfm_BeginningOfVerseEmbed() { - var rows = new List { new UpdateUsfmRow(ScrRef("MAT 1:1"), @"Updated text") }; + List rows = [new UpdateUsfmRow(ScrRef("MAT 1:1"), "Updated text")]; string usfm = @"\id MAT - Test @@ -755,7 +725,7 @@ public void GetUsfm_BeginningOfVerseEmbed() [Test] public void CrossReferenceDontUpdate() { - var rows = new List { new UpdateUsfmRow(ScrRef("MAT 1:1/1:x"), "Update the cross reference") }; + List rows = [new UpdateUsfmRow(ScrRef("MAT 1:1/1:x"), "Update the cross reference")]; string usfm = @"\id MAT - Test \c 1 @@ -773,7 +743,7 @@ public void CrossReferenceDontUpdate() [Test] public void PreserveFig() { - var rows = new List { new UpdateUsfmRow(ScrRef("MAT 1:1"), "Update") }; + List rows = [new UpdateUsfmRow(ScrRef("MAT 1:1"), "Update")]; string usfm = @"\id MAT - Test \c 1 @@ -791,11 +761,11 @@ public void PreserveFig() [Test] public void NoteExplicitEndMarkers() { - var rows = new List - { + List rows = + [ new UpdateUsfmRow(ScrRef("MAT 1:1"), "Update text"), new UpdateUsfmRow(ScrRef("MAT 1:1/1:f"), "Update note"), - }; + ]; string usfm = @"\id MAT - Test \c 1 @@ -821,13 +791,13 @@ public void NoteExplicitEndMarkers() [Test] public void UpdateBlock_Verse_PreserveParas() { - var rows = new List { new UpdateUsfmRow(ScrRef("MAT 1:1"), "Update 1") }; + List rows = [new UpdateUsfmRow(ScrRef("MAT 1:1"), "Update 1")]; string usfm = @"\id MAT - Test \c 1 \v 1 verse 1 \p inner verse paragraph "; - TestUsfmUpdateBlockHandler usfmUpdateBlockHandler = new TestUsfmUpdateBlockHandler(); + var usfmUpdateBlockHandler = new TestUsfmUpdateBlockHandler(); UpdateUsfm( rows, usfm, @@ -835,7 +805,7 @@ public void UpdateBlock_Verse_PreserveParas() usfmUpdateBlockHandlers: [usfmUpdateBlockHandler] ); - Assert.That(usfmUpdateBlockHandler.Blocks.Count, Is.EqualTo(1)); + Assert.That(usfmUpdateBlockHandler.Blocks, Has.Count.EqualTo(1)); UsfmUpdateBlock usfmUpdateBlock = usfmUpdateBlockHandler.Blocks[0]; AssertUpdateBlockEquals( @@ -851,13 +821,13 @@ public void UpdateBlock_Verse_PreserveParas() [Test] public void UpdateBlock_Verse_StripParas() { - var rows = new List { new UpdateUsfmRow(ScrRef("MAT 1:1"), "Update 1") }; + List rows = [new UpdateUsfmRow(ScrRef("MAT 1:1"), "Update 1")]; string usfm = @"\id MAT - Test \c 1 \v 1 verse 1 \p inner verse paragraph "; - TestUsfmUpdateBlockHandler usfmUpdateBlockHandler = new TestUsfmUpdateBlockHandler(); + var usfmUpdateBlockHandler = new TestUsfmUpdateBlockHandler(); UpdateUsfm( rows, usfm, @@ -865,7 +835,7 @@ public void UpdateBlock_Verse_StripParas() usfmUpdateBlockHandlers: [usfmUpdateBlockHandler] ); - Assert.That(usfmUpdateBlockHandler.Blocks.Count, Is.EqualTo(1)); + Assert.That(usfmUpdateBlockHandler.Blocks, Has.Count.EqualTo(1)); UsfmUpdateBlock usfmUpdateBlock = usfmUpdateBlockHandler.Blocks[0]; AssertUpdateBlockEquals( @@ -881,13 +851,13 @@ public void UpdateBlock_Verse_StripParas() [Test] public void UpdateBlock_Verse_Range() { - var rows = new List { new UpdateUsfmRow(ScrRef("MAT 1:1"), "Update 1") }; + List rows = [new UpdateUsfmRow(ScrRef("MAT 1:1"), "Update 1")]; string usfm = @"\id MAT - Test \c 1 \v 1-3 verse 1 through 3 "; - TestUsfmUpdateBlockHandler usfmUpdateBlockHandler = new TestUsfmUpdateBlockHandler(); + var usfmUpdateBlockHandler = new TestUsfmUpdateBlockHandler(); UpdateUsfm( rows, usfm, @@ -895,7 +865,7 @@ public void UpdateBlock_Verse_Range() usfmUpdateBlockHandlers: [usfmUpdateBlockHandler] ); - Assert.That(usfmUpdateBlockHandler.Blocks.Count, Is.EqualTo(1)); + Assert.That(usfmUpdateBlockHandler.Blocks, Has.Count.EqualTo(1)); UsfmUpdateBlock usfmUpdateBlock = usfmUpdateBlockHandler.Blocks[0]; AssertUpdateBlockEquals( @@ -909,21 +879,24 @@ public void UpdateBlock_Verse_Range() [Test] public void UpdateBlock_Verse_Range_RightToLeftMarker() { - var rows = new List { new UpdateUsfmRow(ScrRef("MAT 1:1", "MAT 1:2", "MAT 1:3"), "Update 1-3") }; + List rows = [new UpdateUsfmRow(ScrRef("MAT 1:1", "MAT 1:2", "MAT 1:3"), "Update 1-3")]; string usfm = @"\id MAT - Test \c 1 \v 1‏-3 verse 1 through 3 "; - TestUsfmUpdateBlockHandler usfmUpdateBlockHandler = new TestUsfmUpdateBlockHandler(); + var usfmUpdateBlockHandler = new TestUsfmUpdateBlockHandler(); string updatedUsfm = UpdateUsfm(rows, usfm, usfmUpdateBlockHandlers: [usfmUpdateBlockHandler]); string expectedUsfm = @"\id MAT - Test \c 1 \v 1-3 Update 1-3 "; - Assert.That(updatedUsfm, Is.EqualTo(expectedUsfm).IgnoreLineEndings()); - Assert.That(usfmUpdateBlockHandler.Blocks.Count, Is.EqualTo(1)); + using (Assert.EnterMultipleScope()) + { + Assert.That(updatedUsfm, Is.EqualTo(expectedUsfm).IgnoreLineEndings()); + Assert.That(usfmUpdateBlockHandler.Blocks, Has.Count.EqualTo(1)); + } UsfmUpdateBlock usfmUpdateBlock = usfmUpdateBlockHandler.Blocks[0]; AssertUpdateBlockEquals( @@ -937,13 +910,13 @@ public void UpdateBlock_Verse_Range_RightToLeftMarker() [Test] public void UpdateBlock_Footnote_PreserveEmbeds() { - var rows = new List { new UpdateUsfmRow(ScrRef("MAT 1:1"), "Update 1") }; + List rows = [new UpdateUsfmRow(ScrRef("MAT 1:1"), "Update 1")]; string usfm = @"\id MAT - Test \c 1 \v 1 verse\f \fr 1.1 \ft Some note \f* 1 "; - TestUsfmUpdateBlockHandler usfmUpdateBlockHandler = new TestUsfmUpdateBlockHandler(); + var usfmUpdateBlockHandler = new TestUsfmUpdateBlockHandler(); UpdateUsfm( rows, usfm, @@ -951,7 +924,7 @@ public void UpdateBlock_Footnote_PreserveEmbeds() usfmUpdateBlockHandlers: [usfmUpdateBlockHandler] ); - Assert.That(usfmUpdateBlockHandler.Blocks.Count, Is.EqualTo(1)); + Assert.That(usfmUpdateBlockHandler.Blocks, Has.Count.EqualTo(1)); UsfmUpdateBlock usfmUpdateBlock = usfmUpdateBlockHandler.Blocks[0]; AssertUpdateBlockEquals( @@ -967,13 +940,13 @@ public void UpdateBlock_Footnote_PreserveEmbeds() [Test] public void UpdateBlock_Footnote_StripEmbeds() { - var rows = new List { new UpdateUsfmRow(ScrRef("MAT 1:1"), "Update 1") }; + List rows = [new UpdateUsfmRow(ScrRef("MAT 1:1"), "Update 1")]; string usfm = @"\id MAT - Test \c 1 \v 1 verse\f \fr 1.1 \ft Some note \f* 1 "; - TestUsfmUpdateBlockHandler usfmUpdateBlockHandler = new TestUsfmUpdateBlockHandler(); + var usfmUpdateBlockHandler = new TestUsfmUpdateBlockHandler(); UpdateUsfm( rows, usfm, @@ -981,7 +954,7 @@ public void UpdateBlock_Footnote_StripEmbeds() usfmUpdateBlockHandlers: [usfmUpdateBlockHandler] ); - Assert.That(usfmUpdateBlockHandler.Blocks.Count, Is.EqualTo(1)); + Assert.That(usfmUpdateBlockHandler.Blocks, Has.Count.EqualTo(1)); UsfmUpdateBlock usfmUpdateBlock = usfmUpdateBlockHandler.Blocks[0]; AssertUpdateBlockEquals( @@ -997,17 +970,17 @@ public void UpdateBlock_Footnote_StripEmbeds() [Test] public void UpdateBlock_NonVerse() { - var rows = new List { new UpdateUsfmRow(ScrRef("MAT 1:0/1:s"), "Updated section Header") }; + List rows = [new UpdateUsfmRow(ScrRef("MAT 1:0/1:s"), "Updated section Header")]; string usfm = @"\id MAT - Test \s Section header \c 1 \v 1 verse 1 "; - TestUsfmUpdateBlockHandler usfmUpdateBlockHandler = new TestUsfmUpdateBlockHandler(); + var usfmUpdateBlockHandler = new TestUsfmUpdateBlockHandler(); UpdateUsfm(rows, usfm, usfmUpdateBlockHandlers: [usfmUpdateBlockHandler]); - Assert.That(usfmUpdateBlockHandler.Blocks.Count, Is.EqualTo(2)); + Assert.That(usfmUpdateBlockHandler.Blocks, Has.Count.EqualTo(2)); UsfmUpdateBlock usfmUpdateBlock = usfmUpdateBlockHandler.Blocks[0]; AssertUpdateBlockEquals( @@ -1021,13 +994,13 @@ public void UpdateBlock_NonVerse() [Test] public void UpdateBlock_Verse_PreserveStyles() { - var rows = new List { new UpdateUsfmRow(ScrRef("MAT 1:1"), "Update 1") }; + List rows = [new UpdateUsfmRow(ScrRef("MAT 1:1"), "Update 1")]; string usfm = @"\id MAT - Test \c 1 \v 1 verse \bd 1\bd* "; - TestUsfmUpdateBlockHandler usfmUpdateBlockHandler = new TestUsfmUpdateBlockHandler(); + var usfmUpdateBlockHandler = new TestUsfmUpdateBlockHandler(); UpdateUsfm( rows, usfm, @@ -1035,7 +1008,7 @@ public void UpdateBlock_Verse_PreserveStyles() usfmUpdateBlockHandlers: [usfmUpdateBlockHandler] ); - Assert.That(usfmUpdateBlockHandler.Blocks.Count, Is.EqualTo(1)); + Assert.That(usfmUpdateBlockHandler.Blocks, Has.Count.EqualTo(1)); UsfmUpdateBlock usfmUpdateBlock = usfmUpdateBlockHandler.Blocks[0]; AssertUpdateBlockEquals( @@ -1053,13 +1026,13 @@ public void UpdateBlock_Verse_PreserveStyles() [Test] public void UpdateBlock_Verse_StripStyles() { - var rows = new List { new UpdateUsfmRow(ScrRef("MAT 1:1"), "Update 1") }; + List rows = [new UpdateUsfmRow(ScrRef("MAT 1:1"), "Update 1")]; string usfm = @"\id MAT - Test \c 1 \v 1 verse \bd 1\bd* "; - TestUsfmUpdateBlockHandler usfmUpdateBlockHandler = new TestUsfmUpdateBlockHandler(); + var usfmUpdateBlockHandler = new TestUsfmUpdateBlockHandler(); UpdateUsfm( rows, usfm, @@ -1067,7 +1040,7 @@ public void UpdateBlock_Verse_StripStyles() usfmUpdateBlockHandlers: [usfmUpdateBlockHandler] ); - Assert.That(usfmUpdateBlockHandler.Blocks.Count, Is.EqualTo(1)); + Assert.That(usfmUpdateBlockHandler.Blocks, Has.Count.EqualTo(1)); UsfmUpdateBlock usfmUpdateBlock = usfmUpdateBlockHandler.Blocks[0]; AssertUpdateBlockEquals( @@ -1085,7 +1058,7 @@ public void UpdateBlock_Verse_StripStyles() [Test] public void UpdateBlock_Verse_SectionHeader() { - var rows = new List { new UpdateUsfmRow(ScrRef("MAT 1:1"), "Update 1") }; + List rows = [new UpdateUsfmRow(ScrRef("MAT 1:1"), "Update 1")]; string usfm = @"\id MAT - Test \c 1 @@ -1095,10 +1068,10 @@ public void UpdateBlock_Verse_SectionHeader() \p \v 2 Verse 2 "; - TestUsfmUpdateBlockHandler usfmUpdateBlockHandler = new TestUsfmUpdateBlockHandler(); + var usfmUpdateBlockHandler = new TestUsfmUpdateBlockHandler(); UpdateUsfm(rows, usfm, usfmUpdateBlockHandlers: [usfmUpdateBlockHandler]); - Assert.That(usfmUpdateBlockHandler.Blocks.Count, Is.EqualTo(4)); + Assert.That(usfmUpdateBlockHandler.Blocks, Has.Count.EqualTo(4)); AssertUpdateBlockEquals(usfmUpdateBlockHandler.Blocks[0], ["MAT 1:0/1:p"]); AssertUpdateBlockEquals( usfmUpdateBlockHandler.Blocks[1], @@ -1123,7 +1096,7 @@ public void UpdateBlock_Verse_SectionHeader() [Test] public void UpdateBlock_Verse_SectionHeaderInVerse() { - var rows = new List { new UpdateUsfmRow(ScrRef("MAT 1:1"), "Update 1") }; + List rows = [new UpdateUsfmRow(ScrRef("MAT 1:1"), "Update 1")]; string usfm = @"\id MAT - Test \c 1 @@ -1132,10 +1105,10 @@ public void UpdateBlock_Verse_SectionHeaderInVerse() \s Section header \p end of verse "; - TestUsfmUpdateBlockHandler usfmUpdateBlockHandler = new TestUsfmUpdateBlockHandler(); + var usfmUpdateBlockHandler = new TestUsfmUpdateBlockHandler(); UpdateUsfm(rows, usfm, usfmUpdateBlockHandlers: [usfmUpdateBlockHandler]); - Assert.That(usfmUpdateBlockHandler.Blocks.Count, Is.EqualTo(3)); + Assert.That(usfmUpdateBlockHandler.Blocks, Has.Count.EqualTo(3)); AssertUpdateBlockEquals(usfmUpdateBlockHandler.Blocks[0], ["MAT 1:0/1:p"]); AssertUpdateBlockEquals( usfmUpdateBlockHandler.Blocks[1], @@ -1156,7 +1129,7 @@ public void UpdateBlock_Verse_SectionHeaderInVerse() [Test] public void UpdateBlock_NonVerse_ParagraphEndOfVerse() { - var rows = new List { new UpdateUsfmRow(ScrRef("MAT 1:1"), "Update 1") }; + List rows = [new UpdateUsfmRow(ScrRef("MAT 1:1"), "Update 1")]; string usfm = @"\id MAT - Test \c 1 @@ -1164,10 +1137,10 @@ public void UpdateBlock_NonVerse_ParagraphEndOfVerse() \v 1 Verse 1 \s Section header "; - TestUsfmUpdateBlockHandler usfmUpdateBlockHandler = new TestUsfmUpdateBlockHandler(); + var usfmUpdateBlockHandler = new TestUsfmUpdateBlockHandler(); UpdateUsfm(rows, usfm, usfmUpdateBlockHandlers: [usfmUpdateBlockHandler]); - Assert.That(usfmUpdateBlockHandler.Blocks.Count, Is.EqualTo(3)); + Assert.That(usfmUpdateBlockHandler.Blocks, Has.Count.EqualTo(3)); AssertUpdateBlockEquals(usfmUpdateBlockHandler.Blocks[0], ["MAT 1:0/1:p"]); AssertUpdateBlockEquals( usfmUpdateBlockHandler.Blocks[1], @@ -1185,14 +1158,14 @@ public void UpdateBlock_NonVerse_ParagraphEndOfVerse() [Test] public void GetUsfm_HeaderReferenceParagraphs() { - var rows = new List - { + List rows = + [ new UpdateUsfmRow(ScrRef("MAT 1:1"), "new verse 1"), new UpdateUsfmRow(ScrRef("MAT 1:2"), "new verse 2"), new UpdateUsfmRow(ScrRef("MAT 1:3"), "new verse 3"), new UpdateUsfmRow(ScrRef("MAT 2:1"), "new verse 1"), new UpdateUsfmRow(ScrRef("MAT 2:2"), "new verse 2"), - }; + ]; string usfm = @"\id MAT @@ -1240,8 +1213,8 @@ public void GetUsfm_HeaderReferenceParagraphs() [Test] public void GetUsfm_OutOfOrderVerses() { - var rows = new List - { + List rows = + [ new UpdateUsfmRow(ScrRef("MAT 1:1"), "new verse 1"), new UpdateUsfmRow(ScrRef("MAT 1:2"), "new verse 2"), new UpdateUsfmRow(ScrRef("MAT 1:3"), "new verse 3"), @@ -1252,7 +1225,7 @@ public void GetUsfm_OutOfOrderVerses() new UpdateUsfmRow(ScrRef("MAT 1:6b/1:s"), "new section"), new UpdateUsfmRow(ScrRef("MAT 1:7"), "new verse 7"), new UpdateUsfmRow(ScrRef("MAT 1:8"), "new verse 8"), - }; + ]; string usfm = @"\id MAT @@ -1299,13 +1272,13 @@ public void GetUsfm_OutOfOrderVerses() [Test] public void GetUsfm_DuplicateVerses() { - var rows = new List - { + List rows = + [ new UpdateUsfmRow(ScrRef("MAT 1:1"), "new verse 1"), new UpdateUsfmRow(ScrRef("MAT 1:2"), "new verse 2"), new UpdateUsfmRow(ScrRef("MAT 1:3"), "new verse 3"), new UpdateUsfmRow(ScrRef("MAT 1:4"), "new verse 4"), - }; + ]; string usfm = @"\id MAT @@ -1337,14 +1310,14 @@ public void GetUsfm_DuplicateVerses() [Test] public void GetUsfm_IdTags() { - var rows = new List - { + List rows = + [ new UpdateUsfmRow(ScrRef("MAT 1:0/1:s"), "new section header"), new UpdateUsfmRow(ScrRef("MAT 1:1"), "new verse 1"), new UpdateUsfmRow(ScrRef("MAT 1:2"), "new verse 2"), new UpdateUsfmRow(ScrRef("MAT 1:3"), "new verse 3"), new UpdateUsfmRow(ScrRef("MAT 1:4"), "new verse 4"), - }; + ]; string usfm = @"\id MAT @@ -1540,7 +1513,7 @@ public void UpdateBlock_FootnoteInPublishedChapterNumber() "; AssertUsfmEquals(target, result); - Assert.That(usfmUpdateBlockHandler.Blocks.Count, Is.EqualTo(2)); + Assert.That(usfmUpdateBlockHandler.Blocks, Has.Count.EqualTo(2)); AssertUpdateBlockEquals( usfmUpdateBlockHandler.Blocks[0], ["ESG 1:0/1:f"], @@ -1582,7 +1555,7 @@ public void UpdateBlock_FootnoteAtStartOfChapterWithPrecedingText() "; AssertUsfmEquals(target, result); - Assert.That(usfmUpdateBlockHandler.Blocks.Count, Is.EqualTo(2)); + Assert.That(usfmUpdateBlockHandler.Blocks, Has.Count.EqualTo(2)); AssertUpdateBlockEquals( usfmUpdateBlockHandler.Blocks[0], ["ESG 1:0/1:f"], @@ -1687,10 +1660,7 @@ public void FilterChapters_WithBadChapterReference() AssertUsfmEquals(target, result); } - private static ScriptureRef[] ScrRef(params string[] refs) - { - return refs.Select(r => ScriptureRef.Parse(r)).ToArray(); - } + private static ScriptureRef[] ScrRef(params string[] refs) => [.. refs.Select(r => ScriptureRef.Parse(r))]; private static string UpdateUsfm( IReadOnlyList? rows = null, @@ -1754,12 +1724,10 @@ private static string UpdateUsfm( private static void AssertUsfmEquals(string target, string truth) { Assert.That(target, Is.Not.Null); - string[] target_lines = target.Split(["\n"], StringSplitOptions.None); - string[] truth_lines = truth.Split(["\n"], StringSplitOptions.None); - for (int i = 0; i < truth_lines.Length; i++) - { - Assert.That(target_lines[i].Trim(), Is.EqualTo(truth_lines[i].Trim()), message: $"Line {i}"); - } + string[] targetLines = target.Split('\n'); + string[] truthLines = truth.Split('\n'); + for (int i = 0; i < truthLines.Length; i++) + Assert.That(targetLines[i].Trim(), Is.EqualTo(truthLines[i].Trim()), message: $"Line {i}"); } private static void AssertUpdateBlockEquals( @@ -1769,29 +1737,30 @@ private static void AssertUpdateBlockEquals( ) { IEnumerable parsedExtractedRefs = expectedRefs.Select(r => ScriptureRef.Parse(r)); - Assert.That(block.Refs.SequenceEqual(parsedExtractedRefs)); - Assert.That(block.Elements.Count, Is.EqualTo(expectedElements.Length)); - foreach ( - ( - UsfmUpdateBlockElement element, - (UsfmUpdateBlockElementType expectedType, string expectedUsfm, bool expectedMarkedForRemoval) - ) in block.Elements.Zip(expectedElements) - ) + using (Assert.EnterMultipleScope()) { - Assert.That(element.Type, Is.EqualTo(expectedType)); - Assert.That(string.Join("", element.Tokens.Select(t => t.ToUsfm())), Is.EqualTo(expectedUsfm)); - Assert.That(element.MarkedForRemoval, Is.EqualTo(expectedMarkedForRemoval)); + Assert.That(block.Refs.SequenceEqual(parsedExtractedRefs)); + Assert.That(block.Elements, Has.Count.EqualTo(expectedElements.Length)); + foreach ( + ( + UsfmUpdateBlockElement element, + (UsfmUpdateBlockElementType expectedType, string expectedUsfm, bool expectedMarkedForRemoval) + ) in block.Elements.Zip(expectedElements) + ) + { + Assert.That(element.Type, Is.EqualTo(expectedType)); + Assert.That( + string.Join(string.Empty, element.Tokens.Select(t => t.ToUsfm())), + Is.EqualTo(expectedUsfm) + ); + Assert.That(element.MarkedForRemoval, Is.EqualTo(expectedMarkedForRemoval)); + } } } private class TestUsfmUpdateBlockHandler : IUsfmUpdateBlockHandler { - public List Blocks { get; } - - public TestUsfmUpdateBlockHandler() - { - Blocks = new List(); - } + public List Blocks { get; } = []; public UsfmUpdateBlock ProcessBlock(UsfmUpdateBlock block) {