diff --git a/src/SIL.Machine/Corpora/CorporaExtensions.cs b/src/SIL.Machine/Corpora/CorporaExtensions.cs index e757c297..2cf7ca98 100644 --- a/src/SIL.Machine/Corpora/CorporaExtensions.cs +++ b/src/SIL.Machine/Corpora/CorporaExtensions.cs @@ -289,10 +289,7 @@ public static ITextCorpus Flatten(this IEnumerable corpora) continue; VerseRef vref = scriptureRef.VerseRef; - if ( - curRef.HasValue - && vref.CompareTo(curRef.Value, null, compareAllVerses: true, compareSegments: false) != 0 - ) + if (curRef.HasValue && VerseRefComparer.IgnoreSegments.Compare(vref, (VerseRef)curRef) != 0) { yield return (curTrgLineRange ? "" : curTrgLine.ToString(), curRef.Value, curTrgRef.Value); curTrgLineRange = curTrgLineRange || curTrgLine.Length > 0; diff --git a/src/SIL.Machine/Corpora/ScriptureRef.cs b/src/SIL.Machine/Corpora/ScriptureRef.cs index c92121a8..666fbe78 100644 --- a/src/SIL.Machine/Corpora/ScriptureRef.cs +++ b/src/SIL.Machine/Corpora/ScriptureRef.cs @@ -88,7 +88,7 @@ public ScriptureRef ToRelaxed() public ScriptureRef ChangeVersification(ScrVers versification) { VerseRef vr = VerseRef.Clone(); - vr.ChangeVersification(versification); + vr = vr.ChangeVersificationWithSegments(versification); return new ScriptureRef(vr, Path); } diff --git a/src/SIL.Machine/Corpora/UpdateUsfmParserHandler.cs b/src/SIL.Machine/Corpora/UpdateUsfmParserHandler.cs index a5687439..8010affd 100644 --- a/src/SIL.Machine/Corpora/UpdateUsfmParserHandler.cs +++ b/src/SIL.Machine/Corpora/UpdateUsfmParserHandler.cs @@ -776,7 +776,7 @@ private void UpdateVerseRows() // We are using a dictionary, which uses an equality comparer. As a result, we need to change the // source verse ref to use the row versification. If we used a SortedList, it wouldn't be necessary, but it // would be less efficient. - vref.ChangeVersification(_updateRowsVersification); + vref = vref.ChangeVersificationWithSegments(_updateRowsVersification); _verseRows.Clear(); _verseRowIndex = 0; diff --git a/src/SIL.Machine/Corpora/VerseRefComparer.cs b/src/SIL.Machine/Corpora/VerseRefComparer.cs index c0aea648..5996e59d 100644 --- a/src/SIL.Machine/Corpora/VerseRefComparer.cs +++ b/src/SIL.Machine/Corpora/VerseRefComparer.cs @@ -20,13 +20,15 @@ public VerseRefComparer(bool compareSegments = true) public int Compare(VerseRef x, VerseRef y) { + y = y.ChangeVersificationWithSegments(x.Versification); + if (!x.HasMultiple && !y.HasMultiple) + { return x.CompareTo(y, null, compareAllVerses: false, _compareSegments); + } // Correctly implement comparing all verses in a range or sequence. The implementation of // VerseRef.CompareTo() that compares all verses does not handle segments correctly. - if (x.Versification != y.Versification) - y.ChangeVersification(x.Versification); VerseRef[] xArray = x.AllVerses().ToArray(); VerseRef[] yArray = y.AllVerses().ToArray(); foreach ((VerseRef sx, VerseRef sy) in xArray.Zip(yArray)) diff --git a/src/SIL.Machine/Corpora/VerseRefExtensions.cs b/src/SIL.Machine/Corpora/VerseRefExtensions.cs new file mode 100644 index 00000000..ff92e2ff --- /dev/null +++ b/src/SIL.Machine/Corpora/VerseRefExtensions.cs @@ -0,0 +1,57 @@ +using System.Collections.Generic; +using System.Linq; +using SIL.Scripture; + +namespace SIL.Machine.Corpora +{ + public static class VerseRefExtensions + { + public static VerseRef RemoveSegments(this VerseRef verseRef) + { + if (string.IsNullOrEmpty(verseRef.Segment())) + { + return verseRef.Clone(); + } + try + { + return new VerseRef( + $"{verseRef.Book} {verseRef.ChapterNum}:{string.Join(",", verseRef.AllVerses().Select(vr => vr.VerseNum).ToArray())}", + verseRef.Versification + ); + } + catch (VerseRefException) + { + VerseRef newVerseRef = verseRef.Clone(); + newVerseRef.Simplify(); + return newVerseRef; + } + } + + public static VerseRef ChangeVersificationWithSegments(this VerseRef verseRef, ScrVers versification) + { + VerseRef vr = verseRef.Clone(); + vr.ChangeVersification(versification); + if (string.IsNullOrEmpty(vr.Segment())) + return vr; + VerseRef verseRefWithoutSegments = verseRef.RemoveSegments(); + verseRefWithoutSegments.ChangeVersification(versification); + if (!verseRefWithoutSegments.Equals(vr.RemoveSegments())) + { + IEnumerable verses = verseRef + .AllVerses() + .Zip( + verseRefWithoutSegments.AllVerses(), + (verseWithSegments, verseWithCorrectNumber) => (verseWithSegments, verseWithCorrectNumber) + ) + .Select( + (verseTuple) => verseTuple.verseWithCorrectNumber.Verse + verseTuple.verseWithSegments.Segment() + ); + return new VerseRef( + $"{verseRefWithoutSegments.Book} {verseRefWithoutSegments.ChapterNum}:{string.Join(",", verses)}", + versification + ); + } + return vr; + } + } +} diff --git a/tests/SIL.Machine.Tests/Corpora/ParallelTextCorpusTests.cs b/tests/SIL.Machine.Tests/Corpora/ParallelTextCorpusTests.cs index 47227b7d..45f6a40b 100644 --- a/tests/SIL.Machine.Tests/Corpora/ParallelTextCorpusTests.cs +++ b/tests/SIL.Machine.Tests/Corpora/ParallelTextCorpusTests.cs @@ -832,7 +832,7 @@ public void GetRows_SameRefMiddleManyToMany() } [Test] - public void GetGetRows_SameRefMiddleOneToMany() + public void GetRows_SameRefMiddleOneToMany() { var sourceCorpus = new DictionaryTextCorpus( new MemoryText( @@ -872,7 +872,7 @@ public void GetGetRows_SameRefMiddleOneToMany() } [Test] - public void GetGetRows_SameRefMiddleManyToOne() + public void GetRows_SameRefMiddleManyToOne() { var sourceCorpus = new DictionaryTextCorpus( new MemoryText( @@ -912,7 +912,7 @@ public void GetGetRows_SameRefMiddleManyToOne() } [Test] - public void GetGetRows_SameRefLastOneToMany() + public void GetRows_SameRefLastOneToMany() { var sourceCorpus = new DictionaryTextCorpus( new MemoryText( @@ -947,7 +947,7 @@ public void GetGetRows_SameRefLastOneToMany() } [Test] - public void GetGetRows_SameRefLastManyToOne() + public void GetRows_SameRefLastManyToOne() { var sourceCorpus = new DictionaryTextCorpus( new MemoryText( @@ -982,7 +982,7 @@ public void GetGetRows_SameRefLastManyToOne() } [Test] - public void GetGetRows_SameVerseRefOneToMany() + public void GetRows_SameVerseRefOneToMany() { Versification.Table.Implementation.RemoveAllUnknownVersifications(); string src = "&MAT 1:2-3 = MAT 1:2\nMAT 1:4 = MAT 1:3\n"; @@ -1049,7 +1049,7 @@ public void GetGetRows_SameVerseRefOneToMany() } [Test] - public void GetGetRows_VerseRefOutOfOrder() + public void GetRows_VerseRefOutOfOrder() { Versification.Table.Implementation.RemoveAllUnknownVersifications(); string src = "&MAT 1:4-5 = MAT 1:4\nMAT 1:2 = MAT 1:3\nMAT 1:3 = MAT 1:2\n"; @@ -1123,6 +1123,121 @@ public void GetGetRows_VerseRefOutOfOrder() ); } + [Test] + public void GetRows_DifferentVersificationsWithVerseSegments() + { + Versification.Table.Implementation.RemoveAllUnknownVersifications(); + var sourceCorpus = new DictionaryTextCorpus( + new MemoryText( + "NUM", + new[] + { + TextRow( + "NUM", + ScriptureRef.Parse("NUM 17:1a", ScrVers.Original), + "source chapter seventeen, verse one a ." + ), + TextRow( + "NUM", + ScriptureRef.Parse("NUM 17:1b", ScrVers.Original), + "source chapter seventeen, verse one b ." + ), + TextRow( + "NUM", + ScriptureRef.Parse("NUM 17:2", ScrVers.Original), + "source chapter seventeen, verse two ." + ), + TextRow( + "NUM", + ScriptureRef.Parse("NUM 17:3", ScrVers.Original), + "source chapter seventeen, verse three ." + ), + TextRow( + "NUM", + ScriptureRef.Parse("NUM 17:4", ScrVers.Original), + "source chapter seventeen, verse four ." + ), + } + ) + ) + { + Versification = ScrVers.Original, + }; + var targetCorpus = new DictionaryTextCorpus( + new MemoryText( + "NUM", + new[] + { + TextRow( + "NUM", + ScriptureRef.Parse("NUM 16:36", ScrVers.English), + "target chapter sixteen, verse thirty six ." + ), + TextRow( + "NUM", + ScriptureRef.Parse("NUM 16:37", ScrVers.English), + "target chapter sixteen, verse thirty seven ." + ), + TextRow( + "NUM", + ScriptureRef.Parse("NUM 16:38", ScrVers.English), + "target chapter sixteen, verse thirty eight ." + ), + TextRow( + "NUM", + ScriptureRef.Parse("NUM 16:39a", ScrVers.English), + "target chapter sixteen, verse thirty nine a ." + ), + TextRow( + "NUM", + ScriptureRef.Parse("NUM 16:39b", ScrVers.English), + "target chapter sixteen, verse thirty nine b ." + ), + } + ) + ) + { + Versification = ScrVers.English, + }; + + // English vs. Original + // NUM 16:36-50 = NUM 17:1-15 + // NUM 17:1-13 = NUM 17:16-28 + var parallelCorpus = new ParallelTextCorpus(sourceCorpus, targetCorpus); + ParallelTextRow[] rows = parallelCorpus.ToArray(); + Assert.That(rows.Length, Is.EqualTo(6)); + + Assert.That(rows[0].SourceRefs, Is.EqualTo([ScriptureRef.Parse("NUM 17:1a", ScrVers.Original)])); + Assert.That(rows[0].TargetRefs, Is.EqualTo([ScriptureRef.Parse("NUM 16:36", ScrVers.English)])); + Assert.That(rows[0].SourceSegment, Is.EqualTo("source chapter seventeen, verse one a .".Split())); + Assert.That(rows[0].TargetSegment, Is.EqualTo("target chapter sixteen, verse thirty six .".Split())); + + Assert.That(rows[1].SourceRefs, Is.EqualTo([ScriptureRef.Parse("NUM 17:1b", ScrVers.Original)])); + Assert.That(rows[1].TargetRefs, Is.EqualTo([ScriptureRef.Parse("NUM 16:36", ScrVers.English)])); + Assert.That(rows[1].SourceSegment, Is.EqualTo("source chapter seventeen, verse one b .".Split())); + Assert.That(rows[1].TargetSegment, Is.EqualTo("target chapter sixteen, verse thirty six .".Split())); + + Assert.That(rows[2].SourceRefs, Is.EqualTo([ScriptureRef.Parse("NUM 17:2", ScrVers.Original)])); + Assert.That(rows[2].TargetRefs, Is.EqualTo([ScriptureRef.Parse("NUM 16:37", ScrVers.English)])); + Assert.That(rows[2].SourceSegment, Is.EqualTo("source chapter seventeen, verse two .".Split())); + Assert.That(rows[2].TargetSegment, Is.EqualTo("target chapter sixteen, verse thirty seven .".Split())); + + Assert.That(rows[3].SourceRefs, Is.EqualTo([ScriptureRef.Parse("NUM 17:3", ScrVers.Original)])); + Assert.That(rows[3].TargetRefs, Is.EqualTo([ScriptureRef.Parse("NUM 16:38", ScrVers.English)])); + Assert.That(rows[3].SourceSegment, Is.EqualTo("source chapter seventeen, verse three .".Split())); + Assert.That(rows[3].TargetSegment, Is.EqualTo("target chapter sixteen, verse thirty eight .".Split())); + + Assert.That(rows[4].SourceRefs, Is.EqualTo([ScriptureRef.Parse("NUM 17:4", ScrVers.Original)])); + Assert.That(rows[4].TargetRefs, Is.EqualTo([ScriptureRef.Parse("NUM 16:39a", ScrVers.English)])); + Assert.That(rows[4].SourceSegment, Is.EqualTo("source chapter seventeen, verse four .".Split())); + Assert.That(rows[4].TargetSegment, Is.EqualTo("target chapter sixteen, verse thirty nine a .".Split())); + + Assert.That(rows[5].SourceRefs, Is.EqualTo([ScriptureRef.Parse("NUM 17:4", ScrVers.Original)])); + Assert.That(rows[5].TargetRefs, Is.EqualTo([ScriptureRef.Parse("NUM 16:39b", ScrVers.English)])); + Assert.That(rows[5].SourceSegment, Is.EqualTo("source chapter seventeen, verse four .".Split())); + Assert.That(rows[5].TargetSegment, Is.EqualTo("target chapter sixteen, verse thirty nine b .".Split())); + } + [Test] public void Count_NoRows() { diff --git a/tests/SIL.Machine.Tests/Corpora/VerseRefExtensionsTests.cs b/tests/SIL.Machine.Tests/Corpora/VerseRefExtensionsTests.cs new file mode 100644 index 00000000..e4d71e17 --- /dev/null +++ b/tests/SIL.Machine.Tests/Corpora/VerseRefExtensionsTests.cs @@ -0,0 +1,65 @@ +using NUnit.Framework; +using SIL.Scripture; + +namespace SIL.Machine.Corpora; + +[TestFixture] +public class VerseRefExtensionsTests +{ + [Test] + [TestCase("MAT 1:1", "MAT 1:1")] + [TestCase("MAT 1:1a", "MAT 1:1")] + [TestCase("MAT 1:1a-2b,5a", "MAT 1:1,2,5")] + [TestCase("MAT 1:1a-3b", "MAT 1:1,2,3")] + public void RemoveSegments(string verseRefStrWithSegments, string verseRefStrWithoutSegments) + { + var verseRef = new VerseRef(verseRefStrWithSegments, ScrVers.English); + var result = verseRef.RemoveSegments(); + Assert.That(result.ToString(), Is.EqualTo(verseRefStrWithoutSegments)); + } + + [Test] + public void ChangeVersificationWithSegments() + { + // English vs. Original + // NUM 16:36-50 = NUM 17:1-15 + // NUM 17:1-13 = NUM 17:16-28 + // ESG 1:1 = ESG 1:1a + // ESG 1:2 = ESG 1:1b + + VerseRef verseRef = new VerseRef("NUM 17:1", ScrVers.English); + VerseRef result = verseRef.ChangeVersificationWithSegments(ScrVers.Original); + Assert.That(result.Versification, Is.EqualTo(ScrVers.Original)); + Assert.That(result.ToString(), Is.EqualTo("NUM 17:16")); + + verseRef = new VerseRef("NUM 17:1a", ScrVers.English); + result = verseRef.ChangeVersificationWithSegments(ScrVers.Original); + Assert.That(result.Versification, Is.EqualTo(ScrVers.Original)); + Assert.That(result.ToString(), Is.EqualTo("NUM 17:16a")); + + verseRef = new VerseRef("NUM 17:1a-2b,5a", ScrVers.English); + result = verseRef.ChangeVersificationWithSegments(ScrVers.Original); + Assert.That(result.Versification, Is.EqualTo(ScrVers.Original)); + Assert.That(result.ToString(), Is.EqualTo("NUM 17:16a,17b,20a")); + + verseRef = new VerseRef("NUM 17:13a-15a", ScrVers.Original); + result = verseRef.ChangeVersificationWithSegments(ScrVers.English); + Assert.That(result.Versification, Is.EqualTo(ScrVers.English)); + Assert.That(result.ToString(), Is.EqualTo("NUM 16:48a,49,50a")); + + verseRef = new VerseRef("NUM 17:1a", ScrVers.English); + result = verseRef.ChangeVersificationWithSegments(ScrVers.English); + Assert.That(result.Versification, Is.EqualTo(ScrVers.English)); + Assert.That(result.ToString(), Is.EqualTo("NUM 17:1a")); + + verseRef = new VerseRef("ESG 1:1b", ScrVers.Original); + result = verseRef.ChangeVersificationWithSegments(ScrVers.English); + Assert.That(result.Versification, Is.EqualTo(ScrVers.English)); + Assert.That(result.ToString(), Is.EqualTo("ESG 1:2")); + + verseRef = new VerseRef("ESG 1:2", ScrVers.English); + result = verseRef.ChangeVersificationWithSegments(ScrVers.Original); + Assert.That(result.Versification, Is.EqualTo(ScrVers.Original)); + Assert.That(result.ToString(), Is.EqualTo("ESG 1:1b")); + } +}