Skip to content

Commit 44d11a0

Browse files
authored
Remove RTL markers from verse token data (#378)
1 parent 7e848e3 commit 44d11a0

3 files changed

Lines changed: 74 additions & 0 deletions

File tree

src/SIL.Machine/Corpora/UpdateUsfmParserHandler.cs

Lines changed: 10 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -518,6 +518,11 @@ private void CollectUpdatableTokens(UsfmParserState state)
518518
while (_tokenIndex <= state.Index + state.SpecialTokenCount)
519519
{
520520
UsfmToken token = state.Tokens[_tokenIndex];
521+
if (token.Type == UsfmTokenType.Verse)
522+
{
523+
string sanitizedVerseData = SanitizeVerseData(token.Data);
524+
token = new UsfmToken(token.Type, token.Marker, token.Text, token.EndMarker, sanitizedVerseData);
525+
}
521526
if (CurrentTextType == ScriptureTextType.Embed)
522527
{
523528
_embedTokens.Add(token);
@@ -752,6 +757,11 @@ private void UpdateVerseRows()
752757
}
753758
}
754759

760+
/// <summary>
/// Removes every right-to-left mark (U+200F) from the data of a verse token.
/// </summary>
/// <param name="verseData">The verse token data to clean; may be null or empty.</param>
/// <returns>
/// <paramref name="verseData"/> with all U+200F characters removed. A null or empty input is
/// returned unchanged.
/// </returns>
private static string SanitizeVerseData(string verseData)
{
    // U+200F RIGHT-TO-LEFT MARK is an invisible formatting character; name it so the intent
    // is readable at the call below.
    const string RightToLeftMark = "\u200F";

    // Nothing to strip; also avoids a NullReferenceException when the token carries no data.
    if (string.IsNullOrEmpty(verseData))
    {
        return verseData;
    }

    return verseData.Replace(RightToLeftMark, "");
}
764+
755765
private class RowInfo
756766
{
757767
public RowInfo(int rowIndex)

tests/SIL.Machine.Tests/Corpora/UpdateUsfmParserHandlerTests.cs

Lines changed: 28 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -906,6 +906,34 @@ public void UpdateBlock_Verse_Range()
906906
);
907907
}
908908

909+
/// <summary>
/// Verifies that a verse range whose number contains a right-to-left mark (U+200F) is updated
/// from the row covering MAT 1:1-3 and that the mark does not appear in the resulting USFM.
/// </summary>
[Test]
public void UpdateBlock_Verse_Range_RightToLeftMarker()
{
    var rows = new List<UpdateUsfmRow> { new UpdateUsfmRow(ScrRef("MAT 1:1", "MAT 1:2", "MAT 1:3"), "Update 1-3") };

    // The right-to-left mark is written as an escape rather than embedded raw in the verbatim
    // string: the raw character is invisible and is easily lost by editors or formatters.
    string usfm =
        @"\id MAT - Test
\c 1
\v 1"
        + "\u200F"
        + @"-3 verse 1 through 3
";
    TestUsfmUpdateBlockHandler usfmUpdateBlockHandler = new TestUsfmUpdateBlockHandler();
    string updatedUsfm = UpdateUsfm(rows, usfm, usfmUpdateBlockHandlers: [usfmUpdateBlockHandler]);

    // The expected verse number is the plain range "1-3" with no mark in it.
    string expectedUsfm =
        @"\id MAT - Test
\c 1
\v 1-3 Update 1-3
";
    Assert.That(updatedUsfm, Is.EqualTo(expectedUsfm).IgnoreLineEndings());
    Assert.That(usfmUpdateBlockHandler.Blocks.Count, Is.EqualTo(1));

    UsfmUpdateBlock usfmUpdateBlock = usfmUpdateBlockHandler.Blocks[0];
    AssertUpdateBlockEquals(
        usfmUpdateBlock,
        ["MAT 1:1", "MAT 1:2", "MAT 1:3"],
        (UsfmUpdateBlockElementType.Text, "Update 1-3 ", false),
        (UsfmUpdateBlockElementType.Text, "verse 1 through 3 ", true)
    );
}
936+
909937
[Test]
910938
public void UpdateBlock_Footnote_PreserveEmbeds()
911939
{

tests/SIL.Machine.Tests/Corpora/UsfmMemoryTextTests.cs

Lines changed: 36 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -425,6 +425,42 @@ public void GetRows_PrivateUseMarker()
425425
});
426426
}
427427

428+
/// <summary>
/// Verifies that a verse range written with a right-to-left mark (U+200F) between the first
/// verse number and the hyphen produces two rows, MAT 1:1 and MAT 1:2, with the verse text on
/// the first row.
/// </summary>
[Test]
public void GetRows_VerseRangeWithRightToLeftMarker()
{
    // The mark sits directly after the first verse number of the "1-2" range.
    string usfm =
        @"\id MAT - Test
\h
\mt
\c 1
\v 1"
        + "\u200f"
        + @"-2 Verse one and two.
";
    TextRow[] rows = GetRows(usfm);

    Assert.Multiple(() =>
    {
        Assert.That(rows, Has.Length.EqualTo(2));

        // Diagnostic text attached to the ref assertions: all row refs, comma separated.
        string allRefs = string.Join(",", rows.Select(row => row.Ref.ToString()));

        Assert.That(rows[0].Ref, Is.EqualTo(ScriptureRef.Parse("MAT 1:1")), allRefs);
        Assert.That(
            rows[0].Text,
            Is.EqualTo("Verse one and two."),
            string.Join(",", rows.Select(row => row.Text))
        );
        Assert.That(rows[1].Ref, Is.EqualTo(ScriptureRef.Parse("MAT 1:2")), allRefs);
    });
}
463+
428464
[Test]
429465
public void GetRows_NonLatinVerseNumber()
430466
{

0 commit comments

Comments
 (0)