Skip to content

Commit 68784ea

Browse files
committed
Add versification warnings for invalid chapter or verse numbers in USFM
1 parent 9ef0877 commit 68784ea

2 files changed

Lines changed: 124 additions & 3 deletions

File tree

src/SIL.Machine/Corpora/UsfmVersificationErrorDetector.cs

Lines changed: 75 additions & 3 deletions
Original file line number | Diff line number | Diff line change
@@ -12,7 +12,9 @@ public enum UsfmVersificationErrorType
1212
ExtraVerse,
1313
InvalidVerseRange,
1414
MissingVerseSegment,
15-
ExtraVerseSegment
15+
ExtraVerseSegment,
16+
InvalidChapterNumber,
17+
InvalidVerseNumber
1618
}
1719

1820
public class UsfmVersificationError
@@ -22,6 +24,7 @@ public class UsfmVersificationError
2224
private readonly int _expectedVerse;
2325
private readonly int _actualChapter;
2426
private readonly int _actualVerse;
27+
private readonly string _actualValue;
2528
private VerseRef? _verseRef = null;
2629

2730
public UsfmVersificationError(
@@ -43,6 +46,21 @@ public UsfmVersificationError(
4346
ProjectName = projectName;
4447
}
4548

49+
/// <summary>
/// Creates an error for a chapter or verse marker whose number text is invalid.
/// Unlike the other constructor, this overload stores the raw text rather than a
/// parsed verse reference; <see cref="ActualVerseRef"/> echoes that text back for the
/// <see cref="UsfmVersificationErrorType.InvalidChapterNumber"/> and
/// <see cref="UsfmVersificationErrorType.InvalidVerseNumber"/> types.
/// </summary>
/// <param name="bookNum">Book number; converted to a book ID via Canon.BookNumberToId when reporting.</param>
/// <param name="expectedChapter">
/// Chapter number recorded for the error. It is used as the chapter part of
/// <see cref="ActualVerseRef"/> for InvalidVerseNumber errors.
/// </param>
/// <param name="actualValue">The chapter or verse number text exactly as the caller received it.</param>
/// <param name="projectName">Name of the project the error belongs to.</param>
/// <param name="type">
/// Error category. NOTE(review): presumably only InvalidChapterNumber or InvalidVerseNumber
/// is intended here, since no verse reference is stored - confirm against callers.
/// </param>
public UsfmVersificationError(
50+
int bookNum,
51+
int expectedChapter,
52+
string actualValue,
53+
string projectName,
54+
UsfmVersificationErrorType type
55+
)
56+
{
57+
_bookNum = bookNum;
58+
_expectedChapter = expectedChapter;
59+
_actualValue = actualValue;
60+
ProjectName = projectName;
61+
Type = type;
62+
}
63+
4664
public string ProjectName { get; private set; }
4765

4866
public UsfmVersificationErrorType Type { get; private set; }
@@ -104,8 +122,14 @@ public string ExpectedVerseRef
104122
{
105123
get
106124
{
107-
if (Type == UsfmVersificationErrorType.ExtraVerse)
125+
if (
126+
Type == UsfmVersificationErrorType.ExtraVerse
127+
|| Type == UsfmVersificationErrorType.InvalidChapterNumber
128+
|| Type == UsfmVersificationErrorType.InvalidVerseNumber
129+
)
130+
{
108131
return "";
132+
}
109133

110134
// We do not want to throw an exception here, and the VerseRef constructor can throw
111135
// an exception with certain invalid verse data; use TryParse instead.
@@ -154,11 +178,20 @@ out VerseRef correctedVerseRangeRef
154178
return defaultVerseRef.ToString();
155179
}
156180
}
181+
157182
public string ActualVerseRef
158183
{
159184
get
160185
{
161-
if (_verseRef != null)
186+
if (Type == UsfmVersificationErrorType.InvalidChapterNumber)
187+
{
188+
return $"{Canon.BookNumberToId(_bookNum)} {_actualValue}";
189+
}
190+
else if (Type == UsfmVersificationErrorType.InvalidVerseNumber)
191+
{
192+
return $"{Canon.BookNumberToId(_bookNum)} {_expectedChapter}:{_actualValue}";
193+
}
194+
else if (_verseRef != null)
162195
{
163196
return _verseRef.ToString();
164197
}
@@ -254,6 +287,22 @@ string pubNumber
254287

255288
_currentChapter = state.VerseRef.ChapterNum;
256289
_currentVerse = new VerseRef();
290+
291+
// See whether the chapter number is invalid
292+
VerseRef verseRef = state.VerseRef.Clone();
293+
verseRef.Chapter = number;
294+
if (verseRef.ChapterNum == -1)
295+
{
296+
_errors.Add(
297+
new UsfmVersificationError(
298+
_currentBook,
299+
_currentChapter,
300+
number,
301+
_projectName,
302+
UsfmVersificationErrorType.InvalidChapterNumber
303+
)
304+
);
305+
}
257306
}
258307

259308
public override void Verse(
@@ -264,6 +313,7 @@ public override void Verse(
264313
string pubNumber
265314
)
266315
{
316+
bool verseInError = false;
267317
_currentVerse = state.VerseRef;
268318
if (_currentBook > 0 && Canon.IsCanonical(_currentBook) && _currentChapter > 0)
269319
{
@@ -277,7 +327,29 @@ string pubNumber
277327
_currentVerse
278328
);
279329
if (versificationError.CheckError())
330+
{
280331
_errors.Add(versificationError);
332+
verseInError = true;
333+
}
334+
}
335+
336+
if (!verseInError)
337+
{
338+
// See whether the verse number is invalid
339+
VerseRef verseRef = _currentVerse.Clone();
340+
verseRef.Verse = number;
341+
if (verseRef.VerseNum == -1)
342+
{
343+
_errors.Add(
344+
new UsfmVersificationError(
345+
_currentBook,
346+
_currentChapter,
347+
number,
348+
_projectName,
349+
UsfmVersificationErrorType.InvalidVerseNumber
350+
)
351+
);
352+
}
281353
}
282354
}
283355
}

tests/SIL.Machine.Tests/Corpora/ParatextProjectVersificationErrorTests.cs

Lines changed: 49 additions & 0 deletions
Original file line number | Diff line number | Diff line change
@@ -397,6 +397,55 @@ public void GetUsfmVersificationErrors_MultipleChapters()
397397
Assert.That(errors[1].ActualVerseRef, Is.EqualTo("2JN 2:1"));
398398
}
399399

400+
// Verifies that a chapter marker with invalid number text ("1." - trailing period)
// is reported as InvalidChapterNumber, followed by a MissingChapter error.
[Test]
401+
public void GetUsfmVersificationErrors_InvalidChapterNumber()
402+
{
403+
// A 3JN file whose only chapter marker carries the malformed number "1.".
var env = new TestEnvironment(
404+
files: new Dictionary<string, string>()
405+
{
406+
{
407+
"653JNTest.SFM",
408+
@"\id 3JN
409+
\c 1.
410+
"
411+
}
412+
}
413+
);
414+
IReadOnlyList<UsfmVersificationError> errors = env.GetUsfmVersificationErrors();
415+
// The serialized errors are passed as the failure message to aid debugging.
Assert.That(errors, Has.Count.EqualTo(2), JsonSerializer.Serialize(errors));
416+
Assert.That(errors[0].Type, Is.EqualTo(UsfmVersificationErrorType.InvalidChapterNumber));
417+
Assert.That(errors[1].Type, Is.EqualTo(UsfmVersificationErrorType.MissingChapter));
418+
// The invalid-chapter error has no expected reference; only the actual raw text is reported.
Assert.That(errors[0].ExpectedVerseRef, Is.Empty);
419+
Assert.That(errors[1].ExpectedVerseRef, Is.EqualTo("3JN 1:15"));
420+
Assert.That(errors[0].ActualVerseRef, Is.EqualTo("3JN 1."));
421+
// NOTE(review): the "-1" chapter presumably comes from the unparseable chapter number - confirm.
Assert.That(errors[1].ActualVerseRef, Is.EqualTo("3JN -1:0"));
422+
}
423+
424+
// Verifies that a verse marker with invalid number text ("v1") is reported as
// InvalidVerseNumber, followed by a MissingVerse error.
[Test]
425+
public void GetUsfmVersificationErrors_InvalidVerseNumber()
426+
{
427+
// A 3JN file with a valid chapter 1 and a single verse marker numbered "v1".
var env = new TestEnvironment(
428+
files: new Dictionary<string, string>()
429+
{
430+
{
431+
"653JNTest.SFM",
432+
@"\id 3JN
433+
\c 1
434+
\v v1
435+
"
436+
}
437+
}
438+
);
439+
IReadOnlyList<UsfmVersificationError> errors = env.GetUsfmVersificationErrors();
440+
// The serialized errors are passed as the failure message to aid debugging.
Assert.That(errors, Has.Count.EqualTo(2), JsonSerializer.Serialize(errors));
441+
Assert.That(errors[0].Type, Is.EqualTo(UsfmVersificationErrorType.InvalidVerseNumber));
442+
Assert.That(errors[1].Type, Is.EqualTo(UsfmVersificationErrorType.MissingVerse));
443+
// The invalid-verse error has no expected reference; the actual reference echoes the raw verse text.
Assert.That(errors[0].ExpectedVerseRef, Is.Empty);
444+
Assert.That(errors[1].ExpectedVerseRef, Is.EqualTo("3JN 1:15"));
445+
Assert.That(errors[0].ActualVerseRef, Is.EqualTo("3JN 1:v1"));
446+
Assert.That(errors[1].ActualVerseRef, Is.EqualTo("3JN 1:0"));
447+
}
448+
400449
private class TestEnvironment(ParatextProjectSettings? settings = null, Dictionary<string, string>? files = null)
401450
{
402451
public ParatextProjectVersificationErrorDetectorBase Detector { get; } =

0 commit comments

Comments
 (0)