Skip to content
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
Show all changes
25 commits
Select commit Hold shift + click to select a range
dbcdf38
Converted existing class comments into xml comments.
Jim-Johnson-Programmer Jan 13, 2026
33f59a4
Converted class comment into xml comment.
Jim-Johnson-Programmer Jan 13, 2026
5ca0766
Added xml comments to shingle base class.
Jim-Johnson-Programmer Jan 13, 2026
9427521
Converted class comment to xml summary comment.
Jim-Johnson-Programmer Jan 13, 2026
343e403
Added missing comments.
Jim-Johnson-Programmer Jan 13, 2026
d5e08bb
Remaining classes have xml documentation added on all items with publ…
Jim-Johnson-Programmer Jan 13, 2026
822ad87
Interfaces have interface and method level comments added.
Jim-Johnson-Programmer Jan 13, 2026
1c8fa2a
Extension method classes have xml comments added for documentation
Jim-Johnson-Programmer Jan 13, 2026
666770b
Moved summary xml tag to line separate from C# code per copilot pr re…
Jim-Johnson-Programmer Jan 14, 2026
49b04e8
Removed extra spaces in xml comment per copilot pr requirement.
Jim-Johnson-Programmer Jan 14, 2026
670de6c
Fixed comment spelling error per copilot requirement.
Jim-Johnson-Programmer Jan 14, 2026
c2b0cbb
Fixed spacing/wrap issue in comment per copilot pr requirement.
Jim-Johnson-Programmer Jan 14, 2026
17d35f1
Moved xml comment to separate line per copilot pr requirement.
Jim-Johnson-Programmer Jan 14, 2026
b2de2f2
Spacing issue resolved per copilot pr requirement.
Jim-Johnson-Programmer Jan 14, 2026
4bbe901
Fixed spacing/wrap issue per copilot pr requirement.
Jim-Johnson-Programmer Jan 14, 2026
510d864
Added space extra space row for formatting.
Jim-Johnson-Programmer Jan 14, 2026
3025b07
Resolved spacing issues per copilot pr requirement.
Jim-Johnson-Programmer Jan 14, 2026
3d3f72f
Resolved spacing issue per copilot pr requirement.
Jim-Johnson-Programmer Jan 14, 2026
0089199
Made grammar update per copilot pr requirement.
Jim-Johnson-Programmer Jan 15, 2026
02875ae
Made grammar update per copilot pr requirement.
Jim-Johnson-Programmer Jan 15, 2026
df041a2
Made grammar update per copilot pr requirement.
Jim-Johnson-Programmer Jan 15, 2026
e59d1df
Made grammar update per copilot pr requirement.
Jim-Johnson-Programmer Jan 15, 2026
3dccf81
Parameters for method were previously overlooked in xml comments. Ad…
Jim-Johnson-Programmer Jan 15, 2026
91427a2
Added parameters to method that were overlooked.
Jim-Johnson-Programmer Jan 15, 2026
1c0dff0
Applying missing period suggestion.
Jim-Johnson-Programmer Jan 15, 2026
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
16 changes: 16 additions & 0 deletions src/F23.StringSimilarity/Damerau.cs
Original file line number Diff line number Diff line change
Expand Up @@ -56,6 +56,22 @@ public class Damerau : IMetricStringDistance, IMetricSpanDistance
public double Distance(string s1, string s2)
=> Distance(s1.AsSpan(), s2.AsSpan());

/// <summary>
/// Calculates the Damerau-Levenshtein distance between two sequences.
/// </summary>
/// <remarks>The Damerau-Levenshtein distance is a metric for measuring the edit distance between
/// two sequences, allowing for the following operations: <list type="bullet"> <item><description>Insertion of a
/// single element.</description></item> <item><description>Deletion of a single element.</description></item>
/// <item><description>Substitution of one element for another.</description></item>
/// <item><description>Transposition of two adjacent elements.</description></item> </list> This method is
/// case-sensitive for sequences of strings or characters.</remarks>
/// <typeparam name="T">The type of elements in the sequences. Must implement <see cref="IEquatable{T}"/>.</typeparam>
/// <param name="s1">The first sequence to compare. Cannot be <see langword="null"/>.</param>
/// <param name="s2">The second sequence to compare. Cannot be <see langword="null"/>.</param>
/// <returns>The Damerau-Levenshtein distance between the two sequences, which represents the minimum number of operations
/// (insertions, deletions, substitutions, or transpositions) required to transform one sequence into the other.
/// Returns 0 if the sequences are equal.</returns>
/// <exception cref="ArgumentNullException">Thrown if <paramref name="s1"/> or <paramref name="s2"/> is <see langword="null"/>.</exception>
public double Distance<T>(ReadOnlySpan<T> s1, ReadOnlySpan<T> s2)
where T : IEquatable<T>
{
Expand Down
2 changes: 2 additions & 0 deletions src/F23.StringSimilarity/ICharacterSubstitution.cs
Original file line number Diff line number Diff line change
Expand Up @@ -24,12 +24,14 @@

namespace F23.StringSimilarity
{
/// <summary>
/// Used to indicate the cost of character substitution.
///
/// Cost should always be in [0.0 .. 1.0].
/// For example, in an OCR application, cost('o', 'a') could be 0.4.
/// In a spell-checking application, cost('u', 'i') could be 0.4 because these are
/// next to each other on the keyboard.
/// </summary>
public interface ICharacterSubstitution
{
/// <summary>
Expand Down
Original file line number Diff line number Diff line change
@@ -1,5 +1,8 @@
namespace F23.StringSimilarity.Interfaces
{
/// <summary>
/// Contract for span-based distance measures whose results are normalized.
/// </summary>
/// <remarks>Declares no members of its own; all members are inherited from <see cref="ISpanDistance"/>.</remarks>
public interface INormalizedSpanDistance : ISpanDistance { }
Expand Down
Original file line number Diff line number Diff line change
@@ -1,5 +1,10 @@
namespace F23.StringSimilarity.Interfaces
{
/// <summary>
/// Contract for span-based similarity measures whose scores are normalized to the range 0 to 1.
/// </summary>
/// <remarks>A score of 0 indicates no similarity and a score of 1 indicates identical spans.
/// Declares no members of its own; all members are inherited from <see cref="ISpanSimilarity"/>.</remarks>
public interface INormalizedSpanSimilarity : ISpanSimilarity { }
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -24,6 +24,9 @@

namespace F23.StringSimilarity.Interfaces
{
/// <summary>
/// Contract for string distance algorithms whose results are normalized.
/// </summary>
/// <remarks>Declares no members of its own; all members are inherited from <see cref="IStringDistance"/>.</remarks>
public interface INormalizedStringDistance : IStringDistance { }
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -24,6 +24,9 @@

namespace F23.StringSimilarity.Interfaces
{
/// <summary>
/// Contract for string similarity algorithms whose results are normalized.
/// </summary>
/// <remarks>Declares no members of its own; all members are inherited from <see cref="IStringSimilarity"/>.</remarks>
public interface INormalizedStringSimilarity : IStringSimilarity { }
Expand Down
3 changes: 3 additions & 0 deletions src/F23.StringSimilarity/Interfaces/ISpanDistance.cs
Original file line number Diff line number Diff line change
Expand Up @@ -2,6 +2,9 @@

namespace F23.StringSimilarity.Interfaces
{
/// <summary>
/// An interface for distance measures that operate on spans.
/// </summary>
public interface ISpanDistance
{
/// <summary>
Expand Down
3 changes: 3 additions & 0 deletions src/F23.StringSimilarity/Interfaces/ISpanSimilarity.cs
Original file line number Diff line number Diff line change
Expand Up @@ -2,6 +2,9 @@

namespace F23.StringSimilarity.Interfaces
{
/// <summary>
/// Interface for span similarity algorithms.
/// </summary>
public interface ISpanSimilarity
{
/// <summary>
Expand Down
3 changes: 3 additions & 0 deletions src/F23.StringSimilarity/Interfaces/IStringDistance.cs
Original file line number Diff line number Diff line change
Expand Up @@ -24,6 +24,9 @@

namespace F23.StringSimilarity.Interfaces
{
/// <summary>
/// Interface for string distance algorithms.
/// </summary>
public interface IStringDistance
{
/// <summary>
Expand Down
3 changes: 3 additions & 0 deletions src/F23.StringSimilarity/Interfaces/IStringSimilarity.cs
Original file line number Diff line number Diff line change
Expand Up @@ -24,6 +24,9 @@

namespace F23.StringSimilarity.Interfaces
{
/// <summary>
/// Interface for string similarity algorithms.
/// </summary>
public interface IStringSimilarity
{
/// <summary>
Expand Down
34 changes: 29 additions & 5 deletions src/F23.StringSimilarity/JaroWinkler.cs
Original file line number Diff line number Diff line change
Expand Up @@ -31,6 +31,7 @@

namespace F23.StringSimilarity
{
/// <summary>
/// The Jaro–Winkler distance metric is designed and best suited for short
/// strings such as person names, and to detect typos; it is (roughly) a
/// variation of Damerau-Levenshtein, where the substitution of 2 close
Expand All @@ -39,6 +40,7 @@ namespace F23.StringSimilarity
/// Jaro-Winkler was developed in the area of record linkage (duplicate
/// detection) (Winkler, 1990). It returns a value in the interval [0.0, 1.0].
/// The distance is computed as 1 - Jaro-Winkler similarity.
/// </summary>
public class JaroWinkler : INormalizedStringSimilarity, INormalizedStringDistance, INormalizedSpanSimilarity, INormalizedSpanDistance
{
private const double DEFAULT_THRESHOLD = 0.7;
Expand All @@ -49,15 +51,15 @@ public class JaroWinkler : INormalizedStringSimilarity, INormalizedStringDistanc
/// The current value of the threshold used for adding the Winkler bonus. The default value is 0.7.
/// </summary>
private double Threshold { get; }

/// <summary>
/// Initializes a new instance that uses the default Winkler-bonus threshold (0.7).
/// </summary>
public JaroWinkler() => Threshold = DEFAULT_THRESHOLD;

/// <summary>
/// Creates a new instance with given threshold to determine when Winkler bonus should
/// be used. Set threshold to a negative value to get the Jaro distance.
Expand All @@ -77,13 +79,25 @@ public JaroWinkler(double threshold)
/// <exception cref="ArgumentNullException">If s1 or s2 is null.</exception>
public double Similarity(string s1, string s2)
=> Similarity(s1.AsSpan(), s2.AsSpan());


/// <summary>
/// Calculates the similarity between two sequences using the Jaro-Winkler distance metric.
/// </summary>
/// <remarks>The similarity is calculated using the Jaro-Winkler distance, which is a measure of
/// similarity between two sequences. The result is adjusted based on common prefixes to give higher scores to
/// sequences that share a common prefix.</remarks>
/// <typeparam name="T">The type of elements in the sequences. Must implement <see cref="IEquatable{T}"/>.</typeparam>
/// <param name="s1">The first sequence to compare. Cannot be null.</param>
/// <param name="s2">The second sequence to compare. Cannot be null.</param>
/// <returns>A value between 0 and 1 representing the similarity between the two sequences, where 1 indicates identical
/// sequences and 0 indicates no similarity.</returns>
/// <exception cref="ArgumentNullException">Thrown if <paramref name="s1"/> or <paramref name="s2"/> is null.</exception>
public double Similarity<T>(ReadOnlySpan<T> s1, ReadOnlySpan<T> s2)
where T : IEquatable<T>
{
if (s1 == null)
{
throw new ArgumentNullException(nameof(s1));
throw new ArgumentNullException(nameof(s1));
}

if (s2 == null)
Expand Down Expand Up @@ -122,7 +136,17 @@ public double Similarity<T>(ReadOnlySpan<T> s1, ReadOnlySpan<T> s2)
/// <exception cref="ArgumentNullException">If s1 or s2 is null.</exception>
public double Distance(string s1, string s2)
=> 1.0 - Similarity(s1, s2);


/// <summary>
/// Computes the Jaro-Winkler distance between two sequences.
/// </summary>
/// <remarks>The distance is the complement of the similarity: <c>1.0 - Similarity(s1, s2)</c>.</remarks>
/// <typeparam name="T">The element type of both sequences. Must implement <see cref="IEquatable{T}"/>.</typeparam>
/// <param name="s1">The first sequence to compare.</param>
/// <param name="s2">The second sequence to compare.</param>
/// <returns>A value from 0.0 to 1.0, where 0.0 indicates identical sequences and 1.0 indicates completely
/// dissimilar sequences.</returns>
public double Distance<T>(ReadOnlySpan<T> s1, ReadOnlySpan<T> s2)
    where T : IEquatable<T>
{
    var similarity = Similarity(s1, s2);
    return 1.0 - similarity;
}
Expand Down
31 changes: 29 additions & 2 deletions src/F23.StringSimilarity/Levenshtein.cs
Original file line number Diff line number Diff line change
Expand Up @@ -29,9 +29,11 @@

namespace F23.StringSimilarity
{
/// <summary>
/// The Levenshtein distance between two words is the minimum number of
/// single-character edits (insertions, deletions or substitutions) required to
/// change one string into the other.
/// </summary>
public class Levenshtein : IMetricStringDistance, IMetricSpanDistance
{
/// <summary>
Expand Down Expand Up @@ -73,11 +75,36 @@ public class Levenshtein : IMetricStringDistance, IMetricSpanDistance
/// <exception cref="ArgumentNullException">If s1 or s2 is null.</exception>
public double Distance(string s1, string s2, int limit)
=> Distance(s1.AsSpan(), s2.AsSpan(), limit);


/// <summary>
/// Calculates the Levenshtein distance between two sequences without an upper limit.
/// </summary>
/// <remarks>This overload forwards to the overload that accepts a <c>limit</c>, passing
/// <see cref="int.MaxValue"/> so that the calculation is effectively unbounded. To cap the
/// calculation, call the overload that takes a <c>limit</c> argument.</remarks>
/// <typeparam name="T">The element type of both sequences. Must implement <see cref="IEquatable{T}"/>.</typeparam>
/// <param name="s1">The first sequence to compare.</param>
/// <param name="s2">The second sequence to compare.</param>
/// <returns>The value returned by the limit-taking overload for <paramref name="s1"/> and
/// <paramref name="s2"/> with a limit of <see cref="int.MaxValue"/>.</returns>
public double Distance<T>(ReadOnlySpan<T> s1, ReadOnlySpan<T> s2)
    where T : IEquatable<T>
{
    return Distance(s1, s2, int.MaxValue);
}


/// <summary>
/// Calculates the edit distance (Levenshtein distance) between two sequences, with an optional upper limit.
/// </summary>
/// <remarks>The edit distance is a measure of the minimum number of single-element edits
/// (insertions, deletions, or substitutions) required to transform one sequence into the other. This method is
/// optimized to stop processing early if the distance exceeds the specified <paramref name="limit"/>.</remarks>
/// <typeparam name="T">The type of elements in the sequences. The type must implement <see cref="IEquatable{T}"/>.</typeparam>
/// <param name="s1">The first sequence to compare. Cannot be null.</param>
/// <param name="s2">The second sequence to compare. Cannot be null.</param>
/// <param name="limit">The maximum distance to calculate. If the edit distance exceeds this value, the method returns <paramref
/// name="limit"/>.</param>
/// <returns>The edit distance between <paramref name="s1"/> and <paramref name="s2"/>. If the sequences are identical,
/// the result is 0. If the distance exceeds <paramref name="limit"/>, the method returns <paramref
/// name="limit"/>.</returns>
/// <exception cref="ArgumentNullException">Thrown if <paramref name="s1"/> or <paramref name="s2"/> is null.</exception>
public double Distance<T>(ReadOnlySpan<T> s1, ReadOnlySpan<T> s2, int limit)
where T : IEquatable<T>
{
Expand Down
17 changes: 15 additions & 2 deletions src/F23.StringSimilarity/LongestCommonSubsequence.cs
Original file line number Diff line number Diff line change
Expand Up @@ -28,6 +28,7 @@

namespace F23.StringSimilarity
{
/// <summary>
/// The longest common subsequence (LCS) problem consists in finding the longest
/// subsequence common to two (or more) sequences. It differs from problems of
/// finding common substrings: unlike substrings, subsequences are not required
Expand All @@ -44,6 +45,7 @@ namespace F23.StringSimilarity
///
/// ! This class currently implements the dynamic programming approach, which has
/// a space requirement O(m * n)!
/// </summary>
public class LongestCommonSubsequence : IStringDistance, ISpanDistance
{
/// <summary>
Expand All @@ -59,7 +61,18 @@ public class LongestCommonSubsequence : IStringDistance, ISpanDistance
/// <exception cref="ArgumentNullException">If s1 or s2 is null.</exception>
public double Distance(string s1, string s2)
=> Distance(s1.AsSpan(), s2.AsSpan());


/// <summary>
/// Calculates the distance between two sequences based on the length of their longest common subsequence.
/// </summary>
/// <remarks>The distance is calculated as the sum of the lengths of the two sequences minus twice
/// the length of their longest common subsequence.</remarks>
/// <typeparam name="T">The type of elements in the sequences. Must implement <see cref="IEquatable{T}"/>.</typeparam>
/// <param name="s1">The first sequence to compare. Cannot be null.</param>
/// <param name="s2">The second sequence to compare. Cannot be null.</param>
/// <returns>A non-negative <see cref="double"/> representing the distance between the two sequences. Returns 0 if the
/// sequences are identical.</returns>
/// <exception cref="ArgumentNullException">Thrown if <paramref name="s1"/> or <paramref name="s2"/> is <see langword="null"/>.</exception>
public double Distance<T>(ReadOnlySpan<T> s1, ReadOnlySpan<T> s2)
where T : IEquatable<T>
{
Expand Down Expand Up @@ -91,7 +104,7 @@ public double Distance<T>(ReadOnlySpan<T> s1, ReadOnlySpan<T> s2)
/// <exception cref="ArgumentNullException">If s1 or s2 is null.</exception>
public int Length(string s1, string s2)
=> Length(s1.AsSpan(), s2.AsSpan());

internal static int Length<T>(ReadOnlySpan<T> s1, ReadOnlySpan<T> s2)
where T : IEquatable<T>
{
Expand Down
15 changes: 14 additions & 1 deletion src/F23.StringSimilarity/MetricLCS.cs
Original file line number Diff line number Diff line change
Expand Up @@ -43,7 +43,20 @@ public class MetricLCS : IMetricStringDistance, INormalizedStringDistance, IMetr
/// <exception cref="ArgumentNullException">If s1 or s2 is null.</exception>
public double Distance(string s1, string s2)
=> Distance(s1.AsSpan(), s2.AsSpan());


/// <summary>
/// Calculates the normalized distance between two sequences based on their longest common subsequence.
/// </summary>
/// <remarks>The distance is calculated as: <code> 1.0 - (Length of Longest Common Subsequence /
/// Maximum Length of the Two Sequences) </code> This method is case-sensitive for sequences of strings or characters.</remarks>
/// <typeparam name="T">The type of elements in the sequences. Must implement <see cref="IEquatable{T}"/>.</typeparam>
/// <param name="s1">The first sequence to compare. Cannot be null.</param>
/// <param name="s2">The second sequence to compare. Cannot be null.</param>
/// <returns>A value between 0.0 and 1.0 representing the normalized distance between the two sequences: <list
/// type="bullet"> <item><description>Returns 0.0 if the sequences are identical.</description></item>
/// <item><description>Returns 1.0 if the sequences have no common elements.</description></item>
/// <item><description>Returns a value between 0.0 and 1.0 for partial similarity.</description></item> </list></returns>
/// <exception cref="ArgumentNullException">Thrown if <paramref name="s1"/> or <paramref name="s2"/> is <see langword="null"/>.</exception>
public double Distance<T>(ReadOnlySpan<T> s1, ReadOnlySpan<T> s2)
where T : IEquatable<T>
{
Expand Down
10 changes: 10 additions & 0 deletions src/F23.StringSimilarity/NGram.cs
Original file line number Diff line number Diff line change
Expand Up @@ -47,8 +47,18 @@ public class NGram : INormalizedStringDistance
private const int DEFAULT_N = 2;
private readonly int n;

/// <summary>
/// Initializes a new instance of the <see cref="NGram"/> class with the default n-gram size (2).
/// </summary>
/// <remarks>Delegates to the constructor that takes the n-gram size, passing the default value.
/// Equivalent to <c>new NGram(2)</c>.</remarks>
public NGram()
    : this(DEFAULT_N)
{
}

/// <summary>
/// Initializes a new instance of the <see cref="NGram"/> class with the specified size.
/// </summary>
/// <param name="n">The size of the n-gram. Must be a positive integer.</param>
public NGram(int n)
{
this.n = n;
Expand Down
28 changes: 26 additions & 2 deletions src/F23.StringSimilarity/NormalizedLevenshtein.cs
Original file line number Diff line number Diff line change
Expand Up @@ -27,10 +27,12 @@

namespace F23.StringSimilarity
{
/// <summary>
/// This distance is computed as Levenshtein distance divided by the length of
/// the longest string. The resulting value is always in the interval [0.0, 1.0]
/// but it is not a metric anymore! The similarity is computed as 1 - normalized
/// distance.
/// </summary>
public class NormalizedLevenshtein : INormalizedStringDistance, INormalizedStringSimilarity, INormalizedSpanDistance, INormalizedSpanSimilarity
{
private readonly Levenshtein l = new Levenshtein();
Expand All @@ -44,7 +46,19 @@ public class NormalizedLevenshtein : INormalizedStringDistance, INormalizedStrin
/// <exception cref="ArgumentNullException">If s1 or s2 is null.</exception>
public double Distance(string s1, string s2)
=> Distance(s1.AsSpan(), s2.AsSpan());


/// <summary>
/// Calculates the normalized distance between two sequences of elements.
/// </summary>
/// <remarks>The distance is normalized by the length of the longer sequence. This ensures the
/// result is always in the range [0.0, 1.0], where 0.0 indicates identical sequences and 1.0 indicates
/// maximum dissimilarity.</remarks>
/// <typeparam name="T">The type of elements in the sequences. Must implement <see cref="IEquatable{T}"/>.</typeparam>
/// <param name="s1">The first sequence to compare. May be empty. Cannot be null.</param>
/// <param name="s2">The second sequence to compare. May be empty. Cannot be null.</param>
/// <returns>A double value representing the normalized distance between the two sequences. Returns 0.0 if the sequences
/// are equal or both are empty.</returns>
/// <exception cref="ArgumentNullException">Thrown if <paramref name="s1"/> or <paramref name="s2"/> is null.</exception>
public double Distance<T>(ReadOnlySpan<T> s1, ReadOnlySpan<T> s2)
where T : IEquatable<T>
{
Expand Down Expand Up @@ -82,7 +96,17 @@ public double Distance<T>(ReadOnlySpan<T> s1, ReadOnlySpan<T> s2)
/// <exception cref="ArgumentNullException">If s1 or s2 is null.</exception>
public double Similarity(string s1, string s2)
=> 1.0 - Distance(s1, s2);


/// <summary>
/// Computes the normalized Levenshtein similarity between two sequences.
/// </summary>
/// <remarks>The similarity is the complement of the normalized distance:
/// <c>1.0 - Distance(s1, s2)</c>.</remarks>
/// <typeparam name="T">The element type of both sequences. Must implement <see cref="IEquatable{T}"/>.</typeparam>
/// <param name="s1">The first sequence to compare.</param>
/// <param name="s2">The second sequence to compare.</param>
/// <returns>A value from 0.0 to 1.0, where 1.0 indicates identical sequences and 0.0 indicates completely
/// dissimilar sequences.</returns>
public double Similarity<T>(ReadOnlySpan<T> s1, ReadOnlySpan<T> s2)
    where T : IEquatable<T>
{
    var distance = Distance(s1, s2);
    return 1.0 - distance;
}
Expand Down
Loading
Loading