diff --git a/src/F23.StringSimilarity/Damerau.cs b/src/F23.StringSimilarity/Damerau.cs
index dd6138e..1a349f2 100644
--- a/src/F23.StringSimilarity/Damerau.cs
+++ b/src/F23.StringSimilarity/Damerau.cs
@@ -56,6 +56,22 @@ public class Damerau : IMetricStringDistance, IMetricSpanDistance
public double Distance(string s1, string s2)
=> Distance(s1.AsSpan(), s2.AsSpan());
+ ///
+ /// Calculates the Damerau-Levenshtein distance between two sequences.
+ ///
+ /// The Damerau-Levenshtein distance is a metric for measuring the edit distance between
+ /// two sequences, allowing for the following operations: - Insertion of a
+ /// single element. - Deletion of a single element.
+ /// - Substitution of one element for another.
+ /// - Transposition of two adjacent elements. This method is
+ /// case-sensitive for sequences of strings or characters.
+ /// The type of elements in the sequences. Must implement IEquatable{T}.
+ /// The first sequence to compare. Cannot be null.
+ /// The second sequence to compare. Cannot be null.
+ /// The Damerau-Levenshtein distance between the two sequences, which represents the minimum number of operations
+ /// (insertions, deletions, substitutions, or transpositions) required to transform one sequence into the other.
+ /// Returns 0 if the sequences are equal.
+ /// Thrown if s1 or s2 is null.
public double Distance(ReadOnlySpan s1, ReadOnlySpan s2)
where T : IEquatable
{
diff --git a/src/F23.StringSimilarity/ICharacterSubstitution.cs b/src/F23.StringSimilarity/ICharacterSubstitution.cs
index 6217b7a..bb64189 100644
--- a/src/F23.StringSimilarity/ICharacterSubstitution.cs
+++ b/src/F23.StringSimilarity/ICharacterSubstitution.cs
@@ -24,12 +24,14 @@
namespace F23.StringSimilarity
{
+ ///
/// Used to indicate the cost of character substitution.
///
/// Cost should always be in [0.0 .. 1.0]
/// For example, in an OCR application, cost('o', 'a') could be 0.4
/// In a checkspelling application, cost('u', 'i') could be 0.4 because these are
/// next to each other on the keyboard...
+ ///
public interface ICharacterSubstitution
{
///
diff --git a/src/F23.StringSimilarity/Interfaces/INormalizedSpanDistance.cs b/src/F23.StringSimilarity/Interfaces/INormalizedSpanDistance.cs
index c23f17b..c586dd0 100644
--- a/src/F23.StringSimilarity/Interfaces/INormalizedSpanDistance.cs
+++ b/src/F23.StringSimilarity/Interfaces/INormalizedSpanDistance.cs
@@ -1,5 +1,8 @@
namespace F23.StringSimilarity.Interfaces
{
+ ///
+ /// An interface for normalized distance measures that operate on spans.
+ ///
public interface INormalizedSpanDistance : ISpanDistance
{
}
diff --git a/src/F23.StringSimilarity/Interfaces/INormalizedSpanSimilarity.cs b/src/F23.StringSimilarity/Interfaces/INormalizedSpanSimilarity.cs
index c5a46ed..24217b2 100644
--- a/src/F23.StringSimilarity/Interfaces/INormalizedSpanSimilarity.cs
+++ b/src/F23.StringSimilarity/Interfaces/INormalizedSpanSimilarity.cs
@@ -1,5 +1,10 @@
namespace F23.StringSimilarity.Interfaces
{
+ ///
+ /// Defines a contract for calculating the similarity between spans of text, normalized to a range of 0 to 1.
+ ///
+ /// This interface extends ISpanSimilarity by ensuring that similarity scores are
+ /// normalized. A score of 0 indicates no similarity, while a score of 1 indicates identical spans.
public interface INormalizedSpanSimilarity : ISpanSimilarity
{
}
diff --git a/src/F23.StringSimilarity/Interfaces/INormalizedStringDistance.cs b/src/F23.StringSimilarity/Interfaces/INormalizedStringDistance.cs
index 3d7c2ec..25d3705 100644
--- a/src/F23.StringSimilarity/Interfaces/INormalizedStringDistance.cs
+++ b/src/F23.StringSimilarity/Interfaces/INormalizedStringDistance.cs
@@ -24,6 +24,9 @@
namespace F23.StringSimilarity.Interfaces
{
+ ///
+ /// Interface for normalized string distance algorithms.
+ ///
public interface INormalizedStringDistance : IStringDistance
{
}
diff --git a/src/F23.StringSimilarity/Interfaces/INormalizedStringSimilarity.cs b/src/F23.StringSimilarity/Interfaces/INormalizedStringSimilarity.cs
index 0ea1273..47ecebb 100644
--- a/src/F23.StringSimilarity/Interfaces/INormalizedStringSimilarity.cs
+++ b/src/F23.StringSimilarity/Interfaces/INormalizedStringSimilarity.cs
@@ -24,6 +24,9 @@
namespace F23.StringSimilarity.Interfaces
{
+ ///
+ /// Interface for normalized string similarity algorithms.
+ ///
public interface INormalizedStringSimilarity : IStringSimilarity
{
}
diff --git a/src/F23.StringSimilarity/Interfaces/ISpanDistance.cs b/src/F23.StringSimilarity/Interfaces/ISpanDistance.cs
index a832ce2..b915c57 100644
--- a/src/F23.StringSimilarity/Interfaces/ISpanDistance.cs
+++ b/src/F23.StringSimilarity/Interfaces/ISpanDistance.cs
@@ -2,6 +2,9 @@
namespace F23.StringSimilarity.Interfaces
{
+ ///
+ /// An interface for distance measures that operate on spans.
+ ///
public interface ISpanDistance
{
///
diff --git a/src/F23.StringSimilarity/Interfaces/ISpanSimilarity.cs b/src/F23.StringSimilarity/Interfaces/ISpanSimilarity.cs
index b5ab92a..6bba222 100644
--- a/src/F23.StringSimilarity/Interfaces/ISpanSimilarity.cs
+++ b/src/F23.StringSimilarity/Interfaces/ISpanSimilarity.cs
@@ -2,6 +2,9 @@
namespace F23.StringSimilarity.Interfaces
{
+ ///
+ /// Interface for span similarity algorithms.
+ ///
public interface ISpanSimilarity
{
///
diff --git a/src/F23.StringSimilarity/Interfaces/IStringDistance.cs b/src/F23.StringSimilarity/Interfaces/IStringDistance.cs
index f268eac..b0f2a07 100644
--- a/src/F23.StringSimilarity/Interfaces/IStringDistance.cs
+++ b/src/F23.StringSimilarity/Interfaces/IStringDistance.cs
@@ -24,6 +24,9 @@
namespace F23.StringSimilarity.Interfaces
{
+ ///
+ /// Interface for string distance algorithms.
+ ///
public interface IStringDistance
{
///
diff --git a/src/F23.StringSimilarity/Interfaces/IStringSimilarity.cs b/src/F23.StringSimilarity/Interfaces/IStringSimilarity.cs
index 7a9df6a..fa21625 100644
--- a/src/F23.StringSimilarity/Interfaces/IStringSimilarity.cs
+++ b/src/F23.StringSimilarity/Interfaces/IStringSimilarity.cs
@@ -24,6 +24,9 @@
namespace F23.StringSimilarity.Interfaces
{
+ ///
+ /// Interface for string similarity algorithms.
+ ///
public interface IStringSimilarity
{
///
diff --git a/src/F23.StringSimilarity/JaroWinkler.cs b/src/F23.StringSimilarity/JaroWinkler.cs
index 88ad93a..835c0fe 100644
--- a/src/F23.StringSimilarity/JaroWinkler.cs
+++ b/src/F23.StringSimilarity/JaroWinkler.cs
@@ -31,6 +31,7 @@
namespace F23.StringSimilarity
{
+ ///
/// The Jaro–Winkler distance metric is designed and best suited for short
/// strings such as person names, and to detect typos; it is (roughly) a
/// variation of Damerau-Levenshtein, where the substitution of 2 close
@@ -39,6 +40,7 @@ namespace F23.StringSimilarity
/// Jaro-Winkler was developed in the area of record linkage (duplicate
/// detection) (Winkler, 1990). It returns a value in the interval [0.0, 1.0].
/// The distance is computed as 1 - Jaro-Winkler similarity.
+ ///
public class JaroWinkler : INormalizedStringSimilarity, INormalizedStringDistance, INormalizedSpanSimilarity, INormalizedSpanDistance
{
private const double DEFAULT_THRESHOLD = 0.7;
@@ -49,7 +51,7 @@ public class JaroWinkler : INormalizedStringSimilarity, INormalizedStringDistanc
/// The current value of the threshold used for adding the Winkler bonus. The default value is 0.7.
///
private double Threshold { get; }
-
+
///
/// Creates a new instance with default threshold (0.7)
///
@@ -57,7 +59,7 @@ public JaroWinkler()
{
Threshold = DEFAULT_THRESHOLD;
}
-
+
///
/// Creates a new instance with given threshold to determine when Winkler bonus should
/// be used. Set threshold to a negative value to get the Jaro distance.
@@ -77,13 +79,25 @@ public JaroWinkler(double threshold)
/// If s1 or s2 is null.
public double Similarity(string s1, string s2)
=> Similarity(s1.AsSpan(), s2.AsSpan());
-
+
+ ///
+ /// Calculates the similarity between two sequences using the Jaro-Winkler distance metric.
+ ///
+ /// The similarity is calculated using the Jaro-Winkler distance, which is a measure of
+ /// similarity between two sequences. The result is adjusted based on common prefixes to give higher scores to
+ /// sequences that share a common prefix.
+ /// The type of elements in the sequences. Must implement IEquatable{T}.
+ /// The first sequence to compare. Cannot be null.
+ /// The second sequence to compare. Cannot be null.
+ /// A value between 0 and 1 representing the similarity between the two sequences, where 1 indicates identical
+ /// sequences and 0 indicates no similarity.
+ /// Thrown if s1 or s2 is null.
public double Similarity(ReadOnlySpan s1, ReadOnlySpan s2)
where T : IEquatable
{
if (s1 == null)
{
- throw new ArgumentNullException(nameof(s1));
+ throw new ArgumentNullException(nameof(s1));
}
if (s2 == null)
@@ -122,7 +136,17 @@ public double Similarity(ReadOnlySpan s1, ReadOnlySpan s2)
/// If s1 or s2 is null.
public double Distance(string s1, string s2)
=> 1.0 - Similarity(s1, s2);
-
+
+ ///
+ /// Calculates the distance between two sequences based on their similarity.
+ ///
+ /// The distance is calculated as the complement of the similarity between the two
+ /// sequences.
+ /// The type of elements in the sequences. Must implement IEquatable{T}.
+ /// The first sequence to compare.
+ /// The second sequence to compare.
+ /// A double value representing the distance between the two sequences. The value ranges from 0.0 to 1.0, where
+ /// 0.0 indicates identical sequences and 1.0 indicates completely dissimilar sequences.
public double Distance(ReadOnlySpan s1, ReadOnlySpan s2)
where T : IEquatable
=> 1.0 - Similarity(s1, s2);
diff --git a/src/F23.StringSimilarity/Levenshtein.cs b/src/F23.StringSimilarity/Levenshtein.cs
index 0ccf97d..74e801b 100644
--- a/src/F23.StringSimilarity/Levenshtein.cs
+++ b/src/F23.StringSimilarity/Levenshtein.cs
@@ -29,9 +29,11 @@
namespace F23.StringSimilarity
{
+ ///
/// The Levenshtein distance between two words is the Minimum number of
/// single-character edits (insertions, deletions or substitutions) required to
/// change one string into the other.
+ ///
public class Levenshtein : IMetricStringDistance, IMetricSpanDistance
{
///
@@ -73,11 +75,36 @@ public class Levenshtein : IMetricStringDistance, IMetricSpanDistance
/// If s1 or s2 is null.
public double Distance(string s1, string s2, int limit)
=> Distance(s1.AsSpan(), s2.AsSpan(), limit);
-
+
+ ///
+ /// Calculates the distance between two sequences of elements.
+ ///
+ /// This method delegates to the three-argument overload with a limit of int.MaxValue, so the
+ /// calculation is effectively unbounded. To cap the distance, use the overload that accepts a limit parameter.
+ /// The type of elements in the sequences. Must implement IEquatable{T}.
+ /// The first sequence to compare.
+ /// The second sequence to compare.
+ /// A double representing the Levenshtein distance between the two sequences: the minimum number of
+ /// single-element insertions, deletions, or substitutions required to change one sequence into the other.
public double Distance(ReadOnlySpan s1, ReadOnlySpan s2)
where T : IEquatable
=> Distance(s1, s2, int.MaxValue);
-
+
+ ///
+ /// Calculates the edit distance (Levenshtein distance) between two sequences, with an optional upper limit.
+ ///
+ /// The edit distance is a measure of the minimum number of single-element edits
+ /// (insertions, deletions, or substitutions) required to transform one sequence into the other. This method is
+ /// optimized to stop processing early if the distance exceeds the specified limit.
+ /// The type of elements in the sequences. The type must implement IEquatable{T}.
+ /// The first sequence to compare. Cannot be null.
+ /// The second sequence to compare. Cannot be null.
+ /// The maximum distance to calculate. If the edit distance exceeds this value, the method returns limit.
+ /// The edit distance between s1 and s2. If the sequences are identical,
+ /// the result is 0. If the distance exceeds limit, the method returns limit.
+ /// Thrown if s1 or s2 is null.
public double Distance(ReadOnlySpan s1, ReadOnlySpan s2, int limit)
where T : IEquatable
{
diff --git a/src/F23.StringSimilarity/LongestCommonSubsequence.cs b/src/F23.StringSimilarity/LongestCommonSubsequence.cs
index ce5d4d0..df0cfa3 100644
--- a/src/F23.StringSimilarity/LongestCommonSubsequence.cs
+++ b/src/F23.StringSimilarity/LongestCommonSubsequence.cs
@@ -28,6 +28,7 @@
namespace F23.StringSimilarity
{
+ ///
/// The longest common subsequence (LCS) problem consists in finding the longest
/// subsequence common to two (or more) sequences. It differs from problems of
/// finding common substrings: unlike substrings, subsequences are not required
@@ -44,6 +45,7 @@ namespace F23.StringSimilarity
///
/// ! This class currently implements the dynamic programming approach, which has
/// a space requirement O(m * n)!
+ ///
public class LongestCommonSubsequence : IStringDistance, ISpanDistance
{
///
@@ -59,7 +61,18 @@ public class LongestCommonSubsequence : IStringDistance, ISpanDistance
/// If s1 or s2 is null.
public double Distance(string s1, string s2)
=> Distance(s1.AsSpan(), s2.AsSpan());
-
+
+ ///
+ /// Calculates the distance between two sequences based on their similarity.
+ ///
+ /// The distance is calculated as the sum of the lengths of the two sequences minus twice
+ /// the length of their longest common subsequence.
+ /// The type of elements in the sequences. Must implement IEquatable{T}.
+ /// The first sequence to compare. Cannot be empty or null.
+ /// The second sequence to compare. Cannot be empty or null.
+ /// A non-negative double representing the distance between the two sequences. Returns 0 if the
+ /// sequences are identical.
+ /// Thrown if s1 or s2 is null.
public double Distance(ReadOnlySpan s1, ReadOnlySpan s2)
where T : IEquatable
{
@@ -91,7 +104,7 @@ public double Distance(ReadOnlySpan s1, ReadOnlySpan s2)
/// If s1 or s2 is null.
public int Length(string s1, string s2)
=> Length(s1.AsSpan(), s2.AsSpan());
-
+
internal static int Length(ReadOnlySpan s1, ReadOnlySpan s2)
where T : IEquatable
{
diff --git a/src/F23.StringSimilarity/MetricLCS.cs b/src/F23.StringSimilarity/MetricLCS.cs
index 1d33ae6..a3d2a6d 100644
--- a/src/F23.StringSimilarity/MetricLCS.cs
+++ b/src/F23.StringSimilarity/MetricLCS.cs
@@ -43,7 +43,20 @@ public class MetricLCS : IMetricStringDistance, INormalizedStringDistance, IMetr
/// If s1 or s2 is null.
public double Distance(string s1, string s2)
=> Distance(s1.AsSpan(), s2.AsSpan());
-
+
+ ///
+ /// Calculates the normalized distance between two sequences based on their longest common subsequence.
+ ///
+ /// The distance is calculated as: 1.0 - (Length of Longest Common Subsequence /
+ /// Maximum Length of the Two Sequences) This method is case-sensitive for sequences of strings or characters.
+ /// The type of elements in the sequences. Must implement IEquatable{T}.
+ /// The first sequence to compare. Cannot be null.
+ /// The second sequence to compare. Cannot be null.
+ /// A value between 0.0 and 1.0 representing the normalized distance between the two sequences: - Returns 0.0 if the sequences are identical.
+ /// - Returns 1.0 if the sequences have no common elements.
+ /// - Returns a value between 0.0 and 1.0 for partial similarity.
+ /// Thrown if s1 or s2 is null.
public double Distance(ReadOnlySpan s1, ReadOnlySpan s2)
where T : IEquatable
{
diff --git a/src/F23.StringSimilarity/NGram.cs b/src/F23.StringSimilarity/NGram.cs
index 34739e2..dfc4e8f 100644
--- a/src/F23.StringSimilarity/NGram.cs
+++ b/src/F23.StringSimilarity/NGram.cs
@@ -47,8 +47,18 @@ public class NGram : INormalizedStringDistance
private const int DEFAULT_N = 2;
private readonly int n;
+ ///
+ /// Initializes a new instance of the NGram class with the default value for N.
+ ///
+ /// This constructor sets the N-gram size to the default value defined by DEFAULT_N (2). Use this constructor when you want to create an NGram instance with the default
+ /// configuration.
public NGram() : this(DEFAULT_N) { }
+ ///
+ /// Initializes a new instance of the NGram class with the specified n-gram size.
+ ///
+ /// The size of the n-gram. Must be a positive integer.
public NGram(int n)
{
this.n = n;
diff --git a/src/F23.StringSimilarity/NormalizedLevenshtein.cs b/src/F23.StringSimilarity/NormalizedLevenshtein.cs
index 208e451..dd223be 100644
--- a/src/F23.StringSimilarity/NormalizedLevenshtein.cs
+++ b/src/F23.StringSimilarity/NormalizedLevenshtein.cs
@@ -27,10 +27,12 @@
namespace F23.StringSimilarity
{
+ ///
/// This distance is computed as levenshtein distance divided by the length of
/// the longest string. The resulting value is always in the interval [0.0 1.0]
/// but it is not a metric anymore! The similarity is computed as 1 - normalized
/// distance.
+ ///
public class NormalizedLevenshtein : INormalizedStringDistance, INormalizedStringSimilarity, INormalizedSpanDistance, INormalizedSpanSimilarity
{
private readonly Levenshtein l = new Levenshtein();
@@ -44,7 +46,19 @@ public class NormalizedLevenshtein : INormalizedStringDistance, INormalizedStrin
/// If s1 or s2 is null.
public double Distance(string s1, string s2)
=> Distance(s1.AsSpan(), s2.AsSpan());
-
+
+ ///
+ /// Calculates the normalized distance between two sequences of elements.
+ ///
+ /// The distance is normalized by the length of the longer sequence. This ensures the
+ /// result is always in the range [0.0, 1.0], where 0.0 indicates identical sequences and 1.0 indicates
+ /// maximum dissimilarity.
+ /// The type of elements in the sequences. Must implement IEquatable{T}.
+ /// The first sequence to compare. Cannot be null.
+ /// The second sequence to compare. Cannot be null.
+ /// A double value representing the normalized distance between the two sequences. Returns 0.0 if the sequences
+ /// are equal or both are empty.
+ /// Thrown if s1 or s2 is null.
public double Distance(ReadOnlySpan s1, ReadOnlySpan s2)
where T : IEquatable
{
@@ -82,7 +96,17 @@ public double Distance(ReadOnlySpan s1, ReadOnlySpan s2)
/// If s1 or s2 is null.
public double Similarity(string s1, string s2)
=> 1.0 - Distance(s1, s2);
-
+
+ ///
+ /// Calculates the similarity between two sequences based on their distance.
+ ///
+ /// The similarity is calculated as 1.0 minus the distance between the two
+ /// sequences.
+ /// The type of elements in the sequences. Must implement IEquatable{T}.
+ /// The first sequence to compare.
+ /// The second sequence to compare.
+ /// A value between 0.0 and 1.0 representing the similarity of the two sequences, where 1.0 indicates identical
+ /// sequences and 0.0 indicates completely dissimilar sequences.
public double Similarity(ReadOnlySpan s1, ReadOnlySpan s2)
where T : IEquatable
=> 1.0 - Distance(s1, s2);
diff --git a/src/F23.StringSimilarity/OptimalStringAlignment.cs b/src/F23.StringSimilarity/OptimalStringAlignment.cs
index 13af7a1..f404fc6 100644
--- a/src/F23.StringSimilarity/OptimalStringAlignment.cs
+++ b/src/F23.StringSimilarity/OptimalStringAlignment.cs
@@ -29,6 +29,17 @@
namespace F23.StringSimilarity
{
+ ///
+ /// Provides an implementation of the Optimal String Alignment (OSA) distance algorithm, which calculates the
+ /// minimum number of operations required to transform one string into another. Supported operations include
+ /// insertion, deletion, substitution of a single character, and transposition of two adjacent characters, with the
+ /// constraint that no substring is edited more than once.
+ ///
+ /// This class is designed for use in scenarios where a measure of similarity or difference
+ /// between two strings or spans is required. It supports both string and span-based inputs, making it suitable for
+ /// high-performance applications where memory efficiency is important. The OSA distance is particularly useful in
+ /// applications such as spell checking, approximate string matching, and natural language processing, where
+ /// transpositions (e.g., swapping two adjacent characters) are common errors.
public sealed class OptimalStringAlignment : IStringDistance, ISpanDistance
{
///
@@ -44,6 +55,19 @@ public sealed class OptimalStringAlignment : IStringDistance, ISpanDistance
public double Distance(string s1, string s2)
=> Distance(s1.AsSpan(), s2.AsSpan());
+ ///
+ /// Calculates the Optimal String Alignment (restricted Damerau-Levenshtein) distance between two sequences.
+ ///
+ /// The Optimal String Alignment distance measures the difference between two sequences. It extends
+ /// the Levenshtein distance by allowing transpositions of adjacent elements as a single edit operation,
+ /// with the constraint that no substring is edited more than once. This method is case-sensitive for sequences of characters.
+ /// The type of elements in the sequences. The type must implement IEquatable{T}.
+ /// The first sequence to compare. Cannot be null.
+ /// The second sequence to compare. Cannot be null.
+ /// The Optimal String Alignment distance between the two sequences, which represents the minimum number of
+ /// single-element edits (insertions, deletions, substitutions, or transpositions) required to transform one
+ /// sequence into the other, subject to that constraint.
+ /// Thrown if s1 or s2 is null.
public double Distance(ReadOnlySpan s1, ReadOnlySpan s2)
where T : IEquatable
{
diff --git a/src/F23.StringSimilarity/QGram.cs b/src/F23.StringSimilarity/QGram.cs
index 14a890b..42e1231 100644
--- a/src/F23.StringSimilarity/QGram.cs
+++ b/src/F23.StringSimilarity/QGram.cs
@@ -28,12 +28,14 @@
namespace F23.StringSimilarity
{
+ ///
/// Q-gram distance, as defined by Ukkonen in "Approximate string-matching with
/// q-grams and maximal matches". The distance between two strings is defined as
- /// the L1 norm of the difference of their profiles (the number of occurences of
+ /// the L1 norm of the difference of their profiles (the number of occurrences of
/// each n-gram): SUM( |V1_i - V2_i| ). Q-gram distance is a lower bound on
/// Levenshtein distance, but can be computed in O(m + n), where Levenshtein
/// requires O(m.n).
+ ///
public class QGram : ShingleBased, IStringDistance
{
///
@@ -41,7 +43,7 @@ public class QGram : ShingleBased, IStringDistance
/// string-matching with q-grams and maximal matches",
/// http://www.sciencedirect.com/science/article/pii/0304397592901434 The
/// distance between two strings is defined as the L1 norm of the difference
- /// of their profiles (the number of occurences of each k-shingle). Q-gram
+ /// of their profiles (the number of occurrences of each k-shingle). Q-gram
/// distance is a lower bound on Levenshtein distance, but can be computed in
/// O(|A| + |B|), where Levenshtein requires O(|A|.|B|)
///
diff --git a/src/F23.StringSimilarity/ShingleBased.cs b/src/F23.StringSimilarity/ShingleBased.cs
index bc354d4..f389bd6 100644
--- a/src/F23.StringSimilarity/ShingleBased.cs
+++ b/src/F23.StringSimilarity/ShingleBased.cs
@@ -28,6 +28,9 @@
namespace F23.StringSimilarity
{
+ ///
+ /// Base class for shingle based algorithms.
+ ///
public abstract class ShingleBased
{
private const int DEFAULT_K = 3;
@@ -56,8 +59,21 @@ protected ShingleBased(int k)
this.k = k;
}
+ ///
+ /// Initializes a new instance of the ShingleBased class with the default shingle size (3).
+ ///
protected ShingleBased() : this(DEFAULT_K) { }
+ ///
+ /// Generates a profile of k-length substrings (shingles) from the specified string, along with their frequency
+ /// of occurrence.
+ ///
+ /// This method processes the input string by normalizing spaces and then extracting
+ /// overlapping substrings of length k. The resulting dictionary provides a frequency count for each unique
+ /// shingle.
+ /// The input string from which to generate the shingle profile. Cannot be null.
+ /// A dictionary where the keys are k-length substrings (shingles) extracted from the input string, and the
+ /// values are the number of times each shingle appears.
public Dictionary GetProfile(string s)
{
var shingles = new Dictionary();
diff --git a/src/F23.StringSimilarity/SorensenDice.cs b/src/F23.StringSimilarity/SorensenDice.cs
index 70683f0..ebb9f08 100644
--- a/src/F23.StringSimilarity/SorensenDice.cs
+++ b/src/F23.StringSimilarity/SorensenDice.cs
@@ -30,8 +30,10 @@
namespace F23.StringSimilarity
{
+ ///
/// Similar to Jaccard index, but this time the similarity is computed as 2 * |V1
/// inter V2| / (|V1| + |V2|). Distance is computed as 1 - cosine similarity.
+ ///
public class SorensenDice : ShingleBased, INormalizedStringDistance, INormalizedStringSimilarity
{
///
@@ -88,7 +90,7 @@ public double Similarity(string s1, string s2)
var union = new HashSet();
union.UnionWith(profile1.Keys);
union.UnionWith(profile2.Keys);
-
+
int inter = 0;
foreach (var key in union)
diff --git a/src/F23.StringSimilarity/Support/ArrayExtensions.cs b/src/F23.StringSimilarity/Support/ArrayExtensions.cs
index cc67614..b2814ea 100644
--- a/src/F23.StringSimilarity/Support/ArrayExtensions.cs
+++ b/src/F23.StringSimilarity/Support/ArrayExtensions.cs
@@ -27,8 +27,25 @@
namespace F23.StringSimilarity.Support
{
+ ///
+ /// Provides extension methods for working with arrays.
+ ///
+ /// This class contains utility methods that extend the functionality of arrays, enabling
+ /// additional operations such as creating a padded version of an array. These methods are designed to simplify
+ /// common array manipulation tasks.
internal static class ArrayExtensions
{
+ ///
+ /// Creates a new array by padding the source array to the specified final length with the given padding value.
+ ///
+ /// The source array to be padded.
+ /// The desired final length of the array after padding. Must be greater than or equal to the length of source.
+ /// The value used to pad the array if it is shorter than finalLength.
+ /// A new array of length finalLength containing the elements of source, followed by padding values if necessary.
+ /// The source array to be padded.
+ /// The desired final length of the array after padding. Must be greater than or equal to the length of source.
+ /// The value used to pad the array if it is shorter than finalLength.
+ /// A new array of length finalLength containing the elements of source, followed by padding values if necessary.
internal static T[] WithPadding(this T[] source, int finalLength, T paddingValue = default(T))
{
if (finalLength < source.Length)
diff --git a/src/F23.StringSimilarity/WeightedLevenshtein.cs b/src/F23.StringSimilarity/WeightedLevenshtein.cs
index 0ac576d..4db07f1 100644
--- a/src/F23.StringSimilarity/WeightedLevenshtein.cs
+++ b/src/F23.StringSimilarity/WeightedLevenshtein.cs
@@ -31,8 +31,10 @@
namespace F23.StringSimilarity
{
+ ///
/// Implementation of Levenshtein that allows to define different weights for
/// different character substitutions.
+ ///
public class WeightedLevenshtein : IStringDistance
{
private readonly ICharacterSubstitution _characterSubstitution;