diff --git a/se5/RemoveUnicodeCharacters/MainWindow.axaml.cs b/se5/RemoveUnicodeCharacters/MainWindow.axaml.cs
index 9394c65..49765ea 100644
--- a/se5/RemoveUnicodeCharacters/MainWindow.axaml.cs
+++ b/se5/RemoveUnicodeCharacters/MainWindow.axaml.cs
@@ -16,13 +16,6 @@ namespace SubtitleEdit.Plugins.RemoveUnicodeCharacters;
public partial class MainWindow : Window
{
- /// Built-in default replacements (used when the user has no persisted setting for a character).
- private static readonly Dictionary DefaultReplacements = new()
- {
- ['♪'] = "#",
- ['♫'] = "#",
- };
-
private readonly PluginRequest _request;
private readonly List _blocks;
private readonly Dictionary _persistedReplacements;
@@ -125,7 +118,7 @@ private string ResolveDefaultReplacement(char c)
{
return persisted;
}
- return DefaultReplacements.TryGetValue(c, out var builtIn) ? builtIn : string.Empty;
+ return UnicodeDefaults.Map.TryGetValue(c, out var builtIn) ? builtIn : string.Empty;
}
private void UpdateUiForRows()
diff --git a/se5/RemoveUnicodeCharacters/UnicodeDefaults.cs b/se5/RemoveUnicodeCharacters/UnicodeDefaults.cs
new file mode 100644
index 0000000..21c47a3
--- /dev/null
+++ b/se5/RemoveUnicodeCharacters/UnicodeDefaults.cs
@@ -0,0 +1,69 @@
+using System.Collections.Generic;
+
+namespace SubtitleEdit.Plugins.RemoveUnicodeCharacters;
+
+/// <summary>
+/// Built-in suggested replacements for non-ANSI characters that have a near-lossless
+/// or universally understood ASCII equivalent; seeds the "Replace with" column when the
+/// user has no persisted setting for a character. Debatable cases (bullets, arrows, math,
+/// trademark, music accidentals) are deliberately omitted so the user picks per row.
+/// Keys are written as \u escapes because many are invisible or look-alike glyphs.
+/// </summary>
+internal static class UnicodeDefaults
+{
+    public static readonly Dictionary<char, string> Map = new()
+    {
+        // Smart quotes -> straight ASCII quotes (low-9 marks are opening quotes, not commas)
+        ['\u2018'] = "'",   // U+2018 ‘ left single quotation mark
+        ['\u2019'] = "'",   // U+2019 ’ right single quotation mark
+        ['\u201A'] = "'",   // U+201A ‚ single low-9 quotation mark
+        ['\u201B'] = "'",   // U+201B ‛ single high-reversed-9 quotation mark
+        ['\u201C'] = "\"",  // U+201C “ left double quotation mark
+        ['\u201D'] = "\"",  // U+201D ” right double quotation mark
+        ['\u201E'] = "\"",  // U+201E „ double low-9 quotation mark
+        ['\u201F'] = "\"",  // U+201F ‟ double high-reversed-9 quotation mark
+
+        // Hyphens / dashes -> hyphen-minus
+        ['\u2010'] = "-",   // U+2010 hyphen
+        ['\u2011'] = "-",   // U+2011 non-breaking hyphen
+        ['\u2012'] = "-",   // U+2012 figure dash
+        ['\u2013'] = "-",   // U+2013 en dash
+        ['\u2014'] = "-",   // U+2014 em dash
+        ['\u2015'] = "-",   // U+2015 horizontal bar
+        ['\u2212'] = "-",   // U+2212 minus sign
+
+        // Ellipses / dot leaders
+        ['\u2024'] = ".",   // U+2024 one-dot leader
+        ['\u2025'] = "..",  // U+2025 two-dot leader
+        ['\u2026'] = "...", // U+2026 horizontal ellipsis
+
+        // Music notes (note heads, not accidentals)
+        ['\u2669'] = "#",   // U+2669 ♩ quarter note
+        ['\u266A'] = "#",   // U+266A ♪ eighth note
+        ['\u266B'] = "#",   // U+266B ♫ beamed eighth notes
+        ['\u266C'] = "#",   // U+266C ♬ beamed sixteenth notes
+
+        // Wide / narrow space variants -> regular space (U+0020)
+        ['\u2000'] = " ",   // U+2000 en quad
+        ['\u2001'] = " ",   // U+2001 em quad
+        ['\u2002'] = " ",   // U+2002 en space
+        ['\u2003'] = " ",   // U+2003 em space
+        ['\u2004'] = " ",   // U+2004 three-per-em space
+        ['\u2005'] = " ",   // U+2005 four-per-em space
+        ['\u2006'] = " ",   // U+2006 six-per-em space
+        ['\u2007'] = " ",   // U+2007 figure space
+        ['\u2008'] = " ",   // U+2008 punctuation space
+        ['\u2009'] = " ",   // U+2009 thin space
+        ['\u200A'] = " ",   // U+200A hair space
+        ['\u202F'] = " ",   // U+202F narrow no-break space
+        ['\u205F'] = " ",   // U+205F medium mathematical space
+        ['\u3000'] = " ",   // U+3000 ideographic space
+
+        // Zero-width and joiner controls -> remove (empty replacement)
+        ['\u200B'] = "",    // U+200B zero-width space
+        ['\u200C'] = "",    // U+200C zero-width non-joiner
+        ['\u200D'] = "",    // U+200D zero-width joiner
+        ['\u2060'] = "",    // U+2060 word joiner
+        ['\uFEFF'] = "",    // U+FEFF zero-width no-break space / BOM
+    };
+}