diff --git a/se5/RemoveUnicodeCharacters/MainWindow.axaml.cs b/se5/RemoveUnicodeCharacters/MainWindow.axaml.cs index 9394c65..49765ea 100644 --- a/se5/RemoveUnicodeCharacters/MainWindow.axaml.cs +++ b/se5/RemoveUnicodeCharacters/MainWindow.axaml.cs @@ -16,13 +16,6 @@ namespace SubtitleEdit.Plugins.RemoveUnicodeCharacters; public partial class MainWindow : Window { - /// Built-in default replacements (used when the user has no persisted setting for a character). - private static readonly Dictionary DefaultReplacements = new() - { - ['♪'] = "#", - ['♫'] = "#", - }; - private readonly PluginRequest _request; private readonly List _blocks; private readonly Dictionary _persistedReplacements; @@ -125,7 +118,7 @@ private string ResolveDefaultReplacement(char c) { return persisted; } - return DefaultReplacements.TryGetValue(c, out var builtIn) ? builtIn : string.Empty; + return UnicodeDefaults.Map.TryGetValue(c, out var builtIn) ? builtIn : string.Empty; } private void UpdateUiForRows() diff --git a/se5/RemoveUnicodeCharacters/UnicodeDefaults.cs b/se5/RemoveUnicodeCharacters/UnicodeDefaults.cs new file mode 100644 index 0000000..21c47a3 --- /dev/null +++ b/se5/RemoveUnicodeCharacters/UnicodeDefaults.cs @@ -0,0 +1,69 @@ +using System.Collections.Generic; + +namespace SubtitleEdit.Plugins.RemoveUnicodeCharacters; + +/// +/// Built-in suggested replacements for non-ANSI characters that have a near-lossless +/// or universally understood ASCII equivalent. Used as the seed value for the +/// "Replace with" column when the user has no persisted setting for a character. +/// Debatable cases (bullets, arrows, math, trademark, music accidentals) are +/// deliberately omitted - the UI shows them blank so the user picks per row. 
+///
+internal static class UnicodeDefaults
+{
+    public static readonly Dictionary<char, string> Map = new() // character -> suggested replacement ("" = remove it)
+    { // Keys are \uXXXX escapes: many of these characters are invisible or look alike when typed as literals.
+        // Smart quotes -> straight ASCII quotes
+        ['\u2018'] = "'",    // U+2018 ‘ left single quotation mark
+        ['\u2019'] = "'",    // U+2019 ’ right single quotation mark
+        ['\u201A'] = ",",    // U+201A ‚ single low-9 quotation mark; NOTE(review): maps to a look-alike comma, not a quote - confirm intended
+        ['\u201B'] = "'",    // U+201B ‛ single high-reversed-9 quotation mark
+        ['\u201C'] = "\"",   // U+201C “ left double quotation mark
+        ['\u201D'] = "\"",   // U+201D ” right double quotation mark
+        ['\u201E'] = "\"",   // U+201E „ double low-9 quotation mark
+        ['\u201F'] = "\"",   // U+201F ‟ double high-reversed-9 quotation mark
+
+        // Hyphens / dashes -> hyphen-minus
+        ['\u2010'] = "-",    // U+2010 hyphen
+        ['\u2011'] = "-",    // U+2011 non-breaking hyphen
+        ['\u2012'] = "-",    // U+2012 figure dash
+        ['\u2013'] = "-",    // U+2013 en dash
+        ['\u2014'] = "-",    // U+2014 em dash
+        ['\u2015'] = "-",    // U+2015 horizontal bar
+        ['\u2212'] = "-",    // U+2212 minus sign
+
+        // Ellipses / dot leaders
+        ['\u2024'] = ".",    // U+2024 one-dot leader
+        ['\u2025'] = "..",   // U+2025 two-dot leader
+        ['\u2026'] = "...",  // U+2026 horizontal ellipsis
+
+        // Music notes (note heads, not accidentals)
+        ['\u2669'] = "#",    // U+2669 ♩ quarter note
+        ['\u266A'] = "#",    // U+266A ♪ eighth note
+        ['\u266B'] = "#",    // U+266B ♫ beamed eighth notes
+        ['\u266C'] = "#",    // U+266C ♬ beamed sixteenth notes
+
+        // Wide / narrow space variants -> regular space
+        ['\u2000'] = " ",    // U+2000 en quad
+        ['\u2001'] = " ",    // U+2001 em quad
+        ['\u2002'] = " ",    // U+2002 en space
+        ['\u2003'] = " ",    // U+2003 em space
+        ['\u2004'] = " ",    // U+2004 three-per-em space
+        ['\u2005'] = " ",    // U+2005 four-per-em space
+        ['\u2006'] = " ",    // U+2006 six-per-em space
+        ['\u2007'] = " ",    // U+2007 figure space
+        ['\u2008'] = " ",    // U+2008 punctuation space
+        ['\u2009'] = " ",    // U+2009 thin space
+        ['\u200A'] = " ",    // U+200A hair space
+        ['\u202F'] = " ",    // U+202F narrow no-break space
+        ['\u205F'] = " ",    // U+205F medium mathematical space
+        ['\u3000'] = " ",    // U+3000 ideographic space
+
+        // Zero-width and joiner controls -> remove
+        ['\u200B'] = "",     // U+200B zero-width space
+        ['\u200C'] = "",     // U+200C zero-width non-joiner
+        ['\u200D'] = "",     // U+200D zero-width joiner
+        ['\u2060'] = "",     // U+2060 word joiner
+        ['\uFEFF'] = "",     // U+FEFF zero-width no-break space / BOM
+    };
+}