|
| 1 | +using System; |
| 2 | +using System.Collections.Generic; |
| 3 | +using System.Linq; |
| 4 | +using System.Text; |
| 5 | +using System.Text.RegularExpressions; |
| 6 | +using System.Threading.Tasks; |
| 7 | + |
| 8 | +namespace ZXBasicStudio.Common.Txt2Bas |
| 9 | +{ |
/// <summary>
/// Converts plain text into tokenized Spectrum BASIC binary format.
/// Based on Yoruguaman work: https://github.com/Ultrahead/SpeccyNextTools
/// </summary>
internal class BasConverter
{
    /// <summary>Value of <see cref="AutoStartLine"/> meaning "no auto-start".</summary>
    private const int NoAutoStart = 32768;

    /// <summary>First implicit line number, and the increment between consecutive lines.</summary>
    private const int LineNumberStep = 10;

    /// <summary>Token byte of the REM keyword; everything after it is copied verbatim.</summary>
    private const byte RemToken = 0xEA;

    /// <summary>Marker byte placed between a number's ASCII digits and its packed 5-byte form.</summary>
    private const byte HiddenNumberMarker = 0x0E;

    /// <summary>End-of-line marker that terminates every binary line.</summary>
    private const byte EndOfLine = 0x0D;

    /// <summary>Matches an explicit line number followed by whitespace and the rest of the line.</summary>
    private static readonly Regex LineNumberPattern = new(@"^(\d+)\s+(.*)", RegexOptions.Compiled);

    /// <summary>Characters that may separate a directive name from its argument.</summary>
    private static readonly char[] DirectiveSeparators = [' ', '\t'];

    private readonly TokenMap _tokenMap;
    private readonly List<string> _sortedKeys;

    /// <summary>
    /// The Auto-start line number.
    /// Defaults to 32768 (No Auto-start).
    /// Set only via the #autostart directive in the source text, and reset to the
    /// default at the beginning of every <see cref="ConvertFile"/> call.
    /// </summary>
    public int AutoStartLine { get; private set; } = NoAutoStart;

    /// <summary>
    /// Initializes a new instance of the <see cref="BasConverter"/> class.
    /// </summary>
    public BasConverter()
    {
        _tokenMap = new TokenMap();

        // Longest keys first so keyword matching is greedy
        // (e.g. "DEFPROC" is tried before "DEF").
        _sortedKeys = _tokenMap.Map.Keys
            .OrderByDescending(k => k.Length)
            .ToList();
    }

    /// <summary>
    /// Converts BASIC source text to a byte array of tokenized BASIC.
    /// </summary>
    /// <remarks>
    /// Blank lines are skipped. Lines starting with '#' are never emitted: "#autostart N"
    /// updates <see cref="AutoStartLine"/>, any other '#' line is a source comment.
    /// A line may begin with an explicit line number; otherwise the next implicit number
    /// (previous line number + 10, starting at 10) is used.
    /// </remarks>
    /// <param name="textData">Text to convert</param>
    /// <returns>Byte array representing the BASIC program.</returns>
    /// <exception cref="ArgumentNullException"><paramref name="textData"/> is null.</exception>
    /// <exception cref="OverflowException">An explicit line number does not fit in an <see cref="int"/>.</exception>
    public byte[] ConvertFile(string textData)
    {
        ArgumentNullException.ThrowIfNull(textData);

        // A directive from a previous conversion must not leak into this one.
        AutoStartLine = NoAutoStart;

        string[] lines = textData.Split(new[] { "\r\n", "\n" }, StringSplitOptions.None);
        var output = new List<byte>();
        int nextImplicitLineNum = LineNumberStep;

        foreach (string rawLine in lines)
        {
            string text = rawLine.Trim();

            // Empty lines are source formatting only; they do not generate a BASIC line.
            if (text.Length == 0)
            {
                continue;
            }

            // Directive or source comment: processed (if applicable) but never emitted.
            if (text[0] == '#')
            {
                ApplyDirective(text);
                continue;
            }

            int lineNum = nextImplicitLineNum;
            string restOfLine = text;

            Match match = LineNumberPattern.Match(text);
            if (match.Success)
            {
                lineNum = int.Parse(
                    match.Groups[1].Value,
                    System.Globalization.NumberStyles.None,
                    System.Globalization.CultureInfo.InvariantCulture);
                restOfLine = match.Groups[2].Value;
            }

            nextImplicitLineNum = lineNum + LineNumberStep;
            output.AddRange(ParseLine(lineNum, restOfLine));
        }

        return output.ToArray();
    }

    /// <summary>
    /// Handles a source line that starts with '#'. Only "#autostart N" has an effect;
    /// every other such line is ignored.
    /// </summary>
    /// <param name="text">The trimmed source line, starting with '#'.</param>
    private void ApplyDirective(string text)
    {
        if (!text.StartsWith("#autostart", StringComparison.OrdinalIgnoreCase))
        {
            return;
        }

        // Accept tabs as well as spaces between the directive and its value.
        string[] parts = text.Split(DirectiveSeparators, StringSplitOptions.RemoveEmptyEntries);
        if (parts.Length > 1 &&
            int.TryParse(
                parts[1],
                System.Globalization.NumberStyles.Integer,
                System.Globalization.CultureInfo.InvariantCulture,
                out int autoStartVal))
        {
            AutoStartLine = autoStartVal;
        }
    }

    /// <summary>
    /// Parses a single line of text into binary line format.
    /// Structure: [LineNum(BE)] [Length(LE)] [Data...] [0x0D]
    /// </summary>
    /// <param name="lineNum">The line number.</param>
    /// <param name="text">The text content of the line.</param>
    /// <returns>A byte array representing the binary line.</returns>
    private byte[] ParseLine(int lineNum, string text)
    {
        var lineData = new List<byte>(text.Length + 1);

        // True while the previously emitted literal characters form a name (a letter
        // followed by letters/digits). Digits that continue such a name belong to the
        // identifier and must not be encoded as numeric literals.
        bool inIdentifier = false;

        for (int i = 0; i < text.Length; i++)
        {
            char c = text[i];

            // String literals: copied exactly, quotes included.
            if (c == '"')
            {
                int endQuote = text.IndexOf('"', i + 1);

                // An unterminated literal runs to the end of the line.
                int last = endQuote >= 0 ? endQuote : text.Length - 1;

                lineData.AddRange(Encoding.ASCII.GetBytes(text.Substring(i, last - i + 1)));
                i = last;
                inIdentifier = false;
                continue;
            }

            // Numbers: ASCII text + hidden marker + packed 5-byte value.
            bool startsNumber =
                (char.IsDigit(c) && !inIdentifier) ||
                (c == '.' && i + 1 < text.Length && char.IsDigit(text[i + 1]));

            if (startsNumber)
            {
                int end = i;
                while (end < text.Length && (char.IsDigit(text[end]) || text[end] == '.'))
                {
                    end++;
                }

                string numStr = text.Substring(i, end - i);

                // Invariant culture: '.' is always the decimal separator in BASIC source,
                // regardless of the machine's regional settings.
                if (double.TryParse(
                        numStr,
                        System.Globalization.NumberStyles.Float,
                        System.Globalization.CultureInfo.InvariantCulture,
                        out double val))
                {
                    lineData.AddRange(Encoding.ASCII.GetBytes(numStr));
                    lineData.Add(HiddenNumberMarker);
                    lineData.AddRange(SinclairNumber.Pack(val));
                    i = end - 1;
                    inIdentifier = false;
                    continue;
                }

                // Not a valid number (e.g. "1.2.3"): fall through and emit char by char.
            }

            // ';comment' idiom: the rest of the line is literal text (not tokenized).
            if (c == ';' && IsCommentStart(text, i))
            {
                lineData.AddRange(Encoding.ASCII.GetBytes(text.Substring(i)));
                break;
            }

            // Keywords (greedy match against the token map).
            if (TryMatchKeyword(text, i, out byte token, out int keywordLength))
            {
                lineData.Add(token);
                int next = i + keywordLength;

                if (token == RemToken)
                {
                    // REM consumes the remainder of the line verbatim.
                    lineData.AddRange(Encoding.ASCII.GetBytes(text.Substring(next)));
                    break;
                }

                // Spaces right after a keyword are dropped.
                while (next < text.Length && text[next] == ' ')
                {
                    next++;
                }

                i = next - 1; // the loop increment moves to 'next'
                inIdentifier = false;
                continue;
            }

            // Fallback: literal character.
            lineData.Add((byte)c);
            inIdentifier = char.IsLetter(c) || (inIdentifier && char.IsDigit(c));
        }

        lineData.Add(EndOfLine);

        // Header: line number big-endian, then data length (including 0x0D) little-endian.
        int length = lineData.Count;
        byte[] finalLine = new byte[4 + length];
        finalLine[0] = (byte)((lineNum >> 8) & 0xFF);
        finalLine[1] = (byte)(lineNum & 0xFF);
        finalLine[2] = (byte)(length & 0xFF);
        finalLine[3] = (byte)((length >> 8) & 0xFF);
        lineData.CopyTo(finalLine, 4);

        return finalLine;
    }

    /// <summary>
    /// Determines whether the semicolon at <paramref name="semicolonIndex"/> starts a comment:
    /// ignoring spaces before it, it is either the first character of the line or it
    /// directly follows a colon.
    /// </summary>
    /// <param name="text">The line content.</param>
    /// <param name="semicolonIndex">Index of the ';' character in <paramref name="text"/>.</param>
    /// <returns>True when the rest of the line must be treated as a comment.</returns>
    private static bool IsCommentStart(string text, int semicolonIndex)
    {
        int back = semicolonIndex - 1;
        while (back >= 0 && text[back] == ' ')
        {
            back--;
        }

        return back < 0 || text[back] == ':';
    }

    /// <summary>
    /// Tries to match a keyword from the token map at <paramref name="index"/>,
    /// longest keys first and ignoring case.
    /// </summary>
    /// <param name="text">The line content.</param>
    /// <param name="index">Position in <paramref name="text"/> where the keyword must start.</param>
    /// <param name="token">The token byte of the matched keyword; 0 when nothing matched.</param>
    /// <param name="length">The length of the matched keyword text; 0 when nothing matched.</param>
    /// <returns>True when a keyword was matched.</returns>
    private bool TryMatchKeyword(string text, int index, out byte token, out int length)
    {
        foreach (string key in _sortedKeys)
        {
            int after = index + key.Length;
            if (after > text.Length)
            {
                continue;
            }

            if (string.Compare(text, index, key, 0, key.Length, StringComparison.OrdinalIgnoreCase) != 0)
            {
                continue;
            }

            // Alphabetic keywords must stand alone: not glued to a preceding letter
            // nor to a following letter or digit (otherwise they are part of a name).
            if (char.IsLetter(key[0]))
            {
                bool prevCharValid = index == 0 || !char.IsLetter(text[index - 1]);
                bool nextCharValid = after >= text.Length || !char.IsLetterOrDigit(text[after]);
                if (!prevCharValid || !nextCharValid)
                {
                    continue;
                }
            }

            token = _tokenMap.Map[key];
            length = key.Length;
            return true;
        }

        token = 0;
        length = 0;
        return false;
    }
}
| 254 | +} |
0 commit comments