microsoft · jakebailey · Feb 24, 2026 · Feb 23, 2026 · Feb 23, 2026 · Feb 23, 2026
diff --git a/internal/astnav/tokens_test.go b/internal/astnav/tokens_test.go
@@ -300,10 +300,10 @@ func writeRangeDiff(output *strings.Builder, file *ast.SourceFile, diff tokenDif
 		goTokenPos = diff.goToken.Pos
 		goTokenEnd = diff.goToken.End
 	}
-	tsStartLine, _ := core.PositionToLineAndCharacter(tsTokenPos, lines)
-	tsEndLine, _ := core.PositionToLineAndCharacter(tsTokenEnd, lines)
-	goStartLine, _ := core.PositionToLineAndCharacter(goTokenPos, lines)
-	goEndLine, _ := core.PositionToLineAndCharacter(goTokenEnd, lines)
+	tsStartLine, _ := core.PositionToLineAndByteOffset(tsTokenPos, lines)
+	tsEndLine, _ := core.PositionToLineAndByteOffset(tsTokenEnd, lines)
+	goStartLine, _ := core.PositionToLineAndByteOffset(goTokenPos, lines)
+	goEndLine, _ := core.PositionToLineAndByteOffset(goTokenEnd, lines)
 
 	contextLines := 2
 	startLine := min(tsStartLine, goStartLine)

diff --git a/internal/checker/printer.go b/internal/checker/printer.go
@@ -49,7 +49,7 @@ func (s *semicolonRemoverWriter) DecreaseIndent() {
 	s.inner.DecreaseIndent()
 }
 
-func (s *semicolonRemoverWriter) GetColumn() int {
+func (s *semicolonRemoverWriter) GetColumn() core.UTF16Offset {
 	return s.inner.GetColumn()
 }
 

diff --git a/internal/compiler/emit_test.go b/internal/compiler/emit_test.go
@@ -16,7 +16,7 @@ import (
 // generateLongLineTS generates TypeScript source code that produces a single very long line.
 // This simulates generated code (e.g., from code generators) that has no line breaks,
 // which triggers O(n²) behavior in source map generation due to
-// GetECMALineAndCharacterOfPosition scanning from line start for each position.
+// GetECMALineAndUTF16CharacterOfPosition scanning from line start for each position.
 func generateLongLineTS(numProperties int) string {
 	// Build a large object literal all on one line, with no line breaks.
 	var b strings.Builder

diff --git a/internal/core/core.go b/internal/core/core.go
@@ -9,6 +9,7 @@ import (
 	"strings"
 	"sync"
 	"unicode"
+	"unicode/utf16"
 	"unicode/utf8"
 
 	"github.com/microsoft/typescript-go/internal/debug"
@@ -440,13 +441,37 @@ func ComputeECMALineStartsSeq(text string) iter.Seq[TextPos] {
 	}
 }
 
-func PositionToLineAndCharacter(position int, lineStarts []TextPos) (line int, character int) {
+// PositionToLineAndByteOffset returns the 0-based line containing the byte position and the
+// position's raw UTF-8 byte offset (not a UTF-16 code unit count) from that line's start.
+// lineStarts must be ascending and non-empty; a position before lineStarts[0] yields line 0 and a negative offset.
+func PositionToLineAndByteOffset(position int, lineStarts []TextPos) (line int, byteOffset int) {
 	line = max(sort.Search(len(lineStarts), func(i int) bool {
 		return int(lineStarts[i]) > position
 	})-1, 0)
 	return line, position - int(lineStarts[line])
 }
 
+// UTF16Offset is an offset or length measured in UTF-16 code units rather than UTF-8 bytes.
+type UTF16Offset int
+
+// UTF16Len returns the number of UTF-16 code units needed to represent the UTF-8 encoded
+// string s: runes above U+FFFF count as two units (a surrogate pair), all others as one.
+func UTF16Len(s string) UTF16Offset {
+	// Fast path: scan for non-ASCII bytes. For ASCII-only strings,
+	// each byte is one UTF-16 code unit, so we can return len(s) directly.
+	for i := range len(s) {
+		if s[i] >= utf8.RuneSelf {
+			// Non-ASCII byte at i: the i bytes before it are one unit each; decode the rest rune by rune.
+			n := UTF16Offset(i)
+			for _, r := range s[i:] {
+				n += UTF16Offset(utf16.RuneLen(r))
+			}
+			return n
+		}
+	}
+	return UTF16Offset(len(s))
+}
+
 func Flatten[T any](array [][]T) []T {
 	var result []T
 	for _, subArray := range array {

diff --git a/internal/diagnosticwriter/diagnosticwriter.go b/internal/diagnosticwriter/diagnosticwriter.go
@@ -167,8 +167,8 @@ func FormatDiagnosticWithColorAndContext(output io.Writer, diagnostic Diagnostic
 }
 
 func writeCodeSnippet(writer io.Writer, sourceFile FileLike, start int, length int, squiggleColor string, indent string, formatOpts *FormattingOptions) {
-	firstLine, firstLineChar := scanner.GetECMALineAndCharacterOfPosition(sourceFile, start)
-	lastLine, lastLineChar := scanner.GetECMALineAndCharacterOfPosition(sourceFile, start+length)
+	firstLine, firstLineChar := scanner.GetECMALineAndUTF16CharacterOfPosition(sourceFile, start)
+	lastLine, lastLineChar := scanner.GetECMALineAndUTF16CharacterOfPosition(sourceFile, start+length)
 	if length == 0 {
 		lastLineChar++ // When length is zero, squiggle the character right after the start position.
 	}
@@ -196,10 +196,10 @@ func writeCodeSnippet(writer io.Writer, sourceFile FileLike, start int, length i
 			i = lastLine - 1
 		}
 
-		lineStart := scanner.GetECMAPositionOfLineAndCharacter(sourceFile, i, 0)
+		lineStart := scanner.GetECMAPositionOfLineAndByteOffset(sourceFile, i, 0)
 		var lineEnd int
 		if i < lastLineOfFile {
-			lineEnd = scanner.GetECMAPositionOfLineAndCharacter(sourceFile, i+1, 0)
+			lineEnd = scanner.GetECMAPositionOfLineAndByteOffset(sourceFile, i+1, 0)
 		} else {
 			lineEnd = len(sourceFile.Text())
 		}
@@ -229,21 +229,21 @@ func writeCodeSnippet(writer io.Writer, sourceFile FileLike, start int, length i
 			// Otherwise, we'll just squiggle the rest of the line, giving 'slice' no end position.
 			var lastCharForLine int
 			if i == lastLine {
-				lastCharForLine = lastLineChar
+				lastCharForLine = int(lastLineChar)
 			} else {
-				lastCharForLine = len(lineContent)
+				lastCharForLine = int(core.UTF16Len(lineContent))
 			}
 
 			// Fill with spaces until the first character,
 			// then squiggle the remainder of the line.
-			fmt.Fprint(writer, strings.Repeat(" ", firstLineChar))
-			fmt.Fprint(writer, strings.Repeat("~", lastCharForLine-firstLineChar))
+			fmt.Fprint(writer, strings.Repeat(" ", int(firstLineChar)))
+			fmt.Fprint(writer, strings.Repeat("~", lastCharForLine-int(firstLineChar)))
 		case lastLine:
 			// Squiggle until the final character.
-			fmt.Fprint(writer, strings.Repeat("~", lastLineChar))
+			fmt.Fprint(writer, strings.Repeat("~", int(lastLineChar)))
 		default:
 			// Squiggle the entire line.
-			fmt.Fprint(writer, strings.Repeat("~", len(lineContent)))
+			fmt.Fprint(writer, strings.Repeat("~", int(core.UTF16Len(lineContent))))
 		}
 
 		fmt.Fprint(writer, resetEscapeSequence)
@@ -303,7 +303,7 @@ func writeWithStyleAndReset(output io.Writer, text string, formatStyle string) {
 }
 
 func WriteLocation(output io.Writer, file FileLike, pos int, formatOpts *FormattingOptions, writeWithStyleAndReset FormattedWriter) {
-	firstLine, firstChar := scanner.GetECMALineAndCharacterOfPosition(file, pos)
+	firstLine, firstChar := scanner.GetECMALineAndUTF16CharacterOfPosition(file, pos)
 	var relativeFileName string
 	if formatOpts != nil {
 		relativeFileName = tspath.ConvertToRelativePath(file.FileName(), formatOpts.ComparePathsOptions)
@@ -315,7 +315,7 @@ func WriteLocation(output io.Writer, file FileLike, pos int, formatOpts *Formatt
 	fmt.Fprint(output, ":")
 	writeWithStyleAndReset(output, strconv.Itoa(firstLine+1), foregroundColorEscapeYellow)
 	fmt.Fprint(output, ":")
-	writeWithStyleAndReset(output, strconv.Itoa(firstChar+1), foregroundColorEscapeYellow)
+	writeWithStyleAndReset(output, strconv.Itoa(int(firstChar)+1), foregroundColorEscapeYellow)
 }
 
 // Some of these lived in watch.ts, but they're not specific to the watch API.
@@ -465,10 +465,10 @@ func WriteFormatDiagnostics(output io.Writer, diagnostics []Diagnostic, formatOp
 
 func WriteFormatDiagnostic(output io.Writer, diagnostic Diagnostic, formatOpts *FormattingOptions) {
 	if diagnostic.File() != nil {
-		line, character := scanner.GetECMALineAndCharacterOfPosition(diagnostic.File(), diagnostic.Pos())
+		line, character := scanner.GetECMALineAndUTF16CharacterOfPosition(diagnostic.File(), diagnostic.Pos())
 		fileName := diagnostic.File().FileName()
 		relativeFileName := tspath.ConvertToRelativePath(fileName, formatOpts.ComparePathsOptions)
-		fmt.Fprintf(output, "%s(%d,%d): ", relativeFileName, line+1, character+1)
+		fmt.Fprintf(output, "%s(%d,%d): ", relativeFileName, line+1, int(character)+1)
 	}
 
 	fmt.Fprintf(output, "%s TS%d: ", diagnostic.Category().Name(), diagnostic.Code())

diff --git a/internal/format/indent.go b/internal/format/indent.go
@@ -14,7 +14,7 @@ import (
 )
 
 func GetIndentationForNode(n *ast.Node, ignoreActualIndentationRange *core.TextRange, sourceFile *ast.SourceFile, options *lsutil.FormatCodeSettings) int {
-	startline, startpos := scanner.GetECMALineAndCharacterOfPosition(sourceFile, scanner.GetTokenPosOfNode(n, sourceFile, false))
+	startline, startpos := scanner.GetECMALineAndByteOffsetOfPosition(sourceFile, scanner.GetTokenPosOfNode(n, sourceFile, false))
 	return getIndentationForNodeWorker(n, startline, startpos, ignoreActualIndentationRange /*indentationDelta*/, 0, sourceFile /*isNextChild*/, false, options)
 }
 
@@ -104,7 +104,7 @@ func getIndentationForNodeWorker(
 		parent = current.Parent
 
 		if useTrueStart {
-			currentStartLine, currentStartCharacter = scanner.GetECMALineAndCharacterOfPosition(sourceFile, scanner.GetTokenPosOfNode(current, sourceFile, false))
+			currentStartLine, currentStartCharacter = scanner.GetECMALineAndByteOffsetOfPosition(sourceFile, scanner.GetTokenPosOfNode(current, sourceFile, false))
 		} else {
 			currentStartLine = containingListOrParentStartLine
 			currentStartCharacter = containingListOrParentStartCharacter
@@ -170,7 +170,7 @@ func getActualIndentationForListStartLine(list *ast.NodeList, sourceFile *ast.So
 	if list == nil {
 		return -1
 	}
-	line, char := scanner.GetECMALineAndCharacterOfPosition(sourceFile, list.Loc.Pos())
+	line, char := scanner.GetECMALineAndByteOffsetOfPosition(sourceFile, list.Loc.Pos())
 	return findColumnForFirstNonWhitespaceCharacterInLine(line, char, sourceFile, options)
 }
 
@@ -200,7 +200,7 @@ func deriveActualIndentationFromList(list *ast.NodeList, index int, sourceFile *
 }
 
 func findColumnForFirstNonWhitespaceCharacterInLine(line int, char int, sourceFile *ast.SourceFile, options *lsutil.FormatCodeSettings) int {
-	lineStart := scanner.GetECMAPositionOfLineAndCharacter(sourceFile, line, 0)
+	lineStart := scanner.GetECMAPositionOfLineAndByteOffset(sourceFile, line, 0)
 	return FindFirstNonWhitespaceColumn(lineStart, lineStart+char, sourceFile, options)
 }
 
@@ -217,14 +217,11 @@ func FindFirstNonWhitespaceColumn(startPos int, endPos int, sourceFile *ast.Sour
 * value of 'column' for '$' is 6 (assuming that tab size is 4)
  */
 func findFirstNonWhitespaceCharacterAndColumn(startPos int, endPos int, sourceFile *ast.SourceFile, options *lsutil.FormatCodeSettings) (character int, column int) {
-	character = 0
 	column = 0
 	text := sourceFile.Text()
-	for pos := startPos; pos < endPos; pos++ {
+	pos := startPos
+	for pos < endPos {
 		ch, size := utf8.DecodeRuneInString(text[pos:])
-		if size == 0 && ch == utf8.RuneError {
-			continue // multibyte character - TODO: recognize non-tab multicolumn characters? ideographic space?
-		}
 		if !stringutil.IsWhiteSpaceSingleLine(ch) {
 			break
 		}
@@ -235,9 +232,9 @@ func findFirstNonWhitespaceCharacterAndColumn(startPos int, endPos int, sourceFi
 			column++
 		}
 
-		character++
+		pos += size
 	}
-	return character, column
+	return pos - startPos, column
 }
 
 func childStartsOnTheSameLineWithElseInIfStatement(parent *ast.Node, child *ast.Node, childStartLine int, sourceFile *ast.SourceFile) bool {
@@ -251,7 +248,7 @@ func childStartsOnTheSameLineWithElseInIfStatement(parent *ast.Node, child *ast.
 }
 
 func getStartLineAndCharacterForNode(n *ast.Node, sourceFile *ast.SourceFile) (line int, character int) {
-	return scanner.GetECMALineAndCharacterOfPosition(sourceFile, scanner.GetTokenPosOfNode(n, sourceFile, false))
+	return scanner.GetECMALineAndByteOffsetOfPosition(sourceFile, scanner.GetTokenPosOfNode(n, sourceFile, false))
 }
 
 func getStartLineForNode(n *ast.Node, sourceFile *ast.SourceFile) int {
@@ -361,7 +358,7 @@ func getContainingListOrParentStart(parent *ast.Node, child *ast.Node, sourceFil
 	} else {
 		startPos = scanner.GetTokenPosOfNode(parent, sourceFile, false)
 	}
-	return scanner.GetECMALineAndCharacterOfPosition(sourceFile, startPos)
+	return scanner.GetECMALineAndByteOffsetOfPosition(sourceFile, startPos)
 }
 
 func isControlFlowEndingStatement(kind ast.Kind, parentKind ast.Kind) bool {

diff --git a/internal/format/span.go b/internal/format/span.go
@@ -264,7 +264,7 @@ func (w *formatSpanWorker) execute(s *formattingScanner) []core.TextChange {
 		}
 
 		w.indentTriviaItems(remainingTrivia, indentation, true, func(item TextRangeWithKind) {
-			startLine, startChar := scanner.GetECMALineAndCharacterOfPosition(w.sourceFile, item.Loc.Pos())
+			startLine, startChar := scanner.GetECMALineAndByteOffsetOfPosition(w.sourceFile, item.Loc.Pos())
 			w.processRange(item, startLine, startChar, w.enclosingNode, w.enclosingNode, nil)
 			w.insertIndentation(item.Loc.Pos(), indentation, false)
 		})
@@ -770,7 +770,7 @@ func (w *formatSpanWorker) processRange(r TextRangeWithKind, rangeStartLine int,
 func (w *formatSpanWorker) processTrivia(trivia []TextRangeWithKind, parent *ast.Node, contextNode *ast.Node, dynamicIndentation *dynamicIndenter) {
 	for _, triviaItem := range trivia {
 		if isComment(triviaItem.Kind) && triviaItem.Loc.ContainedBy(w.originalRange) {
-			triviaItemStartLine, triviaItemStartCharacter := scanner.GetECMALineAndCharacterOfPosition(w.sourceFile, triviaItem.Loc.Pos())
+			triviaItemStartLine, triviaItemStartCharacter := scanner.GetECMALineAndByteOffsetOfPosition(w.sourceFile, triviaItem.Loc.Pos())
 			w.processRange(triviaItem, triviaItemStartLine, triviaItemStartCharacter, parent, contextNode, dynamicIndentation)
 		}
 	}
@@ -867,7 +867,7 @@ func (w *formatSpanWorker) insertIndentation(pos int, indentation int, lineAdded
 		// insert indentation string at the very beginning of the token
 		w.recordReplace(pos, 0, indentationString)
 	} else {
-		tokenStartLine, tokenStartCharacter := scanner.GetECMALineAndCharacterOfPosition(w.sourceFile, pos)
+		tokenStartLine, tokenStartCharacter := scanner.GetECMALineAndByteOffsetOfPosition(w.sourceFile, pos)
 		startLinePosition := int(scanner.GetECMALineStarts(w.sourceFile)[tokenStartLine])
 		if indentation != w.characterToColumn(startLinePosition, tokenStartCharacter) || w.indentationIsDifferent(indentationString, startLinePosition) {
 			w.recordReplace(startLinePosition, tokenStartCharacter, indentationString)
@@ -1026,7 +1026,7 @@ func (w *formatSpanWorker) consumeTokenAndAdvanceScanner(currentTokenInfo tokenI
 	lineAction := LineActionNone
 	isTokenInRange := currentTokenInfo.token.Loc.ContainedBy(w.originalRange)
 
-	tokenStartLine, tokenStartChar := scanner.GetECMALineAndCharacterOfPosition(w.sourceFile, currentTokenInfo.token.Loc.Pos())
+	tokenStartLine, tokenStartChar := scanner.GetECMALineAndByteOffsetOfPosition(w.sourceFile, currentTokenInfo.token.Loc.Pos())
 
 	if isTokenInRange {
 		rangeHasError := w.rangeContainsError(currentTokenInfo.token.Loc)

diff --git a/internal/printer/changetrackerwriter.go b/internal/printer/changetrackerwriter.go
@@ -224,12 +224,12 @@ func (ct *ChangeTrackerWriter) WriteLiteral(s string) {
 	ct.textWriter.WriteLiteral(s)
 	ct.setLastNonTriviaPosition(s, true)
 }
-func (ct *ChangeTrackerWriter) GetTextPos() int          { return ct.textWriter.GetTextPos() }
-func (ct *ChangeTrackerWriter) GetLine() int             { return ct.textWriter.GetLine() }
-func (ct *ChangeTrackerWriter) GetColumn() int           { return ct.textWriter.GetColumn() }
-func (ct *ChangeTrackerWriter) GetIndent() int           { return ct.textWriter.GetIndent() }
-func (ct *ChangeTrackerWriter) IsAtStartOfLine() bool    { return ct.textWriter.IsAtStartOfLine() }
-func (ct *ChangeTrackerWriter) HasTrailingComment() bool { return ct.textWriter.HasTrailingComment() }
+func (ct *ChangeTrackerWriter) GetTextPos() int             { return ct.textWriter.GetTextPos() }
+func (ct *ChangeTrackerWriter) GetLine() int                { return ct.textWriter.GetLine() }
+func (ct *ChangeTrackerWriter) GetColumn() core.UTF16Offset { return ct.textWriter.GetColumn() }
+func (ct *ChangeTrackerWriter) GetIndent() int              { return ct.textWriter.GetIndent() }
+func (ct *ChangeTrackerWriter) IsAtStartOfLine() bool       { return ct.textWriter.IsAtStartOfLine() }
+func (ct *ChangeTrackerWriter) HasTrailingComment() bool    { return ct.textWriter.HasTrailingComment() }
 func (ct *ChangeTrackerWriter) HasTrailingWhitespace() bool {
 	return ct.textWriter.HasTrailingWhitespace()
 }
diff --git a/internal/printer/emittextwriter.go b/internal/printer/emittextwriter.go
@@ -1,6 +1,9 @@
 package printer
 
-import "github.com/microsoft/typescript-go/internal/ast"
+import (
+	"github.com/microsoft/typescript-go/internal/ast"
+	"github.com/microsoft/typescript-go/internal/core"
+)
 
 // Externally opaque interface for printing text
 type EmitTextWriter interface {
@@ -25,7 +28,7 @@ type EmitTextWriter interface {
 	WriteLiteral(s string)
 	GetTextPos() int
 	GetLine() int
-	GetColumn() int
+	GetColumn() core.UTF16Offset
 	GetIndent() int
 	IsAtStartOfLine() bool
 	HasTrailingComment() bool

diff --git a/internal/printer/singlelinestringwriter.go b/internal/printer/singlelinestringwriter.go
@@ -6,6 +6,7 @@ import (
 	"unicode/utf8"
 
 	"github.com/microsoft/typescript-go/internal/ast"
+	"github.com/microsoft/typescript-go/internal/core"
 	"github.com/microsoft/typescript-go/internal/stringutil"
 )
 
@@ -39,7 +40,7 @@ func (w singleLineStringWriter) DecreaseIndent() {
 	// Do Nothing
 }
 
-func (w singleLineStringWriter) GetColumn() int {
+func (w singleLineStringWriter) GetColumn() core.UTF16Offset {
 	return 0
 }
 

diff --git a/internal/printer/textwriter.go b/internal/printer/textwriter.go
@@ -31,11 +31,15 @@ func (w *textWriter) DecreaseIndent() {
 	w.indent--
 }
 
-func (w *textWriter) GetColumn() int {
+// GetColumn returns the current column measured in UTF-16 code units for source map
+// compatibility; while w.lineStart is set it reports w.indent*w.indentSize instead.
+func (w *textWriter) GetColumn() core.UTF16Offset {
 	if w.lineStart {
-		return w.indent * w.indentSize
+		return core.UTF16Offset(w.indent * w.indentSize)
 	}
-	return w.builder.Len() - w.linePos
+	// Count UTF-16 code units written since w.linePos; for ASCII-only output this equals the byte count.
+	// NOTE(review): this scans those bytes on every call, where the old byte subtraction was O(1) — confirm hot callers tolerate it.
+	return core.UTF16Len(w.builder.String()[w.linePos:])
 }
 
 func (w *textWriter) GetIndent() int {

diff --git a/internal/printer/utilities.go b/internal/printer/utilities.go
@@ -893,14 +893,16 @@ func calculateIndent(text string, pos int, end int) int {
 // optimized for monotonically increasing positions (e.g., during source map emit).
 //
 // When positions increase within the same line, only the delta between the last
-// position and the new position needs to be scanned for rune counts, turning
-// what would be O(n²) into O(n) for long lines.
+// position and the new position needs to be scanned for UTF-16 code unit counts,
+// turning what would be O(n²) into O(n) for long lines.
+//
+// Character offsets are measured in UTF-16 code units per the source map specification.
 type lineCharacterCache struct {
 	lineMap    []core.TextPos
 	text       string
 	cachedLine int
 	cachedPos  int
-	cachedChar int
+	cachedChar core.UTF16Offset // UTF-16 code unit offset of cachedPos from the start of cachedLine
 	hasCached  bool
 }
 
@@ -911,15 +913,16 @@ func newLineCharacterCache(source sourcemap.Source) *lineCharacterCache {
 	}
 }
 
-func (c *lineCharacterCache) getLineAndCharacter(pos int) (line int, character int) {
+// getLineAndCharacter returns the 0-based line number and UTF-16 code unit
+// offset from the start of that line for the given byte position.
+func (c *lineCharacterCache) getLineAndCharacter(pos int) (line int, character core.UTF16Offset) {
 	line = scanner.ComputeLineOfPosition(c.lineMap, pos)
 	if c.hasCached && line == c.cachedLine && pos >= c.cachedPos {
-		// Incremental: only count runes from the last cached position.
-		character = c.cachedChar + utf8.RuneCountInString(c.text[c.cachedPos:pos])
+		// Incremental: only count UTF-16 code units from the last cached position.
+		character = c.cachedChar + core.UTF16Len(c.text[c.cachedPos:pos])
 	} else {
 		// Full computation from line start.
-		// !!! TODO: this is suspect; these are rune counts, not UTF-8 _or_ UTF-16 offsets.
-		character = utf8.RuneCountInString(c.text[c.lineMap[line]:pos])
+		character = core.UTF16Len(c.text[c.lineMap[line]:pos])
 	}
 	c.cachedLine = line
 	c.cachedPos = pos