From 3d3d8880368879d21813c40fdb208fb163cbfb19 Mon Sep 17 00:00:00 2001 From: FromSi Date: Wed, 20 May 2026 02:40:54 +0500 Subject: [PATCH 1/3] fix(view): sanitize HTML links --- go.mod | 3 + go.sum | 6 + internal/htmlsanitizer/lib_sanitizer.go | 54 ++++ internal/htmlsanitizer/lib_sanitizer_test.go | 273 +++++++++++++++++++ internal/htmlsanitizer/sanitizer.go | 5 + view/html.go | 40 ++- view/html_test.go | 100 +++++++ 7 files changed, 477 insertions(+), 4 deletions(-) create mode 100644 internal/htmlsanitizer/lib_sanitizer.go create mode 100644 internal/htmlsanitizer/lib_sanitizer_test.go create mode 100644 internal/htmlsanitizer/sanitizer.go diff --git a/go.mod b/go.mod index da748337..9748f570 100644 --- a/go.mod +++ b/go.mod @@ -23,6 +23,7 @@ require ( github.com/hashicorp/golang-lru/v2 v2.0.7 github.com/knadh/go-pop3 v1.0.2 github.com/mattn/go-sixel v0.0.9 + github.com/microcosm-cc/bluemonday v1.0.27 github.com/wagslane/go-password-validator v0.3.0 github.com/yuin/goldmark v1.8.2 github.com/yuin/gopher-lua v1.1.2 @@ -37,6 +38,7 @@ require ( require ( github.com/andybalholm/cascadia v1.3.3 // indirect github.com/atotto/clipboard v0.1.4 // indirect + github.com/aymerick/douceur v0.2.0 // indirect github.com/charmbracelet/colorprofile v0.4.3 // indirect github.com/charmbracelet/ultraviolet v0.0.0-20260416155717-489999b90468 // indirect github.com/charmbracelet/x/term v0.2.2 // indirect @@ -47,6 +49,7 @@ require ( github.com/cloudflare/circl v1.6.3 // indirect github.com/danieljoos/wincred v1.2.3 // indirect github.com/godbus/dbus/v5 v5.2.2 // indirect + github.com/gorilla/css v1.0.1 // indirect github.com/lucasb-eyer/go-colorful v1.4.0 // indirect github.com/mattn/go-runewidth v0.0.23 // indirect github.com/muesli/cancelreader v0.2.2 // indirect diff --git a/go.sum b/go.sum index 7e9af686..4294b561 100644 --- a/go.sum +++ b/go.sum @@ -25,6 +25,8 @@ github.com/atotto/clipboard v0.1.4 h1:EH0zSVneZPSuFR11BlR9YppQTVDbh5+16AmcJi4g1z github.com/atotto/clipboard v0.1.4/go.mod h1:ZY9tmq7sm5xIbd9bOK4onWV4S6X0u6GY7Vn0Yu86PYI= github.com/aymanbagabas/go-udiff v0.4.1 h1:OEIrQ8maEeDBXQDoGCbbTTXYJMYRCRO1fnodZ12Gv5o= github.com/aymanbagabas/go-udiff v0.4.1/go.mod h1:0L9PGwj20lrtmEMeyw4WKJ/TMyDtvAoK9bf2u/mNo3w= +github.com/aymerick/douceur v0.2.0 h1:Mv+mAeH1Q+n9Fr+oyamOlAkUNPWPlA8PPGR0QAaYuPk= +github.com/aymerick/douceur v0.2.0/go.mod h1:wlT5vV2O3h55X9m7iVYN0TBM0NH/MmbLnd30/FjWUq4= github.com/bwesterb/go-ristretto v1.2.3/go.mod h1:fUIoIZaG73pV5biE2Blr2xEzDoMj7NFEuV9ekS419A0= github.com/charmbracelet/colorprofile v0.4.3 h1:QPa1IWkYI+AOB+fE+mg/5/4HRMZcaXex9t5KX76i20Q= github.com/charmbracelet/colorprofile v0.4.3/go.mod h1:/zT4BhpD5aGFpqQQqw7a+VtHCzu+zrQtt1zhMt9mR4Q= @@ -72,6 +74,8 @@ github.com/google/go-cmp v0.6.0 h1:ofyhxvXcZhMsU5ulbFiLKl/XBFqE1GSq7atu8tAmTRI= github.com/google/go-cmp v0.6.0/go.mod h1:17dUlkBOakJ0+DkrSSNjCkIjxS6bF9zb3elmeNGIjoY= github.com/google/uuid v1.6.0 h1:NIvaJDMOsjHA8n1jAhLSgzrAzy1Hgr+hNrb57e+94F0= github.com/google/uuid v1.6.0/go.mod h1:TIyPZe4MgqvfeYDBFedMoGGpEw/LqOeaOT+nhxU+yHo= +github.com/gorilla/css v1.0.1 h1:ntNaBIghp6JmvWnxbZKANoLyuXTPZ4cAMlo6RyhlbO8= +github.com/gorilla/css v1.0.1/go.mod h1:BvnYkspnSzMmwRK+b8/xgNPLiIuNZr6vbZBTPQ2A3b0= github.com/hashicorp/golang-lru/v2 v2.0.7 h1:a+bsQ5rvGLjzHuww6tVxozPZFVghXaHOwFs4luLUK2k= github.com/hashicorp/golang-lru/v2 v2.0.7/go.mod h1:QeFd9opnmA6QUJc5vARoKUSoFhyfM2/ZepoAG6RGpeM= github.com/knadh/go-pop3 v1.0.2 h1:gbdtwzEYedLVos/vpebM2d73NTyZxEgjgRJ4S77HlzM= @@ -84,6 +88,8 @@ github.com/mattn/go-runewidth v0.0.23 h1:7ykA0T0jkPpzSvMS5i9uoNn2Xy3R383f9HDx3Ry github.com/mattn/go-runewidth v0.0.23/go.mod h1:XBkDxAl56ILZc9knddidhrOlY5R/pDhgLpndooCuJAs= github.com/mattn/go-sixel v0.0.9 h1:ncx/rVU35Ut7/6gpVk4deC4/Wp2js9fDKmFmWnzmGoY= github.com/mattn/go-sixel v0.0.9/go.mod h1:mfichvavqIDFW14LGU24ux/UZ/wF0/hG+4pUWOWrQgM= +github.com/microcosm-cc/bluemonday v1.0.27 h1:MpEUotklkwCSLeH+Qdx1VJgNqLlpY2KXwXFM08ygZfk= +github.com/microcosm-cc/bluemonday v1.0.27/go.mod h1:jFi9vgW+H7c3V0lb6nR74Ib/DIB5OBs92Dimizgw2cA= github.com/muesli/cancelreader v0.2.2 h1:3I4Kt4BQjOR54NavqnDogx/MIoWBFa0StPA8ELUXHmA= github.com/muesli/cancelreader v0.2.2/go.mod h1:3XuTXfFS2VjM+HTLZY9Ak0l6eUKfijIfMUZ4EgX0QYo= github.com/pmezard/go-difflib v1.0.0 h1:4DBwDE0NGyQoBHbLQYPwSUPoCMWR5BEzIk/f1lZbAQM= diff --git a/internal/htmlsanitizer/lib_sanitizer.go b/internal/htmlsanitizer/lib_sanitizer.go new file mode 100644 index 00000000..fedebc2c --- /dev/null +++ b/internal/htmlsanitizer/lib_sanitizer.go @@ -0,0 +1,54 @@ +package htmlsanitizer + +import ( + "encoding/base64" + "net/url" + "regexp" + + "github.com/microcosm-cc/bluemonday" +) + +type LibSanitizer struct { + policy *bluemonday.Policy +} + +func NewLibSanitizer() LibSanitizer { + return LibSanitizer{policy: newPolicy()} +} + +func (s LibSanitizer) SanitizeBytes(html []byte) []byte { + return s.policy.SanitizeBytes(html) +} + +func newPolicy() *bluemonday.Policy { + p := bluemonday.NewPolicy() + linkURLPattern := regexp.MustCompile(`(?i)^(https?://|mailto:|tel:)`) + imageURLPattern := regexp.MustCompile(`(?i)^(https?://|cid:|data:image/)`) + dataImagePrefixPattern := regexp.MustCompile(`(?i)^image/(gif|jpe?g|png|webp);base64,`) + p.AllowElements( + "a", "b", "blockquote", "br", "code", "div", "em", "h1", "h2", + "i", "img", "li", "ol", "p", "pre", "span", "strong", "table", + "tbody", "td", "th", "thead", "tr", "u", "ul", + ) + p.AllowAttrs("href").Matching(linkURLPattern).OnElements("a") + p.AllowAttrs("src").Matching(imageURLPattern).OnElements("img") + p.AllowAttrs("alt").OnElements("img") + p.AllowAttrs("cite").OnElements("blockquote") + p.RequireParseableURLs(true) + p.AllowURLSchemes("http", "https", "mailto", "tel") + p.AllowURLSchemeWithCustomPolicy("cid", func(u *url.URL) bool { + return u.Opaque != "" && u.RawQuery == "" && u.Fragment == "" + }) + p.AllowURLSchemeWithCustomPolicy("data", func(u *url.URL) bool { + if u.RawQuery != "" || u.Fragment != "" { + return false + } + prefix := dataImagePrefixPattern.FindString(u.Opaque) + if prefix == "" { + return false + } + _, err := base64.StdEncoding.DecodeString(u.Opaque[len(prefix):]) + return err == nil + }) + return p +} diff --git a/internal/htmlsanitizer/lib_sanitizer_test.go b/internal/htmlsanitizer/lib_sanitizer_test.go new file mode 100644 index 00000000..1081170d --- /dev/null +++ b/internal/htmlsanitizer/lib_sanitizer_test.go @@ -0,0 +1,273 @@ +package htmlsanitizer + +import ( + "strings" + "testing" +) + +func TestLibSanitizerRemovesUnsafeHTML(t *testing.T) { + sanitizer := NewLibSanitizer() + input := []byte(` +

Hello

+ + + bad link + good link + bad image + cid image + bad data + data image + `) + + got := string(sanitizer.SanitizeBytes(input)) + + for _, forbidden := range []string{ + "onclick", + "data link + cid link + ftp link + file link + vbscript link + protocol relative link + relative link + broken link + `) + + got := string(sanitizer.SanitizeBytes(input)) + + for _, forbidden := range []string{ + "href=\"data:image", + "href=\"cid:", + "href=\"ftp:", + "href=\"file:", + "href=\"vbscript:", + "href=\"//example.com", + "href=\"/relative", + "href=\":not-a-url", + } { + if strings.Contains(got, forbidden) { + t.Fatalf("sanitized HTML contains %q:\n%s", forbidden, got) + } + } + + for _, wantText := range []string{ + "data link", + "cid link", + "ftp link", + "file link", + "vbscript link", + "protocol relative link", + "relative link", + "broken link", + } { + if !strings.Contains(got, wantText) { + t.Fatalf("sanitized HTML should keep link text %q:\n%s", wantText, got) + } + } +} + +func TestLibSanitizerAllowsSafeLinks(t *testing.T) { + sanitizer := NewLibSanitizer() + input := []byte(` + http link + https link + uppercase https link + mailto link + uppercase mailto link + tel link + `) + + got := string(sanitizer.SanitizeBytes(input)) + + for _, want := range []string{ + `href="http://example.com/path?x=1"`, + `href="https://example.com/path?x=1"`, + `href="https://example.com/path?x=1"`, + `href="mailto:security@example.com"`, + `href="mailto:security@example.com"`, + `href="tel:+15551234567"`, + } { + if !strings.Contains(got, want) { + t.Fatalf("sanitized HTML does not contain %q:\n%s", want, got) + } + } +} + +func TestLibSanitizerFiltersImageSources(t *testing.T) { + sanitizer := NewLibSanitizer() + input := []byte(` + http image + https image + cid image + data image + javascript image + file image + html data image + relative image + `) + + got := string(sanitizer.SanitizeBytes(input)) + + for _, want := range []string{ + `src="http://example.com/image.png"`, + `src="https://example.com/image.png"`, + `src="cid:test@example.com"`, + `src="data:image/png;base64,iVBORw0KGgo="`, + } { + if !strings.Contains(got, want) { + t.Fatalf("sanitized HTML does not contain %q:\n%s", want, got) + } + } + + for _, forbidden := range []string{ + "src=\"javascript:", + "src=\"file:", + "src=\"data:text/html", + "src=\"/relative.png", + } { + if strings.Contains(got, forbidden) { + t.Fatalf("sanitized HTML contains %q:\n%s", forbidden, got) + } + } +} + +func TestLibSanitizerRemovesUnknownElementsButKeepsText(t *testing.T) { + sanitizer := NewLibSanitizer() + input := []byte(` +
form text
+ + object text +

safe text

+ `) + + got := string(sanitizer.SanitizeBytes(input)) + + for _, forbidden := range []string{ + "styled text

+
quote text
+ `) + + got := string(sanitizer.SanitizeBytes(input)) + + for _, forbidden := range []string{ + "style=", + "class=", + "data-secret", + "id=", + "onclick=", + } { + if strings.Contains(got, forbidden) { + t.Fatalf("sanitized HTML contains %q:\n%s", forbidden, got) + } + } + + for _, want := range []string{ + "styled text", + `cite="https://example.com"`, + "quote text", + } { + if !strings.Contains(got, want) { + t.Fatalf("sanitized HTML does not contain %q:\n%s", want, got) + } + } +} + +func TestLibSanitizerRejectsCIDWithQueryOrFragment(t *testing.T) { + sanitizer := NewLibSanitizer() + input := []byte(` + cid query + cid fragment + cid ok + `) + + got := string(sanitizer.SanitizeBytes(input)) + + for _, forbidden := range []string{ + `src="cid:test@example.com?x=1"`, + `src="cid:test@example.com#frag"`, + } { + if strings.Contains(got, forbidden) { + t.Fatalf("sanitized HTML contains %q:\n%s", forbidden, got) + } + } + + if !strings.Contains(got, `src="cid:test@example.com"`) { + t.Fatalf("sanitized HTML should keep clean cid source:\n%s", got) + } +} + +func TestLibSanitizerRejectsInvalidDataImages(t *testing.T) { + sanitizer := NewLibSanitizer() + input := []byte(` + invalid base64 + svg data + png data + `) + + got := string(sanitizer.SanitizeBytes(input)) + + for _, forbidden := range []string{ + "not base64", + "data:image/svg+xml", + } { + if strings.Contains(got, forbidden) { + t.Fatalf("sanitized HTML contains %q:\n%s", forbidden, got) + } + } + + if !strings.Contains(got, `src="data:image/png;base64,iVBORw0KGgo="`) { + t.Fatalf("sanitized HTML should keep valid png data URI:\n%s", got) + } +} diff --git a/internal/htmlsanitizer/sanitizer.go b/internal/htmlsanitizer/sanitizer.go new file mode 100644 index 00000000..7196b866 --- /dev/null +++ b/internal/htmlsanitizer/sanitizer.go @@ -0,0 +1,5 @@ +package htmlsanitizer + +type Sanitizer interface { + SanitizeBytes(html []byte) []byte +} diff --git a/view/html.go b/view/html.go index 3600677d..40f1b633 100644 --- a/view/html.go +++ b/view/html.go @@ -13,12 +13,15 @@ import ( "charm.land/lipgloss/v2" "github.com/floatpane/matcha/clib" + "github.com/floatpane/matcha/internal/htmlsanitizer" "github.com/floatpane/matcha/internal/httpclient" "github.com/floatpane/matcha/internal/loglevel" "github.com/floatpane/matcha/theme" lru "github.com/hashicorp/golang-lru/v2" ) +var htmlSanitizer htmlsanitizer.Sanitizer = htmlsanitizer.NewLibSanitizer() + const termGhostty = "ghostty" func linkStyle() lipgloss.Style { @@ -107,6 +110,8 @@ func hyperlinkSupported() bool { // hyperlink formats a string as either a terminal-clickable hyperlink or plain text with URL. func hyperlink(url, text string) string { + url = strings.TrimSpace(url) + text = stripTerminalControls(text) if text == "" { text = url } @@ -124,6 +129,24 @@ func hyperlink(url, text string) string { return fmt.Sprintf("%s <%s>", linkStyle().Render(text), linkStyle().Render(url)) } +func stripTerminalControls(s string) string { + return strings.Map(func(r rune) rune { + if r == '\n' || r == '\t' { + return r + } + if r < 0x20 || r == 0x7f || r == 0x9c { + return -1 + } + return r + }, s) +} + +func hasTerminalControls(s string) bool { + return strings.IndexFunc(s, func(r rune) bool { + return r < 0x20 || r == 0x7f || r == 0x9c + }) != -1 +} + func decodeQuotedPrintable(s string) (string, error) { reader := quotedprintable.NewReader(strings.NewReader(s)) body, err := io.ReadAll(reader) @@ -589,6 +612,7 @@ func processBody(rawBody, mimeType string, inline map[string]string, h1Style, h2 } else { htmlBody = markdownToHTML([]byte(decodedBody)) } + htmlBody = htmlSanitizer.SanitizeBytes(htmlBody) result, placements, err := renderHTMLToText(htmlBody, inline, h1Style, h2Style, disableImages) if err != nil { @@ -601,7 +625,8 @@ func processBody(rawBody, mimeType string, inline map[string]string, h1Style, h2 // keep these alive. Retry through the markdown pre-pass when the direct // HTML path produces nothing. if directHTML && strings.TrimSpace(result) == "" { - result, placements, err = renderHTMLToText(markdownToHTML([]byte(decodedBody)), inline, h1Style, h2Style, disableImages) + fallbackHTML := htmlSanitizer.SanitizeBytes(markdownToHTML([]byte(decodedBody))) + result, placements, err = renderHTMLToText(fallbackHTML, inline, h1Style, h2Style, disableImages) if err != nil { return "", nil, err } @@ -643,11 +668,18 @@ func renderHTMLToText(htmlBody []byte, inline map[string]string, h1Style, h2Styl text.WriteString("\n\n") case clib.HElemLink: - text.WriteString(hyperlink(elem.Attr1, elem.Text)) + if hasTerminalControls(elem.Attr1) { + text.WriteString(stripTerminalControls(elem.Text)) + } else { + text.WriteString(hyperlink(elem.Attr1, elem.Text)) + } case clib.HElemImage: - src := elem.Attr1 - alt := elem.Attr2 + src := strings.TrimSpace(elem.Attr1) + alt := stripTerminalControls(elem.Attr2) + if hasTerminalControls(src) { + continue + } if !disableImages && imageProtocolSupported() { var payload string diff --git a/view/html_test.go b/view/html_test.go index e916bc04..b9fc2aff 100644 --- a/view/html_test.go +++ b/view/html_test.go @@ -670,6 +670,106 @@ func TestProcessBodyWithHyperlinkSupport(t *testing.T) { } } +func TestProcessBodySanitizesUnsafeHTMLLinks(t *testing.T) { + origTerm := os.Getenv("TERM") + origTermProgram := os.Getenv("TERM_PROGRAM") + origVTEVersion := os.Getenv("VTE_VERSION") + defer func() { + os.Setenv("TERM", origTerm) + os.Setenv("TERM_PROGRAM", origTermProgram) + os.Setenv("VTE_VERSION", origVTEVersion) + }() + + os.Setenv("TERM", "xterm-kitty") + os.Setenv("TERM_PROGRAM", "") + os.Unsetenv("VTE_VERSION") + + h1Style := lipgloss.NewStyle() + h2Style := lipgloss.NewStyle() + bodyStyle := lipgloss.NewStyle() + + tests := []struct { + name string + input string + wantContains string + forbiddenContains []string + }{ + { + name: "javascript link is rendered as text only", + input: `Click here`, + wantContains: "Click here", + forbiddenContains: []string{ + "javascript:", + "\x1b]8;;javascript:", + }, + }, + { + name: "mixed-case javascript link is rejected", + input: `Click here`, + wantContains: "Click here", + forbiddenContains: []string{ + "JaVaScRiPt:", + "javascript:", + }, + }, + { + name: "unsafe image source is not linked", + input: `bad imageAfter`, + wantContains: "After", + forbiddenContains: []string{ + "javascript:", + "bad image", + "Click here to view image", + }, + }, + { + name: "data image href is not rendered as a link", + input: `data link`, + wantContains: "data link", + forbiddenContains: []string{ + "data:image", + "\x1b]8;;data:", + }, + }, + { + name: "cid href is not rendered as a link", + input: `cid link`, + wantContains: "cid link", + forbiddenContains: []string{ + "cid:test-image", + "\x1b]8;;cid:", + }, + }, + { + name: "OSC control characters are stripped from safe links", + input: "safe", + wantContains: "safe", + forbiddenContains: []string{ + "\x1b]8;;file:", + "file:///tmp/pwn", + "\x07", + }, + }, + } + + for _, tt := range tests { + t.Run(tt.name, func(t *testing.T) { + processed, _, err := ProcessBody(tt.input, BodyMIMETypeHTML, h1Style, h2Style, bodyStyle, false) + if err != nil { + t.Fatalf("ProcessBody() failed: %v", err) + } + if !strings.Contains(processed, tt.wantContains) { + t.Fatalf("processed body does not contain %q:\n%q", tt.wantContains, processed) + } + for _, forbidden := range tt.forbiddenContains { + if strings.Contains(processed, forbidden) { + t.Fatalf("processed body contains forbidden %q:\n%q", forbidden, processed) + } + } + }) + } +} + func TestProcessBodyWithImageProtocol(t *testing.T) { // Save original environment variables origTerm := os.Getenv("TERM") From 5964a34d96c3e09f5dabf95de8b7da56cab112ee Mon Sep 17 00:00:00 2001 From: FromSi Date: Mon, 25 May 2026 17:34:32 +0500 Subject: [PATCH 2/3] fix(view): sanitize HTML links --- view/html.go | 6 +++++- view/html_test.go | 39 +++++++++++++++++++++++++++++++++++++++ 2 files changed, 44 insertions(+), 1 deletion(-) diff --git a/view/html.go b/view/html.go index 40f1b633..e047ff80 100644 --- a/view/html.go +++ b/view/html.go @@ -717,7 +717,7 @@ func renderHTMLToText(htmlBody []byte, inline map[string]string, h1Style, h2Styl } debugImageProtocol("no payload for src=%s", src) } - if hyperlinkSupported() { + if isRemoteImageURL(src) && hyperlinkSupported() { fmt.Fprintf(&text, "\n %s \n", hyperlink(src, fmt.Sprintf("[Click here to view image: %s]", alt))) } else { fmt.Fprintf(&text, "\n %s \n", linkStyle().Render(fmt.Sprintf("[Image: %s, %s]", alt, src))) @@ -787,6 +787,10 @@ func renderHTMLToText(htmlBody []byte, inline map[string]string, h1Style, h2Styl return result, placements, nil } +func isRemoteImageURL(src string) bool { + return strings.HasPrefix(src, "http://") || strings.HasPrefix(src, "https://") +} + func tableHeaderStyle() lipgloss.Style { return lipgloss.NewStyle().Bold(true).Foreground(theme.ActiveTheme.Accent) } diff --git a/view/html_test.go b/view/html_test.go index b9fc2aff..15ea03fe 100644 --- a/view/html_test.go +++ b/view/html_test.go @@ -770,6 +770,45 @@ func TestProcessBodySanitizesUnsafeHTMLLinks(t *testing.T) { } } +func TestProcessBodyDoesNotHyperlinkNonRemoteImageFallbacks(t *testing.T) { + t.Setenv("TERM", "xterm") + t.Setenv("TERM_PROGRAM", "") + t.Setenv("WEZTERM_EXECUTABLE", "/usr/bin/wezterm") + + h1Style := lipgloss.NewStyle() + h2Style := lipgloss.NewStyle() + bodyStyle := lipgloss.NewStyle() + + input := ` + data image + cid image + ` + + processed, _, err := ProcessBody(input, BodyMIMETypeHTML, h1Style, h2Style, bodyStyle, true) + if err != nil { + t.Fatalf("ProcessBody() failed: %v", err) + } + + for _, want := range []string{ + "[Image: data image, data:image/png;base64,iVBORw0KGgo=]", + "[Image: cid image, cid:test-image@example.com]", + } { + if !strings.Contains(processed, want) { + t.Fatalf("processed body does not contain %q:\n%q", want, processed) + } + } + + for _, forbidden := range []string{ + "Click here to view image", + "\x1b]8;;data:", + "\x1b]8;;cid:", + } { + if strings.Contains(processed, forbidden) { + t.Fatalf("processed body contains forbidden %q:\n%q", forbidden, processed) + } + } +} + func TestProcessBodyWithImageProtocol(t *testing.T) { // Save original environment variables origTerm := os.Getenv("TERM") From 23b10eec5b3cb52d72be8dcfa78b61ab9443278e Mon Sep 17 00:00:00 2001 From: FromSi Date: Tue, 26 May 2026 18:24:07 +0500 Subject: [PATCH 3/3] fix(view): sanitize HTML links --- internal/htmlsanitizer/lib_sanitizer.go | 6 +++++- internal/htmlsanitizer/lib_sanitizer_test.go | 4 ++++ view/html.go | 1 + view/html_test.go | 18 ++++++++++++++++++ 4 files changed, 28 insertions(+), 1 deletion(-) diff --git a/internal/htmlsanitizer/lib_sanitizer.go b/internal/htmlsanitizer/lib_sanitizer.go index fedebc2c..7405c7bd 100644 --- a/internal/htmlsanitizer/lib_sanitizer.go +++ b/internal/htmlsanitizer/lib_sanitizer.go @@ -47,7 +47,11 @@ func newPolicy() *bluemonday.Policy { if prefix == "" { return false } - _, err := base64.StdEncoding.DecodeString(u.Opaque[len(prefix):]) + payload := u.Opaque[len(prefix):] + if _, err := base64.StdEncoding.DecodeString(payload); err == nil { + return true + } + _, err := base64.RawStdEncoding.DecodeString(payload) return err == nil }) return p diff --git a/internal/htmlsanitizer/lib_sanitizer_test.go b/internal/htmlsanitizer/lib_sanitizer_test.go index 1081170d..fefc984f 100644 --- a/internal/htmlsanitizer/lib_sanitizer_test.go +++ b/internal/htmlsanitizer/lib_sanitizer_test.go @@ -254,6 +254,7 @@ func TestLibSanitizerRejectsInvalidDataImages(t *testing.T) { invalid base64 svg data png data + raw png data `) got := string(sanitizer.SanitizeBytes(input)) @@ -270,4 +271,7 @@ func TestLibSanitizerRejectsInvalidDataImages(t *testing.T) { if !strings.Contains(got, `src="data:image/png;base64,iVBORw0KGgo="`) { t.Fatalf("sanitized HTML should keep valid png data URI:\n%s", got) } + if !strings.Contains(got, `src="data:image/png;base64,iVBORw0KGgo"`) { + t.Fatalf("sanitized HTML should keep valid unpadded png data URI:\n%s", got) + } } diff --git a/view/html.go b/view/html.go index e047ff80..4f2578f0 100644 --- a/view/html.go +++ b/view/html.go @@ -788,6 +788,7 @@ func renderHTMLToText(htmlBody []byte, inline map[string]string, h1Style, h2Styl } func isRemoteImageURL(src string) bool { + src = strings.ToLower(src) return strings.HasPrefix(src, "http://") || strings.HasPrefix(src, "https://") } diff --git a/view/html_test.go b/view/html_test.go index 15ea03fe..b75e2d76 100644 --- a/view/html_test.go +++ b/view/html_test.go @@ -809,6 +809,24 @@ func TestProcessBodyDoesNotHyperlinkNonRemoteImageFallbacks(t *testing.T) { } } +func TestIsRemoteImageURLAllowsUppercaseHTTPSScheme(t *testing.T) { + tests := []struct { + src string + want bool + }{ + {src: "http://example.com/image.png", want: true}, + {src: "HTTPS://example.com/image.png", want: true}, + {src: "cid:test-image@example.com", want: false}, + {src: "data:image/png;base64,iVBORw0KGgo=", want: false}, + } + + for _, tt := range tests { + if got := isRemoteImageURL(tt.src); got != tt.want { + t.Fatalf("isRemoteImageURL(%q) = %v, want %v", tt.src, got, tt.want) + } + } +} + func TestProcessBodyWithImageProtocol(t *testing.T) { // Save original environment variables origTerm := os.Getenv("TERM")