From 3d3d8880368879d21813c40fdb208fb163cbfb19 Mon Sep 17 00:00:00 2001
From: FromSi
Date: Wed, 20 May 2026 02:40:54 +0500
Subject: [PATCH 1/3] fix(view): sanitize HTML links
---
go.mod | 3 +
go.sum | 6 +
internal/htmlsanitizer/lib_sanitizer.go | 54 ++++
internal/htmlsanitizer/lib_sanitizer_test.go | 273 +++++++++++++++++++
internal/htmlsanitizer/sanitizer.go | 5 +
view/html.go | 40 ++-
view/html_test.go | 100 +++++++
7 files changed, 477 insertions(+), 4 deletions(-)
create mode 100644 internal/htmlsanitizer/lib_sanitizer.go
create mode 100644 internal/htmlsanitizer/lib_sanitizer_test.go
create mode 100644 internal/htmlsanitizer/sanitizer.go
diff --git a/go.mod b/go.mod
index da748337..9748f570 100644
--- a/go.mod
+++ b/go.mod
@@ -23,6 +23,7 @@ require (
github.com/hashicorp/golang-lru/v2 v2.0.7
github.com/knadh/go-pop3 v1.0.2
github.com/mattn/go-sixel v0.0.9
+ github.com/microcosm-cc/bluemonday v1.0.27
github.com/wagslane/go-password-validator v0.3.0
github.com/yuin/goldmark v1.8.2
github.com/yuin/gopher-lua v1.1.2
@@ -37,6 +38,7 @@ require (
require (
github.com/andybalholm/cascadia v1.3.3 // indirect
github.com/atotto/clipboard v0.1.4 // indirect
+ github.com/aymerick/douceur v0.2.0 // indirect
github.com/charmbracelet/colorprofile v0.4.3 // indirect
github.com/charmbracelet/ultraviolet v0.0.0-20260416155717-489999b90468 // indirect
github.com/charmbracelet/x/term v0.2.2 // indirect
@@ -47,6 +49,7 @@ require (
github.com/cloudflare/circl v1.6.3 // indirect
github.com/danieljoos/wincred v1.2.3 // indirect
github.com/godbus/dbus/v5 v5.2.2 // indirect
+ github.com/gorilla/css v1.0.1 // indirect
github.com/lucasb-eyer/go-colorful v1.4.0 // indirect
github.com/mattn/go-runewidth v0.0.23 // indirect
github.com/muesli/cancelreader v0.2.2 // indirect
diff --git a/go.sum b/go.sum
index 7e9af686..4294b561 100644
--- a/go.sum
+++ b/go.sum
@@ -25,6 +25,8 @@ github.com/atotto/clipboard v0.1.4 h1:EH0zSVneZPSuFR11BlR9YppQTVDbh5+16AmcJi4g1z
github.com/atotto/clipboard v0.1.4/go.mod h1:ZY9tmq7sm5xIbd9bOK4onWV4S6X0u6GY7Vn0Yu86PYI=
github.com/aymanbagabas/go-udiff v0.4.1 h1:OEIrQ8maEeDBXQDoGCbbTTXYJMYRCRO1fnodZ12Gv5o=
github.com/aymanbagabas/go-udiff v0.4.1/go.mod h1:0L9PGwj20lrtmEMeyw4WKJ/TMyDtvAoK9bf2u/mNo3w=
+github.com/aymerick/douceur v0.2.0 h1:Mv+mAeH1Q+n9Fr+oyamOlAkUNPWPlA8PPGR0QAaYuPk=
+github.com/aymerick/douceur v0.2.0/go.mod h1:wlT5vV2O3h55X9m7iVYN0TBM0NH/MmbLnd30/FjWUq4=
github.com/bwesterb/go-ristretto v1.2.3/go.mod h1:fUIoIZaG73pV5biE2Blr2xEzDoMj7NFEuV9ekS419A0=
github.com/charmbracelet/colorprofile v0.4.3 h1:QPa1IWkYI+AOB+fE+mg/5/4HRMZcaXex9t5KX76i20Q=
github.com/charmbracelet/colorprofile v0.4.3/go.mod h1:/zT4BhpD5aGFpqQQqw7a+VtHCzu+zrQtt1zhMt9mR4Q=
@@ -72,6 +74,8 @@ github.com/google/go-cmp v0.6.0 h1:ofyhxvXcZhMsU5ulbFiLKl/XBFqE1GSq7atu8tAmTRI=
github.com/google/go-cmp v0.6.0/go.mod h1:17dUlkBOakJ0+DkrSSNjCkIjxS6bF9zb3elmeNGIjoY=
github.com/google/uuid v1.6.0 h1:NIvaJDMOsjHA8n1jAhLSgzrAzy1Hgr+hNrb57e+94F0=
github.com/google/uuid v1.6.0/go.mod h1:TIyPZe4MgqvfeYDBFedMoGGpEw/LqOeaOT+nhxU+yHo=
+github.com/gorilla/css v1.0.1 h1:ntNaBIghp6JmvWnxbZKANoLyuXTPZ4cAMlo6RyhlbO8=
+github.com/gorilla/css v1.0.1/go.mod h1:BvnYkspnSzMmwRK+b8/xgNPLiIuNZr6vbZBTPQ2A3b0=
github.com/hashicorp/golang-lru/v2 v2.0.7 h1:a+bsQ5rvGLjzHuww6tVxozPZFVghXaHOwFs4luLUK2k=
github.com/hashicorp/golang-lru/v2 v2.0.7/go.mod h1:QeFd9opnmA6QUJc5vARoKUSoFhyfM2/ZepoAG6RGpeM=
github.com/knadh/go-pop3 v1.0.2 h1:gbdtwzEYedLVos/vpebM2d73NTyZxEgjgRJ4S77HlzM=
@@ -84,6 +88,8 @@ github.com/mattn/go-runewidth v0.0.23 h1:7ykA0T0jkPpzSvMS5i9uoNn2Xy3R383f9HDx3Ry
github.com/mattn/go-runewidth v0.0.23/go.mod h1:XBkDxAl56ILZc9knddidhrOlY5R/pDhgLpndooCuJAs=
github.com/mattn/go-sixel v0.0.9 h1:ncx/rVU35Ut7/6gpVk4deC4/Wp2js9fDKmFmWnzmGoY=
github.com/mattn/go-sixel v0.0.9/go.mod h1:mfichvavqIDFW14LGU24ux/UZ/wF0/hG+4pUWOWrQgM=
+github.com/microcosm-cc/bluemonday v1.0.27 h1:MpEUotklkwCSLeH+Qdx1VJgNqLlpY2KXwXFM08ygZfk=
+github.com/microcosm-cc/bluemonday v1.0.27/go.mod h1:jFi9vgW+H7c3V0lb6nR74Ib/DIB5OBs92Dimizgw2cA=
github.com/muesli/cancelreader v0.2.2 h1:3I4Kt4BQjOR54NavqnDogx/MIoWBFa0StPA8ELUXHmA=
github.com/muesli/cancelreader v0.2.2/go.mod h1:3XuTXfFS2VjM+HTLZY9Ak0l6eUKfijIfMUZ4EgX0QYo=
github.com/pmezard/go-difflib v1.0.0 h1:4DBwDE0NGyQoBHbLQYPwSUPoCMWR5BEzIk/f1lZbAQM=
diff --git a/internal/htmlsanitizer/lib_sanitizer.go b/internal/htmlsanitizer/lib_sanitizer.go
new file mode 100644
index 00000000..fedebc2c
--- /dev/null
+++ b/internal/htmlsanitizer/lib_sanitizer.go
@@ -0,0 +1,54 @@
+package htmlsanitizer
+
+import (
+ "encoding/base64"
+ "net/url"
+ "regexp"
+
+ "github.com/microcosm-cc/bluemonday"
+)
+
+// LibSanitizer is the bluemonday-backed implementation of Sanitizer.
+type LibSanitizer struct {
+	policy *bluemonday.Policy
+}
+
+// NewLibSanitizer returns a LibSanitizer wrapping the policy built by
+// newPolicy.
+func NewLibSanitizer() LibSanitizer {
+	var sanitizer LibSanitizer
+	sanitizer.policy = newPolicy()
+	return sanitizer
+}
+
+// SanitizeBytes passes html through the wrapped policy and returns the
+// policy's output.
+func (s LibSanitizer) SanitizeBytes(html []byte) []byte {
+	cleaned := s.policy.SanitizeBytes(html)
+	return cleaned
+}
+
+func newPolicy() *bluemonday.Policy {
+ p := bluemonday.NewPolicy()
+ linkURLPattern := regexp.MustCompile(`(?i)^(https?://|mailto:|tel:)`)
+ imageURLPattern := regexp.MustCompile(`(?i)^(https?://|cid:|data:image/)`)
+ dataImagePrefixPattern := regexp.MustCompile(`(?i)^image/(gif|jpe?g|png|webp);base64,`)
+ p.AllowElements(
+ "a", "b", "blockquote", "br", "code", "div", "em", "h1", "h2",
+ "i", "img", "li", "ol", "p", "pre", "span", "strong", "table",
+ "tbody", "td", "th", "thead", "tr", "u", "ul",
+ )
+ p.AllowAttrs("href").Matching(linkURLPattern).OnElements("a")
+ p.AllowAttrs("src").Matching(imageURLPattern).OnElements("img")
+ p.AllowAttrs("alt").OnElements("img")
+ p.AllowAttrs("cite").OnElements("blockquote")
+ p.RequireParseableURLs(true)
+ p.AllowURLSchemes("http", "https", "mailto", "tel")
+ p.AllowURLSchemeWithCustomPolicy("cid", func(u *url.URL) bool {
+ return u.Opaque != "" && u.RawQuery == "" && u.Fragment == ""
+ })
+ p.AllowURLSchemeWithCustomPolicy("data", func(u *url.URL) bool {
+ if u.RawQuery != "" || u.Fragment != "" {
+ return false
+ }
+ prefix := dataImagePrefixPattern.FindString(u.Opaque)
+ if prefix == "" {
+ return false
+ }
+ _, err := base64.StdEncoding.DecodeString(u.Opaque[len(prefix):])
+ return err == nil
+ })
+ return p
+}
diff --git a/internal/htmlsanitizer/lib_sanitizer_test.go b/internal/htmlsanitizer/lib_sanitizer_test.go
new file mode 100644
index 00000000..1081170d
--- /dev/null
+++ b/internal/htmlsanitizer/lib_sanitizer_test.go
@@ -0,0 +1,273 @@
+package htmlsanitizer
+
+import (
+ "strings"
+ "testing"
+)
+
+func TestLibSanitizerRemovesUnsafeHTML(t *testing.T) {
+ sanitizer := NewLibSanitizer()
+ input := []byte(`
+ Hello
+
+
+ bad link
+ good link
+
+
+
+
+ `)
+
+ got := string(sanitizer.SanitizeBytes(input))
+
+ for _, forbidden := range []string{
+ "onclick",
+ "" alt="html data image">
+
+ `)
+
+ got := string(sanitizer.SanitizeBytes(input))
+
+ for _, want := range []string{
+ `src="http://example.com/image.png"`,
+ `src="https://example.com/image.png"`,
+ `src="cid:test@example.com"`,
+ `src="data:image/png;base64,iVBORw0KGgo="`,
+ } {
+ if !strings.Contains(got, want) {
+ t.Fatalf("sanitized HTML does not contain %q:\n%s", want, got)
+ }
+ }
+
+ for _, forbidden := range []string{
+ "src=\"javascript:",
+ "src=\"file:",
+ "src=\"data:text/html",
+ "src=\"/relative.png",
+ } {
+ if strings.Contains(got, forbidden) {
+ t.Fatalf("sanitized HTML contains %q:\n%s", forbidden, got)
+ }
+ }
+}
+
+func TestLibSanitizerRemovesUnknownElementsButKeepsText(t *testing.T) {
+ sanitizer := NewLibSanitizer()
+ input := []byte(`
+
+
+
+ safe text
+ `)
+
+ got := string(sanitizer.SanitizeBytes(input))
+
+ for _, forbidden := range []string{
+ "
+ quote text
+ `)
+
+ got := string(sanitizer.SanitizeBytes(input))
+
+ for _, forbidden := range []string{
+ "style=",
+ "class=",
+ "data-secret",
+ "id=",
+ "onclick=",
+ } {
+ if strings.Contains(got, forbidden) {
+ t.Fatalf("sanitized HTML contains %q:\n%s", forbidden, got)
+ }
+ }
+
+ for _, want := range []string{
+ "styled text",
+ `cite="https://example.com"`,
+ "quote text",
+ } {
+ if !strings.Contains(got, want) {
+ t.Fatalf("sanitized HTML does not contain %q:\n%s", want, got)
+ }
+ }
+}
+
+func TestLibSanitizerRejectsCIDWithQueryOrFragment(t *testing.T) {
+ sanitizer := NewLibSanitizer()
+ input := []byte(`
+
+
+
+ `)
+
+ got := string(sanitizer.SanitizeBytes(input))
+
+ for _, forbidden := range []string{
+ `src="cid:test@example.com?x=1"`,
+ `src="cid:test@example.com#frag"`,
+ } {
+ if strings.Contains(got, forbidden) {
+ t.Fatalf("sanitized HTML contains %q:\n%s", forbidden, got)
+ }
+ }
+
+ if !strings.Contains(got, `src="cid:test@example.com"`) {
+ t.Fatalf("sanitized HTML should keep clean cid source:\n%s", got)
+ }
+}
+
+func TestLibSanitizerRejectsInvalidDataImages(t *testing.T) {
+ sanitizer := NewLibSanitizer()
+ input := []byte(`
+
+
+
+ `)
+
+ got := string(sanitizer.SanitizeBytes(input))
+
+ for _, forbidden := range []string{
+ "not base64",
+ "data:image/svg+xml",
+ } {
+ if strings.Contains(got, forbidden) {
+ t.Fatalf("sanitized HTML contains %q:\n%s", forbidden, got)
+ }
+ }
+
+ if !strings.Contains(got, `src="data:image/png;base64,iVBORw0KGgo="`) {
+ t.Fatalf("sanitized HTML should keep valid png data URI:\n%s", got)
+ }
+}
diff --git a/internal/htmlsanitizer/sanitizer.go b/internal/htmlsanitizer/sanitizer.go
new file mode 100644
index 00000000..7196b866
--- /dev/null
+++ b/internal/htmlsanitizer/sanitizer.go
@@ -0,0 +1,5 @@
+package htmlsanitizer
+
+type Sanitizer interface { // Sanitizer is the HTML-cleaning contract; LibSanitizer in this package has a matching SanitizeBytes method.
+ SanitizeBytes(html []byte) []byte // takes HTML as bytes and returns the sanitized bytes
+}
diff --git a/view/html.go b/view/html.go
index 3600677d..40f1b633 100644
--- a/view/html.go
+++ b/view/html.go
@@ -13,12 +13,15 @@ import (
"charm.land/lipgloss/v2"
"github.com/floatpane/matcha/clib"
+ "github.com/floatpane/matcha/internal/htmlsanitizer"
"github.com/floatpane/matcha/internal/httpclient"
"github.com/floatpane/matcha/internal/loglevel"
"github.com/floatpane/matcha/theme"
lru "github.com/hashicorp/golang-lru/v2"
)
+var htmlSanitizer htmlsanitizer.Sanitizer = htmlsanitizer.NewLibSanitizer() // package-wide sanitizer; processBody runs HTML through it before renderHTMLToText
+
const termGhostty = "ghostty"
func linkStyle() lipgloss.Style {
@@ -107,6 +110,8 @@ func hyperlinkSupported() bool {
// hyperlink formats a string as either a terminal-clickable hyperlink or plain text with URL.
func hyperlink(url, text string) string {
+ url = strings.TrimSpace(url)
+ text = stripTerminalControls(text)
if text == "" {
text = url
}
@@ -124,6 +129,24 @@ func hyperlink(url, text string) string {
return fmt.Sprintf("%s <%s>", linkStyle().Render(text), linkStyle().Render(url))
}
+// isTerminalControl reports whether r is a C0 control (U+0000-U+001F), DEL
+// (U+007F) or a C1 control (U+0080-U+009F). The whole C1 range is covered,
+// not just ST (U+009C), because it also holds the 8-bit CSI (U+009B),
+// OSC (U+009D) and DCS (U+0090) introducers.
+func isTerminalControl(r rune) bool {
+	return r < 0x20 || (r >= 0x7f && r <= 0x9f)
+}
+
+// stripTerminalControls returns s with every terminal control character
+// removed, except newline and tab, which are kept so text layout survives.
+func stripTerminalControls(s string) string {
+	return strings.Map(func(r rune) rune {
+		switch {
+		case r == '\n' || r == '\t':
+			return r
+		case isTerminalControl(r):
+			return -1 // a negative rune tells strings.Map to drop the character
+		default:
+			return r
+		}
+	}, s)
+}
+
+// hasTerminalControls reports whether s contains any terminal control
+// character. Unlike stripTerminalControls, newline and tab count as controls
+// here.
+func hasTerminalControls(s string) bool {
+	return strings.IndexFunc(s, isTerminalControl) != -1
+}
+
func decodeQuotedPrintable(s string) (string, error) {
reader := quotedprintable.NewReader(strings.NewReader(s))
body, err := io.ReadAll(reader)
@@ -589,6 +612,7 @@ func processBody(rawBody, mimeType string, inline map[string]string, h1Style, h2
} else {
htmlBody = markdownToHTML([]byte(decodedBody))
}
+ htmlBody = htmlSanitizer.SanitizeBytes(htmlBody)
result, placements, err := renderHTMLToText(htmlBody, inline, h1Style, h2Style, disableImages)
if err != nil {
@@ -601,7 +625,8 @@ func processBody(rawBody, mimeType string, inline map[string]string, h1Style, h2
// keep these alive. Retry through the markdown pre-pass when the direct
// HTML path produces nothing.
if directHTML && strings.TrimSpace(result) == "" {
- result, placements, err = renderHTMLToText(markdownToHTML([]byte(decodedBody)), inline, h1Style, h2Style, disableImages)
+ fallbackHTML := htmlSanitizer.SanitizeBytes(markdownToHTML([]byte(decodedBody)))
+ result, placements, err = renderHTMLToText(fallbackHTML, inline, h1Style, h2Style, disableImages)
if err != nil {
return "", nil, err
}
@@ -643,11 +668,18 @@ func renderHTMLToText(htmlBody []byte, inline map[string]string, h1Style, h2Styl
text.WriteString("\n\n")
case clib.HElemLink:
- text.WriteString(hyperlink(elem.Attr1, elem.Text))
+ if hasTerminalControls(elem.Attr1) {
+ text.WriteString(stripTerminalControls(elem.Text))
+ } else {
+ text.WriteString(hyperlink(elem.Attr1, elem.Text))
+ }
case clib.HElemImage:
- src := elem.Attr1
- alt := elem.Attr2
+ src := strings.TrimSpace(elem.Attr1)
+ alt := stripTerminalControls(elem.Attr2)
+ if hasTerminalControls(src) {
+ continue
+ }
if !disableImages && imageProtocolSupported() {
var payload string
diff --git a/view/html_test.go b/view/html_test.go
index e916bc04..b9fc2aff 100644
--- a/view/html_test.go
+++ b/view/html_test.go
@@ -670,6 +670,106 @@ func TestProcessBodyWithHyperlinkSupport(t *testing.T) {
}
}
+// TestProcessBodySanitizesUnsafeHTMLLinks runs ProcessBody over a table of
+// HTML inputs and checks, for each case, that the expected visible text is
+// present and that none of the forbidden substrings (unsafe schemes, OSC 8
+// sequences, control bytes) appear in the processed output.
+func TestProcessBodySanitizesUnsafeHTMLLinks(t *testing.T) {
+	// t.Setenv restores each variable after the test, including putting a
+	// previously unset variable back to unset.
+	t.Setenv("TERM", "xterm-kitty")
+	t.Setenv("TERM_PROGRAM", "")
+	// Register VTE_VERSION for restoration first, then remove it for the
+	// duration of the test.
+	t.Setenv("VTE_VERSION", "")
+	if err := os.Unsetenv("VTE_VERSION"); err != nil {
+		t.Fatalf("unsetting VTE_VERSION: %v", err)
+	}
+
+	h1Style := lipgloss.NewStyle()
+	h2Style := lipgloss.NewStyle()
+	bodyStyle := lipgloss.NewStyle()
+
+	tests := []struct {
+		name              string
+		input             string
+		wantContains      string
+		forbiddenContains []string
+	}{
+		{
+			name:         "javascript link is rendered as text only",
+			input:        `Click here`,
+			wantContains: "Click here",
+			forbiddenContains: []string{
+				"javascript:",
+				"\x1b]8;;javascript:",
+			},
+		},
+		{
+			name:         "mixed-case javascript link is rejected",
+			input:        `Click here`,
+			wantContains: "Click here",
+			forbiddenContains: []string{
+				"JaVaScRiPt:",
+				"javascript:",
+			},
+		},
+		{
+			name: "unsafe image source is not linked",
+			input: `
After`,
+			wantContains: "After",
+			forbiddenContains: []string{
+				"javascript:",
+				"bad image",
+				"Click here to view image",
+			},
+		},
+		{
+			name:         "data image href is not rendered as a link",
+			input:        `data link`,
+			wantContains: "data link",
+			forbiddenContains: []string{
+				"data:image",
+				"\x1b]8;;data:",
+			},
+		},
+		{
+			name:         "cid href is not rendered as a link",
+			input:        `cid link`,
+			wantContains: "cid link",
+			forbiddenContains: []string{
+				"cid:test-image",
+				"\x1b]8;;cid:",
+			},
+		},
+		{
+			name:         "OSC control characters are stripped from safe links",
+			input:        "safe",
+			wantContains: "safe",
+			forbiddenContains: []string{
+				"\x1b]8;;file:",
+				"file:///tmp/pwn",
+				"\x07",
+			},
+		},
+	}
+
+	for _, tt := range tests {
+		t.Run(tt.name, func(t *testing.T) {
+			processed, _, err := ProcessBody(tt.input, BodyMIMETypeHTML, h1Style, h2Style, bodyStyle, false)
+			if err != nil {
+				t.Fatalf("ProcessBody() failed: %v", err)
+			}
+			if !strings.Contains(processed, tt.wantContains) {
+				t.Fatalf("processed body does not contain %q:\n%q", tt.wantContains, processed)
+			}
+			for _, forbidden := range tt.forbiddenContains {
+				if strings.Contains(processed, forbidden) {
+					t.Fatalf("processed body contains forbidden %q:\n%q", forbidden, processed)
+				}
+			}
+		})
+	}
+}
+
func TestProcessBodyWithImageProtocol(t *testing.T) {
// Save original environment variables
origTerm := os.Getenv("TERM")
From 5964a34d96c3e09f5dabf95de8b7da56cab112ee Mon Sep 17 00:00:00 2001
From: FromSi
Date: Mon, 25 May 2026 17:34:32 +0500
Subject: [PATCH 2/3] fix(view): only hyperlink remote image fallbacks
---
view/html.go | 6 +++++-
view/html_test.go | 39 +++++++++++++++++++++++++++++++++++++++
2 files changed, 44 insertions(+), 1 deletion(-)
diff --git a/view/html.go b/view/html.go
index 40f1b633..e047ff80 100644
--- a/view/html.go
+++ b/view/html.go
@@ -717,7 +717,7 @@ func renderHTMLToText(htmlBody []byte, inline map[string]string, h1Style, h2Styl
}
debugImageProtocol("no payload for src=%s", src)
}
- if hyperlinkSupported() {
+ if isRemoteImageURL(src) && hyperlinkSupported() {
fmt.Fprintf(&text, "\n %s \n", hyperlink(src, fmt.Sprintf("[Click here to view image: %s]", alt)))
} else {
fmt.Fprintf(&text, "\n %s \n", linkStyle().Render(fmt.Sprintf("[Image: %s, %s]", alt, src)))
@@ -787,6 +787,10 @@ func renderHTMLToText(htmlBody []byte, inline map[string]string, h1Style, h2Styl
return result, placements, nil
}
+func isRemoteImageURL(src string) bool {
+ return strings.HasPrefix(src, "http://") || strings.HasPrefix(src, "https://")
+}
+
func tableHeaderStyle() lipgloss.Style {
return lipgloss.NewStyle().Bold(true).Foreground(theme.ActiveTheme.Accent)
}
diff --git a/view/html_test.go b/view/html_test.go
index b9fc2aff..15ea03fe 100644
--- a/view/html_test.go
+++ b/view/html_test.go
@@ -770,6 +770,45 @@ func TestProcessBodySanitizesUnsafeHTMLLinks(t *testing.T) {
}
}
+// TestProcessBodyDoesNotHyperlinkNonRemoteImageFallbacks processes the fixture
+// with true as the final ProcessBody argument (presumably disableImages; to be
+// confirmed against ProcessBody) and asserts that the plain "[Image: ...]"
+// fallbacks for data: and cid: sources are present, with no "Click here to
+// view image" text and no OSC 8 sequence for those schemes.
+func TestProcessBodyDoesNotHyperlinkNonRemoteImageFallbacks(t *testing.T) {
+	for _, kv := range [][2]string{
+		{"TERM", "xterm"},
+		{"TERM_PROGRAM", ""},
+		{"WEZTERM_EXECUTABLE", "/usr/bin/wezterm"},
+	} {
+		t.Setenv(kv[0], kv[1])
+	}
+
+	input := `
+
+
+ `
+
+	h1, h2, body := lipgloss.NewStyle(), lipgloss.NewStyle(), lipgloss.NewStyle()
+	processed, _, err := ProcessBody(input, BodyMIMETypeHTML, h1, h2, body, true)
+	if err != nil {
+		t.Fatalf("ProcessBody() failed: %v", err)
+	}
+
+	expected := []string{
+		"[Image: data image, data:image/png;base64,iVBORw0KGgo=]",
+		"[Image: cid image, cid:test-image@example.com]",
+	}
+	for i := range expected {
+		if !strings.Contains(processed, expected[i]) {
+			t.Fatalf("processed body does not contain %q:\n%q", expected[i], processed)
+		}
+	}
+
+	banned := []string{
+		"Click here to view image",
+		"\x1b]8;;data:",
+		"\x1b]8;;cid:",
+	}
+	for i := range banned {
+		if strings.Contains(processed, banned[i]) {
+			t.Fatalf("processed body contains forbidden %q:\n%q", banned[i], processed)
+		}
+	}
+}
+
func TestProcessBodyWithImageProtocol(t *testing.T) {
// Save original environment variables
origTerm := os.Getenv("TERM")
From 23b10eec5b3cb52d72be8dcfa78b61ab9443278e Mon Sep 17 00:00:00 2001
From: FromSi
Date: Tue, 26 May 2026 18:24:07 +0500
Subject: [PATCH 3/3] fix(view): accept unpadded base64 data images and uppercase URL schemes
---
internal/htmlsanitizer/lib_sanitizer.go | 6 +++++-
internal/htmlsanitizer/lib_sanitizer_test.go | 4 ++++
view/html.go | 1 +
view/html_test.go | 18 ++++++++++++++++++
4 files changed, 28 insertions(+), 1 deletion(-)
diff --git a/internal/htmlsanitizer/lib_sanitizer.go b/internal/htmlsanitizer/lib_sanitizer.go
index fedebc2c..7405c7bd 100644
--- a/internal/htmlsanitizer/lib_sanitizer.go
+++ b/internal/htmlsanitizer/lib_sanitizer.go
@@ -47,7 +47,11 @@ func newPolicy() *bluemonday.Policy {
if prefix == "" {
return false
}
- _, err := base64.StdEncoding.DecodeString(u.Opaque[len(prefix):])
+ payload := u.Opaque[len(prefix):]
+ if _, err := base64.StdEncoding.DecodeString(payload); err == nil {
+ return true
+ }
+ _, err := base64.RawStdEncoding.DecodeString(payload)
return err == nil
})
return p
diff --git a/internal/htmlsanitizer/lib_sanitizer_test.go b/internal/htmlsanitizer/lib_sanitizer_test.go
index 1081170d..fefc984f 100644
--- a/internal/htmlsanitizer/lib_sanitizer_test.go
+++ b/internal/htmlsanitizer/lib_sanitizer_test.go
@@ -254,6 +254,7 @@ func TestLibSanitizerRejectsInvalidDataImages(t *testing.T) {
+
`)
got := string(sanitizer.SanitizeBytes(input))
@@ -270,4 +271,7 @@ func TestLibSanitizerRejectsInvalidDataImages(t *testing.T) {
if !strings.Contains(got, `src="data:image/png;base64,iVBORw0KGgo="`) {
t.Fatalf("sanitized HTML should keep valid png data URI:\n%s", got)
}
+ if !strings.Contains(got, `src="data:image/png;base64,iVBORw0KGgo"`) {
+ t.Fatalf("sanitized HTML should keep valid unpadded png data URI:\n%s", got)
+ }
}
diff --git a/view/html.go b/view/html.go
index e047ff80..4f2578f0 100644
--- a/view/html.go
+++ b/view/html.go
@@ -788,6 +788,7 @@ func renderHTMLToText(htmlBody []byte, inline map[string]string, h1Style, h2Styl
}
func isRemoteImageURL(src string) bool {
+ src = strings.ToLower(src)
return strings.HasPrefix(src, "http://") || strings.HasPrefix(src, "https://")
}
diff --git a/view/html_test.go b/view/html_test.go
index 15ea03fe..b75e2d76 100644
--- a/view/html_test.go
+++ b/view/html_test.go
@@ -809,6 +809,24 @@ func TestProcessBodyDoesNotHyperlinkNonRemoteImageFallbacks(t *testing.T) {
}
}
+func TestIsRemoteImageURLAllowsUppercaseHTTPSScheme(t *testing.T) {
+ tests := []struct {
+ src string
+ want bool
+ }{
+ {src: "http://example.com/image.png", want: true},
+ {src: "HTTPS://example.com/image.png", want: true},
+ {src: "cid:test-image@example.com", want: false},
+ {src: "data:image/png;base64,iVBORw0KGgo=", want: false},
+ }
+
+ for _, tt := range tests {
+ if got := isRemoteImageURL(tt.src); got != tt.want {
+ t.Fatalf("isRemoteImageURL(%q) = %v, want %v", tt.src, got, tt.want)
+ }
+ }
+}
+
func TestProcessBodyWithImageProtocol(t *testing.T) {
// Save original environment variables
origTerm := os.Getenv("TERM")