diff --git a/go.mod b/go.mod index da748337..9748f570 100644 --- a/go.mod +++ b/go.mod @@ -23,6 +23,7 @@ require ( github.com/hashicorp/golang-lru/v2 v2.0.7 github.com/knadh/go-pop3 v1.0.2 github.com/mattn/go-sixel v0.0.9 + github.com/microcosm-cc/bluemonday v1.0.27 github.com/wagslane/go-password-validator v0.3.0 github.com/yuin/goldmark v1.8.2 github.com/yuin/gopher-lua v1.1.2 @@ -37,6 +38,7 @@ require ( require ( github.com/andybalholm/cascadia v1.3.3 // indirect github.com/atotto/clipboard v0.1.4 // indirect + github.com/aymerick/douceur v0.2.0 // indirect github.com/charmbracelet/colorprofile v0.4.3 // indirect github.com/charmbracelet/ultraviolet v0.0.0-20260416155717-489999b90468 // indirect github.com/charmbracelet/x/term v0.2.2 // indirect @@ -47,6 +49,7 @@ require ( github.com/cloudflare/circl v1.6.3 // indirect github.com/danieljoos/wincred v1.2.3 // indirect github.com/godbus/dbus/v5 v5.2.2 // indirect + github.com/gorilla/css v1.0.1 // indirect github.com/lucasb-eyer/go-colorful v1.4.0 // indirect github.com/mattn/go-runewidth v0.0.23 // indirect github.com/muesli/cancelreader v0.2.2 // indirect diff --git a/go.sum b/go.sum index 7e9af686..4294b561 100644 --- a/go.sum +++ b/go.sum @@ -25,6 +25,8 @@ github.com/atotto/clipboard v0.1.4 h1:EH0zSVneZPSuFR11BlR9YppQTVDbh5+16AmcJi4g1z github.com/atotto/clipboard v0.1.4/go.mod h1:ZY9tmq7sm5xIbd9bOK4onWV4S6X0u6GY7Vn0Yu86PYI= github.com/aymanbagabas/go-udiff v0.4.1 h1:OEIrQ8maEeDBXQDoGCbbTTXYJMYRCRO1fnodZ12Gv5o= github.com/aymanbagabas/go-udiff v0.4.1/go.mod h1:0L9PGwj20lrtmEMeyw4WKJ/TMyDtvAoK9bf2u/mNo3w= +github.com/aymerick/douceur v0.2.0 h1:Mv+mAeH1Q+n9Fr+oyamOlAkUNPWPlA8PPGR0QAaYuPk= +github.com/aymerick/douceur v0.2.0/go.mod h1:wlT5vV2O3h55X9m7iVYN0TBM0NH/MmbLnd30/FjWUq4= github.com/bwesterb/go-ristretto v1.2.3/go.mod h1:fUIoIZaG73pV5biE2Blr2xEzDoMj7NFEuV9ekS419A0= github.com/charmbracelet/colorprofile v0.4.3 h1:QPa1IWkYI+AOB+fE+mg/5/4HRMZcaXex9t5KX76i20Q= github.com/charmbracelet/colorprofile v0.4.3/go.mod h1:/zT4BhpD5aGFpqQQqw7a+VtHCzu+zrQtt1zhMt9mR4Q= @@ -72,6 +74,8 @@ github.com/google/go-cmp v0.6.0 h1:ofyhxvXcZhMsU5ulbFiLKl/XBFqE1GSq7atu8tAmTRI= github.com/google/go-cmp v0.6.0/go.mod h1:17dUlkBOakJ0+DkrSSNjCkIjxS6bF9zb3elmeNGIjoY= github.com/google/uuid v1.6.0 h1:NIvaJDMOsjHA8n1jAhLSgzrAzy1Hgr+hNrb57e+94F0= github.com/google/uuid v1.6.0/go.mod h1:TIyPZe4MgqvfeYDBFedMoGGpEw/LqOeaOT+nhxU+yHo= +github.com/gorilla/css v1.0.1 h1:ntNaBIghp6JmvWnxbZKANoLyuXTPZ4cAMlo6RyhlbO8= +github.com/gorilla/css v1.0.1/go.mod h1:BvnYkspnSzMmwRK+b8/xgNPLiIuNZr6vbZBTPQ2A3b0= github.com/hashicorp/golang-lru/v2 v2.0.7 h1:a+bsQ5rvGLjzHuww6tVxozPZFVghXaHOwFs4luLUK2k= github.com/hashicorp/golang-lru/v2 v2.0.7/go.mod h1:QeFd9opnmA6QUJc5vARoKUSoFhyfM2/ZepoAG6RGpeM= github.com/knadh/go-pop3 v1.0.2 h1:gbdtwzEYedLVos/vpebM2d73NTyZxEgjgRJ4S77HlzM= @@ -84,6 +88,8 @@ github.com/mattn/go-runewidth v0.0.23 h1:7ykA0T0jkPpzSvMS5i9uoNn2Xy3R383f9HDx3Ry github.com/mattn/go-runewidth v0.0.23/go.mod h1:XBkDxAl56ILZc9knddidhrOlY5R/pDhgLpndooCuJAs= github.com/mattn/go-sixel v0.0.9 h1:ncx/rVU35Ut7/6gpVk4deC4/Wp2js9fDKmFmWnzmGoY= github.com/mattn/go-sixel v0.0.9/go.mod h1:mfichvavqIDFW14LGU24ux/UZ/wF0/hG+4pUWOWrQgM= +github.com/microcosm-cc/bluemonday v1.0.27 h1:MpEUotklkwCSLeH+Qdx1VJgNqLlpY2KXwXFM08ygZfk= +github.com/microcosm-cc/bluemonday v1.0.27/go.mod h1:jFi9vgW+H7c3V0lb6nR74Ib/DIB5OBs92Dimizgw2cA= github.com/muesli/cancelreader v0.2.2 h1:3I4Kt4BQjOR54NavqnDogx/MIoWBFa0StPA8ELUXHmA= github.com/muesli/cancelreader v0.2.2/go.mod h1:3XuTXfFS2VjM+HTLZY9Ak0l6eUKfijIfMUZ4EgX0QYo= github.com/pmezard/go-difflib v1.0.0 h1:4DBwDE0NGyQoBHbLQYPwSUPoCMWR5BEzIk/f1lZbAQM= diff --git a/internal/htmlsanitizer/lib_sanitizer.go b/internal/htmlsanitizer/lib_sanitizer.go new file mode 100644 index 00000000..fedebc2c --- /dev/null +++ b/internal/htmlsanitizer/lib_sanitizer.go @@ -0,0 +1,54 @@ +package htmlsanitizer + +import ( + "encoding/base64" + "net/url" + "regexp" + + "github.com/microcosm-cc/bluemonday" +) + +type LibSanitizer struct { + policy *bluemonday.Policy +} + +func NewLibSanitizer() LibSanitizer { + return LibSanitizer{policy: newPolicy()} +} + +func (s LibSanitizer) SanitizeBytes(html []byte) []byte { + return s.policy.SanitizeBytes(html) +} + +func newPolicy() *bluemonday.Policy { + p := bluemonday.NewPolicy() + linkURLPattern := regexp.MustCompile(`(?i)^(https?://|mailto:|tel:)`) + imageURLPattern := regexp.MustCompile(`(?i)^(https?://|cid:|data:image/)`) + dataImagePrefixPattern := regexp.MustCompile(`(?i)^image/(gif|jpe?g|png|webp);base64,`) + p.AllowElements( + "a", "b", "blockquote", "br", "code", "div", "em", "h1", "h2", + "i", "img", "li", "ol", "p", "pre", "span", "strong", "table", + "tbody", "td", "th", "thead", "tr", "u", "ul", + ) + p.AllowAttrs("href").Matching(linkURLPattern).OnElements("a") + p.AllowAttrs("src").Matching(imageURLPattern).OnElements("img") + p.AllowAttrs("alt").OnElements("img") + p.AllowAttrs("cite").OnElements("blockquote") + p.RequireParseableURLs(true) + p.AllowURLSchemes("http", "https", "mailto", "tel") + p.AllowURLSchemeWithCustomPolicy("cid", func(u *url.URL) bool { + return u.Opaque != "" && u.RawQuery == "" && u.Fragment == "" + }) + p.AllowURLSchemeWithCustomPolicy("data", func(u *url.URL) bool { + if u.RawQuery != "" || u.Fragment != "" { + return false + } + prefix := dataImagePrefixPattern.FindString(u.Opaque) + if prefix == "" { + return false + } + _, err := base64.StdEncoding.DecodeString(u.Opaque[len(prefix):]) + return err == nil + }) + return p +} diff --git a/internal/htmlsanitizer/lib_sanitizer_test.go b/internal/htmlsanitizer/lib_sanitizer_test.go new file mode 100644 index 00000000..1081170d --- /dev/null +++ b/internal/htmlsanitizer/lib_sanitizer_test.go @@ -0,0 +1,273 @@ +package htmlsanitizer + +import ( + "strings" + "testing" +) + +func TestLibSanitizerRemovesUnsafeHTML(t *testing.T) { + sanitizer := NewLibSanitizer() + input := []byte(` +
Hello
+ + + bad link + good link +
+
+ `)
+
+ got := string(sanitizer.SanitizeBytes(input))
+
+ for _, want := range []string{
+ `src="http://example.com/image.png"`,
+ `src="https://example.com/image.png"`,
+ `src="cid:test@example.com"`,
+ `src="data:image/png;base64,iVBORw0KGgo="`,
+ } {
+ if !strings.Contains(got, want) {
+ t.Fatalf("sanitized HTML does not contain %q:\n%s", want, got)
+ }
+ }
+
+ for _, forbidden := range []string{
+ "src=\"javascript:",
+ "src=\"file:",
+ "src=\"data:text/html",
+ "src=\"/relative.png",
+ } {
+ if strings.Contains(got, forbidden) {
+ t.Fatalf("sanitized HTML contains %q:\n%s", forbidden, got)
+ }
+ }
+}
+
+func TestLibSanitizerRemovesUnknownElementsButKeepsText(t *testing.T) {
+ sanitizer := NewLibSanitizer()
+ input := []byte(`
+
+
+
+ safe text
+ `) + + got := string(sanitizer.SanitizeBytes(input)) + + for _, forbidden := range []string{ + "