diff --git a/go.mod b/go.mod index 8cf8cf5..fea5801 100644 --- a/go.mod +++ b/go.mod @@ -1,3 +1,3 @@ module github.com/bartventer/httpcache -go 1.24 +go 1.25 diff --git a/store/acceptance/acceptance.go b/store/acceptance/acceptance.go index 21a4acf..2c2fdfc 100644 --- a/store/acceptance/acceptance.go +++ b/store/acceptance/acceptance.go @@ -18,6 +18,7 @@ package acceptance import ( "bytes" "slices" + "strings" "testing" "github.com/bartventer/httpcache/internal/testutil" @@ -126,7 +127,11 @@ func testKeys(t *testing.T, factory FactoryFunc) { if !ok { t.Skip("Cache implementation does not support key listing") } - keys := []string{"foo", "bar", "baz"} + keys := []string{ + "foo", + "bar", + "baz" + strings.Repeat("x", 255), // ensure long key handling + } for _, key := range keys { value := []byte("value for " + key) testutil.RequireNoError(t, cache.Set(key, value), "Set failed for key "+key) diff --git a/store/fscache/filenamer.go b/store/fscache/filenamer.go new file mode 100644 index 0000000..0c7b577 --- /dev/null +++ b/store/fscache/filenamer.go @@ -0,0 +1,90 @@ +// Copyright (c) 2025 Bart Venter +// +// Licensed under the Apache License, Version 2.0 (the "License"); +// you may not use this file except in compliance with the License. +// You may obtain a copy of the License at +// +// http://www.apache.org/licenses/LICENSE-2.0 +// +// Unless required by applicable law or agreed to in writing, software +// distributed under the License is distributed on an "AS IS" BASIS, +// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +// See the License for the specific language governing permissions and +// limitations under the License. + +package fscache + +import ( + "encoding/base64" + "path/filepath" + "strings" +) + +type ( + fileNamer interface{ FileName(key string) string } + fileNameKeyer interface { + KeyFromFileName(name string) (string, error) + } +) + +type ( + fileNamerFunc func(key string) string + fileNameKeyerFunc func(name string) (string, error) +) + +func (f fileNamerFunc) FileName(key string) string { return f(key) } +func (f fileNameKeyerFunc) KeyFromFileName(name string) (string, error) { return f(name) } + +// fragmentSize is the maximum filename length per directory level. +// 48 is chosen so that 5 fragments fit within 240 chars, well under common filesystem limits. +const fragmentSize = 48 + +// fragmentingFileNamer returns a fileNamer that fragments long keys into directory structures. +// This helps avoid filesystem limits on filename lengths. +func fragmentingFileNamer() fileNamer { + return fileNamerFunc(fragmentFileName) +} + +func fragmentFileName(key string) string { + encoded := base64.RawURLEncoding.EncodeToString([]byte(key)) + if len(encoded) <= 255 { // Common filesystem filename limit + return encoded + } + + // Fragment the encoded string + var parts []string + for i := 0; i < len(encoded); i += fragmentSize { + end := min(i+fragmentSize, len(encoded)) + parts = append(parts, encoded[i:end]) + } + return filepath.Join(parts...) +} + +func fragmentingFileNameKeyer() fileNameKeyer { + return fileNameKeyerFunc(fragmentedFileNameToKey) +} + +var filepathSeparatorReplacer = strings.NewReplacer( + string(filepath.Separator), + "", +) + +func fragmentedFileNameToKey(name string) (string, error) { + // Check if the name contains path separators (i.e., is fragmented) + if strings.ContainsRune(name, filepath.Separator) { + // Handle fragmented path + base64Str := filepathSeparatorReplacer.Replace(name) + decoded, err := base64.RawURLEncoding.DecodeString(base64Str) + if err != nil { + return "", err + } + return string(decoded), nil + } + + // Handle plain base64 + decoded, err := base64.RawURLEncoding.DecodeString(name) + if err != nil { + return "", err + } + return string(decoded), nil +} diff --git a/store/fscache/filenamer_test.go b/store/fscache/filenamer_test.go new file mode 100644 index 0000000..6168b20 --- /dev/null +++ b/store/fscache/filenamer_test.go @@ -0,0 +1,109 @@ +// Copyright (c) 2025 Bart Venter +// +// Licensed under the Apache License, Version 2.0 (the "License"); +// you may not use this file except in compliance with the License. +// You may obtain a copy of the License at +// +// http://www.apache.org/licenses/LICENSE-2.0 +// +// Unless required by applicable law or agreed to in writing, software +// distributed under the License is distributed on an "AS IS" BASIS, +// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +// See the License for the specific language governing permissions and +// limitations under the License. + +package fscache + +import ( + "encoding/base64" + "fmt" + "path/filepath" + "strings" + "testing" + + "github.com/bartventer/httpcache/internal/testutil" +) + +func Example_fragmentFileName_short() { + url := "https://short.url/test" + path := fragmentFileName(url) + fmt.Println("Fragmented path:", path) + // Output: + // Fragmented path: aHR0cHM6Ly9zaG9ydC51cmwvdGVzdA +} + +func Example_fragmentFileName_long() { + url := "https://example.com/" + strings.Repeat("a", 255) + path := fragmentFileName(url) + fmt.Println("Fragmented path:", path) + // Output: + // Fragmented path: aHR0cHM6Ly9leGFtcGxlLmNvbS9hYWFhYWFhYWFhYWFhYWFh/YWFhYWFhYWFhYWFhYWFhYWFhYWFhYWFhYWFhYWFhYWFhYWFh/YWFhYWFhYWFhYWFhYWFhYWFhYWFhYWFhYWFhYWFhYWFhYWFh/YWFhYWFhYWFhYWFhYWFhYWFhYWFhYWFhYWFhYWFhYWFhYWFh/YWFhYWFhYWFhYWFhYWFhYWFhYWFhYWFhYWFhYWFhYWFhYWFh/YWFhYWFhYWFhYWFhYWFhYWFhYWFhYWFhYWFhYWFhYWFhYWFh/YWFhYWFhYWFhYWFhYWFhYWFhYWFhYWFhYWFhYWFhYWFhYWFh/YWFhYWFhYWFhYWFhYWFhYWFhYWFhYWE +} + +func Test_fragmentFileName_fragmentedFileNameToKey(t *testing.T) { + cases := []struct { + name string + url string + assertion func(tt *testing.T, encoded, decoded string) + }{ + { + name: "Empty string", + url: "", + }, + { + name: "Short ASCII URL", + url: "https://example.com/test?foo=bar", + }, + { + name: "Long ASCII URL", + url: "https://example.com/" + strings.Repeat("a", 1000), + assertion: func(tt *testing.T, encoded string, _ string) { + for frag := range strings.SplitSeq(encoded, string(filepath.Separator)) { + testutil.AssertTrue( + tt, + len(frag) <= fragmentSize, + "Fragment too long: got %d, want <= %d", + len(frag), + fragmentSize, + ) + } + }, + }, + { + name: "Unicode URL", + url: "https://例子.ζ΅‹θ―•?emoji=πŸš€", + }, + { + name: "URL with separators", + url: "https://foo/bar/baz?x=y/z", + }, + } + + for _, tc := range cases { + t.Run(tc.name, func(t *testing.T) { + encoded := fragmentFileName(tc.url) + // Roundtrip test + decoded, err := fragmentedFileNameToKey(encoded) + testutil.RequireNoError(t, err) + testutil.AssertEqual(t, tc.url, decoded, "Roundtrip failed") + if tc.assertion != nil { + tc.assertion(t, encoded, decoded) + } + }) + } +} + +func Test_fragmentedFileNameToKey_InvalidBase64(t *testing.T) { + invalidPaths := []string{ + "!!!notbase64", + "this/is/not/valid/base64/===", + "foo/bar/baz", + } + for _, path := range invalidPaths { + t.Run(path, func(t *testing.T) { + _, err := fragmentedFileNameToKey(path) + var cie base64.CorruptInputError + testutil.RequireErrorAs(t, err, &cie) + }) + } +} diff --git a/store/fscache/fscache.go b/store/fscache/fscache.go index 079aa43..8dc86ba 100644 --- a/store/fscache/fscache.go +++ b/store/fscache/fscache.go @@ -49,16 +49,17 @@ import ( "cmp" "context" "crypto/rand" - "encoding/base64" + "io/fs" + "strings" + "errors" "fmt" "io" - "io/fs" + "net/url" "os" "path/filepath" "slices" - "strings" "time" "github.com/bartventer/httpcache/store" @@ -110,7 +111,7 @@ type fsCache struct { // internal components fn fileNamer // generates file names from keys - fnk fileNameKeyer // extracts keys from file names + fnk fileNameKeyer // recovers keys from file names dw dirWalker // used for directory walking } @@ -251,39 +252,22 @@ func (c *fsCache) initialize(appname string) error { if err != nil { return fmt.Errorf("fscache: could not open cache directory %q: %w", c.base, err) } - c.fn = fileNamerFunc(safeFileName) - c.fnk = fileNameKeyerFunc(keyFromFileName) + c.fn = fragmentingFileNamer() + c.fnk = fragmentingFileNameKeyer() c.dw = dirWalkerFunc(filepath.WalkDir) c.timeout = cmp.Or(c.timeout, defaultTimeout) return nil } -type ( - fileNamer interface{ FileName(key string) string } - fileNameKeyer interface{ KeyFromFileName(name string) string } - dirWalker interface { - WalkDir(root string, fn fs.WalkDirFunc) error - } -) +type dirWalker interface { + WalkDir(root string, fn fs.WalkDirFunc) error +} -type ( - fileNamerFunc func(key string) string - fileNameKeyerFunc func(name string) string - dirWalkerFunc func(root string, fn fs.WalkDirFunc) error -) +type dirWalkerFunc func(root string, fn fs.WalkDirFunc) error -func (f fileNamerFunc) FileName(key string) string { return f(key) } -func (f fileNameKeyerFunc) KeyFromFileName(name string) string { return f(name) } func (f dirWalkerFunc) WalkDir(root string, fn fs.WalkDirFunc) error { return f(root, fn) } -func safeFileName(key string) string { return base64.RawURLEncoding.EncodeToString([]byte(key)) } - -func keyFromFileName(name string) string { - data, _ := base64.RawURLEncoding.DecodeString(name) - return string(data) -} - var _ driver.Conn = (*fsCache)(nil) var _ expapi.KeyLister = (*fsCache)(nil) @@ -366,7 +350,11 @@ func (c *fsCache) set(key string, entry []byte) error { return err } } - f, err := c.root.Create(c.fn.FileName(key)) + name := c.fn.FileName(key) + if err := c.root.MkdirAll(filepath.Dir(name), 0o755); err != nil { + return err + } + f, err := c.root.Create(name) if err != nil { return err } @@ -448,7 +436,13 @@ func (c *fsCache) keys(prefix string) ([]string, error) { if d.IsDir() { return nil } - if key := c.fnk.KeyFromFileName(filepath.Base(path)); strings.HasPrefix(key, prefix) { + key, err := c.fnk.KeyFromFileName( + strings.TrimPrefix(path, dirname+string(os.PathSeparator)), + ) + if err != nil { + return err + } + if strings.HasPrefix(key, prefix) { keys = append(keys, key) } return nil diff --git a/store/fscache/fscache_test.go b/store/fscache/fscache_test.go index 88068a6..f9ecd15 100644 --- a/store/fscache/fscache_test.go +++ b/store/fscache/fscache_test.go @@ -71,8 +71,8 @@ func Test_fsCache_KeysError(t *testing.T) { cache.fn = fileNamerFunc(func(key string) string { return key }) - cache.fnk = fileNameKeyerFunc(func(name string) string { - return name + cache.fnk = fileNameKeyerFunc(func(name string) (string, error) { + return name, nil }) cache.dw = dirWalkerFunc(func(root string, fn fs.WalkDirFunc) error { return testutil.ErrSample