diff --git a/component/file_cache/file_cache.go b/component/file_cache/file_cache.go index 99ca6580c..ae5e886e0 100644 --- a/component/file_cache/file_cache.go +++ b/component/file_cache/file_cache.go @@ -2141,17 +2141,18 @@ func (fc *FileCache) GetAttr(options internal.GetAttrOptions) (*internal.ObjAttr // Don't log these by default, as it noticeably affects performance // log.Trace("FileCache::GetAttr : %s", options.Name) - // For get attr, there are three different path situations we have to potentially handle. - // 1. Path in cloud storage but not in local cache - // 2. Path not in cloud storage but in local cache (this could happen if we recently created the file [and are currently writing to it]) (also supports immutable containers) - // 3. Path in cloud storage and in local cache (this could result in dirty properties on the service if we recently wrote to the file) + // For get attr, there are four different file state situations we have to potentially handle. + // 1. File exists in cloud storage but not in local cache (this could happen if we recently created the file [and are currently writing to it]) (also supports immutable containers) + // 2. File exists in local cache but not in cloud storage (e.g., newly created and not uploaded yet; also supports immutable containers): serve local attributes. + // 3. File exists in both cloud storage and local cache: start from cloud attributes, but override size/mtime from local cache. + // 4. File is open in local cache and dirty (local cache is the source of truth) // If the file is being downloaded or deleted, the size and mod time will be incorrect // wait for download or deletion to complete before getting local file info flock := fc.fileLocks.Get(options.Name) flock.RLock() - // Path in local cache, open, and dirty so cache is the source of truth for attributes. + // Case 4: File in local cache, open, and dirty so cache is the source of truth for attributes. localPath := filepath.Join(fc.tmpPath, options.Name) info, localErr := os.Stat(localPath) if localErr != nil && !isNotExist(localErr) { @@ -2403,35 +2404,27 @@ func (fc *FileCache) TruncateFile(options internal.TruncateFileOptions) error { f := options.Handle.GetFileObject() if f == nil { - // Recover missing fd on valid handles by reopening the cached path. - // This avoids issues when truncate arrives before the handle has a live file object. + // File object is missing, try to recover by reopening the file localPath := filepath.Join(fc.tmpPath, options.Name) - reopened, reopenErr := common.OpenFile(localPath, os.O_RDWR, fc.defaultPermission) - if reopenErr == nil { - options.Handle.UnixFD = uint64(reopened.Fd()) - options.Handle.SetFileObject(reopened) - f = reopened + newFile, err := common.OpenFile(localPath, os.O_RDWR, 0666) + if err != nil { + log.Err( + "FileCache::TruncateFile : error recovering file object for %s [%v]", + options.Handle.Path, + err, + ) + return err } + options.Handle.SetFileObject(newFile) + options.Handle.UnixFD = uint64(newFile.Fd()) + f = newFile } - if f == nil { - log.Warn( - "FileCache::TruncateFile : missing fd in handle, fallback to path truncate %s", - options.Name, - ) - options.Handle = nil - } - } - - if options.Handle != nil { - f := options.Handle.GetFileObject() - if f == nil { - return syscall.EBADF - } - + // Get current file size before truncating + currentSize := int64(0) info, err := f.Stat() - if err == nil && info.Size() == options.NewSize { - return nil + if err == nil { + currentSize = info.Size() } err = f.Truncate(options.NewSize) @@ -2444,7 +2437,10 @@ func (fc *FileCache) TruncateFile(options internal.TruncateFileOptions) error { return err } - fc.setHandleDirty(options.Handle) + // Only mark handle dirty if the size actually changed + if currentSize != options.NewSize { + fc.setHandleDirty(options.Handle) + } return nil } diff --git a/component/s3storage/client.go b/component/s3storage/client.go index 90ef220a4..2ee722cb6 100644 --- a/component/s3storage/client.go +++ b/component/s3storage/client.go @@ -490,7 +490,15 @@ func (cl *Client) RenameFile( ) error { log.Trace("Client::RenameFile : %s -> %s", source, target) - err := cl.renameObject( + isDir, err := cl.directoryExistsForTarget(ctx, target) + if err != nil { + return err + } + if isDir { + return syscall.EISDIR + } + + err = cl.renameObject( ctx, renameObjectOptions{source: source, target: target, isSymLink: isSymLink}, ) @@ -506,6 +514,22 @@ func (cl *Client) RenameFile( return err } +func (cl *Client) directoryExistsForTarget(ctx context.Context, name string) (bool, error) { + dirName := internal.ExtendDirName(name) + if !shouldProbeDirMarker(dirName, false, cl.Config.skipDirProbeOnFileExt) { + return false, nil + } + + _, err := cl.getDirectoryAttr(ctx, dirName, false) + if err == nil { + return true, nil + } + if err == syscall.ENOENT { + return false, nil + } + return false, err +} + // RenameDirectory : Rename the directory func (cl *Client) RenameDirectory(ctx context.Context, source string, target string) error { log.Trace("Client::RenameDirectory : %s -> %s", source, target) @@ -593,7 +617,7 @@ func (cl *Client) GetAttr(ctx context.Context, name string) (*internal.ObjAttr, } if err != syscall.ENOENT { log.Err("Client::GetAttr : Failed to getFileAttr(%s). Here's why: %v", name, err) - } else if !shouldProbeDirMarker(dirName, explicitDirLookup) { + } else if !shouldProbeDirMarker(dirName, explicitDirLookup, cl.Config.skipDirProbeOnFileExt) { // For obvious file-like names, skip expensive directory probing on miss-heavy paths. return nil, syscall.ENOENT } @@ -624,9 +648,28 @@ func (cl *Client) getDirectoryAttr( ) (*internal.ObjAttr, error) { log.Trace("Client::getDirectoryAttr : name %s", dirName) - // When directory markers are enabled, check for the marker first via - // HeadObject (cheap, single-key lookup) before falling back to a List. - if cl.Config.enableDirMarker && shouldProbeDirMarker(dirName, explicitDirLookup) { + objects, _, listErr := cl.List(ctx, dirName, nil, 1) + + // Otherwise, the cloud does not support directory markers, or there is no + // marker, so look for an object in the directory. + if listErr != nil { + log.Err("Client::getDirectoryAttr : List(%s) failed. Here's why: %v", dirName, listErr) + return nil, listErr + } + if len(objects) > 0 { + // create and return an objAttr for the directory + attr := internal.CreateObjAttrDir(dirName) + return attr, nil + } + + // Only check for explicit empty directory markers when needed. + // For file-like names, this saves one extra HeadObject + // call on miss-heavy paths that are not directories. + if cl.Config.enableDirMarker && shouldProbeDirMarker( + dirName, + explicitDirLookup, + cl.Config.skipDirProbeOnFileExt, + ) { headAttr, headErr := cl.headObject(ctx, dirName, false, true) if headErr == nil { return headAttr, nil @@ -644,7 +687,7 @@ func (cl *Client) getDirectoryAttr( // Either directory markers are disabled, there is no marker, or the name // was skipped by shouldProbeDirMarker. Fall back to listing objects under // the prefix to detect a non-empty directory. - objects, _, listErr := cl.List(ctx, dirName, nil, 1) + objects, _, listErr = cl.List(ctx, dirName, nil, 1) if listErr != nil { log.Err("Client::getDirectoryAttr : List(%s) failed. Here's why: %v", dirName, listErr) return nil, listErr @@ -660,38 +703,30 @@ func (cl *Client) getDirectoryAttr( return nil, syscall.ENOENT } -var knownFileLikeExtensions = map[string]struct{}{ - ".tmp": {}, - ".ini": {}, - ".inf": {}, - ".db": {}, - ".guid": {}, - ".nxdb": {}, - ".mkv": {}, - ".mp4": {}, - ".avi": {}, - ".mov": {}, - ".txt": {}, - ".doc": {}, - ".docx": {}, - ".xls": {}, - ".xlsx": {}, - ".ppt": {}, - ".pptx": {}, -} - -func shouldProbeDirMarker(dirName string, explicitDirLookup bool) bool { - if explicitDirLookup { +func shouldProbeDirMarker( + dirName string, + explicitDirLookup bool, + skipDirProbeOnFileExt bool, +) bool { + if explicitDirLookup || !skipDirProbeOnFileExt { return true } trimmed := internal.TruncateDirName(dirName) - base := strings.ToLower(path.Base(trimmed)) - ext := strings.ToLower(path.Ext(base)) - if ext == "" { - return true + base := path.Base(trimmed) + return !hasShortAlphaNumExt(base) +} + +func hasShortAlphaNumExt(base string) bool { + ext := path.Ext(base) + if len(ext) < 2 || len(ext) > 5 { + return false + } + for _, r := range ext[1:] { + if (r < '0' || r > '9') && (r < 'A' || r > 'Z') && (r < 'a' || r > 'z') { + return false + } } - _, isFileLike := knownFileLikeExtensions[ext] - return !isFileLike + return true } // Download object data to a file handle. diff --git a/component/s3storage/client_test.go b/component/s3storage/client_test.go index cea12a1be..9ddfae20a 100644 --- a/component/s3storage/client_test.go +++ b/component/s3storage/client_test.go @@ -890,6 +890,47 @@ func (s *clientTestSuite) TestRenameFileError() { }) s.assert.Error(err) } + +func (s *clientTestSuite) TestRenameFileDstDir() { + defer s.cleanupTest() + // Setup + + src := generateFileName() + _, err := s.awsS3Client.PutObject(context.Background(), &s3.PutObjectInput{ + Bucket: aws.String(s.client.Config.AuthConfig.BucketName), + Key: aws.String(src), + ChecksumAlgorithm: s.client.Config.checksumAlgorithm, + }) + s.assert.NoError(err) + + dstDir := generateDirectoryName() + child := generateFileName() + _, err = s.awsS3Client.PutObject(context.Background(), &s3.PutObjectInput{ + Bucket: aws.String(s.client.Config.AuthConfig.BucketName), + Key: aws.String(path.Join(dstDir, child)), + ChecksumAlgorithm: s.client.Config.checksumAlgorithm, + }) + s.assert.NoError(err) + + err = s.client.RenameFile(context.Background(), src, dstDir, false) + s.assert.EqualValues(syscall.EISDIR, err) + + // Src should still be in the account + _, err = s.awsS3Client.GetObject(context.Background(), &s3.GetObjectInput{ + Bucket: aws.String(s.client.Config.AuthConfig.BucketName), + Key: aws.String(src), + ChecksumMode: types.ChecksumModeEnabled, + }) + s.assert.NoError(err) + + // Dst file should not be created + _, err = s.awsS3Client.GetObject(context.Background(), &s3.GetObjectInput{ + Bucket: aws.String(s.client.Config.AuthConfig.BucketName), + Key: aws.String(dstDir), + ChecksumMode: types.ChecksumModeEnabled, + }) + s.assert.Error(err) +} func (s *clientTestSuite) TestRenameDirectory() { defer s.cleanupTest() // setup @@ -1059,7 +1100,7 @@ func (s *clientTestSuite) TestShouldProbeDirMarker() { } for _, tc := range cases { - got := shouldProbeDirMarker(tc.dirName, tc.explicitDirLookup) + got := shouldProbeDirMarker(tc.dirName, tc.explicitDirLookup, true) s.assert.Equal(tc.want, got, tc.name) } } diff --git a/component/s3storage/config.go b/component/s3storage/config.go index 1d1bfd515..cdd2e3734 100644 --- a/component/s3storage/config.go +++ b/component/s3storage/config.go @@ -56,6 +56,7 @@ type Options struct { UsePathStyle bool `config:"use-path-style" yaml:"use-path-style,omitempty"` DisableUsage bool `config:"disable-usage" yaml:"disable-usage,omitempty"` EnableDirMarker bool `config:"enable-dir-marker" yaml:"enable-dir-marker,omitempty"` + SkipDirProbeOnFileExt bool `config:"skip-dir-probe-on-file-ext" yaml:"skip-dir-probe-on-file-ext,omitempty"` HealthCheckIntervalSec int `config:"health-check-interval-sec" yaml:"health-check-interval-sec,omitempty"` } @@ -92,6 +93,11 @@ func ParseAndValidateConfig(s3 *S3Storage, opt Options, secrets ConfigSecrets) e s3.stConfig.usePathStyle = opt.UsePathStyle s3.stConfig.disableUsage = opt.DisableUsage s3.stConfig.enableDirMarker = opt.EnableDirMarker + if config.IsSet("s3storage.skip-dir-probe-on-file-ext") { + s3.stConfig.skipDirProbeOnFileExt = opt.SkipDirProbeOnFileExt + } else { + s3.stConfig.skipDirProbeOnFileExt = true + } // Part size must be at least 5 MB and smaller than 5GB. Otherwise, set to default. if opt.PartSizeMb < 5 || opt.PartSizeMb > MaxPartSizeMb { @@ -216,6 +222,9 @@ func ParseAndReadDynamicConfig(s3 *S3Storage, opt Options, reload bool) error { } } s3.stConfig.disableSymlink = !enableSymlinks + if config.IsSet("s3storage.skip-dir-probe-on-file-ext") { + s3.stConfig.skipDirProbeOnFileExt = opt.SkipDirProbeOnFileExt + } return nil } diff --git a/component/s3storage/connection.go b/component/s3storage/connection.go index 3275f0967..c6efc4d4f 100644 --- a/component/s3storage/connection.go +++ b/component/s3storage/connection.go @@ -57,6 +57,7 @@ type Config struct { disableSymlink bool disableUsage bool enableDirMarker bool + skipDirProbeOnFileExt bool healthCheckInterval time.Duration } diff --git a/component/s3storage/s3storage.go b/component/s3storage/s3storage.go index cea78eb30..93241db7f 100644 --- a/component/s3storage/s3storage.go +++ b/component/s3storage/s3storage.go @@ -379,7 +379,7 @@ func (s3 *S3Storage) StreamDir( options.Name, iteration, *nextMarker) } // decrement and loop - entriesRemaining -= int32(len(newList)) + entriesRemaining -= totalEntriesFetched // in one case, the response will be missing one entry (see comment above `count++` in Client::List) if entriesRemaining == 1 && options.Token == "" { // don't make a request just for that one leftover entry diff --git a/component/s3storage/s3storage_test.go b/component/s3storage/s3storage_test.go index f04d60b0c..5734d3395 100644 --- a/component/s3storage/s3storage_test.go +++ b/component/s3storage/s3storage_test.go @@ -2534,6 +2534,48 @@ func (s *s3StorageTestSuite) TestRenameFileError() { s.assert.Error(err) } +func (s *s3StorageTestSuite) TestRenameFileDstDir() { + defer s.cleanupTest() + // Setup + src := generateFileName() + _, err := s.s3Storage.CreateFile(internal.CreateFileOptions{Name: src}) + s.assert.NoError(err) + + dstDir := generateDirectoryName() + child := generateFileName() + dstDirKey := common.JoinUnixFilepath( + s.s3Storage.stConfig.prefixPath, + path.Join(dstDir, child), + ) + _, err = s.awsS3Client.PutObject(context.Background(), &s3.PutObjectInput{ + Bucket: aws.String(s.s3Storage.Storage.(*Client).Config.AuthConfig.BucketName), + Key: aws.String(dstDirKey), + ChecksumAlgorithm: s.s3Storage.Storage.(*Client).Config.checksumAlgorithm, + }) + s.assert.NoError(err) + + err = s.s3Storage.RenameFile(internal.RenameFileOptions{Src: src, Dst: dstDir}) + s.assert.EqualValues(syscall.EISDIR, err) + + // Src should still be in the account + srcKey := common.JoinUnixFilepath(s.s3Storage.stConfig.prefixPath, src) + _, err = s.awsS3Client.GetObject(context.Background(), &s3.GetObjectInput{ + Bucket: aws.String(s.s3Storage.Storage.(*Client).Config.AuthConfig.BucketName), + Key: aws.String(srcKey), + ChecksumMode: types.ChecksumModeEnabled, + }) + s.assert.NoError(err) + + // Dst file should not be created + dstKey := common.JoinUnixFilepath(s.s3Storage.stConfig.prefixPath, dstDir) + _, err = s.awsS3Client.GetObject(context.Background(), &s3.GetObjectInput{ + Bucket: aws.String(s.s3Storage.Storage.(*Client).Config.AuthConfig.BucketName), + Key: aws.String(dstKey), + ChecksumMode: types.ChecksumModeEnabled, + }) + s.assert.Error(err) +} + func (s *s3StorageTestSuite) TestRenameFileSymlink() { defer s.cleanupTest() config := generateConfigYaml( diff --git a/setup/advancedConfig.yaml b/setup/advancedConfig.yaml index ffe0f1b1b..a0a735ffb 100644 --- a/setup/advancedConfig.yaml +++ b/setup/advancedConfig.yaml @@ -212,6 +212,7 @@ s3storage: use-path-style: true|false disable-usage: true|false enable-dir-marker: true|false + skip-dir-probe-on-file-ext: true|false health-check-interval-sec: # Mount all configuration diff --git a/setup/baseConfig.yaml b/setup/baseConfig.yaml index e638d3af3..0b13c4e85 100644 --- a/setup/baseConfig.yaml +++ b/setup/baseConfig.yaml @@ -199,6 +199,7 @@ s3storage: use-path-style: true|false disable-usage: true|false enable-dir-marker: true|false + skip-dir-probe-on-file-ext: true|false health-check-interval-sec: # Mount all configuration