common/folderDeletionManager.go (141 lines of code) (raw):

// Copyright Microsoft <wastore@microsoft.com> // // Permission is hereby granted, free of charge, to any person obtaining a copy // of this software and associated documentation files (the "Software"), to deal // in the Software without restriction, including without limitation the rights // to use, copy, modify, merge, publish, distribute, sublicense, and/or sell // copies of the Software, and to permit persons to whom the Software is // furnished to do so, subject to the following conditions: // // The above copyright notice and this permission notice shall be included in // all copies or substantial portions of the Software. // // THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR // IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, // FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE // AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER // LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, // OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN // THE SOFTWARE. package common import ( "context" "net/url" "strings" "sync" ) // folderDeletionFunc should delete the folder IF IT IS EMPTY, and return true. // If it is not empty, false must be returned. // FolderDeletionManager is allowed to call this on a folder that is not yet empty. // In that case, FolderDeletionManager may call it again later. // Errors are not returned because of the delay to when deletion might happen, so // it's up to the func to do its own logging type FolderDeletionFunc func(context.Context, ILogger) bool // FolderDeletionManager handles the fact that (in most locations) we can't delete folders that // still contain files. So it allows us to request deletion of a folder, and have that be attempted // after the last file is removed. Note that maybe the apparent last file isn't the last (e.g. // there are other files, still to be deleted, in future job parts), in which case any failed deletion // will be retried if there's a new "candidate last child" removed. // Takes URLs rather than strings because that ensures correct (un)escaping, and makes it clear that we // don't support Windows & MacOS local paths (which have cases insensitivity that we don't support here). type FolderDeletionManager interface { // RecordChildExists takes a child name and counts it against the child's immediate parent // Should be called for both types of child: folders and files. // Only counts it against the immediate parent (that's all that's necessary, because we recurse in tryDeletion) RecordChildExists(childFileOrFolder *url.URL) // RecordChildDelete records that a file, previously passed to RecordChildExists, has now been deleted // Only call for files, not folders RecordChildDeleted(childFile *url.URL) // RequestDeletion registers a function that will be called to delete the given folder, when that // folder has no more known children. May be called before, after or during the time that // the folder's children are being passed to RecordChildExists and RecordChildDeleted // // Warning: only pass in deletionFuncs that will do nothing and return FALSE if the // folder is not yet empty. If they return false, they may be called again later. RequestDeletion(folder *url.URL, deletionFunc FolderDeletionFunc) // TODO: do we want this to report, so that we can log, any folders at the very end which still are not deleted? // or will we just leave such folders there, with no logged message other than any "per attempt" logging? } type folderDeletionState struct { childCount int64 deleter FolderDeletionFunc } func (f *folderDeletionState) shouldDeleteNow() bool { deletionRequested := f.deleter != nil return deletionRequested && f.childCount == 0 } func NewFolderDeletionManager(ctx context.Context, fpo FolderPropertyOption, logger ILogger) FolderDeletionManager { switch fpo { case EFolderPropertiesOption.AllFolders(), EFolderPropertiesOption.AllFoldersExceptRoot(): return &standardFolderDeletionManager{ mu: &sync.Mutex{}, contents: make(map[string]*folderDeletionState), logger: logger, ctx: ctx, } case EFolderPropertiesOption.NoFolders(): // no point in using a real implementation here, since it will just use memory and take time for no benefit return &nullFolderDeletionManager{} default: panic("unknown folderPropertiesOption") } } // Note: the current implementation assumes that names are either case sensitive, or at least // consistently capitalized. If it receives inconsistently capitalized things, it will think they are // distinct, and so may try deletion prematurely and fail type standardFolderDeletionManager struct { mu *sync.Mutex // mutex is simpler than RWMutex because folderDeletionState has multiple mutable elements contents map[string]*folderDeletionState // pointer so no need to put back INTO map after reading from map and mutating a field value // have our own logger and context, because our deletions don't necessarily run when RequestDeletion is called logger ILogger ctx context.Context } func (s standardFolderDeletionManager) copyURL(u *url.URL) *url.URL { out := *u if u.User != nil { user := *u.User out.User = &user } return &out } func (s *standardFolderDeletionManager) clean(u *url.URL) *url.URL { out := s.copyURL(u) out.RawQuery = "" // no SAS return out } // getParent drops final part of path (not using use path.Dir because it messes with the // in URLs) func (s *standardFolderDeletionManager) getParent(u *url.URL) (*url.URL, bool) { if len(u.Path) == 0 || u.Path == "/" { return u, false // path is already empty, so we can't go up another level } // trim off last portion of path (or all of the path, if it only has one component) out := s.clean(u) out.Path = out.Path[:strings.LastIndex(out.Path, "/")] if out.RawPath != "" { out.RawPath = out.RawPath[:strings.LastIndex(out.RawPath, "/")] } return out, true } func (s standardFolderDeletionManager) getMapKey(u *url.URL) string { return url.PathEscape(u.Path) } // getStateAlreadyLocked assumes the lock is already held func (s *standardFolderDeletionManager) getStateAlreadyLocked(folder *url.URL) *folderDeletionState { fmapKey := s.getMapKey(folder) state, alreadyKnown := s.contents[fmapKey] if alreadyKnown { return state } else { state = &folderDeletionState{} s.contents[fmapKey] = state return state } } func (s *standardFolderDeletionManager) RecordChildExists(childFileOrFolder *url.URL) { folder, ok := s.getParent(childFileOrFolder) if !ok { return // this is not a child of any parent, so there is nothing for us to do } s.mu.Lock() defer s.mu.Unlock() folderStatePtr := s.getStateAlreadyLocked(folder) folderStatePtr.childCount++ } func (s *standardFolderDeletionManager) RecordChildDeleted(childFile *url.URL) { folder, ok := s.getParent(childFile) if !ok { return // this is not a child of any parent, so there is nothing for us to do } s.mu.Lock() folderStatePtr, alreadyKnown := s.contents[s.getMapKey(folder)] if !alreadyKnown { // we are not tracking this child, so there is nothing that we should do in response // to its deletion (may happen in the recursive calls from tryDeletion, when they recurse up to parent dirs) s.mu.Unlock() return } folderStatePtr.childCount-- if folderStatePtr.childCount < 0 { // should never happen. If it does it means someone called RequestDeletion and Recorded a child as deleted, without ever registering the child as known folderStatePtr.childCount = 0 } deletionFunc := folderStatePtr.deleter shouldDel := folderStatePtr.shouldDeleteNow() s.mu.Unlock() // unlock before network calls for deletion if shouldDel { s.tryDeletion(folder, deletionFunc) } } func (s *standardFolderDeletionManager) RequestDeletion(folder *url.URL, deletionFunc FolderDeletionFunc) { folder = s.clean(folder) s.mu.Lock() folderStatePtr := s.getStateAlreadyLocked(folder) folderStatePtr.deleter = deletionFunc shouldDel := folderStatePtr.shouldDeleteNow() // test now in case there are no children s.mu.Unlock() // release lock before expensive deletion attempt if shouldDel { s.tryDeletion(folder, deletionFunc) } } func (s *standardFolderDeletionManager) tryDeletion(folder *url.URL, deletionFunc FolderDeletionFunc) { success := deletionFunc(s.ctx, s.logger) // for safety, deletionFunc should be coded to do nothing, and return false, if the directory is not empty if success { s.mu.Lock() delete(s.contents, s.getMapKey(folder)) s.mu.Unlock() // folder is, itself, a child of its parent. So recurse. This is the only place that RecordChildDeleted should be called with a FOLDER parameter s.RecordChildDeleted(folder) } } /////////////////////////////////////// type nullFolderDeletionManager struct{} func (f *nullFolderDeletionManager) RecordChildExists(child *url.URL) { // no-op } func (f *nullFolderDeletionManager) RecordChildDeleted(child *url.URL) { // no-op } func (f *nullFolderDeletionManager) RequestDeletion(folder *url.URL, deletionFunc FolderDeletionFunc) { // There's no way this should ever be called, because we only create the null deletion manager if we are // NOT transferring folder info. panic("wrong type of folder deletion manager has been instantiated. This type does not do anything") }