taildrop: implement asynchronous file deletion (#9844)
File resumption requires keeping partial files around for some time, but we must still eventually delete them if never resumed. Thus, we implement asynchronous file deletion, which could spawn a background goroutine to delete the files. We also use the same mechanism for deleting files on Windows, where a file can't be deleted if there is still an open file handle. We can enqueue those with the asynchronous file deleter as well.
Updates tailscale/corp#14772
Signed-off-by: Joe Tsai <joetsai@digital-static.net>
main
parent
33bb2bbfe9
commit
c2a551469c
@ -0,0 +1,182 @@ |
|||||||
|
// Copyright (c) Tailscale Inc & AUTHORS
|
||||||
|
// SPDX-License-Identifier: BSD-3-Clause
|
||||||
|
|
||||||
|
package taildrop |
||||||
|
|
||||||
|
import ( |
||||||
|
"container/list" |
||||||
|
"context" |
||||||
|
"io/fs" |
||||||
|
"os" |
||||||
|
"path/filepath" |
||||||
|
"strings" |
||||||
|
"sync" |
||||||
|
"time" |
||||||
|
|
||||||
|
"tailscale.com/syncs" |
||||||
|
"tailscale.com/tstime" |
||||||
|
"tailscale.com/types/logger" |
||||||
|
) |
||||||
|
|
||||||
|
// deleteDelay is how long a queued file is kept before it is deleted.
// Lowering it removes deleted and partial files sooner; raising it leaves
// a wider window in which a partial file can still be resumed.
const deleteDelay = 60 * time.Minute
||||||
|
|
||||||
|
// fileDeleter manages asynchronous deletion of files after deleteDelay.
|
||||||
|
type fileDeleter struct { |
||||||
|
logf logger.Logf |
||||||
|
clock tstime.DefaultClock |
||||||
|
event func(string) // called for certain events; for testing only
|
||||||
|
dir string |
||||||
|
|
||||||
|
mu sync.Mutex |
||||||
|
queue list.List |
||||||
|
byName map[string]*list.Element |
||||||
|
|
||||||
|
emptySignal chan struct{} // signal that the queue is empty
|
||||||
|
group syncs.WaitGroup |
||||||
|
shutdownCtx context.Context |
||||||
|
shutdown context.CancelFunc |
||||||
|
} |
||||||
|
|
||||||
|
// deleteFile is one queue entry: a file that becomes eligible for deletion
// once deleteDelay has elapsed since it was inserted.
type deleteFile struct {
	name     string    // base name of the file within the deleter's directory
	inserted time.Time // when the entry was queued (or last failed to delete)
}
||||||
|
|
||||||
|
func (d *fileDeleter) Init(logf logger.Logf, clock tstime.DefaultClock, event func(string), dir string) { |
||||||
|
d.logf = logf |
||||||
|
d.clock = clock |
||||||
|
d.dir = dir |
||||||
|
d.event = event |
||||||
|
|
||||||
|
// From a cold-start, load the list of partial and deleted files.
|
||||||
|
d.byName = make(map[string]*list.Element) |
||||||
|
d.emptySignal = make(chan struct{}) |
||||||
|
d.shutdownCtx, d.shutdown = context.WithCancel(context.Background()) |
||||||
|
d.group.Go(func() { |
||||||
|
d.event("start init") |
||||||
|
defer d.event("end init") |
||||||
|
rangeDir(dir, func(de fs.DirEntry) bool { |
||||||
|
switch { |
||||||
|
case d.shutdownCtx.Err() != nil: |
||||||
|
return false // terminate early
|
||||||
|
case !de.Type().IsRegular(): |
||||||
|
return true |
||||||
|
case strings.Contains(de.Name(), partialSuffix): |
||||||
|
d.Insert(de.Name()) |
||||||
|
case strings.Contains(de.Name(), deletedSuffix): |
||||||
|
// Best-effort immediate deletion of deleted files.
|
||||||
|
name := strings.TrimSuffix(de.Name(), deletedSuffix) |
||||||
|
if os.Remove(filepath.Join(dir, name)) == nil { |
||||||
|
if os.Remove(filepath.Join(dir, de.Name())) == nil { |
||||||
|
break |
||||||
|
} |
||||||
|
} |
||||||
|
// Otherwise, enqueue the file for later deletion.
|
||||||
|
d.Insert(de.Name()) |
||||||
|
} |
||||||
|
return true |
||||||
|
}) |
||||||
|
}) |
||||||
|
} |
||||||
|
|
||||||
|
// Insert enqueues baseName for eventual deletion.
|
||||||
|
func (d *fileDeleter) Insert(baseName string) { |
||||||
|
d.mu.Lock() |
||||||
|
defer d.mu.Unlock() |
||||||
|
if d.shutdownCtx.Err() != nil { |
||||||
|
return |
||||||
|
} |
||||||
|
if _, ok := d.byName[baseName]; ok { |
||||||
|
return // already queued for deletion
|
||||||
|
} |
||||||
|
d.byName[baseName] = d.queue.PushBack(&deleteFile{baseName, d.clock.Now()}) |
||||||
|
if d.queue.Len() == 1 { |
||||||
|
d.group.Go(func() { d.waitAndDelete(deleteDelay) }) |
||||||
|
} |
||||||
|
} |
||||||
|
|
||||||
|
// waitAndDelete is an asynchronous deletion goroutine.
|
||||||
|
// At most one waitAndDelete routine is ever running at a time.
|
||||||
|
// It is not started unless there is at least one file in the queue.
|
||||||
|
func (d *fileDeleter) waitAndDelete(wait time.Duration) { |
||||||
|
tc, ch := d.clock.NewTimer(wait) |
||||||
|
defer tc.Stop() // cleanup the timer resource if we stop early
|
||||||
|
d.event("start waitAndDelete") |
||||||
|
defer d.event("end waitAndDelete") |
||||||
|
select { |
||||||
|
case <-d.shutdownCtx.Done(): |
||||||
|
case <-d.emptySignal: |
||||||
|
case now := <-ch: |
||||||
|
d.mu.Lock() |
||||||
|
defer d.mu.Unlock() |
||||||
|
|
||||||
|
// Iterate over all files to delete, and delete anything old enough.
|
||||||
|
var next *list.Element |
||||||
|
var failed []*list.Element |
||||||
|
for elem := d.queue.Front(); elem != nil; elem = next { |
||||||
|
next = elem.Next() |
||||||
|
file := elem.Value.(*deleteFile) |
||||||
|
if now.Sub(file.inserted) < deleteDelay { |
||||||
|
break // everything after this is recently inserted
|
||||||
|
} |
||||||
|
|
||||||
|
// Delete the expired file.
|
||||||
|
if name, ok := strings.CutSuffix(file.name, deletedSuffix); ok { |
||||||
|
if err := os.Remove(filepath.Join(d.dir, name)); err != nil && !os.IsNotExist(err) { |
||||||
|
d.logf("could not delete: %v", redactError(err)) |
||||||
|
failed = append(failed, elem) |
||||||
|
continue |
||||||
|
} |
||||||
|
} |
||||||
|
if err := os.Remove(filepath.Join(d.dir, file.name)); err != nil && !os.IsNotExist(err) { |
||||||
|
d.logf("could not delete: %v", redactError(err)) |
||||||
|
failed = append(failed, elem) |
||||||
|
continue |
||||||
|
} |
||||||
|
d.queue.Remove(elem) |
||||||
|
delete(d.byName, file.name) |
||||||
|
d.event("deleted " + file.name) |
||||||
|
} |
||||||
|
for _, elem := range failed { |
||||||
|
elem.Value.(*deleteFile).inserted = now // retry after deleteDelay
|
||||||
|
d.queue.MoveToBack(elem) |
||||||
|
} |
||||||
|
|
||||||
|
// If there are still some files to delete, retry again later.
|
||||||
|
if d.queue.Len() > 0 { |
||||||
|
file := d.queue.Front().Value.(*deleteFile) |
||||||
|
retryAfter := deleteDelay - now.Sub(file.inserted) |
||||||
|
d.group.Go(func() { d.waitAndDelete(retryAfter) }) |
||||||
|
} |
||||||
|
} |
||||||
|
} |
||||||
|
|
||||||
|
// Remove dequeues baseName from eventual deletion.
|
||||||
|
func (d *fileDeleter) Remove(baseName string) { |
||||||
|
d.mu.Lock() |
||||||
|
defer d.mu.Unlock() |
||||||
|
if elem := d.byName[baseName]; elem != nil { |
||||||
|
d.queue.Remove(elem) |
||||||
|
delete(d.byName, baseName) |
||||||
|
// Signal to terminate any waitAndDelete goroutines.
|
||||||
|
if d.queue.Len() == 0 { |
||||||
|
select { |
||||||
|
case <-d.shutdownCtx.Done(): |
||||||
|
case d.emptySignal <- struct{}{}: |
||||||
|
} |
||||||
|
} |
||||||
|
} |
||||||
|
} |
||||||
|
|
||||||
|
// Shutdown shuts down the deleter.
|
||||||
|
// It blocks until all goroutines are stopped.
|
||||||
|
func (d *fileDeleter) Shutdown() { |
||||||
|
d.mu.Lock() // acquire lock to ensure no new goroutines start after shutdown
|
||||||
|
d.shutdown() |
||||||
|
d.mu.Unlock() |
||||||
|
d.group.Wait() |
||||||
|
} |
||||||
@ -0,0 +1,132 @@ |
|||||||
|
// Copyright (c) Tailscale Inc & AUTHORS
|
||||||
|
// SPDX-License-Identifier: BSD-3-Clause
|
||||||
|
|
||||||
|
package taildrop |
||||||
|
|
||||||
|
import ( |
||||||
|
"os" |
||||||
|
"path/filepath" |
||||||
|
"slices" |
||||||
|
"testing" |
||||||
|
"time" |
||||||
|
|
||||||
|
"github.com/google/go-cmp/cmp" |
||||||
|
"tailscale.com/tstest" |
||||||
|
"tailscale.com/tstime" |
||||||
|
"tailscale.com/util/must" |
||||||
|
) |
||||||
|
|
||||||
|
func TestDeleter(t *testing.T) { |
||||||
|
dir := t.TempDir() |
||||||
|
must.Do(touchFile(filepath.Join(dir, "foo.partial"))) |
||||||
|
must.Do(touchFile(filepath.Join(dir, "bar.partial"))) |
||||||
|
must.Do(touchFile(filepath.Join(dir, "fizz"))) |
||||||
|
must.Do(touchFile(filepath.Join(dir, "fizz.deleted"))) |
||||||
|
must.Do(touchFile(filepath.Join(dir, "buzz.deleted"))) // lacks a matching "buzz" file
|
||||||
|
|
||||||
|
checkDirectory := func(want ...string) { |
||||||
|
t.Helper() |
||||||
|
var got []string |
||||||
|
for _, de := range must.Get(os.ReadDir(dir)) { |
||||||
|
got = append(got, de.Name()) |
||||||
|
} |
||||||
|
slices.Sort(got) |
||||||
|
slices.Sort(want) |
||||||
|
if diff := cmp.Diff(got, want); diff != "" { |
||||||
|
t.Fatalf("directory mismatch (-got +want):\n%s", diff) |
||||||
|
} |
||||||
|
} |
||||||
|
|
||||||
|
clock := tstest.NewClock(tstest.ClockOpts{Start: time.Date(2000, 1, 1, 0, 0, 0, 0, time.UTC)}) |
||||||
|
advance := func(d time.Duration) { |
||||||
|
t.Helper() |
||||||
|
t.Logf("advance: %v", d) |
||||||
|
clock.Advance(d) |
||||||
|
} |
||||||
|
|
||||||
|
eventsChan := make(chan string, 1000) |
||||||
|
checkEvents := func(want ...string) { |
||||||
|
t.Helper() |
||||||
|
tm := time.NewTimer(10 * time.Second) |
||||||
|
defer tm.Stop() |
||||||
|
var got []string |
||||||
|
for range want { |
||||||
|
select { |
||||||
|
case event := <-eventsChan: |
||||||
|
t.Logf("event: %s", event) |
||||||
|
got = append(got, event) |
||||||
|
case <-tm.C: |
||||||
|
t.Fatalf("timed out waiting for event: got %v, want %v", got, want) |
||||||
|
} |
||||||
|
} |
||||||
|
slices.Sort(got) |
||||||
|
slices.Sort(want) |
||||||
|
if diff := cmp.Diff(got, want); diff != "" { |
||||||
|
t.Fatalf("events mismatch (-got +want):\n%s", diff) |
||||||
|
} |
||||||
|
} |
||||||
|
eventHook := func(event string) { eventsChan <- event } |
||||||
|
|
||||||
|
var fd fileDeleter |
||||||
|
fd.Init(t.Logf, tstime.DefaultClock{Clock: clock}, eventHook, dir) |
||||||
|
defer fd.Shutdown() |
||||||
|
insert := func(name string) { |
||||||
|
t.Helper() |
||||||
|
t.Logf("insert: %v", name) |
||||||
|
fd.Insert(name) |
||||||
|
} |
||||||
|
remove := func(name string) { |
||||||
|
t.Helper() |
||||||
|
t.Logf("remove: %v", name) |
||||||
|
fd.Remove(name) |
||||||
|
} |
||||||
|
|
||||||
|
checkEvents("start init") |
||||||
|
checkEvents("end init", "start waitAndDelete") |
||||||
|
checkDirectory("foo.partial", "bar.partial", "buzz.deleted") |
||||||
|
|
||||||
|
advance(deleteDelay / 2) |
||||||
|
checkDirectory("foo.partial", "bar.partial", "buzz.deleted") |
||||||
|
advance(deleteDelay / 2) |
||||||
|
checkEvents("deleted foo.partial", "deleted bar.partial", "deleted buzz.deleted") |
||||||
|
checkEvents("end waitAndDelete") |
||||||
|
checkDirectory() |
||||||
|
|
||||||
|
must.Do(touchFile(filepath.Join(dir, "one.partial"))) |
||||||
|
insert("one.partial") |
||||||
|
checkEvents("start waitAndDelete") |
||||||
|
advance(deleteDelay / 4) |
||||||
|
must.Do(touchFile(filepath.Join(dir, "two.partial"))) |
||||||
|
insert("two.partial") |
||||||
|
advance(deleteDelay / 4) |
||||||
|
must.Do(touchFile(filepath.Join(dir, "three.partial"))) |
||||||
|
insert("three.partial") |
||||||
|
advance(deleteDelay / 4) |
||||||
|
must.Do(touchFile(filepath.Join(dir, "four.partial"))) |
||||||
|
insert("four.partial") |
||||||
|
|
||||||
|
advance(deleteDelay / 4) |
||||||
|
checkEvents("deleted one.partial") |
||||||
|
checkDirectory("two.partial", "three.partial", "four.partial") |
||||||
|
checkEvents("end waitAndDelete", "start waitAndDelete") |
||||||
|
|
||||||
|
advance(deleteDelay / 4) |
||||||
|
checkEvents("deleted two.partial") |
||||||
|
checkDirectory("three.partial", "four.partial") |
||||||
|
checkEvents("end waitAndDelete", "start waitAndDelete") |
||||||
|
|
||||||
|
advance(deleteDelay / 4) |
||||||
|
checkEvents("deleted three.partial") |
||||||
|
checkDirectory("four.partial") |
||||||
|
checkEvents("end waitAndDelete", "start waitAndDelete") |
||||||
|
|
||||||
|
advance(deleteDelay / 4) |
||||||
|
checkEvents("deleted four.partial") |
||||||
|
checkDirectory() |
||||||
|
checkEvents("end waitAndDelete") |
||||||
|
|
||||||
|
insert("wuzz.partial") |
||||||
|
checkEvents("start waitAndDelete") |
||||||
|
remove("wuzz.partial") |
||||||
|
checkEvents("end waitAndDelete") |
||||||
|
} |
||||||
Loading…
Reference in new issue