taildrop: implement asynchronous file deletion (#9844)
File resumption requires keeping partial files around for some time, but we must still eventually delete them if never resumed. Thus, we implement asynchronous file deletion, which could spawn a background goroutine to delete the files. We also use the same mechanism for deleting files on Windows, where a file can't be deleted if there is still an open file handle. We can enqueue those with the asynchronous file deleter as well. Updates tailscale/corp#14772 Signed-off-by: Joe Tsai <joetsai@digital-static.net>main
parent
33bb2bbfe9
commit
c2a551469c
@ -0,0 +1,182 @@ |
||||
// Copyright (c) Tailscale Inc & AUTHORS
|
||||
// SPDX-License-Identifier: BSD-3-Clause
|
||||
|
||||
package taildrop |
||||
|
||||
import ( |
||||
"container/list" |
||||
"context" |
||||
"io/fs" |
||||
"os" |
||||
"path/filepath" |
||||
"strings" |
||||
"sync" |
||||
"time" |
||||
|
||||
"tailscale.com/syncs" |
||||
"tailscale.com/tstime" |
||||
"tailscale.com/types/logger" |
||||
) |
||||
|
||||
// deleteDelay is the amount of time to wait before we delete a file.
|
||||
// A shorter value ensures timely deletion of deleted and partial files, while
|
||||
// a longer value provides more opportunity for partial files to be resumed.
|
||||
const deleteDelay = time.Hour |
||||
|
||||
// fileDeleter manages asynchronous deletion of files after deleteDelay.
|
||||
type fileDeleter struct { |
||||
logf logger.Logf |
||||
clock tstime.DefaultClock |
||||
event func(string) // called for certain events; for testing only
|
||||
dir string |
||||
|
||||
mu sync.Mutex |
||||
queue list.List |
||||
byName map[string]*list.Element |
||||
|
||||
emptySignal chan struct{} // signal that the queue is empty
|
||||
group syncs.WaitGroup |
||||
shutdownCtx context.Context |
||||
shutdown context.CancelFunc |
||||
} |
||||
|
||||
// deleteFile is a specific file to delete after deleteDelay.
|
||||
type deleteFile struct { |
||||
name string |
||||
inserted time.Time |
||||
} |
||||
|
||||
func (d *fileDeleter) Init(logf logger.Logf, clock tstime.DefaultClock, event func(string), dir string) { |
||||
d.logf = logf |
||||
d.clock = clock |
||||
d.dir = dir |
||||
d.event = event |
||||
|
||||
// From a cold-start, load the list of partial and deleted files.
|
||||
d.byName = make(map[string]*list.Element) |
||||
d.emptySignal = make(chan struct{}) |
||||
d.shutdownCtx, d.shutdown = context.WithCancel(context.Background()) |
||||
d.group.Go(func() { |
||||
d.event("start init") |
||||
defer d.event("end init") |
||||
rangeDir(dir, func(de fs.DirEntry) bool { |
||||
switch { |
||||
case d.shutdownCtx.Err() != nil: |
||||
return false // terminate early
|
||||
case !de.Type().IsRegular(): |
||||
return true |
||||
case strings.Contains(de.Name(), partialSuffix): |
||||
d.Insert(de.Name()) |
||||
case strings.Contains(de.Name(), deletedSuffix): |
||||
// Best-effort immediate deletion of deleted files.
|
||||
name := strings.TrimSuffix(de.Name(), deletedSuffix) |
||||
if os.Remove(filepath.Join(dir, name)) == nil { |
||||
if os.Remove(filepath.Join(dir, de.Name())) == nil { |
||||
break |
||||
} |
||||
} |
||||
// Otherwise, enqueue the file for later deletion.
|
||||
d.Insert(de.Name()) |
||||
} |
||||
return true |
||||
}) |
||||
}) |
||||
} |
||||
|
||||
// Insert enqueues baseName for eventual deletion.
|
||||
func (d *fileDeleter) Insert(baseName string) { |
||||
d.mu.Lock() |
||||
defer d.mu.Unlock() |
||||
if d.shutdownCtx.Err() != nil { |
||||
return |
||||
} |
||||
if _, ok := d.byName[baseName]; ok { |
||||
return // already queued for deletion
|
||||
} |
||||
d.byName[baseName] = d.queue.PushBack(&deleteFile{baseName, d.clock.Now()}) |
||||
if d.queue.Len() == 1 { |
||||
d.group.Go(func() { d.waitAndDelete(deleteDelay) }) |
||||
} |
||||
} |
||||
|
||||
// waitAndDelete is an asynchronous deletion goroutine.
|
||||
// At most one waitAndDelete routine is ever running at a time.
|
||||
// It is not started unless there is at least one file in the queue.
|
||||
func (d *fileDeleter) waitAndDelete(wait time.Duration) { |
||||
tc, ch := d.clock.NewTimer(wait) |
||||
defer tc.Stop() // cleanup the timer resource if we stop early
|
||||
d.event("start waitAndDelete") |
||||
defer d.event("end waitAndDelete") |
||||
select { |
||||
case <-d.shutdownCtx.Done(): |
||||
case <-d.emptySignal: |
||||
case now := <-ch: |
||||
d.mu.Lock() |
||||
defer d.mu.Unlock() |
||||
|
||||
// Iterate over all files to delete, and delete anything old enough.
|
||||
var next *list.Element |
||||
var failed []*list.Element |
||||
for elem := d.queue.Front(); elem != nil; elem = next { |
||||
next = elem.Next() |
||||
file := elem.Value.(*deleteFile) |
||||
if now.Sub(file.inserted) < deleteDelay { |
||||
break // everything after this is recently inserted
|
||||
} |
||||
|
||||
// Delete the expired file.
|
||||
if name, ok := strings.CutSuffix(file.name, deletedSuffix); ok { |
||||
if err := os.Remove(filepath.Join(d.dir, name)); err != nil && !os.IsNotExist(err) { |
||||
d.logf("could not delete: %v", redactError(err)) |
||||
failed = append(failed, elem) |
||||
continue |
||||
} |
||||
} |
||||
if err := os.Remove(filepath.Join(d.dir, file.name)); err != nil && !os.IsNotExist(err) { |
||||
d.logf("could not delete: %v", redactError(err)) |
||||
failed = append(failed, elem) |
||||
continue |
||||
} |
||||
d.queue.Remove(elem) |
||||
delete(d.byName, file.name) |
||||
d.event("deleted " + file.name) |
||||
} |
||||
for _, elem := range failed { |
||||
elem.Value.(*deleteFile).inserted = now // retry after deleteDelay
|
||||
d.queue.MoveToBack(elem) |
||||
} |
||||
|
||||
// If there are still some files to delete, retry again later.
|
||||
if d.queue.Len() > 0 { |
||||
file := d.queue.Front().Value.(*deleteFile) |
||||
retryAfter := deleteDelay - now.Sub(file.inserted) |
||||
d.group.Go(func() { d.waitAndDelete(retryAfter) }) |
||||
} |
||||
} |
||||
} |
||||
|
||||
// Remove dequeues baseName from eventual deletion.
|
||||
func (d *fileDeleter) Remove(baseName string) { |
||||
d.mu.Lock() |
||||
defer d.mu.Unlock() |
||||
if elem := d.byName[baseName]; elem != nil { |
||||
d.queue.Remove(elem) |
||||
delete(d.byName, baseName) |
||||
// Signal to terminate any waitAndDelete goroutines.
|
||||
if d.queue.Len() == 0 { |
||||
select { |
||||
case <-d.shutdownCtx.Done(): |
||||
case d.emptySignal <- struct{}{}: |
||||
} |
||||
} |
||||
} |
||||
} |
||||
|
||||
// Shutdown shuts down the deleter.
|
||||
// It blocks until all goroutines are stopped.
|
||||
func (d *fileDeleter) Shutdown() { |
||||
d.mu.Lock() // acquire lock to ensure no new goroutines start after shutdown
|
||||
d.shutdown() |
||||
d.mu.Unlock() |
||||
d.group.Wait() |
||||
} |
||||
@ -0,0 +1,132 @@ |
||||
// Copyright (c) Tailscale Inc & AUTHORS
|
||||
// SPDX-License-Identifier: BSD-3-Clause
|
||||
|
||||
package taildrop |
||||
|
||||
import ( |
||||
"os" |
||||
"path/filepath" |
||||
"slices" |
||||
"testing" |
||||
"time" |
||||
|
||||
"github.com/google/go-cmp/cmp" |
||||
"tailscale.com/tstest" |
||||
"tailscale.com/tstime" |
||||
"tailscale.com/util/must" |
||||
) |
||||
|
||||
func TestDeleter(t *testing.T) { |
||||
dir := t.TempDir() |
||||
must.Do(touchFile(filepath.Join(dir, "foo.partial"))) |
||||
must.Do(touchFile(filepath.Join(dir, "bar.partial"))) |
||||
must.Do(touchFile(filepath.Join(dir, "fizz"))) |
||||
must.Do(touchFile(filepath.Join(dir, "fizz.deleted"))) |
||||
must.Do(touchFile(filepath.Join(dir, "buzz.deleted"))) // lacks a matching "buzz" file
|
||||
|
||||
checkDirectory := func(want ...string) { |
||||
t.Helper() |
||||
var got []string |
||||
for _, de := range must.Get(os.ReadDir(dir)) { |
||||
got = append(got, de.Name()) |
||||
} |
||||
slices.Sort(got) |
||||
slices.Sort(want) |
||||
if diff := cmp.Diff(got, want); diff != "" { |
||||
t.Fatalf("directory mismatch (-got +want):\n%s", diff) |
||||
} |
||||
} |
||||
|
||||
clock := tstest.NewClock(tstest.ClockOpts{Start: time.Date(2000, 1, 1, 0, 0, 0, 0, time.UTC)}) |
||||
advance := func(d time.Duration) { |
||||
t.Helper() |
||||
t.Logf("advance: %v", d) |
||||
clock.Advance(d) |
||||
} |
||||
|
||||
eventsChan := make(chan string, 1000) |
||||
checkEvents := func(want ...string) { |
||||
t.Helper() |
||||
tm := time.NewTimer(10 * time.Second) |
||||
defer tm.Stop() |
||||
var got []string |
||||
for range want { |
||||
select { |
||||
case event := <-eventsChan: |
||||
t.Logf("event: %s", event) |
||||
got = append(got, event) |
||||
case <-tm.C: |
||||
t.Fatalf("timed out waiting for event: got %v, want %v", got, want) |
||||
} |
||||
} |
||||
slices.Sort(got) |
||||
slices.Sort(want) |
||||
if diff := cmp.Diff(got, want); diff != "" { |
||||
t.Fatalf("events mismatch (-got +want):\n%s", diff) |
||||
} |
||||
} |
||||
eventHook := func(event string) { eventsChan <- event } |
||||
|
||||
var fd fileDeleter |
||||
fd.Init(t.Logf, tstime.DefaultClock{Clock: clock}, eventHook, dir) |
||||
defer fd.Shutdown() |
||||
insert := func(name string) { |
||||
t.Helper() |
||||
t.Logf("insert: %v", name) |
||||
fd.Insert(name) |
||||
} |
||||
remove := func(name string) { |
||||
t.Helper() |
||||
t.Logf("remove: %v", name) |
||||
fd.Remove(name) |
||||
} |
||||
|
||||
checkEvents("start init") |
||||
checkEvents("end init", "start waitAndDelete") |
||||
checkDirectory("foo.partial", "bar.partial", "buzz.deleted") |
||||
|
||||
advance(deleteDelay / 2) |
||||
checkDirectory("foo.partial", "bar.partial", "buzz.deleted") |
||||
advance(deleteDelay / 2) |
||||
checkEvents("deleted foo.partial", "deleted bar.partial", "deleted buzz.deleted") |
||||
checkEvents("end waitAndDelete") |
||||
checkDirectory() |
||||
|
||||
must.Do(touchFile(filepath.Join(dir, "one.partial"))) |
||||
insert("one.partial") |
||||
checkEvents("start waitAndDelete") |
||||
advance(deleteDelay / 4) |
||||
must.Do(touchFile(filepath.Join(dir, "two.partial"))) |
||||
insert("two.partial") |
||||
advance(deleteDelay / 4) |
||||
must.Do(touchFile(filepath.Join(dir, "three.partial"))) |
||||
insert("three.partial") |
||||
advance(deleteDelay / 4) |
||||
must.Do(touchFile(filepath.Join(dir, "four.partial"))) |
||||
insert("four.partial") |
||||
|
||||
advance(deleteDelay / 4) |
||||
checkEvents("deleted one.partial") |
||||
checkDirectory("two.partial", "three.partial", "four.partial") |
||||
checkEvents("end waitAndDelete", "start waitAndDelete") |
||||
|
||||
advance(deleteDelay / 4) |
||||
checkEvents("deleted two.partial") |
||||
checkDirectory("three.partial", "four.partial") |
||||
checkEvents("end waitAndDelete", "start waitAndDelete") |
||||
|
||||
advance(deleteDelay / 4) |
||||
checkEvents("deleted three.partial") |
||||
checkDirectory("four.partial") |
||||
checkEvents("end waitAndDelete", "start waitAndDelete") |
||||
|
||||
advance(deleteDelay / 4) |
||||
checkEvents("deleted four.partial") |
||||
checkDirectory() |
||||
checkEvents("end waitAndDelete") |
||||
|
||||
insert("wuzz.partial") |
||||
checkEvents("start waitAndDelete") |
||||
remove("wuzz.partial") |
||||
checkEvents("end waitAndDelete") |
||||
} |
||||
Loading…
Reference in new issue