From 6cbfc2f3babe5e6e55ddc589dee413801f663797 Mon Sep 17 00:00:00 2001 From: James Scott Date: Tue, 10 Feb 2026 13:24:00 -0800 Subject: [PATCH] logtail/filch: fix filch test panic (#18660) Updates rotateLocked so that we hold the activeStderrWriteForTest write lock around the dup2Stderr call, rather than acquiring it only after dup2 was already compelete. This ensures no stderrWriteForTest calls can race with the dup2 syscall. The now unused waitIdleStderrForTest has been removed. On macOS, dup2 and write on the same file descriptor are not atomic with respect to each other, when rotateLocked called dup2Stderr to redirect the stderr fd to a new file, concurrent goroutines calling stderrWriteForTest could observe the fd in a transiently invalid state, resulting in the bad file descripter. Fixes tailscale/corp#36953 Signed-off-by: James Scott --- logtail/filch/filch.go | 27 ++++++++++++++------------- logtail/filch/filch_test.go | 14 -------------- 2 files changed, 14 insertions(+), 27 deletions(-) diff --git a/logtail/filch/filch.go b/logtail/filch/filch.go index 1bd82d8c4..8ae912306 100644 --- a/logtail/filch/filch.go +++ b/logtail/filch/filch.go @@ -297,12 +297,6 @@ func stderrWriteForTest(b []byte) int { return must.Get(os.Stderr.Write(b)) } -// waitIdleStderrForTest waits until there are no active stderrWriteForTest calls. -func waitIdleStderrForTest() { - activeStderrWriteForTest.Lock() - defer activeStderrWriteForTest.Unlock() -} - // rotateLocked swaps f.newer and f.older such that: // // - f.newer will be truncated and future writes will be appended to the end. @@ -350,8 +344,15 @@ func (f *Filch) rotateLocked() error { // Note that mutex does not prevent stderr writes. prevSize := f.newlyWrittenBytes + f.newlyFilchedBytes f.newlyWrittenBytes, f.newlyFilchedBytes = 0, 0 + + // Hold the write lock around dup2 to prevent concurrent + // stderrWriteForTest calls from racing with dup2 on the same fd. + // On macOS, dup2 and write are not atomic with respect to each other, + // so a concurrent write can observe a bad file descriptor. + activeStderrWriteForTest.Lock() if f.OrigStderr != nil { if err := dup2Stderr(f.newer); err != nil { + activeStderrWriteForTest.Unlock() return err } } @@ -369,15 +370,15 @@ func (f *Filch) rotateLocked() error { // In rare cases, it is possible that [Filch.TryReadLine] consumes // the entire older file before the write commits, // leading to dropped stderr lines. - waitIdleStderrForTest() - if fi, err := f.older.Stat(); err != nil { + fi, err := f.older.Stat() + activeStderrWriteForTest.Unlock() + if err != nil { return err - } else { - filchedBytes := max(0, fi.Size()-prevSize) - f.writeBytes.Add(filchedBytes) - f.filchedBytes.Add(filchedBytes) - f.storedBytes.Set(fi.Size()) // newer has been truncated, so only older matters } + filchedBytes := max(0, fi.Size()-prevSize) + f.writeBytes.Add(filchedBytes) + f.filchedBytes.Add(filchedBytes) + f.storedBytes.Set(fi.Size()) // newer has been truncated, so only older matters // Start reading from the start of older. if _, err := f.older.Seek(0, io.SeekStart); err != nil { diff --git a/logtail/filch/filch_test.go b/logtail/filch/filch_test.go index f2f9e9e3b..2538233cf 100644 --- a/logtail/filch/filch_test.go +++ b/logtail/filch/filch_test.go @@ -5,7 +5,6 @@ package filch import ( "bytes" - "crypto/sha256" "encoding/json" "fmt" "io" @@ -121,19 +120,7 @@ func setupStderr(t *testing.T) { tstest.Replace(t, &os.Stderr, pipeW) } -func skipDarwin(t testing.TB) { - if runtime.GOOS != "darwin" { - return - } - src := must.Get(os.ReadFile("filch.go")) - if fmt.Sprintf("%x", sha256.Sum256(src)) != "a32da5e22034823c19ac7f29960e3646f540d67f85a0028832cab1f1557fc693" { - t.Errorf("filch.go has changed since this test was skipped; please delete this skip") - } - t.Skip("skipping known failing test on darwin; fixed in progress by https://github.com/tailscale/tailscale/pull/18660") -} - func TestConcurrentWriteAndRead(t *testing.T) { - skipDarwin(t) if replaceStderrSupportedForTest { setupStderr(t) } @@ -296,7 +283,6 @@ func TestMaxLineSize(t *testing.T) { } func TestMaxFileSize(t *testing.T) { - skipDarwin(t) if replaceStderrSupportedForTest { t.Run("ReplaceStderr:true", func(t *testing.T) { testMaxFileSize(t, true) }) }