health: introduce captive-portal-detected Warnable (#12707)

Updates tailscale/tailscale#1634

This PR introduces a new `captive-portal-detected` Warnable which is set to an unhealthy state whenever a captive portal is detected on the local network, preventing Tailscale from connecting.



ipn/ipnlocal: fix captive portal loop shutdown


Change-Id: I7cafdbce68463a16260091bcec1741501a070c95

net/captivedetection: fix mutex misuse

ipn/ipnlocal: ensure that we don't fail to start the timer


Change-Id: I3e43fb19264d793e8707c5031c0898e48e3e7465

Signed-off-by: Andrew Dunham <andrew@du.nham.ca>
Signed-off-by: Andrea Gottardo <andrea@gottardo.me>
This commit is contained in:
Andrea Gottardo
2024-07-26 11:25:55 -07:00
committed by GitHub
parent cf97cff33b
commit 90be06bd5b
15 changed files with 750 additions and 154 deletions
+3 -76
View File
@@ -14,13 +14,11 @@ import (
"io"
"log"
"maps"
"math/rand/v2"
"net"
"net/http"
"net/netip"
"runtime"
"sort"
"strings"
"sync"
"syscall"
"time"
@@ -28,6 +26,7 @@ import (
"github.com/tcnksm/go-httpstat"
"tailscale.com/derp/derphttp"
"tailscale.com/envknob"
"tailscale.com/net/captivedetection"
"tailscale.com/net/dnscache"
"tailscale.com/net/neterror"
"tailscale.com/net/netmon"
@@ -847,11 +846,8 @@ func (c *Client) GetReport(ctx context.Context, dm *tailcfg.DERPMap, opts *GetRe
tmr := time.AfterFunc(c.captivePortalDelay(), func() {
defer close(ch)
found, err := c.checkCaptivePortal(ctx, dm, preferredDERP)
if err != nil {
c.logf("[v1] checkCaptivePortal: %v", err)
return
}
d := captivedetection.NewDetector(c.logf)
found := d.Detect(ctx, c.NetMon, dm, preferredDERP)
rs.report.CaptivePortal.Set(found)
})
@@ -988,75 +984,6 @@ func (c *Client) finishAndStoreReport(rs *reportState, dm *tailcfg.DERPMap) *Rep
return report
}
var noRedirectClient = &http.Client{
// No redirects allowed
CheckRedirect: func(req *http.Request, via []*http.Request) error {
return http.ErrUseLastResponse
},
// Remaining fields are the same as the default client.
Transport: http.DefaultClient.Transport,
Jar: http.DefaultClient.Jar,
Timeout: http.DefaultClient.Timeout,
}
// checkCaptivePortal reports whether or not we think the system is behind a
// captive portal, detected by making a request to a URL that we know should
// return a "204 No Content" response and checking if that's what we get.
//
// The boolean return is whether we think we have a captive portal.
func (c *Client) checkCaptivePortal(ctx context.Context, dm *tailcfg.DERPMap, preferredDERP int) (bool, error) {
defer noRedirectClient.CloseIdleConnections()
// If we have a preferred DERP region with more than one node, try
// that; otherwise, pick a random one not marked as "Avoid".
if preferredDERP == 0 || dm.Regions[preferredDERP] == nil ||
(preferredDERP != 0 && len(dm.Regions[preferredDERP].Nodes) == 0) {
rids := make([]int, 0, len(dm.Regions))
for id, reg := range dm.Regions {
if reg == nil || reg.Avoid || len(reg.Nodes) == 0 {
continue
}
rids = append(rids, id)
}
if len(rids) == 0 {
return false, nil
}
preferredDERP = rids[rand.IntN(len(rids))]
}
node := dm.Regions[preferredDERP].Nodes[0]
if strings.HasSuffix(node.HostName, tailcfg.DotInvalid) {
// Don't try to connect to invalid hostnames. This occurred in tests:
// https://github.com/tailscale/tailscale/issues/6207
// TODO(bradfitz,andrew-d): how to actually handle this nicely?
return false, nil
}
req, err := http.NewRequestWithContext(ctx, "GET", "http://"+node.HostName+"/generate_204", nil)
if err != nil {
return false, err
}
// Note: the set of valid characters in a challenge and the total
// length is limited; see isChallengeChar in cmd/derper for more
// details.
chal := "ts_" + node.HostName
req.Header.Set("X-Tailscale-Challenge", chal)
r, err := noRedirectClient.Do(req)
if err != nil {
return false, err
}
defer r.Body.Close()
expectedResponse := "response " + chal
validResponse := r.Header.Get("X-Tailscale-Response") == expectedResponse
c.logf("[v2] checkCaptivePortal url=%q status_code=%d valid_response=%v", req.URL.String(), r.StatusCode, validResponse)
return r.StatusCode != 204 || !validResponse, nil
}
// runHTTPOnlyChecks is the netcheck done by environments that can
// only do HTTP requests, such as ws/wasm.
func (c *Client) runHTTPOnlyChecks(ctx context.Context, last *Report, rs *reportState, dm *tailcfg.DERPMap) error {
-50
View File
@@ -15,14 +15,12 @@ import (
"sort"
"strconv"
"strings"
"sync/atomic"
"testing"
"time"
"tailscale.com/net/netmon"
"tailscale.com/net/stun/stuntest"
"tailscale.com/tailcfg"
"tailscale.com/tstest"
"tailscale.com/tstest/nettest"
)
@@ -778,54 +776,6 @@ func TestSortRegions(t *testing.T) {
}
}
func TestNoCaptivePortalWhenUDP(t *testing.T) {
nettest.SkipIfNoNetwork(t) // empirically. not sure why.
// Override noRedirectClient to handle the /generate_204 endpoint
var generate204Called atomic.Bool
tr := RoundTripFunc(func(req *http.Request) *http.Response {
if !strings.HasSuffix(req.URL.String(), "/generate_204") {
panic("bad URL: " + req.URL.String())
}
generate204Called.Store(true)
return &http.Response{
StatusCode: http.StatusNoContent,
Header: make(http.Header),
}
})
tstest.Replace(t, &noRedirectClient.Transport, http.RoundTripper(tr))
stunAddr, cleanup := stuntest.Serve(t)
defer cleanup()
c := newTestClient(t)
c.testEnoughRegions = 1
// Set the delay long enough that we have time to cancel it
// when our STUN probe succeeds.
c.testCaptivePortalDelay = 10 * time.Second
ctx, cancel := context.WithTimeout(context.Background(), 1*time.Second)
defer cancel()
if err := c.Standalone(ctx, "127.0.0.1:0"); err != nil {
t.Fatal(err)
}
r, err := c.GetReport(ctx, stuntest.DERPMapOf(stunAddr.String()), nil)
if err != nil {
t.Fatal(err)
}
// Should not have called our captive portal function.
if generate204Called.Load() {
t.Errorf("captive portal check called; expected no call")
}
if r.CaptivePortal != "" {
t.Errorf("got CaptivePortal=%q, want empty", r.CaptivePortal)
}
}
type RoundTripFunc func(req *http.Request) *http.Response
func (f RoundTripFunc) RoundTrip(req *http.Request) (*http.Response, error) {