tailcfg, control/controlhttp, control/controlclient: add ControlDialPlan field (#5648)

* tailcfg, control/controlhttp, control/controlclient: add ControlDialPlan field

This field allows the control server to provide explicit information
about how to connect to it; useful if the client's link status can
change after the initial connection, or if the DNS settings pushed by
the control server break future connections.

Change-Id: I720afe6289ec27d40a41b3dcb310ec45bd7e5f3e
Signed-off-by: Andrew Dunham <andrew@tailscale.com>
This commit is contained in:
Andrew Dunham
2022-09-23 13:06:55 -04:00
committed by GitHub
parent acc7baac6d
commit e1bdbfe710
8 changed files with 604 additions and 17 deletions
+45 -1
View File
@@ -76,6 +76,8 @@ type Direct struct {
popBrowser func(url string) // or nil
c2nHandler http.Handler // or nil
dialPlan ControlDialPlanner // can be nil
mu sync.Mutex // mutex guards the following fields
serverKey key.MachinePublic // original ("legacy") nacl crypto_box-based public key
serverNoiseKey key.MachinePublic
@@ -133,6 +135,34 @@ type Options struct {
// MapResponse.PingRequest queries from the control plane.
// If nil, PingRequest queries are not answered.
Pinger Pinger
// DialPlan contains and stores a previous dial plan that we received
// from the control server; if nil, we fall back to using DNS.
//
// If we receive a new DialPlan from the server, this value will be
// updated.
DialPlan ControlDialPlanner
}
// ControlDialPlanner is the interface optionally supplied when creating a
// control client to control exactly how TCP connections to the control plane
// are dialed.
//
// It is usually implemented by an atomic.Pointer.
type ControlDialPlanner interface {
// Load returns the current plan for how to connect to control.
//
// The returned plan can be nil. If so, connections should be made by
// resolving the control URL using DNS.
Load() *tailcfg.ControlDialPlan
// Store updates the dial plan with new directions from the control
// server.
//
// The dial plan can span multiple connections to the control server.
// That is, a dial plan received when connected over Wi-Fi is still
// valid for a subsequent connection over LTE after a network switch.
Store(*tailcfg.ControlDialPlan)
}
// Pinger is the LocalBackend.Ping method.
@@ -216,6 +246,7 @@ func NewDirect(opts Options) (*Direct, error) {
popBrowser: opts.PopBrowserURL,
c2nHandler: opts.C2NHandler,
dialer: opts.Dialer,
dialPlan: opts.DialPlan,
}
if opts.Hostinfo == nil {
c.SetHostinfo(hostinfo.New())
@@ -915,6 +946,14 @@ func (c *Direct) sendMapRequest(ctx context.Context, maxPolls int, readOnly bool
} else {
vlogf("netmap: got new map")
}
if resp.ControlDialPlan != nil {
if c.dialPlan != nil {
c.logf("netmap: got new dial plan from control")
c.dialPlan.Store(resp.ControlDialPlan)
} else {
c.logf("netmap: [unexpected] new dial plan; nowhere to store it")
}
}
select {
case timeoutReset <- struct{}{}:
@@ -1365,12 +1404,17 @@ func (c *Direct) getNoiseClient() (*noiseClient, error) {
if nc != nil {
return nc, nil
}
var dp func() *tailcfg.ControlDialPlan
if c.dialPlan != nil {
dp = c.dialPlan.Load
}
nc, err, _ := c.sfGroup.Do(struct{}{}, func() (*noiseClient, error) {
k, err := c.getMachinePrivKey()
if err != nil {
return nil, err
}
nc, err := newNoiseClient(k, serverNoiseKey, c.serverURL, c.dialer)
c.logf("creating new noise client")
nc, err := newNoiseClient(k, serverNoiseKey, c.serverURL, c.dialer, dp)
if err != nil {
return nil, err
}
+50 -6
View File
@@ -53,6 +53,11 @@ type noiseClient struct {
httpPort string // the default port to call
httpsPort string // the fallback Noise-over-https port
// dialPlan optionally returns a ControlDialPlan previously received
// from the control server; either the function or the return value can
// be nil.
dialPlan func() *tailcfg.ControlDialPlan
// mu only protects the following variables.
mu sync.Mutex
nextID int
@@ -61,7 +66,9 @@ type noiseClient struct {
// newNoiseClient returns a new noiseClient for the provided server and machine key.
// serverURL is of the form https://<host>:<port> (no trailing slash).
func newNoiseClient(priKey key.MachinePrivate, serverPubKey key.MachinePublic, serverURL string, dialer *tsdial.Dialer) (*noiseClient, error) {
//
// dialPlan may be nil
func newNoiseClient(priKey key.MachinePrivate, serverPubKey key.MachinePublic, serverURL string, dialer *tsdial.Dialer, dialPlan func() *tailcfg.ControlDialPlan) (*noiseClient, error) {
u, err := url.Parse(serverURL)
if err != nil {
return nil, err
@@ -89,6 +96,7 @@ func newNoiseClient(priKey key.MachinePrivate, serverPubKey key.MachinePublic, s
httpPort: httpPort,
httpsPort: httpsPort,
dialer: dialer,
dialPlan: dialPlan,
}
// Create the HTTP/2 Transport using a net/http.Transport
@@ -155,16 +163,51 @@ func (nc *noiseClient) dial(_, _ string, _ *tls.Config) (net.Conn, error) {
nc.nextID++
nc.mu.Unlock()
// Timeout is a little arbitrary, but plenty long enough for even the
// highest latency links.
ctx, cancel := context.WithTimeout(context.Background(), 10*time.Second)
defer cancel()
if tailcfg.CurrentCapabilityVersion > math.MaxUint16 {
// Panic, because a test should have started failing several
// thousand version numbers before getting to this point.
panic("capability version is too high to fit in the wire protocol")
}
var dialPlan *tailcfg.ControlDialPlan
if nc.dialPlan != nil {
dialPlan = nc.dialPlan()
}
// If we have a dial plan, then set our timeout as slightly longer than
// the maximum amount of time contained therein; we assume that
// explicit instructions on timeouts are more useful than a single
// hard-coded timeout.
//
// The default value of 5 is chosen so that, when there's no dial plan,
// we retain the previous behaviour of 10 seconds end-to-end timeout.
timeoutSec := 5.0
if dialPlan != nil {
for _, c := range dialPlan.Candidates {
if v := c.DialStartDelaySec + c.DialTimeoutSec; v > timeoutSec {
timeoutSec = v
}
}
}
// After we establish a connection, we need some time to actually
// upgrade it into a Noise connection. With a ballpark worst-case RTT
// of 1000ms, give ourselves an extra 5 seconds to complete the
// handshake.
timeoutSec += 5
// Be extremely defensive and ensure that the timeout is in the range
// [5, 60] seconds (e.g. if we accidentally get a negative number).
if timeoutSec > 60 {
timeoutSec = 60
} else if timeoutSec < 5 {
timeoutSec = 5
}
timeout := time.Duration(timeoutSec * float64(time.Second))
ctx, cancel := context.WithTimeout(context.Background(), timeout)
defer cancel()
conn, err := (&controlhttp.Dialer{
Hostname: nc.host,
HTTPPort: nc.httpPort,
@@ -173,6 +216,7 @@ func (nc *noiseClient) dial(_, _ string, _ *tls.Config) (net.Conn, error) {
ControlKey: nc.serverPubKey,
ProtocolVersion: uint16(tailcfg.CurrentCapabilityVersion),
Dialer: nc.dialer.SystemDial,
DialPlan: dialPlan,
}).Dial(ctx)
if err != nil {
return nil, err