Mirror of https://github.com/netbirdio/netbird.git, synced 2025-02-02 19:39:17 +01:00
Add faster availability DNS probe and update test domain to .com (#2280)
* Add faster availability DNS probe and update test domain to .com
  - Count successful queries and check the count before running the after-network-map probes.
  - Reduce the first DNS probe timeout to 500ms.
  - Update the test domain to "com" instead of "." due to Palo Alto DNS proxy server issues.
* Use fqdn
* Update client/internal/dns/upstream.go

Co-authored-by: Viktor Liu <17948409+lixmal@users.noreply.github.com>
This commit is contained in: parent e78ec2e985, commit 19147f518e
--- a/client/internal/dns/upstream.go
+++ b/client/internal/dns/upstream.go
@@ -24,7 +24,7 @@ const (
 	probeTimeout = 2 * time.Second
 )
 
-const testRecord = "."
+const testRecord = "com."
 
 type upstreamClient interface {
 	exchange(ctx context.Context, upstream string, r *dns.Msg) (*dns.Msg, time.Duration, error)
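For context, here is a minimal standalone sketch (not the NetBird code) of the kind of probe query the new test record enables, using the same miekg/dns client API the file relies on; the upstream address and 500ms timeout are illustrative values. The trailing dot makes "com." a fully qualified name, which matches the "use fqdn" follow-up in the commit message.

package main

import (
	"context"
	"fmt"
	"time"

	"github.com/miekg/dns"
)

// probeSOA asks an upstream for the SOA record of "com." instead of the root
// zone "."; some DNS proxy servers handle root-zone queries inconsistently,
// which is the motivation for the testRecord change above.
func probeSOA(ctx context.Context, upstream string, timeout time.Duration) error {
	ctx, cancel := context.WithTimeout(ctx, timeout)
	defer cancel()

	m := new(dns.Msg).SetQuestion("com.", dns.TypeSOA)
	client := &dns.Client{}
	r, rtt, err := client.ExchangeContext(ctx, m, upstream)
	if err != nil {
		return err
	}
	if r.Rcode != dns.RcodeSuccess {
		return fmt.Errorf("upstream %s returned rcode %d", upstream, r.Rcode)
	}
	fmt.Printf("upstream %s answered in %s\n", upstream, rtt)
	return nil
}

func main() {
	if err := probeSOA(context.Background(), "8.8.8.8:53", 500*time.Millisecond); err != nil {
		fmt.Println("probe failed:", err)
	}
}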
@@ -42,6 +42,7 @@ type upstreamResolverBase struct {
 	upstreamServers  []string
 	disabled         bool
 	failsCount       atomic.Int32
+	successCount     atomic.Int32
 	failsTillDeact   int32
 	mutex            sync.Mutex
 	reactivatePeriod time.Duration
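The new successCount mirrors the existing failsCount: an atomic.Int32 can be incremented from concurrent DNS handler goroutines and read from the probe path without holding the struct's mutex. A minimal sketch of that pattern (variable names here are illustrative, not the NetBird fields):

package main

import (
	"fmt"
	"sync"
	"sync/atomic"
)

func main() {
	var successCount atomic.Int32 // safe for concurrent use without a mutex

	var wg sync.WaitGroup
	for i := 0; i < 10; i++ { // stand-ins for concurrent DNS handlers
		wg.Add(1)
		go func() {
			defer wg.Done()
			successCount.Add(1) // count one successfully answered query
		}()
	}
	wg.Wait()

	fmt.Println("successful queries:", successCount.Load()) // prints 10
}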
@@ -124,6 +125,7 @@ func (u *upstreamResolverBase) ServeDNS(w dns.ResponseWriter, r *dns.Msg) {
 		return
 	}
 
+	u.successCount.Add(1)
 	log.Tracef("took %s to query the upstream %s", t, upstream)
 
 	err = w.WriteMsg(rm)
@@ -172,6 +174,11 @@ func (u *upstreamResolverBase) probeAvailability() {
 	default:
 	}
 
+	// avoid probe if upstreams could resolve at least one query and fails count is less than failsTillDeact
+	if u.successCount.Load() > 0 && u.failsCount.Load() < u.failsTillDeact {
+		return
+	}
+
 	var success bool
 	var mu sync.Mutex
 	var wg sync.WaitGroup
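In other words, the post-network-map probe is skipped as long as the upstreams have resolved at least one query since the counters were last reset and the failure count is still below the deactivation threshold. A small sketch of that predicate with made-up values (shouldSkipProbe is a hypothetical helper, not a function in the codebase):

package main

import "fmt"

// shouldSkipProbe mirrors the gate added above: skip probing when the
// upstreams have resolved at least one query and failures stay below the
// deactivation threshold.
func shouldSkipProbe(successCount, failsCount, failsTillDeact int32) bool {
	return successCount > 0 && failsCount < failsTillDeact
}

func main() {
	fmt.Println(shouldSkipProbe(12, 0, 5)) // true: healthy upstream, no probe needed
	fmt.Println(shouldSkipProbe(0, 0, 5))  // false: nothing resolved yet, probe
	fmt.Println(shouldSkipProbe(12, 5, 5)) // false: failure threshold reached, probe
}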
@@ -183,7 +190,7 @@ func (u *upstreamResolverBase) probeAvailability() {
 		wg.Add(1)
 		go func() {
 			defer wg.Done()
-			err := u.testNameserver(upstream)
+			err := u.testNameserver(upstream, 500*time.Millisecond)
 			if err != nil {
 				errors = multierror.Append(errors, err)
 				log.Warnf("probing upstream nameserver %s: %s", upstream, err)
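Each upstream is now probed in its own goroutine with the shorter 500ms deadline instead of the 2-second probeTimeout. A standalone sketch of the same fan-out using only the standard library (errors.Join stands in for the multierror accumulation, and probeUpstream is a placeholder for the real DNS query):

package main

import (
	"context"
	"errors"
	"fmt"
	"sync"
	"time"
)

// probeUpstream stands in for testNameserver; a real implementation would
// send a DNS query and honor the context deadline.
func probeUpstream(ctx context.Context, upstream string) error {
	select {
	case <-time.After(50 * time.Millisecond): // simulated fast answer
		return nil
	case <-ctx.Done():
		return fmt.Errorf("%s: %w", upstream, ctx.Err())
	}
}

func main() {
	upstreams := []string{"1.1.1.1:53", "8.8.8.8:53"}

	var (
		mu   sync.Mutex
		errs error
		wg   sync.WaitGroup
	)
	for _, upstream := range upstreams {
		upstream := upstream
		wg.Add(1)
		go func() {
			defer wg.Done()
			ctx, cancel := context.WithTimeout(context.Background(), 500*time.Millisecond)
			defer cancel()
			if err := probeUpstream(ctx, upstream); err != nil {
				mu.Lock()
				errs = errors.Join(errs, err)
				mu.Unlock()
			}
		}()
	}
	wg.Wait()

	if errs != nil {
		fmt.Println("probe failures:", errs)
	} else {
		fmt.Println("all upstreams answered within 500ms")
	}
}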
@@ -224,7 +231,7 @@ func (u *upstreamResolverBase) waitUntilResponse() {
 		}
 
 		for _, upstream := range u.upstreamServers {
-			if err := u.testNameserver(upstream); err != nil {
+			if err := u.testNameserver(upstream, probeTimeout); err != nil {
 				log.Tracef("upstream check for %s: %s", upstream, err)
 			} else {
 				// at least one upstream server is available, stop probing
@@ -244,6 +251,7 @@ func (u *upstreamResolverBase) waitUntilResponse() {
 
 	log.Infof("upstreams %s are responsive again. Adding them back to system", u.upstreamServers)
 	u.failsCount.Store(0)
+	u.successCount.Add(1)
 	u.reactivate()
 	u.disabled = false
 }
@@ -265,13 +273,14 @@ func (u *upstreamResolverBase) disable(err error) {
 	}
 
 	log.Warnf("Upstream resolving is Disabled for %v", reactivatePeriod)
+	u.successCount.Store(0)
 	u.deactivate(err)
 	u.disabled = true
 	go u.waitUntilResponse()
 }
 
-func (u *upstreamResolverBase) testNameserver(server string) error {
-	ctx, cancel := context.WithTimeout(u.ctx, probeTimeout)
+func (u *upstreamResolverBase) testNameserver(server string, timeout time.Duration) error {
+	ctx, cancel := context.WithTimeout(u.ctx, timeout)
 	defer cancel()
 
 	r := new(dns.Msg).SetQuestion(testRecord, dns.TypeSOA)
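Parameterizing the timeout lets the two call sites above choose different deadlines: the availability probe after a network map update passes the fast 500ms value, while the reactivation loop in waitUntilResponse keeps the original 2-second probeTimeout. A hedged usage sketch (this testNameserver is a simplified stand-in, not the method from the diff):

package main

import (
	"context"
	"fmt"
	"time"

	"github.com/miekg/dns"
)

const probeTimeout = 2 * time.Second // slow path, as in the constant above

// testNameserver sends one SOA query for the test record, bounded by the
// caller-chosen timeout.
func testNameserver(ctx context.Context, server string, timeout time.Duration) error {
	ctx, cancel := context.WithTimeout(ctx, timeout)
	defer cancel()

	r := new(dns.Msg).SetQuestion("com.", dns.TypeSOA)
	_, _, err := (&dns.Client{}).ExchangeContext(ctx, r, server)
	return err
}

func main() {
	ctx := context.Background()

	// Fast availability probe right after a network map update.
	if err := testNameserver(ctx, "8.8.8.8:53", 500*time.Millisecond); err != nil {
		fmt.Println("fast probe failed:", err)
	}

	// Patient reactivation check while the upstream is disabled.
	if err := testNameserver(ctx, "8.8.8.8:53", probeTimeout); err != nil {
		fmt.Println("reactivation probe failed:", err)
	}
}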