mirror of
https://github.com/netbirdio/netbird.git
synced 2025-01-23 14:28:51 +01:00
Add faster availability DNS probe and update test domain to .com (#2280)
* Add faster availability DNS probe and update test domain to .com - Count success queries and compare it before doing after network map probes. - Reduce the first dns probe to 500ms - Updated test domain with com instead of . due to Palo alto DNS proxy server issues * use fqdn * Update client/internal/dns/upstream.go Co-authored-by: Viktor Liu <17948409+lixmal@users.noreply.github.com> --------- Co-authored-by: Viktor Liu <17948409+lixmal@users.noreply.github.com>
This commit is contained in:
parent
e78ec2e985
commit
19147f518e
@ -24,7 +24,7 @@ const (
|
||||
probeTimeout = 2 * time.Second
|
||||
)
|
||||
|
||||
const testRecord = "."
|
||||
const testRecord = "com."
|
||||
|
||||
type upstreamClient interface {
|
||||
exchange(ctx context.Context, upstream string, r *dns.Msg) (*dns.Msg, time.Duration, error)
|
||||
@ -42,6 +42,7 @@ type upstreamResolverBase struct {
|
||||
upstreamServers []string
|
||||
disabled bool
|
||||
failsCount atomic.Int32
|
||||
successCount atomic.Int32
|
||||
failsTillDeact int32
|
||||
mutex sync.Mutex
|
||||
reactivatePeriod time.Duration
|
||||
@ -124,6 +125,7 @@ func (u *upstreamResolverBase) ServeDNS(w dns.ResponseWriter, r *dns.Msg) {
|
||||
return
|
||||
}
|
||||
|
||||
u.successCount.Add(1)
|
||||
log.Tracef("took %s to query the upstream %s", t, upstream)
|
||||
|
||||
err = w.WriteMsg(rm)
|
||||
@ -172,6 +174,11 @@ func (u *upstreamResolverBase) probeAvailability() {
|
||||
default:
|
||||
}
|
||||
|
||||
// avoid probe if upstreams could resolve at least one query and fails count is less than failsTillDeact
|
||||
if u.successCount.Load() > 0 && u.failsCount.Load() < u.failsTillDeact {
|
||||
return
|
||||
}
|
||||
|
||||
var success bool
|
||||
var mu sync.Mutex
|
||||
var wg sync.WaitGroup
|
||||
@ -183,7 +190,7 @@ func (u *upstreamResolverBase) probeAvailability() {
|
||||
wg.Add(1)
|
||||
go func() {
|
||||
defer wg.Done()
|
||||
err := u.testNameserver(upstream)
|
||||
err := u.testNameserver(upstream, 500*time.Millisecond)
|
||||
if err != nil {
|
||||
errors = multierror.Append(errors, err)
|
||||
log.Warnf("probing upstream nameserver %s: %s", upstream, err)
|
||||
@ -224,7 +231,7 @@ func (u *upstreamResolverBase) waitUntilResponse() {
|
||||
}
|
||||
|
||||
for _, upstream := range u.upstreamServers {
|
||||
if err := u.testNameserver(upstream); err != nil {
|
||||
if err := u.testNameserver(upstream, probeTimeout); err != nil {
|
||||
log.Tracef("upstream check for %s: %s", upstream, err)
|
||||
} else {
|
||||
// at least one upstream server is available, stop probing
|
||||
@ -244,6 +251,7 @@ func (u *upstreamResolverBase) waitUntilResponse() {
|
||||
|
||||
log.Infof("upstreams %s are responsive again. Adding them back to system", u.upstreamServers)
|
||||
u.failsCount.Store(0)
|
||||
u.successCount.Add(1)
|
||||
u.reactivate()
|
||||
u.disabled = false
|
||||
}
|
||||
@ -265,13 +273,14 @@ func (u *upstreamResolverBase) disable(err error) {
|
||||
}
|
||||
|
||||
log.Warnf("Upstream resolving is Disabled for %v", reactivatePeriod)
|
||||
u.successCount.Store(0)
|
||||
u.deactivate(err)
|
||||
u.disabled = true
|
||||
go u.waitUntilResponse()
|
||||
}
|
||||
|
||||
func (u *upstreamResolverBase) testNameserver(server string) error {
|
||||
ctx, cancel := context.WithTimeout(u.ctx, probeTimeout)
|
||||
func (u *upstreamResolverBase) testNameserver(server string, timeout time.Duration) error {
|
||||
ctx, cancel := context.WithTimeout(u.ctx, timeout)
|
||||
defer cancel()
|
||||
|
||||
r := new(dns.Msg).SetQuestion(testRecord, dns.TypeSOA)
|
||||
|
Loading…
Reference in New Issue
Block a user