[client] add profiling dumps to debug package (#3517)

Enhances debugging capabilities by adding support for goroutine, mutex, and block profiling, while also updating state dump tracking and refining test and release settings.

- Adds pprof-based profiling for goroutine, mutex, and block profiles in the debug bundle.
- Updates state dump functionality by incorporating new status and key fields.
- Adjusts test validations and default flag/retention settings.
This commit is contained in:
Maycon Santos 2025-03-23 13:46:09 +01:00 committed by GitHub
parent 051a5a4adc
commit bd8f0c1ef3
No known key found for this signature in database
GPG Key ID: B5690EEEBB952194
8 changed files with 71 additions and 14 deletions

View File

@ -87,25 +87,25 @@ jobs:
with:
name: release
path: dist/
retention-days: 3
retention-days: 7
- name: upload linux packages
uses: actions/upload-artifact@v4
with:
name: linux-packages
path: dist/netbird_linux**
retention-days: 3
retention-days: 7
- name: upload windows packages
uses: actions/upload-artifact@v4
with:
name: windows-packages
path: dist/netbird_windows**
retention-days: 3
retention-days: 7
- name: upload macos packages
uses: actions/upload-artifact@v4
with:
name: macos-packages
path: dist/netbird_darwin**
retention-days: 3
retention-days: 7
release_ui:
runs-on: ubuntu-latest

View File

@ -180,7 +180,7 @@ func init() {
upCmd.PersistentFlags().BoolVar(&serverSSHAllowed, serverSSHAllowedFlag, false, "Allow SSH server on peer. If enabled, the SSH server will be permitted")
upCmd.PersistentFlags().BoolVar(&autoConnectDisabled, disableAutoConnectFlag, false, "Disables auto-connect feature. If enabled, then the client won't connect automatically when the service starts.")
debugCmd.PersistentFlags().BoolVarP(&debugSystemInfoFlag, systemInfoFlag, "S", false, "Adds system information to the debug bundle")
debugCmd.PersistentFlags().BoolVarP(&debugSystemInfoFlag, systemInfoFlag, "S", true, "Adds system information to the debug bundle")
}
// SetupCloseHandler handles SIGTERM signal and exits with success

View File

@ -140,7 +140,7 @@ func NewConn(engineCtx context.Context, config ConnConfig, statusRecorder *Statu
statusRelay: NewAtomicConnStatus(),
statusICE: NewAtomicConnStatus(),
semaphore: semaphore,
dumpState: newStateDump(connLog),
dumpState: newStateDump(config.Key, connLog, statusRecorder),
}
ctrl := isController(config)
@ -258,7 +258,7 @@ func (conn *Conn) Close() {
// doesn't block, discards the message if connection wasn't ready
func (conn *Conn) OnRemoteAnswer(answer OfferAnswer) bool {
conn.dumpState.RemoteAnswer()
conn.log.Infof("OnRemoteAnswer, status ICE: %s, status relay: %s", conn.statusICE, conn.statusRelay)
conn.log.Infof("OnRemoteAnswer, priority: %s, status ICE: %s, status relay: %s", conn.currentConnPriority, conn.statusICE, conn.statusRelay)
return conn.handshaker.OnRemoteAnswer(answer)
}

View File

@ -10,6 +10,8 @@ import (
type stateDump struct {
log *log.Entry
status *Status
key string
sentOffer int
remoteOffer int
@ -24,9 +26,11 @@ type stateDump struct {
mu sync.Mutex
}
func newStateDump(log *log.Entry) *stateDump {
func newStateDump(key string, log *log.Entry, statusRecorder *Status) *stateDump {
return &stateDump{
log: log,
status: statusRecorder,
key: key,
}
}
@ -66,8 +70,14 @@ func (s *stateDump) dumpState() {
s.mu.Lock()
defer s.mu.Unlock()
s.log.Infof("Dump stat: SentOffer: %d, RemoteOffer: %d, RemoteAnswer: %d, RemoteCandidate: %d, P2PConnected: %d, SwitchToRelay: %d, WGCheckSuccess: %d, RelayConnected: %d, LocalProxies: %d",
s.sentOffer, s.remoteOffer, s.remoteAnswer, s.remoteCandidate, s.p2pConnected, s.switchToRelay, s.wgCheckSuccess, s.relayConnected, s.localProxies)
status := "unknown"
state, e := s.status.GetPeer(s.key)
if e == nil {
status = state.ConnStatus.String()
}
s.log.Infof("Dump stat: Status: %s, SentOffer: %d, RemoteOffer: %d, RemoteAnswer: %d, RemoteCandidate: %d, P2PConnected: %d, SwitchToRelay: %d, WGCheckSuccess: %d, RelayConnected: %d, LocalProxies: %d",
status, s.sentOffer, s.remoteOffer, s.remoteAnswer, s.remoteCandidate, s.p2pConnected, s.switchToRelay, s.wgCheckSuccess, s.relayConnected, s.localProxies)
}
func (s *stateDump) RemoteAnswer() {

View File

@ -43,7 +43,7 @@ func TestWGWatcher_EnableWgWatcher(t *testing.T) {
mlog := log.WithField("peer", "tet")
mocWgIface := &MocWgIface{}
watcher := NewWGWatcher(mlog, mocWgIface, "", newStateDump(mlog))
watcher := NewWGWatcher(mlog, mocWgIface, "", newStateDump("peer", mlog, &Status{}))
ctx, cancel := context.WithCancel(context.Background())
defer cancel()
@ -72,7 +72,7 @@ func TestWGWatcher_ReEnable(t *testing.T) {
mlog := log.WithField("peer", "tet")
mocWgIface := &MocWgIface{}
watcher := NewWGWatcher(mlog, mocWgIface, "", newStateDump(mlog))
watcher := NewWGWatcher(mlog, mocWgIface, "", newStateDump("peer", mlog, &Status{}))
ctx, cancel := context.WithCancel(context.Background())
defer cancel()

View File

@ -17,6 +17,7 @@ import (
"os"
"path/filepath"
"runtime"
"runtime/pprof"
"sort"
"strings"
"time"
@ -46,6 +47,9 @@ nftables.txt: Anonymized nftables rules with packet counters, if --system-info f
config.txt: Anonymized configuration information of the NetBird client.
network_map.json: Anonymized network map containing peer configurations, routes, DNS settings, and firewall rules.
state.json: Anonymized client state dump containing netbird states.
mutex.prof: Mutex profiling information.
goroutine.prof: Goroutine profiling information.
block.prof: Block profiling information.
Anonymization Process
@ -88,6 +92,14 @@ The state file follows the same anonymization rules as other files:
- Domain names are consistently anonymized
- Technical identifiers and non-sensitive data remain unchanged
Mutex, Goroutines, and Block Profiling Files
The goroutine, block, and mutex profiling files contain process information that might help the NetBird team diagnose performance issues. The information in these files doesn't contain personal data.
You can inspect each file using the following Go command:
go tool pprof -http=:8088 mutex.prof
This will open a web browser tab with the profiling information.
Routes
For anonymized routes, the IP addresses are replaced as described above. The prefix length remains unchanged. Note that for prefixes, the anonymized IP might not be a network address, but the prefix length is still correct.
@ -188,6 +200,10 @@ func (s *Server) createArchive(bundlePath *os.File, req *proto.DebugBundleReques
s.addSystemInfo(req, anonymizer, archive)
}
if err := s.addProf(req, anonymizer, archive); err != nil {
log.Errorf("Failed to add goroutines rules to debug bundle: %v", err)
}
if err := s.addNetworkMap(req, anonymizer, archive); err != nil {
return fmt.Errorf("add network map: %w", err)
}
@ -310,6 +326,29 @@ func (s *Server) addCommonConfigFields(configContent *strings.Builder) {
configContent.WriteString(fmt.Sprintf("BlockLANAccess: %v\n", s.config.BlockLANAccess))
}
// addProf captures the goroutine, block, and mutex profiles of the running
// process and stores each one in the archive as "<profile>.prof".
//
// Block and mutex profiling are enabled with rate 1 for a fixed five-second
// window before the profiles are written, so the call blocks for at least
// that long. On return, block profiling is switched off and the mutex profile
// fraction is restored to the value it had on entry, so profiling configured
// elsewhere in the process is not clobbered.
//
// req and anonymizer are not used by this function.
// NOTE(review): presumably kept so the signature matches the other add*
// helpers — confirm.
//
// It returns an error if a profile cannot be written or added to the archive.
func (s *Server) addProf(req *proto.DebugBundleRequest, anonymizer *anonymize.Anonymizer, archive *zip.Writer) error {
	// The runtime does not expose the current block profile rate, so the
	// only option on exit is to disable it again.
	runtime.SetBlockProfileRate(1)
	defer runtime.SetBlockProfileRate(0)

	// SetMutexProfileFraction returns the previous setting; put it back on
	// exit instead of forcing it to 0.
	prevMutexFraction := runtime.SetMutexProfileFraction(1)
	defer runtime.SetMutexProfileFraction(prevMutexFraction)

	// Leave profiling enabled for a while so block and mutex events can be
	// recorded before the snapshot is taken.
	time.Sleep(5 * time.Second)

	for _, profile := range []string{"goroutine", "block", "mutex"} {
		var buf bytes.Buffer
		// debug=0 is what the original passes; the bundle README suggests
		// reading the result with `go tool pprof`.
		if err := pprof.Lookup(profile).WriteTo(&buf, 0); err != nil {
			return fmt.Errorf("write %s profile: %w", profile, err)
		}

		if err := addFileToZip(archive, &buf, profile+".prof"); err != nil {
			return fmt.Errorf("add %s file to zip: %w", profile, err)
		}
	}

	return nil
}
func (s *Server) addRoutes(req *proto.DebugBundleRequest, anonymizer *anonymize.Anonymizer, archive *zip.Writer) error {
routes, err := systemops.GetRoutesFromTable()
if err != nil {

View File

@ -220,6 +220,10 @@ func generateAccountSQLTypes(account *types.Account) {
account.SetupKeysG = append(account.SetupKeysG, *key)
}
if len(account.SetupKeys) != len(account.SetupKeysG) {
log.Warnf("SetupKeysG length mismatch for account %s", account.Id)
}
for id, peer := range account.Peers {
peer.ID = id
account.PeersG = append(account.PeersG, *peer)

View File

@ -148,6 +148,10 @@ func runLargeTest(t *testing.T, store Store) {
account.NameServerGroups[nameserver.ID] = nameserver
setupKey, _ := types.GenerateDefaultSetupKey()
_, exists := account.SetupKeys[setupKey.Key]
if exists {
t.Errorf("setup key already exists")
}
account.SetupKeys[setupKey.Key] = setupKey
}