tetragon: fix graceful shutdown and exit code #520

Merged · 1 commit · Nov 3, 2022
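
The gist of the change: the signal handler no longer performs cleanup and calls os.Exit(1); it only cancels a context created at the top of tetragonExecute, and all cleanup (printing stats, removing programs, stopping profiles, waiting on the cancel WaitGroup) runs via defer on the normal return path, so a SIGINT/SIGTERM shutdown exits cleanly. A minimal, self-contained sketch of that pattern, using illustrative names (run, cleanup) rather than tetragon's actual helpers:

```go
package main

import (
	"context"
	"log"
	"os"
	"os/signal"
	"syscall"
)

// cleanup stands in for the deferred work in the real change
// (obs.PrintStats, obs.RemovePrograms, stopProfile, cancelWg.Wait).
func cleanup() {
	log.Println("cleanup complete")
}

func run() error {
	ctx, cancel := context.WithCancel(context.Background())
	defer cancel()
	defer cleanup() // runs on every return path, including signal-driven shutdown

	sigs := make(chan os.Signal, 1)
	signal.Notify(sigs, syscall.SIGINT, syscall.SIGTERM)
	go func() {
		s := <-sigs
		log.Printf("Received signal %s, shutting down...", s)
		cancel() // only cancel; no cleanup or os.Exit(1) in the handler
	}()

	<-ctx.Done() // stand-in for the daemon's main work
	return nil   // normal return, so the process exits with code 0
}

func main() {
	if err := run(); err != nil {
		log.Fatal(err)
	}
}
```

With the old handler, os.Exit(1) bypassed the deferred cleanup and reported a failure exit code even on a clean SIGTERM.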
50 changes: 31 additions & 19 deletions cmd/tetragon/main.go
@@ -119,6 +119,9 @@ func stopProfile() {
}

func tetragonExecute() error {
ctx, cancel := context.WithCancel(context.Background())
defer cancel()

sigs := make(chan os.Signal, 1)
signal.Notify(sigs, syscall.SIGINT, syscall.SIGTERM)

@@ -143,6 +146,12 @@ func tetragonExecute() error {
defaults.NetnsDir = viper.GetString(keyNetnsDir)
}

// Setup file system mounts
bpf.CheckOrMountFS("")
bpf.CheckOrMountDebugFS()
bpf.CheckOrMountCgroup2()

// Start profilers first as we have to capture them in signal handling
if memProfile != "" {
log.WithField("file", memProfile).Info("Starting mem profiling")
}
@@ -160,25 +169,35 @@ func tetragonExecute() error {
log.WithField("file", cpuProfile).Info("Starting cpu profiling")
}

bpf.CheckOrMountFS("")
bpf.CheckOrMountDebugFS()
bpf.CheckOrMountCgroup2()

sensors.LogRegisteredSensorsAndProbes()
defer stopProfile()

// Raise memory resource
bpf.ConfigureResourceLimits()

// Get observer bpf maps and programs directory
observerDir := getObserverDir()
option.Config.BpfDir = observerDir
option.Config.MapDir = observerDir

// Get observer from configFile
obs := observer.NewObserver(configFile)
defer func() {
obs.PrintStats()
obs.RemovePrograms()
}()

go func() {
s := <-sigs
log.Infof("Received signal %s, shutting down...", s)
cancel()
}()

sensors.LogRegisteredSensorsAndProbes()

if err := obs.InitSensorManager(); err != nil {
return err
}

ctx, cancel := context.WithCancel(context.Background())
defer cancel()
var cancelWg sync.WaitGroup

/* Remove any stale programs, otherwise feature set change can cause
* old programs to linger resulting in undefined behavior. And because
* we recapture current running state from proc and/or have cache of
@@ -209,6 +228,9 @@ func tetragonExecute() error {
return err
}

var cancelWg sync.WaitGroup
defer cancelWg.Wait()

pm, err := tetragonGrpc.NewProcessManager(
ctx,
&cancelWg,
Expand All @@ -226,16 +248,6 @@ func tetragonExecute() error {
}
}

go func() {
<-sigs
obs.PrintStats()
obs.RemovePrograms()
stopProfile()
cancel()
cancelWg.Wait()
os.Exit(1)
}()

log.WithField("enabled", exportFilename != "").WithField("fileName", exportFilename).Info("Exporter configuration")
obs.AddListener(pm)
saveInitInfo()
2 changes: 1 addition & 1 deletion pkg/observer/observer.go
@@ -209,7 +209,7 @@ func (k *Observer) runEvents(stopCtx context.Context, ready func()) error {
for stopCtx.Err() == nil {
record, err := perfReader.Read()
if err != nil {
// NOTE(JM): Keeping the old behaviour for now and just counting the errors without stopping
// NOTE(JM and Djalal): count and log errors while excluding the stopping context
if stopCtx.Err() == nil {
k.errorCntr++
ringbufmetrics.ErrorsSet(float64(k.errorCntr))
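
The observer.go change only updates the comment; the surrounding loop already counts perf reader errors only while the stop context is still live, so a deliberate shutdown does not bump the error metric. A rough, self-contained sketch of that guard, with readOnce standing in for perfReader.Read() and a plain counter standing in for ringbufmetrics:

```go
package main

import (
	"context"
	"errors"
	"fmt"
	"time"
)

// readOnce stands in for perfReader.Read(); here it always fails so the
// error-counting path is exercised.
func readOnce() (string, error) {
	return "", errors.New("transient read error")
}

func main() {
	stopCtx, cancel := context.WithCancel(context.Background())
	go func() {
		time.Sleep(50 * time.Millisecond)
		cancel() // simulate shutdown
	}()

	errorCntr := 0
	for stopCtx.Err() == nil {
		_, err := readOnce()
		if err != nil {
			// Mirror the guard from the diff: count errors only while we
			// are not in the middle of stopping.
			if stopCtx.Err() == nil {
				errorCntr++
			}
			time.Sleep(5 * time.Millisecond)
			continue
		}
	}
	fmt.Println("read errors counted before shutdown:", errorCntr)
}
```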