Skip to content

Commit bb7a97b

Browse files
tixxdz authored and jrfastab committed
tetragon: fix graceful shutdown and exit code
Right now Tetragon always exits with 1. Let's fix this and improve graceful shutdown, so that service monitors, container managers, etc. won't treat Tetragon as if it had failed. Some startup logic was reordered to improve how we clean things up. This change also makes tetragonExecute() return to its callers, allowing Tetragon's main to exit with 0. Signed-off-by: Djalal Harouni <[email protected]>
1 parent f5f7f1b commit bb7a97b

File tree

2 files changed

+32
-20
lines changed

2 files changed

+32
-20
lines changed

cmd/tetragon/main.go

Lines changed: 31 additions & 19 deletions
Original file line number | Diff line number | Diff line change
@@ -119,6 +119,9 @@ func stopProfile() {
119119
}
120120

121121
func tetragonExecute() error {
122+
ctx, cancel := context.WithCancel(context.Background())
123+
defer cancel()
124+
122125
sigs := make(chan os.Signal, 1)
123126
signal.Notify(sigs, syscall.SIGINT, syscall.SIGTERM)
124127

@@ -143,6 +146,12 @@ func tetragonExecute() error {
143146
defaults.NetnsDir = viper.GetString(keyNetnsDir)
144147
}
145148

149+
// Setup file system mounts
150+
bpf.CheckOrMountFS("")
151+
bpf.CheckOrMountDebugFS()
152+
bpf.CheckOrMountCgroup2()
153+
154+
// Start profilers first as we have to capture them in signal handling
146155
if memProfile != "" {
147156
log.WithField("file", memProfile).Info("Starting mem profiling")
148157
}
@@ -160,25 +169,35 @@ func tetragonExecute() error {
160169
log.WithField("file", cpuProfile).Info("Starting cpu profiling")
161170
}
162171

163-
bpf.CheckOrMountFS("")
164-
bpf.CheckOrMountDebugFS()
165-
bpf.CheckOrMountCgroup2()
166-
167-
sensors.LogRegisteredSensorsAndProbes()
172+
defer stopProfile()
168173

174+
// Raise memory resource
169175
bpf.ConfigureResourceLimits()
176+
177+
// Get observer bpf maps and programs directory
170178
observerDir := getObserverDir()
171179
option.Config.BpfDir = observerDir
172180
option.Config.MapDir = observerDir
181+
182+
// Get observer from configFile
173183
obs := observer.NewObserver(configFile)
184+
defer func() {
185+
obs.PrintStats()
186+
obs.RemovePrograms()
187+
}()
188+
189+
go func() {
190+
s := <-sigs
191+
log.Infof("Received signal %s, shutting down...", s)
192+
cancel()
193+
}()
194+
195+
sensors.LogRegisteredSensorsAndProbes()
196+
174197
if err := obs.InitSensorManager(); err != nil {
175198
return err
176199
}
177200

178-
ctx, cancel := context.WithCancel(context.Background())
179-
defer cancel()
180-
var cancelWg sync.WaitGroup
181-
182201
/* Remove any stale programs, otherwise feature set change can cause
183202
* old programs to linger resulting in undefined behavior. And because
184203
* we recapture current running state from proc and/or have cache of
@@ -209,6 +228,9 @@ func tetragonExecute() error {
209228
return err
210229
}
211230

231+
var cancelWg sync.WaitGroup
232+
defer cancelWg.Wait()
233+
212234
pm, err := tetragonGrpc.NewProcessManager(
213235
ctx,
214236
&cancelWg,
@@ -226,16 +248,6 @@ func tetragonExecute() error {
226248
}
227249
}
228250

229-
go func() {
230-
<-sigs
231-
obs.PrintStats()
232-
obs.RemovePrograms()
233-
stopProfile()
234-
cancel()
235-
cancelWg.Wait()
236-
os.Exit(1)
237-
}()
238-
239251
log.WithField("enabled", exportFilename != "").WithField("fileName", exportFilename).Info("Exporter configuration")
240252
obs.AddListener(pm)
241253
saveInitInfo()

pkg/observer/observer.go

Lines changed: 1 addition & 1 deletion
Original file line number | Diff line number | Diff line change
@@ -209,7 +209,7 @@ func (k *Observer) runEvents(stopCtx context.Context, ready func()) error {
209209
for stopCtx.Err() == nil {
210210
record, err := perfReader.Read()
211211
if err != nil {
212-
// NOTE(JM): Keeping the old behaviour for now and just counting the errors without stopping
212+
// NOTE(JM and Djalal): count and log errors while excluding the stopping context
213213
if stopCtx.Err() == nil {
214214
k.errorCntr++
215215
ringbufmetrics.ErrorsSet(float64(k.errorCntr))

0 commit comments

Comments
 (0)