Debug Health route latency.

This commit is contained in:
Maximilian Paß
2024-01-26 12:55:23 +01:00
committed by Sebastian Serth
parent c1dbd105c6
commit 213628b958

View File

@ -1,13 +1,18 @@
package api package api
import ( import (
"context"
"errors" "errors"
"fmt" "fmt"
"github.com/coreos/go-systemd/v22/daemon"
"github.com/openHPI/poseidon/internal/config" "github.com/openHPI/poseidon/internal/config"
"github.com/openHPI/poseidon/internal/environment" "github.com/openHPI/poseidon/internal/environment"
"github.com/openHPI/poseidon/pkg/dto" "github.com/openHPI/poseidon/pkg/dto"
"net/http" "net/http"
"os"
"runtime/pprof"
"strings" "strings"
"time"
) )
var ErrorPrewarmingPoolDepleting = errors.New("the prewarming pool is depleting") var ErrorPrewarmingPoolDepleting = errors.New("the prewarming pool is depleting")
@ -17,6 +22,10 @@ var ErrorPrewarmingPoolDepleting = errors.New("the prewarming pool is depleting"
// If it is not, the response won't reach the client. // If it is not, the response won't reach the client.
func Health(manager environment.Manager) http.HandlerFunc { func Health(manager environment.Manager) http.HandlerFunc {
return func(writer http.ResponseWriter, request *http.Request) { return func(writer http.ResponseWriter, request *http.Request) {
ctx, cancel := context.WithCancel(request.Context())
defer cancel()
go debugGoroutines(ctx)
if err := checkPrewarmingPool(manager); err != nil { if err := checkPrewarmingPool(manager); err != nil {
sendJSON(writer, &dto.InternalServerError{Message: err.Error(), ErrorCode: dto.PrewarmingPoolDepleting}, sendJSON(writer, &dto.InternalServerError{Message: err.Error(), ErrorCode: dto.PrewarmingPoolDepleting},
http.StatusServiceUnavailable, request.Context()) http.StatusServiceUnavailable, request.Context())
@ -40,3 +49,24 @@ func checkPrewarmingPool(manager environment.Manager) error {
} }
return nil return nil
} }
// debugGoroutines temporarily debugs a behavior where we observe long latencies in the Health route.
//
// It is started in its own goroutine by the Health handler and lives for at most one request:
// it returns as soon as ctx is done. If ctx is still active after a third of the systemd watchdog
// interval has passed, it logs a warning and writes the profile of all goroutines to stderr.
// When SdWatchdogEnabled reports an error or a zero interval, it returns immediately without waiting.
func debugGoroutines(ctx context.Context) {
	interval, err := daemon.SdWatchdogEnabled(false)
	if err != nil || interval == 0 {
		return
	}
	log.Trace("Starting timeout for debugging the Goroutines")

	const notificationIntervalFactor = 3

	// An explicit timer (instead of time.After) can be stopped when the request finishes first.
	// Otherwise, every fast Health request would keep a pending timer alive until it fires.
	timer := time.NewTimer(interval / notificationIntervalFactor)
	defer timer.Stop()

	select {
	case <-ctx.Done():
		return
	case <-timer.C:
		log.Warn("Health route latency is too high")
		// Debug level 1 selects the text format of the profile rather than the binary protobuf.
		if err := pprof.Lookup("goroutine").WriteTo(os.Stderr, 1); err != nil {
			log.WithError(err).Warn("Failed to log the goroutines")
		}
	}
}