|
|
package instrumentation
import ( "context" "reflect" "sort" "strings" "sync" "time"
"github.com/99designs/gqlgen/graphql" prometheusclient "github.com/prometheus/client_golang/prometheus" )
const ( existStatusFailure = "failure" exitStatusSuccess = "success" )
var latencyMapMutex sync.Mutex var latencyMap = make(map[string]float64, 128)
type ctxKeyType string
var ctxKey = ctxKeyType("metricglobal")
var ( requestStartedCounter prometheusclient.Counter requestCompletedCounter prometheusclient.Counter requestFailureCounter *prometheusclient.CounterVec requestSuccessCounter *prometheusclient.CounterVec requestComplexityHisto prometheusclient.Histogram resolverStartedCounter *prometheusclient.CounterVec resolverCompletedCounter *prometheusclient.CounterVec timeToResolveField *prometheusclient.HistogramVec timeToHandleRequest *prometheusclient.HistogramVec timeToHandleRequestQueryMAvg *prometheusclient.GaugeVec timeToHandleRequestQueryTotal *prometheusclient.CounterVec responseSizeMAvg *prometheusclient.GaugeVec responseSizeTotal *prometheusclient.CounterVec )
// Globals is monitoring data.
type Globals struct { sync.Mutex
Queries []string Failed bool }
// Register registers the prometheus metrics.
func Register() { requestStartedCounter = prometheusclient.NewCounter( prometheusclient.CounterOpts{ Name: "graphql_request_started_total", Help: "Total number of requests started on the graphql server.", }, )
requestCompletedCounter = prometheusclient.NewCounter( prometheusclient.CounterOpts{ Name: "graphql_request_completed_total", Help: "Total number of requests completed on the graphql server.", }, )
requestFailureCounter = prometheusclient.NewCounterVec( prometheusclient.CounterOpts{ Name: "graphql_request_failures", Help: "Number of requests with either success or failure status.", }, []string{"queries"}, ) requestSuccessCounter = prometheusclient.NewCounterVec( prometheusclient.CounterOpts{ Name: "graphql_request_successes", Help: "Number of requests with either success or failure status.", }, []string{"queries"}, )
requestComplexityHisto = prometheusclient.NewHistogram( prometheusclient.HistogramOpts{ Name: "graphql_request_complexity", Help: "The complexity value of incoming queries.", Buckets: prometheusclient.LinearBuckets(100, 25, 9), }, )
resolverStartedCounter = prometheusclient.NewCounterVec( prometheusclient.CounterOpts{ Name: "graphql_resolver_started_total", Help: "Total number of resolver started on the graphql server.", }, []string{"object", "field"}, )
resolverCompletedCounter = prometheusclient.NewCounterVec( prometheusclient.CounterOpts{ Name: "graphql_resolver_completed_total", Help: "Total number of resolver completed on the graphql server.", }, []string{"object", "field"}, )
timeToResolveField = prometheusclient.NewHistogramVec(prometheusclient.HistogramOpts{ Name: "graphql_resolver_duration_ms", Help: "The time taken to resolve a field by graphql server.", Buckets: prometheusclient.ExponentialBuckets(1, 2, 11), }, []string{"exitStatus"})
timeToHandleRequest = prometheusclient.NewHistogramVec(prometheusclient.HistogramOpts{ Name: "graphql_request_duration_ms", Help: "The time taken to handle a request by graphql server.", Buckets: prometheusclient.ExponentialBuckets(1, 2, 11), }, []string{"exitStatus"})
timeToHandleRequestQueryMAvg = prometheusclient.NewGaugeVec( prometheusclient.GaugeOpts{ Name: "graphql_request_query_duration_mavg_ms", Help: "Moving average of graphql request query duration for a query set.", }, []string{"queries"}, ) timeToHandleRequestQueryTotal = prometheusclient.NewCounterVec( prometheusclient.CounterOpts{ Name: "graphql_request_query_duration_total_ms", Help: "Total duration spent responding to a query.", }, []string{"queries"}, )
responseSizeMAvg = prometheusclient.NewGaugeVec( prometheusclient.GaugeOpts{ Name: "graphql_response_size_mavg_bytes", Help: "Moving average of graphql response size for a query set.", }, []string{"queries"}, ) responseSizeTotal = prometheusclient.NewCounterVec( prometheusclient.CounterOpts{ Name: "graphql_response_size_total_bytes", Help: "Total bytes of response data to a query.", }, []string{"queries"}, )
prometheusclient.MustRegister( requestStartedCounter, requestCompletedCounter, requestSuccessCounter, requestFailureCounter, requestComplexityHisto, resolverStartedCounter, resolverCompletedCounter, timeToResolveField, timeToHandleRequest, timeToHandleRequestQueryMAvg, timeToHandleRequestQueryTotal, responseSizeMAvg, responseSizeTotal, ) }
// UnRegister unregisters
func UnRegister() { prometheusclient.Unregister(requestStartedCounter) prometheusclient.Unregister(requestCompletedCounter) prometheusclient.Unregister(requestSuccessCounter) prometheusclient.Unregister(requestFailureCounter) prometheusclient.Unregister(requestComplexityHisto) prometheusclient.Unregister(resolverStartedCounter) prometheusclient.Unregister(resolverCompletedCounter) prometheusclient.Unregister(timeToResolveField) prometheusclient.Unregister(timeToHandleRequest) prometheusclient.Unregister(timeToHandleRequestQueryMAvg) prometheusclient.Unregister(timeToHandleRequestQueryTotal) prometheusclient.Unregister(responseSizeMAvg) prometheusclient.Unregister(responseSizeTotal) }
// ResolverMiddleware is a resolver middleware that logs metrics for prometheus.
func ResolverMiddleware() graphql.FieldMiddleware { return func(ctx context.Context, next graphql.Resolver) (interface{}, error) { rctx := graphql.GetResolverContext(ctx)
resolverStartedCounter.WithLabelValues(rctx.Object, rctx.Field.Name).Inc()
observerStart := time.Now()
path := rctx.Path() if len(path) == 1 { if globals, ok := ctx.Value(ctxKey).(*Globals); ok { globals.Lock() if rctx.Field.Field != nil { globals.Queries = append(globals.Queries, rctx.Field.Name+"("+argsLabel(rctx.Args)+")") } else if name, ok := path[0].(string); ok { globals.Queries = append(globals.Queries, name) } globals.Unlock() } }
res, err := next(ctx)
var exitStatus string if err != nil { exitStatus = existStatusFailure
if globals, ok := ctx.Value(ctxKey).(*Globals); ok { globals.Lock() globals.Failed = true globals.Unlock() } } else { exitStatus = exitStatusSuccess }
exitStatusLabel := prometheusclient.Labels{"exitStatus": exitStatus} timeToResolveField.With(exitStatusLabel).Observe(float64(time.Since(observerStart).Nanoseconds() / int64(time.Millisecond)))
resolverCompletedCounter.WithLabelValues(rctx.Object, rctx.Field.Name).Inc()
return res, err } }
// RequestMiddleware is a request middleware that logs metrics for prometheus. It looks busy, but the
// overhead is less than 10µs.
func RequestMiddleware() graphql.RequestMiddleware { return func(ctx context.Context, next func(ctx context.Context) []byte) []byte { requestStartedCounter.Inc()
// Store some values in a global context.
globals := Globals{}
// Measure the time to serve the request
observerStart := time.Now() res := next(context.WithValue(ctx, ctxKey, &globals)) observedDuration := time.Since(observerStart) observedDurationMS := float64(observedDuration.Nanoseconds()) / float64(time.Millisecond)
reqCtx := graphql.GetRequestContext(ctx)
// exitStatus label
exitStatusLabel := prometheusclient.Labels{"exitStatus": exitStatusSuccess} if globals.Failed { exitStatusLabel["exitStatus"] = existStatusFailure }
// queries label
sort.Strings(globals.Queries) queriesStr := strings.Join(globals.Queries, ";") queriesLabel := prometheusclient.Labels{ "queries": queriesStr, }
// Write metrics
timeToHandleRequest.With(exitStatusLabel).Observe(observedDurationMS) requestCompletedCounter.Inc() if len(globals.Queries) > 0 { if globals.Failed { requestFailureCounter.With(queriesLabel).Inc() } else { requestSuccessCounter.With(queriesLabel).Inc() }
timeToHandleRequestQueryMAvg.With(queriesLabel).Set(movingAvgLatency("ms:"+queriesStr, observedDurationMS)) timeToHandleRequestQueryTotal.With(queriesLabel).Add(observedDurationMS) responseSizeMAvg.With(queriesLabel).Set(movingAvgLatency("bytes:"+queriesStr, float64(len(res)))) responseSizeTotal.With(queriesLabel).Add(float64(len(res))) } requestComplexityHisto.Observe(float64(reqCtx.OperationComplexity))
return res } }
func movingAvgLatency(key string, sample float64) float64 { latencyMapMutex.Lock() latency, ok := latencyMap[key] if ok { latency = (latency * 0.9) + (sample * 0.1) latencyMap[key] = latency } else { latencyMap[key] = sample latency = sample } latencyMapMutex.Unlock()
return latency }
func argsLabel(args map[string]interface{}) string { result := ""
for key, value := range args { if len(result) > 0 { result += "," }
result = key
rv := reflect.ValueOf(value) for rv.Kind() == reflect.Ptr { rv = rv.Elem() }
if rv.Kind() != reflect.Struct { continue }
result += "{"
first := true for i := 0; i < rv.NumField(); i++ { rf := rv.Field(i) if (rf.Kind() == reflect.Ptr || rf.Kind() == reflect.Interface) && rf.IsNil() { continue }
rtf := rv.Type().Field(i)
// Empty limits are as good as no limit
if rtf.Name == "Limit" && rf.Type().Kind() == reflect.Int && rf.Int() <= 0 { continue }
if !first { result += "," } first = false
jsonTag := rtf.Tag.Get("json") if jsonTag != "" { result += jsonTag } else { result += strings.ToLower(rtf.Name[0:1]) + rtf.Name[1:] } }
result += "}" }
return result }
|