|
@ -2,6 +2,10 @@ package instrumentation |
|
|
|
|
|
|
|
|
import ( |
|
|
import ( |
|
|
"context" |
|
|
"context" |
|
|
|
|
|
"reflect" |
|
|
|
|
|
"sort" |
|
|
|
|
|
"strings" |
|
|
|
|
|
"sync" |
|
|
"time" |
|
|
"time" |
|
|
|
|
|
|
|
|
"github.com/99designs/gqlgen/graphql" |
|
|
"github.com/99designs/gqlgen/graphql" |
|
@ -13,15 +17,37 @@ const ( |
|
|
exitStatusSuccess = "success" |
|
|
exitStatusSuccess = "success" |
|
|
) |
|
|
) |
|
|
|
|
|
|
|
|
|
|
|
var latencyMapMutex sync.Mutex |
|
|
|
|
|
var latencyMap = make(map[string]float64, 128) |
|
|
|
|
|
|
|
|
|
|
|
type ctxKeyType string |
|
|
|
|
|
|
|
|
|
|
|
var ctxKey = ctxKeyType("metricglobal") |
|
|
|
|
|
|
|
|
var ( |
|
|
var ( |
|
|
requestStartedCounter prometheusclient.Counter |
|
|
requestStartedCounter prometheusclient.Counter |
|
|
requestCompletedCounter prometheusclient.Counter |
|
|
requestCompletedCounter prometheusclient.Counter |
|
|
|
|
|
requestFailureCounter *prometheusclient.CounterVec |
|
|
|
|
|
requestSuccessCounter *prometheusclient.CounterVec |
|
|
|
|
|
requestComplexityHisto prometheusclient.Histogram |
|
|
resolverStartedCounter *prometheusclient.CounterVec |
|
|
resolverStartedCounter *prometheusclient.CounterVec |
|
|
resolverCompletedCounter *prometheusclient.CounterVec |
|
|
resolverCompletedCounter *prometheusclient.CounterVec |
|
|
timeToResolveField *prometheusclient.HistogramVec |
|
|
timeToResolveField *prometheusclient.HistogramVec |
|
|
timeToHandleRequest *prometheusclient.HistogramVec |
|
|
timeToHandleRequest *prometheusclient.HistogramVec |
|
|
|
|
|
timeToHandleRequestQueryMAvg *prometheusclient.GaugeVec |
|
|
|
|
|
timeToHandleRequestQueryTotal *prometheusclient.CounterVec |
|
|
|
|
|
responseSizeMAvg *prometheusclient.GaugeVec |
|
|
|
|
|
responseSizeTotal *prometheusclient.CounterVec |
|
|
) |
|
|
) |
|
|
|
|
|
|
|
|
|
|
|
// Globals is monitoring data.
|
|
|
|
|
|
type Globals struct { |
|
|
|
|
|
sync.Mutex |
|
|
|
|
|
|
|
|
|
|
|
Queries []string |
|
|
|
|
|
Failed bool |
|
|
|
|
|
} |
|
|
|
|
|
|
|
|
// Register registers the prometheus metrics.
|
|
|
// Register registers the prometheus metrics.
|
|
|
func Register() { |
|
|
func Register() { |
|
|
requestStartedCounter = prometheusclient.NewCounter( |
|
|
requestStartedCounter = prometheusclient.NewCounter( |
|
@ -38,6 +64,29 @@ func Register() { |
|
|
}, |
|
|
}, |
|
|
) |
|
|
) |
|
|
|
|
|
|
|
|
|
|
|
requestFailureCounter = prometheusclient.NewCounterVec( |
|
|
|
|
|
prometheusclient.CounterOpts{ |
|
|
|
|
|
Name: "graphql_request_failures", |
|
|
|
|
|
Help: "Number of requests with either success or failure status.", |
|
|
|
|
|
}, |
|
|
|
|
|
[]string{"queries"}, |
|
|
|
|
|
) |
|
|
|
|
|
requestSuccessCounter = prometheusclient.NewCounterVec( |
|
|
|
|
|
prometheusclient.CounterOpts{ |
|
|
|
|
|
Name: "graphql_request_successes", |
|
|
|
|
|
Help: "Number of requests with either success or failure status.", |
|
|
|
|
|
}, |
|
|
|
|
|
[]string{"queries"}, |
|
|
|
|
|
) |
|
|
|
|
|
|
|
|
|
|
|
requestComplexityHisto = prometheusclient.NewHistogram( |
|
|
|
|
|
prometheusclient.HistogramOpts{ |
|
|
|
|
|
Name: "graphql_request_complexity", |
|
|
|
|
|
Help: "The complexity value of incoming queries.", |
|
|
|
|
|
Buckets: prometheusclient.LinearBuckets(100, 25, 9), |
|
|
|
|
|
}, |
|
|
|
|
|
) |
|
|
|
|
|
|
|
|
resolverStartedCounter = prometheusclient.NewCounterVec( |
|
|
resolverStartedCounter = prometheusclient.NewCounterVec( |
|
|
prometheusclient.CounterOpts{ |
|
|
prometheusclient.CounterOpts{ |
|
|
Name: "graphql_resolver_started_total", |
|
|
Name: "graphql_resolver_started_total", |
|
@ -66,13 +115,50 @@ func Register() { |
|
|
Buckets: prometheusclient.ExponentialBuckets(1, 2, 11), |
|
|
Buckets: prometheusclient.ExponentialBuckets(1, 2, 11), |
|
|
}, []string{"exitStatus"}) |
|
|
}, []string{"exitStatus"}) |
|
|
|
|
|
|
|
|
|
|
|
timeToHandleRequestQueryMAvg = prometheusclient.NewGaugeVec( |
|
|
|
|
|
prometheusclient.GaugeOpts{ |
|
|
|
|
|
Name: "graphql_request_query_duration_mavg_ms", |
|
|
|
|
|
Help: "Moving average of graphql request query duration for a query set.", |
|
|
|
|
|
}, |
|
|
|
|
|
[]string{"queries"}, |
|
|
|
|
|
) |
|
|
|
|
|
timeToHandleRequestQueryTotal = prometheusclient.NewCounterVec( |
|
|
|
|
|
prometheusclient.CounterOpts{ |
|
|
|
|
|
Name: "graphql_request_query_duration_total_ms", |
|
|
|
|
|
Help: "Total duration spent responding to a query.", |
|
|
|
|
|
}, |
|
|
|
|
|
[]string{"queries"}, |
|
|
|
|
|
) |
|
|
|
|
|
|
|
|
|
|
|
responseSizeMAvg = prometheusclient.NewGaugeVec( |
|
|
|
|
|
prometheusclient.GaugeOpts{ |
|
|
|
|
|
Name: "graphql_response_size_mavg_bytes", |
|
|
|
|
|
Help: "Moving average of graphql response size for a query set.", |
|
|
|
|
|
}, |
|
|
|
|
|
[]string{"queries"}, |
|
|
|
|
|
) |
|
|
|
|
|
responseSizeTotal = prometheusclient.NewCounterVec( |
|
|
|
|
|
prometheusclient.CounterOpts{ |
|
|
|
|
|
Name: "graphql_response_size_total_bytes", |
|
|
|
|
|
Help: "Total bytes of response data to a query.", |
|
|
|
|
|
}, |
|
|
|
|
|
[]string{"queries"}, |
|
|
|
|
|
) |
|
|
|
|
|
|
|
|
prometheusclient.MustRegister( |
|
|
prometheusclient.MustRegister( |
|
|
requestStartedCounter, |
|
|
requestStartedCounter, |
|
|
requestCompletedCounter, |
|
|
requestCompletedCounter, |
|
|
|
|
|
requestSuccessCounter, |
|
|
|
|
|
requestFailureCounter, |
|
|
|
|
|
requestComplexityHisto, |
|
|
resolverStartedCounter, |
|
|
resolverStartedCounter, |
|
|
resolverCompletedCounter, |
|
|
resolverCompletedCounter, |
|
|
timeToResolveField, |
|
|
timeToResolveField, |
|
|
timeToHandleRequest, |
|
|
timeToHandleRequest, |
|
|
|
|
|
timeToHandleRequestQueryMAvg, |
|
|
|
|
|
timeToHandleRequestQueryTotal, |
|
|
|
|
|
responseSizeMAvg, |
|
|
|
|
|
responseSizeTotal, |
|
|
) |
|
|
) |
|
|
} |
|
|
} |
|
|
|
|
|
|
|
@ -80,10 +166,17 @@ func Register() { |
|
|
func UnRegister() { |
|
|
func UnRegister() { |
|
|
prometheusclient.Unregister(requestStartedCounter) |
|
|
prometheusclient.Unregister(requestStartedCounter) |
|
|
prometheusclient.Unregister(requestCompletedCounter) |
|
|
prometheusclient.Unregister(requestCompletedCounter) |
|
|
|
|
|
prometheusclient.Unregister(requestSuccessCounter) |
|
|
|
|
|
prometheusclient.Unregister(requestFailureCounter) |
|
|
|
|
|
prometheusclient.Unregister(requestComplexityHisto) |
|
|
prometheusclient.Unregister(resolverStartedCounter) |
|
|
prometheusclient.Unregister(resolverStartedCounter) |
|
|
prometheusclient.Unregister(resolverCompletedCounter) |
|
|
prometheusclient.Unregister(resolverCompletedCounter) |
|
|
prometheusclient.Unregister(timeToResolveField) |
|
|
prometheusclient.Unregister(timeToResolveField) |
|
|
prometheusclient.Unregister(timeToHandleRequest) |
|
|
prometheusclient.Unregister(timeToHandleRequest) |
|
|
|
|
|
prometheusclient.Unregister(timeToHandleRequestQueryMAvg) |
|
|
|
|
|
prometheusclient.Unregister(timeToHandleRequestQueryTotal) |
|
|
|
|
|
prometheusclient.Unregister(responseSizeMAvg) |
|
|
|
|
|
prometheusclient.Unregister(responseSizeTotal) |
|
|
} |
|
|
} |
|
|
|
|
|
|
|
|
// ResolverMiddleware is a resolver middleware that logs metrics for prometheus.
|
|
|
// ResolverMiddleware is a resolver middleware that logs metrics for prometheus.
|
|
@ -95,17 +188,36 @@ func ResolverMiddleware() graphql.FieldMiddleware { |
|
|
|
|
|
|
|
|
observerStart := time.Now() |
|
|
observerStart := time.Now() |
|
|
|
|
|
|
|
|
|
|
|
path := rctx.Path() |
|
|
|
|
|
if len(path) == 1 { |
|
|
|
|
|
if globals, ok := ctx.Value(ctxKey).(*Globals); ok { |
|
|
|
|
|
globals.Lock() |
|
|
|
|
|
if rctx.Field.Field != nil { |
|
|
|
|
|
globals.Queries = append(globals.Queries, rctx.Field.Name+"("+argsLabel(rctx.Args)+")") |
|
|
|
|
|
} else if name, ok := path[0].(string); ok { |
|
|
|
|
|
globals.Queries = append(globals.Queries, name) |
|
|
|
|
|
} |
|
|
|
|
|
globals.Unlock() |
|
|
|
|
|
} |
|
|
|
|
|
} |
|
|
|
|
|
|
|
|
res, err := next(ctx) |
|
|
res, err := next(ctx) |
|
|
|
|
|
|
|
|
var exitStatus string |
|
|
var exitStatus string |
|
|
if err != nil { |
|
|
if err != nil { |
|
|
exitStatus = existStatusFailure |
|
|
exitStatus = existStatusFailure |
|
|
|
|
|
|
|
|
|
|
|
if globals, ok := ctx.Value(ctxKey).(*Globals); ok { |
|
|
|
|
|
globals.Lock() |
|
|
|
|
|
globals.Failed = true |
|
|
|
|
|
globals.Unlock() |
|
|
|
|
|
} |
|
|
} else { |
|
|
} else { |
|
|
exitStatus = exitStatusSuccess |
|
|
exitStatus = exitStatusSuccess |
|
|
} |
|
|
} |
|
|
|
|
|
|
|
|
timeToResolveField.With(prometheusclient.Labels{"exitStatus": exitStatus}). |
|
|
|
|
|
Observe(float64(time.Since(observerStart).Nanoseconds() / int64(time.Millisecond))) |
|
|
|
|
|
|
|
|
exitStatusLabel := prometheusclient.Labels{"exitStatus": exitStatus} |
|
|
|
|
|
timeToResolveField.With(exitStatusLabel).Observe(float64(time.Since(observerStart).Nanoseconds() / int64(time.Millisecond))) |
|
|
|
|
|
|
|
|
resolverCompletedCounter.WithLabelValues(rctx.Object, rctx.Field.Name).Inc() |
|
|
resolverCompletedCounter.WithLabelValues(rctx.Object, rctx.Field.Name).Inc() |
|
|
|
|
|
|
|
@ -113,31 +225,122 @@ func ResolverMiddleware() graphql.FieldMiddleware { |
|
|
} |
|
|
} |
|
|
} |
|
|
} |
|
|
|
|
|
|
|
|
// RequestMiddleware is a request middleware that logs metrics for prometheus.
|
|
|
|
|
|
|
|
|
// RequestMiddleware is a request middleware that logs metrics for prometheus. It looks busy, but the
|
|
|
|
|
|
// overhead is less than 10µs.
|
|
|
func RequestMiddleware() graphql.RequestMiddleware { |
|
|
func RequestMiddleware() graphql.RequestMiddleware { |
|
|
return func(ctx context.Context, next func(ctx context.Context) []byte) []byte { |
|
|
return func(ctx context.Context, next func(ctx context.Context) []byte) []byte { |
|
|
requestStartedCounter.Inc() |
|
|
requestStartedCounter.Inc() |
|
|
|
|
|
|
|
|
observerStart := time.Now() |
|
|
|
|
|
|
|
|
// Store some values in a global context.
|
|
|
|
|
|
globals := Globals{} |
|
|
|
|
|
|
|
|
res := next(ctx) |
|
|
|
|
|
|
|
|
// Measure the time to serve the request
|
|
|
|
|
|
observerStart := time.Now() |
|
|
|
|
|
res := next(context.WithValue(ctx, ctxKey, &globals)) |
|
|
|
|
|
observedDuration := time.Since(observerStart) |
|
|
|
|
|
observedDurationMS := float64(observedDuration.Nanoseconds()) / float64(time.Millisecond) |
|
|
|
|
|
|
|
|
rctx := graphql.GetResolverContext(ctx) |
|
|
|
|
|
reqCtx := graphql.GetRequestContext(ctx) |
|
|
reqCtx := graphql.GetRequestContext(ctx) |
|
|
errList := reqCtx.GetErrors(rctx) |
|
|
|
|
|
|
|
|
|
|
|
var exitStatus string |
|
|
|
|
|
if len(errList) > 0 { |
|
|
|
|
|
exitStatus = existStatusFailure |
|
|
|
|
|
} else { |
|
|
|
|
|
exitStatus = exitStatusSuccess |
|
|
|
|
|
|
|
|
// exitStatus label
|
|
|
|
|
|
exitStatusLabel := prometheusclient.Labels{"exitStatus": exitStatusSuccess} |
|
|
|
|
|
if globals.Failed { |
|
|
|
|
|
exitStatusLabel["exitStatus"] = existStatusFailure |
|
|
} |
|
|
} |
|
|
|
|
|
|
|
|
timeToHandleRequest.With(prometheusclient.Labels{"exitStatus": exitStatus}). |
|
|
|
|
|
Observe(float64(time.Since(observerStart).Nanoseconds() / int64(time.Millisecond))) |
|
|
|
|
|
|
|
|
// queries label
|
|
|
|
|
|
sort.Strings(globals.Queries) |
|
|
|
|
|
queriesStr := strings.Join(globals.Queries, ";") |
|
|
|
|
|
queriesLabel := prometheusclient.Labels{ |
|
|
|
|
|
"queries": queriesStr, |
|
|
|
|
|
} |
|
|
|
|
|
|
|
|
|
|
|
// Write metrics
|
|
|
|
|
|
timeToHandleRequest.With(exitStatusLabel).Observe(observedDurationMS) |
|
|
requestCompletedCounter.Inc() |
|
|
requestCompletedCounter.Inc() |
|
|
|
|
|
if len(globals.Queries) > 0 { |
|
|
|
|
|
if globals.Failed { |
|
|
|
|
|
requestFailureCounter.With(queriesLabel).Inc() |
|
|
|
|
|
} else { |
|
|
|
|
|
requestSuccessCounter.With(queriesLabel).Inc() |
|
|
|
|
|
} |
|
|
|
|
|
|
|
|
|
|
|
timeToHandleRequestQueryMAvg.With(queriesLabel).Set(movingAvgLatency("ms:"+queriesStr, observedDurationMS)) |
|
|
|
|
|
timeToHandleRequestQueryTotal.With(queriesLabel).Add(observedDurationMS) |
|
|
|
|
|
responseSizeMAvg.With(queriesLabel).Set(movingAvgLatency("bytes:"+queriesStr, float64(len(res)))) |
|
|
|
|
|
responseSizeTotal.With(queriesLabel).Add(float64(len(res))) |
|
|
|
|
|
} |
|
|
|
|
|
requestComplexityHisto.Observe(float64(reqCtx.OperationComplexity)) |
|
|
|
|
|
|
|
|
return res |
|
|
return res |
|
|
} |
|
|
} |
|
|
} |
|
|
} |
|
|
|
|
|
|
|
|
|
|
|
func movingAvgLatency(key string, sample float64) float64 { |
|
|
|
|
|
latencyMapMutex.Lock() |
|
|
|
|
|
latency, ok := latencyMap[key] |
|
|
|
|
|
if ok { |
|
|
|
|
|
latency = (latency * 0.9) + (sample * 0.1) |
|
|
|
|
|
latencyMap[key] = latency |
|
|
|
|
|
} else { |
|
|
|
|
|
latencyMap[key] = sample |
|
|
|
|
|
latency = sample |
|
|
|
|
|
} |
|
|
|
|
|
latencyMapMutex.Unlock() |
|
|
|
|
|
|
|
|
|
|
|
return latency |
|
|
|
|
|
} |
|
|
|
|
|
|
|
|
|
|
|
func argsLabel(args map[string]interface{}) string { |
|
|
|
|
|
result := "" |
|
|
|
|
|
|
|
|
|
|
|
for key, value := range args { |
|
|
|
|
|
if len(result) > 0 { |
|
|
|
|
|
result += "," |
|
|
|
|
|
} |
|
|
|
|
|
|
|
|
|
|
|
result = key |
|
|
|
|
|
|
|
|
|
|
|
rv := reflect.ValueOf(value) |
|
|
|
|
|
for rv.Kind() == reflect.Ptr { |
|
|
|
|
|
rv = rv.Elem() |
|
|
|
|
|
} |
|
|
|
|
|
|
|
|
|
|
|
if rv.Kind() != reflect.Struct { |
|
|
|
|
|
continue |
|
|
|
|
|
} |
|
|
|
|
|
|
|
|
|
|
|
result += "{" |
|
|
|
|
|
|
|
|
|
|
|
first := true |
|
|
|
|
|
for i := 0; i < rv.NumField(); i++ { |
|
|
|
|
|
rf := rv.Field(i) |
|
|
|
|
|
if (rf.Kind() == reflect.Ptr || rf.Kind() == reflect.Interface) && rf.IsNil() { |
|
|
|
|
|
continue |
|
|
|
|
|
} |
|
|
|
|
|
|
|
|
|
|
|
rtf := rv.Type().Field(i) |
|
|
|
|
|
|
|
|
|
|
|
// Empty limits are as good as no limit
|
|
|
|
|
|
if rtf.Name == "Limit" && rf.Type().Kind() == reflect.Int && rf.Int() <= 0 { |
|
|
|
|
|
continue |
|
|
|
|
|
} |
|
|
|
|
|
|
|
|
|
|
|
if !first { |
|
|
|
|
|
result += "," |
|
|
|
|
|
} |
|
|
|
|
|
first = false |
|
|
|
|
|
|
|
|
|
|
|
jsonTag := rtf.Tag.Get("json") |
|
|
|
|
|
if jsonTag != "" { |
|
|
|
|
|
result += jsonTag |
|
|
|
|
|
} else { |
|
|
|
|
|
result += strings.ToLower(rtf.Name[0:1]) + rtf.Name[1:] |
|
|
|
|
|
} |
|
|
|
|
|
} |
|
|
|
|
|
|
|
|
|
|
|
result += "}" |
|
|
|
|
|
} |
|
|
|
|
|
|
|
|
|
|
|
return result |
|
|
|
|
|
} |