Browse Source

Instrumented the living s*** out of the graphql.

thegreatrefactor 1.5.0
Gisle Aune 5 years ago
parent
commit
6fa9f64f2d
  1. 243
      internal/instrumentation/prometheus.go

243
internal/instrumentation/prometheus.go

@ -2,6 +2,10 @@ package instrumentation
import (
"context"
"reflect"
"sort"
"strings"
"sync"
"time"
"github.com/99designs/gqlgen/graphql"
@ -13,15 +17,37 @@ const (
exitStatusSuccess = "success"
)
var latencyMapMutex sync.Mutex
var latencyMap = make(map[string]float64, 128)
type ctxKeyType string
var ctxKey = ctxKeyType("metricglobal")
var (
requestStartedCounter prometheusclient.Counter
requestCompletedCounter prometheusclient.Counter
resolverStartedCounter *prometheusclient.CounterVec
resolverCompletedCounter *prometheusclient.CounterVec
timeToResolveField *prometheusclient.HistogramVec
timeToHandleRequest *prometheusclient.HistogramVec
requestStartedCounter prometheusclient.Counter
requestCompletedCounter prometheusclient.Counter
requestFailureCounter *prometheusclient.CounterVec
requestSuccessCounter *prometheusclient.CounterVec
requestComplexityHisto prometheusclient.Histogram
resolverStartedCounter *prometheusclient.CounterVec
resolverCompletedCounter *prometheusclient.CounterVec
timeToResolveField *prometheusclient.HistogramVec
timeToHandleRequest *prometheusclient.HistogramVec
timeToHandleRequestQueryMAvg *prometheusclient.GaugeVec
timeToHandleRequestQueryTotal *prometheusclient.CounterVec
responseSizeMAvg *prometheusclient.GaugeVec
responseSizeTotal *prometheusclient.CounterVec
)
// Globals is monitoring data.
type Globals struct {
sync.Mutex
Queries []string
Failed bool
}
// Register registers the prometheus metrics.
func Register() {
requestStartedCounter = prometheusclient.NewCounter(
@ -38,6 +64,29 @@ func Register() {
},
)
requestFailureCounter = prometheusclient.NewCounterVec(
prometheusclient.CounterOpts{
Name: "graphql_request_failures",
Help: "Number of requests with either success or failure status.",
},
[]string{"queries"},
)
requestSuccessCounter = prometheusclient.NewCounterVec(
prometheusclient.CounterOpts{
Name: "graphql_request_successes",
Help: "Number of requests with either success or failure status.",
},
[]string{"queries"},
)
requestComplexityHisto = prometheusclient.NewHistogram(
prometheusclient.HistogramOpts{
Name: "graphql_request_complexity",
Help: "The complexity value of incoming queries.",
Buckets: prometheusclient.LinearBuckets(100, 25, 9),
},
)
resolverStartedCounter = prometheusclient.NewCounterVec(
prometheusclient.CounterOpts{
Name: "graphql_resolver_started_total",
@ -66,13 +115,50 @@ func Register() {
Buckets: prometheusclient.ExponentialBuckets(1, 2, 11),
}, []string{"exitStatus"})
timeToHandleRequestQueryMAvg = prometheusclient.NewGaugeVec(
prometheusclient.GaugeOpts{
Name: "graphql_request_query_duration_mavg_ms",
Help: "Moving average of graphql request query duration for a query set.",
},
[]string{"queries"},
)
timeToHandleRequestQueryTotal = prometheusclient.NewCounterVec(
prometheusclient.CounterOpts{
Name: "graphql_request_query_duration_total_ms",
Help: "Total duration spent responding to a query.",
},
[]string{"queries"},
)
responseSizeMAvg = prometheusclient.NewGaugeVec(
prometheusclient.GaugeOpts{
Name: "graphql_response_size_mavg_bytes",
Help: "Moving average of graphql response size for a query set.",
},
[]string{"queries"},
)
responseSizeTotal = prometheusclient.NewCounterVec(
prometheusclient.CounterOpts{
Name: "graphql_response_size_total_bytes",
Help: "Total bytes of response data to a query.",
},
[]string{"queries"},
)
prometheusclient.MustRegister(
requestStartedCounter,
requestCompletedCounter,
requestSuccessCounter,
requestFailureCounter,
requestComplexityHisto,
resolverStartedCounter,
resolverCompletedCounter,
timeToResolveField,
timeToHandleRequest,
timeToHandleRequestQueryMAvg,
timeToHandleRequestQueryTotal,
responseSizeMAvg,
responseSizeTotal,
)
}
@ -80,10 +166,17 @@ func Register() {
func UnRegister() {
prometheusclient.Unregister(requestStartedCounter)
prometheusclient.Unregister(requestCompletedCounter)
prometheusclient.Unregister(requestSuccessCounter)
prometheusclient.Unregister(requestFailureCounter)
prometheusclient.Unregister(requestComplexityHisto)
prometheusclient.Unregister(resolverStartedCounter)
prometheusclient.Unregister(resolverCompletedCounter)
prometheusclient.Unregister(timeToResolveField)
prometheusclient.Unregister(timeToHandleRequest)
prometheusclient.Unregister(timeToHandleRequestQueryMAvg)
prometheusclient.Unregister(timeToHandleRequestQueryTotal)
prometheusclient.Unregister(responseSizeMAvg)
prometheusclient.Unregister(responseSizeTotal)
}
// ResolverMiddleware is a resolver middleware that logs metrics for prometheus.
@ -95,17 +188,36 @@ func ResolverMiddleware() graphql.FieldMiddleware {
observerStart := time.Now()
path := rctx.Path()
if len(path) == 1 {
if globals, ok := ctx.Value(ctxKey).(*Globals); ok {
globals.Lock()
if rctx.Field.Field != nil {
globals.Queries = append(globals.Queries, rctx.Field.Name+"("+argsLabel(rctx.Args)+")")
} else if name, ok := path[0].(string); ok {
globals.Queries = append(globals.Queries, name)
}
globals.Unlock()
}
}
res, err := next(ctx)
var exitStatus string
if err != nil {
exitStatus = existStatusFailure
if globals, ok := ctx.Value(ctxKey).(*Globals); ok {
globals.Lock()
globals.Failed = true
globals.Unlock()
}
} else {
exitStatus = exitStatusSuccess
}
timeToResolveField.With(prometheusclient.Labels{"exitStatus": exitStatus}).
Observe(float64(time.Since(observerStart).Nanoseconds() / int64(time.Millisecond)))
exitStatusLabel := prometheusclient.Labels{"exitStatus": exitStatus}
timeToResolveField.With(exitStatusLabel).Observe(float64(time.Since(observerStart).Nanoseconds() / int64(time.Millisecond)))
resolverCompletedCounter.WithLabelValues(rctx.Object, rctx.Field.Name).Inc()
@ -113,31 +225,122 @@ func ResolverMiddleware() graphql.FieldMiddleware {
}
}
// RequestMiddleware is a request middleware that logs metrics for prometheus.
// RequestMiddleware is a request middleware that logs metrics for prometheus. It looks busy, but the
// overhead is less than 10µs.
func RequestMiddleware() graphql.RequestMiddleware {
return func(ctx context.Context, next func(ctx context.Context) []byte) []byte {
requestStartedCounter.Inc()
observerStart := time.Now()
// Store some values in a global context.
globals := Globals{}
res := next(ctx)
// Measure the time to serve the request
observerStart := time.Now()
res := next(context.WithValue(ctx, ctxKey, &globals))
observedDuration := time.Since(observerStart)
observedDurationMS := float64(observedDuration.Nanoseconds()) / float64(time.Millisecond)
rctx := graphql.GetResolverContext(ctx)
reqCtx := graphql.GetRequestContext(ctx)
errList := reqCtx.GetErrors(rctx)
var exitStatus string
if len(errList) > 0 {
exitStatus = existStatusFailure
} else {
exitStatus = exitStatusSuccess
// exitStatus label
exitStatusLabel := prometheusclient.Labels{"exitStatus": exitStatusSuccess}
if globals.Failed {
exitStatusLabel["exitStatus"] = existStatusFailure
}
timeToHandleRequest.With(prometheusclient.Labels{"exitStatus": exitStatus}).
Observe(float64(time.Since(observerStart).Nanoseconds() / int64(time.Millisecond)))
// queries label
sort.Strings(globals.Queries)
queriesStr := strings.Join(globals.Queries, ";")
queriesLabel := prometheusclient.Labels{
"queries": queriesStr,
}
// Write metrics
timeToHandleRequest.With(exitStatusLabel).Observe(observedDurationMS)
requestCompletedCounter.Inc()
if len(globals.Queries) > 0 {
if globals.Failed {
requestFailureCounter.With(queriesLabel).Inc()
} else {
requestSuccessCounter.With(queriesLabel).Inc()
}
timeToHandleRequestQueryMAvg.With(queriesLabel).Set(movingAvgLatency("ms:"+queriesStr, observedDurationMS))
timeToHandleRequestQueryTotal.With(queriesLabel).Add(observedDurationMS)
responseSizeMAvg.With(queriesLabel).Set(movingAvgLatency("bytes:"+queriesStr, float64(len(res))))
responseSizeTotal.With(queriesLabel).Add(float64(len(res)))
}
requestComplexityHisto.Observe(float64(reqCtx.OperationComplexity))
return res
}
}
func movingAvgLatency(key string, sample float64) float64 {
latencyMapMutex.Lock()
latency, ok := latencyMap[key]
if ok {
latency = (latency * 0.9) + (sample * 0.1)
latencyMap[key] = latency
} else {
latencyMap[key] = sample
latency = sample
}
latencyMapMutex.Unlock()
return latency
}
func argsLabel(args map[string]interface{}) string {
result := ""
for key, value := range args {
if len(result) > 0 {
result += ","
}
result = key
rv := reflect.ValueOf(value)
for rv.Kind() == reflect.Ptr {
rv = rv.Elem()
}
if rv.Kind() != reflect.Struct {
continue
}
result += "{"
first := true
for i := 0; i < rv.NumField(); i++ {
rf := rv.Field(i)
if (rf.Kind() == reflect.Ptr || rf.Kind() == reflect.Interface) && rf.IsNil() {
continue
}
rtf := rv.Type().Field(i)
// Empty limits are as good as no limit
if rtf.Name == "Limit" && rf.Type().Kind() == reflect.Int && rf.Int() <= 0 {
continue
}
if !first {
result += ","
}
first = false
jsonTag := rtf.Tag.Get("json")
if jsonTag != "" {
result += jsonTag
} else {
result += strings.ToLower(rtf.Name[0:1]) + rtf.Name[1:]
}
}
result += "}"
}
return result
}
Loading…
Cancel
Save