You can not select more than 25 topics
Topics must start with a letter or number, can include dashes ('-') and can be up to 35 characters long.
346 lines
9.7 KiB
346 lines
9.7 KiB
package instrumentation
|
|
|
|
import (
|
|
"context"
|
|
"reflect"
|
|
"sort"
|
|
"strings"
|
|
"sync"
|
|
"time"
|
|
|
|
"github.com/99designs/gqlgen/graphql"
|
|
prometheusclient "github.com/prometheus/client_golang/prometheus"
|
|
)
|
|
|
|
const (
|
|
existStatusFailure = "failure"
|
|
exitStatusSuccess = "success"
|
|
)
|
|
|
|
var latencyMapMutex sync.Mutex
|
|
var latencyMap = make(map[string]float64, 128)
|
|
|
|
type ctxKeyType string
|
|
|
|
var ctxKey = ctxKeyType("metricglobal")
|
|
|
|
var (
|
|
requestStartedCounter prometheusclient.Counter
|
|
requestCompletedCounter prometheusclient.Counter
|
|
requestFailureCounter *prometheusclient.CounterVec
|
|
requestSuccessCounter *prometheusclient.CounterVec
|
|
requestComplexityHisto prometheusclient.Histogram
|
|
resolverStartedCounter *prometheusclient.CounterVec
|
|
resolverCompletedCounter *prometheusclient.CounterVec
|
|
timeToResolveField *prometheusclient.HistogramVec
|
|
timeToHandleRequest *prometheusclient.HistogramVec
|
|
timeToHandleRequestQueryMAvg *prometheusclient.GaugeVec
|
|
timeToHandleRequestQueryTotal *prometheusclient.CounterVec
|
|
responseSizeMAvg *prometheusclient.GaugeVec
|
|
responseSizeTotal *prometheusclient.CounterVec
|
|
)
|
|
|
|
// Globals is monitoring data.
|
|
type Globals struct {
|
|
sync.Mutex
|
|
|
|
Queries []string
|
|
Failed bool
|
|
}
|
|
|
|
// Register registers the prometheus metrics.
|
|
func Register() {
|
|
requestStartedCounter = prometheusclient.NewCounter(
|
|
prometheusclient.CounterOpts{
|
|
Name: "graphql_request_started_total",
|
|
Help: "Total number of requests started on the graphql server.",
|
|
},
|
|
)
|
|
|
|
requestCompletedCounter = prometheusclient.NewCounter(
|
|
prometheusclient.CounterOpts{
|
|
Name: "graphql_request_completed_total",
|
|
Help: "Total number of requests completed on the graphql server.",
|
|
},
|
|
)
|
|
|
|
requestFailureCounter = prometheusclient.NewCounterVec(
|
|
prometheusclient.CounterOpts{
|
|
Name: "graphql_request_failures",
|
|
Help: "Number of requests with either success or failure status.",
|
|
},
|
|
[]string{"queries"},
|
|
)
|
|
requestSuccessCounter = prometheusclient.NewCounterVec(
|
|
prometheusclient.CounterOpts{
|
|
Name: "graphql_request_successes",
|
|
Help: "Number of requests with either success or failure status.",
|
|
},
|
|
[]string{"queries"},
|
|
)
|
|
|
|
requestComplexityHisto = prometheusclient.NewHistogram(
|
|
prometheusclient.HistogramOpts{
|
|
Name: "graphql_request_complexity",
|
|
Help: "The complexity value of incoming queries.",
|
|
Buckets: prometheusclient.LinearBuckets(100, 25, 9),
|
|
},
|
|
)
|
|
|
|
resolverStartedCounter = prometheusclient.NewCounterVec(
|
|
prometheusclient.CounterOpts{
|
|
Name: "graphql_resolver_started_total",
|
|
Help: "Total number of resolver started on the graphql server.",
|
|
},
|
|
[]string{"object", "field"},
|
|
)
|
|
|
|
resolverCompletedCounter = prometheusclient.NewCounterVec(
|
|
prometheusclient.CounterOpts{
|
|
Name: "graphql_resolver_completed_total",
|
|
Help: "Total number of resolver completed on the graphql server.",
|
|
},
|
|
[]string{"object", "field"},
|
|
)
|
|
|
|
timeToResolveField = prometheusclient.NewHistogramVec(prometheusclient.HistogramOpts{
|
|
Name: "graphql_resolver_duration_ms",
|
|
Help: "The time taken to resolve a field by graphql server.",
|
|
Buckets: prometheusclient.ExponentialBuckets(1, 2, 11),
|
|
}, []string{"exitStatus"})
|
|
|
|
timeToHandleRequest = prometheusclient.NewHistogramVec(prometheusclient.HistogramOpts{
|
|
Name: "graphql_request_duration_ms",
|
|
Help: "The time taken to handle a request by graphql server.",
|
|
Buckets: prometheusclient.ExponentialBuckets(1, 2, 11),
|
|
}, []string{"exitStatus"})
|
|
|
|
timeToHandleRequestQueryMAvg = prometheusclient.NewGaugeVec(
|
|
prometheusclient.GaugeOpts{
|
|
Name: "graphql_request_query_duration_mavg_ms",
|
|
Help: "Moving average of graphql request query duration for a query set.",
|
|
},
|
|
[]string{"queries"},
|
|
)
|
|
timeToHandleRequestQueryTotal = prometheusclient.NewCounterVec(
|
|
prometheusclient.CounterOpts{
|
|
Name: "graphql_request_query_duration_total_ms",
|
|
Help: "Total duration spent responding to a query.",
|
|
},
|
|
[]string{"queries"},
|
|
)
|
|
|
|
responseSizeMAvg = prometheusclient.NewGaugeVec(
|
|
prometheusclient.GaugeOpts{
|
|
Name: "graphql_response_size_mavg_bytes",
|
|
Help: "Moving average of graphql response size for a query set.",
|
|
},
|
|
[]string{"queries"},
|
|
)
|
|
responseSizeTotal = prometheusclient.NewCounterVec(
|
|
prometheusclient.CounterOpts{
|
|
Name: "graphql_response_size_total_bytes",
|
|
Help: "Total bytes of response data to a query.",
|
|
},
|
|
[]string{"queries"},
|
|
)
|
|
|
|
prometheusclient.MustRegister(
|
|
requestStartedCounter,
|
|
requestCompletedCounter,
|
|
requestSuccessCounter,
|
|
requestFailureCounter,
|
|
requestComplexityHisto,
|
|
resolverStartedCounter,
|
|
resolverCompletedCounter,
|
|
timeToResolveField,
|
|
timeToHandleRequest,
|
|
timeToHandleRequestQueryMAvg,
|
|
timeToHandleRequestQueryTotal,
|
|
responseSizeMAvg,
|
|
responseSizeTotal,
|
|
)
|
|
}
|
|
|
|
// UnRegister unregisters
|
|
func UnRegister() {
|
|
prometheusclient.Unregister(requestStartedCounter)
|
|
prometheusclient.Unregister(requestCompletedCounter)
|
|
prometheusclient.Unregister(requestSuccessCounter)
|
|
prometheusclient.Unregister(requestFailureCounter)
|
|
prometheusclient.Unregister(requestComplexityHisto)
|
|
prometheusclient.Unregister(resolverStartedCounter)
|
|
prometheusclient.Unregister(resolverCompletedCounter)
|
|
prometheusclient.Unregister(timeToResolveField)
|
|
prometheusclient.Unregister(timeToHandleRequest)
|
|
prometheusclient.Unregister(timeToHandleRequestQueryMAvg)
|
|
prometheusclient.Unregister(timeToHandleRequestQueryTotal)
|
|
prometheusclient.Unregister(responseSizeMAvg)
|
|
prometheusclient.Unregister(responseSizeTotal)
|
|
}
|
|
|
|
// ResolverMiddleware is a resolver middleware that logs metrics for prometheus.
|
|
func ResolverMiddleware() graphql.FieldMiddleware {
|
|
return func(ctx context.Context, next graphql.Resolver) (interface{}, error) {
|
|
rctx := graphql.GetResolverContext(ctx)
|
|
|
|
resolverStartedCounter.WithLabelValues(rctx.Object, rctx.Field.Name).Inc()
|
|
|
|
observerStart := time.Now()
|
|
|
|
path := rctx.Path()
|
|
if len(path) == 1 {
|
|
if globals, ok := ctx.Value(ctxKey).(*Globals); ok {
|
|
globals.Lock()
|
|
if rctx.Field.Field != nil {
|
|
globals.Queries = append(globals.Queries, rctx.Field.Name+"("+argsLabel(rctx.Args)+")")
|
|
} else if name, ok := path[0].(string); ok {
|
|
globals.Queries = append(globals.Queries, name)
|
|
}
|
|
globals.Unlock()
|
|
}
|
|
}
|
|
|
|
res, err := next(ctx)
|
|
|
|
var exitStatus string
|
|
if err != nil {
|
|
exitStatus = existStatusFailure
|
|
|
|
if globals, ok := ctx.Value(ctxKey).(*Globals); ok {
|
|
globals.Lock()
|
|
globals.Failed = true
|
|
globals.Unlock()
|
|
}
|
|
} else {
|
|
exitStatus = exitStatusSuccess
|
|
}
|
|
|
|
exitStatusLabel := prometheusclient.Labels{"exitStatus": exitStatus}
|
|
timeToResolveField.With(exitStatusLabel).Observe(float64(time.Since(observerStart).Nanoseconds() / int64(time.Millisecond)))
|
|
|
|
resolverCompletedCounter.WithLabelValues(rctx.Object, rctx.Field.Name).Inc()
|
|
|
|
return res, err
|
|
}
|
|
}
|
|
|
|
// RequestMiddleware is a request middleware that logs metrics for prometheus. It looks busy, but the
|
|
// overhead is less than 10µs.
|
|
func RequestMiddleware() graphql.RequestMiddleware {
|
|
return func(ctx context.Context, next func(ctx context.Context) []byte) []byte {
|
|
requestStartedCounter.Inc()
|
|
|
|
// Store some values in a global context.
|
|
globals := Globals{}
|
|
|
|
// Measure the time to serve the request
|
|
observerStart := time.Now()
|
|
res := next(context.WithValue(ctx, ctxKey, &globals))
|
|
observedDuration := time.Since(observerStart)
|
|
observedDurationMS := float64(observedDuration.Nanoseconds()) / float64(time.Millisecond)
|
|
|
|
reqCtx := graphql.GetRequestContext(ctx)
|
|
|
|
// exitStatus label
|
|
exitStatusLabel := prometheusclient.Labels{"exitStatus": exitStatusSuccess}
|
|
if globals.Failed {
|
|
exitStatusLabel["exitStatus"] = existStatusFailure
|
|
}
|
|
|
|
// queries label
|
|
sort.Strings(globals.Queries)
|
|
queriesStr := strings.Join(globals.Queries, ";")
|
|
queriesLabel := prometheusclient.Labels{
|
|
"queries": queriesStr,
|
|
}
|
|
|
|
// Write metrics
|
|
timeToHandleRequest.With(exitStatusLabel).Observe(observedDurationMS)
|
|
requestCompletedCounter.Inc()
|
|
if len(globals.Queries) > 0 {
|
|
if globals.Failed {
|
|
requestFailureCounter.With(queriesLabel).Inc()
|
|
} else {
|
|
requestSuccessCounter.With(queriesLabel).Inc()
|
|
}
|
|
|
|
timeToHandleRequestQueryMAvg.With(queriesLabel).Set(movingAvgLatency("ms:"+queriesStr, observedDurationMS))
|
|
timeToHandleRequestQueryTotal.With(queriesLabel).Add(observedDurationMS)
|
|
responseSizeMAvg.With(queriesLabel).Set(movingAvgLatency("bytes:"+queriesStr, float64(len(res))))
|
|
responseSizeTotal.With(queriesLabel).Add(float64(len(res)))
|
|
}
|
|
requestComplexityHisto.Observe(float64(reqCtx.OperationComplexity))
|
|
|
|
return res
|
|
}
|
|
}
|
|
|
|
func movingAvgLatency(key string, sample float64) float64 {
|
|
latencyMapMutex.Lock()
|
|
latency, ok := latencyMap[key]
|
|
if ok {
|
|
latency = (latency * 0.9) + (sample * 0.1)
|
|
latencyMap[key] = latency
|
|
} else {
|
|
latencyMap[key] = sample
|
|
latency = sample
|
|
}
|
|
latencyMapMutex.Unlock()
|
|
|
|
return latency
|
|
}
|
|
|
|
func argsLabel(args map[string]interface{}) string {
|
|
result := ""
|
|
|
|
for key, value := range args {
|
|
if len(result) > 0 {
|
|
result += ","
|
|
}
|
|
|
|
result = key
|
|
|
|
rv := reflect.ValueOf(value)
|
|
for rv.Kind() == reflect.Ptr {
|
|
rv = rv.Elem()
|
|
}
|
|
|
|
if rv.Kind() != reflect.Struct {
|
|
continue
|
|
}
|
|
|
|
result += "{"
|
|
|
|
first := true
|
|
for i := 0; i < rv.NumField(); i++ {
|
|
rf := rv.Field(i)
|
|
if (rf.Kind() == reflect.Ptr || rf.Kind() == reflect.Interface) && rf.IsNil() {
|
|
continue
|
|
}
|
|
|
|
rtf := rv.Type().Field(i)
|
|
|
|
// Empty limits are as good as no limit
|
|
if rtf.Name == "Limit" && rf.Type().Kind() == reflect.Int && rf.Int() <= 0 {
|
|
continue
|
|
}
|
|
|
|
if !first {
|
|
result += ","
|
|
}
|
|
first = false
|
|
|
|
jsonTag := rtf.Tag.Get("json")
|
|
if jsonTag != "" {
|
|
result += jsonTag
|
|
} else {
|
|
result += strings.ToLower(rtf.Name[0:1]) + rtf.Name[1:]
|
|
}
|
|
}
|
|
|
|
result += "}"
|
|
}
|
|
|
|
return result
|
|
}
|