GraphQL API and utilities for the rpdata project
You cannot select more than 25 topics. Topics must start with a letter or number, can include dashes ('-'), and can be up to 35 characters long.
 
 

346 lines
9.7 KiB

package instrumentation
import (
"context"
"reflect"
"sort"
"strings"
"sync"
"time"
"github.com/99designs/gqlgen/graphql"
prometheusclient "github.com/prometheus/client_golang/prometheus"
)
// Values used for the "exitStatus" label on the resolver and request duration
// histograms.
const (
// NOTE(review): "exist" looks like a typo for "exit" (compare
// exitStatusSuccess); renaming it requires updating every use in this file.
existStatusFailure = "failure"
exitStatusSuccess = "success"
)
// latencyMapMutex guards latencyMap; movingAvgLatency holds it while reading
// and updating the map.
var latencyMapMutex sync.Mutex
// latencyMap stores the current moving average for each key passed to
// movingAvgLatency. RequestMiddleware uses keys prefixed with "ms:" (request
// duration) and "bytes:" (response size). Nothing in the code shown here ever
// removes an entry.
var latencyMap = make(map[string]float64, 128)
// ctxKeyType is a dedicated type for this package's context key, so the key
// cannot be equal to a plain string key set by other code.
type ctxKeyType string
// ctxKey is the context key under which RequestMiddleware stores the
// per-request *Globals that ResolverMiddleware later reads.
var ctxKey = ctxKeyType("metricglobal")
// Prometheus collectors used by the middlewares in this file. They are nil
// until Register has been called.
var (
// Request-level counters: started/completed totals, plus failure and success
// counts labelled by the request's query set.
requestStartedCounter prometheusclient.Counter
requestCompletedCounter prometheusclient.Counter
requestFailureCounter *prometheusclient.CounterVec
requestSuccessCounter *prometheusclient.CounterVec
// Histogram of the operation complexity of each request.
requestComplexityHisto prometheusclient.Histogram
// Resolver-level counters, labelled by object and field name.
resolverStartedCounter *prometheusclient.CounterVec
resolverCompletedCounter *prometheusclient.CounterVec
// Duration histograms in milliseconds, labelled by exit status.
timeToResolveField *prometheusclient.HistogramVec
timeToHandleRequest *prometheusclient.HistogramVec
// Per-query-set request duration: moving average and running total.
timeToHandleRequestQueryMAvg *prometheusclient.GaugeVec
timeToHandleRequestQueryTotal *prometheusclient.CounterVec
// Per-query-set response size in bytes: moving average and running total.
responseSizeMAvg *prometheusclient.GaugeVec
responseSizeTotal *prometheusclient.CounterVec
)
// Globals is the per-request monitoring state. RequestMiddleware creates one
// for every request and places a pointer to it in the context; the resolver
// middleware fills it in while the request is being resolved.
type Globals struct {
// The embedded mutex is held by ResolverMiddleware while it updates the
// fields below.
sync.Mutex
// Queries collects one label per field resolved at a path of length 1,
// either "name(args)" or just the path element.
Queries []string
// Failed is set to true once any resolver has returned an error.
Failed bool
}
// Register creates every metric collector used by this package and registers
// them with the default prometheus registry.
//
// It must be called before the middlewares are used, since they dereference
// the package-level collectors. Registration goes through MustRegister, which
// panics if a collector cannot be registered (for example when Register is
// called twice without an UnRegister in between).
func Register() {
	requestStartedCounter = prometheusclient.NewCounter(
		prometheusclient.CounterOpts{
			Name: "graphql_request_started_total",
			Help: "Total number of requests started on the graphql server.",
		},
	)

	requestCompletedCounter = prometheusclient.NewCounter(
		prometheusclient.CounterOpts{
			Name: "graphql_request_completed_total",
			Help: "Total number of requests completed on the graphql server.",
		},
	)

	// The failure and success counters previously shared one copy-pasted help
	// text that described neither of them; each now describes what it counts.
	requestFailureCounter = prometheusclient.NewCounterVec(
		prometheusclient.CounterOpts{
			Name: "graphql_request_failures",
			Help: "Number of requests that completed with a failure status for a query set.",
		},
		[]string{"queries"},
	)

	requestSuccessCounter = prometheusclient.NewCounterVec(
		prometheusclient.CounterOpts{
			Name: "graphql_request_successes",
			Help: "Number of requests that completed with a success status for a query set.",
		},
		[]string{"queries"},
	)

	requestComplexityHisto = prometheusclient.NewHistogram(
		prometheusclient.HistogramOpts{
			Name: "graphql_request_complexity",
			Help: "The complexity value of incoming queries.",
			// Nine buckets: 100, 125, ..., 300.
			Buckets: prometheusclient.LinearBuckets(100, 25, 9),
		},
	)

	resolverStartedCounter = prometheusclient.NewCounterVec(
		prometheusclient.CounterOpts{
			Name: "graphql_resolver_started_total",
			Help: "Total number of resolver started on the graphql server.",
		},
		[]string{"object", "field"},
	)

	resolverCompletedCounter = prometheusclient.NewCounterVec(
		prometheusclient.CounterOpts{
			Name: "graphql_resolver_completed_total",
			Help: "Total number of resolver completed on the graphql server.",
		},
		[]string{"object", "field"},
	)

	// Both duration histograms use eleven buckets: 1, 2, 4, ..., 1024 ms.
	timeToResolveField = prometheusclient.NewHistogramVec(prometheusclient.HistogramOpts{
		Name:    "graphql_resolver_duration_ms",
		Help:    "The time taken to resolve a field by graphql server.",
		Buckets: prometheusclient.ExponentialBuckets(1, 2, 11),
	}, []string{"exitStatus"})

	timeToHandleRequest = prometheusclient.NewHistogramVec(prometheusclient.HistogramOpts{
		Name:    "graphql_request_duration_ms",
		Help:    "The time taken to handle a request by graphql server.",
		Buckets: prometheusclient.ExponentialBuckets(1, 2, 11),
	}, []string{"exitStatus"})

	timeToHandleRequestQueryMAvg = prometheusclient.NewGaugeVec(
		prometheusclient.GaugeOpts{
			Name: "graphql_request_query_duration_mavg_ms",
			Help: "Moving average of graphql request query duration for a query set.",
		},
		[]string{"queries"},
	)

	timeToHandleRequestQueryTotal = prometheusclient.NewCounterVec(
		prometheusclient.CounterOpts{
			Name: "graphql_request_query_duration_total_ms",
			Help: "Total duration spent responding to a query.",
		},
		[]string{"queries"},
	)

	responseSizeMAvg = prometheusclient.NewGaugeVec(
		prometheusclient.GaugeOpts{
			Name: "graphql_response_size_mavg_bytes",
			Help: "Moving average of graphql response size for a query set.",
		},
		[]string{"queries"},
	)

	responseSizeTotal = prometheusclient.NewCounterVec(
		prometheusclient.CounterOpts{
			Name: "graphql_response_size_total_bytes",
			Help: "Total bytes of response data to a query.",
		},
		[]string{"queries"},
	)

	prometheusclient.MustRegister(
		requestStartedCounter,
		requestCompletedCounter,
		requestSuccessCounter,
		requestFailureCounter,
		requestComplexityHisto,
		resolverStartedCounter,
		resolverCompletedCounter,
		timeToResolveField,
		timeToHandleRequest,
		timeToHandleRequestQueryMAvg,
		timeToHandleRequestQueryTotal,
		responseSizeMAvg,
		responseSizeTotal,
	)
}
// UnRegister removes every collector created by Register from the default
// prometheus registry. The result of each individual Unregister call is
// ignored.
func UnRegister() {
	// Same collectors, in the same order, as the original one-call-per-line form.
	registered := []prometheusclient.Collector{
		requestStartedCounter,
		requestCompletedCounter,
		requestSuccessCounter,
		requestFailureCounter,
		requestComplexityHisto,
		resolverStartedCounter,
		resolverCompletedCounter,
		timeToResolveField,
		timeToHandleRequest,
		timeToHandleRequestQueryMAvg,
		timeToHandleRequestQueryTotal,
		responseSizeMAvg,
		responseSizeTotal,
	}
	for _, collector := range registered {
		prometheusclient.Unregister(collector)
	}
}
// ResolverMiddleware returns a field middleware that records prometheus
// metrics for every resolver call: a started and a completed counter labelled
// by object and field name, and a duration histogram (in milliseconds)
// labelled by exit status.
//
// When the context carries the *Globals installed by RequestMiddleware, the
// middleware also records the field in Globals.Queries for resolvers whose
// path has exactly one element, and sets Globals.Failed if the resolver
// returns an error. Register must have been called beforehand.
func ResolverMiddleware() graphql.FieldMiddleware {
	return func(ctx context.Context, next graphql.Resolver) (interface{}, error) {
		rctx := graphql.GetResolverContext(ctx)
		resolverStartedCounter.WithLabelValues(rctx.Object, rctx.Field.Name).Inc()
		observerStart := time.Now()

		// Per-request state; absent when RequestMiddleware is not in the chain.
		globals, hasGlobals := ctx.Value(ctxKey).(*Globals)

		// Only fields whose path has a single element contribute to the
		// request's query set.
		if path := rctx.Path(); hasGlobals && len(path) == 1 {
			globals.Lock()
			if rctx.Field.Field != nil {
				globals.Queries = append(globals.Queries, rctx.Field.Name+"("+argsLabel(rctx.Args)+")")
			} else if name, ok := path[0].(string); ok {
				globals.Queries = append(globals.Queries, name)
			}
			globals.Unlock()
		}

		res, err := next(ctx)

		exitStatus := exitStatusSuccess
		if err != nil {
			exitStatus = existStatusFailure
			if hasGlobals {
				globals.Lock()
				globals.Failed = true
				globals.Unlock()
			}
		}

		// Convert to float before dividing so sub-millisecond durations keep
		// their fractional part (integer division truncated them to 0). This
		// matches how RequestMiddleware computes its duration.
		elapsedMS := float64(time.Since(observerStart).Nanoseconds()) / float64(time.Millisecond)
		timeToResolveField.With(prometheusclient.Labels{"exitStatus": exitStatus}).Observe(elapsedMS)

		resolverCompletedCounter.WithLabelValues(rctx.Object, rctx.Field.Name).Inc()

		return res, err
	}
}
// RequestMiddleware returns a request middleware that records prometheus
// metrics for each request: started/completed counters, a duration histogram
// labelled by exit status, the operation complexity, and, when at least one
// query label was collected by the resolver middleware, per-query-set
// success/failure counts, duration and response-size totals and moving
// averages.
//
// The original author notes the overhead as under 10µs; that figure is not
// verified here.
func RequestMiddleware() graphql.RequestMiddleware {
	return func(ctx context.Context, next func(ctx context.Context) []byte) []byte {
		requestStartedCounter.Inc()

		// Per-request state, shared with the resolver middleware via the context.
		shared := &Globals{}

		// Time the downstream handling of the request.
		began := time.Now()
		body := next(context.WithValue(ctx, ctxKey, shared))
		elapsedMS := float64(time.Since(began).Nanoseconds()) / float64(time.Millisecond)

		requestCtx := graphql.GetRequestContext(ctx)

		// Value for the exitStatus label.
		status := exitStatusSuccess
		if shared.Failed {
			status = existStatusFailure
		}

		// Value for the queries label: the sorted query labels joined by ';'.
		sort.Strings(shared.Queries)
		joined := strings.Join(shared.Queries, ";")

		// Write metrics.
		timeToHandleRequest.WithLabelValues(status).Observe(elapsedMS)
		requestCompletedCounter.Inc()

		if len(shared.Queries) != 0 {
			outcome := requestSuccessCounter
			if shared.Failed {
				outcome = requestFailureCounter
			}
			outcome.WithLabelValues(joined).Inc()

			timeToHandleRequestQueryMAvg.WithLabelValues(joined).Set(movingAvgLatency("ms:"+joined, elapsedMS))
			timeToHandleRequestQueryTotal.WithLabelValues(joined).Add(elapsedMS)

			size := float64(len(body))
			responseSizeMAvg.WithLabelValues(joined).Set(movingAvgLatency("bytes:"+joined, size))
			responseSizeTotal.WithLabelValues(joined).Add(size)
		}

		requestComplexityHisto.Observe(float64(requestCtx.OperationComplexity))

		return body
	}
}
// movingAvgLatency folds sample into the moving average stored under key in
// latencyMap and returns the updated average. The first sample for a key
// becomes the average as-is; each later sample contributes 10% and the
// previous average 90%. The map is accessed under latencyMapMutex.
func movingAvgLatency(key string, sample float64) float64 {
	latencyMapMutex.Lock()
	defer latencyMapMutex.Unlock()

	previous, seen := latencyMap[key]
	if !seen {
		latencyMap[key] = sample
		return sample
	}

	updated := (previous * 0.9) + (sample * 0.1)
	latencyMap[key] = updated
	return updated
}
// argsLabel builds a compact, deterministic description of a field's
// arguments for use inside a metric label.
//
// Argument names are emitted in sorted order, separated by commas. When an
// argument's value is a struct (or a pointer chain ending in one), the name is
// followed by "{...}" listing the struct's populated fields: nil pointer and
// nil interface fields are skipped, as is an int field named "Limit" whose
// value is <= 0. A field is named by its json tag (tag options such as
// ",omitempty" are dropped), falling back to the Go field name with its first
// letter lower-cased. Only names are emitted, never argument values.
func argsLabel(args map[string]interface{}) string {
	keys := make([]string, 0, len(args))
	for key := range args {
		keys = append(keys, key)
	}
	// Map iteration order is random; sort so the same set of arguments always
	// yields the same label value.
	sort.Strings(keys)

	var b strings.Builder
	for i, key := range keys {
		if i > 0 {
			b.WriteByte(',')
		}
		// Append the key. The previous code assigned here, which threw away
		// every argument written before this one.
		b.WriteString(key)

		rv := reflect.ValueOf(args[key])
		for rv.Kind() == reflect.Ptr {
			rv = rv.Elem()
		}
		// Non-struct values (including nil) contribute only their name.
		if rv.Kind() != reflect.Struct {
			continue
		}

		b.WriteByte('{')
		rt := rv.Type()
		first := true
		for j := 0; j < rv.NumField(); j++ {
			rf := rv.Field(j)
			if (rf.Kind() == reflect.Ptr || rf.Kind() == reflect.Interface) && rf.IsNil() {
				continue
			}

			rtf := rt.Field(j)

			// Empty limits are as good as no limit.
			if rtf.Name == "Limit" && rf.Kind() == reflect.Int && rf.Int() <= 0 {
				continue
			}

			if !first {
				b.WriteByte(',')
			}
			first = false

			// Use only the name part of the json tag; options after the comma
			// would otherwise look like additional fields in the label.
			name := rtf.Tag.Get("json")
			if idx := strings.IndexByte(name, ','); idx >= 0 {
				name = name[:idx]
			}
			if name == "" {
				name = strings.ToLower(rtf.Name[0:1]) + rtf.Name[1:]
			}
			b.WriteString(name)
		}
		b.WriteByte('}')
	}

	return b.String()
}