package instrumentation import ( "context" "reflect" "sort" "strings" "sync" "time" "github.com/99designs/gqlgen/graphql" prometheusclient "github.com/prometheus/client_golang/prometheus" ) const ( existStatusFailure = "failure" exitStatusSuccess = "success" ) var latencyMapMutex sync.Mutex var latencyMap = make(map[string]float64, 128) type ctxKeyType string var ctxKey = ctxKeyType("metricglobal") var ( requestStartedCounter prometheusclient.Counter requestCompletedCounter prometheusclient.Counter requestFailureCounter *prometheusclient.CounterVec requestSuccessCounter *prometheusclient.CounterVec requestComplexityHisto prometheusclient.Histogram resolverStartedCounter *prometheusclient.CounterVec resolverCompletedCounter *prometheusclient.CounterVec timeToResolveField *prometheusclient.HistogramVec timeToHandleRequest *prometheusclient.HistogramVec timeToHandleRequestQueryMAvg *prometheusclient.GaugeVec timeToHandleRequestQueryTotal *prometheusclient.CounterVec responseSizeMAvg *prometheusclient.GaugeVec responseSizeTotal *prometheusclient.CounterVec ) // Globals is monitoring data. type Globals struct { sync.Mutex Queries []string Failed bool } // Register registers the prometheus metrics. func Register() { requestStartedCounter = prometheusclient.NewCounter( prometheusclient.CounterOpts{ Name: "graphql_request_started_total", Help: "Total number of requests started on the graphql server.", }, ) requestCompletedCounter = prometheusclient.NewCounter( prometheusclient.CounterOpts{ Name: "graphql_request_completed_total", Help: "Total number of requests completed on the graphql server.", }, ) requestFailureCounter = prometheusclient.NewCounterVec( prometheusclient.CounterOpts{ Name: "graphql_request_failures", Help: "Number of requests with either success or failure status.", }, []string{"queries"}, ) requestSuccessCounter = prometheusclient.NewCounterVec( prometheusclient.CounterOpts{ Name: "graphql_request_successes", Help: "Number of requests with either success or failure status.", }, []string{"queries"}, ) requestComplexityHisto = prometheusclient.NewHistogram( prometheusclient.HistogramOpts{ Name: "graphql_request_complexity", Help: "The complexity value of incoming queries.", Buckets: prometheusclient.LinearBuckets(100, 25, 9), }, ) resolverStartedCounter = prometheusclient.NewCounterVec( prometheusclient.CounterOpts{ Name: "graphql_resolver_started_total", Help: "Total number of resolver started on the graphql server.", }, []string{"object", "field"}, ) resolverCompletedCounter = prometheusclient.NewCounterVec( prometheusclient.CounterOpts{ Name: "graphql_resolver_completed_total", Help: "Total number of resolver completed on the graphql server.", }, []string{"object", "field"}, ) timeToResolveField = prometheusclient.NewHistogramVec(prometheusclient.HistogramOpts{ Name: "graphql_resolver_duration_ms", Help: "The time taken to resolve a field by graphql server.", Buckets: prometheusclient.ExponentialBuckets(1, 2, 11), }, []string{"exitStatus"}) timeToHandleRequest = prometheusclient.NewHistogramVec(prometheusclient.HistogramOpts{ Name: "graphql_request_duration_ms", Help: "The time taken to handle a request by graphql server.", Buckets: prometheusclient.ExponentialBuckets(1, 2, 11), }, []string{"exitStatus"}) timeToHandleRequestQueryMAvg = prometheusclient.NewGaugeVec( prometheusclient.GaugeOpts{ Name: "graphql_request_query_duration_mavg_ms", Help: "Moving average of graphql request query duration for a query set.", }, []string{"queries"}, ) timeToHandleRequestQueryTotal = prometheusclient.NewCounterVec( prometheusclient.CounterOpts{ Name: "graphql_request_query_duration_total_ms", Help: "Total duration spent responding to a query.", }, []string{"queries"}, ) responseSizeMAvg = prometheusclient.NewGaugeVec( prometheusclient.GaugeOpts{ Name: "graphql_response_size_mavg_bytes", Help: "Moving average of graphql response size for a query set.", }, []string{"queries"}, ) responseSizeTotal = prometheusclient.NewCounterVec( prometheusclient.CounterOpts{ Name: "graphql_response_size_total_bytes", Help: "Total bytes of response data to a query.", }, []string{"queries"}, ) prometheusclient.MustRegister( requestStartedCounter, requestCompletedCounter, requestSuccessCounter, requestFailureCounter, requestComplexityHisto, resolverStartedCounter, resolverCompletedCounter, timeToResolveField, timeToHandleRequest, timeToHandleRequestQueryMAvg, timeToHandleRequestQueryTotal, responseSizeMAvg, responseSizeTotal, ) } // UnRegister unregisters func UnRegister() { prometheusclient.Unregister(requestStartedCounter) prometheusclient.Unregister(requestCompletedCounter) prometheusclient.Unregister(requestSuccessCounter) prometheusclient.Unregister(requestFailureCounter) prometheusclient.Unregister(requestComplexityHisto) prometheusclient.Unregister(resolverStartedCounter) prometheusclient.Unregister(resolverCompletedCounter) prometheusclient.Unregister(timeToResolveField) prometheusclient.Unregister(timeToHandleRequest) prometheusclient.Unregister(timeToHandleRequestQueryMAvg) prometheusclient.Unregister(timeToHandleRequestQueryTotal) prometheusclient.Unregister(responseSizeMAvg) prometheusclient.Unregister(responseSizeTotal) } // ResolverMiddleware is a resolver middleware that logs metrics for prometheus. func ResolverMiddleware() graphql.FieldMiddleware { return func(ctx context.Context, next graphql.Resolver) (interface{}, error) { rctx := graphql.GetResolverContext(ctx) resolverStartedCounter.WithLabelValues(rctx.Object, rctx.Field.Name).Inc() observerStart := time.Now() path := rctx.Path() if len(path) == 1 { if globals, ok := ctx.Value(ctxKey).(*Globals); ok { globals.Lock() if rctx.Field.Field != nil { globals.Queries = append(globals.Queries, rctx.Field.Name+"("+argsLabel(rctx.Args)+")") } else if name, ok := path[0].(string); ok { globals.Queries = append(globals.Queries, name) } globals.Unlock() } } res, err := next(ctx) var exitStatus string if err != nil { exitStatus = existStatusFailure if globals, ok := ctx.Value(ctxKey).(*Globals); ok { globals.Lock() globals.Failed = true globals.Unlock() } } else { exitStatus = exitStatusSuccess } exitStatusLabel := prometheusclient.Labels{"exitStatus": exitStatus} timeToResolveField.With(exitStatusLabel).Observe(float64(time.Since(observerStart).Nanoseconds() / int64(time.Millisecond))) resolverCompletedCounter.WithLabelValues(rctx.Object, rctx.Field.Name).Inc() return res, err } } // RequestMiddleware is a request middleware that logs metrics for prometheus. It looks busy, but the // overhead is less than 10µs. func RequestMiddleware() graphql.RequestMiddleware { return func(ctx context.Context, next func(ctx context.Context) []byte) []byte { requestStartedCounter.Inc() // Store some values in a global context. globals := Globals{} // Measure the time to serve the request observerStart := time.Now() res := next(context.WithValue(ctx, ctxKey, &globals)) observedDuration := time.Since(observerStart) observedDurationMS := float64(observedDuration.Nanoseconds()) / float64(time.Millisecond) reqCtx := graphql.GetRequestContext(ctx) // exitStatus label exitStatusLabel := prometheusclient.Labels{"exitStatus": exitStatusSuccess} if globals.Failed { exitStatusLabel["exitStatus"] = existStatusFailure } // queries label sort.Strings(globals.Queries) queriesStr := strings.Join(globals.Queries, ";") queriesLabel := prometheusclient.Labels{ "queries": queriesStr, } // Write metrics timeToHandleRequest.With(exitStatusLabel).Observe(observedDurationMS) requestCompletedCounter.Inc() if len(globals.Queries) > 0 { if globals.Failed { requestFailureCounter.With(queriesLabel).Inc() } else { requestSuccessCounter.With(queriesLabel).Inc() } timeToHandleRequestQueryMAvg.With(queriesLabel).Set(movingAvgLatency("ms:"+queriesStr, observedDurationMS)) timeToHandleRequestQueryTotal.With(queriesLabel).Add(observedDurationMS) responseSizeMAvg.With(queriesLabel).Set(movingAvgLatency("bytes:"+queriesStr, float64(len(res)))) responseSizeTotal.With(queriesLabel).Add(float64(len(res))) } requestComplexityHisto.Observe(float64(reqCtx.OperationComplexity)) return res } } func movingAvgLatency(key string, sample float64) float64 { latencyMapMutex.Lock() latency, ok := latencyMap[key] if ok { latency = (latency * 0.9) + (sample * 0.1) latencyMap[key] = latency } else { latencyMap[key] = sample latency = sample } latencyMapMutex.Unlock() return latency } func argsLabel(args map[string]interface{}) string { result := "" for key, value := range args { if len(result) > 0 { result += "," } result = key rv := reflect.ValueOf(value) for rv.Kind() == reflect.Ptr { rv = rv.Elem() } if rv.Kind() != reflect.Struct { continue } result += "{" first := true for i := 0; i < rv.NumField(); i++ { rf := rv.Field(i) if (rf.Kind() == reflect.Ptr || rf.Kind() == reflect.Interface) && rf.IsNil() { continue } rtf := rv.Type().Field(i) // Empty limits are as good as no limit if rtf.Name == "Limit" && rf.Type().Kind() == reflect.Int && rf.Int() <= 0 { continue } if !first { result += "," } first = false jsonTag := rtf.Tag.Get("json") if jsonTag != "" { result += jsonTag } else { result += strings.ToLower(rtf.Name[0:1]) + rtf.Name[1:] } } result += "}" } return result }