GraphQL API and utilities for the rpdata project
You cannot select more than 25 topics. Topics must start with a letter or number, can include dashes ('-'), and can be up to 35 characters long.

346 lines
9.7 KiB

  1. package instrumentation
  2. import (
  3. "context"
  4. "reflect"
  5. "sort"
  6. "strings"
  7. "sync"
  8. "time"
  9. "github.com/99designs/gqlgen/graphql"
  10. prometheusclient "github.com/prometheus/client_golang/prometheus"
  11. )
  // Values used for the "exitStatus" metric label.
  //
  // NOTE: existStatusFailure is misspelled ("exist" instead of "exit"); the name
  // is kept because the middlewares in this file refer to it.
  const (
  	existStatusFailure = "failure"
  	exitStatusSuccess  = "success"
  )

  // latencyMap stores one running average per key. It is read and written by
  // movingAvgLatency, which holds latencyMapMutex while doing so.
  var (
  	latencyMapMutex sync.Mutex
  	latencyMap      = make(map[string]float64, 128)
  )

  // ctxKeyType is a dedicated type for this package's context key.
  type ctxKeyType string

  // ctxKey is the context key under which RequestMiddleware stores *Globals.
  var ctxKey ctxKeyType = "metricglobal"
  20. var (
  21. requestStartedCounter prometheusclient.Counter
  22. requestCompletedCounter prometheusclient.Counter
  23. requestFailureCounter *prometheusclient.CounterVec
  24. requestSuccessCounter *prometheusclient.CounterVec
  25. requestComplexityHisto prometheusclient.Histogram
  26. resolverStartedCounter *prometheusclient.CounterVec
  27. resolverCompletedCounter *prometheusclient.CounterVec
  28. timeToResolveField *prometheusclient.HistogramVec
  29. timeToHandleRequest *prometheusclient.HistogramVec
  30. timeToHandleRequestQueryMAvg *prometheusclient.GaugeVec
  31. timeToHandleRequestQueryTotal *prometheusclient.CounterVec
  32. responseSizeMAvg *prometheusclient.GaugeVec
  33. responseSizeTotal *prometheusclient.CounterVec
  34. )
  // Globals is the per-request monitoring state. RequestMiddleware stores a
  // pointer to it in the context and ResolverMiddleware updates it.
  type Globals struct {
  	// Mutex is locked by ResolverMiddleware around every update of the
  	// fields below.
  	sync.Mutex

  	// Queries collects one label per resolver whose path has length one.
  	Queries []string

  	// Failed is set once any resolver returns an error.
  	Failed bool
  }
  41. // Register registers the prometheus metrics.
  42. func Register() {
  43. requestStartedCounter = prometheusclient.NewCounter(
  44. prometheusclient.CounterOpts{
  45. Name: "graphql_request_started_total",
  46. Help: "Total number of requests started on the graphql server.",
  47. },
  48. )
  49. requestCompletedCounter = prometheusclient.NewCounter(
  50. prometheusclient.CounterOpts{
  51. Name: "graphql_request_completed_total",
  52. Help: "Total number of requests completed on the graphql server.",
  53. },
  54. )
  55. requestFailureCounter = prometheusclient.NewCounterVec(
  56. prometheusclient.CounterOpts{
  57. Name: "graphql_request_failures",
  58. Help: "Number of requests with either success or failure status.",
  59. },
  60. []string{"queries"},
  61. )
  62. requestSuccessCounter = prometheusclient.NewCounterVec(
  63. prometheusclient.CounterOpts{
  64. Name: "graphql_request_successes",
  65. Help: "Number of requests with either success or failure status.",
  66. },
  67. []string{"queries"},
  68. )
  69. requestComplexityHisto = prometheusclient.NewHistogram(
  70. prometheusclient.HistogramOpts{
  71. Name: "graphql_request_complexity",
  72. Help: "The complexity value of incoming queries.",
  73. Buckets: prometheusclient.LinearBuckets(100, 25, 9),
  74. },
  75. )
  76. resolverStartedCounter = prometheusclient.NewCounterVec(
  77. prometheusclient.CounterOpts{
  78. Name: "graphql_resolver_started_total",
  79. Help: "Total number of resolver started on the graphql server.",
  80. },
  81. []string{"object", "field"},
  82. )
  83. resolverCompletedCounter = prometheusclient.NewCounterVec(
  84. prometheusclient.CounterOpts{
  85. Name: "graphql_resolver_completed_total",
  86. Help: "Total number of resolver completed on the graphql server.",
  87. },
  88. []string{"object", "field"},
  89. )
  90. timeToResolveField = prometheusclient.NewHistogramVec(prometheusclient.HistogramOpts{
  91. Name: "graphql_resolver_duration_ms",
  92. Help: "The time taken to resolve a field by graphql server.",
  93. Buckets: prometheusclient.ExponentialBuckets(1, 2, 11),
  94. }, []string{"exitStatus"})
  95. timeToHandleRequest = prometheusclient.NewHistogramVec(prometheusclient.HistogramOpts{
  96. Name: "graphql_request_duration_ms",
  97. Help: "The time taken to handle a request by graphql server.",
  98. Buckets: prometheusclient.ExponentialBuckets(1, 2, 11),
  99. }, []string{"exitStatus"})
  100. timeToHandleRequestQueryMAvg = prometheusclient.NewGaugeVec(
  101. prometheusclient.GaugeOpts{
  102. Name: "graphql_request_query_duration_mavg_ms",
  103. Help: "Moving average of graphql request query duration for a query set.",
  104. },
  105. []string{"queries"},
  106. )
  107. timeToHandleRequestQueryTotal = prometheusclient.NewCounterVec(
  108. prometheusclient.CounterOpts{
  109. Name: "graphql_request_query_duration_total_ms",
  110. Help: "Total duration spent responding to a query.",
  111. },
  112. []string{"queries"},
  113. )
  114. responseSizeMAvg = prometheusclient.NewGaugeVec(
  115. prometheusclient.GaugeOpts{
  116. Name: "graphql_response_size_mavg_bytes",
  117. Help: "Moving average of graphql response size for a query set.",
  118. },
  119. []string{"queries"},
  120. )
  121. responseSizeTotal = prometheusclient.NewCounterVec(
  122. prometheusclient.CounterOpts{
  123. Name: "graphql_response_size_total_bytes",
  124. Help: "Total bytes of response data to a query.",
  125. },
  126. []string{"queries"},
  127. )
  128. prometheusclient.MustRegister(
  129. requestStartedCounter,
  130. requestCompletedCounter,
  131. requestSuccessCounter,
  132. requestFailureCounter,
  133. requestComplexityHisto,
  134. resolverStartedCounter,
  135. resolverCompletedCounter,
  136. timeToResolveField,
  137. timeToHandleRequest,
  138. timeToHandleRequestQueryMAvg,
  139. timeToHandleRequestQueryTotal,
  140. responseSizeMAvg,
  141. responseSizeTotal,
  142. )
  143. }
  144. // UnRegister unregisters
  145. func UnRegister() {
  146. prometheusclient.Unregister(requestStartedCounter)
  147. prometheusclient.Unregister(requestCompletedCounter)
  148. prometheusclient.Unregister(requestSuccessCounter)
  149. prometheusclient.Unregister(requestFailureCounter)
  150. prometheusclient.Unregister(requestComplexityHisto)
  151. prometheusclient.Unregister(resolverStartedCounter)
  152. prometheusclient.Unregister(resolverCompletedCounter)
  153. prometheusclient.Unregister(timeToResolveField)
  154. prometheusclient.Unregister(timeToHandleRequest)
  155. prometheusclient.Unregister(timeToHandleRequestQueryMAvg)
  156. prometheusclient.Unregister(timeToHandleRequestQueryTotal)
  157. prometheusclient.Unregister(responseSizeMAvg)
  158. prometheusclient.Unregister(responseSizeTotal)
  159. }
  160. // ResolverMiddleware is a resolver middleware that logs metrics for prometheus.
  161. func ResolverMiddleware() graphql.FieldMiddleware {
  162. return func(ctx context.Context, next graphql.Resolver) (interface{}, error) {
  163. rctx := graphql.GetResolverContext(ctx)
  164. resolverStartedCounter.WithLabelValues(rctx.Object, rctx.Field.Name).Inc()
  165. observerStart := time.Now()
  166. path := rctx.Path()
  167. if len(path) == 1 {
  168. if globals, ok := ctx.Value(ctxKey).(*Globals); ok {
  169. globals.Lock()
  170. if rctx.Field.Field != nil {
  171. globals.Queries = append(globals.Queries, rctx.Field.Name+"("+argsLabel(rctx.Args)+")")
  172. } else if name, ok := path[0].(string); ok {
  173. globals.Queries = append(globals.Queries, name)
  174. }
  175. globals.Unlock()
  176. }
  177. }
  178. res, err := next(ctx)
  179. var exitStatus string
  180. if err != nil {
  181. exitStatus = existStatusFailure
  182. if globals, ok := ctx.Value(ctxKey).(*Globals); ok {
  183. globals.Lock()
  184. globals.Failed = true
  185. globals.Unlock()
  186. }
  187. } else {
  188. exitStatus = exitStatusSuccess
  189. }
  190. exitStatusLabel := prometheusclient.Labels{"exitStatus": exitStatus}
  191. timeToResolveField.With(exitStatusLabel).Observe(float64(time.Since(observerStart).Nanoseconds() / int64(time.Millisecond)))
  192. resolverCompletedCounter.WithLabelValues(rctx.Object, rctx.Field.Name).Inc()
  193. return res, err
  194. }
  195. }
  196. // RequestMiddleware is a request middleware that logs metrics for prometheus. It looks busy, but the
  197. // overhead is less than 10µs.
  198. func RequestMiddleware() graphql.RequestMiddleware {
  199. return func(ctx context.Context, next func(ctx context.Context) []byte) []byte {
  200. requestStartedCounter.Inc()
  201. // Store some values in a global context.
  202. globals := Globals{}
  203. // Measure the time to serve the request
  204. observerStart := time.Now()
  205. res := next(context.WithValue(ctx, ctxKey, &globals))
  206. observedDuration := time.Since(observerStart)
  207. observedDurationMS := float64(observedDuration.Nanoseconds()) / float64(time.Millisecond)
  208. reqCtx := graphql.GetRequestContext(ctx)
  209. // exitStatus label
  210. exitStatusLabel := prometheusclient.Labels{"exitStatus": exitStatusSuccess}
  211. if globals.Failed {
  212. exitStatusLabel["exitStatus"] = existStatusFailure
  213. }
  214. // queries label
  215. sort.Strings(globals.Queries)
  216. queriesStr := strings.Join(globals.Queries, ";")
  217. queriesLabel := prometheusclient.Labels{
  218. "queries": queriesStr,
  219. }
  220. // Write metrics
  221. timeToHandleRequest.With(exitStatusLabel).Observe(observedDurationMS)
  222. requestCompletedCounter.Inc()
  223. if len(globals.Queries) > 0 {
  224. if globals.Failed {
  225. requestFailureCounter.With(queriesLabel).Inc()
  226. } else {
  227. requestSuccessCounter.With(queriesLabel).Inc()
  228. }
  229. timeToHandleRequestQueryMAvg.With(queriesLabel).Set(movingAvgLatency("ms:"+queriesStr, observedDurationMS))
  230. timeToHandleRequestQueryTotal.With(queriesLabel).Add(observedDurationMS)
  231. responseSizeMAvg.With(queriesLabel).Set(movingAvgLatency("bytes:"+queriesStr, float64(len(res))))
  232. responseSizeTotal.With(queriesLabel).Add(float64(len(res)))
  233. }
  234. requestComplexityHisto.Observe(float64(reqCtx.OperationComplexity))
  235. return res
  236. }
  237. }
  238. func movingAvgLatency(key string, sample float64) float64 {
  239. latencyMapMutex.Lock()
  240. latency, ok := latencyMap[key]
  241. if ok {
  242. latency = (latency * 0.9) + (sample * 0.1)
  243. latencyMap[key] = latency
  244. } else {
  245. latencyMap[key] = sample
  246. latency = sample
  247. }
  248. latencyMapMutex.Unlock()
  249. return latency
  250. }
  // argsLabel builds a label describing which arguments were passed to a
  // resolver. Argument names are listed in sorted order, separated by commas.
  // For an argument that is a struct (or a pointer chain ending in a struct),
  // the names of its set fields follow in braces, e.g. "filter{name,limit},id".
  //
  // A struct field is omitted when it is a nil pointer or nil interface, or when
  // it is an int field named "Limit" with a value <= 0. A field is named by its
  // json tag if it has one, otherwise by its Go name with the first letter
  // lower-cased. Returns "" for a nil or empty map.
  func argsLabel(args map[string]interface{}) string {
  	// Map iteration order is randomized; sort the keys so that the same
  	// arguments always produce the same label.
  	keys := make([]string, 0, len(args))
  	for key := range args {
  		keys = append(keys, key)
  	}
  	sort.Strings(keys)

  	var label strings.Builder
  	for i, key := range keys {
  		if i > 0 {
  			label.WriteByte(',')
  		}
  		// Append the key; the previous code assigned it, discarding every
  		// argument written before.
  		label.WriteString(key)

  		// Follow pointers down to the underlying value.
  		rv := reflect.ValueOf(args[key])
  		for rv.Kind() == reflect.Ptr {
  			rv = rv.Elem()
  		}
  		if rv.Kind() != reflect.Struct {
  			continue
  		}

  		label.WriteByte('{')
  		rt := rv.Type()
  		first := true
  		for j := 0; j < rv.NumField(); j++ {
  			rf := rv.Field(j)
  			if (rf.Kind() == reflect.Ptr || rf.Kind() == reflect.Interface) && rf.IsNil() {
  				continue
  			}

  			rtf := rt.Field(j)
  			// Empty limits are as good as no limit
  			if rtf.Name == "Limit" && rf.Type().Kind() == reflect.Int && rf.Int() <= 0 {
  				continue
  			}

  			if !first {
  				label.WriteByte(',')
  			}
  			first = false

  			if jsonTag := rtf.Tag.Get("json"); jsonTag != "" {
  				label.WriteString(jsonTag)
  			} else {
  				label.WriteString(strings.ToLower(rtf.Name[0:1]) + rtf.Name[1:])
  			}
  		}
  		label.WriteByte('}')
  	}

  	return label.String()
  }