Skip to content

Commit 426a3f5

Browse files
committed
add metrics tags search support to prometheus
Signed-off-by: Harshil Gupta <[email protected]>
1 parent dc21563 commit 426a3f5

File tree

4 files changed

+104
-12
lines changed

4 files changed

+104
-12
lines changed

cmd/query/app/query_parser.go

Lines changed: 9 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -275,6 +275,15 @@ func (p *queryParser) parseMetricsQueryParams(r *http.Request) (bqp metricstore.
275275
if err != nil {
276276
return bqp, err
277277
}
278+
279+
tags, err := p.parseTags(r.Form[tagParam], r.Form[tagsParam])
280+
if err != nil {
281+
return bqp, err
282+
}
283+
if len(tags) > 0 {
284+
bqp.Tags = tags
285+
}
286+
278287
bqp.EndTime = &endTs
279288
bqp.Lookback = &lookback
280289
bqp.Step = &step

cmd/query/app/query_parser_test.go

Lines changed: 22 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -370,3 +370,25 @@ func TestParameterErrors(t *testing.T) {
370370
})
371371
}
372372
}
373+
374+
func TestParseMetricsTags(t *testing.T) {
375+
t.Run("simple tags only", func(t *testing.T) {
376+
request, err := http.NewRequest(http.MethodGet, "x?service=foo&step=1000&tag=key1:value1,key2:value2", http.NoBody)
377+
require.NoError(t, err)
378+
parser := &queryParser{
379+
timeNow: time.Now,
380+
}
381+
mqp, err := parser.parseMetricsQueryParams(request)
382+
require.NoError(t, err)
383+
assert.Equal(t, time.Second, *mqp.Step)
384+
})
385+
t.Run("malformed simple tag", func(t *testing.T) {
386+
request, err := http.NewRequest(http.MethodGet, "x?service=foo&step=1000&tag=keyWithoutValue", http.NoBody)
387+
require.NoError(t, err)
388+
parser := &queryParser{
389+
timeNow: time.Now,
390+
}
391+
_, err = parser.parseMetricsQueryParams(request)
392+
require.Error(t, err)
393+
})
394+
}

internal/storage/metricstore/prometheus/metricstore/reader.go

Lines changed: 71 additions & 12 deletions
Original file line numberDiff line numberDiff line change
@@ -51,6 +51,7 @@ type (
5151
spanKindFilter string
5252
serviceFilter string
5353
rate string
54+
tagFilters []string
5455
}
5556

5657
metricsQueryParams struct {
@@ -134,13 +135,25 @@ func (m MetricsReader) GetLatencies(ctx context.Context, requestParams *metricst
134135
metricName: "service_latencies",
135136
metricDesc: fmt.Sprintf("%.2fth quantile latency, grouped by service", requestParams.Quantile),
136137
buildPromQuery: func(p promQueryParams) string {
138+
// Build filter string including service_name, span_kind, and tags
139+
filters := []string{fmt.Sprintf(`service_name =~ %q`, p.serviceFilter)}
140+
141+
if p.spanKindFilter != "" {
142+
filters = append(filters, p.spanKindFilter)
143+
}
144+
145+
// Add tag filters if there are any
146+
if len(p.tagFilters) > 0 {
147+
filters = append(filters, p.tagFilters...)
148+
}
149+
150+
filterStr := strings.Join(filters, ", ")
151+
137152
return fmt.Sprintf(
138-
// Note: p.spanKindFilter can be ""; trailing commas are okay within a timeseries selection.
139-
`histogram_quantile(%.2f, sum(rate(%s_bucket{service_name =~ %q, %s}[%s])) by (%s))`,
153+
`histogram_quantile(%.2f, sum(rate(%s_bucket{%s}[%s])) by (%s))`,
140154
requestParams.Quantile,
141155
m.latencyMetricName,
142-
p.serviceFilter,
143-
p.spanKindFilter,
156+
filterStr,
144157
p.rate,
145158
p.groupBy,
146159
)
@@ -177,12 +190,24 @@ func (m MetricsReader) GetCallRates(ctx context.Context, requestParams *metricst
177190
metricName: "service_call_rate",
178191
metricDesc: "calls/sec, grouped by service",
179192
buildPromQuery: func(p promQueryParams) string {
193+
// Build filter string including service_name, span_kind, and tags
194+
filters := []string{fmt.Sprintf(`service_name =~ %q`, p.serviceFilter)}
195+
196+
if p.spanKindFilter != "" {
197+
filters = append(filters, p.spanKindFilter)
198+
}
199+
200+
// Add tag filters if there are any
201+
if len(p.tagFilters) > 0 {
202+
filters = append(filters, p.tagFilters...)
203+
}
204+
205+
filterStr := strings.Join(filters, ", ")
206+
180207
return fmt.Sprintf(
181-
// Note: p.spanKindFilter can be ""; trailing commas are okay within a timeseries selection.
182-
`sum(rate(%s{service_name =~ %q, %s}[%s])) by (%s)`,
208+
`sum(rate(%s{%s}[%s])) by (%s)`,
183209
m.callsMetricName,
184-
p.serviceFilter,
185-
p.spanKindFilter,
210+
filterStr,
186211
p.rate,
187212
p.groupBy,
188213
)
@@ -211,11 +236,32 @@ func (m MetricsReader) GetErrorRates(ctx context.Context, requestParams *metrics
211236
metricName: "service_error_rate",
212237
metricDesc: "error rate, computed as a fraction of errors/sec over calls/sec, grouped by service",
213238
buildPromQuery: func(p promQueryParams) string {
239+
// Build base filters for all queries (service_name)
240+
baseFilters := []string{fmt.Sprintf(`service_name =~ %q`, p.serviceFilter)}
241+
242+
// Add status_code filter only for error rate numerator, must be right after service_name to match test expectations
243+
errorFilters := append([]string{}, baseFilters...)
244+
errorFilters = append(errorFilters, `status_code = "STATUS_CODE_ERROR"`)
245+
246+
// Add span_kind filter
247+
if p.spanKindFilter != "" {
248+
baseFilters = append(baseFilters, p.spanKindFilter)
249+
errorFilters = append(errorFilters, p.spanKindFilter)
250+
}
251+
252+
// Add tag filters if there are any
253+
if len(p.tagFilters) > 0 {
254+
baseFilters = append(baseFilters, p.tagFilters...)
255+
errorFilters = append(errorFilters, p.tagFilters...)
256+
}
257+
258+
errorFilterStr := strings.Join(errorFilters, ", ")
259+
baseFilterStr := strings.Join(baseFilters, ", ")
260+
214261
return fmt.Sprintf(
215-
// Note: p.spanKindFilter can be ""; trailing commas are okay within a timeseries selection.
216-
`sum(rate(%s{service_name =~ %q, status_code = "STATUS_CODE_ERROR", %s}[%s])) by (%s) / sum(rate(%s{service_name =~ %q, %s}[%s])) by (%s)`,
217-
m.callsMetricName, p.serviceFilter, p.spanKindFilter, p.rate, p.groupBy,
218-
m.callsMetricName, p.serviceFilter, p.spanKindFilter, p.rate, p.groupBy,
262+
`sum(rate(%s{%s}[%s])) by (%s) / sum(rate(%s{%s}[%s])) by (%s)`,
263+
m.callsMetricName, errorFilterStr, p.rate, p.groupBy,
264+
m.callsMetricName, baseFilterStr, p.rate, p.groupBy,
219265
)
220266
},
221267
}
@@ -308,11 +354,24 @@ func (m MetricsReader) buildPromQuery(metricsParams metricsQueryParams) string {
308354
if len(metricsParams.SpanKinds) > 0 {
309355
spanKindFilter = fmt.Sprintf(`span_kind =~ %q`, strings.Join(metricsParams.SpanKinds, "|"))
310356
}
357+
358+
// Build tag filters
359+
var tagFilters []string
360+
if len(metricsParams.Tags) > 0 {
361+
for k, v := range metricsParams.Tags {
362+
// Escape dots in key names for Prometheus compatibility
363+
escapedKey := strings.ReplaceAll(k, ".", "_")
364+
tagFilters = append(tagFilters, fmt.Sprintf(`%s=%q`, escapedKey, v))
365+
}
366+
}
367+
368+
fmt.Println(">>>>>>>>> tagfilters", tagFilters)
311369
promParams := promQueryParams{
312370
serviceFilter: strings.Join(metricsParams.ServiceNames, "|"),
313371
spanKindFilter: spanKindFilter,
314372
rate: promqlDurationString(metricsParams.RatePer),
315373
groupBy: strings.Join(groupBy, ","),
374+
tagFilters: tagFilters,
316375
}
317376
return metricsParams.buildPromQuery(promParams)
318377
}

internal/storage/v1/api/metricstore/interface.go

Lines changed: 2 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -45,6 +45,8 @@ type BaseQueryParameters struct {
4545
RatePer *time.Duration
4646
// SpanKinds is the list of span kinds to include (logical OR) in the resulting metrics aggregation.
4747
SpanKinds []string
48+
// Tags is a map of tag keys and values to filter the metrics by.
49+
Tags map[string]string
4850
}
4951

5052
// LatenciesQueryParameters contains the parameters required for latency metrics queries.

0 commit comments

Comments
 (0)