(metrics) Bring Prometheus metrics out of disrepair

* Fix bad labels
* Add nodeId where appropriate
* Hopefully fix histogram buckets for index query times
This commit is contained in:
Viktor Lofgren 2023-11-08 14:01:28 +01:00
parent ef16502159
commit 858357a246
3 changed files with 45 additions and 24 deletions

View File

@ -26,8 +26,14 @@ import java.util.concurrent.ConcurrentHashMap;
@Singleton
public class QueryClient extends AbstractDynamicClient {
private static final Summary wmsa_search_index_api_delegate_time = Summary.build().name("wmsa_search_index_api_delegate_time").help("-").register();
private static final Summary wmsa_search_index_api_search_time = Summary.build().name("wmsa_search_index_api_search_time").help("-").register();
// Summary used to time the body of delegate(); registered under the metric name
// "wmsa_qs_api_delegate_time".
private static final Summary wmsa_qs_api_delegate_time = Summary.build()
.name("wmsa_qs_api_delegate_time")
.help("query service delegate time")
.register();
// Summary used to time the body of search(); registered under the metric name
// "wmsa_qs_api_search_time".
private static final Summary wmsa_qs_api_search_time = Summary.build()
.name("wmsa_qs_api_search_time")
.help("query service search time")
.register();
private final Map<ServiceAndNode, ManagedChannel> channels = new ConcurrentHashMap<>();
private final Map<ServiceAndNode, QueryApiGrpc.QueryApiBlockingStub > queryApis = new ConcurrentHashMap<>();
@ -64,14 +70,16 @@ public class QueryClient extends AbstractDynamicClient {
/** Delegate an Index API style query directly to the index service.
 * <p>
 * Posts {@code specs} to the {@code /delegate/} endpoint via {@code postGet} and blocks
 * until the first {@link SearchResultSet} is emitted. The whole call is wrapped in a
 * Summary timer; this change renames that timer from
 * {@code wmsa_search_index_api_delegate_time} to {@code wmsa_qs_api_delegate_time}.
 * <p>
 * NOTE(review): the literal {@code 0} passed to {@code postGet} looks like a node id;
 * confirm against {@code postGet}'s signature.
 *
 * @param ctx   request context forwarded to {@code postGet}
 * @param specs the search specification sent as the request body
 * @return the first result set produced by the delegated call
 */
@CheckReturnValue
public SearchResultSet delegate(Context ctx, SearchSpecification specs) {
return wmsa_search_index_api_delegate_time.time(
return wmsa_qs_api_delegate_time.time(
() -> this.postGet(ctx, 0, "/delegate/", specs, SearchResultSet.class).blockingFirst()
);
}
/** Execute a query through the query API and return the decoded response.
 * <p>
 * {@code params} is converted with {@code QueryProtobufCodec.convertQueryParams}, passed to
 * {@code query(...)} on the handle returned by {@code queryApi(0)}, and the reply is
 * converted back with {@code QueryProtobufCodec.convertQueryResponse}. This change wraps
 * that round trip in the {@code wmsa_qs_api_search_time} Summary timer; previously the
 * call was untimed.
 * <p>
 * NOTE(review): {@code ctx} is not used by the visible body, and the argument {@code 0}
 * to {@code queryApi} is presumably a node id; confirm both.
 *
 * @param ctx    request context (unused in the visible code)
 * @param params the query parameters to send
 * @return the converted query response
 */
@CheckReturnValue
public QueryResponse search(Context ctx, QueryParams params) {
return QueryProtobufCodec.convertQueryResponse(queryApi(0).query(QueryProtobufCodec.convertQueryParams(params)));
return wmsa_qs_api_search_time.time(
() -> QueryProtobufCodec.convertQueryResponse(queryApi(0).query(QueryProtobufCodec.convertQueryParams(params)))
);
}
}

View File

@ -24,28 +24,30 @@ public class Service {
private final Initialization initialization;
private final static Counter request_counter = Counter.build("wmsa_service_in_request_counter", "Request Counter")
.labelNames("service")
private final static Counter request_counter = Counter.build("wmsa_request_counter", "Request Counter")
.labelNames("service", "node")
.register();
private final static Counter request_counter_good = Counter.build("wmsa_service_good_request_counter", "Good Requests")
.labelNames("service")
private final static Counter request_counter_good = Counter.build("wmsa_request_counter_good", "Good Requests")
.labelNames("service", "node")
.register();
private final static Counter request_counter_bad = Counter.build("wmsa_service_bad_request_counter", "Bad Requests")
.labelNames("service")
private final static Counter request_counter_bad = Counter.build("wmsa_request_counter_bad", "Bad Requests")
.labelNames("service", "node")
.register();
private final static Counter request_counter_err = Counter.build("wmsa_service_error_request_counter", "Error Requests")
.labelNames("service")
private final static Counter request_counter_err = Counter.build("wmsa_request_counter_err", "Error Requests")
.labelNames("service", "node")
.register();
private final String serviceName;
private static volatile boolean initialized = false;
protected final MqInboxIf messageQueueInbox;
private final int node;
public Service(BaseServiceParams params,
Runnable configureStaticFiles
) {
this.initialization = params.initialization;
var config = params.configuration;
node = config.node();
String inboxName = config.serviceName();
logger.info("Inbox name: {}", inboxName);
@ -150,7 +152,7 @@ public class Service {
// Paint context
paintThreadName(request, "req:");
request_counter.labels(serviceName).inc();
request_counter.labels(serviceName, Integer.toString(node)).inc();
}
private void auditRequestOut(Request request, Response response) {
@ -158,10 +160,10 @@ public class Service {
paintThreadName(request, "rsp:");
if (response.status() < 400) {
request_counter_good.labels(serviceName).inc();
request_counter_good.labels(serviceName, Integer.toString(node)).inc();
}
else {
request_counter_bad.labels(serviceName).inc();
request_counter_bad.labels(serviceName, Integer.toString(node)).inc();
}
logResponse(request, response);
@ -174,7 +176,7 @@ public class Service {
}
protected void handleException(Exception ex, Request request, Response response) {
request_counter_err.labels(serviceName).inc();
request_counter_err.labels(serviceName, Integer.toString(node)).inc();
if (ex instanceof MessagingException) {
logger.error("{} {}", ex.getClass().getSimpleName(), ex.getMessage());
}

View File

@ -44,8 +44,6 @@ import java.sql.SQLException;
import java.util.*;
import java.util.stream.Collectors;
import static io.grpc.stub.ServerCalls.asyncUnimplementedUnaryCall;
@Singleton
public class IndexQueryService extends IndexApiImplBase {
@ -55,9 +53,22 @@ public class IndexQueryService extends IndexApiImplBase {
// so that they can be filtered out in the production logging configuration
private final Marker queryMarker = MarkerFactory.getMarker("QUERY");
private static final Counter wmsa_edge_index_query_timeouts = Counter.build().name("wmsa_edge_index_query_timeouts").help("-").register();
private static final Gauge wmsa_edge_index_query_cost = Gauge.build().name("wmsa_edge_index_query_cost").help("-").register();
private static final Histogram wmsa_edge_index_query_time = Histogram.build().name("wmsa_edge_index_query_time").linearBuckets(25/1000., 25/1000., 15).help("-").register();
// Index-side query metrics.
//
// The node id is folded into the metric name through the subsystem prefix. Prometheus
// metric names may only contain [a-zA-Z0-9_:], so the prefix is joined with an underscore;
// a hyphen here makes register() throw IllegalArgumentException, which surfaces as a
// failure to initialize this class.
// NOTE(review): if WMSA_SERVICE_NODE is unset the prefix becomes "index_null"; confirm the
// variable is always present where this service runs.

// Incremented after a search when params.hasTimeLeft() reports false.
private static final Counter wmsa_query_timeouts = Counter.build()
        .name("wmsa_query_timeouts")
        .help("Query timeout counter")
        .subsystem("index_" + System.getenv("WMSA_SERVICE_NODE"))
        .register();
// Set to params.getDataCost() of the most recently executed search.
private static final Gauge wmsa_query_cost = Gauge.build()
        .name("wmsa_query_cost")
        .help("Computational cost of query")
        .subsystem("index_" + System.getenv("WMSA_SERVICE_NODE"))
        .register();
// Histogram.time() records durations in SECONDS, so the bucket bounds must be in seconds
// too: 15 linear buckets from 0.05 s (50 ms) to 0.75 s (750 ms) in 50 ms steps.
private static final Histogram wmsa_query_time = Histogram.build()
        .name("wmsa_query_time")
        .linearBuckets(0.05, 0.05, 15)
        .subsystem("index_" + System.getenv("WMSA_SERVICE_NODE"))
        .help("Index-side query time")
        .register();
private final IndexQueryExecutor queryExecutor;
private final Gson gson = GsonFactory.get();
@ -120,16 +131,16 @@ public class IndexQueryService extends IndexApiImplBase {
}
try {
return wmsa_edge_index_query_time.time(() -> {
return wmsa_query_time.time(() -> {
var params = new SearchParameters(specsSet, getSearchSet(specsSet));
SearchResultSet results = executeSearch(params);
logger.info(queryMarker, "Index Result Count: {}", results.size());
wmsa_edge_index_query_cost.set(params.getDataCost());
wmsa_query_cost.set(params.getDataCost());
if (!params.hasTimeLeft()) {
wmsa_edge_index_query_timeouts.inc();
wmsa_query_timeouts.inc();
}
return results;