package differ
// File metrics contains all metrics that need to be exposed to Prometheus and
// indirectly to Grafana.
// Generated documentation is available at:
// Documentation in literate-programming-style is available at:
import (
// Metrics names.
//
// Each constant is the base Prometheus name (without namespace or subsystem
// prefix) that is passed as CounterOpts.Name for the counter variable of the
// matching identifier.
const (
	// Errors raised while gathering input data or setting up dependencies.
	FetchContentErrorsName         = "fetch_content_errors"
	ReadClusterListErrorsName      = "read_cluster_list_errors"
	ReadReportedErrorsName         = "read_reported_errors"
	ProducerSetupErrorsName        = "producer_setup_errors"
	StorageSetupErrorsName         = "storage_setup_errors"
	ReadReportForClusterErrorsName = "read_report_for_cluster_errors"
	DeserializeReportErrorsName    = "deserialize_report_errors"

	// Report evaluation and notification outcomes.
	ReportWithHighImpactName          = "report_with_high_impact"
	NotificationNotSentSameStateName  = "notification_not_sent_same_state"
	NotificationNotSentErrorStateName = "notification_not_sent_error_state"
	NotificationSentName              = "notification_sent"
	NoSeverityTotalRiskName           = "total_risk_no_severity"
)
// Metrics helps.
//
// Each constant is the human-readable description that is passed as
// CounterOpts.Help for the counter variable of the matching identifier.
const (
	// Errors raised while gathering input data or setting up dependencies.
	FetchContentErrorsHelp         = "The total number of errors during fetch from content service"
	ReadClusterListErrorsHelp      = "The total number of errors when reading cluster list from new_reports table"
	ReadReportedErrorsHelp         = "The total number of errors when reading previously reported reports for given clusters from reported table"
	ProducerSetupErrorsHelp        = "The total number of errors when setting up Kafka producer"
	StorageSetupErrorsHelp         = "The total number of errors when setting up storage connection"
	ReadReportForClusterErrorsHelp = "The total number of errors when getting latest report for a given cluster ID"
	DeserializeReportErrorsHelp    = "The total number of errors when deserializing a report retrieved from the new_reports table"

	// Report evaluation and notification outcomes.
	ReportWithHighImpactHelp          = "The total number of reports with total risk higher than the configured threshold"
	NotificationNotSentSameStateHelp  = "The total number of notifications not sent because we parsed the same report"
	NotificationNotSentErrorStateHelp = "The total number of notifications not sent because of a Kafka producer error"
	NotificationSentHelp              = "The total number of notifications sent"
	NoSeverityTotalRiskHelp           = "The total number of times we handled a total risk that does not have an equivalent service log severity level"
)
// PushGatewayClient is a simple wrapper over http.Client so that prometheus
// can do HTTP requests with the given authentication header.
type PushGatewayClient struct {
	// AuthToken is the credential placed after "Basic " in the Authorization
	// header by Do. When it is empty no Authorization header is set.
	AuthToken string

	// httpClient is the client that actually performs the request. Its zero
	// value is a usable http.Client.
	httpClient http.Client
}
Do is a simple wrapper over http.Client.Do method that includes
the authentication header configured in the PushGatewayClient instance
func ( pgc * PushGatewayClient ) Do ( request * http . Request ) ( * http . Response , error ) {
if pgc . AuthToken != "" {
log . Debug ( ) . Msg ( "Adding authorization header to HTTP request" )
request . Header . Set ( "Authorization" , "Basic " + pgc . AuthToken )
} else {
log . Debug ( ) . Msg ( "No authorization token provided. Making HTTP request without credentials." )
log . Debug ( ) . Str ( "request" , request . URL . String ( ) ) . Str ( "method" , request . Method ) . Msg ( "Pushing metrics to Prometheus push gateway" )
resp , err := pgc . httpClient . Do ( request )
if resp != nil {
log . Debug ( ) . Int ( "code" , resp . StatusCode ) . Msg ( "Returned status code" )
return resp , err
FetchContentErrors shows number of errors during fetch from content service
var FetchContentErrors = promauto . NewCounter ( prometheus . CounterOpts {
Name : FetchContentErrorsName ,
Help : FetchContentErrorsHelp ,
} )
ReadClusterListErrors shows number of errors when reading cluster list from new_reports table
var ReadClusterListErrors = promauto . NewCounter ( prometheus . CounterOpts {
Name : ReadClusterListErrorsName ,
Help : ReadClusterListErrorsHelp ,
} )
ReadReportedErrors shows number of errors when getting previously notified reports from reported table
var ReadReportedErrors = promauto . NewCounter ( prometheus . CounterOpts {
Name : ReadReportedErrorsName ,
Help : ReadReportedErrorsHelp ,
} )
ProducerSetupErrors shows number of errors when setting up Kafka producer
var ProducerSetupErrors = promauto . NewCounter ( prometheus . CounterOpts {
Name : ProducerSetupErrorsName ,
Help : ProducerSetupErrorsHelp ,
} )
StorageSetupErrors shows number of errors when setting up storage
var StorageSetupErrors = promauto . NewCounter ( prometheus . CounterOpts {
Name : StorageSetupErrorsName ,
Help : StorageSetupErrorsHelp ,
} )
ReadReportForClusterErrors shows number of errors when getting latest report for a given cluster
var ReadReportForClusterErrors = promauto . NewCounter ( prometheus . CounterOpts {
Name : ReadReportForClusterErrorsName ,
Help : ReadReportForClusterErrorsHelp ,
} )
DeserializeReportErrors shows number of errors when deserializing a report retrieved from the new_reports table
var DeserializeReportErrors = promauto . NewCounter ( prometheus . CounterOpts {
Name : DeserializeReportErrorsName ,
Help : DeserializeReportErrorsHelp ,
} )
ReportWithHighImpact shows number of reports with total risk higher than the configured threshold
var ReportWithHighImpact = promauto . NewCounter ( prometheus . CounterOpts {
Name : ReportWithHighImpactName ,
Help : ReportWithHighImpactHelp ,
} )
NotificationNotSentSameState shows number of notifications not sent because we parsed the same report
var NotificationNotSentSameState = promauto . NewCounter ( prometheus . CounterOpts {
Name : NotificationNotSentSameStateName ,
Help : NotificationNotSentSameStateHelp ,
} )
NotificationNotSentErrorState shows number of notifications not sent because of a Kafka producer error
var NotificationNotSentErrorState = promauto . NewCounter ( prometheus . CounterOpts {
Name : NotificationNotSentErrorStateName ,
Help : NotificationNotSentErrorStateHelp ,
} )
NotificationSent shows number notifications sent to the configured Kafka topic
var NotificationSent = promauto . NewCounter ( prometheus . CounterOpts {
Name : NotificationSentName ,
Help : NotificationSentHelp ,
} )
NoSeverityTotalRisk shows how many times a total risk not mapped to a service log severity is received
var NoSeverityTotalRisk = promauto . NewCounter ( prometheus . CounterOpts {
Name : NoSeverityTotalRiskName ,
Help : NoSeverityTotalRiskHelp ,
} )
AddMetricsWithNamespaceAndSubsystem register the desired metrics using a given namespace
func AddMetricsWithNamespaceAndSubsystem ( namespace , subsystem string ) {
exposed metrics
Unregister all metrics and registrer them again
prometheus . Unregister ( FetchContentErrors )
prometheus . Unregister ( ReadClusterListErrors )
prometheus . Unregister ( ProducerSetupErrors )
prometheus . Unregister ( StorageSetupErrors )
prometheus . Unregister ( ReadReportForClusterErrors )
prometheus . Unregister ( DeserializeReportErrors )
prometheus . Unregister ( ReportWithHighImpact )
prometheus . Unregister ( NotificationNotSentSameState )
prometheus . Unregister ( NotificationNotSentErrorState )
prometheus . Unregister ( NotificationSent )
prometheus . Unregister ( NoSeverityTotalRisk )
FetchContentErrors shows number of errors during fetch from content service
FetchContentErrors = promauto . NewCounter ( prometheus . CounterOpts {
Namespace : namespace ,
Subsystem : subsystem ,
Name : FetchContentErrorsName ,
Help : FetchContentErrorsHelp ,
} )
ReadClusterListErrors shows number of errors when reading cluster list from new_reports table
ReadClusterListErrors = promauto . NewCounter ( prometheus . CounterOpts {
Namespace : namespace ,
Subsystem : subsystem ,
Name : ReadClusterListErrorsName ,
Help : ReadClusterListErrorsHelp ,
} )
ProducerSetupErrors shows number of errors when setting up Kafka producer
ProducerSetupErrors = promauto . NewCounter ( prometheus . CounterOpts {
Namespace : namespace ,
Subsystem : subsystem ,
Name : ProducerSetupErrorsName ,
Help : ProducerSetupErrorsHelp ,
} )
StorageSetupErrors shows number of errors when setting up storage
StorageSetupErrors = promauto . NewCounter ( prometheus . CounterOpts {
Namespace : namespace ,
Subsystem : subsystem ,
Name : StorageSetupErrorsName ,
Help : StorageSetupErrorsHelp ,
} )
ReadReportForClusterErrors shows number of errors when getting latest report for a given cluster
ReadReportForClusterErrors = promauto . NewCounter ( prometheus . CounterOpts {
Namespace : namespace ,
Subsystem : subsystem ,
Name : ReadReportForClusterErrorsName ,
Help : ReadReportForClusterErrorsHelp ,
} )
DeserializeReportErrors shows number of errors when deserializing a report retrieved from the new_reports table
DeserializeReportErrors = promauto . NewCounter ( prometheus . CounterOpts {
Namespace : namespace ,
Subsystem : subsystem ,
Name : DeserializeReportErrorsName ,
Help : DeserializeReportErrorsHelp ,
} )
ReportWithHighImpact shows number of reports with total risk higher than the configured threshold
ReportWithHighImpact = promauto . NewCounter ( prometheus . CounterOpts {
Namespace : namespace ,
Subsystem : subsystem ,
Name : ReportWithHighImpactName ,
Help : ReportWithHighImpactHelp ,
} )
NotificationNotSentSameState shows number of notifications not sent because we parsed the same report
NotificationNotSentSameState = promauto . NewCounter ( prometheus . CounterOpts {
Namespace : namespace ,
Subsystem : subsystem ,
Name : NotificationNotSentSameStateName ,
Help : NotificationNotSentSameStateHelp ,
} )
NotificationNotSentErrorState shows number of notifications not sent because of a Kafka producer error
NotificationNotSentErrorState = promauto . NewCounter ( prometheus . CounterOpts {
Namespace : namespace ,
Subsystem : subsystem ,
Name : NotificationNotSentErrorStateName ,
Help : NotificationNotSentErrorStateHelp ,
} )
NotificationSent shows number notifications sent to the configured Kafka topic
NotificationSent = promauto . NewCounter ( prometheus . CounterOpts {
Namespace : namespace ,
Subsystem : subsystem ,
Name : NotificationSentName ,
Help : NotificationSentHelp ,
} )
NoSeverityTotalRisk shows how many times a total risk not mapped to a service log severity is received
NoSeverityTotalRisk = promauto . NewCounter ( prometheus . CounterOpts {
Namespace : namespace ,
Subsystem : subsystem ,
Name : NoSeverityTotalRiskName ,
Help : NoSeverityTotalRiskHelp ,
} )
PushCollectedMetrics function pushes the metrics to the configured prometheus push
func PushCollectedMetrics ( metricsConf * conf . MetricsConfiguration ) error {
client := PushGatewayClient { metricsConf . GatewayAuthToken , http . Client { } }
Creates a pusher to the gateway "$PUSHGWURL/metrics/job/$(jobname)
return push . New ( metricsConf . GatewayURL , metricsConf . Job ) .
Collector ( FetchContentErrors ) .
Collector ( ReadClusterListErrors ) .
Collector ( ReadReportedErrors ) .
Collector ( ProducerSetupErrors ) .
Collector ( StorageSetupErrors ) .
Collector ( ReadReportForClusterErrors ) .
Collector ( DeserializeReportErrors ) .
Collector ( ReportWithHighImpact ) .
Collector ( NotificationNotSentSameState ) .
Collector ( NotificationNotSentErrorState ) .
Collector ( NotificationSent ) .
Collector ( NoSeverityTotalRisk ) .
Client ( & client ) .
Push ( )
PushMetricsInLoop pushes the metrics in a loop until context is done
func PushMetricsInLoop ( ctx context . Context , metricsConf * conf . MetricsConfiguration ) {
if metricsConf . Namespace != "" && metricsConf . GatewayAuthToken != "" {
log . Info ( ) . Msgf ( "Metrics will be pushed in loop each %f seconds" , metricsConf . GatewayTimeBetweenPush . Seconds ( ) )
ticker := time . NewTicker ( metricsConf . GatewayTimeBetweenPush )
for {
select {
case <- ticker . C :
log . Debug ( ) . Msg ( "Pushing metrics" )
err := PushCollectedMetrics ( metricsConf )
if err != nil {
log . Error ( ) . Err ( err ) . Msg ( "Error pushing the metrics in loop" )
log . Debug ( ) . Msg ( "Metrics pushed" )
case <- ctx . Done ( ) :