-- cpu_usage
sum by (instance) (rate(container_cpu_usage_seconds_total{id="/"}[5m])) / sum by (instance) (machine_cpu_cores{}) * 100
-- cpu usage (GCP)
max(kubernetes_io:node_cpu_allocatable_utilization{project_id="..."})
-- ram_usage
sum by (instance) (container_memory_working_set_bytes{id="/"}) / sum by (instance) (machine_memory_bytes{}) * 100
-- ram usage (GCP, GCE instances)
sum by (instance_name) (avg_over_time(compute_googleapis_com:instance_memory_balloon_ram_used{monitored_resource="gce_instance", project_id="sit-varnish-lvzt-run-eaab"}[5m])) / sum by (instance_name) (avg_over_time(compute_googleapis_com:instance_memory_balloon_ram_size{monitored_resource="gce_instance", project_id="sit-varnish-lvzt-run-eaab"}[5m]))
Disk Usage
-- Persistent Volumes in Kubernetes
sum by (instance,namespace,persistentvolumeclaim) (kubelet_volume_stats_used_bytes{}) / sum by (instance,namespace,persistentvolumeclaim) (kubelet_volume_stats_capacity_bytes{}) * 100
-- Disk Usage in Kubernetes
sum by (instance,device) (container_fs_usage_bytes{id="/", device=~"/dev/.*"}) / sum by (instance,device) (container_fs_limit_bytes{id="/", device=~"/dev/.*"}) * 100
-- Disk Usage in GKE
sum by (node_name) (kubernetes_io:node_ephemeral_storage_used_bytes{cluster_name="$cluster"}) / sum by (node_name) (kubernetes_io:node_ephemeral_storage_allocatable_bytes{cluster_name="$cluster"})
GCP Uptime Check
sum(sum_over_time(monitoring_googleapis_com:uptime_check_check_passed{monitored_resource="uptime_url"}[15m])) by (checked_resource_id)
/
sum(count_over_time(monitoring_googleapis_com:uptime_check_check_passed{monitored_resource="uptime_url"}[15m])) by (checked_resource_id)
Error Rate
sum(rate(request_count{is_error="true"}[15m])) by (service) / sum(rate(request_count{ }[15m])) by (service)
Latency
histogram_quantile(0.95, sum(rate(response_time_bucket{}[60m])) by (service, le))
RabbitMQ
-- 0 if the queue is empty, 1 otherwise
(rabbitmq_queue_messages{vhost="/", queue="queue_name"} > bool 0)