# Nomad job "prometheus": a "service"-type job (priority 50) for the "homelab"
# datacenter, constrained to nodes whose ${attr.cpu.arch} is "amd64".
#
# Group "prometheus" (count = 1):
#   - network: static port 9090, labelled "prometheus_ui".
#   - restart: 2 attempts per 30m interval, 15s delay, mode "fail".
#   - vault: requests the "prometheus" policy.
#   - ephemeral_disk: size = 300.
#
# Task "prometheus" (docker driver, image "prom/prometheus:latest"):
#   - template: destination "local/prometheus.yml", change_mode "noop".
#   - args: config file /etc/prometheus/prometheus.yml, TSDB path /prometheus,
#     TSDB retention time 15d.
#   - volumes: local/nomad-alert-rules.yml and local/prometheus.yml are mounted
#     under /etc/prometheus; /mnt/diskstation/nomad/prometheus is mounted at
#     /prometheus.
#   - service "prometheus": published on port "prometheus_ui" with
#     "urlprefix-/" and homer.* tags, plus an HTTP check on "/-/healthy"
#     (interval 10s, timeout 2s).
#   - resources: memory = 250.
#
# Alert rules still visible in the template text: NomadJobQueued,
# NomadBatchError, "test gitea", and a "nut_alerts" group with UPSonBattery,
# UPSLowBattery and "UPS Battery needed to be replaced".
#
# NOTE(review): the `data = <` heredoc looks truncated in this copy. The
# heredoc opener, the body of local/prometheus.yml and whatever introduced the
# alert-rules content (presumably a second template block with destination
# local/nomad-alert-rules.yml, which the volumes list expects) appear to be
# missing; only the tail of the rules, terminated by EOH, remains. Line breaks
# also appear to have been collapsed. Restore from version control before
# submitting this job -- TODO confirm.
# NOTE(review): the image tag "latest" is unpinned; consider pinning a version
# so redeployments are reproducible.
# NOTE(review): the alert named "test gitea" looks like a leftover test rule --
# confirm whether it should keep this name.
job "prometheus" { datacenters = ["homelab"] priority = 50 type = "service" constraint { attribute = "${attr.cpu.arch}" value = "amd64" } group "prometheus" { count = 1 network { port "prometheus_ui" { static = 9090 } } restart { attempts = 2 interval = "30m" delay = "15s" mode = "fail" } vault { policies = ["prometheus"] } ephemeral_disk { size = 300 } task "prometheus" { template { change_mode = "noop" destination = "local/prometheus.yml" data = < 0 for: 0m labels: severity: warning annotations: summary: Nomad blocked evaluation (instance {{ $labels.instance }}) description: "Nomad blocked evaluation\n VALUE = {{ $value }}\n LABELS = {{ $labels }}" - alert: NomadJobQueued expr: nomad_nomad_job_summary_queued > 0 for: 2m labels: severity: warning annotations: summary: Nomad job queued (instance {{ $labels.instance }}) description: "Nomad job queued\n VALUE = {{ $value }}\n LABELS = {{ $labels }}" - alert: NomadBatchError expr: nomad_nomad_job_summary_failed{parent_id=~".+"}>0 labels: severity: warning annotations: summary: Nomad batch {{ $labels.parent_id}} error - alert: test gitea expr: nomad_nomad_job_summary_running{exported_job="git"}==0 labels: severity: warning - name: nut_alerts rules: - alert: UPSonBattery expr: network_ups_tools_ups_status{flag="OB"}==1 labels: severity: warning annotations: summary: UPS switched on battery - alert: UPSLowBattery expr: network_ups_tools_ups_status{flag="LB"}==1 labels: severity: critical annotations: summary: UPS is now on low battery please shutdown all device - alert: "UPS Battery needed to be replaced" expr: network_ups_tools_ups_status{flag="RB"}==1 labels: severity: warning annotations: summary: UPS battery is detected to replace EOH } driver = "docker" config { image = "prom/prometheus:latest" args = [ "--config.file=/etc/prometheus/prometheus.yml", "--storage.tsdb.path=/prometheus", "--storage.tsdb.retention.time=15d", ] volumes = [ "local/nomad-alert-rules.yml:/etc/prometheus/nomad-alert-rules.yml", 
"local/prometheus.yml:/etc/prometheus/prometheus.yml", "/mnt/diskstation/nomad/prometheus:/prometheus" ] ports = ["prometheus_ui"] } service { name = "prometheus" tags = ["urlprefix-/", "homer.enable=true", "homer.name=Prometheus", "homer.service=Monitoring", "homer.type=Prometheus", "homer.logo=https://upload.wikimedia.org/wikipedia/commons/thumb/3/38/Prometheus_software_logo.svg/173px-Prometheus_software_logo.svg.png", "homer.target=_blank", "homer.url=http://${NOMAD_ADDR_prometheus_ui}", ] port = "prometheus_ui" check { name = "prometheus_ui port alive" type = "http" path = "/-/healthy" interval = "10s" timeout = "2s" } } resources { memory = 250 } } } }