diff --git a/nomad-job/alertmanager.nomad b/nomad-job/alertmanager.nomad
index 3795207..5972c3c 100644
--- a/nomad-job/alertmanager.nomad
+++ b/nomad-job/alertmanager.nomad
@@ -65,7 +65,8 @@ route:
 receivers:
   - name: "default"
     email_configs:
-      - to: "vincent@ducamps.eu"
+      - send_resolved: true
+        to: "vincent@ducamps.eu"
 EOH
         destination = "local/alertmanager.yml"
       }
diff --git a/nomad-job/nut_exporter.nomad b/nomad-job/nut_exporter.nomad
new file mode 100644
index 0000000..9ba70a0
--- /dev/null
+++ b/nomad-job/nut_exporter.nomad
@@ -0,0 +1,46 @@
+
+job "nut_exporter" {
+  datacenters = ["homelab"]
+  priority = 50
+  type = "service"
+  meta {
+    forcedeploy = "0"
+  }
+
+  group "nut_exporter"{
+    network {
+      mode = "host"
+      port "http" {
+        to = 9199
+      }
+    }
+    service {
+      name = "nutexporter"
+      port= "http"
+
+      check {
+        name= "nut_exporter_probe"
+        type= "http"
+        path= "/ups_metrics"
+        interval = "60s"
+        timeout = "2s"
+      }
+    }
+    task "nut_exporter" {
+      driver = "docker"
+      config {
+        image = "ghcr.io/druggeri/nut_exporter"
+        ports = ["http"]
+      }
+      env {
+        NUT_EXPORTER_SERVER= "192.168.1.10"
+        NUT_EXPORTER_VARIABLES = "battery.runtime,battery.charge,input.voltage,output.voltage,output.voltage.nominal,ups.load,ups.status,ups.realpower"
+      }
+
+      resources {
+        memory = 20
+      }
+    }
+
+  }
+}
diff --git a/nomad-job/prometheus.nomad b/nomad-job/prometheus.nomad
index 77723ab..8896db7 100644
--- a/nomad-job/prometheus.nomad
+++ b/nomad-job/prometheus.nomad
@@ -112,6 +112,15 @@ scrape_configs:
     metrics_path: /api/prometheus
     authorization:
       credentials: {{ with secret "secrets/data/nomad/prometheus"}}'{{ .Data.data.hass_token }}'{{end}}
+  - job_name: 'nut'
+    consul_sd_configs:
+      - server: 'consul.service.consul:8500'
+        services: ['nutexporter']
+    metrics_path: /ups_metrics
+    relabel_configs:
+      - source_labels: ['__meta_consul_dc']
+        target_label: instance
+
 
 
 
@@ -128,14 +137,6 @@ EOH
 groups:
 - name: nomad_alerts
   rules:
-  - alert: NomadJobFailed
-    expr: nomad_nomad_job_summary_failed > 0
-    for: 0m
-    labels:
-      severity: warning
-    annotations:
-      summary: Nomad job failed (instance {{ $labels.instance }})
-      description: "Nomad job failed\n VALUE = {{ $value }}\n LABELS = {{ $labels }}"
   - alert: NomadBlockedEvaluation
     expr: nomad_nomad_blocked_evals_total_blocked > 0
     for: 0m
@@ -144,14 +145,6 @@ groups:
     annotations:
       summary: Nomad blocked evaluation (instance {{ $labels.instance }})
       description: "Nomad blocked evaluation\n VALUE = {{ $value }}\n LABELS = {{ $labels }}"
-  - alert: NomadJobLost
-    expr: nomad_nomad_job_summary_lost > 0
-    for: 0m
-    labels:
-      severity: warning
-    annotations:
-      summary: Nomad job lost (instance {{ $labels.instance }})
-      description: "Nomad job lost\n VALUE = {{ $value }}\n LABELS = {{ $labels }}"
   - alert: NomadJobQueued
     expr: nomad_nomad_job_summary_queued > 0
     for: 2m
@@ -170,6 +163,27 @@ groups:
     expr: nomad_nomad_job_summary_running{exported_job="git"}==0
     labels:
       severity: warning
+- name: nut_alerts
+  rules:
+  - alert: UPSonBattery
+    expr: network_ups_tools_ups_status{flag="OB"}==1
+    labels:
+      severity: warning
+    annotations:
+      summary: UPS switched on battery
+  - alert: UPSLowBattery
+    expr: network_ups_tools_ups_status{flag="LB"}==1
+    labels:
+      severity: critical
+    annotations:
+      summary: UPS is now on low battery please shutdown all device
+  - alert: UPSReplaceBattery
+    expr: network_ups_tools_ups_status{flag="RB"}==1
+    labels:
+      severity: warning
+    annotations:
+      summary: UPS battery is detected to replace
+
 EOH