# alertmanager.yml
route:
# When a new group of alerts is created by an incoming alert, wait at
# least 'group_wait' to send the initial notification.
# This way ensures that you get multiple alerts for the same group that start
# firing shortly after another are batched together on the first
# notification.
group_wait: 10s
# When the first notification was sent, wait 'group_interval' to send a batch
# of new alerts that started firing for that group.
group_interval: 30s
# If an alert has successfully been sent, wait 'repeat_interval' to
# resend them.
repeat_interval: 30m
# A default receiver
receiver: "slack"
# All the above attributes are inherited by all child routes and can
# overwritten on each.
routes:
- receiver: "slack"
group_wait: 10s
match_re:
severity: critical|warning
continue: true
- receiver: "pager"
group_wait: 10s
match_re:
severity: critial
continue: true
receivers:
- name: "slack"
slack_configs:
- api_url: 'https://hooks.slack.com/services/XXXXXXXXX/XXXXXXXXX/xxxxxxxxxxxxxxxxxxxxxxxxxxx'
send_resolved: true
channel: 'monitoring'
text: "{{ range .Alerts }}<!channel> {{ .Annotations.summary }}\n{{ .Annotations.description }}\n{{ end }}"
- name: "pager"
webhook_config:
- url: http://a.b.c.d:8080/send/sms
send_resolved: true
故障排除
如果通知的触发时间过长,请检查以下延迟:
scrape_interval = 20s #(prometheus.yml)
evaluation_interval = 20s #(prometheus.yml)
increase(mysql_global_status_slow_queries[1m]) > 0 #(alerts/example-mysql.yml)
for: 5m #(alerts/example-mysql.yml)
group_wait = 10s #(alertmanager.yml)