Prometheus监控-redis

摘要

本文内容转自网络,个人学习记录使用,请勿传播

redis_exporter

安装

下载地址

1
2
3
4
5
# Download and unpack redis_exporter v1.39.0, then publish it under a
# version-independent path. The steps are chained with && so that a failed
# download or extraction never reaches the symlink step.
# ln -sfn replaces an existing link, so the sequence can be re-run.
cd /usr/local/src &&
  wget https://github.com/oliver006/redis_exporter/releases/download/v1.39.0/redis_exporter-v1.39.0.linux-amd64.tar.gz &&
  tar xf redis_exporter-v1.39.0.linux-amd64.tar.gz &&
  ln -sfn /usr/local/src/redis_exporter-v1.39.0.linux-amd64 /usr/local/redis_exporter

配置启动脚本

请先按实际环境修改脚本中的 redis 连接地址(REDISADDR)和密码(REDISPW),并将脚本保存为 /usr/local/redis_exporter/control

1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
#!/bin/bash
# author: isme正式在下
# Control script for redis_exporter (start/stop/restart/reload/status).
# bash is requested explicitly because the script relies on shell features
# that are not part of POSIX sh.

# Debugging aids, uncomment when needed:
#set -x   # trace every command
#set -u   # treat unset variables as errors

# Restrict PATH to the system directories.
export PATH="/usr/bin:/bin:/usr/sbin:/sbin"
# Disable core dumps.
ulimit -c 0
# Return value placeholder (not read anywhere in the visible script).
RETVAL=0

# redis_exporter general config
PROG="redis_exporter"
# Edit the Redis connection address and password for your environment.
REDISADDR="redis://localhost:6379"
REDISPW="xxxx"
# Port the exporter's HTTP endpoint listens on.
PORT=9121
# Account whose processes are searched with pgrep.
# NOTE: this overrides the $USER inherited from the calling environment.
USER="root"
LOGLEVEL="info"
#BASEDIR=${SYMLINK:-/usr/sbin/redis_exporter}
# Directory that contains this script; quoted so paths with spaces survive.
BASEDIR=$(cd "$(dirname "$0")" && pwd)
DAEMON="${BASEDIR}/${PROG}"
PIDFILE="${BASEDIR}/${PROG}.pid"
LOCKFILE="${BASEDIR}/${PROG}.lock"
LOGFILE="${BASEDIR}/${PROG}.log"
# Start timeout in milliseconds (not read anywhere in the visible script).
STARTTIME=10000
# Stop timeout in milliseconds.
STOPTIME=10000

#######################################
# Print a coloured status tag such as "[ SUCCESS ]" at terminal column 80.
# Arguments:
#   $1 - colour name: red, green or yellow (anything else selects the
#        terminal's default attributes)
#   $2 - text shown between the brackets
# Outputs:
#   Writes the tag followed by a newline to stdout.
#######################################
color_msg() {
  local color_name="$1"
  local msg="$2"
  local offset='\033[80G' # move the cursor to column 80
  local normal='\033[0m'  # reset all attributes
  local color
  case "$color_name" in
    red) color='\033[1;40;31m' ;;
    green) color='\033[1;40;32m' ;;
    yellow) color='\033[1;40;33m' ;;
    *) color='\033[0m' ;;
  esac
  # printf is used instead of `echo -en`, whose option handling differs
  # between shells. %b expands the escape sequences; %s keeps the message
  # text literal even if it contains backslashes.
  printf '%b [%b %s %b]\n' "$offset" "$color" "$msg" "$normal"
}

#######################################
# Start redis_exporter in the background unless it is already running.
# Globals:
#   PROG, USER, DAEMON, REDISADDR, REDISPW, PORT, LOGFILE, PIDFILE,
#   LOCKFILE (all read)
# Outputs:
#   Progress text and a status tag on stdout; the daemon's own output is
#   redirected to LOGFILE.
# Returns:
#   0 when the daemon was started or was already running. Exits the script
#   with status 1 when the binary is not executable or the process is gone
#   one second after launch.
#######################################
start() {
  local running_pid new_pid
  printf '%s' "Starting $PROG : "
  # Only processes owned by $USER whose parent is PID 1 are considered.
  running_pid=$(pgrep -P 1 -u "$USER" "^$PROG")
  if [ -n "$running_pid" ]; then
    printf '%s' "is already running."
    color_msg yellow WARNING
    return 0
  fi

  if [ ! -x "$DAEMON" ]; then
    color_msg red FAILED
    exit 1
  fi

  # NOTE(review): the password is passed on the command line and is therefore
  # visible in `ps` output; the redis_exporter README describes a
  # REDIS_PASSWORD environment variable as an alternative - confirm before
  # switching.
  nohup "$DAEMON" \
    -redis.addr="${REDISADDR}" \
    -redis.password="${REDISPW}" \
    -web.listen-address="0.0.0.0:${PORT}" >"$LOGFILE" 2>&1 &
  new_pid=$!

  # `$?` after `cmd &` only says that the job was forked, so give the
  # process a moment and then check that it is still alive.
  sleep 1
  if kill -0 "$new_pid" 2>/dev/null; then
    echo "$new_pid" >"$PIDFILE"
    color_msg green SUCCESS
    touch "$LOCKFILE"
  else
    color_msg red FAILED
    exit 1
  fi
}

#######################################
# Send SIGTERM to the running daemon and wait for it to disappear.
# Globals:
#   PROG, USER, STOPTIME, PIDFILE, LOCKFILE (all read; STOPTIME is no longer
#   modified)
# Outputs:
#   Progress text, one dot per poll, and a status tag on stdout.
# Returns:
#   0 when the daemon stopped or was not running. Exits the script with
#   status 1 when it is still alive after STOPTIME milliseconds.
#######################################
stop() {
  local pids remaining
  local step_ms=100 # poll interval; must match the sleep below
  printf '%s' "Stopping $PROG : "
  pids=$(pgrep -P 1 -u "$USER" "^$PROG")
  if [ -z "$pids" ]; then
    printf '%s' "is not running."
    color_msg yellow WARNING
    return 0
  fi

  # $pids is intentionally unquoted: pgrep may print several PIDs, one per
  # line, and each must become its own argument.
  kill -TERM $pids >/dev/null 2>&1
  remaining=$STOPTIME
  while [ "$remaining" -gt 0 ]; do
    kill -0 $pids >/dev/null 2>&1 || break
    remaining=$((remaining - step_ms))
    printf '.'
    sleep 0.1
  done

  if [ "$remaining" -le 0 ]; then
    color_msg red TIMEOUT
    exit 1
  fi
  color_msg green SUCCESS
  rm -f "$PIDFILE" "$LOCKFILE"
}

#######################################
# Stop the daemon and start it again, indenting each step's output by a tab.
# Globals:
#   PROG (read)
# Returns:
#   The status of start; stop may terminate the script first on timeout.
#######################################
restart() {
  # printf replaces `echo -n` / `echo -en`, whose option handling is not
  # the same in every shell.
  printf '%s\n' "Restart $PROG : "
  printf '\t'
  stop
  printf '\t'
  start
}

#######################################
# Send SIGHUP to the running daemon.
# Globals:
#   PROG, USER (read); PROC_PID (written)
# Outputs:
#   Progress text and a status tag on stdout.
# Returns:
#   Exits the script with status 1 when the signal cannot be delivered.
# NOTE(review): whether redis_exporter re-reads anything on SIGHUP is not
# visible here - confirm.
#######################################
reload() {
  echo -n "Reloading $PROG : "
  PROC_PID=$(pgrep -P 1 -u $USER ^$PROG)

  # Nothing to signal: warn and leave.
  if [ -z "$PROC_PID" ]; then
    echo -n "is not running."
    color_msg yellow WARNING
    return
  fi

  if kill -HUP ${PROC_PID} >/dev/null 2>&1; then
    color_msg green SUCCESS
  else
    color_msg red FAILED && exit 1
  fi
}

#######################################
# Report whether the daemon is running and terminate the script.
# Globals:
#   PROG, USER (read); PROC_PID (written)
# Outputs:
#   One status line on stdout.
# Returns:
#   Never returns: exits 0 when a matching process exists, 3 otherwise.
#######################################
status() {
  PROC_PID=$(pgrep -P 1 -u $USER ^$PROG)
  case "$PROC_PID" in
    "")
      echo "$PROG is stopped"
      exit 3
      ;;
  esac
  echo "$PROG (pid $PROC_PID) is running..."
  exit 0
}

# Dispatch on the first command-line argument. Quoting the selector makes
# the old `C` prefix unnecessary, and ${1:-} stays safe if `set -u` is
# enabled.
case "${1:-}" in
  start)
    start
    ;;
  stop)
    stop
    ;;
  reload)
    reload
    ;;
  restart)
    restart
    ;;
  status)
    status
    ;;
  *)
    # Plain double quotes: $"..." is bash-only locale-translation quoting.
    # Usage errors are diagnostics, so they go to stderr.
    echo "Usage: $0 {start|stop|restart|reload|status}" >&2
    exit 3
    ;;
esac

启动服务

1
# Run the control script with bash explicitly: `sh` may be a minimal POSIX
# shell (for example dash) without the features the script was written for.
bash /usr/local/redis_exporter/control start

配置prometheus任务

1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
# cat prometheus.yml
# ... (other settings omitted)
rule_files:
  - "rules/redis_exporter_recording_rules.yml"
# ... (other settings omitted)
scrape_configs:
  # ... (other scrape jobs omitted)
  - job_name: 'redis'
    scrape_interval: 15s
    scrape_timeout: 15s
    metrics_path: /metrics
    scheme: http
    # Targets are read from every file matching the glob below.
    file_sd_configs:
      - files:
          - 'config/redis_*.yml'
        #refresh_interval: 5m
    relabel_configs:
      # Keep the bare target address (no port) as the `instance` label.
      - source_labels: [__address__]
        #separator: ;
        regex: (.*)
        #target_label: __metrics_path__
        target_label: instance
        replacement: $1
        action: replace
      # Append the exporter port to the address that is actually scraped.
      - source_labels: [__address__]
        regex: (.*)
        target_label: __address__
        replacement: $1:9121
      # Copies the `host` label onto itself.
      - source_labels: [host]
        regex: (.*)
        target_label: host
        replacement: $1
# ... (other settings omitted)

# cat config/redis_loki.yml
# One file_sd target group: the listed hosts get the label app=loki.
# Targets are given without a port.
- labels:
    app: loki
  targets:
    - 10.0.0.1

# cat rules/redis_exporter_recording_rules.yml
# NOTE: despite the file name, every rule below is an alerting rule.
groups:
  - name: redis
    rules:
      # redis_up has been 0 for 5 minutes.
      - alert: RedisDown
        annotations:
          description: |-
            Redis instance is down
            VALUE = {{ $value }}
            LABELS: {{ $labels }}
          summary: Redis down (instance {{ $labels.instance }})
        expr: redis_up == 0
        for: 5m
        labels:
          severity: critical
      # Used memory has been above 90% of redis_total_system_memory_bytes
      # for 5 minutes.
      - alert: RedisOutOfMemory
        annotations:
          description: |-
            Redis is running out of memory (> 90%)
            VALUE = {{ $value }}
            LABELS: {{ $labels }}
          summary: Redis out of memory (instance {{ $labels.instance }})
        expr: redis_memory_used_bytes / redis_total_system_memory_bytes * 100 > 90
        for: 5m
        labels:
          severity: warning
      # More than 100 connected clients for 5 minutes.
      - alert: RedisTooManyConnections
        annotations:
          description: |-
            Redis instance has too many connections
            VALUE = {{ $value }}
            LABELS: {{ $labels }}
          summary: Redis too many connections (instance {{ $labels.instance }})
        expr: redis_connected_clients > 100
        for: 5m
        labels:
          severity: warning

prometheus启动脚本(保存为 /usr/local/prometheus/control)

1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
151
152
153
154
#!/bin/bash
# author: isme正式在下
# Control script for prometheus (start/stop/restart/reload/status).
# bash is requested explicitly because the script relies on shell features
# that are not part of POSIX sh.

# Debugging aids, uncomment when needed:
#set -x   # trace every command
#set -u   # treat unset variables as errors

# Restrict PATH to the system directories.
export PATH="/usr/bin:/bin:/usr/sbin:/sbin"
# Disable core dumps.
ulimit -c 0
# Return value placeholder (not read anywhere in the visible script).
RETVAL=0

# prometheus general config
PROG="prometheus"
# Port the Prometheus web endpoint listens on.
PORT=8990
# Account whose processes are searched with pgrep.
# NOTE: this overrides the $USER inherited from the calling environment.
USER="work"
LOGLEVEL="info"
#BASEDIR=${SYMLINK:-/usr/sbin/node_exporter}
# Directory that contains this script; quoted so paths with spaces survive.
BASEDIR=$(cd "$(dirname "$0")" && pwd)
DAEMON="${BASEDIR}/${PROG}"
CONFIGFILE="${BASEDIR}/${PROG}.yml"
PIDFILE="${BASEDIR}/${PROG}.pid"
LOCKFILE="${BASEDIR}/${PROG}.lock"
LOGFILE="${BASEDIR}/${PROG}.log"
# Start timeout in milliseconds (not read anywhere in the visible script).
STARTTIME=10000
# Stop timeout in milliseconds.
STOPTIME=10000

#######################################
# Print a coloured status tag such as "[ SUCCESS ]" at terminal column 80.
# Arguments:
#   $1 - colour name: red, green or yellow (anything else selects the
#        terminal's default attributes)
#   $2 - text shown between the brackets
# Outputs:
#   Writes the tag followed by a newline to stdout.
#######################################
color_msg() {
  local color_name="$1"
  local msg="$2"
  local offset='\033[80G' # move the cursor to column 80
  local normal='\033[0m'  # reset all attributes
  local color
  case "$color_name" in
    red) color='\033[1;40;31m' ;;
    green) color='\033[1;40;32m' ;;
    yellow) color='\033[1;40;33m' ;;
    *) color='\033[0m' ;;
  esac
  # printf is used instead of `echo -en`, whose option handling differs
  # between shells. %b expands the escape sequences; %s keeps the message
  # text literal even if it contains backslashes.
  printf '%b [%b %s %b]\n' "$offset" "$color" "$msg" "$normal"
}

#######################################
# Start prometheus in the background unless it is already running.
# Globals:
#   PROG, USER, DAEMON, PORT, CONFIGFILE, LOGLEVEL, LOGFILE, PIDFILE,
#   LOCKFILE (all read)
# Outputs:
#   Progress text and a status tag on stdout; the daemon's own output is
#   redirected to LOGFILE.
# Returns:
#   0 when the daemon was started or was already running. Exits the script
#   with status 1 when the binary is not executable, the config file is not
#   readable, or the process is gone one second after launch.
#######################################
start() {
  local running_pid new_pid
  printf '%s' "Starting $PROG : "
  # Only processes owned by $USER whose parent is PID 1 are considered.
  running_pid=$(pgrep -P 1 -u "$USER" "^$PROG")
  if [ -n "$running_pid" ]; then
    printf '%s' "is already running."
    color_msg yellow WARNING
    return 0
  fi

  if [ ! -x "$DAEMON" ] || [ ! -r "$CONFIGFILE" ]; then
    color_msg red FAILED
    exit 1
  fi

  # --storage.tsdb.retention.time replaces the deprecated
  # --storage.tsdb.retention flag.
  # NOTE(review): no --storage.tsdb.path is given, so the data directory is
  # left to Prometheus' default - confirm it does not depend on the
  # directory this script is invoked from.
  nohup "$DAEMON" \
    --web.listen-address="0.0.0.0:${PORT}" \
    --web.enable-lifecycle \
    --config.file="${CONFIGFILE}" \
    --storage.tsdb.retention.time=90d \
    --query.max-concurrency=32 \
    --log.level="${LOGLEVEL}" >"$LOGFILE" 2>&1 &
  new_pid=$!

  # `$?` after `cmd &` only says that the job was forked, so give the
  # process a moment and then check that it is still alive.
  sleep 1
  if kill -0 "$new_pid" 2>/dev/null; then
    echo "$new_pid" >"$PIDFILE"
    color_msg green SUCCESS
    touch "$LOCKFILE"
  else
    color_msg red FAILED
    exit 1
  fi
}

#######################################
# Send SIGTERM to the running daemon and wait for it to disappear.
# Globals:
#   PROG, USER, STOPTIME, PIDFILE, LOCKFILE (all read; STOPTIME is no longer
#   modified)
# Outputs:
#   Progress text, one dot per poll, and a status tag on stdout.
# Returns:
#   0 when the daemon stopped or was not running. Exits the script with
#   status 1 when it is still alive after STOPTIME milliseconds.
#######################################
stop() {
  local pids remaining
  local step_ms=100 # poll interval; must match the sleep below
  printf '%s' "Stopping $PROG : "
  pids=$(pgrep -P 1 -u "$USER" "^$PROG")
  if [ -z "$pids" ]; then
    printf '%s' "is not running."
    color_msg yellow WARNING
    return 0
  fi

  # $pids is intentionally unquoted: pgrep may print several PIDs, one per
  # line, and each must become its own argument.
  kill -TERM $pids >/dev/null 2>&1
  remaining=$STOPTIME
  while [ "$remaining" -gt 0 ]; do
    kill -0 $pids >/dev/null 2>&1 || break
    remaining=$((remaining - step_ms))
    printf '.'
    sleep 0.1
  done

  if [ "$remaining" -le 0 ]; then
    color_msg red TIMEOUT
    exit 1
  fi
  color_msg green SUCCESS
  rm -f "$PIDFILE" "$LOCKFILE"
}

#######################################
# Stop the daemon and start it again, indenting each step's output by a tab.
# Globals:
#   PROG (read)
# Returns:
#   The status of start; stop may terminate the script first on timeout.
#######################################
restart() {
  # printf replaces `echo -n` / `echo -en`, whose option handling is not
  # the same in every shell.
  printf '%s\n' "Restart $PROG : "
  printf '\t'
  stop
  printf '\t'
  start
}

#######################################
# Send SIGHUP to the running daemon.
# Globals:
#   PROG, USER (read); PROC_PID (written)
# Outputs:
#   Progress text and a status tag on stdout.
# Returns:
#   Exits the script with status 1 when the signal cannot be delivered.
# NOTE(review): Prometheus documents SIGHUP as a configuration-reload
# trigger - confirm for the deployed version.
#######################################
reload() {
  echo -n "Reloading $PROG : "
  PROC_PID=$(pgrep -P 1 -u $USER ^$PROG)

  # Nothing to signal: warn and leave.
  if [ -z "$PROC_PID" ]; then
    echo -n "is not running."
    color_msg yellow WARNING
    return
  fi

  if kill -HUP ${PROC_PID} >/dev/null 2>&1; then
    color_msg green SUCCESS
  else
    color_msg red FAILED && exit 1
  fi
}

#######################################
# Report whether the daemon is running and terminate the script.
# Globals:
#   PROG, USER (read); PROC_PID (written)
# Outputs:
#   One status line on stdout.
# Returns:
#   Never returns: exits 0 when a matching process exists, 3 otherwise.
#######################################
status() {
  PROC_PID=$(pgrep -P 1 -u $USER ^$PROG)
  if [ -n "$PROC_PID" ]; then
    echo "$PROG (pid $PROC_PID) is running..."
    exit 0
  fi
  echo "$PROG is stopped"
  exit 3
}

# Dispatch on the first command-line argument. Quoting the selector makes
# the old `C` prefix unnecessary, and ${1:-} stays safe if `set -u` is
# enabled.
case "${1:-}" in
  start)
    start
    ;;
  stop)
    stop
    ;;
  reload)
    reload
    ;;
  restart)
    restart
    ;;
  status)
    status
    ;;
  *)
    # Plain double quotes: $"..." is bash-only locale-translation quoting.
    # Usage errors are diagnostics, so they go to stderr.
    echo "Usage: $0 {start|stop|restart|reload|status}" >&2
    exit 3
    ;;
esac

重启prometheus

1
# Run the control script with bash explicitly: `sh` may be a minimal POSIX
# shell (for example dash) without the features the script was written for.
bash /usr/local/prometheus/control restart

grafana导入dashboard

dashboard-json

最终效果

image-20220609110120880

为什么内存使用情况显示为∞?

因为没有给 Redis 设置最大内存(maxmemory),redis_memory_max_bytes 的值为 0,计算时除数为 0,所以结果是无穷大。计算公式为:100 * (redis_memory_used_bytes / redis_memory_max_bytes)。给 Redis 配置 maxmemory 之后,该面板即可显示正常的百分比。