部署kube-state-metrics(kube-state-metrics用来采集k8s集群中各类资源对象的状态):
准备镜像:
[root@hdss7-200 ~]# docker pull quay.io/coreos/kube-state-metrics:v1.5.0
v1.5.0: Pulling from coreos/kube-state-metrics
cd784148e348: Pull complete
f622528a393e: Pull complete
Digest: sha256:b7a3143bd1eb7130759c9259073b9f239d0eeda09f5210f1cd31f1a530599ea1
Status: Downloaded newer image for quay.io/coreos/kube-state-metrics:v1.5.0
quay.io/coreos/kube-state-metrics:v1.5.0
[root@hdss7-200 ~]# docker images|grep kube-state-metrics
quay.io/coreos/kube-state-metrics v1.5.0 91599517197a 15 months ago 31.8MB
[root@hdss7-200 ~]# docker tag 91599517197a harbor.od.com/public/kube-state-metrics:v1.5.0
[root@hdss7-200 ~]# docker push harbor.od.com/public/kube-state-metrics:v1.5.0
The push refers to repository [harbor.od.com/public/kube-state-metrics]
5b3c36501a0a: Pushed
7bff100f35cb: Pushed
v1.5.0: digest: sha256:16e9a1d63e80c19859fc1e2727ab7819f89aeae5f8ab5c3380860c2f88fe0a58 size: 739
准备资源配置清单:
[root@hdss7-200 kube-state-metrics]# cat rbac.yaml
# ServiceAccount used by the kube-state-metrics pod; it is bound to the
# ClusterRole of the same name by the ClusterRoleBinding in this file.
apiVersion: v1
kind: ServiceAccount
metadata:
  labels:
    addonmanager.kubernetes.io/mode: Reconcile
    kubernetes.io/cluster-service: "true"
  name: kube-state-metrics
  namespace: kube-system
---
# Cluster-wide read-only role: every rule grants only "list" and "watch"
# on the listed resources.
apiVersion: rbac.authorization.k8s.io/v1
kind: ClusterRole
metadata:
  labels:
    addonmanager.kubernetes.io/mode: Reconcile
    kubernetes.io/cluster-service: "true"
  name: kube-state-metrics
rules:
# Core ("") API group objects.
- apiGroups:
  - ""
  resources:
  - configmaps
  - secrets
  - nodes
  - pods
  - services
  - resourcequotas
  - replicationcontrollers
  - limitranges
  - persistentvolumeclaims
  - persistentvolumes
  - namespaces
  - endpoints
  verbs:
  - list
  - watch
- apiGroups:
  - policy
  resources:
  - poddisruptionbudgets
  verbs:
  - list
  - watch
# Workload controllers are requested from the legacy "extensions" group.
# NOTE(review): presumably this matches the API group kube-state-metrics
# v1.5.0 watches for these kinds - confirm before changing it to "apps".
- apiGroups:
  - extensions
  resources:
  - daemonsets
  - deployments
  - replicasets
  verbs:
  - list
  - watch
- apiGroups:
  - apps
  resources:
  - statefulsets
  verbs:
  - list
  - watch
- apiGroups:
  - batch
  resources:
  - cronjobs
  - jobs
  verbs:
  - list
  - watch
- apiGroups:
  - autoscaling
  resources:
  - horizontalpodautoscalers
  verbs:
  - list
  - watch
---
# Grants the kube-state-metrics ClusterRole to the kube-state-metrics
# ServiceAccount in the kube-system namespace.
apiVersion: rbac.authorization.k8s.io/v1
kind: ClusterRoleBinding
metadata:
  labels:
    addonmanager.kubernetes.io/mode: Reconcile
    kubernetes.io/cluster-service: "true"
  name: kube-state-metrics
roleRef:
  apiGroup: rbac.authorization.k8s.io
  kind: ClusterRole
  name: kube-state-metrics
subjects:
- kind: ServiceAccount
  name: kube-state-metrics
  namespace: kube-system
[root@hdss7-200 kube-state-metrics]# cat dp.yaml
# Deployment for kube-state-metrics in kube-system, pulling the image from
# the private Harbor registry and running under the kube-state-metrics
# ServiceAccount.
apiVersion: extensions/v1beta1
kind: Deployment
metadata:
  annotations:
    # NOTE(review): this annotation is normally written by the deployment
    # controller; it looks like it was carried over from an exported
    # object - confirm whether it is needed here.
    deployment.kubernetes.io/revision: "2"
  labels:
    grafanak8sapp: "true"
    app: kube-state-metrics
  name: kube-state-metrics
  namespace: kube-system
spec:
  # The selector must match the pod template labels below.
  selector:
    matchLabels:
      grafanak8sapp: "true"
      app: kube-state-metrics
  strategy:
    rollingUpdate:
      maxSurge: 25%
      maxUnavailable: 25%
    type: RollingUpdate
  template:
    metadata:
      labels:
        grafanak8sapp: "true"
        app: kube-state-metrics
    spec:
      containers:
      - name: kube-state-metrics
        image: harbor.od.com/public/kube-state-metrics:v1.5.0
        imagePullPolicy: IfNotPresent
        ports:
        - containerPort: 8080
          name: http-metrics
          protocol: TCP
        # The pod is marked Ready once GET /healthz on port 8080 succeeds.
        readinessProbe:
          failureThreshold: 3
          httpGet:
            path: /healthz
            port: 8080
            scheme: HTTP
          initialDelaySeconds: 5
          periodSeconds: 10
          successThreshold: 1
          timeoutSeconds: 5
      serviceAccountName: kube-state-metrics
应用资源配置清单:
[root@hdss7-21 ~]# kubectl apply -f http://k8s-yaml.od.com/kube-state-metrics/rbac.yaml
serviceaccount/kube-state-metrics created
clusterrole.rbac.authorization.k8s.io/kube-state-metrics created
clusterrolebinding.rbac.authorization.k8s.io/kube-state-metrics created
[root@hdss7-21 ~]# kubectl apply -f http://k8s-yaml.od.com/kube-state-metrics/dp.yaml
deployment.extensions/kube-state-metrics created
[root@hdss7-21 ~]# kubectl get pod -n kube-system
NAME READY STATUS RESTARTS AGE
coredns-6b6c4f9648-rrgfx 1/1 Running 0 49m
kube-state-metrics-8669f776c6-gb6nd 0/1 Running 0 15s
kubernetes-dashboard-76dcdb4677-c847s 1/1 Running 0 38m
traefik-ingress-h2jpc 1/1 Running 0 2d
traefik-ingress-k5hgk 1/1 Running 0 2d
[root@hdss7-21 ~]# curl 172.7.21.9:8080/healthz
ok
部署node-exporter(node-exporter用来监控宿主机的资源使用情况):
准备镜像:
[root@hdss7-200 ~]# docker pull prom/node-exporter:v0.15.0
v0.15.0: Pulling from prom/node-exporter
Image docker.io/prom/node-exporter:v0.15.0 uses outdated schema1 manifest format. Please upgrade to a schema2 image for better future compatibility. More information at https://docs.docker.com/registry/spec/deprecated-schema-v1/
aa3e9481fcae: Pull complete
a3ed95caeb02: Pull complete
afc308b02dc6: Pull complete
4cafbffc9d4f: Pull complete
Digest: sha256:a59d1f22610da43490532d5398b3911c90bfa915951d3b3e5c12d3c0bf8771c3
Status: Downloaded newer image for prom/node-exporter:v0.15.0
docker.io/prom/node-exporter:v0.15.0
[root@hdss7-200 ~]# docker images|grep node-exporter:v0.15.0
[root@hdss7-200 ~]# docker images|grep node-exporter
prom/node-exporter v0.15.0 12d51ffa2b22 2 years ago 22.8MB
[root@hdss7-200 ~]# docker tag 12d51ffa2b22 harbor.od.com/public/node-exporter:v0.15.0
[root@hdss7-200 ~]# docker push harbor.od.com/public/node-exporter:v0.15.0
The push refers to repository [harbor.od.com/public/node-exporter]
5f70bf18a086: Mounted from public/pause
1c7f6350717e: Pushed
a349adf62fe1: Pushed
c7300f623e77: Pushed
v0.15.0: digest: sha256:57d9b335b593e4d0da1477d7c5c05f23d9c3dc6023b3e733deb627076d4596ed size: 1979
准备资源配置清单:
[root@hdss7-200 node-exporter]# cat ds.yaml
# DaemonSet for node-exporter in kube-system. The pod uses the host
# network and mounts the host's /proc and /sys read-only so the exporter
# reads host-level metrics from them.
kind: DaemonSet
apiVersion: extensions/v1beta1
metadata:
  name: node-exporter
  namespace: kube-system
  labels:
    daemon: "node-exporter"
    grafanak8sapp: "true"
spec:
  # The selector must match the pod template labels below.
  selector:
    matchLabels:
      daemon: "node-exporter"
      grafanak8sapp: "true"
  template:
    metadata:
      name: node-exporter
      labels:
        daemon: "node-exporter"
        grafanak8sapp: "true"
    spec:
      volumes:
      - name: proc
        hostPath:
          path: /proc
          type: ""
      - name: sys
        hostPath:
          path: /sys
          type: ""
      containers:
      - name: node-exporter
        image: harbor.od.com/public/node-exporter:v0.15.0
        imagePullPolicy: IfNotPresent
        # Point the exporter at the host filesystems mounted below.
        args:
        - --path.procfs=/host_proc
        - --path.sysfs=/host_sys
        ports:
        - name: node-exporter
          hostPort: 9100
          containerPort: 9100
          protocol: TCP
        volumeMounts:
        - name: sys
          readOnly: true
          mountPath: /host_sys
        - name: proc
          readOnly: true
          mountPath: /host_proc
      hostNetwork: true
[root@hdss7-21 ~]# kubectl apply -f http://k8s-yaml.od.com/node-exporter/ds.yaml
daemonset.extensions/node-exporter created
因为node-exporter是以DaemonSet方式部署的,所以每个运算节点上都会运行一个Pod:
[root@hdss7-21 ~]# kubectl get pod -n kube-system -o wide
NAME READY STATUS RESTARTS AGE IP NODE NOMINATED NODE READINESS GATES
coredns-6b6c4f9648-rrgfx 1/1 Running 0 63m 172.7.22.7 hdss7-22.host.com <none> <none>
kube-state-metrics-8669f776c6-gb6nd 1/1 Running 0 14m 172.7.21.9 hdss7-21.host.com <none> <none>
kubernetes-dashboard-76dcdb4677-c847s 1/1 Running 0 52m 172.7.22.10 hdss7-22.host.com <none> <none>
node-exporter-gn2hb 1/1 Running 0 3m 10.4.7.22 hdss7-22.host.com <none> <none>
node-exporter-nzww6 1/1 Running 0 3m 10.4.7.21 hdss7-21.host.com <none> <none>
traefik-ingress-h2jpc 1/1 Running 0 2d1h 172.7.21.3 hdss7-21.host.com <none> <none>
traefik-ingress-k5hgk 1/1 Running 0 2d 172.7.22.2 hdss7-22.host.com <none> <none>
部署cadvisor(cadvisor用来采集每个容器所消耗的资源):
准备镜像:
[root@hdss7-200 node-exporter]# docker pull google/cadvisor:v0.28.3
v0.28.3: Pulling from google/cadvisor
ab7e51e37a18: Pull complete
a2dc2f1bce51: Pull complete
3b017de60d4f: Pull complete
Digest: sha256:9e347affc725efd3bfe95aa69362cf833aa810f84e6cb9eed1cb65c35216632a
Status: Downloaded newer image for google/cadvisor:v0.28.3
docker.io/google/cadvisor:v0.28.3
[root@hdss7-200 node-exporter]#
[root@hdss7-200 node-exporter]#
[root@hdss7-200 node-exporter]# docker images|grep google/cadvisor
google/cadvisor v0.28.3 75f88e3ec333 2 years ago 62.2MB
[root@hdss7-200 node-exporter]# docker tag 75f88e3ec333 harbor.od.com/public/cadvisor:v0.28.3
[root@hdss7-200 node-exporter]# docker push harbor.od.com/public/cadvisor:v0.28.3
The push refers to repository [harbor.od.com/public/cadvisor]
f60e27acaccf: Pushed
f04a25da66bf: Pushed
52a5560f4ca0: Pushed
v0.28.3: digest: sha256:34d9d683086d7f3b9bbdab0d1df4518b230448896fa823f7a6cf75f66d64ebe1 size: 951
在所有运算节点上创建软链接(需先把 /sys/fs/cgroup/ 重新挂载为可读写):
# Remount the cgroup hierarchy read-write so that a symlink can be created
# inside it.
mount -o remount,rw /sys/fs/cgroup/
# Expose the cpu,cpuacct controller directory under the alternate name
# cpuacct,cpu.
# NOTE(review): presumably cAdvisor v0.28.3 looks the controller up under
# this name - confirm against the cadvisor manifest's volume mounts.
# The existence check keeps the step re-runnable: a plain `ln -s` fails
# with "File exists" when the link is already present.
if [ ! -e /sys/fs/cgroup/cpuacct,cpu ] && [ ! -L /sys/fs/cgroup/cpuacct,cpu ]; then
  ln -s /sys/fs/cgroup/cpu,cpuacct /sys/fs/cgroup/cpuacct,cpu
fi
应用资源配置清单(cadvisor的ds.yaml需事先放到k8s-yaml.od.com的cadvisor目录下,本文未列出其内容):
[root@hdss7-22 ~]# kubectl apply -f http://k8s-yaml.od.com/cadvisor/ds.yaml
daemonset.apps/cadvisor created
[root@hdss7-22 ~]# kubectl -n kube-system get pod
NAME READY STATUS RESTARTS AGE
cadvisor-cdtzg 1/1 Running 0 12s
cadvisor-pbmb7 1/1 Running 0 12s
coredns-6b6c4f9648-rrgfx 1/1 Running 0 82m
kube-state-metrics-8669f776c6-gb6nd 1/1 Running 0 33m
kubernetes-dashboard-76dcdb4677-c847s 1/1 Running 0 71m
node-exporter-gn2hb 1/1 Running 0 22m
node-exporter-nzww6 1/1 Running 0 22m
traefik-ingress-h2jpc 1/1 Running 0 2d1h
traefik-ingress-k5hgk 1/1 Running 0 2d1h
来源:oschina
链接:https://my.oschina.net/u/4408413/blog/3221306