1 - Case Studies

1.1 - 001 Create a pod named nginx

# Create a pod container with a single command
kubectl run nginx --image=docker.io/library/nginx:1.21.6
pod/nginx created

# List the pods in the default namespace
kubectl get pod -o wide
    NAME    READY   STATUS    RESTARTS   AGE   IP              NODE               NOMINATED NODE   READINESS GATES
    nginx   1/1     Running   0          17s   172.20.177.22   k8s-192-168-0-19   <none>           <none>

# Access the pod
curl 172.20.177.22
<!DOCTYPE html>
    <html>
    <head>
    <title>Welcome to nginx!</title>
    <style>
    html { color-scheme: light dark; }
    body { width: 35em; margin: 0 auto;
    font-family: Tahoma, Verdana, Arial, sans-serif; }
    </style>
    </head>
    <body>
    <h1>Welcome to nginx!</h1>
    <p>If you see this page, the nginx web server is successfully installed and
    working. Further configuration is required.</p>

    <p>For online documentation and support please refer to
    <a href="http://nginx.org/">nginx.org</a>.<br/>
    Commercial support is available at
    <a href="http://nginx.com/">nginx.com</a>.</p>

    <p><em>Thank you for using nginx.</em></p>
    </body>
    </html>
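
A couple of optional follow-up checks on the pod created above (a minimal sketch; it only assumes the nginx pod from this case):

# inspect scheduling and image-pull events plus the assigned node
kubectl describe pod nginx
# tail the nginx logs; the curl request above shows up in the access log
kubectl logs nginx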

1.2 - 002 Exec into the pod named nginx

This case builds on case 001.

kubectl -it exec nginx -- bash
    root@nginx:/# echo 'Hello K8S' > /usr/share/nginx/html/index.html
    root@nginx:/# exit
    exit

curl 172.20.177.22
    Hello K8S
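
The same change can also be made without an interactive shell (a small sketch using the pod from this case):

# run one-off commands instead of opening a shell
kubectl exec nginx -- sh -c "echo 'Hello K8S' > /usr/share/nginx/html/index.html"
kubectl exec nginx -- cat /usr/share/nginx/html/index.html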

1.3 - 003 Create a deployment named nginx

This case builds on case 001.

kubectl create deployment nginx --image=docker.io/library/nginx:1.21.6
    deployment.apps/nginx created

kubectl get deployment -w
    NAME    READY   UP-TO-DATE   AVAILABLE   AGE
    nginx   0/1     1            0           15s
    nginx   1/1     1            1           15s

kubectl get pod -o wide
    NAME                     READY   STATUS    RESTARTS   AGE   IP              NODE               NOMINATED NODE   READINESS GATES
    # Note how the pod name is segmented: 784757bdfb is the hash of the ReplicaSet
    nginx-784757bdfb-z6gd6   1/1     Running   0          45s   172.20.177.24   k8s-192-168-0-19   <none>           <none>

kubectl scale deployment nginx --replicas=2
    deployment.apps/nginx scaled

kubectl get deployment -w
    NAME    READY   UP-TO-DATE   AVAILABLE   AGE
    nginx   1/2     2            1           70s
    nginx   2/2     2            2           75s

kubectl get pod -o wide
    NAME                     READY   STATUS    RESTARTS   AGE   IP               NODE               NOMINATED NODE   READINESS GATES
    nginx-784757bdfb-2q58h   1/1     Running   0          25s   172.20.182.149   k8s-192-168-0-11   <none>           <none>
    nginx-784757bdfb-z6gd6   1/1     Running   0          85s   172.20.177.24    k8s-192-168-0-19   <none>           <none>
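
The hash segment in the pod names comes from the ReplicaSet that the Deployment manages; a quick way to see the relationship (a sketch, assuming the nginx deployment above):

kubectl get rs -l app=nginx                  # the ReplicaSet name ends with the same hash
kubectl get pod -l app=nginx --show-labels   # the pods carry a matching pod-template-hash label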

1.4 - 004 Simulate a deployment release and rollback

Create a deployment named nginx with 2 initial replicas, then change the nginx image tag, and finally roll back to the previous version.
root@k8s-192-168-0-17:/home/node1# kubectl create deployment nginx --image=docker.io/library/nginx:1.21.6 --replicas=2
deployment.apps/nginx created
root@k8s-192-168-0-17:/home/node1# kubectl get deployment nginx -o wide -w
NAME    READY   UP-TO-DATE   AVAILABLE   AGE   CONTAINERS   IMAGES                           SELECTOR
nginx   2/2     2            2           22s   nginx        docker.io/library/nginx:1.21.6   app=nginx
root@k8s-192-168-0-17:/home/node1# kubectl get pod -o wide
NAME                     READY   STATUS    RESTARTS   AGE   IP               NODE               NOMINATED NODE   READINESS GATES
nginx-784757bdfb-bs4rt   1/1     Running   0          42s   172.20.182.150   k8s-192-168-0-11   <none>           <none>
nginx-784757bdfb-jjmzv   1/1     Running   0          43s   172.20.177.25    k8s-192-168-0-19   <none>           <none>
# Note the nginx version number here
root@k8s-192-168-0-17:/home/node1# curl 172.20.182.150/1
<html>
<head><title>404 Not Found</title></head>
<body>
<center><h1>404 Not Found</h1></center>
<hr><center>nginx/1.21.6</center>
</body>
</html>
root@k8s-192-168-0-17:/home/node1# kubectl set image deployment/nginx  nginx=docker.io/library/nginx:1.25.1
deployment.apps/nginx image updated
root@k8s-192-168-0-17:/home/node1# kubectl annotate deployment/nginx kubernetes.io/change-cause="image updated to 1.25.1"
deployment.apps/nginx annotated
root@k8s-192-168-0-17:/home/node1# kubectl rollout history deployment nginx
deployment.apps/nginx 
REVISION  CHANGE-CAUSE
1         <none>
2         image updated to 1.25.1
# Note that the version number has changed to 1.25.1
root@k8s-192-168-0-17:/home/node1# curl 172.20.182.153/1
<html>
<head><title>404 Not Found</title></head>
<body>
<center><h1>404 Not Found</h1></center>
<hr><center>nginx/1.25.1</center>
</body>
</html>
root@k8s-192-168-0-17:/home/node1# kubectl set image deployments/nginx nginx=nginx:1.21.6
deployment.apps/nginx image updated
root@k8s-192-168-0-17:/home/node1# kubectl annotate deployment/nginx kubernetes.io/change-cause="image updated to 1.21.6"
deployment.apps/nginx annotated
root@k8s-192-168-0-17:/home/node1# kubectl get pod -o wide
NAME                     READY   STATUS    RESTARTS   AGE   IP               NODE               NOMINATED NODE   READINESS GATES
nginx-796bdc6f77-66hcm   1/1     Running   0          35s   172.20.182.154   k8s-192-168-0-11   <none>           <none>
nginx-796bdc6f77-n5wng   1/1     Running   0          49s   172.20.177.29    k8s-192-168-0-19   <none>           <none>
# Note that the version number has changed back to 1.21.6
root@k8s-192-168-0-17:/home/node1# curl 172.20.182.154/1
<html>
<head><title>404 Not Found</title></head>
<body>
<center><h1>404 Not Found</h1></center>
<hr><center>nginx/1.21.6</center>
</body>
</html>

## Suppose this upgrade went wrong: check the history, then roll back to 1.25.1
root@k8s-192-168-0-17:/home/node1# kubectl rollout history deployment nginx
deployment.apps/nginx 
REVISION  CHANGE-CAUSE
1         <none>
2         image updated to 1.25.1
3         image updated to 1.21.6
# --to-revision=2 is the revision index shown in the previous step
root@k8s-192-168-0-17:/home/node1# kubectl rollout undo deployment nginx --to-revision=2
deployment.apps/nginx rolled back
# The version has been rolled back to 1.25.1
root@k8s-192-168-0-17:/home/node1# curl 172.20.177.30/1
<html>
<head><title>404 Not Found</title></head>
<body>
<center><h1>404 Not Found</h1></center>
<hr><center>nginx/1.25.1</center>
</body>
</html>
# Now check the ReplicaSets
root@k8s-192-168-0-17:/home/node1# kubectl get rs
NAME               DESIRED   CURRENT   READY   AGE
nginx-784757bdfb   0         0         0       19m
nginx-796bdc6f77   0         0         0       16m
nginx-79df7c55d7   2         2         2       19m
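
A few companion commands that are handy around rollbacks (a sketch; revision numbers depend on your actual history):

kubectl rollout status deployment nginx                 # block until the rollout or rollback has converged
kubectl rollout history deployment nginx --revision=2   # show the pod template of a single revision
kubectl rollout undo deployment nginx                   # without --to-revision, roll back to the previous revision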

1.5 - 005 Manually create a pod that is guaranteed to exit with a failure, and watch it

```shell
root@k8s-192-168-0-17:~# kubectl run  busybox --image=busybox --dry-run=client -o yaml > testHealthz.yaml
root@k8s-192-168-0-17:~# vim testHealthz.yaml

apiVersion: v1
kind: Pod
metadata:
  creationTimestamp: null
  labels:
    run: busybox
  name: busybox
spec:
  containers:
  - image: busybox
    name: busybox
    resources: {}
    # args simulate the container exiting with return code 1 ten seconds after start
    args:
    - /bin/sh
    - -c
    - sleep 10; exit 1
  dnsPolicy: ClusterFirst
  # change the default restartPolicy from Always to OnFailure
  restartPolicy: OnFailure
status: {}

root@k8s-192-168-0-17:~# kubectl apply -f testHealthz.yaml
pod/busybox created

root@k8s-192-168-0-17:~# kubectl  get pod -o wide -w
NAME                     READY   STATUS    RESTARTS   AGE   IP              NODE               NOMINATED NODE   READINESS GATES
busybox                  1/1     Running   0          22s   172.20.177.32   k8s-192-168-0-19   <none>           <none>
nginx-796bdc6f77-7r5ts   1/1     Running   0          10m   172.20.177.31   k8s-192-168-0-19   <none>           <none>
busybox                  0/1     Error     0          32s   172.20.177.32   k8s-192-168-0-19   <none>           <none>
busybox                  1/1     Running   1 (8s ago)   38s   172.20.177.32   k8s-192-168-0-19   <none>           <none>
busybox                  0/1     Error     1 (19s ago)   49s   172.20.177.32   k8s-192-168-0-19   <none>           <none>
busybox                  0/1     CrashLoopBackOff   1 (14s ago)   61s   172.20.177.32   k8s-192-168-0-19   <none>           <none>
busybox                  1/1     Running            2 (19s ago)   66s   172.20.177.32   k8s-192-168-0-19   <none>           <none>
busybox                  0/1     Error              2 (30s ago)   77s   172.20.177.32   k8s-192-168-0-19   <none>           <none>
busybox                  0/1     CrashLoopBackOff   2 (13s ago)   89s   172.20.177.32   k8s-192-168-0-19   <none>           <none>
busybox                  1/1     Running            3 (34s ago)   110s   172.20.177.32   k8s-192-168-0-19   <none>           <none>
busybox                  0/1     Error              3 (44s ago)   2m     172.20.177.32   k8s-192-168-0-19   <none>           <none>
busybox                  0/1     CrashLoopBackOff   3 (16s ago)   2m14s   172.20.177.32   k8s-192-168-0-19   <none>           <none>
busybox                  1/1     Running            4 (48s ago)   2m46s   172.20.177.32   k8s-192-168-0-19   <none>           <none>
busybox                  0/1     Error              4 (58s ago)   2m56s   172.20.177.32   k8s-192-168-0-19   <none>           <none>
busybox                  0/1     CrashLoopBackOff   4 (15s ago)   3m10s   172.20.177.32   k8s-192-168-0-19   <none>           <none>

# It keeps cycling through Running, Error and CrashLoopBackOff; the kubelet restarts it with an exponentially increasing back-off delay (10s, 20s, 40s, ...), capped at 5 minutes
```
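
To dig into why the pod keeps restarting, these checks are useful (a sketch; the busybox container here writes no logs of its own, so the interesting parts are the events and the exit code):

```shell
kubectl describe pod busybox      # Events show "Back-off restarting failed container"
kubectl logs busybox --previous   # logs of the previous, failed run
kubectl get pod busybox -o jsonpath='{.status.containerStatuses[0].lastState.terminated.exitCode}{"\n"}'
```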

1.6 - 006 Use a Readiness probe to keep unhealthy pods out of service during a failed release

Deploy a Deployment named mytest with 10 replicas, then simulate a release that fails; a Readiness probe is used to make sure the unhealthy pods receive no requests.
1. Prepare two Deployment manifests

```yaml
# cat myapp-v1.yaml -- this version passes the readiness check

apiVersion: apps/v1
kind: Deployment
metadata:
  name: mytest
spec:
  replicas: 10     # 10 pods
  selector:
    matchLabels:
      app: mytest
  template:
    metadata:
      labels:
        app: mytest
    spec:
      containers:
      - name: mytest
        image: registry.cn-hangzhou.aliyuncs.com/acs/busybox:v1.29.2
        args:
        - /bin/sh
        - -c
        - sleep 10; touch /tmp/healthy; sleep 30000
        readinessProbe:
          exec:
            command:
            - cat
            - /tmp/healthy
          initialDelaySeconds: 10
          periodSeconds: 5

# cat myapp-v2.yaml -- v2 cannot pass the check, simulating a failed release

apiVersion: apps/v1
kind: Deployment
metadata:
  name: mytest
spec:
  strategy:
    rollingUpdate:
      maxSurge: 35%        # cap on total replicas during the rollout (with 10 as the base): 10 + 10 * 35% = 13.5 --> 14
      maxUnavailable: 35%  # cap on unavailable replicas (both default to 25%); 35% of 10 rounds down to 3, so at least 10 - 3 = 7 must stay available
  replicas: 10
  selector:
    matchLabels:
      app: mytest
  template:
    metadata:
      labels:
        app: mytest
    spec:
      containers:
      - name: mytest
        image: registry.cn-hangzhou.aliyuncs.com/acs/busybox:v1.29.2
        args:
        - /bin/sh
        - -c
        - sleep 30000   # note that /tmp/healthy is never created, so the probe below is bound to fail
        readinessProbe:
          exec:
            command:
            - cat
            - /tmp/healthy
          initialDelaySeconds: 10
          periodSeconds: 5

```

2. Apply myapp-v1.yaml

```shell
kubectl apply -f myapp-v1.yaml
# Don't forget the change-cause annotation
kubectl annotate deployment/mytest kubernetes.io/change-cause="kubectl apply --filename=myapp-v1.yaml"
# After a short while the pods show Running
root@k8s-192-168-0-17:~# kubectl get pod -o wide 
NAME                      READY   STATUS    RESTARTS   AGE    IP               NODE               NOMINATED NODE   READINESS GATES
mytest-59887f89f5-fq6hv   1/1     Running   0          112s   172.20.182.159   k8s-192-168-0-11   <none>           <none>
mytest-59887f89f5-gpsnx   1/1     Running   0          113s   172.20.182.157   k8s-192-168-0-11   <none>           <none>
mytest-59887f89f5-gwkmg   1/1     Running   0          113s   172.20.177.33    k8s-192-168-0-19   <none>           <none>
mytest-59887f89f5-ltdw9   1/1     Running   0          115s   172.20.182.156   k8s-192-168-0-11   <none>           <none>
mytest-59887f89f5-m4vkn   1/1     Running   0          112s   172.20.177.37    k8s-192-168-0-19   <none>           <none>
mytest-59887f89f5-m9z2t   1/1     Running   0          112s   172.20.182.160   k8s-192-168-0-11   <none>           <none>
mytest-59887f89f5-mq9n6   1/1     Running   0          113s   172.20.177.35    k8s-192-168-0-19   <none>           <none>
mytest-59887f89f5-nwsc9   1/1     Running   0          115s   172.20.177.34    k8s-192-168-0-19   <none>           <none>
mytest-59887f89f5-pzm68   1/1     Running   0          115s   172.20.177.36    k8s-192-168-0-19   <none>           <none>
mytest-59887f89f5-qd74c   1/1     Running   0          113s   172.20.182.158   k8s-192-168-0-11   <none>           <none>
```

3. Apply myapp-v2.yaml

```shell
kubectl apply -f myapp-v2.yaml
# Don't forget the change-cause annotation
kubectl annotate deployment/mytest kubernetes.io/change-cause="kubectl apply --filename=myapp-v2.yaml"
# After a while the deployment output settles at the following
root@k8s-192-168-0-17:~# kubectl get deployment mytest
NAME     READY   UP-TO-DATE   AVAILABLE   AGE
mytest   7/10    7            7           3m43s
# READY: only 7 of the 10 pods are currently ready and serving
# UP-TO-DATE: number of replicas already updated to the new template, i.e. 7 new replicas
# AVAILABLE: number of replicas currently in READY state

# Check the pods
root@k8s-192-168-0-17:~# kubectl get pod
NAME                      READY   STATUS    RESTARTS   AGE
mytest-59887f89f5-fq6hv   1/1     Running   0          5m9s
mytest-59887f89f5-gpsnx   1/1     Running   0          5m10s
mytest-59887f89f5-gwkmg   1/1     Running   0          5m10s
mytest-59887f89f5-ltdw9   1/1     Running   0          5m12s
mytest-59887f89f5-m9z2t   1/1     Running   0          5m9s
mytest-59887f89f5-pzm68   1/1     Running   0          5m12s
mytest-59887f89f5-qd74c   1/1     Running   0          5m10s
mytest-8586c6547d-6sqwt   0/1     Running   0          2m19s
mytest-8586c6547d-b9kql   0/1     Running   0          2m20s
mytest-8586c6547d-cgkrj   0/1     Running   0          2m7s
mytest-8586c6547d-dw6kv   0/1     Running   0          2m18s
mytest-8586c6547d-ht4dq   0/1     Running   0          2m19s
mytest-8586c6547d-v7rh9   0/1     Running   0          2m8s
mytest-8586c6547d-vqn6w   0/1     Running   0          2m7s

# Describe the deployment
root@k8s-192-168-0-17:~# kubectl describe deployment mytest
...
Replicas:               10 desired | 7 updated | 14 total | 7 available | 7 unavailable
...
Events:
Type    Reason             Age    From                   Message
----    ------             ----   ----                   -------
Normal  ScalingReplicaSet  5m46s  deployment-controller  Scaled up replica set mytest-59887f89f5 from 0 to 10
Normal  ScalingReplicaSet  2m52s  deployment-controller  Scaled up replica set mytest-8586c6547d from 0 to 4
Normal  ScalingReplicaSet  2m50s  deployment-controller  Scaled down replica set mytest-59887f89f5 from 10 to 7
Normal  ScalingReplicaSet  2m45s  deployment-controller  Scaled up replica set mytest-8586c6547d from 4 to 7
```

4. In this way we kept 7 available pods in the cluster

Let's walk through the whole process.

maxSurge:

Caps how far the pod count may exceed the desired replica count during a rolling update. It can be an absolute number or a percentage; a percentage is rounded up.

In our example the desired count is 10 and maxSurge is 35%, so 10 + 10 * 35% = 13.5 --> 14.

Hence the replica summary in kubectl describe for the mytest deployment: Replicas: 10 desired | 7 updated | 14 total | 7 available | 7 unavailable

10 desired, 7 already updated, 14 total at most, 7 available, 7 unavailable.

maxUnavailable:

Caps the number of pods that may be unavailable during the rollout. It can also be an absolute number or a percentage; a percentage is rounded down (so the minimum number of available pods effectively rounds up).

In our example maxUnavailable is 35%: 35% of 10 is 3.5, rounded down to 3, so at least 10 - 3 = 7 pods must remain available.

The full sequence of this rolling update is:

1) maxSurge gives a ceiling of 14 replicas, so 4 new-version pods are created first, bringing the total to 14.

2) maxUnavailable requires at least 7 of the 10 desired pods to stay available, so 3 old-version pods are destroyed (10 - 7 = 3).

3) Once those 3 old pods are gone, 3 more new-version pods are created to keep the total at 14.

4) When new pods pass the Readiness probe, the number of available pods rises above 7.

5) More old pods are then destroyed, keeping 7 available.

6) As old pods are destroyed, new ones are created automatically to keep the total at 14.

7) And so on, until the update completes.

In our case the rollout got stuck at step 4: the new pods never pass the Readiness check.

In a real production environment this is the point where we would rollout undo back to the previous version to keep the service healthy as a whole.

```shell
root@k8s-192-168-0-17:~# kubectl rollout history deployment mytest
deployment.apps/mytest 
REVISION  CHANGE-CAUSE
1         kubectl apply --filename=myapp-v1.yaml
2         kubectl apply --filename=myapp-v2.yaml

root@k8s-192-168-0-17:~# kubectl rollout undo deployment mytest --to-revision=1
deployment.apps/mytest rolled back

# Then watch how all the pods change
kubectl get pod -w
```
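
After the undo, a quick way to confirm the deployment has converged again (a sketch based on the objects above):

```shell
kubectl rollout status deployment mytest   # waits until the rollback finishes
kubectl get deployment mytest              # READY should return to 10/10
kubectl get rs -l app=mytest               # the failed v2 ReplicaSet scales back down to 0
```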

1.7 - 007 Use the name server (cluster DNS) for network access inside the k8s cluster

1. Prepare the Service yaml (together with a matching Deployment)
apiVersion: v1
kind: Service
metadata:
  creationTimestamp: null
  labels:
    app: web
  name: web
spec:
  ports:
  - port: 80
    protocol: TCP
    targetPort: 80
  selector:
    app: web
status:
  loadBalancer: {}

---

apiVersion: apps/v1
kind: Deployment
metadata:
  labels:
    app: web
  name: web
  namespace: default
spec:
  replicas: 1
  selector:
    matchLabels:
      app: web
  template:
    metadata:
      labels:
        app: web
    spec:
      containers:
      - image: nginx:1.21.6
        name: nginx
  2. Start a utility pod and verify
kubectl run -it --rm busybox --image=registry.cn-shanghai.aliyuncs.com/acs/busybox:v1.29.2 -- sh
# --rm means the pod is deleted automatically when you exit its sh session

/ # wget -q -O- http://web
<!DOCTYPE html>
<html>
<head>
<title>Welcome to nginx!</title>
<style>
html { color-scheme: light dark; }
body { width: 35em; margin: 0 auto;
font-family: Tahoma, Verdana, Arial, sans-serif; }
</style>
</head>
<body>
<h1>Welcome to nginx!</h1>
<p>If you see this page, the nginx web server is successfully installed and
working. Further configuration is required.</p>

<p>For online documentation and support please refer to
<a href="http://nginx.org/">nginx.org</a>.<br/>
Commercial support is available at
<a href="http://nginx.com/">nginx.com</a>.</p>

<p><em>Thank you for using nginx.</em></p>
</body>
</html>
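
Name resolution is what makes http://web work above: the Service name is resolved by the cluster DNS (CoreDNS/kube-dns). A small sketch, run from inside the same temporary busybox pod:

/ # nslookup web                                      # resolves to the Service's ClusterIP
/ # wget -q -O- http://web.default.svc.cluster.local  # the fully qualified name works as well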

1.8 - 008 Bring an external service into the k8s cluster network

This case exposes an HTTP service with Python and brings it into the k8s cluster network.
  1. Start a service outside the k8s cluster
# Start an HTTP server on any node; python3 is used here
node1@k8s-192-168-0-17:~$ python3 -m http.server 8088 # listening on port 8088
  2. Create the Service yaml
# Note: both resources are written in one yaml file. In production we often do this so that all resources of one service can be managed together; separate resources with "---".
apiVersion: v1
kind: Service
metadata:
  name: myhttp
spec:
  ports:
  - name: http-port
    port: 3306         # the Service exposes port 3306
    protocol: TCP
  type: ClusterIP      # cluster-internal access only

---

apiVersion: discovery.k8s.io/v1
kind: EndpointSlice
metadata:
  name: myhttp-slice # name of this EndpointSlice
  labels:
    kubernetes.io/service-name: myhttp  # required: ties the slice to its Service
addressType: IPv4
ports:
- name: http-port     # must match the Service port name
  port: 8088           # actual port of the external service; this effectively hands 8088 to the http-port of the myhttp Service
  protocol: TCP
endpoints:
- addresses:
  - "192.168.0.17"     # 外部http服务IP
  conditions:
    ready: true         # mark the endpoint as ready
  3. Verify
node1@k8s-192-168-0-17:~$ sudo kubectl get svc
NAME         TYPE        CLUSTER-IP      EXTERNAL-IP   PORT(S)        AGE
kubernetes   ClusterIP   10.68.0.1       <none>        443/TCP        5d2h
myhttp       ClusterIP   10.68.48.233    <none>        3306/TCP       9s
new-nginx    NodePort    10.68.194.158   <none>        81:30759/TCP   4h19m
node1@k8s-192-168-0-17:~$ curl 10.68.48.233:3306
<!DOCTYPE HTML>
<html lang="en">
<head>
<meta charset="utf-8">
<title>Directory listing for /</title>
</head>
<body>
<h1>Directory listing for /</h1>
<hr>
<ul>
<li><a href=".ansible/">.ansible/</a></li>
<li><a href=".bash_history">.bash_history</a></li>
<li><a href=".bash_logout">.bash_logout</a></li>
<li><a href=".bashrc">.bashrc</a></li>
<li><a href=".cache/">.cache/</a></li>
<li><a href=".profile">.profile</a></li>
<li><a href=".ssh/">.ssh/</a></li>
<li><a href=".sudo_as_admin_successful">.sudo_as_admin_successful</a></li>
<li><a href=".viminfo">.viminfo</a></li>
<li><a href=".Xauthority">.Xauthority</a></li>
<li><a href="httpproxy.yaml">httpproxy.yaml</a></li>
<li><a href="nginx-svc.yaml">nginx-svc.yaml</a></li>
<li><a href="nginx.yaml">nginx.yaml</a></li>
<li><a href="planet">planet</a></li>
<li><a href="ubuntu-install-k8s/">ubuntu-install-k8s/</a></li>
</ul>
<hr>
</body>
</html>
  4. Notes

The Service here uses ClusterIP. If you switch it to NodePort:

sudo kubectl patch svc myhttp -p '{"spec":{"type":"NodePort"}}'

then it can also be reached via any node IP in the cluster; that is not demonstrated again here.
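
To confirm the wiring between the Service and the hand-written EndpointSlice, these checks help (a sketch using the names from this case):

kubectl get endpointslice -l kubernetes.io/service-name=myhttp -o wide
kubectl describe svc myhttp    # the Endpoints field should show 192.168.0.17:8088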

1.9 - 009 Run pods only on nodes that carry a specific label

1. Create the deployment yaml
```yaml
# The adjusted yaml
apiVersion: apps/v1
kind: Deployment
metadata:
  creationTimestamp: null
  labels:
    app: nginx
  name: nginx
spec:
  replicas: 2
  selector:
    matchLabels:
      app: nginx
  strategy: {}
  template:
    metadata:
      creationTimestamp: null
      labels:
        app: nginx
    spec:
      containers:
      - image: nginx
        name: nginx
        resources: {}
      nodeSelector:           # <--- here
        apps/nginx: "true"    # <--- nodes are selected by this label
status: {}
```

2. Apply the manifest

```shell
kubectl apply -f node-selector.yaml
```

3. Check the pods; at this point no node carries apps/nginx=true

```shell
node1@k8s-192-168-0-17:~$ sudo kubectl get pod -o wide
NAME                     READY   STATUS    RESTARTS   AGE    IP       NODE     NOMINATED NODE   READINESS GATES
nginx-756c69b65f-7vfv5   0/1     Pending   0          2m5s   <none>   <none>   <none>           <none>
nginx-756c69b65f-8gl9m   0/1     Pending   0          2m4s   <none>   <none>   <none>           <none>
```

4. Label a node

```shell
# First try labeling the master node
kubectl label node k8s-192-168-0-17 apps/nginx=true
kubectl get node k8s-192-168-0-17 --show-labels 
NAME               STATUS                     ROLES    AGE    VERSION   LABELS
k8s-192-168-0-17   Ready,SchedulingDisabled   master   5d3h   v1.33.1   apps/nginx=true,beta.kubernetes.io/arch=amd64,beta.kubernetes.io/os=linux,kubernetes.io/arch=amd64,kubernetes.io/hostname=k8s-192-168-0-17,kubernetes.io/os=linux,kubernetes.io/role=master
```

Although the master node k8s-192-168-0-17 now carries the label, the pods still cannot be scheduled onto it, because the node is in the SchedulingDisabled state, and that takes precedence.

```shell
# Now label a worker node
kubectl label node k8s-192-168-0-19 apps/nginx=true
kubectl get node k8s-192-168-0-19 --show-labels 
NAME               STATUS   ROLES   AGE    VERSION   LABELS
k8s-192-168-0-19   Ready    node    5d3h   v1.33.1   beta.kubernetes.io/arch=amd64,beta.kubernetes.io/os=linux,kubernetes.io/arch=amd64,kubernetes.io/hostname=k8s-192-168-0-19,kubernetes.io/os=linux,kubernetes.io/role=node
# Check the pods again: they quickly go from Pending to ContainerCreating and then Running, all on node k8s-192-168-0-19. Without the node label, pods would be spread roughly evenly across nodes.
kubectl get pod
NAME                     READY   STATUS              RESTARTS   AGE
nginx-756c69b65f-7vfv5   0/1     ContainerCreating   0          5m38s
nginx-756c69b65f-8gl9m   0/1     ContainerCreating   0          5m37s
kubectl get pod -o wide
NAME                     READY   STATUS    RESTARTS   AGE     IP              NODE               NOMINATED NODE   READINESS GATES
nginx-756c69b65f-7vfv5   1/1     Running   0          7m22s   172.20.177.63   k8s-192-168-0-19   <none>           <none>
nginx-756c69b65f-8gl9m   1/1     Running   0          7m21s   172.20.177.62   k8s-192-168-0-19   <none>           <none>
```
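
Two related checks worth knowing (a sketch): nodes can be listed by label, and removing the label later does not evict pods that are already running, because nodeSelector is only evaluated at scheduling time.

```shell
kubectl get node -l apps/nginx=true                 # list only the labeled nodes
kubectl label node k8s-192-168-0-19 apps/nginx-     # remove the label; existing pods keep running
```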

1.10 - 010 Deploy ingress-nginx-controller

Based on this ingress-nginx-controller, create an nginx application, then configure HTTPS access.

Open the yaml file.

Must read:

  1. On a large production cluster, ingress-nginx should have a node to itself: that node runs only ingress-nginx and no other pods.

  2. In the kind: ConfigMap section, data.worker-processes should equal the CPU core count of the node hosting the ingress-nginx-controller pod (ideally the real core count minus one).

  3. In the kind: ConfigMap section, data.worker-cpu-affinity is currently empty; just leave it empty.

  4. kind: DaemonSet: on a self-built cluster use a DaemonSet controller, which maps the container ports onto the host so no extra NodePort mapping is needed. On a managed cloud cluster such as Alibaba Cloud ACK, use a Deployment controller instead, because ACK pods use the cloud host's elastic network interfaces and can sit in the same network (subnet) as your cloud hosts. This manifest therefore defaults to kind: DaemonSet; if you switch to kind: Deployment, check the config items marked "Deployment need" and "DaemonSet need".

  5. For the resources section of the DaemonSet/Deployment: if limits and requests are set to the same values, the pod gets the highest priority in the cluster. When cluster resources run short, k8s evicts lower-priority pods first, so the high-priority ones are protected.

  6. If the logs report "mount: mounting rw on /proc/sys failed: Permission denied", uncomment the three items privileged: true, procMount: Default and runAsUser: 0; if the error does not appear, leave them alone.

  7. Label the chosen node:

      nodeSelector:
        boge/ingress-controller-ready: "true"

Add the label:    kubectl label node ${nodeHostname} boge/ingress-controller-ready=true
Show labels:      kubectl get node --show-labels
Remove the label: kubectl label node ${nodeHostname} boge/ingress-controller-ready-

  8. If ingress-nginx is deployed on its own dedicated node, then after labeling that node it is best to taint it as well.
    Add the taint:    kubectl taint nodes xx.xx.xx.xx boge/ingress-controller-ready="true":NoExecute
    Remove the taint: kubectl taint nodes xx.xx.xx.xx boge/ingress-controller-ready:NoExecute-
    If the node is tainted, uncomment the block below:
    tolerations:
    - effect: NoExecute # NoExecute is the taint's eviction effect: running pods that do not tolerate it get evicted
      key: boge/ingress-controller-ready
      operator: Equal # the value must match exactly (with Exists, only the key has to exist)
      value: "true"

This is a Kubernetes pod Toleration definition; it controls whether a pod may be scheduled onto nodes carrying a particular Taint.
So the configuration above means:

  • The pods are allowed to be scheduled onto nodes tainted with boge/ingress-controller-ready=true:NoExecute, which keeps them running only on those dedicated nodes.

Now use the ingress-nginx-controller to create an nginx application, then configure HTTPS access.

Create nginx.yaml:

---
kind: Service
apiVersion: v1
metadata:
  name: new-nginx
spec:
  selector:
    app: new-nginx
  ports:
    - name: http-port
      port: 80
      protocol: TCP
      targetPort: 80

---
# Ingress configuration for newer k8s versions
apiVersion: networking.k8s.io/v1
kind: Ingress
metadata:
  name: new-nginx
  annotations:
    #kubernetes.io/ingress.class: "nginx"  # the deprecated annotation-based way
    nginx.ingress.kubernetes.io/force-ssl-redirect: "false"
    nginx.ingress.kubernetes.io/whitelist-source-range: 0.0.0.0/0
    nginx.ingress.kubernetes.io/configuration-snippet: |
      if ($host != 'www.boge.com' ) {
        rewrite ^ http://www.boge.com$request_uri permanent;
      }
spec:
  ingressClassName: nginx-master
  rules:
    - host: boge.com
      http:
        paths:
          - backend:
              service:
                name: new-nginx
                port:
                  number: 80
            path: /
            pathType: Prefix
    - host: m.boge.com
      http:
        paths:
          - backend:
              service:
                name: new-nginx
                port:
                  number: 80
            path: /
            pathType: Prefix
    - host: www.boge.com
      http:
        paths:
          - backend:
              service:
                name: new-nginx
                port:
                  number: 80
            path: /
            pathType: Prefix
#  tls:
#      - hosts:
#          - boge.com
#          - m.boge.com
#          - www.boge.com
#        secretName: boge-com-tls

# tls secret create command:
#   kubectl -n <namespace> create secret tls boge-com-tls --key boge-com.key --cert boge-com.csr

---
apiVersion: apps/v1
kind: Deployment
metadata:
  name: new-nginx
  labels:
    app: new-nginx
spec:
  replicas: 3  # the count can be set according to the number of worker nodes
  selector:
    matchLabels:
      app: new-nginx
  template:
    metadata:
      labels:
        app: new-nginx
    spec:
      containers:
#--------------------------------------------------
      - name: new-nginx
        image: nginx:1.21.6
        env:
          - name: TZ
            value: Asia/Shanghai
        ports:
        - containerPort: 80
        volumeMounts:
          - name: html-files
            mountPath: "/usr/share/nginx/html"
#--------------------------------------------------
      - name: busybox
        image: registry.cn-hangzhou.aliyuncs.com/acs/busybox:v1.29.2
        args:
        - /bin/sh
        - -c
        - >
           while :; do
             if [ -f /html/index.html ];then
               echo "[$(date +%F\ %T)] ${MY_POD_NAMESPACE}-${MY_POD_NAME}-${MY_POD_IP}" > /html/index.html
               sleep 1
             else
               touch /html/index.html
             fi
           done
        env:
          - name: TZ
            value: Asia/Shanghai
          - name: MY_POD_NAME
            valueFrom:
              fieldRef:
                apiVersion: v1
                fieldPath: metadata.name
          - name: MY_POD_NAMESPACE
            valueFrom:
              fieldRef:
                apiVersion: v1
                fieldPath: metadata.namespace
          - name: MY_POD_IP
            valueFrom:
              fieldRef:
                apiVersion: v1
                fieldPath: status.podIP
        volumeMounts:
          - name: html-files
            mountPath: "/html"
          - mountPath: /etc/localtime
            name: tz-config

#--------------------------------------------------
      volumes:
        - name: html-files
          emptyDir:
            medium: Memory
            sizeLimit: 10Mi
        - name: tz-config
          hostPath:
            path: /usr/share/zoneinfo/Asia/Shanghai

---

kubectl apply -f nginx.yaml

kubectl get ingress
NAME        CLASS          HOSTS                              ADDRESS       PORTS   AGE
new-nginx   nginx-master   boge.com,m.boge.com,www.boge.com   10.68.216.0   80      9m51s

kubectl -n kube-system get pod -o wide|grep nginx-ingress
nginx-ingress-controller-6vtl4             1/1     Running     0              3h1m    192.168.0.11     k8s-192-168-0-11   <none>           <none>
nginx-ingress-controller-tg6pq             1/1     Running     0              3h3m    192.168.0.19     k8s-192-168-0-19   <none>           <none>

You can see the controller pods are now running on nodes 11 and 19.

Now, on another node in the cluster, edit the host's hosts file and add an entry:

192.168.0.19 (or 192.168.0.11) boge.com m.boge.com www.boge.com

Then run:

curl www.boge.com
[2025-07-08 07:09:25] default-new-nginx-6df56b5c4b-hktqc-172.20.177.13

Domain-based access works now. Next, configure HTTPS; a self-signed certificate is used here.

First generate a private key:

openssl genrsa -out boge.key 2048

Then generate a TLS certificate from the key (note: I use *.boge.com, i.e. a wildcard certificate, so any third-level domain added later can reuse it):

openssl req -new -x509 -key boge.key -out boge.csr -days 360 -subj /CN=*.boge.com

Create the certificate as a secret in the cluster namespace: kubectl -n <namespace> create secret tls boge-com-tls (this is the secret/certificate name) --key boge.key (private key file) --cert boge.csr (certificate file)

kubectl get secret
NAME           TYPE                DATA   AGE
boge-com-tls   kubernetes.io/tls   2      25m

Then edit nginx.yaml:

nginx.ingress.kubernetes.io/force-ssl-redirect: "false" # change to "true"
rewrite ^ http://www.boge.com$request_uri permanent; # change http:// to https://
spec:
  tls:
  - hosts:
    - www.boge.com 
    - boge.com
    - m.boge.com
    secretName: boge-com-tls  # the name of the certificate secret imported into the cluster

Re-apply nginx.yaml.

Now an HTTP request from the command line returns a 301 redirect:

curl http://www.boge.com
<html>
<head><title>301 Moved Permanently</title></head>
<body>
<center><h1>301 Moved Permanently</h1></center>
<hr><center>nginx</center>
</body>
</html>

So when we open http://www.boge.com in Chrome it redirects to https://www.boge.com; since the certificate is self-signed the browser flags it as not secure, so just continue past the warning.
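
The redirect and the certificate can also be checked from the command line (a sketch; -k skips verification because the certificate is self-signed):

curl -I http://www.boge.com                                       # expect HTTP/1.1 301 Moved Permanently
curl -kv https://www.boge.com 2>&1 | grep -iE "subject:|HTTP/"    # shows CN=*.boge.com and the response status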

1.11 - 011 A canary (gray) release based on case 010

Perform a canary release: send 50% of the traffic to the old nginx and 50% to the new nginx.
kubectl create deployment old-nginx --image=nginx:1.21.6 --replicas=1
deployment.apps/old-nginx created

kubectl expose deployment old-nginx --port=80 --target-port=80
service/old-nginx exposed

# Edit nginx.yaml
# In the kind: Ingress resource, add the following under metadata.annotations
nginx.ingress.kubernetes.io/service-weight: |
    new-nginx: 50, old-nginx: 50

Under spec.rules, add the following to http.paths of the host: www.boge.com entry:
          - backend:
              service:
                name: old-nginx  # the old-version service
                port:
                  number: 80
            path: /
            pathType: Prefix

# Finally, re-apply nginx.yaml

kubectl apply -f nginx.yaml 
service/new-nginx unchanged
ingress.networking.k8s.io/new-nginx configured
deployment.apps/new-nginx unchanged

Now when you hit https://www.boge.com/ in a browser, roughly half of the requests land on new-nginx and half on old-nginx.
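
The split can also be eyeballed from the command line (a rough sketch: new-nginx serves the pod-name line written by its sidecar, while old-nginx serves the default welcome page, so counting which responses contain "new-nginx" approximates the weights):

for i in $(seq 1 20); do curl -sk https://www.boge.com/ | grep -c new-nginx; done | sort | uniq -c
# roughly half of the 20 requests should print 1 (new-nginx) and half 0 (old-nginx)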

1.12 - 012 HPA: automatic horizontal pod scaling

  1. Prepare a Service and a Deployment
apiVersion: v1
kind: Service
metadata:
  creationTimestamp: null
  labels:
    app: web
  name: web
spec:
  ports:
  - port: 80
    protocol: TCP
    targetPort: 80
  selector:
    app: web
status:
  loadBalancer: {}

---
    
apiVersion: apps/v1
kind: Deployment
metadata:
  labels:
    app: web
  name: web
  namespace: default
spec:
  replicas: 1
  selector:
    matchLabels:
      app: web
  template:
    metadata:
      labels:
        app: web
    spec:
      containers:
      - image: nginx:1.21.6
        name: nginx
        resources:
          limits:   # this is a test environment, so only 50 millicores of CPU (0.05 cores) and 20Mi of memory are allocated
            cpu: "50m"
            memory: 20Mi
          requests: # guarantees the pod can get this much from the start
            cpu: "50m"
            memory: 20Mi
  2. Create an HPA
# autoscale creates a horizontal pod autoscaler
# web is the name of the HPA
# --max=3 scales out to at most 3 replicas
# --min=1 scales in to at least 1 replica
# --cpu-percent=30 scales out when CPU utilization exceeds 30%
kubectl  autoscale deployment web --max=3 --min=1 --cpu-percent=30
kubectl get hpa -w
  3. In another terminal, start a temporary pod
kubectl run -it --rm busybox --image=registry.cn-shanghai.aliyuncs.com/acs/busybox:v1.29.2 -- sh
/ # while :;do wget -q -O- http://web;done
  4. Back in the first terminal
# Output of kubectl get hpa -w
NAME   REFERENCE        TARGETS       MINPODS   MAXPODS   REPLICAS   AGE
web    Deployment/web   cpu: 0%/30%   1         3         1          30s
web    Deployment/web   cpu: 58%/30%   1         3         1          107s
web    Deployment/web   cpu: 100%/30%   1         3         2          2m4s
web    Deployment/web   cpu: 100%/30%   1         3         3          2m22s
web    Deployment/web   cpu: 95%/30%    1         3         3          2m35s

# At this point you can exit the watch

# Describe the hpa named web
kubectl describe hpa web

Name:                                                  web
Namespace:                                             default
Labels:                                                <none>
Annotations:                                           <none>
CreationTimestamp:                                     Thu, 10 Jul 2025 16:58:31 +0800
Reference:                                             Deployment/web
Metrics:                                               ( current / target )
  resource cpu on pods  (as a percentage of request):  76% (38m) / 30%
Min replicas:                                          1
Max replicas:                                          3
Deployment pods:                                       3 current / 3 desired
Conditions:
  Type            Status  Reason               Message
  ----            ------  ------               -------
  AbleToScale     True    ScaleDownStabilized  recent recommendations were higher than current one, applying the highest recent recommendation
  ScalingActive   True    ValidMetricFound     the HPA was able to successfully calculate a replica count from cpu resource utilization (percentage of request)
  ScalingLimited  True    TooManyReplicas      the desired replica count is more than the maximum replica count
Events:
  Type    Reason             Age   From                       Message
  ----    ------             ----  ----                       -------
  Normal  SuccessfulRescale  101s  horizontal-pod-autoscaler  New size: 2; reason: cpu resource utilization (percentage of request) above target
  Normal  SuccessfulRescale  84s   horizontal-pod-autoscaler  New size: 3; reason: cpu resource utilization (percentage of request) above target
  5. Stop the infinite loop in the temporary pod and keep watching the HPA (the scale-down only takes effect roughly five minutes after the load stops)
kubectl get hpa -w
NAME   REFERENCE        TARGETS        MINPODS   MAXPODS   REPLICAS   AGE
web    Deployment/web   cpu: 68%/30%   1         3         3          5m47s
web    Deployment/web   cpu: 83%/30%   1         3         3          5m54s
web    Deployment/web   cpu: 68%/30%   1         3         3          6m9s
web    Deployment/web   cpu: 0%/30%    1         3         3          6m24s (drops to 0% at 6m24s)

kubectl get hpa -w
NAME   REFERENCE        TARGETS       MINPODS   MAXPODS   REPLICAS   AGE
web    Deployment/web   cpu: 0%/30%   1         3         3          9m45s

kubectl get hpa -w
NAME   REFERENCE        TARGETS       MINPODS   MAXPODS   REPLICAS   AGE
web    Deployment/web   cpu: 0%/30%   1         3         3          11m
web    Deployment/web   cpu: 0%/30%   1         3         3          11m
web    Deployment/web   cpu: 0%/30%   1         3         1          11m (scales in to 1 replica at 11m)
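
The same autoscaler can also be written declaratively. A minimal sketch of the equivalent autoscaling/v2 object, applied via a heredoc (the field values mirror the kubectl autoscale flags used above):

kubectl apply -f - <<'EOF'
apiVersion: autoscaling/v2
kind: HorizontalPodAutoscaler
metadata:
  name: web
spec:
  scaleTargetRef:
    apiVersion: apps/v1
    kind: Deployment
    name: web
  minReplicas: 1
  maxReplicas: 3
  metrics:
  - type: Resource
    resource:
      name: cpu
      target:
        type: Utilization
        averageUtilization: 30
EOF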

2 - Helm

Helm's official website: https://helm.sh/zh/

Helm is a package manager for Kubernetes: it installs Helm Charts into a Kubernetes cluster. A Helm Chart is a reusable package that contains all the required resource definitions, such as Deployment, Service, Ingress, Secret, ConfigMap and so on.

Helm is to k8s roughly what apt is to Ubuntu or yum is to CentOS.

For installation, just follow the official documentation.

Shell completion is documented here: https://helm.sh/zh/docs/helm/helm_completion/

For a bash environment,

https://helm.sh/zh/docs/helm/helm_completion_bash/ boils down to:

Enable completion for the current session:

source <(helm completion bash)

Load completion for every new session (see the docs for other systems):

Linux: helm completion bash | sudo tee /etc/bash_completion.d/helm

A related article from the Alibaba Cloud developer community: https://developer.aliyun.com/article/1473220

Command walkthrough:

# Add chart repositories; several are added here
 
[root@master01 linux-amd64]# helm repo add stable http://mirror.azure.cn/kubernetes/charts
"stable" has been added to your repositories
 
[root@master01 linux-amd64]# helm repo add aliyun https://kubernetes.oss-cn-hangzhou.aliyuncs.com/charts
"aliyun" has been added to your repositories
 
# Update all repositories
 
[root@master01 linux-amd64]# helm repo update
Hang tight while we grab the latest from your chart repositories...
...Successfully got an update from the "aliyun" chart repository
...Successfully got an update from the "stable" chart repository
Update Complete. ⎈Happy Helming!⎈
 
# List all configured Helm repositories
 
[root@master01 linux-amd64]# helm repo list
NAME    URL
stable  http://mirror.azure.cn/kubernetes/charts
aliyun  https://kubernetes.oss-cn-hangzhou.aliyuncs.com/charts
 
 
# Search the configured repositories for installable charts, using Redis as an example
# Charts from both repositories show up
 
[root@master01 linux-amd64]# helm search repo redis
NAME                                    CHART VERSION   APP VERSION     DESCRIPTION
aliyun/redis                            1.1.15          4.0.8           Open source, advanced key-value store. It is of...
aliyun/redis-ha                         2.0.1                           Highly available Redis cluster with multiple se...
stable/prometheus-redis-exporter        3.5.1           1.3.4           DEPRECATED Prometheus exporter for Redis metrics
stable/redis                            10.5.7          5.0.7           DEPRECATED Open source, advanced key-value stor...
stable/redis-ha                         4.4.6           5.0.6           DEPRECATED - Highly available Kubernetes implem...
aliyun/sensu                            0.2.0                           Sensu monitoring framework backed by the Redis ...
stable/sensu                            0.2.5           0.28            DEPRECATED Sensu monitoring framework backed by...
 
# Remove a repository
helm repo remove aliyun
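
A typical install/uninstall flow built on the repositories above (a sketch: the release name my-redis and the target namespace are made-up examples, and real deployments would customize chart values with --set or -f):

helm install my-redis aliyun/redis -n redis --create-namespace   # install a chart as a release
helm list -n redis                                               # list releases in that namespace
helm status my-redis -n redis                                    # show release status and notes
helm uninstall my-redis -n redis                                 # remove the release again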

3 - K8S Application Notes

K8S resources

  1. Namespace:

    k8s uses namespaces to isolate resources; abbreviated ns.

    When fetching any resource, if no namespace is specified explicitly, the default namespace is used.

  2. Pod:

    The smallest unit of execution in K8s (a rough analogy: a pod is more or less a container started with docker).

  3. Controller:

    In production, pod lifecycles are generally managed through controllers. Each replica managed by a controller is a pod; pods are created and destroyed by adjusting the replica count.

    Commonly used controllers are Deployment, DaemonSet, StatefulSet, Job and CronJob.

    The most common one is Deployment (for stateless applications).

  4. replicas: replica count

    A Deployment specifies its replica count via the replicas field; the ReplicaSet it creates is abbreviated rs.

  5. service: pods, whether created directly or through a deployment, are resources inside the k8s cluster and can only be reached via cluster-internal IPs, not from outside, so a Service is needed to expose access to them.

    The four common service types:

    1. ClusterIP: access inside the cluster only; a stable virtual IP is allocated.
    2. NodePort: access from outside the cluster; a fixed port (range 30000-32767) is opened on every node and traffic is forwarded from it to the Service.
    3. LoadBalancer: access from outside the cluster, mainly in cloud environments; an external load balancer is created for the Service and routes traffic to it.
    4. ExternalName: maps the Service to a service outside the cluster by returning a CNAME record.
  6. endpoints (v1 Endpoints is deprecated in v1.33+; use discovery.k8s.io/v1 EndpointSlice): deprecated since 1.33

  7. EndpointSlice: describes the actual access points of a Service; it contains the IP addresses and ports of the Pods backing the Service.

    This information is the key basis on which Kubernetes implements service discovery and traffic distribution.
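
    A quick way to inspect the EndpointSlices behind a Service (a sketch; it assumes a Service named web exists in the current namespace):

    kubectl get endpointslices -l kubernetes.io/service-name=web
    kubectl describe svc web      # the Endpoints shown here are derived from these slices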

Create

  1. Create a namespace

    
    kubectl create ns ${nsName}
    
    # creating a deployment (app name / image address / replica count) is shown in item 3 below
    
  2. Create (run) a pod

    kubectl run ${podName} --image=${imageName}:${imageTag}
    
  3. Create a deployment application

    kubectl create deployment ${deploymentName} --image=${imageName}:${imageTag} --replicas=${replicas}
    
  4. Generate a deployment manifest file

    kubectl create deployment nginx --image=nginx --dry-run=client -o yaml > nginx.yaml
    
    apiVersion: apps/v1
    # resource type
    kind: Deployment
    # metadata; name is required
    metadata:
      creationTimestamp: null
      labels:
        app: nginx
      name: nginx
    # spec (specification)
    spec:
      # replica count
      replicas: 1
      selector:
        matchLabels:
          app: nginx
      strategy: {}
      # the Pod template, the key part of this manifest
      template:
        # metadata defines the Pod's metadata; define at least one label, whose key and value can be anything
        metadata:
          creationTimestamp: null
          labels:
            app: nginx
        # spec describes the Pod: the attributes of every container in it; name and image are required
        spec:
          containers:
          - image: nginx
            name: nginx
            resources: {}
    status: {}
    
  5. Create a service (svc)

    # Create a new service that maps port 5000 to pod port 80
    kubectl create service clusterip new-nginx --tcp=5000:80 
    
    # Create a svc from an existing deployment (named new-nginx) with type NodePort; --port is the port the svc exposes
    kubectl expose deployment new-nginx --type=NodePort --port=81 --target-port=80 --dry-run=client -o yaml > nginx-svc.yaml
    
    apiVersion: v1       # <<<<<<  v1 is the apiVersion for Service
    kind: Service        # <<<<<<  declares the resource type as Service
    metadata:
      creationTimestamp: null
      labels:
        app: new-nginx
      name: new-nginx       # <<<<<<  the Service is named new-nginx
    spec:
      ports:
      - port: 81        # <<<<<<  map Service port 81 to Pod port 80 over TCP
        protocol: TCP
        targetPort: 80
      selector:
        app: new-nginx     # <<<<<<  the selector picks Pods labeled app: new-nginx as the Service's backends
    status:
      loadBalancer: {}
    
  6. Add a TLS certificate (secret) to a namespace

kubectl -n <namespace> create secret tls boge-com-tls (the secret/certificate name) --key boge.key (private key path) --cert boge.csr (certificate path)

Delete

  1. Generic form

    # Resource type and name are required when deleting
    kubectl delete ${resourceType} ${resourceName}
    

If a namespace gets stuck in Terminating when you delete it, the following function force-finalizes it:

deleteK8sNamespace() {
    set -eo pipefail

    die() { echo "$*" 1>&2 ; exit 1; }

    need() {
            which "$1" &>/dev/null || die "Binary '$1' is missing but required"
    }

    # checking pre-reqs

    need "jq"
    need "curl"
    need "kubectl"

    PROJECT="$1"
    shift

    test -n "$PROJECT" || die "Missing arguments: kill-ns <namespace>"

    kubectl proxy &>/dev/null &
    PROXY_PID=$!
    killproxy () {
            kill $PROXY_PID
    }
    trap killproxy EXIT

    sleep 1 # give the proxy a second

    kubectl get namespace "$PROJECT" -o json | jq 'del(.spec.finalizers[] | select("kubernetes"))' | curl -s -k -H "Content-Type: application/json" -X PUT -o /dev/null --data-binary @- http://localhost:8001/api/v1/namespaces/$PROJECT/finalize && echo "Killed namespace: $PROJECT"

}
deleteK8sNamespace <namespace-to-delete>

Update

  1. Change a deployment's replica count

    kubectl scale deployment nginx --replicas=2
    
  2. Change a deployment's image

    root@k8s-192-168-0-17:/home/node1# kubectl get deployment -o wide
    NAME    READY   UP-TO-DATE   AVAILABLE   AGE   CONTAINERS   IMAGES                           SELECTOR
    nginx   2/2     2            2           11m   nginx        docker.io/library/nginx:1.25.1   app=nginx
    # ${deploymentName} is the value of the NAME column above; ${containerName} is the CONTAINERS column
    kubectl set image deployments/${deploymentName} ${containerName}=${imageAddress}:${imageTag}
    # So the commands for this change are:
    # previously: kubectl set image ...... --replicas  (the --replicas flag was deprecated after Kubernetes v1.18.0)
    # 1. change the image
    kubectl set image deployments/nginx nginx=nginx:1.21.6
    # 2. then add the change-cause annotation
    kubectl annotate deployment/nginx kubernetes.io/change-cause="image updated to 1.21.6"
    
  3. Roll back

    # 1. check the release history first
    kubectl rollout history deployment ${deploymentName}
    deployment.apps/nginx 
    REVISION  CHANGE-CAUSE
    1         <none>
    2         image updated to 1.25.1
    3         image updated to 1.21.6
    # 2. roll back to one of the revision indexes from the history
    kubectl rollout undo deployment ${deploymentName} --to-revision=2
    
  4. Change a svc's service type

    kubectl patch svc ${svcName} -p '{"spec":{"type":"NodePort"}}'
    
  5. Label a node

    #kubectl label node 10.0.1.201 apps/nginx=true
    kubectl label node ${nodeName} ${labelName}=${labelValue}
    kubectl label ${resourceType} ${resourceName} ${labelName}=${labelValue}
    
  6. Remove a label from a node

    kubectl label node ${nodeName} ${labelName}-
    

Query

  1. List resources of one type

    ## List resources of one type; the resource name is optional
    ## Except when listing namespaces themselves, every resource type accepts -n to specify a namespace.
    kubectl (-n ${nsName}) get ${resourceType} (${resourceName}) -o wide
    
  2. List resources of several types

    kubectl (-n ${nsName}) get ${resourceType},${resourceType}  -o wide
    

    kubectl describe ${resourceType} ${resourceName}

  3. Exec into a pod container

    kubectl -it exec ${podName} -- bash    (usually bash or sh; any other command can be run the same way)
    
  4. View the yaml of an existing resource

    sudo kubectl get ${resourceType} ${resourceName} -o yaml
    
  5. View labels

    kubectl get node ${nodeName} --show-labels
    
  6. View the cluster's certificates
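
    A sketch of how to check certificate expiry (assuming either a kubeadm-managed control plane, or certificate files under an installer-specific path such as /etc/kubernetes/ssl/):

    kubeadm certs check-expiration                                     # kubeadm clusters only
    openssl x509 -in /etc/kubernetes/ssl/apiserver.pem -noout -dates   # path is an assumption; adjust to your installer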

Health checks

When a container exits with a non-zero return code it is considered failed, and k8s acts according to the restartPolicy.

The restartPolicy values are:

Always: always restart. OnFailure: restart only when the container exits with a non-zero code. Never: never restart.

k8s pods can use Liveness and Readiness probes to check whether a pod is healthy.

  1. Health checks with a Liveness probe

    apiVersion: v1
    kind: Pod
    metadata:
      labels:
        test: liveness
      name: liveness
    spec:
      restartPolicy: OnFailure
      containers:
      - name: liveness
        image: registry.cn-hangzhou.aliyuncs.com/acs/busybox:v1.29.2
        # create /tmp/healthy right after start, then delete it 30 seconds later
        args:
        - /bin/sh
        - -c
        - touch /tmp/healthy; sleep 30; rm -f /tmp/healthy; sleep 600
        livenessProbe:
          exec:
            command:
            - cat
            - /tmp/healthy
          initialDelaySeconds: 10   # start probing 10 seconds after the container starts
          periodSeconds: 5          # run cat /tmp/healthy every 5 seconds
    
  2. Health checks with a Readiness probe

    A Readiness probe tells K8s when a pod may be added to the Service's load-balancing pool and start serving traffic. This matters a lot when releasing new versions in production: if the new version has a bug, the readiness check catches it and no traffic is routed to that pod, keeping the failure contained inside the cluster. In production it is recommended to always configure Readiness; Liveness is optional, because sometimes you want to keep the failed pod around to read its logs, locate the problem and hand it to the developers to fix. (A few commands for observing probes are sketched at the end of this item.)

    Liveness and Readiness are two Health Check mechanisms. If neither is configured explicitly, Kubernetes applies the same default behaviour to both: success is judged by whether the container's startup process returns a zero exit code.

    Both probes are configured in exactly the same way and support the same parameters.

    They differ in what happens when a check fails: a failed Liveness check restarts the container; a failed Readiness check marks the container as unavailable so it no longer receives requests forwarded by the Service.

    Liveness and Readiness checks run independently of each other, with no dependency between them, so they can be used separately or together.

    Use Liveness to decide whether a container needs a restart to self-heal; use Readiness to decide whether a container is ready to serve traffic.

    The configuration syntax of a Readiness probe is exactly the same as that of a Liveness probe.

    apiVersion: v1
    kind: Pod
    metadata:
      labels:
        test: readiness
      name: readiness
    spec:
      restartPolicy: OnFailure
      containers:
      - name: readiness
        image: registry.cn-hangzhou.aliyuncs.com/acs/busybox:v1.29.2
        args:
        - /bin/sh
        - -c
        - touch /tmp/healthy; sleep 30; rm -f /tmp/healthy; sleep 600
        readinessProbe:    # just replace livenessProbe with readinessProbe; all other settings are the same
          exec:
            command:
            - cat
            - /tmp/healthy
          #initialDelaySeconds: 10   # start probing 10 seconds after the container starts
          periodSeconds: 5          # probe again every 5 seconds
        startupProbe:  # a startup probe is more flexible and neatly replaces a fixed initialDelaySeconds wait: during startup it probes every 3 seconds, up to 100 times
          exec:
            command:
            - cat
            - /tmp/healthy
          failureThreshold: 100
          periodSeconds: 3
          timeoutSeconds: 1
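
    A few ways to watch the probes above in action (a sketch using the pod names liveness and readiness from these manifests):

    kubectl describe pod liveness | grep -A3 "Liveness"   # probe settings and recent probe failures
    kubectl get pod readiness -w                          # READY flips to 0/1 once /tmp/healthy is removed
    kubectl get events --field-selector involvedObject.name=liveness --sort-by=.lastTimestamp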
    

Deploy a production-grade Ingress-Nginx controller

Open the yaml file.

Must read:

  1. On a large production cluster, ingress-nginx should have a node to itself: that node runs only ingress-nginx and no other pods.

  2. In the kind: ConfigMap section, data.worker-processes should equal the CPU core count of the node hosting the ingress-nginx-controller pod (ideally the real core count minus one).

  3. In the kind: ConfigMap section, data.worker-cpu-affinity is currently empty; just leave it empty.

  4. kind: DaemonSet: on a self-built cluster use a DaemonSet controller, which maps the container ports onto the host so no extra NodePort mapping is needed. On a managed cloud cluster such as Alibaba Cloud ACK, use a Deployment controller instead, because ACK pods use the cloud host's elastic network interfaces and can sit in the same network (subnet) as your cloud hosts. This manifest therefore defaults to kind: DaemonSet; if you switch to kind: Deployment, check the config items marked "Deployment need" and "DaemonSet need".

  5. For the resources section of the DaemonSet/Deployment: if limits and requests are set to the same values, the pod gets the highest priority in the cluster. When cluster resources run short, k8s evicts lower-priority pods first, so the high-priority ones are protected.

  6. If the logs report "mount: mounting rw on /proc/sys failed: Permission denied", uncomment the three items privileged: true, procMount: Default and runAsUser: 0; if the error does not appear, leave them alone.

  7. Label the chosen node:

      nodeSelector:
        boge/ingress-controller-ready: "true"

Add the label:    kubectl label node ${nodeHostname} boge/ingress-controller-ready=true
Show labels:      kubectl get node --show-labels
Remove the label: kubectl label node ${nodeHostname} boge/ingress-controller-ready-

  8. If ingress-nginx is deployed on its own dedicated node, then after labeling that node it is best to taint it as well.
    Add the taint:    kubectl taint nodes xx.xx.xx.xx boge/ingress-controller-ready="true":NoExecute
    Remove the taint: kubectl taint nodes xx.xx.xx.xx boge/ingress-controller-ready:NoExecute-
    If the node is tainted, uncomment the block below:
    tolerations:
    - effect: NoExecute # NoExecute is the taint's eviction effect: running pods that do not tolerate it get evicted
      key: boge/ingress-controller-ready
      operator: Equal # the value must match exactly (with Exists, only the key has to exist)
      value: "true"

This is a Kubernetes pod Toleration definition; it controls whether a pod may be scheduled onto nodes carrying a particular Taint.
So the configuration above means:

  • The pods are allowed to be scheduled onto nodes tainted with boge/ingress-controller-ready=true:NoExecute, which keeps them running only on those dedicated nodes.