1 - 001 Create a Pod named nginx

```shell
# Create a Pod imperatively from the command line
kubectl run nginx --image=docker.io/library/nginx:1.21.6
pod/nginx created

# List the Pods in the default namespace
kubectl get pod -o wide
    NAME    READY   STATUS    RESTARTS   AGE   IP              NODE               NOMINATED NODE   READINESS GATES
    nginx   1/1     Running   0          17s   172.20.177.22   k8s-192-168-0-19   <none>           <none>

# Access the Pod by its IP
curl 172.20.177.22
<!DOCTYPE html>
    <html>
    <head>
    <title>Welcome to nginx!</title>
    <style>
    html { color-scheme: light dark; }
    body { width: 35em; margin: 0 auto;
    font-family: Tahoma, Verdana, Arial, sans-serif; }
    </style>
    </head>
    <body>
    <h1>Welcome to nginx!</h1>
    <p>If you see this page, the nginx web server is successfully installed and
    working. Further configuration is required.</p>

    <p>For online documentation and support please refer to
    <a href="http://nginx.org/">nginx.org</a>.<br/>
    Commercial support is available at
    <a href="http://nginx.com/">nginx.com</a>.</p>

    <p><em>Thank you for using nginx.</em></p>
    </body>
    </html>
```
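
For comparison, the same Pod can be created declaratively instead of with `kubectl run`. A minimal sketch, with some assumptions on my part: the heredoc form is a convenience, and the `run: nginx` label mirrors what `kubectl run` adds automatically:

```shell
# Declarative equivalent of `kubectl run nginx --image=docker.io/library/nginx:1.21.6`
cat <<EOF | kubectl apply -f -
apiVersion: v1
kind: Pod
metadata:
  name: nginx
  labels:
    run: nginx          # kubectl run sets this label by default
spec:
  containers:
  - name: nginx
    image: docker.io/library/nginx:1.21.6
EOF
```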

2 - 002 Exec into the Pod named nginx

This example builds on example 001.

```shell
kubectl exec -it nginx -- bash
    root@nginx:/# echo 'Hello K8S' > /usr/share/nginx/html/index.html
    root@nginx:/# exit
    exit
```

```shell
curl 172.20.177.22
    Hello K8S
```
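
If you only need one command rather than an interactive shell, `kubectl exec` also works non-interactively; a small sketch:

```shell
# Run a single command inside the nginx Pod, no shell session needed
kubectl exec nginx -- cat /usr/share/nginx/html/index.html
# Hello K8S
```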

3 - 003 Create a Deployment named nginx

This example builds on example 001.

```shell
kubectl create deployment nginx --image=docker.io/library/nginx:1.21.6
    deployment.apps/nginx created

kubectl get deployment -w
    NAME    READY   UP-TO-DATE   AVAILABLE   AGE
    nginx   0/1     1            0           15s
    nginx   1/1     1            1           15s

kubectl get pod -o wide
    NAME                     READY   STATUS    RESTARTS   AGE   IP              NODE               NOMINATED NODE   READINESS GATES
    # Note the segments of the Pod name: 784757bdfb is the hash of the owning ReplicaSet
    nginx-784757bdfb-z6gd6   1/1     Running   0          45s   172.20.177.24   k8s-192-168-0-19   <none>           <none>

kubectl scale deployment nginx --replicas=2
    deployment.apps/nginx scaled

kubectl get deployment -w
    NAME    READY   UP-TO-DATE   AVAILABLE   AGE
    nginx   1/2     2            1           70s
    nginx   2/2     2            2           75s

kubectl get pod -o wide
    NAME                     READY   STATUS    RESTARTS   AGE   IP               NODE               NOMINATED NODE   READINESS GATES
    nginx-784757bdfb-2q58h   1/1     Running   0          25s   172.20.182.149   k8s-192-168-0-11   <none>           <none>
    nginx-784757bdfb-z6gd6   1/1     Running   0          85s   172.20.177.24    k8s-192-168-0-19   <none>           <none>
```
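
`kubectl scale` is an imperative one-off; the same change can also be made with a patch, which is easier to script. A sketch:

```shell
# Alternative to `kubectl scale`: patch the replica count directly
kubectl patch deployment nginx -p '{"spec":{"replicas":2}}'
```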

4 - 004 Simulate a Deployment release and rollback

Create a Deployment named nginx with 2 initial replicas, then change the nginx image tag, and finally roll back to the previous version.
```shell
root@k8s-192-168-0-17:/home/node1# kubectl create deployment nginx --image=docker.io/library/nginx:1.21.6 --replicas=2
deployment.apps/nginx created
root@k8s-192-168-0-17:/home/node1# kubectl get deployment nginx -o wide -w
NAME    READY   UP-TO-DATE   AVAILABLE   AGE   CONTAINERS   IMAGES                           SELECTOR
nginx   2/2     2            2           22s   nginx        docker.io/library/nginx:1.21.6   app=nginx
root@k8s-192-168-0-17:/home/node1# kubectl get pod -o wide
NAME                     READY   STATUS    RESTARTS   AGE   IP               NODE               NOMINATED NODE   READINESS GATES
nginx-784757bdfb-bs4rt   1/1     Running   0          42s   172.20.182.150   k8s-192-168-0-11   <none>           <none>
nginx-784757bdfb-jjmzv   1/1     Running   0          43s   172.20.177.25    k8s-192-168-0-19   <none>           <none>
# Note the nginx version in the 404 page below
root@k8s-192-168-0-17:/home/node1# curl 172.20.182.150/1
<html>
<head><title>404 Not Found</title></head>
<body>
<center><h1>404 Not Found</h1></center>
<hr><center>nginx/1.21.6</center>
</body>
</html>
root@k8s-192-168-0-17:/home/node1# kubectl set image deployment/nginx  nginx=docker.io/library/nginx:1.25.1
deployment.apps/nginx image updated
root@k8s-192-168-0-17:/home/node1# kubectl annotate deployment/nginx kubernetes.io/change-cause="image updated to 1.25.1"
deployment.apps/nginx annotated
root@k8s-192-168-0-17:/home/node1# kubectl rollout history deployment nginx
deployment.apps/nginx 
REVISION  CHANGE-CAUSE
1         <none>
2         image updated to 1.25.1
# Note the version has changed to 1.25.1
root@k8s-192-168-0-17:/home/node1# curl 172.20.182.153/1
<html>
<head><title>404 Not Found</title></head>
<body>
<center><h1>404 Not Found</h1></center>
<hr><center>nginx/1.25.1</center>
</body>
</html>
root@k8s-192-168-0-17:/home/node1# kubectl set image deployments/nginx nginx=nginx:1.21.6
deployment.apps/nginx image updated
root@k8s-192-168-0-17:/home/node1# kubectl annotate deployment/nginx kubernetes.io/change-cause="image updated to 1.21.6"
deployment.apps/nginx annotated
root@k8s-192-168-0-17:/home/node1# kubectl get pod -o wide
NAME                     READY   STATUS    RESTARTS   AGE   IP               NODE               NOMINATED NODE   READINESS GATES
nginx-796bdc6f77-66hcm   1/1     Running   0          35s   172.20.182.154   k8s-192-168-0-11   <none>           <none>
nginx-796bdc6f77-n5wng   1/1     Running   0          49s   172.20.177.29    k8s-192-168-0-19   <none>           <none>
# Note the version is back to 1.21.6
root@k8s-192-168-0-17:/home/node1# curl 172.20.182.154/1
<html>
<head><title>404 Not Found</title></head>
<body>
<center><h1>404 Not Found</h1></center>
<hr><center>nginx/1.21.6</center>
</body>
</html>

## Suppose this release turned out to be broken: check the history, then roll back to 1.25.1
root@k8s-192-168-0-17:/home/node1# kubectl rollout history deployment nginx
deployment.apps/nginx 
REVISION  CHANGE-CAUSE
1         <none>
2         image updated to 1.25.1
3         image updated to 1.21.6
# --to-revision=2 is the revision number shown in the history above
root@k8s-192-168-0-17:/home/node1# kubectl rollout undo deployment nginx --to-revision=2
deployment.apps/nginx rolled back
# The version is now rolled back to 1.25.1
root@k8s-192-168-0-17:/home/node1# curl 172.20.177.30/1
<html>
<head><title>404 Not Found</title></head>
<body>
<center><h1>404 Not Found</h1></center>
<hr><center>nginx/1.25.1</center>
</body>
</html>
# Check the ReplicaSets
root@k8s-192-168-0-17:/home/node1# kubectl get rs
NAME               DESIRED   CURRENT   READY   AGE
nginx-784757bdfb   0         0         0       19m
nginx-796bdc6f77   0         0         0       16m
nginx-79df7c55d7   2         2         2       19m
```
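
Before undoing to a revision it can help to confirm what that revision actually contains; `kubectl rollout history` takes a `--revision` flag for this. A sketch:

```shell
# Show the Pod template recorded for revision 2 (should carry the 1.25.1 image)
kubectl rollout history deployment nginx --revision=2
```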

5 - 005 Manually create a Pod that is guaranteed to crash and exit, then watch it

```shell
root@k8s-192-168-0-17:~# kubectl run  busybox --image=busybox --dry-run=client -o yaml > testHealthz.yaml
root@k8s-192-168-0-17:~# vim testHealthz.yaml

apiVersion: v1
kind: Pod
metadata:
  creationTimestamp: null
  labels:
    run: busybox
  name: busybox
spec:
  containers:
  - image: busybox
    name: busybox
    resources: {}
    # Startup args: sleep 10s, then exit with code 1 to simulate a crash
    args:
    - /bin/sh
    - -c
    - sleep 10; exit 1
  dnsPolicy: ClusterFirst
  # Change the default restartPolicy from Always to OnFailure
  restartPolicy: OnFailure
status: {}

root@k8s-192-168-0-17:~# kubectl apply -f testHealthz.yaml
pod/busybox created

root@k8s-192-168-0-17:~# kubectl  get pod -o wide -w
NAME                     READY   STATUS    RESTARTS   AGE   IP              NODE               NOMINATED NODE   READINESS GATES
busybox                  1/1     Running   0          22s   172.20.177.32   k8s-192-168-0-19   <none>           <none>
nginx-796bdc6f77-7r5ts   1/1     Running   0          10m   172.20.177.31   k8s-192-168-0-19   <none>           <none>
busybox                  0/1     Error     0          32s   172.20.177.32   k8s-192-168-0-19   <none>           <none>
busybox                  1/1     Running   1 (8s ago)   38s   172.20.177.32   k8s-192-168-0-19   <none>           <none>
busybox                  0/1     Error     1 (19s ago)   49s   172.20.177.32   k8s-192-168-0-19   <none>           <none>
busybox                  0/1     CrashLoopBackOff   1 (14s ago)   61s   172.20.177.32   k8s-192-168-0-19   <none>           <none>
busybox                  1/1     Running            2 (19s ago)   66s   172.20.177.32   k8s-192-168-0-19   <none>           <none>
busybox                  0/1     Error              2 (30s ago)   77s   172.20.177.32   k8s-192-168-0-19   <none>           <none>
busybox                  0/1     CrashLoopBackOff   2 (13s ago)   89s   172.20.177.32   k8s-192-168-0-19   <none>           <none>
busybox                  1/1     Running            3 (34s ago)   110s   172.20.177.32   k8s-192-168-0-19   <none>           <none>
busybox                  0/1     Error              3 (44s ago)   2m     172.20.177.32   k8s-192-168-0-19   <none>           <none>
busybox                  0/1     CrashLoopBackOff   3 (16s ago)   2m14s   172.20.177.32   k8s-192-168-0-19   <none>           <none>
busybox                  1/1     Running            4 (48s ago)   2m46s   172.20.177.32   k8s-192-168-0-19   <none>           <none>
busybox                  0/1     Error              4 (58s ago)   2m56s   172.20.177.32   k8s-192-168-0-19   <none>           <none>
busybox                  0/1     CrashLoopBackOff   4 (15s ago)   3m10s   172.20.177.32   k8s-192-168-0-19   <none>           <none>

# The Pod keeps cycling through Running / Error / CrashLoopBackOff; the kubelet restarts it with an exponential back-off delay (10s, 20s, 40s, ...), capped at 5 minutes
```
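
To see the back-off from the API side rather than from the watch output, the container status records the restart count and the last termination state; a small sketch:

```shell
# Restart count, and the exit code of the previous container instance
kubectl get pod busybox -o jsonpath='{.status.containerStatuses[0].restartCount}{"\n"}'
kubectl get pod busybox -o jsonpath='{.status.containerStatuses[0].lastState.terminated.exitCode}{"\n"}'
# Expected: a growing integer, and exit code 1 from `exit 1`
```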

6 - 006 Use a readiness probe to keep unhealthy Pods out of service during a failed release

Deploy a Deployment named mytest with 10 replicas, then simulate a release that fails, and use a readiness probe to make sure unhealthy Pods receive no requests.
1. Prepare two Deployment manifests

```yaml
# cat myapp-v1.yaml -- this version passes the readiness check

apiVersion: apps/v1
kind: Deployment
metadata:
  name: mytest
spec:
  replicas: 10     # run 10 Pods
  selector:
    matchLabels:
      app: mytest
  template:
    metadata:
      labels:
        app: mytest
    spec:
      containers:
      - name: mytest
        image: registry.cn-hangzhou.aliyuncs.com/acs/busybox:v1.29.2
        args:
        - /bin/sh
        - -c
        - sleep 10; touch /tmp/healthy; sleep 30000
        readinessProbe:
          exec:
            command:
            - cat
            - /tmp/healthy
          initialDelaySeconds: 10
          periodSeconds: 5

# cat myapp-v2.yaml -- v2 cannot pass the probe, simulating a failed release

apiVersion: apps/v1
kind: Deployment
metadata:
  name: mytest
spec:
  strategy:
    rollingUpdate:
      maxSurge: 35%        # max total replicas during the rollout (base 10): 10 + 10 * 35% = 13.5 --> 14
      maxUnavailable: 35%  # bounds unavailability (both default to 25%); minimum available: 10 - 10 * 35% = 6.5 --> 7
  replicas: 10
  selector:
    matchLabels:
      app: mytest
  template:
    metadata:
      labels:
        app: mytest
    spec:
      containers:
      - name: mytest
        image: registry.cn-hangzhou.aliyuncs.com/acs/busybox:v1.29.2
        args:
        - /bin/sh
        - -c
        - sleep 30000   # /tmp/healthy is never created here, so the probe below is bound to fail
        readinessProbe:
          exec:
            command:
            - cat
            - /tmp/healthy
          initialDelaySeconds: 10
          periodSeconds: 5

```

2. Apply myapp-v1.yaml

```shell
kubectl apply -f myapp-v1.yaml
# Don't forget to record a change-cause
kubectl annotate deployment/mytest kubernetes.io/change-cause="kubectl apply --filename=myapp-v1.yaml"
# After a short while every Pod shows Running
root@k8s-192-168-0-17:~# kubectl get pod -o wide 
NAME                      READY   STATUS    RESTARTS   AGE    IP               NODE               NOMINATED NODE   READINESS GATES
mytest-59887f89f5-fq6hv   1/1     Running   0          112s   172.20.182.159   k8s-192-168-0-11   <none>           <none>
mytest-59887f89f5-gpsnx   1/1     Running   0          113s   172.20.182.157   k8s-192-168-0-11   <none>           <none>
mytest-59887f89f5-gwkmg   1/1     Running   0          113s   172.20.177.33    k8s-192-168-0-19   <none>           <none>
mytest-59887f89f5-ltdw9   1/1     Running   0          115s   172.20.182.156   k8s-192-168-0-11   <none>           <none>
mytest-59887f89f5-m4vkn   1/1     Running   0          112s   172.20.177.37    k8s-192-168-0-19   <none>           <none>
mytest-59887f89f5-m9z2t   1/1     Running   0          112s   172.20.182.160   k8s-192-168-0-11   <none>           <none>
mytest-59887f89f5-mq9n6   1/1     Running   0          113s   172.20.177.35    k8s-192-168-0-19   <none>           <none>
mytest-59887f89f5-nwsc9   1/1     Running   0          115s   172.20.177.34    k8s-192-168-0-19   <none>           <none>
mytest-59887f89f5-pzm68   1/1     Running   0          115s   172.20.177.36    k8s-192-168-0-19   <none>           <none>
mytest-59887f89f5-qd74c   1/1     Running   0          113s   172.20.182.158   k8s-192-168-0-11   <none>           <none>
```

3. Apply myapp-v2.yaml

```shell
kubectl apply -f myapp-v2.yaml
# Don't forget to record a change-cause
kubectl annotate deployment/mytest kubernetes.io/change-cause="kubectl apply --filename=myapp-v2.yaml"
# After a while the deployment output stabilizes at the following
root@k8s-192-168-0-17:~# kubectl get deployment mytest
NAME     READY   UP-TO-DATE   AVAILABLE   AGE
mytest   7/10    7            7           3m43s
# READY: only 7 Pods are ready and serving right now
# UP-TO-DATE: the number of replicas updated to the new template, i.e. the 7 new replicas
# AVAILABLE: the number of replicas currently in the READY state

# list the Pods
root@k8s-192-168-0-17:~# kubectl get pod
NAME                      READY   STATUS    RESTARTS   AGE
mytest-59887f89f5-fq6hv   1/1     Running   0          5m9s
mytest-59887f89f5-gpsnx   1/1     Running   0          5m10s
mytest-59887f89f5-gwkmg   1/1     Running   0          5m10s
mytest-59887f89f5-ltdw9   1/1     Running   0          5m12s
mytest-59887f89f5-m9z2t   1/1     Running   0          5m9s
mytest-59887f89f5-pzm68   1/1     Running   0          5m12s
mytest-59887f89f5-qd74c   1/1     Running   0          5m10s
mytest-8586c6547d-6sqwt   0/1     Running   0          2m19s
mytest-8586c6547d-b9kql   0/1     Running   0          2m20s
mytest-8586c6547d-cgkrj   0/1     Running   0          2m7s
mytest-8586c6547d-dw6kv   0/1     Running   0          2m18s
mytest-8586c6547d-ht4dq   0/1     Running   0          2m19s
mytest-8586c6547d-v7rh9   0/1     Running   0          2m8s
mytest-8586c6547d-vqn6w   0/1     Running   0          2m7s

# inspect the Deployment
root@k8s-192-168-0-17:~# kubectl describe deployment mytest
...
Replicas:               10 desired | 7 updated | 14 total | 7 available | 7 unavailable
...
Events:
Type    Reason             Age    From                   Message
----    ------             ----   ----                   -------
Normal  ScalingReplicaSet  5m46s  deployment-controller  Scaled up replica set mytest-59887f89f5 from 0 to 10
Normal  ScalingReplicaSet  2m52s  deployment-controller  Scaled up replica set mytest-8586c6547d from 0 to 4
Normal  ScalingReplicaSet  2m50s  deployment-controller  Scaled down replica set mytest-59887f89f5 from 10 to 7
Normal  ScalingReplicaSet  2m45s  deployment-controller  Scaled up replica set mytest-8586c6547d from 4 to 7
```

4. This way the cluster keeps 7 available Pods at all times

Let's walk through the whole process.

maxSurge:

Caps how far the total Pod count may exceed the desired replica count during a rolling update. The value can be an absolute number or a percentage; a percentage is rounded up.

In our example the desired count is 10 and maxSurge is 35%, so 10 + 10 * 35% = 13.5, rounded up to 14.

Hence the replica summary of the mytest Deployment: Replicas: 10 desired | 7 updated | 14 total | 7 available | 7 unavailable

10 desired, 7 updated, 14 total (the cap), 7 available, 7 unavailable.

maxUnavailable:

Caps how many Pods may be unavailable during the rollout. Again, either an absolute number or a percentage; unlike maxSurge, a percentage here is rounded down, so availability never drops below the requested level.

In our example maxUnavailable is 35%, so at most 3 Pods (35% of 10, rounded down) may be unavailable, leaving a minimum of 10 - 3 = 7 available.

The complete sequence of this rolling update:

1) From maxSurge the cap is 14 replicas, so 4 new-version Pods are created first, bringing the total to 14.

2) From maxUnavailable at least 7 Pods must stay available (14 total - 7 unavailable = 7 minimum available), so 3 old-version Pods are destroyed.

3) Once those 3 old Pods are gone, 3 more new-version Pods are created to keep the total at 14.

4) As the new-version Pods pass the readiness probe, the available count rises above 7.

5) More old Pods are then destroyed to keep the available count at 7.

6) As old Pods are destroyed, new Pods are created automatically to keep the total at 14.

7) And so on until the update completes.

In our case the rollout stalls at step 4: the new Pods never pass the readiness probe.

In a real production environment this is the point where we use rollout undo to return to the previous revision and keep the service healthy:

```shell
root@k8s-192-168-0-17:~# kubectl rollout history deployment mytest
deployment.apps/mytest 
REVISION  CHANGE-CAUSE
1         kubectl apply --filename=myapp-v1.yaml
2         kubectl apply --filename=myapp-v2.yaml

root@k8s-192-168-0-17:~# kubectl rollout undo deployment mytest --to-revision=1
deployment.apps/mytest rolled back

# then watch how the Pods converge back
kubectl get pod -w
```
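
A stuck rollout like this can also be caught without watching by hand: `kubectl rollout status` waits and exits non-zero once the Deployment's progress deadline (600s by default) expires or the given timeout is hit. A sketch of scripting the rollback:

```shell
# Roll back automatically if the rollout does not complete within the timeout
kubectl rollout status deployment mytest --timeout=180s \
  || kubectl rollout undo deployment mytest --to-revision=1
```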

7 - 007 Use the cluster DNS name server for in-cluster network access

1. Prepare the Service and Deployment YAML

```yaml
apiVersion: v1
kind: Service
metadata:
  creationTimestamp: null
  labels:
    app: web
  name: web
spec:
  ports:
  - port: 80
    protocol: TCP
    targetPort: 80
  selector:
    app: web
status:
  loadBalancer: {}

---

apiVersion: apps/v1
kind: Deployment
metadata:
  labels:
    app: web
  name: web
  namespace: default
spec:
  replicas: 1
  selector:
    matchLabels:
      app: web
  template:
    metadata:
      labels:
        app: web
    spec:
      containers:
      - image: nginx:1.21.6
        name: nginx
```

2. Start a throwaway utility Pod and verify

```shell
kubectl run -it --rm busybox --image=registry.cn-shanghai.aliyuncs.com/acs/busybox:v1.29.2 -- sh
# --rm means the Pod is deleted automatically when you exit its shell (sh)

/ # wget -q -O- http://web
<!DOCTYPE html>
<html>
<head>
<title>Welcome to nginx!</title>
<style>
html { color-scheme: light dark; }
body { width: 35em; margin: 0 auto;
font-family: Tahoma, Verdana, Arial, sans-serif; }
</style>
</head>
<body>
<h1>Welcome to nginx!</h1>
<p>If you see this page, the nginx web server is successfully installed and
working. Further configuration is required.</p>

<p>For online documentation and support please refer to
<a href="http://nginx.org/">nginx.org</a>.<br/>
Commercial support is available at
<a href="http://nginx.com/">nginx.com</a>.</p>

<p><em>Thank you for using nginx.</em></p>
</body>
</html>
```
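
The name `web` is resolved by the cluster DNS; from inside the busybox Pod you can look up the full service record. A sketch (the address shown is illustrative, assuming the default cluster.local domain):

```shell
/ # nslookup web
# Name:      web
# Address 1: 10.68.x.x web.default.svc.cluster.local
```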

8 - 008 Bring an external service into the k8s cluster network

This example exposes an HTTP service with Python and brings it into the k8s cluster network.

1. Start a service outside the k8s cluster

```shell
# Start an HTTP server on any node; python3 is used here
node1@k8s-192-168-0-17:~$ python3 -m http.server 8088 # listening on port 8088
```

2. Create the Service and EndpointSlice YAML

```yaml
# Note: I put both resources' YAML in one file. In production we often do this so that
# all resources of one service can be managed together; separate resources with "---"
apiVersion: v1
kind: Service
metadata:
  name: myhttp
spec:
  ports:
  - name: http-port
    port: 3306         # the port the Service exposes: 3306
    protocol: TCP
  type: ClusterIP      # cluster-internal access only

---

apiVersion: discovery.k8s.io/v1
kind: EndpointSlice
metadata:
  name: myhttp-slice # the name of this EndpointSlice
  labels:
    kubernetes.io/service-name: myhttp  # required: associates this slice with the myhttp Service
addressType: IPv4
ports:
- name: http-port     # must match the Service port name
  port: 8088           # actual port of the external service; this effectively hands 8088 to the port named http-port in the myhttp Service
  protocol: TCP
endpoints:
- addresses:
  - "192.168.0.17"     # 外部http服务IP
  conditions:
    ready: true         # mark the endpoint as ready
```

3. Verify

```shell
node1@k8s-192-168-0-17:~$ sudo kubectl get svc
NAME         TYPE        CLUSTER-IP      EXTERNAL-IP   PORT(S)        AGE
kubernetes   ClusterIP   10.68.0.1       <none>        443/TCP        5d2h
myhttp       ClusterIP   10.68.48.233    <none>        3306/TCP       9s
new-nginx    NodePort    10.68.194.158   <none>        81:30759/TCP   4h19m
node1@k8s-192-168-0-17:~$ curl 10.68.48.233:3306
<!DOCTYPE HTML>
<html lang="en">
<head>
<meta charset="utf-8">
<title>Directory listing for /</title>
</head>
<body>
<h1>Directory listing for /</h1>
<hr>
<ul>
<li><a href=".ansible/">.ansible/</a></li>
<li><a href=".bash_history">.bash_history</a></li>
<li><a href=".bash_logout">.bash_logout</a></li>
<li><a href=".bashrc">.bashrc</a></li>
<li><a href=".cache/">.cache/</a></li>
<li><a href=".profile">.profile</a></li>
<li><a href=".ssh/">.ssh/</a></li>
<li><a href=".sudo_as_admin_successful">.sudo_as_admin_successful</a></li>
<li><a href=".viminfo">.viminfo</a></li>
<li><a href=".Xauthority">.Xauthority</a></li>
<li><a href="httpproxy.yaml">httpproxy.yaml</a></li>
<li><a href="nginx-svc.yaml">nginx-svc.yaml</a></li>
<li><a href="nginx.yaml">nginx.yaml</a></li>
<li><a href="planet">planet</a></li>
<li><a href="ubuntu-install-k8s/">ubuntu-install-k8s/</a></li>
</ul>
<hr>
</body>
</html>
```

4. Notes

The Service here uses ClusterIP. If you switch it to NodePort instead:

```shell
sudo kubectl patch svc myhttp -p '{"spec":{"type":"NodePort"}}'
```

then the service can likewise be reached via any node IP in the cluster; that is not demonstrated again here.
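
To confirm the Service is really backed by the manually created endpoint, list the slice and check the address attached to the Service; a sketch (output abbreviated and illustrative):

```shell
# The slice and the Service should both show the external address and port
kubectl get endpointslice myhttp-slice
kubectl describe svc myhttp | grep -i endpoints
# Endpoints:         192.168.0.17:8088
```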

9 - 009 Run Pods only on nodes carrying a specific label

1. Create the Deployment YAML
```yaml
# the prepared YAML config
apiVersion: apps/v1
kind: Deployment
metadata:
  creationTimestamp: null
  labels:
    app: nginx
  name: nginx
spec:
  replicas: 2
  selector:
    matchLabels:
      app: nginx
  strategy: {}
  template:
    metadata:
      creationTimestamp: null
      labels:
        app: nginx
    spec:
      containers:
      - image: nginx
        name: nginx
        resources: {}
      nodeSelector:           # <--- here
        apps/nginx: "true"    # <--- select nodes carrying this label
status: {}
```

2. Apply the config

```shell
kubectl apply -f node-selector.yaml
```

3. Check the Pods; no node has the label apps/nginx=true yet, so they stay Pending

```shell
node1@k8s-192-168-0-17:~$ sudo kubectl get pod -o wide
NAME                     READY   STATUS    RESTARTS   AGE    IP       NODE     NOMINATED NODE   READINESS GATES
nginx-756c69b65f-7vfv5   0/1     Pending   0          2m5s   <none>   <none>   <none>           <none>
nginx-756c69b65f-8gl9m   0/1     Pending   0          2m4s   <none>   <none>   <none>           <none>
```

4. Label a node

```shell
# first, try labeling the master node
kubectl label node k8s-192-168-0-17 apps/nginx=true
kubectl get node k8s-192-168-0-17 --show-labels 
NAME               STATUS                     ROLES    AGE    VERSION   LABELS
k8s-192-168-0-17   Ready,SchedulingDisabled   master   5d3h   v1.33.1   apps/nginx=true,beta.kubernetes.io/arch=amd64,beta.kubernetes.io/os=linux,kubernetes.io/arch=amd64,kubernetes.io/hostname=k8s-192-168-0-17,kubernetes.io/os=linux,kubernetes.io/role=master
```

Even though the master node (17) now has the label, the Pods still cannot be scheduled there: the node's status is SchedulingDisabled, and that takes precedence.

```shell
# now try labeling a worker node
kubectl label node k8s-192-168-0-19 apps/nginx=true
kubectl get node k8s-192-168-0-19 --show-labels 
NAME               STATUS   ROLES   AGE    VERSION   LABELS
k8s-192-168-0-19   Ready    node    5d3h   v1.33.1   beta.kubernetes.io/arch=amd64,beta.kubernetes.io/os=linux,kubernetes.io/arch=amd64,kubernetes.io/hostname=k8s-192-168-0-19,kubernetes.io/os=linux,kubernetes.io/role=node
# check the Pods again: they go from Pending to ContainerCreating to Running, all on node 19; without a node label constraint the Pods would be spread roughly evenly
kubectl get pod
NAME                     READY   STATUS              RESTARTS   AGE
nginx-756c69b65f-7vfv5   0/1     ContainerCreating   0          5m38s
nginx-756c69b65f-8gl9m   0/1     ContainerCreating   0          5m37s
kubectl get pod -o wide
NAME                     READY   STATUS    RESTARTS   AGE     IP              NODE               NOMINATED NODE   READINESS GATES
nginx-756c69b65f-7vfv5   1/1     Running   0          7m22s   172.20.177.63   k8s-192-168-0-19   <none>           <none>
nginx-756c69b65f-8gl9m   1/1     Running   0          7m21s   172.20.177.62   k8s-192-168-0-19   <none>           <none>
```
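
The same constraint can also be attached to an existing Deployment imperatively with a strategic-merge patch (this triggers a rolling update); a sketch:

```shell
# Add the nodeSelector to a running Deployment via patch
kubectl patch deployment nginx -p \
  '{"spec":{"template":{"spec":{"nodeSelector":{"apps/nginx":"true"}}}}}'
```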

10 - 010 Deploy ingress-nginx-controller

Create an nginx application behind this ingress-nginx-controller, then configure HTTPS access.

Open the YAML file.

Must-read notes:

  1. On a large production cluster, give ingress-nginx a dedicated node that runs only ingress-nginx and no other Pods.

  2. In the kind: ConfigMap section, set data.worker-processes to the CPU core count of the node hosting the ingress-nginx-controller Pod (ideally one less than the actual core count).

  3. data.worker-cpu-affinity in the kind: ConfigMap section is currently empty; just leave it empty.

  4. kind: DaemonSet -- for a self-built cluster, use a DaemonSet controller: it maps the container ports onto the host, so no extra NodePort mapping is needed. On a cloud platform such as Alibaba Cloud ACK, use a Deployment controller instead, because ACK Pods use the cloud host's elastic NICs and can sit in the same network (subnet) as your cloud hosts. This file therefore defaults to kind: DaemonSet; to use kind: Deployment, review the config items marked "Deployment need" and "DaemonSet need".

  5. In the resources section of kind: DaemonSet|Deployment: if limits equals requests, the Pod gets the highest priority in the k8s cluster. When cluster resources run short, k8s evicts lower-priority Pods first to protect the high-priority ones.

  6. If the log reports "mount: mounting rw on /proc/sys failed: Permission denied", uncomment the three items privileged: true, procMount: Default and runAsUser: 0; if the error does not appear, leave them alone.

  7. Label the corresponding nodes:

      nodeSelector:
        boge/ingress-controller-ready: "true"

Add the label:    kubectl label node ${node_hostname} boge/ingress-controller-ready=true
Show labels:      kubectl get node --show-labels
Remove the label: kubectl label node ${node_hostname} boge/ingress-controller-ready-

  8. If ingress-nginx is deployed on its own dedicated node, then after labeling that node it is best to also taint it.
    Add the taint:    kubectl taint nodes xx.xx.xx.xx boge/ingress-controller-ready="true":NoExecute
    Remove the taint: kubectl taint nodes xx.xx.xx.xx boge/ingress-controller-ready:NoExecute-
    If the node carries the taint, uncomment the following block:
    tolerations:
    - effect: NoExecute   # NoExecute evicts already-running Pods that do not tolerate the taint
      key: boge/ingress-controller-ready
      operator: Equal     # the value must match exactly (with Exists, only the key needs to be present)
      value: "true"

This is a Kubernetes Pod toleration definition; it controls whether a Pod may be scheduled onto nodes that carry a specific taint.
So the block above means:

  • Pods with this toleration may be scheduled onto nodes tainted boge/ingress-controller-ready=true:NoExecute, ensuring they run only on the designated nodes.

Now create an nginx application behind the ingress-nginx-controller and then configure HTTPS access.

Create nginx.yaml:

```yaml

---
kind: Service
apiVersion: v1
metadata:
  name: new-nginx
spec:
  selector:
    app: new-nginx
  ports:
    - name: http-port
      port: 80
      protocol: TCP
      targetPort: 80

---
# the Ingress configuration for newer k8s versions
apiVersion: networking.k8s.io/v1
kind: Ingress
metadata:
  name: new-nginx
  annotations:
    #kubernetes.io/ingress.class: "nginx"   # the deprecated annotation-based way
    nginx.ingress.kubernetes.io/force-ssl-redirect: "false"
    nginx.ingress.kubernetes.io/whitelist-source-range: 0.0.0.0/0
    nginx.ingress.kubernetes.io/configuration-snippet: |
      if ($host != 'www.boge.com' ) {
        rewrite ^ http://www.boge.com$request_uri permanent;
      }
spec:
  ingressClassName: nginx-master
  rules:
    - host: boge.com
      http:
        paths:
          - backend:
              service:
                name: new-nginx
                port:
                  number: 80
            path: /
            pathType: Prefix
    - host: m.boge.com
      http:
        paths:
          - backend:
              service:
                name: new-nginx
                port:
                  number: 80
            path: /
            pathType: Prefix
    - host: www.boge.com
      http:
        paths:
          - backend:
              service:
                name: new-nginx
                port:
                  number: 80
            path: /
            pathType: Prefix
#  tls:
#      - hosts:
#          - boge.com
#          - m.boge.com
#          - www.boge.com
#        secretName: boge-com-tls

# tls secret create command:
#   kubectl -n <namespace> create secret tls boge-com-tls --key boge-com.key --cert boge-com.csr

---
apiVersion: apps/v1
kind: Deployment
metadata:
  name: new-nginx
  labels:
    app: new-nginx
spec:
  replicas: 3  # can be sized according to the number of nodes
  selector:
    matchLabels:
      app: new-nginx
  template:
    metadata:
      labels:
        app: new-nginx
    spec:
      containers:
#--------------------------------------------------
      - name: new-nginx
        image: nginx:1.21.6
        env:
          - name: TZ
            value: Asia/Shanghai
        ports:
        - containerPort: 80
        volumeMounts:
          - name: html-files
            mountPath: "/usr/share/nginx/html"
#--------------------------------------------------
      - name: busybox
        image: registry.cn-hangzhou.aliyuncs.com/acs/busybox:v1.29.2
        args:
        - /bin/sh
        - -c
        - >
           while :; do
             if [ -f /html/index.html ];then
               echo "[$(date +%F\ %T)] ${MY_POD_NAMESPACE}-${MY_POD_NAME}-${MY_POD_IP}" > /html/index.html
               sleep 1
             else
               touch /html/index.html
             fi
           done
        env:
          - name: TZ
            value: Asia/Shanghai
          - name: MY_POD_NAME
            valueFrom:
              fieldRef:
                apiVersion: v1
                fieldPath: metadata.name
          - name: MY_POD_NAMESPACE
            valueFrom:
              fieldRef:
                apiVersion: v1
                fieldPath: metadata.namespace
          - name: MY_POD_IP
            valueFrom:
              fieldRef:
                apiVersion: v1
                fieldPath: status.podIP
        volumeMounts:
          - name: html-files
            mountPath: "/html"
          - mountPath: /etc/localtime
            name: tz-config

#--------------------------------------------------
      volumes:
        - name: html-files
          emptyDir:
            medium: Memory
            sizeLimit: 10Mi
        - name: tz-config
          hostPath:
            path: /usr/share/zoneinfo/Asia/Shanghai

```

```shell
kubectl apply -f nginx.yaml

kubectl get ingress
NAME        CLASS          HOSTS                              ADDRESS       PORTS   AGE
new-nginx   nginx-master   boge.com,m.boge.com,www.boge.com   10.68.216.0   80      9m51s

kubectl -n kube-system get pod -o wide|grep nginx-ingress
nginx-ingress-controller-6vtl4             1/1     Running     0              3h1m    192.168.0.11     k8s-192-168-0-11   <none>           <none>
nginx-ingress-controller-tg6pq             1/1     Running     0              3h3m    192.168.0.19     k8s-192-168-0-19   <none>           <none>

```

The controller Pods are now running on nodes 11 and 19.

Now, on another node in the cluster, edit the host's hosts file; either controller node IP (192.168.0.19 or 192.168.0.11) works:

```shell
# append to /etc/hosts
192.168.0.19 boge.com m.boge.com www.boge.com
```

Then:

```shell
curl www.boge.com
[2025-07-08 07:09:25] default-new-nginx-6df56b5c4b-hktqc-172.20.177.13
```

Domain-based access works. Next, configure HTTPS; a self-signed certificate is used here.

First generate a private key:

```shell
openssl genrsa -out boge.key 2048
```

Then generate a TLS certificate from the key (note: the subject is *.boge.com, a wildcard certificate, so any third-level subdomain added later can reuse it):

```shell
openssl req -new -x509 -key boge.key -out boge.csr -days 360 -subj /CN=*.boge.com
```

Load the certificate into the cluster namespace as a Secret (boge-com-tls is the Secret name, --key takes the private key file, --cert the certificate file):

```shell
kubectl -n <namespace> create secret tls boge-com-tls --key boge.key --cert boge.csr

kubectl get secret
NAME           TYPE                DATA   AGE
boge-com-tls   kubernetes.io/tls   2      25m
```

Then edit nginx.yaml:

```yaml
nginx.ingress.kubernetes.io/force-ssl-redirect: "false"   # change "false" to "true"
rewrite ^ http://www.boge.com$request_uri permanent;      # change http:// to https://
spec:
  tls:
  - hosts:
    - www.boge.com
    - boge.com
    - m.boge.com
    secretName: boge-com-tls   # the name of the Secret imported into the cluster
```

Re-apply nginx.yaml. Accessing the site over plain HTTP now returns a 301 redirect:

```shell
curl http://www.boge.com
<html>
<head><title>301 Moved Permanently</title></head>
<body>
<center><h1>301 Moved Permanently</h1></center>
<hr><center>nginx</center>
</body>
</html>
```

So opening http://www.boge.com in Chrome now redirects to https://www.boge.com; because the certificate is self-signed the browser marks it as not secure, just click through and continue.
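
The redirect and the TLS endpoint can be verified from the command line as well; with a self-signed certificate curl needs -k to skip verification (output below is illustrative):

```shell
# -L follows the 301 redirect, -k accepts the self-signed certificate
curl -kL http://www.boge.com
# [2025-07-08 07:10:02] default-new-nginx-6df56b5c4b-hktqc-172.20.177.13
```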

11 - 011 A canary release based on 010

Implement a canary release: send 50% of the traffic to the old nginx and 50% to the new nginx.

```shell
kubectl create deployment old-nginx --image=nginx:1.21.6 --replicas=1
deployment.apps/old-nginx created

kubectl expose deployment old-nginx --port=80 --target-port=80
service/old-nginx exposed
```

Edit nginx.yaml and add the following to metadata.annotations of the kind: Ingress resource:

```yaml
nginx.ingress.kubernetes.io/service-weight: |
    new-nginx: 50, old-nginx: 50
```

Then, under spec.rules, in the http.paths of the host: www.boge.com entry, add:

```yaml
          - backend:
              service:
                name: old-nginx  # the old-version Service
                port:
                  number: 80
            path: /
            pathType: Prefix
```

Finally, re-apply nginx.yaml:

```shell
kubectl apply -f nginx.yaml 
service/new-nginx unchanged
ingress.networking.k8s.io/new-nginx configured
deployment.apps/new-nginx unchanged
```

Visiting https://www.boge.com/ in a browser now shows roughly half of the requests landing on new-nginx and half on old-nginx.
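
The split can also be sampled from the command line; a sketch (-k because of the self-signed certificate, and the first line of the response is enough to tell the two backends apart):

```shell
# Sample 10 requests; new-nginx answers with the timestamped index line,
# old-nginx with the default "Welcome to nginx!" page
for i in $(seq 1 10); do
  curl -sk https://www.boge.com/ | head -1
done
```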

12 - 012 HPA: automatic horizontal scaling of Pods

1. Prepare a Service and a Deployment

```yaml
apiVersion: v1
kind: Service
metadata:
  creationTimestamp: null
  labels:
    app: web
  name: web
spec:
  ports:
  - port: 80
    protocol: TCP
    targetPort: 80
  selector:
    app: web
status:
  loadBalancer: {}

---
    
apiVersion: apps/v1
kind: Deployment
metadata:
  labels:
    app: web
  name: web
  namespace: default
spec:
  replicas: 1
  selector:
    matchLabels:
      app: web
  template:
    metadata:
      labels:
        app: web
    spec:
      containers:
      - image: nginx:1.21.6
        name: nginx
        resources:
          limits:   # test environment here, so only 50 millicores (0.05 CPU) and 20Mi of memory are allocated
            cpu: "50m"
            memory: 20Mi
          requests: # guarantees the Pod can claim this much from the start
            cpu: "50m"
            memory: 20Mi
```

2. Create an HPA

```shell
# autoscale creates a HorizontalPodAutoscaler
# web is the name of the HPA (and of the target Deployment)
# --max=3: scale out to at most 3 replicas
# --min=1: scale in to at least 1 replica
# --cpu-percent=30: scale out when average CPU utilization exceeds 30%
kubectl autoscale deployment web --max=3 --min=1 --cpu-percent=30
kubectl get hpa -w
```

3. In another terminal, start a temporary load-generator Pod

```shell
kubectl run -it --rm busybox --image=registry.cn-shanghai.aliyuncs.com/acs/busybox:v1.29.2 -- sh
/ # while :;do wget -q -O- http://web;done
```

4. Back in the first terminal

```shell
# the output of kubectl get hpa -w
NAME   REFERENCE        TARGETS       MINPODS   MAXPODS   REPLICAS   AGE
web    Deployment/web   cpu: 0%/30%   1         3         1          30s
web    Deployment/web   cpu: 58%/30%   1         3         1          107s
web    Deployment/web   cpu: 100%/30%   1         3         2          2m4s
web    Deployment/web   cpu: 100%/30%   1         3         3          2m22s
web    Deployment/web   cpu: 95%/30%    1         3         3          2m35s

# you can exit the watch now

# describe the web HPA
kubectl describe hpa web

Name:                                                  web
Namespace:                                             default
Labels:                                                <none>
Annotations:                                           <none>
CreationTimestamp:                                     Thu, 10 Jul 2025 16:58:31 +0800
Reference:                                             Deployment/web
Metrics:                                               ( current / target )
  resource cpu on pods  (as a percentage of request):  76% (38m) / 30%
Min replicas:                                          1
Max replicas:                                          3
Deployment pods:                                       3 current / 3 desired
Conditions:
  Type            Status  Reason               Message
  ----            ------  ------               -------
  AbleToScale     True    ScaleDownStabilized  recent recommendations were higher than current one, applying the highest recent recommendation
  ScalingActive   True    ValidMetricFound     the HPA was able to successfully calculate a replica count from cpu resource utilization (percentage of request)
  ScalingLimited  True    TooManyReplicas      the desired replica count is more than the maximum replica count
Events:
  Type    Reason             Age   From                       Message
  ----    ------             ----  ----                       -------
  Normal  SuccessfulRescale  101s  horizontal-pod-autoscaler  New size: 2; reason: cpu resource utilization (percentage of request) above target
  Normal  SuccessfulRescale  84s   horizontal-pod-autoscaler  New size: 3; reason: cpu resource utilization (percentage of request) above target
```

5. Stop the infinite loop in the temporary Pod and keep watching the HPA (the scale-in only takes effect about five minutes after the load stops)

```shell
kubectl get hpa -w
NAME   REFERENCE        TARGETS        MINPODS   MAXPODS   REPLICAS   AGE
web    Deployment/web   cpu: 68%/30%   1         3         3          5m47s
web    Deployment/web   cpu: 83%/30%   1         3         3          5m54s
web    Deployment/web   cpu: 68%/30%   1         3         3          6m9s
web    Deployment/web   cpu: 0%/30%    1         3         3          6m24s   <- CPU drops to 0% at 6m24s

kubectl get hpa -w
NAME   REFERENCE        TARGETS       MINPODS   MAXPODS   REPLICAS   AGE
web    Deployment/web   cpu: 0%/30%   1         3         3          9m45s

kubectl get hpa -w
NAME   REFERENCE        TARGETS       MINPODS   MAXPODS   REPLICAS   AGE
web    Deployment/web   cpu: 0%/30%   1         3         3          11m
web    Deployment/web   cpu: 0%/30%   1         3         3          11m
web    Deployment/web   cpu: 0%/30%   1         3         1          11m   <- scaled in to 1 replica at 11m
```
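
For completeness, the `kubectl autoscale` command above corresponds to a declarative HPA object in the autoscaling/v2 API; a minimal sketch (the heredoc form is my own choice):

```shell
# Declarative equivalent of `kubectl autoscale deployment web --max=3 --min=1 --cpu-percent=30`
cat <<EOF | kubectl apply -f -
apiVersion: autoscaling/v2
kind: HorizontalPodAutoscaler
metadata:
  name: web
spec:
  scaleTargetRef:
    apiVersion: apps/v1
    kind: Deployment
    name: web
  minReplicas: 1
  maxReplicas: 3
  metrics:
  - type: Resource
    resource:
      name: cpu
      target:
        type: Utilization
        averageUtilization: 30
EOF
```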