From 8732a5b479505d2c5ae5ce9f26d557380d3ee1a8 Mon Sep 17 00:00:00 2001 From: ysc <1193978188@qq.com> Date: Fri, 1 Dec 2023 16:40:40 +0800 Subject: [PATCH] =?UTF-8?q?docs=EF=BC=9A=E5=A2=9E=E5=8A=A0quickstart?= =?UTF-8?q?=E9=83=A8=E7=BD=B2=E6=B5=81=E7=A8=8B?= MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit --- chaos/README.md | 88 +++++++++++++++++-- chaos/backend/chaos-backend-deploy.yaml | 2 +- chaos/manifests/chaos-front-Ingress.yaml | 17 ++++ chaos/manifests/chaos-front-NodePort.yaml | 17 ++++ .../manifests/deploy/0chaos-backend-rabc.yaml | 51 +++++++++++ .../manifests/deploy/1chaos-backend-cfg.yaml | 51 +++++++++++ .../deploy/2chaos-backend-deploy.yaml | 43 +++++++++ .../manifests/deploy/3chaos-backend-svc.yaml | 16 ++++ chaos/manifests/deploy/4chaos-front-cfg.yaml | 37 ++++++++ .../manifests/deploy/5chaos-front-deploy.yaml | 40 +++++++++ .../deploy/6chaos-front-service.yaml | 16 ++++ 11 files changed, 369 insertions(+), 9 deletions(-) create mode 100644 chaos/manifests/chaos-front-Ingress.yaml create mode 100644 chaos/manifests/chaos-front-NodePort.yaml create mode 100644 chaos/manifests/deploy/0chaos-backend-rabc.yaml create mode 100644 chaos/manifests/deploy/1chaos-backend-cfg.yaml create mode 100644 chaos/manifests/deploy/2chaos-backend-deploy.yaml create mode 100644 chaos/manifests/deploy/3chaos-backend-svc.yaml create mode 100644 chaos/manifests/deploy/4chaos-front-cfg.yaml create mode 100644 chaos/manifests/deploy/5chaos-front-deploy.yaml create mode 100644 chaos/manifests/deploy/6chaos-front-service.yaml diff --git a/chaos/README.md b/chaos/README.md index 1250397..3ef31a5 100644 --- a/chaos/README.md +++ b/chaos/README.md @@ -1,5 +1,6 @@ # README -`train-ticket`购票系统是由复旦大学SELab开源业务模拟系统,其具体地址在https://github.com/FudanSELab/train-ticket + +`train-ticket`购票系统是由复旦大学SELab开源业务模拟系统,其具体地址在 https://github.com/FudanSELab/train-ticket `Chaos Mesh`业界流行的混沌工程。 @@ -10,67 +11,138 @@ 
![soma-chaos目标](/docs/img/%E6%95%85%E9%9A%9C%E6%A1%88%E4%BE%8B%E9%9B%86%E5%AE%97%E6%97%A8.png) ## soma-chaos效果 + ![soma-chaos效果](/docs/img/%E7%95%8C%E9%9D%A2%E6%95%88%E6%9E%9C%E5%9B%BE.png) ## 项目结构 + - front: 提供用户界面 - backend: 作为后端提供注入故障、展示故障状态等接口,直接与`Kubernetes API-server`交互,对`chaos-mesh`资源进行操作 -构建与部署方式请查阅方式:https://gitee.com/anolis/soma/tree/master/chaos/backend +构建与部署方式请查阅方式: https://gitee.com/anolis/soma/tree/master/chaos/backend + +## 快速开始 + +下面是在 Kubernetes 集群中搭建故障注入平台的步骤: + +### 1. 前置依赖 + +- train-ticket 微服务系统(需使用 skywalking 对其进行链路追踪) + + >安装过程可参考 [FudanSELab/train-ticket: Train Ticket - A Benchmark Microservice System (github.com)](https://github.com/FudanSELab/train-ticket#Using-Kubernete) + + ``` + git clone --depth=1 https://github.com/FudanSELab/train-ticket.git + cd train-ticket/ + make deploy DeployArgs="--with-tracing" + ``` + +- chaos-mesh 混沌工程平台 + + >安装过程可参考 [使用 Helm 安装 Chaos Mesh | Chaos Mesh (chaos-mesh.org)](https://chaos-mesh.org/zh/docs/production-installation-using-helm/) + + ``` + helm repo add chaos-mesh https://charts.chaos-mesh.org + kubectl create ns chaos-mesh + # 默认使用 docker 容器运行时 + helm install chaos-mesh chaos-mesh/chaos-mesh -n=chaos-mesh --set controllerManager.leaderElection.enabled=false --set controllerManager.replicaCount=1 --version 2.6.2 + ``` + +### 2. 部署故障注入平台 + + git clone https://gitee.com/anolis/soma.git + cd soma/chaos/ + kubectl create ns chaos-injection + kubectl create -f manifests/deploy/ + +运行 `kubectl get pods -n chaos-injection` 检查部署状态 + +### 3. 如何访问 + +默认情况下,服务将使用 ClusterIP Service 类型,只能在集群内部访问。如果您只需要在集群内访问服务,无需对外暴露,那么您已经完成了! +如果您希望将服务暴露到集群外部,可以参考以下方式: + +1. 使用 **NodePort Service**,将服务映射到 30008 端口,使用 `http://[node-ip]:30008` 访问故障注入平台页面 + + ``` + kubectl apply -f manifests/chaos-front-NodePort.yaml + ``` + +2. 使用 **Ingress**,通过 `http://[node-ip]/fault-injection` 访问故障注入平台页面 + + ``` + kubectl apply -f manifests/chaos-front-Ingress.yaml + ``` + +3. 
使用 **port-forward**,通过 `http://127.0.0.1:30008` 访问故障注入平台页面 + + ``` + kubectl port-forward -n chaos-injection deploy/chaos-front 30008:80 + ``` ## 已经支持的故障案例 ### 网络类故障案例 + - 丢包率较高 - 重传率较高 - 带宽限制打满 - DNS故障 - TCP建连延时高 - ### 存储类故障案例 + - IO延时高 ### CPU类故障案例 + - 代码自身CPU使用率高 - 共享环境其它进程抢占CPU ### 内存类故障案例 + - FULL GC频率很高 - 共享环境其他进程抢占Memory ### 代码类故障 + - 代码抛出异常导致错误码返回 - HTTP请求返回错误码 ## 即将支持的故障案例 -### 网络类故障: +### 网络类故障 + - 增加意料之外的网络访问 ### 存储类故障案例 + - 存储磁盘占满 ### 代码类故障案例 + - 并发类故障 - 代码锁 ### k8s类故障案例 + - Pod重启 - Ingress故障 ## 未来将支持的故障案例 -### 中间件类型故障: +### 中间件类型故障 + - redis响应慢 - 数据库连接池打满 - 数据库死锁 -### 代码类故障: +### 代码类故障 + - 微服务雪崩 -### 网络类故障: +### 网络类故障 + - 网络连接断连 - TCP 零窗口 - TCP建连不成功 - diff --git a/chaos/backend/chaos-backend-deploy.yaml b/chaos/backend/chaos-backend-deploy.yaml index 0b7074a..1c36246 100644 --- a/chaos/backend/chaos-backend-deploy.yaml +++ b/chaos/backend/chaos-backend-deploy.yaml @@ -61,7 +61,7 @@ metadata: name: chaos-backend-cluster-role rules: - apiGroups: - - chaos-mesh.org/v1alpha1 + - chaos-mesh.org resources: - awschaos - azurechaos diff --git a/chaos/manifests/chaos-front-Ingress.yaml b/chaos/manifests/chaos-front-Ingress.yaml new file mode 100644 index 0000000..a930e2b --- /dev/null +++ b/chaos/manifests/chaos-front-Ingress.yaml @@ -0,0 +1,17 @@ +apiVersion: networking.k8s.io/v1 +kind: Ingress +metadata: + name: chaos-front-ingress + namespace: chaos-injection +spec: + ingressClassName: nginx + rules: + - http: + paths: + - backend: + service: + name: chaos-front-svc + port: + number: 80 + path: /fault-injection + pathType: Prefix \ No newline at end of file diff --git a/chaos/manifests/chaos-front-NodePort.yaml b/chaos/manifests/chaos-front-NodePort.yaml new file mode 100644 index 0000000..3ffb94b --- /dev/null +++ b/chaos/manifests/chaos-front-NodePort.yaml @@ -0,0 +1,17 @@ +apiVersion: v1 +kind: Service +metadata: + name: chaos-front-svc + namespace: chaos-injection + labels: + app: chaos-front +spec: + type: NodePort + selector: + app: chaos-front + 
ports: + - name: http + port: 80 + nodePort: 30008 + protocol: TCP + targetPort: 80 \ No newline at end of file diff --git a/chaos/manifests/deploy/0chaos-backend-rabc.yaml b/chaos/manifests/deploy/0chaos-backend-rabc.yaml new file mode 100644 index 0000000..6773e8d --- /dev/null +++ b/chaos/manifests/deploy/0chaos-backend-rabc.yaml @@ -0,0 +1,51 @@ +apiVersion: v1 +kind: ServiceAccount +metadata: + name: chaos-backend + namespace: chaos-injection +--- +apiVersion: rbac.authorization.k8s.io/v1 +kind: ClusterRole +metadata: + name: chaos-backend-cluster-role +rules: + - apiGroups: + - chaos-mesh.org + resources: + - awschaos + - azurechaos + - blockchaos + - dnschaos + - gcpchaos + - httpchaos + - iochaos + - jvmchaos + - kernelchaos + - networkchaos + - physicalmachinechaos + - physicalmachines + - podchaos + - podhttpchaos + - podiochaos + - podnetworkchaos + - remoteclusters + - schedules + - statuschecks + - stresschaos + - timechaos + - workflownodes + - workflows + verbs: ["get", "list", "watch", "create", "update", "delete"] +--- +apiVersion: rbac.authorization.k8s.io/v1 +kind: ClusterRoleBinding +metadata: + name: chaos-backend-cluster-role-binding +roleRef: + apiGroup: rbac.authorization.k8s.io + kind: ClusterRole + name: chaos-backend-cluster-role +subjects: + - kind: ServiceAccount + name: chaos-backend + namespace: chaos-injection \ No newline at end of file diff --git a/chaos/manifests/deploy/1chaos-backend-cfg.yaml b/chaos/manifests/deploy/1chaos-backend-cfg.yaml new file mode 100644 index 0000000..66b29b7 --- /dev/null +++ b/chaos/manifests/deploy/1chaos-backend-cfg.yaml @@ -0,0 +1,51 @@ +apiVersion: v1 +kind: ConfigMap +metadata: + name: chaos-backend-config + namespace: chaos-injection +data: + chaos_template.yaml: | + chaos_template_list: + - complex_action: networkchaos#delay + describe: "Pod网络延迟200ms" + - complex_action: networkchaos#loss + describe: "Pod丢包30%" + - complex_action: networkchaos#bandwidth + describe: "Pod网络限速为20kbit/s" + note: + 
ts-basic-service: "对该节点注入限速故障对请求时间影响不明显" + ts-travel-plan-service: "对该节点注入限速故障对请求时间影响不明显" + ts-train-service: "对该节点注入限速故障对请求时间影响不明显" + ts-seat-service: "注入该故障后请求时延过高" + # 需要修改ChaosMesh,添加对应的API + # - complex_action: networkchaos#tcpdelay + # describe: "TCP建连延迟200ms" + # note: + # common: "train-ticket服务之间保持长连接, 此故障对请求时间影响不明显" + - complex_action: networkchaos#delay#dns + describe: DNS请求延迟200ms + # 需要修改ChaosDaemon镜像,添加对应的JAVA工具包 + # - complex_action: jvmchaos#ruleData#gcrule + # describe: "增加POD FullGC频率" + # check_entry_defined: true + # - complex_action: jvmchaos#ruleData#cpucost + # describe: "增加处理每个请求的CPU消耗" + # check_entry_defined: true + # - complex_action: jvmchaos#ruleData#exception + # describe: "使方法抛出运行时异常" + # check_entry_defined: true + - complex_action: stresschaos#cpu + describe: "运行额外任务抢占Pod可用的CPU资源" + note: + ts-price-service: "非CPU密集型程序, CPU压力对请求时间影响不明显" + ts-train-service: "非CPU密集型程序, CPU压力对请求时间影响不明显" + - complex_action: stresschaos#memory + describe: "运行额外任务抢占Pod可用的memory资源" + note: + common: "非内存IO密集型程序, 内存压力对请求时间影响不明显" + - complex_action: iochaos#delay + describe: "文件读写延时200ms" + support_services: + - ts-order-service + - complex_action: httpchaos#replace + describe: "返回404错误码" \ No newline at end of file diff --git a/chaos/manifests/deploy/2chaos-backend-deploy.yaml b/chaos/manifests/deploy/2chaos-backend-deploy.yaml new file mode 100644 index 0000000..ecc03f6 --- /dev/null +++ b/chaos/manifests/deploy/2chaos-backend-deploy.yaml @@ -0,0 +1,43 @@ +apiVersion: apps/v1 +kind: Deployment +metadata: + name: chaos-backend + namespace: chaos-injection + labels: + app: chaos-backend +spec: + selector: + matchLabels: + app: chaos-backend + replicas: 1 + strategy: + type: RollingUpdate + template: + metadata: + labels: + app: chaos-backend + spec: + serviceAccount: chaos-backend + containers: + - name: chaos-backend + command: + - /app/chaos-backend + - --authType=serviceAccount + - --skywalkingUIAddr=http://skywalking.train-ticket:12800/graphql + - 
--prometheusUIAddr=http://prometheus-k8s.monitoring:9090 + - --metricSource=skywalking + image: docker.io/kindlingproject/chaos-backend:latest + imagePullPolicy: IfNotPresent + ports: + - name: http + containerPort: 8080 + protocol: TCP + volumeMounts: + - name: chaos-backend-config + mountPath: /app/config/chaos_template.yaml + subPath: chaos_template.yaml + volumes: + - name: chaos-backend-config + configMap: + name: chaos-backend-config + defaultMode: 420 \ No newline at end of file diff --git a/chaos/manifests/deploy/3chaos-backend-svc.yaml b/chaos/manifests/deploy/3chaos-backend-svc.yaml new file mode 100644 index 0000000..14f6fb3 --- /dev/null +++ b/chaos/manifests/deploy/3chaos-backend-svc.yaml @@ -0,0 +1,16 @@ +apiVersion: v1 +kind: Service +metadata: + name: chaos-backend-svc + namespace: chaos-injection + labels: + app: chaos-backend +spec: + type: ClusterIP + selector: + app: chaos-backend + ports: + - name: http + port: 8080 + protocol: TCP + targetPort: 8080 \ No newline at end of file diff --git a/chaos/manifests/deploy/4chaos-front-cfg.yaml b/chaos/manifests/deploy/4chaos-front-cfg.yaml new file mode 100644 index 0000000..1c92cc3 --- /dev/null +++ b/chaos/manifests/deploy/4chaos-front-cfg.yaml @@ -0,0 +1,37 @@ +apiVersion: v1 +kind: ConfigMap +metadata: + name: chaos-front-nginx-config + namespace: chaos-injection +data: + default.conf: | + server { + listen 80; + listen [::]:80; + server_name localhost; + + #access_log /var/log/nginx/host.access.log main; + + location / { + root /usr/share/nginx/html; + index index.html index.htm; + } + + location /fault-injection { + root /usr/share/nginx/html; + try_files $uri $uri/ /index.html; + } + + location /api { + proxy_pass http://chaos-backend-svc.chaos-injection:8080; + } + + #error_page 404 /404.html; + + # redirect server error pages to the static page /50x.html + # + error_page 500 502 503 504 /50x.html; + location = /50x.html { + root /usr/share/nginx/html; + } + } diff --git 
a/chaos/manifests/deploy/5chaos-front-deploy.yaml b/chaos/manifests/deploy/5chaos-front-deploy.yaml new file mode 100644 index 0000000..9c897c7 --- /dev/null +++ b/chaos/manifests/deploy/5chaos-front-deploy.yaml @@ -0,0 +1,40 @@ +apiVersion: apps/v1 +kind: Deployment +metadata: + labels: + app: chaos-front + name: chaos-front + namespace: chaos-injection +spec: + selector: + matchLabels: + app: chaos-front + replicas: 1 + strategy: + type: Recreate + template: + metadata: + labels: + app: chaos-front + spec: + volumes: + - name: chaos-front-nginx-config + configMap: + name: chaos-front-nginx-config + containers: + - name: chaos-front + image: docker.io/kindlingproject/chaos-front:latest + imagePullPolicy: IfNotPresent + ports: + - name: http + containerPort: 80 + protocol: TCP + resources: + limits: + memory: 500Mi + requests: + memory: 30Mi + volumeMounts: + - name: chaos-front-nginx-config + mountPath: /etc/nginx/conf.d/default.conf + subPath: default.conf \ No newline at end of file diff --git a/chaos/manifests/deploy/6chaos-front-service.yaml b/chaos/manifests/deploy/6chaos-front-service.yaml new file mode 100644 index 0000000..3a8995b --- /dev/null +++ b/chaos/manifests/deploy/6chaos-front-service.yaml @@ -0,0 +1,16 @@ +apiVersion: v1 +kind: Service +metadata: + name: chaos-front-svc + namespace: chaos-injection + labels: + app: chaos-front +spec: + type: ClusterIP + selector: + app: chaos-front + ports: + - name: http + port: 80 + protocol: TCP + targetPort: 80 \ No newline at end of file -- Gitee